{-# LANGUAGE ScopedTypeVariables #-}

module Ouroboros.Consensus.Node.ErrorPolicy (consensusErrorPolicy) where

import Control.Monad.Class.MonadAsync (ExceptionInLinkedThread (..))
import Control.ResourceRegistry
  ( RegistryClosedException
  , ResourceRegistryThreadException
  , TempRegistryException
  )
import Data.Proxy (Proxy)
import Data.Time.Clock (DiffTime)
import Data.Typeable (Typeable)
import Ouroboros.Consensus.Block (StandardHash)
import Ouroboros.Consensus.BlockchainTime
import Ouroboros.Consensus.MiniProtocol.BlockFetch.Server
  ( BlockFetchServerException
  )
import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
  ( ChainSyncClientException
  )
import Ouroboros.Consensus.Node.DbLock
import Ouroboros.Consensus.Node.DbMarker (DbMarkerError)
import Ouroboros.Consensus.Storage.ChainDB.API
  ( ChainDbError (..)
  , ChainDbFailure
  )
import Ouroboros.Consensus.Storage.ImmutableDB.API (ImmutableDBError)
import qualified Ouroboros.Consensus.Storage.ImmutableDB.API as ImmutableDB
import Ouroboros.Consensus.Storage.VolatileDB.API (VolatileDBError)
import qualified Ouroboros.Consensus.Storage.VolatileDB.API as VolatileDB
import Ouroboros.Network.ErrorPolicy
import System.FS.API.Types (FsError)

consensusErrorPolicy ::
  forall blk.
  (Typeable blk, StandardHash blk) =>
  Proxy blk ->
  ErrorPolicies
consensusErrorPolicy :: forall blk.
(Typeable blk, StandardHash blk) =>
Proxy blk -> ErrorPolicies
consensusErrorPolicy Proxy blk
pb =
  ErrorPolicies
    { -- Exception raised during connect
      --
      -- This is entirely a network-side concern.
      epConErrorPolicies :: [ErrorPolicy]
epConErrorPolicies = []
    , -- Exception raised during interaction with the peer
      --
      -- The list below should contain an entry for every type declared as an
      -- instance of 'Exception' within ouroboros-consensus.
      --
      -- If a particular exception is not handled by any policy, a default
      -- kicks in, which currently means logging the exception and disconnecting
      -- from the peer (in both directions), but allowing an immediate
      -- reconnect. This is fine for exceptions that only affect that peer.
      -- It is however essential that we handle exceptions here that /must/
      -- shut down the node (mainly storage layer errors).
      --
      -- TODO: Talk to devops about what they should do when the node does
      -- terminate with a storage layer exception (restart with full recovery).
      epAppErrorPolicies :: [ErrorPolicy]
epAppErrorPolicies =
        [ -- Any exceptions in the storage layer should terminate the node
          --
          -- NOTE: We do not catch IOExceptions here; they /ought/ to be caught
          -- by the FS layer (and turn into FsError). If we do want to catch
          -- them, we'd somehow have to distinguish between IO exceptions
          -- arising from disk I/O (shutdownNode) and those arising from
          -- network failures (SuspendConsumer).
          (DbMarkerError -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((DbMarkerError -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (DbMarkerError -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(DbMarkerError
_ :: DbMarkerError) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , (DbLocked -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((DbLocked -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy)
-> (DbLocked -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(DbLocked
_ :: DbLocked) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , (ChainDbFailure blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ChainDbFailure blk -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ChainDbFailure blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ChainDbFailure blk
_ :: ChainDbFailure blk) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , (VolatileDBError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((VolatileDBError blk -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (VolatileDBError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(VolatileDBError blk
e :: VolatileDBError blk) ->
            case VolatileDBError blk
e of
              VolatileDB.ApiMisuse{} -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
              VolatileDB.UnexpectedFailure{} -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , (ImmutableDBError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ImmutableDBError blk -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ImmutableDBError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ImmutableDBError blk
e :: ImmutableDBError blk) ->
            case ImmutableDBError blk
e of
              ImmutableDB.ApiMisuse{} -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
              ImmutableDB.UnexpectedFailure{} -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , (FsError -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((FsError -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy)
-> (FsError -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(FsError
_ :: FsError) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , -- When the system clock moved back, we have to restart the node.
          (SystemClockMovedBackException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((SystemClockMovedBackException
  -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (SystemClockMovedBackException
    -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(SystemClockMovedBackException
_ :: SystemClockMovedBackException) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , -- Some chain DB errors are indicative of a bug in our code, others
          -- indicate an invalid request from the peer. If the DB is closed
          -- entirely, it will only be reopened after a node restart.
          (ChainDbError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ChainDbError blk -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ChainDbError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ChainDbError blk
e :: ChainDbError blk) ->
            case ChainDbError blk
e of
              ClosedDBError{} -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
              ClosedFollowerError{} -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
              InvalidIteratorRange{} -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
theyBuggyOrEvil
        , -- We have some resource registries that are used per-connection,
          -- and so if we have ResourceRegistry related exception, we close
          -- the connection but leave the rest of the node running.
          (RegistryClosedException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((RegistryClosedException -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (RegistryClosedException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(RegistryClosedException
_ :: RegistryClosedException) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
        , (ResourceRegistryThreadException
 -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ResourceRegistryThreadException
  -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ResourceRegistryThreadException
    -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ResourceRegistryThreadException
_ :: ResourceRegistryThreadException) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
        , (TempRegistryException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((TempRegistryException -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (TempRegistryException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(TempRegistryException
_ :: TempRegistryException) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
        , -- An exception in the block fetch server meant the client asked
          -- for some blocks we used to have but got GCed. This means the
          -- peer is on a chain that forks off more than @k@ blocks away.
          (BlockFetchServerException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((BlockFetchServerException -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (BlockFetchServerException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(BlockFetchServerException
_ :: BlockFetchServerException) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
distantPeer
        , -- Chain sync client exceptions indicate malicious behaviour. When we
          -- have diverged too much from a client, making it no longer
          -- interesting to us, we terminate with a result.
          (ChainSyncClientException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ChainSyncClientException -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ChainSyncClientException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ChainSyncClientException
_ :: ChainSyncClientException) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
theyBuggyOrEvil
        , -- Dispatch on nested exception
          (ExceptionInLinkedThread -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ExceptionInLinkedThread -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ExceptionInLinkedThread -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ExceptionInLinkedThread String
_ SomeException
e) ->
            SomeException -> [ErrorPolicy] -> Maybe (SuspendDecision DiffTime)
forall e.
Exception e =>
e -> [ErrorPolicy] -> Maybe (SuspendDecision DiffTime)
evalErrorPolicies SomeException
e (ErrorPolicies -> [ErrorPolicy]
epAppErrorPolicies (Proxy blk -> ErrorPolicies
forall blk.
(Typeable blk, StandardHash blk) =>
Proxy blk -> ErrorPolicies
consensusErrorPolicy Proxy blk
pb))
        ]
    }
 where
  -- Shutdown the node. If we have a storage layer failure, the node /must/
  -- be restarted (triggering recovery).
  shutdownNode :: SuspendDecision DiffTime
  shutdownNode :: SuspendDecision DiffTime
shutdownNode = SuspendDecision DiffTime
forall t. SuspendDecision t
Throw

  -- Peer is either on a distant chain (one that forks more than k blocks ago)
  -- or else is just too far behind; the chain sync client doesn't really have
  -- any way of distinguishing between these two cases. If they are merely
  -- far behind, we might want to reconnect to them later.
  distantPeer :: SuspendDecision DiffTime
  distantPeer :: SuspendDecision DiffTime
distantPeer = DiffTime -> SuspendDecision DiffTime
forall t. t -> SuspendDecision t
SuspendConsumer DiffTime
defaultDelay

  -- The peer sent us some data that they could have known was invalid.
  -- This can only be due to a bug or malice.
  theyBuggyOrEvil :: SuspendDecision DiffTime
  theyBuggyOrEvil :: SuspendDecision DiffTime
theyBuggyOrEvil = DiffTime -> DiffTime -> SuspendDecision DiffTime
forall t. t -> t -> SuspendDecision t
SuspendPeer DiffTime
defaultDelay DiffTime
defaultDelay

  -- Something went wrong due to a bug in our code. We disconnect from the
  -- peer, but allow to try again later in the hope the bug was transient.
  -- We do not close the connection in the other direction; if the bug was
  -- indeed local, it might not affect communication in the other direction.
  ourBug :: SuspendDecision DiffTime
  ourBug :: SuspendDecision DiffTime
ourBug = DiffTime -> SuspendDecision DiffTime
forall t. t -> SuspendDecision t
SuspendConsumer DiffTime
defaultDelay

  -- Default delay
  --
  -- We might want to tweak the delays for the various different kinds of
  -- problems, but we'd need to establish a policy on how to set them.
  defaultDelay :: DiffTime
  defaultDelay :: DiffTime
defaultDelay = DiffTime
200 -- seconds