{-# LANGUAGE ScopedTypeVariables #-}

module Ouroboros.Consensus.Node.ErrorPolicy (consensusErrorPolicy) where

import           Control.Monad.Class.MonadAsync (ExceptionInLinkedThread (..))
import           Control.ResourceRegistry (RegistryClosedException,
                     ResourceRegistryThreadException, TempRegistryException)
import           Data.Proxy (Proxy)
import           Data.Time.Clock (DiffTime)
import           Data.Typeable (Typeable)
import           Ouroboros.Consensus.Block (StandardHash)
import           Ouroboros.Consensus.BlockchainTime
import           Ouroboros.Consensus.MiniProtocol.BlockFetch.Server
                     (BlockFetchServerException)
import           Ouroboros.Consensus.MiniProtocol.ChainSync.Client
                     (ChainSyncClientException)
import           Ouroboros.Consensus.Node.DbLock
import           Ouroboros.Consensus.Node.DbMarker (DbMarkerError)
import           Ouroboros.Consensus.Storage.ChainDB.API (ChainDbError (..),
                     ChainDbFailure)
import           Ouroboros.Consensus.Storage.ImmutableDB.API (ImmutableDBError)
import qualified Ouroboros.Consensus.Storage.ImmutableDB.API as ImmutableDB
import           Ouroboros.Consensus.Storage.VolatileDB.API (VolatileDBError)
import qualified Ouroboros.Consensus.Storage.VolatileDB.API as VolatileDB
import           Ouroboros.Network.ErrorPolicy
import           System.FS.API.Types (FsError)

consensusErrorPolicy ::
     forall blk. (Typeable blk, StandardHash blk)
  => Proxy blk
  -> ErrorPolicies
consensusErrorPolicy :: forall blk.
(Typeable blk, StandardHash blk) =>
Proxy blk -> ErrorPolicies
consensusErrorPolicy Proxy blk
pb = ErrorPolicies {
      -- Exception raised during connect
      --
      -- This is entirely a network-side concern.
      epConErrorPolicies :: [ErrorPolicy]
epConErrorPolicies = []

      -- Exception raised during interaction with the peer
      --
      -- The list below should contain an entry for every type declared as an
      -- instance of 'Exception' within ouroboros-consensus.
      --
      -- If a particular exception is not handled by any policy, a default
      -- kicks in, which currently means logging the exception and disconnecting
      -- from the peer (in both directions), but allowing an immediate
      -- reconnect. This is fine for exceptions that only affect that peer.
      -- It is however essential that we handle exceptions here that /must/
      -- shut down the node (mainly storage layer errors).
      --
      -- TODO: Talk to devops about what they should do when the node does
      -- terminate with a storage layer exception (restart with full recovery).
    , epAppErrorPolicies :: [ErrorPolicy]
epAppErrorPolicies = [
          -- Any exceptions in the storage layer should terminate the node
          --
          -- NOTE: We do not catch IOExceptions here; they /ought/ to be caught
          -- by the FS layer (and turn into FsError). If we do want to catch
          -- them, we'd somehow have to distinguish between IO exceptions
          -- arising from disk I/O (shutdownNode) and those arising from
          -- network failures (SuspendConsumer).
          (DbMarkerError -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((DbMarkerError -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (DbMarkerError -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(DbMarkerError
_ :: DbMarkerError)        -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , (DbLocked -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((DbLocked -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy)
-> (DbLocked -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(DbLocked
_ :: DbLocked)             -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , (ChainDbFailure blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ChainDbFailure blk -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ChainDbFailure blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ChainDbFailure blk
_ :: ChainDbFailure blk)   -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , (VolatileDBError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((VolatileDBError blk -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (VolatileDBError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(VolatileDBError blk
e :: VolatileDBError blk)  ->
            case VolatileDBError blk
e of
              VolatileDB.ApiMisuse{}         -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
              VolatileDB.UnexpectedFailure{} -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , (ImmutableDBError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ImmutableDBError blk -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ImmutableDBError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ImmutableDBError blk
e :: ImmutableDBError blk) ->
            case ImmutableDBError blk
e of
              ImmutableDB.ApiMisuse{}         -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
              ImmutableDB.UnexpectedFailure{} -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
        , (FsError -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((FsError -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy)
-> (FsError -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(FsError
_ :: FsError) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode

          -- When the system clock moved back, we have to restart the node.
        , (SystemClockMovedBackException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((SystemClockMovedBackException
  -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (SystemClockMovedBackException
    -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(SystemClockMovedBackException
_ :: SystemClockMovedBackException) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode

          -- Some chain DB errors are indicative of a bug in our code, others
          -- indicate an invalid request from the peer. If the DB is closed
          -- entirely, it will only be reopened after a node restart.
        , (ChainDbError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ChainDbError blk -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ChainDbError blk -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ChainDbError blk
e :: ChainDbError blk) ->
            case ChainDbError blk
e of
              ClosedDBError{}        -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
shutdownNode
              ClosedFollowerError{}  -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
              InvalidIteratorRange{} -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
theyBuggyOrEvil

          -- We have some resource registries that are used per-connection,
          -- and so if we have ResourceRegistry related exception, we close
          -- the connection but leave the rest of the node running.
        , (RegistryClosedException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((RegistryClosedException -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (RegistryClosedException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(RegistryClosedException
_ :: RegistryClosedException)         -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
        , (ResourceRegistryThreadException
 -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ResourceRegistryThreadException
  -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ResourceRegistryThreadException
    -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ResourceRegistryThreadException
_ :: ResourceRegistryThreadException) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug
        , (TempRegistryException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((TempRegistryException -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (TempRegistryException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(TempRegistryException
_ :: TempRegistryException)           -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
ourBug

          -- An exception in the block fetch server meant the client asked
          -- for some blocks we used to have but got GCed. This means the
          -- peer is on a chain that forks off more than @k@ blocks away.
        , (BlockFetchServerException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((BlockFetchServerException -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (BlockFetchServerException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(BlockFetchServerException
_ :: BlockFetchServerException) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
distantPeer

          -- Chain sync client exceptions indicate malicious behaviour. When we
          -- have diverged too much from a client, making it no longer
          -- interesting to us, we terminate with a result.
        , (ChainSyncClientException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ChainSyncClientException -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ChainSyncClientException -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ChainSyncClientException
_ :: ChainSyncClientException) -> SuspendDecision DiffTime -> Maybe (SuspendDecision DiffTime)
forall a. a -> Maybe a
Just SuspendDecision DiffTime
theyBuggyOrEvil

          -- Dispatch on nested exception
        , (ExceptionInLinkedThread -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall e.
Exception e =>
(e -> Maybe (SuspendDecision DiffTime)) -> ErrorPolicy
ErrorPolicy ((ExceptionInLinkedThread -> Maybe (SuspendDecision DiffTime))
 -> ErrorPolicy)
-> (ExceptionInLinkedThread -> Maybe (SuspendDecision DiffTime))
-> ErrorPolicy
forall a b. (a -> b) -> a -> b
$ \(ExceptionInLinkedThread String
_ SomeException
e) ->
            SomeException -> [ErrorPolicy] -> Maybe (SuspendDecision DiffTime)
forall e.
Exception e =>
e -> [ErrorPolicy] -> Maybe (SuspendDecision DiffTime)
evalErrorPolicies SomeException
e (ErrorPolicies -> [ErrorPolicy]
epAppErrorPolicies (Proxy blk -> ErrorPolicies
forall blk.
(Typeable blk, StandardHash blk) =>
Proxy blk -> ErrorPolicies
consensusErrorPolicy Proxy blk
pb))
        ]
    }
  where
    -- Shutdown the node. If we have a storage layer failure, the node /must/
    -- be restarted (triggering recovery).
    shutdownNode :: SuspendDecision DiffTime
    shutdownNode :: SuspendDecision DiffTime
shutdownNode = SuspendDecision DiffTime
forall t. SuspendDecision t
Throw

    -- Peer is either on a distant chain (one that forks more than k blocks ago)
    -- or else is just too far behind; the chain sync client doesn't really have
    -- any way of distinguishing between these two cases. If they are merely
    -- far behind, we might want to reconnect to them later.
    distantPeer :: SuspendDecision DiffTime
    distantPeer :: SuspendDecision DiffTime
distantPeer = DiffTime -> SuspendDecision DiffTime
forall t. t -> SuspendDecision t
SuspendConsumer DiffTime
defaultDelay

    -- The peer sent us some data that they could have known was invalid.
    -- This can only be due to a bug or malice.
    theyBuggyOrEvil :: SuspendDecision DiffTime
    theyBuggyOrEvil :: SuspendDecision DiffTime
theyBuggyOrEvil = DiffTime -> DiffTime -> SuspendDecision DiffTime
forall t. t -> t -> SuspendDecision t
SuspendPeer DiffTime
defaultDelay DiffTime
defaultDelay

    -- Something went wrong due to a bug in our code. We disconnect from the
    -- peer, but allow to try again later in the hope the bug was transient.
    -- We do not close the connection in the other direction; if the bug was
    -- indeed local, it might not affect communication in the other direction.
    ourBug :: SuspendDecision DiffTime
    ourBug :: SuspendDecision DiffTime
ourBug = DiffTime -> SuspendDecision DiffTime
forall t. t -> SuspendDecision t
SuspendConsumer DiffTime
defaultDelay

    -- Default delay
    --
    -- We might want to tweak the delays for the various different kinds of
    -- problems, but we'd need to establish a policy on how to set them.
    defaultDelay :: DiffTime
    defaultDelay :: DiffTime
defaultDelay = DiffTime
200 -- seconds