diff --git a/ouroboros-consensus-diffusion/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md b/ouroboros-consensus-diffusion/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md new file mode 100644 index 0000000000..edc421fd17 --- /dev/null +++ b/ouroboros-consensus-diffusion/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md @@ -0,0 +1,3 @@ +### Breaking + +- Implemented a first version of CSJ (ChainSync Jumping). (disabled by default) diff --git a/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal b/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal index 65774575e3..26c461887c 100644 --- a/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal +++ b/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal @@ -229,6 +229,7 @@ test-suite consensus-test Test.Consensus.Genesis.Setup.Classifiers Test.Consensus.Genesis.Setup.GenChains Test.Consensus.Genesis.Tests + Test.Consensus.Genesis.Tests.CSJ Test.Consensus.Genesis.Tests.DensityDisconnect Test.Consensus.Genesis.Tests.LoE Test.Consensus.Genesis.Tests.LoP @@ -244,6 +245,7 @@ test-suite consensus-test Test.Consensus.PeerSimulator.ChainSync Test.Consensus.PeerSimulator.Config Test.Consensus.PeerSimulator.Handlers + Test.Consensus.PeerSimulator.NodeLifecycle Test.Consensus.PeerSimulator.Resources Test.Consensus.PeerSimulator.Run Test.Consensus.PeerSimulator.ScheduledBlockFetchServer @@ -257,8 +259,10 @@ test-suite consensus-test Test.Consensus.PeerSimulator.Tests.Timeouts Test.Consensus.PeerSimulator.Trace Test.Consensus.PointSchedule + Test.Consensus.PointSchedule.NodeState Test.Consensus.PointSchedule.Peers Test.Consensus.PointSchedule.Shrinking + Test.Consensus.PointSchedule.Shrinking.Tests Test.Consensus.PointSchedule.SinglePeer Test.Consensus.PointSchedule.SinglePeer.Indices Test.Consensus.PointSchedule.Tests diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Network/NodeToNode.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Network/NodeToNode.hs index 993b2c4d37..4dbf6a9361 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Network/NodeToNode.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Network/NodeToNode.hs @@ -540,10 +540,11 @@ mkApps :: -> ByteLimits bCS bBF bTX bKA -> m ChainSyncTimeout -> CsClient.ChainSyncLoPBucketConfig + -> CsClient.CSJConfig -> ReportPeerMetrics m (ConnectionId addrNTN) -> Handlers m addrNTN blk -> Apps m addrNTN bCS bBF bTX bKA bPS NodeToNodeInitiatorResult () -mkApps kernel Tracers {..} mkCodecs ByteLimits {..} genChainSyncTimeout lopBucketConfig ReportPeerMetrics {..} Handlers {..} = +mkApps kernel Tracers {..} mkCodecs ByteLimits {..} genChainSyncTimeout lopBucketConfig csjConfig ReportPeerMetrics {..} Handlers {..} = Apps {..} where aChainSyncClient @@ -572,6 +573,7 @@ mkApps kernel Tracers {..} mkCodecs ByteLimits {..} genChainSyncTimeout lopBucke them version lopBucketConfig + csjConfig $ \csState -> do chainSyncTimeout <- genChainSyncTimeout (r, trailing) <- @@ -593,6 +595,7 @@ mkApps kernel Tracers {..} mkCodecs ByteLimits {..} genChainSyncTimeout lopBucke , CsClient.idling = csvIdling csState , CsClient.loPBucket = csvLoPBucket csState , CsClient.setLatestSlot = csvSetLatestSlot csState + , CsClient.jumping = csvJumping csState } return (ChainSyncInitiatorResult r, trailing) diff --git 
a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs index 61a28b8f65..b2e0609579 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs @@ -81,7 +81,7 @@ import Ouroboros.Consensus.Fragment.InFuture (CheckInFuture, import qualified Ouroboros.Consensus.Fragment.InFuture as InFuture import Ouroboros.Consensus.Ledger.Extended (ExtLedgerState (..)) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncLoPBucketConfig (..)) + (CSJConfig (..), ChainSyncLoPBucketConfig (..)) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck import qualified Ouroboros.Consensus.Network.NodeToClient as NTC import qualified Ouroboros.Consensus.Network.NodeToNode as NTN @@ -252,6 +252,9 @@ data LowLevelRunNodeArgs m addrNTN addrNTC versionDataNTN versionDataNTC blk -- | See 'CsClient.ChainSyncLoPBucketConfig' , llrnChainSyncLoPBucketConfig :: ChainSyncLoPBucketConfig + -- | See 'CsClient.CSJConfig' + , llrnCSJConfig :: CSJConfig + -- | How to run the data diffusion applications -- -- 'run' will not return before this does. @@ -519,6 +522,7 @@ runWith RunNodeArgs{..} encAddrNtN decAddrNtN LowLevelRunNodeArgs{..} = NTN.byteLimits llrnChainSyncTimeout llrnChainSyncLoPBucketConfig + llrnCSJConfig (reportMetric Diffusion.peerMetricsConfiguration peerMetrics) (NTN.mkHandlers nodeKernelArgs nodeKernel) @@ -857,6 +861,7 @@ stdLowLevelRunNodeArgsIO RunNodeArgs{ rnProtocolInfo { llrnBfcSalt , llrnChainSyncTimeout = fromMaybe Diffusion.defaultChainSyncTimeout srnChainSyncTimeout , llrnChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled + , llrnCSJConfig = CSJDisabled , llrnCustomiseHardForkBlockchainTimeArgs = id , llrnGsmAntiThunderingHerd , llrnKeepAliveRng diff --git a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs index 929826cc80..90dffaa070 100644 --- a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs +++ b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs @@ -1058,6 +1058,7 @@ runThreadNetwork systemTime ThreadNetworkArgs , idleTimeout = waitForever }) CSClient.ChainSyncLoPBucketDisabled + CSClient.CSJDisabled nullMetric -- The purpose of this test is not testing protocols, so -- returning constant empty list is fine if we have thorough diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Main.hs b/ouroboros-consensus-diffusion/test/consensus-test/Main.hs index 0f54918b1d..e7174e7503 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Main.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Main.hs @@ -5,6 +5,7 @@ import qualified Test.Consensus.GSM (tests) import qualified Test.Consensus.HardFork.Combinator (tests) import qualified Test.Consensus.Node (tests) import qualified Test.Consensus.PeerSimulator.Tests (tests) +import qualified Test.Consensus.PointSchedule.Shrinking.Tests (tests) import qualified Test.Consensus.PointSchedule.Tests (tests) import Test.Tasty import Test.Util.TestEnv (defaultMainWithTestEnv, @@ -25,5 +26,6 @@ tests = , Test.Consensus.Genesis.Tests.tests , testGroup "GSM" Test.Consensus.GSM.tests , 
Test.Consensus.PeerSimulator.Tests.tests + , Test.Consensus.PointSchedule.Shrinking.Tests.tests , Test.Consensus.PointSchedule.Tests.tests ] diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/BlockTree.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/BlockTree.hs index 8f08045b7d..ac680ae2c8 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/BlockTree.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/BlockTree.hs @@ -79,7 +79,7 @@ mkTrunk btTrunk = BlockTree { btTrunk, btBranches = [] } -- | Add a branch to an existing block tree. -- --- PRECONDITION: The given fragment intersects with the trunk or its anchor. +-- Yields @Nothing@ if the given fragment does not intersect with the trunk or its anchor. -- -- FIXME: we should enforce that the branch's prefix shares the same anchor as -- the trunk. @@ -94,7 +94,7 @@ addBranch branch bt = do let btbFull = fromJust $ AF.join btbPrefix btbSuffix pure $ bt { btBranches = BlockTreeBranch { .. } : btBranches bt } --- | Same as @addBranch@ but assumes that the precondition holds. +-- | Same as @addBranch@ but calls to 'error' if the former yields 'Nothing'. addBranch' :: AF.HasHeader blk => AF.AnchoredFragment blk -> BlockTree blk -> BlockTree blk addBranch' branch blockTree = fromMaybe (error "addBranch': precondition does not hold") $ addBranch branch blockTree diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs index c208b44d45..ae6eb10095 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs @@ -35,7 +35,7 @@ import Test.QuickCheck import Test.Util.Orphans.IOLike () import Test.Util.QuickCheck (forAllGenRunShrinkCheck) import Test.Util.TestBlock (TestBlock) -import Test.Util.Tracer (recordingTracerTVar) +import Test.Util.Tracer (recordingTracerM) import Text.Printf (printf) @@ -56,7 +56,7 @@ runGenesisTest :: RunGenesisTestResult runGenesisTest schedulerConfig genesisTest = runSimStrictShutdownOrThrow $ do - (recordingTracer, getTrace) <- recordingTracerTVar + (recordingTracer, getTrace) <- recordingTracerM let tracer = if scDebug schedulerConfig then debugTracer else recordingTracer traceLinesWith tracer $ prettyGenesisTest prettyPeersSchedule genesisTest @@ -104,6 +104,8 @@ forAllGenesisTest generator schedulerConfig shrinker mkProperty = classify (genesisWindowAfterIntersection cls) "Full genesis window after intersection" $ classify (adversaryRollback schCls) "An adversary did a rollback" $ classify (honestRollback schCls) "The honest peer did a rollback" $ + classify (allAdversariesEmpty schCls) "All adversaries have empty schedules" $ + classify (allAdversariesTrivial schCls) "All adversaries have trivial schedules" $ tabulate "Adversaries killed by LoP" [printf "%.1f%%" $ adversariesKilledByLoP resCls] $ tabulate "Adversaries killed by GDD" [printf "%.1f%%" $ adversariesKilledByGDD resCls] $ tabulate "Adversaries killed by Timeout" [printf "%.1f%%" $ adversariesKilledByTimeout resCls] $ diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs index a33f1d91a2..559b6f1712 100644 --- 
a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs @@ -199,9 +199,15 @@ resultClassifiers GenesisTest{gtSchedule} RunGenesisTestResult{rgtrStateView} = data ScheduleClassifiers = ScheduleClassifiers{ -- | There is an adversary that did a rollback - adversaryRollback :: Bool, + adversaryRollback :: Bool, -- | The honest peer did a rollback - honestRollback :: Bool + honestRollback :: Bool, + -- | All adversaries have an empty schedule: the only way to disconnect them is + -- network timeouts. + allAdversariesEmpty :: Bool, + -- | All adversaries have trivial schedules: they only have an initial state, and + -- do nothing afterwards. + allAdversariesTrivial :: Bool } scheduleClassifiers :: GenesisTestFull TestBlock -> ScheduleClassifiers @@ -209,6 +215,8 @@ scheduleClassifiers GenesisTest{gtSchedule = schedule} = ScheduleClassifiers { adversaryRollback , honestRollback + , allAdversariesEmpty + , allAdversariesTrivial } where hasRollback :: PeerSchedule TestBlock -> Bool @@ -247,6 +255,15 @@ scheduleClassifiers GenesisTest{gtSchedule = schedule} = honestRollback = value $ honest rollbacks + allAdversariesEmpty = all value $ others $ null <$> schedule + + isTrivial :: PeerSchedule TestBlock -> Bool + isTrivial = \case + [] -> True + (t0, _):points -> all ((== t0) . fst) points + + allAdversariesTrivial = all value $ others $ isTrivial <$> schedule + simpleHash :: HeaderHash block ~ TestHash => ChainHash block -> diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs index 9975f37c45..70bbceb8fa 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs @@ -125,10 +125,11 @@ genChains genNumForks = do gtSlotLength, gtChainSyncTimeouts = chainSyncTimeouts gtSlotLength asc, gtBlockFetchTimeouts = blockFetchTimeouts, - gtLoPBucketParams = LoPBucketParams { lbpCapacity = 10_000, lbpRate = 1_000 }, + gtLoPBucketParams = LoPBucketParams { lbpCapacity = 100_000, lbpRate = 1_000 }, -- ^ REVIEW: Do we want to generate those randomly? For now, the chosen -- values carry no special meaning. Someone needs to think about what values -- would make for interesting tests. + gtCSJParams = CSJParams $ fromIntegral scg, gtBlockTree = foldl' (flip BT.addBranch') (BT.mkTrunk goodChain) $ zipWith (genAdversarialFragment goodBlocks) [1..] 
alternativeChainSchemas, gtSchedule = () } diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests.hs index 6c21341d67..6c8776bb15 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests.hs @@ -1,5 +1,6 @@ module Test.Consensus.Genesis.Tests (tests) where +import qualified Test.Consensus.Genesis.Tests.CSJ as CSJ import qualified Test.Consensus.Genesis.Tests.DensityDisconnect as GDD import qualified Test.Consensus.Genesis.Tests.LoE as LoE import qualified Test.Consensus.Genesis.Tests.LongRangeAttack as LongRangeAttack @@ -9,7 +10,8 @@ import Test.Tasty tests :: TestTree tests = testGroup "Genesis tests" - [ GDD.tests + [ CSJ.tests + , GDD.tests , LongRangeAttack.tests , LoE.tests , LoP.tests diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs new file mode 100644 index 0000000000..25da0a1dce --- /dev/null +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs @@ -0,0 +1,137 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NamedFieldPuns #-} + +module Test.Consensus.Genesis.Tests.CSJ (tests) where + +import Control.Monad (replicateM) +import Data.Containers.ListUtils (nubOrd) +import Data.Functor (($>)) +import Data.List (nub) +import Data.Maybe (mapMaybe) +import Ouroboros.Consensus.Block (blockSlot, succWithOrigin) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (TraceChainSyncClientEvent (..)) +import Ouroboros.Consensus.Util.Condense (PaddingDirection (..), + condenseListWithPadding) +import qualified Ouroboros.Network.AnchoredFragment as AF +import Test.Consensus.BlockTree (BlockTree (..)) +import Test.Consensus.Genesis.Setup +import Test.Consensus.Genesis.Tests.Uniform (genUniformSchedulePoints) +import Test.Consensus.PeerSimulator.Run (SchedulerConfig (..), + defaultSchedulerConfig) +import Test.Consensus.PeerSimulator.StateView (StateView (..)) +import Test.Consensus.PeerSimulator.Trace (TraceEvent (..)) +import Test.Consensus.PointSchedule +import Test.Consensus.PointSchedule.Peers (Peer (..), Peers (..), + mkPeers) +import Test.Tasty +import Test.Tasty.QuickCheck +import Test.Util.Orphans.IOLike () +import Test.Util.TestBlock (Header, TestBlock) +import Test.Util.TestEnv (adjustQuickCheckMaxSize) + +tests :: TestTree +tests = + adjustQuickCheckMaxSize (`div` 5) $ + testGroup + "CSJ" + [ testGroup "Happy Path" + [ testProperty "synchronous" $ prop_happyPath True + , testProperty "asynchronous" $ prop_happyPath False + ] + ] + +-- | Test of the “happy path” scenario of ChainSync Jumping (CSJ). +-- +-- This test features one chain (ie. a block tree that is only trunk) and only +-- honest peers and syncs the chain in question with CSJ enabled. What we expect +-- to observe is that one of the honest peers becomes the dynamo while the +-- others become jumpers. Because the jumpers will agree to all the jumps, the +-- whole syncing should happen with CSJ without objectors. +-- +-- The final property is that headers should only ever be downloaded once and +-- only from one peer (the dynamo). This is true except when almost caught-up: +-- when the dynamo is caught-up, it gets disengaged and one of the jumpers takes +-- its place and starts serving headers. 
This might lead to duplication of +-- headers, but only in a window of @jumpSize@ slots near the tip of the chain. +-- +-- The boolean differentiates between “synchronous” and “asynchronous” +-- scenarios. In a synchronous scenario, all the honest peers have the same +-- schedule: they serve the chain exactly in the same way. In the asynchronous +-- scenario, a random schedule is generated for each peer (but they still serve +-- the same chain). +prop_happyPath :: Bool -> Property +prop_happyPath synchronized = + forAllGenesisTest + ( do + gt <- genChains $ pure 0 + honest <- genHonestSchedule gt + numOthers <- choose (1, 3) + otherHonests <- if synchronized + then pure $ replicate numOthers honest + else replicateM numOthers (genHonestSchedule gt) + pure $ gt $> mkPeers honest otherHonests + ) + ( defaultSchedulerConfig + { scEnableCSJ = True + , scEnableLoE = True + , scEnableLoP = True + } + ) + ( -- NOTE: Shrinking makes the tests fail because the peers reject jumps + -- because their TP is G. This makes them into objectors and they then + -- start serving headers. + \_ _ -> [] + ) + ( \gt StateView{svTrace} -> + let + -- The list of 'TraceDownloadedHeader' events that are not newer than + -- jumpSize from the tip of the chain. These are the ones that we + -- expect to see only once per header if CSJ works properly. + headerDownloadEvents = + mapMaybe + (\case + TraceChainSyncClientEvent pid (TraceDownloadedHeader hdr) + | not (isNewerThanJumpSizeFromTip gt hdr) + -> Just (pid, hdr) + _ -> Nothing + ) + svTrace + receivedHeadersOnlyOnce = length (nub $ snd <$> headerDownloadEvents) == length headerDownloadEvents + -- NOTE: If all the headers are newer than jumpSize from the tip, then + -- 'headerDownloadEvents' is empty and the following condition would be + -- violated if we used @==@. + receivedHeadersFromOnlyOnePeer = length (nubOrd $ fst <$> headerDownloadEvents) <= 1 + in + tabulate "" + [ if headerDownloadEvents == [] + then "All headers may be downloaded twice (uninteresting test)" + else "There exist headers that have to be downloaded exactly once" + ] $ + counterexample + ("Downloaded headers (except jumpSize slots near the tip):\n" ++ + ( unlines $ fmap (" " ++) $ zipWith + (\peer header -> peer ++ " | " ++ header) + (condenseListWithPadding PadRight $ fst <$> headerDownloadEvents) + (condenseListWithPadding PadRight $ snd <$> headerDownloadEvents) + ) + ) + (receivedHeadersOnlyOnce && receivedHeadersFromOnlyOnePeer) + ) + where + -- | This might seem wasteful, as we discard generated adversarial schedules. + -- It actually isn't, since we call it on trees that have no branches besides + -- the trunk, so no adversaries are generated. + genHonestSchedule :: GenesisTest TestBlock () -> Gen (PeerSchedule TestBlock) + genHonestSchedule gt = do + ps <- genUniformSchedulePoints gt + pure $ value $ honest ps + + isNewerThanJumpSizeFromTip :: GenesisTestFull TestBlock -> Header TestBlock -> Bool + isNewerThanJumpSizeFromTip gt hdr = + let jumpSize = csjpJumpSize $ gtCSJParams gt + tipSlot = AF.headSlot $ btTrunk $ gtBlockTree gt + hdrSlot = blockSlot hdr + in + -- Sanity check: add @1 +@ after @>@ and watch the World burn. 
+ hdrSlot + jumpSize >= succWithOrigin tipSlot diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs index f770bac5fb..87c85f54b9 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs @@ -1,29 +1,38 @@ {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE ViewPatterns #-} module Test.Consensus.Genesis.Tests.DensityDisconnect (tests) where -import Cardano.Slotting.Slot (WithOrigin (..), unSlotNo) +import Cardano.Slotting.Slot (SlotNo (unSlotNo), WithOrigin (..)) import Control.Exception (fromException) import Control.Monad.Class.MonadTime.SI (Time (..)) -import Data.Bifunctor (second) -import Data.Foldable (minimumBy, toList) +import Data.Bifunctor +import Data.Foldable (maximumBy, minimumBy, toList) import Data.Function (on) import Data.Functor (($>), (<&>)) import Data.List (intercalate) +import Data.List.NonEmpty (nonEmpty) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map +import Data.Maybe (fromMaybe) import Data.Semigroup (Endo (..)) -import Ouroboros.Consensus.Block (fromWithOrigin, withOrigin) +import Data.Set (Set, (\\)) +import qualified Data.Set as Set +import Ouroboros.Consensus.Block (Point (GenesisPoint), + WithOrigin (NotOrigin), blockSlot, fromWithOrigin, + withOrigin) import Ouroboros.Consensus.Block.Abstract (Header, getHeader) import Ouroboros.Consensus.Config.SecurityParam (SecurityParam (SecurityParam), maxRollbacks) -import Ouroboros.Consensus.Genesis.Governor (densityDisconnect, - sharedCandidatePrefix) +import Ouroboros.Consensus.Genesis.Governor (DensityBounds, + densityDisconnect, sharedCandidatePrefix) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (ChainSyncClientException (DensityTooLow), ChainSyncState (..)) +import Ouroboros.Consensus.Util.Condense (condense) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (HasHeader, Tip (TipGenesis), @@ -37,6 +46,7 @@ import Test.Consensus.PeerSimulator.Run import Test.Consensus.PeerSimulator.StateView (PeerSimulatorComponent (..), StateView (..), exceptionsByComponent) +import Test.Consensus.PeerSimulator.Trace (prettyDensityBounds) import Test.Consensus.PointSchedule import Test.Consensus.PointSchedule.Peers import Test.Consensus.PointSchedule.Shrinking @@ -45,17 +55,18 @@ import Test.Consensus.PointSchedule.SinglePeer (SchedulePoint (..), scheduleBlockPoint, scheduleHeaderPoint, scheduleTipPoint) import qualified Test.QuickCheck as QC import Test.QuickCheck +import Test.QuickCheck.Extras (unsafeMapSuchThatJust) import Test.Tasty import Test.Tasty.QuickCheck import Test.Util.Orphans.IOLike () -import Test.Util.TersePrinting (terseHFragment) +import Test.Util.TersePrinting (terseHFragment, terseHeader) import Test.Util.TestBlock (TestBlock) import Test.Util.TestEnv (adjustQuickCheckMaxSize, adjustQuickCheckTests) tests :: TestTree tests = - adjustQuickCheckTests (* 4) $ + adjustQuickCheckTests (* 4) $ adjustQuickCheckMaxSize (`div` 5) $ testGroup "gdd" [ testProperty "basic" prop_densityDisconnectStatic, @@ -145,19 +156,26 @@ data EvolvingPeer = suffix :: [Header TestBlock], tip :: Tip 
TestBlock, prefixSlots :: Int, - killed :: Bool + forkSlot :: WithOrigin SlotNo } deriving Show data EvolvingPeers = EvolvingPeers { - k :: SecurityParam, - sgen :: GenesisWindow, - peers :: Peers EvolvingPeer, - loeFrag :: AnchoredFragment (Header TestBlock) + k :: SecurityParam, + sgen :: GenesisWindow, + peers :: Peers EvolvingPeer, + loeFrag :: AnchoredFragment (Header TestBlock), + fullTree :: BlockTree TestBlock } deriving Show +data Evolution = + Evolution { + peers :: Peers EvolvingPeer, + killed :: Set PeerId + } + lastSlot :: AF.HasHeader b => AnchoredFragment b -> @@ -171,7 +189,8 @@ initCandidates GenesisTest {gtSecurityParam, gtGenesisWindow, gtBlockTree} = k = gtSecurityParam, sgen = gtGenesisWindow, peers, - loeFrag = AF.Empty AF.AnchorGenesis + loeFrag = AF.Empty AF.AnchorGenesis, + fullTree = gtBlockTree } where peers = mkPeers (peer trunk (AF.Empty (AF.headAnchor trunk)) (btTrunk gtBlockTree)) (branchPeer <$> branches) @@ -186,7 +205,7 @@ initCandidates GenesisTest {gtSecurityParam, gtGenesisWindow, gtBlockTree} = suffix = AF.toOldestFirst headers, tip = branchTip chain, prefixSlots = lastSlot forkPrefix, - killed = False + forkSlot = AF.lastSlot forkSuffix } where headers = toHeaders chain @@ -195,127 +214,241 @@ initCandidates GenesisTest {gtSecurityParam, gtGenesisWindow, gtBlockTree} = branches = btBranches gtBlockTree +data UpdateEvent = UpdateEvent { + -- | The peer whose candidate was extended in this step + target :: PeerId + -- | The header appended to the candidate of 'target' + , added :: Header TestBlock + -- | Peers that have been disconnected in the current step + , killed :: Set PeerId + -- | The GDD data + , bounds :: Map PeerId (DensityBounds TestBlock) + -- | The current chains + , tree :: BlockTree (Header TestBlock) + , loeFrag :: AnchoredFragment (Header TestBlock) + , curChain :: AnchoredFragment (Header TestBlock) + } + +snapshotTree :: Peers EvolvingPeer -> BlockTree (Header TestBlock) +snapshotTree Peers {honest, others} = + foldr addBranch' (mkTrunk (candidate (value honest))) (candidate . value <$> others) + +prettyUpdateEvent :: UpdateEvent -> [String] +prettyUpdateEvent UpdateEvent {target, added, killed, bounds, tree, loeFrag, curChain} = + [ + "Extended " ++ condense target ++ " with " ++ terseHeader added, + " disconnect: " ++ show killed, + " LoE frag: " ++ terseHFragment loeFrag, + " selection: " ++ terseHFragment curChain + ] + ++ prettyDensityBounds bounds + ++ "" : prettyBlockTree tree + data MonotonicityResult = HonestKilled | - Nonmonotonic + Nonmonotonic UpdateEvent | Finished - deriving Show --- | Check whether the honest peer was killed or a peer's new losing state violates monotonicity, i.e. if it was found --- to be losing before, it shouldn't be found winning later. +-- | Check whether the honest peer was killed or a peer's new losing state +-- violates monotonicity, i.e. if it was found to be losing before, it shouldn't +-- be found winning later. -- --- If that is the case, return @Left (False, peers)@ to indicate that the test is over and failed. +-- If that is the case, return @Left (HonestKilled|Nonmonotonic, peers)@ to +-- indicate that the test is over and failed. -- --- Otherwise, remove all adversaries that either have no more blocks or have more than @sgen@ slots after their fork --- intersection. +-- Otherwise, remove all adversaries that either have no more blocks or have +-- more than @sgen@ slots after their fork intersection. 
There is not other +-- motivation to shrink the adversary set other than ensuring termination. -- --- If no adversaries remain, return @Left (True, peers)@ to indicate that the test is over and succeeded. +-- If no adversaries remain, return @Left (Finished, peers)@ to indicate that +-- the test is over and succeeded. -- -- Otherwise, return @Right remaining@ to continue with the next step. updatePeers :: GenesisWindow -> - PeerId -> - [PeerId] -> Peers EvolvingPeer -> - Either (MonotonicityResult, Peers EvolvingPeer) (Peers EvolvingPeer) -updatePeers (GenesisWindow sgen) target disconnect peers - | HonestPeer `elem` disconnect + -- | Peers that were disconnected previously + Set PeerId -> + UpdateEvent -> + Either (MonotonicityResult, Peers EvolvingPeer) Evolution +updatePeers (GenesisWindow sgen) peers killedBefore event@UpdateEvent {target, killed = killedNow} + | HonestPeer `Set.member` killedNow = Left (HonestKilled, peers) - | killed peer - , not (target `elem` disconnect) - = Left (Nonmonotonic, peers) + | not (null violations) + = Left (Nonmonotonic event, peers) | null remaining = Left (Finished, peers) | otherwise - = Right peers {others = remaining} + = Right evo where - Peer {value = peer} = getPeer target peers - - remaining = Map.filter (not . discardPeer) (others peers) - - discardPeer Peer {value = EvolvingPeer {candidate, suffix, prefixSlots}} = - null suffix || lastSlot candidate - prefixSlots > fromIntegral sgen - --- | Find the earliest intersection, used to compute the selection. + -- The peers that were killed in an earlier step but not in the current one + violations = killedBefore \\ killedNow + + -- The new state if no violations were detected + evo@Evolution {peers = Peers {others = remaining}} + | targetExhausted + -- If the target is done, reset the set of killed peers, since other peers + -- may have lost only against the target. + -- Remove the target from the active peers. + = Evolution {peers = peers {others = Map.delete target (others peers)}, killed = mempty} + | otherwise + -- Otherwise replace the killed peers with the current set + = Evolution {peers, killed = killedNow} + + -- Whether the extended peer is uninteresting for GDD from now on + targetExhausted = + -- Its fragment cannot be extended anymore, or + null suffix || + -- Its candidate is longer than a Genesis window + lastSlot candidate - prefixSlots > fromIntegral sgen + + Peer {value = EvolvingPeer {candidate, suffix, prefixSlots}} = getPeer target peers + +-- | Find the peer whose candidate has the earliest intersection. +-- If no peer has reached its fork suffix yet, return the one with the highest slot. +-- +-- The selection will then be computed by taking up to k blocks after the immutable tip +-- on this peer's candidate fragment. firstBranch :: Peers EvolvingPeer -> Peer EvolvingPeer -firstBranch peers = - minimumBy (compare `on` predicate) (toList (others peers)) +firstBranch Peers {honest, others} = + fromMaybe newest $ + minimumBy (compare `on` forkAnchor) <$> nonEmpty (filter hasForked (toList others)) + where + newest = maximumBy (compare `on` (AF.headSlot . candidate . value)) (honest : toList others) + forkAnchor = fromWithOrigin 0 . AF.anchorToSlotNo . AF.anchor . forkSuffix . value + hasForked Peer {value = EvolvingPeer {candidate, forkSlot}} = + AF.headSlot candidate >= forkSlot + +-- | Determine the immutable tip by computing the latest point before the fork intesection +-- for all peers, and then taking the earliest among the results. 
+immutableTip :: Peers EvolvingPeer -> AF.Point (Header TestBlock) +immutableTip peers = + minimum (lastHonest <$> toList (others peers)) where - predicate = fromWithOrigin 0 . AF.anchorToSlotNo . AF.anchor . forkSuffix . value + lastHonest Peer {value = EvolvingPeer {candidate, forkSlot = NotOrigin forkSlot}} = + AF.headPoint $ + AF.dropWhileNewest (\ b -> blockSlot b >= forkSlot) candidate + lastHonest _ = GenesisPoint -- | Take one block off the peer's suffix and append it to the candidate fragment. -- -- Since we don't remove the honest peer when it's exhausted, this may be called with an empty suffix. -movePeer :: EvolvingPeer -> EvolvingPeer +movePeer :: EvolvingPeer -> (EvolvingPeer, Maybe (Header TestBlock)) movePeer = \case - peer@EvolvingPeer {candidate, suffix = h : t} -> peer {candidate = candidate AF.:> h, suffix = t} - peer -> peer + peer@EvolvingPeer {candidate, suffix = h : t} -> + (peer {candidate = candidate AF.:> h, suffix = t}, Just h) + peer -> (peer, Nothing) --- | Repeatedly run the GDD, each time updating a random peer to advance by one block. --- The selection is set to the first k blocks of the first fork. --- The tips are the last blocks of each full branch. --- The returned 'Bool' indicates whether the honest peer won and no monotonicity violations were detected. +-- | Repeatedly run the GDD, each time updating the candidate fragment of a +-- random peer to advance by one header, until all peers have been discarded +-- (not the same as disconnected!) according to 'updatePeers'. +-- +-- The selection is set to the first k blocks of the first fork, the +-- anchor being the intersection. +-- +-- The latest slots are the youngest header of each candidate fragments. +-- +-- The returned 'MonotonicityResult' indicates whether the honest peer won and +-- no monotonicity violations were detected (the peer stays being disconnected +-- if it starts being disconnected). evolveBranches :: EvolvingPeers -> - Gen (MonotonicityResult, EvolvingPeers) -evolveBranches EvolvingPeers {k, sgen, peers = initialPeers} = - step initialPeers + Gen (MonotonicityResult, EvolvingPeers, [UpdateEvent]) +evolveBranches EvolvingPeers {k, sgen, peers = initialPeers, fullTree} = + step [] Evolution {peers = initialPeers, killed = mempty} where - step ps = do - target <- elements ids + step events Evolution {peers = ps, killed = killedBefore} = do + (target, nextPeers, added) <- unsafeMapSuchThatJust $ do + -- Select a random peer + pid <- elements ids + pure $ do + -- Add a block to the candidate. If the peer has no more blocks, + -- this returns 'Nothing' and the generator retries. + (nextPeers, added) <- sequence (updatePeer movePeer pid ps) + pure (pid, nextPeers, added) let - curChain = selection (value (firstBranch ps)) - next = updatePeer movePeer target ps - candidates = candidate . value <$> toMap next - states = - candidates <&> \ csCandidate -> - ChainSyncState { - csCandidate, - csIdling = False, - csLatestSlot = Just (AF.headSlot csCandidate) - } - (loeFrag, suffixes) = sharedCandidatePrefix curChain candidates - disconnect = fst (densityDisconnect sgen k states suffixes loeFrag) - either (pure . second (result loeFrag)) step (updatePeers sgen target disconnect next) + -- Compute the selection. + curChain = selection (immutableTip ps) (firstBranch ps) + candidates = candidate . value <$> toMap nextPeers + states = + candidates <&> \ csCandidate -> + ChainSyncState { + csCandidate, + csIdling = False, + csLatestSlot = Just (AF.headSlot csCandidate) + } + -- Run GDD. 
+ (loeFrag, suffixes) = sharedCandidatePrefix curChain candidates + (killedNow, bounds) = first Set.fromList $ densityDisconnect sgen k states suffixes loeFrag + event = UpdateEvent { + target, + added, + killed = killedNow, + bounds, + tree = snapshotTree nextPeers, + loeFrag, + curChain + } + newEvents = event : events + -- Check the termination condition and remove exhausted peers. + updated = updatePeers sgen nextPeers killedBefore event + either (pure . result newEvents loeFrag) (step newEvents) updated where - result f final = EvolvingPeers {k, sgen, peers = final, loeFrag = f} + result evs f (res, final) = (res, EvolvingPeers {k, sgen, peers = final, loeFrag = f, fullTree}, reverse evs) - selection branch = - AF.takeOldest (AF.length (forkPrefix branch) + fromIntegral k') (forkSuffix branch) + -- Take k blocks after the immutable tip on the first fork. + selection imm Peer {value = EvolvingPeer {candidate}} = + case AF.splitAfterPoint candidate imm of + Just (_, s) -> AF.takeOldest (fromIntegral k') s + Nothing -> error "immutable tip not on candidate" ids = toList (getPeerIds ps) SecurityParam k' = k - _tips = tip <$> toMap' initialPeers peerInfo :: EvolvingPeers -> [String] -peerInfo EvolvingPeers {k = SecurityParam k, sgen = GenesisWindow sgen, peers = Peers {honest, others}, loeFrag} = +peerInfo EvolvingPeers {k = SecurityParam k, sgen = GenesisWindow sgen, loeFrag} = [ "k: " <> show k, "sgen: " <> show sgen, - "loeFrag: " <> terseHFragment loeFrag, - intercalate "\n" (prettyBlockTree tree) + "loeFrag: " <> terseHFragment loeFrag ] - where - tree = foldr addBranch' (mkTrunk (candidate (value honest))) (candidate . value <$> others) +-- | Tests that when GDD disconnects a peer, it continues to disconnect it when +-- its candidate fragment is extended. prop_densityDisconnectMonotonic :: Property prop_densityDisconnectMonotonic = - forAllBlind gen $ \ (result, final) -> + forAllBlind gen $ \ (result, final, events) -> appEndo (foldMap (Endo . counterexample) (peerInfo final)) $ - check result + check final events result where - check = \case - HonestKilled -> counterexample "Honest peer was killed" False - Nonmonotonic -> counterexample "Peer went from losing to winning" False + check final events = \case + HonestKilled -> withEvents $ counterexample "Honest peer was killed" False + Nonmonotonic event -> do + let msg = "Peer went from losing to remaining" + withEvents $ counterexample (catLines (msg : prettyUpdateEvent event)) False Finished -> property True + where + withEvents | debug = counterexample (catLines debugInfo) + | otherwise = id + + debugInfo = + "Event log:" : ((++ [""]) . prettyUpdateEvent =<< events) ++ + ["k: " ++ show k'] ++ + ("Full tree:" : prettyBlockTree (fullTree final) ++ [""]) + + EvolvingPeers {k = SecurityParam k'} = final + + catLines = intercalate "\n" gen = do gt <- genChains (QC.choose (1, 4)) evolveBranches (initCandidates gt) + debug = True + -- | Tests that a GDD disconnection re-triggers chain selection, i.e. when the current -- selection is blocked by LoE, and the leashing adversary reveals it is not dense enough, -- it gets disconnected and then the selection progresses. 
@@ -335,16 +468,17 @@ prop_densityDisconnectTriggersChainSel = shrinkByRemovingAdversaries - ( \GenesisTest {gtBlockTree} stateView@StateView {svTipBlock} -> + ( \GenesisTest {gtBlockTree, gtSchedule} stateView@StateView {svTipBlock} -> let + othersCount = Map.size (others gtSchedule) exnCorrect = case exceptionsByComponent ChainSyncClient stateView of - [exn] -> - case fromException exn of - Just DensityTooLow -> True - _ -> False - _ -> False + [fromException -> Just DensityTooLow] -> True + [] | othersCount == 0 -> True + _ -> False tipPointCorrect = Just (getTrunkTip gtBlockTree) == svTipBlock - in exnCorrect && tipPointCorrect + in counterexample "Unexpected exceptions" exnCorrect + .&&. + counterexample "The tip of the final selection is not the expected one" tipPointCorrect ) where diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs index 4f2c13b357..fe6b842b54 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs @@ -2,6 +2,8 @@ {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE ViewPatterns #-} module Test.Consensus.Genesis.Tests.LoE (tests) where @@ -69,10 +71,7 @@ prop_adversaryHitsTimeouts timeoutsEnabled = -- `ExceededTimeLimit` exception in the adversary's ChainSync. exceptionsCorrect = case exceptionsByComponent ChainSyncClient stateView of [] -> not timeoutsEnabled - [exn] -> - case fromException exn of - Just (ExceededTimeLimit _) -> timeoutsEnabled - _ -> False + [fromException -> Just (ExceededTimeLimit _)] -> timeoutsEnabled _ -> False in selectedCorrect && exceptionsCorrect ) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs index 3faf2eeeee..4c97bac307 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs @@ -2,6 +2,7 @@ {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} module Test.Consensus.Genesis.Tests.LoP (tests) where @@ -67,12 +68,9 @@ prop_wait mustTimeout = shrinkPeerSchedules ( \_ stateView -> case exceptionsByComponent ChainSyncClient stateView of - [] -> not mustTimeout - [exn] -> - case fromException exn of - Just CSClient.EmptyBucket -> mustTimeout - _ -> False - _ -> False + [] -> not mustTimeout + [fromException -> Just CSClient.EmptyBucket] -> mustTimeout + _ -> False ) where dullSchedule :: (HasHeader blk) => DiffTime -> AnchoredFragment blk -> Peers (PeerSchedule blk) @@ -147,12 +145,9 @@ prop_serve mustTimeout = shrinkPeerSchedules ( \_ stateView -> case exceptionsByComponent ChainSyncClient stateView of - [] -> not mustTimeout - [exn] -> - case fromException exn of - Just CSClient.EmptyBucket -> mustTimeout - _ -> False - _ -> False + [] -> not mustTimeout + [fromException -> Just CSClient.EmptyBucket] -> mustTimeout + _ -> False ) where lbpCapacity :: Integer = 10 @@ -211,10 +206,9 @@ prop_delayAttack lopEnabled = -- If LoP is enabled, then we expect exactly one `EmptyBucket` -- exception in the adversary's ChainSync. 
exceptionsCorrect = case exceptionsByComponent ChainSyncClient stateView of - [] -> not lopEnabled - [exn] -> - lopEnabled == (fromException exn == Just CSClient.EmptyBucket) - _ -> False + [] -> not lopEnabled + [fromException -> Just CSClient.EmptyBucket] -> lopEnabled + _ -> False in selectedCorrect && exceptionsCorrect ) where diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index 9b957c0100..249aadc62c 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -11,16 +11,18 @@ -- block tree with the right age (roughly @k@ blocks from the tip). Contrary to -- other tests cases (eg. long range attack), the schedules are not particularly -- biased towards a specific situation. -module Test.Consensus.Genesis.Tests.Uniform (tests) where +module Test.Consensus.Genesis.Tests.Uniform ( + genUniformSchedulePoints + , tests + ) where import Cardano.Slotting.Slot (SlotNo (SlotNo), WithOrigin (..)) import Control.Monad (replicateM) -import Control.Monad.Class.MonadTime.SI (DiffTime, Time (Time), - addTime) +import Control.Monad.Class.MonadTime.SI (Time, addTime) import Data.List (intercalate, sort) import qualified Data.List.NonEmpty as NE import qualified Data.Map.Strict as Map -import Data.Maybe (catMaybes, mapMaybe) +import Data.Maybe (fromMaybe, mapMaybe) import Data.Word (Word64) import GHC.Stack (HasCallStack) import Ouroboros.Consensus.Block.Abstract (WithOrigin (NotOrigin)) @@ -67,7 +69,13 @@ tests = adjustQuickCheckTests (`div` 10) $ testProperty "serve adversarial branches" prop_serveAdversarialBranches, adjustQuickCheckTests (`div` 100) $ - testProperty "the LoE stalls the chain, but the immutable tip is honest" prop_loeStalling + testProperty "the LoE stalls the chain, but the immutable tip is honest" prop_loeStalling, + adjustQuickCheckTests (`div` 100) $ + -- This is a crude way of ensuring that we don't get chains with more than 100 blocks, + -- because this test writes the immutable chain to disk and `instance Binary TestBlock` + -- chokes on long chains. + adjustQuickCheckMaxSize (const 10) $ + testProperty "the node is shut down and restarted after some time" prop_downtime ] theProperty :: @@ -142,7 +150,11 @@ prop_serveAdversarialBranches = forAllGenesisTest (genChains (QC.choose (1, 4)) `enrichedWith` genUniformSchedulePoints) (defaultSchedulerConfig - {scTraceState = False, scTrace = False, scEnableLoE = True}) + { scTraceState = False + , scTrace = False + , scEnableLoE = True + , scEnableCSJ = True + }) -- We cannot shrink by removing points from the adversarial schedules. -- Otherwise, the immutable tip could get stuck because a peer doesn't @@ -187,6 +199,7 @@ prop_leashingAttackStalling = { scTrace = False , scEnableLoE = True , scEnableLoP = True + , scEnableCSJ = True } shrinkPeerSchedules @@ -201,22 +214,9 @@ prop_leashingAttackStalling = -- timeouts to disconnect adversaries. 
genLeashingSchedule :: GenesisTest TestBlock () -> QC.Gen (PeersSchedule TestBlock) genLeashingSchedule genesisTest = do - Peers honest advs0 <- genUniformSchedulePoints genesisTest - let peerCount = 1 + length advs0 - extendedHonest = - duplicateLastPoint (endingDelay peerCount genesisTest) <$> honest + Peers honest advs0 <- ensureScheduleDuration genesisTest <$> genUniformSchedulePoints genesisTest advs <- mapM (mapM dropRandomPoints) advs0 - pure $ Peers extendedHonest advs - - endingDelay peerCount gt = - let cst = gtChainSyncTimeouts gt - bft = gtBlockFetchTimeouts gt - in 1 + fromIntegral peerCount * maximum (0 : catMaybes - [ canAwaitTimeout cst - , intersectTimeout cst - , busyTimeout bft - , streamingTimeout bft - ]) + pure $ Peers honest advs disableBoringTimeouts gt = gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt) @@ -239,13 +239,6 @@ prop_leashingAttackStalling = let (ys, zs) = splitAt i xs in ys ++ dropElemsAt (drop 1 zs) is - duplicateLastPoint - :: DiffTime -> [(Time, SchedulePoint TestBlock)] -> [(Time, SchedulePoint TestBlock)] - duplicateLastPoint d [] = [(Time d, ScheduleTipPoint Origin)] - duplicateLastPoint d xs = - let (t, p) = last xs - in xs ++ [(addTime d t, p)] - -- | Test that the leashing attacks do not delay the immutable tip after. The -- immutable tip needs to be advanced enough when the honest peer has offered -- all of its ticks. @@ -256,11 +249,17 @@ prop_leashingAttackStalling = -- See Note [Leashing attacks] prop_leashingAttackTimeLimited :: Property prop_leashingAttackTimeLimited = - expectFailure $ forAllGenesisTest + forAllGenesisTest - (genChains (QC.choose (1, 4)) `enrichedWith` genTimeLimitedSchedule) + (disableBoringTimeouts <$> genChains (QC.choose (1, 4)) `enrichedWith` genTimeLimitedSchedule) - (defaultSchedulerConfig {scTrace = False}) + defaultSchedulerConfig + { scTrace = False + , scEnableLoE = True + , scEnableLoP = True + , scEnableBlockFetchTimeouts = False + , scEnableCSJ = True + } shrinkPeerSchedules @@ -271,34 +270,64 @@ prop_leashingAttackTimeLimited = genTimeLimitedSchedule :: GenesisTest TestBlock () -> QC.Gen (PeersSchedule TestBlock) genTimeLimitedSchedule genesisTest = do Peers honest advs0 <- genUniformSchedulePoints genesisTest - let timeLimit = estimateTimeBound (value honest) (map value $ Map.elems advs0) + let timeLimit = estimateTimeBound + (gtChainSyncTimeouts genesisTest) + (gtLoPBucketParams genesisTest) + (value honest) + (map value $ Map.elems advs0) advs = fmap (fmap (takePointsUntil timeLimit)) advs0 - pure $ Peers honest advs + extendedHonest = extendScheduleUntil timeLimit <$> honest + pure $ Peers extendedHonest advs takePointsUntil limit = takeWhile ((<= limit) . 
fst) - estimateTimeBound :: AF.HasHeader blk => PeerSchedule blk -> [PeerSchedule blk] -> Time - estimateTimeBound honest advs = - let firstTipPointBlock = headCallStack (mapMaybe fromTipPoint honest) + extendScheduleUntil + :: Time -> [(Time, SchedulePoint TestBlock)] -> [(Time, SchedulePoint TestBlock)] + extendScheduleUntil t [] = [(t, ScheduleTipPoint Origin)] + extendScheduleUntil t xs = + let (t', p) = last xs + in if t < t' then xs + else xs ++ [(t, p)] + + disableBoringTimeouts gt = + gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt) + { canAwaitTimeout = Nothing + , mustReplyTimeout = Nothing + , idleTimeout = Nothing + } + } + + estimateTimeBound + :: AF.HasHeader blk + => ChainSyncTimeout + -> LoPBucketParams + -> PeerSchedule blk + -> [PeerSchedule blk] + -> Time + estimateTimeBound cst LoPBucketParams{lbpCapacity, lbpRate} honest advs = + let firstTipPointTime = fst $ headCallStack (mapMaybe fromTipPoint honest) lastBlockPoint = last (mapMaybe fromBlockPoint honest) - peerCount = length advs + 1 - maxBlockNo = maximum $ 0 : blockPointNos honest ++ concatMap blockPointNos advs - -- 0.020s is the amount of time LoP grants per interesting header - -- 5s is the initial fill of the LoP bucket + peerCount = fromIntegral $ length advs + 1 + maxBlockNo = fromIntegral $ maximum $ 0 : blockPointNos honest ++ concatMap blockPointNos advs + timeCapacity = fromRational $ (fromIntegral lbpCapacity) / lbpRate + timePerToken = fromRational $ 1 / lbpRate + intersectDiffTime = fromMaybe (error "no intersect timeout") (intersectTimeout cst) + -- Since the moment a peer offers the first tip, LoP should + -- start ticking for it. This can be no later than what the intersect + -- timeout allows for all peers. -- - -- Since the moment the honest peer offers the first tip, LoP should - -- start ticking. Syncing all the blocks might take longer than it - -- takes to dispatch all ticks to the honest peer. In this case - -- the syncing time is the time bound for the test. If dispatching - -- all the ticks takes longer, then the dispatching time becomes - -- the time bound. + -- Additionally, the actual delay might be greater if the honest peer + -- has its last tick dispatched later. -- -- Adversarial peers might cause more ticks to be sent as well. We -- bound it all by considering the highest block number that is ever -- sent. - in max + in addTime 1 $ max (fst lastBlockPoint) - (addTime (0.020 * fromIntegral maxBlockNo + 5 * fromIntegral peerCount) (fst firstTipPointBlock)) + (addTime + (intersectDiffTime + timePerToken * maxBlockNo + timeCapacity * peerCount) + firstTipPointTime + ) blockPointNos :: AF.HasHeader blk => [(Time, SchedulePoint blk)] -> [Word64] blockPointNos = @@ -355,3 +384,20 @@ prop_loeStalling = allTips = simpleHash . AF.headHash <$> (btTrunk : suffixes) suffixes = btbSuffix <$> btBranches + +-- | This test sets 'scDowntime', which instructs the scheduler to shut all components down whenever a tick's duration +-- is greater than 11 seconds, and restarts it while only preserving the immutable DB after advancing the time. +-- +-- This ensures that a user may shut down their machine while syncing without additional vulnerabilities. 
+prop_downtime :: Property +prop_downtime = forAllGenesisTest + + (genChains (QC.choose (1, 4)) `enrichedWith` \ gt -> + ensureScheduleDuration gt <$> stToGen (uniformPointsWithDowntime (gtSecurityParam gt) (gtBlockTree gt))) + + (defaultSchedulerConfig + {scEnableLoE = True, scEnableLoP = True, scDowntime = Just 11}) + + shrinkPeerSchedules + + theProperty diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs index d4a487818a..536a49f2fc 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs @@ -93,7 +93,11 @@ startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates = -- do not serialize the blocks. (\_hdr -> 1000) slotForgeTime - (pure FetchModeBulkSync) + -- Initially, we tried FetchModeBulkSync, but adversaries had the + -- opportunity to delay syncing by not responding to block requests. + -- The BlockFetch logic would then wait for the timeout to expire + -- before trying to download the block from another peer. + (pure FetchModeDeadline) -- Values taken from -- ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs @@ -108,7 +112,7 @@ startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates = -- advanced to allow completion of the batch. -- bfcMaxConcurrencyBulkSync = 50 - , bfcMaxConcurrencyDeadline = 2 + , bfcMaxConcurrencyDeadline = 50 , bfcMaxRequestsInflight = 10 , bfcDecisionLoopInterval = 0 , bfcSalt = 0 diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs index 3ac5deb34b..21894f8811 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs @@ -20,10 +20,10 @@ import Ouroboros.Consensus.Block (Header, Point) import Ouroboros.Consensus.Config (TopLevelConfig (..)) import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) -import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (ChainDbView, - ChainSyncClientHandle, ChainSyncLoPBucketConfig, - ChainSyncStateView (..), Consensus, bracketChainSyncClient, - chainSyncClient) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (CSJConfig (..), ChainDbView, ChainSyncClientHandle, + ChainSyncLoPBucketConfig, ChainSyncStateView (..), + Consensus, bracketChainSyncClient, chainSyncClient) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck import Ouroboros.Consensus.Util (ShowProxy) @@ -92,6 +92,7 @@ basicChainSyncClient , CSClient.idling = csvIdling csState , CSClient.loPBucket = csvLoPBucket csState , CSClient.setLatestSlot = csvSetLatestSlot csState + , CSClient.jumping = csvJumping csState } where dummyHeaderInFutureCheck :: @@ -118,6 +119,8 @@ runChainSyncClient :: -- ^ Timeouts for this client. ChainSyncLoPBucketConfig -> -- ^ Configuration for the LoP bucket. + CSJConfig -> + -- ^ Configuration for ChainSync Jumping StateViewTracers blk m -> -- ^ Tracers used to record information for the future 'StateView'. 
StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> @@ -133,6 +136,7 @@ runChainSyncClient peerId chainSyncTimeouts lopBucketConfig + csjConfig StateViewTracers {svtPeerSimulatorResultsTracer} varHandles channel = do @@ -143,6 +147,7 @@ runChainSyncClient peerId (maxBound :: NodeToNodeVersion) lopBucketConfig + csjConfig $ \csState -> do res <- try $ diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Handlers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Handlers.hs index b7f1248bc2..0ac53a11c8 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Handlers.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Handlers.hs @@ -46,7 +46,7 @@ import Test.Consensus.PeerSimulator.ScheduledChainSyncServer import Test.Consensus.PeerSimulator.Trace (TraceScheduledBlockFetchServerEvent (..), TraceScheduledChainSyncServerEvent (..)) -import Test.Consensus.PointSchedule +import Test.Consensus.PointSchedule.NodeState import Test.Util.Orphans.IOLike () import Test.Util.TestBlock (TestBlock, TestHash (TestHash)) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs new file mode 100644 index 0000000000..f83d1c32b5 --- /dev/null +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs @@ -0,0 +1,220 @@ +{-# LANGUAGE DisambiguateRecordFields #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module Test.Consensus.PeerSimulator.NodeLifecycle ( + LiveInterval (..) + , LiveIntervalResult (..) + , LiveNode (..) + , LiveResources (..) + , NodeLifecycle (..) + , lifecycleStart + , lifecycleStop + , restoreNode + ) where + +import Control.Tracer (Tracer (..), traceWith) +import Data.Functor (void) +import Data.Set (Set) +import qualified Data.Set as Set +import Ouroboros.Consensus.Block +import Ouroboros.Consensus.Config (TopLevelConfig (..)) +import Ouroboros.Consensus.Storage.ChainDB.API +import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB +import qualified Ouroboros.Consensus.Storage.ChainDB.Impl as ChainDB +import Ouroboros.Consensus.Storage.ChainDB.Impl.Args (cdbsLoE, + updateTracer) +import Ouroboros.Consensus.Util.IOLike +import Ouroboros.Consensus.Util.ResourceRegistry +import Ouroboros.Network.AnchoredFragment (AnchoredFragment) +import qualified Ouroboros.Network.AnchoredFragment as AF +import qualified System.FS.Sim.MockFS as MockFS +import System.FS.Sim.MockFS (MockFS) +import Test.Consensus.PeerSimulator.Resources +import Test.Consensus.PeerSimulator.StateView +import Test.Consensus.PeerSimulator.Trace +import Test.Consensus.PointSchedule.Peers (PeerId) +import Test.Util.ChainDB +import Test.Util.Orphans.IOLike () +import Test.Util.TestBlock (TestBlock, testInitExtLedger) + +-- | Resources used for a single live interval of the node, constructed when the +-- node is started. +-- When the node is shut down, 'lnCopyToImmDb' is used to persist the current +-- chain. +data LiveNode blk m = LiveNode { + lnChainDb :: ChainDB m blk + , lnStateViewTracers :: StateViewTracers blk m + , lnStateTracer :: Tracer m () + + -- | Write persistent ChainDB state (the immutable and volatile DBs, but not + -- the ledger and GSM state) to the VFS TVars to preserve it for the next + -- interval. 
+ -- Returns the immutable tip's slot for tracing. + , lnCopyToImmDb :: m (WithOrigin SlotNo) + + -- | The set of peers that should be started. + -- Based on the simulation results at node shutdown, disconnected peers are + -- removed for the next live interval. + , lnPeers :: Set PeerId + } + +-- | Result of a node shutdown at the end of a live interval. +data LiveIntervalResult blk = LiveIntervalResult { + -- | Used to initialize the 'StateViewTracers' of the next run to preserve + -- earlier disconnections for the final result. + lirPeerResults :: [PeerSimulatorResult blk] + + -- | The remaining peers, computed by removing all peers present in + -- 'lrPeerResults' from the current state in 'lnPeers'. + , lirActive :: Set PeerId + } + +-- | Resources used by the handlers 'lifecycleStart' and 'lifecycleStop' to +-- shut down running components, construct tracers used for single intervals, +-- and reset and persist state. +data LiveResources blk m = LiveResources { + lrRegistry :: ResourceRegistry m + , lrPeerSim :: PeerSimulatorResources m blk + , lrTracer :: Tracer m (TraceEvent blk) + , lrSTracer :: ChainDB m blk -> m (Tracer m ()) + , lrConfig :: TopLevelConfig blk + + -- | The chain DB state consists of several transient parts and the + -- immutable DB's virtual file system. + -- After 'lnCopyToImmDb' was executed, the latter will contain the final + -- state of an interval. + -- The rest is reset when the chain DB is recreated. + , lrCdb :: NodeDBs (StrictTVar m MockFS) + + -- | The LoE fragment must be reset for each live interval. + , lrLoEVar :: LoE (StrictTVar m (AnchoredFragment (Header blk))) + } + +data LiveInterval blk m = LiveInterval { + liResources :: LiveResources blk m + , liResult :: LiveIntervalResult blk + , liNode :: LiveNode blk m + } + +-- | Handlers for starting the node and shutting it down for each live interval, +-- using the state of the previous run. +data NodeLifecycle blk m = NodeLifecycle { + -- | The minimum tick duration that triggers a node downtime. + -- If this is 'Nothing', downtimes are disabled. + nlMinDuration :: Maybe DiffTime + + -- | Start the node with prior state. + -- For the first start, this must be called with an empty 'lirPeerResults' + -- and the initial set of all peers in 'lirActive'. + , nlStart :: LiveIntervalResult blk -> m (LiveNode blk m) + , nlShutdown :: LiveNode blk m -> m (LiveIntervalResult blk) + } + +-- | Create a ChainDB and start a BlockRunner that operate on the peers' +-- candidate fragments. +mkChainDb :: + IOLike m => + LiveResources TestBlock m -> + m (ChainDB m TestBlock, m (WithOrigin SlotNo)) +mkChainDb resources = do + atomically $ do + -- Reset only the non-persisted state of the ChainDB's file system mocks: + -- - GSM state and Ledger DB are discarded + -- - Immutable DB and Volatile DB are preserved for the next interval + modifyTVar (nodeDBsGsm lrCdb) (const MockFS.empty) + modifyTVar (nodeDBsLgr lrCdb) (const MockFS.empty) + chainDbArgs <- do + let args = updateTracer + (Tracer (traceWith lrTracer . TraceChainDBEvent)) + (fromMinimalChainDbArgs MinimalChainDbArgs { + mcdbTopLevelConfig = lrConfig + , mcdbChunkInfo = mkTestChunkInfo lrConfig + , mcdbInitLedger = testInitExtLedger + , mcdbRegistry = lrRegistry + , mcdbNodeDBs = lrCdb + }) + pure $ args { ChainDB.cdbsArgs = (ChainDB.cdbsArgs args) { + cdbsLoE = readTVarIO <$> lrLoEVar + } } + (_, (chainDB, internal)) <- allocate + lrRegistry + (\_ -> ChainDB.openDBInternal chainDbArgs False) + (ChainDB.closeDB . 
fst) + let ChainDB.Internal {intCopyToImmutableDB, intAddBlockRunner} = internal + void $ forkLinkedThread lrRegistry "AddBlockRunner" (void intAddBlockRunner) + pure (chainDB, intCopyToImmutableDB) + where + LiveResources {lrRegistry, lrTracer, lrConfig, lrCdb, lrLoEVar} = resources + +-- | Allocate all the resources that depend on the results of previous live +-- intervals, the ChainDB and its persisted state. +restoreNode :: + IOLike m => + LiveResources TestBlock m -> + LiveIntervalResult TestBlock -> + m (LiveNode TestBlock m) +restoreNode resources LiveIntervalResult {lirPeerResults, lirActive} = do + lnStateViewTracers <- stateViewTracersWithInitial lirPeerResults + (lnChainDb, lnCopyToImmDb) <- mkChainDb resources + lnStateTracer <- lrSTracer resources lnChainDb + pure LiveNode { + lnChainDb + , lnStateViewTracers + , lnStateTracer + , lnCopyToImmDb + , lnPeers = lirActive + } + +-- | Allocate resources with 'restoreNode' and pass them to the callback that +-- starts the node's threads. +lifecycleStart :: + forall m. + IOLike m => + (LiveInterval TestBlock m -> m ()) -> + LiveResources TestBlock m -> + LiveIntervalResult TestBlock -> + m (LiveNode TestBlock m) +lifecycleStart start liResources liResult = do + trace (TraceSchedulerEvent TraceNodeStartupStart) + liNode <- restoreNode liResources liResult + start LiveInterval {liResources, liResult, liNode} + chain <- atomically (ChainDB.getCurrentChain (lnChainDb liNode)) + trace (TraceSchedulerEvent (TraceNodeStartupComplete chain)) + pure liNode + where + trace = traceWith (lrTracer liResources) + +-- | Shut down the node by killing all its threads after extracting the +-- persistent state used to restart the node later. +lifecycleStop :: + (IOLike m, GetHeader blk) => + LiveResources blk m -> + LiveNode blk m -> + m (LiveIntervalResult blk) +lifecycleStop resources LiveNode {lnStateViewTracers, lnCopyToImmDb, lnPeers} = do + -- Trigger writing the immutable tip to the MockFS in our TVar for restoring in 'startNode' + immutableTip <- lnCopyToImmDb + trace (TraceSchedulerEvent (TraceNodeShutdownStart immutableTip)) + -- Remember which peers were still running before shutdown + lirPeerResults <- svtGetPeerSimulatorResults lnStateViewTracers + let disconnectedPeers = Set.fromList (psePeerId <$> lirPeerResults) + lirActive = lnPeers Set.\\ disconnectedPeers + -- Killing the peer overview threads should hopefully clean up all connections promptly + releaseAll lrRegistry + -- Reset the resources in TVars that were allocated by the simulator + atomically $ do + modifyTVar psrHandles (const mempty) + case lrLoEVar of + LoEEnabled var -> modifyTVar var (const (AF.Empty AF.AnchorGenesis)) + LoEDisabled -> pure () + trace (TraceSchedulerEvent TraceNodeShutdownComplete) + pure LiveIntervalResult {lirActive, lirPeerResults} + where + trace = traceWith lrTracer + LiveResources { + lrRegistry + , lrTracer + , lrPeerSim = PeerSimulatorResources {psrHandles} + , lrLoEVar + } = resources diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs index 423660b03f..c4fe394a60 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs @@ -40,7 +40,7 @@ import Test.Consensus.PeerSimulator.ScheduledBlockFetchServer runScheduledBlockFetchServer) import 
Test.Consensus.PeerSimulator.ScheduledChainSyncServer import Test.Consensus.PeerSimulator.Trace (TraceEvent) -import Test.Consensus.PointSchedule +import Test.Consensus.PointSchedule.NodeState import Test.Consensus.PointSchedule.Peers (PeerId) import Test.Util.Orphans.IOLike () import Test.Util.TestBlock (TestBlock) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index 7213552f85..b7c04a17d3 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -10,6 +10,7 @@ module Test.Consensus.PeerSimulator.Run ( , runPointSchedule ) where +import Control.Monad (foldM, forM) import Control.Monad.Class.MonadTime (MonadTime) import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.Tracer (Tracer (..), nullTracer, traceWith) @@ -17,26 +18,24 @@ import Data.Foldable (for_) import Data.Functor (void) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map +import Ouroboros.Consensus.Block import Ouroboros.Consensus.Config (TopLevelConfig (..)) import Ouroboros.Consensus.Genesis.Governor (runGdd, updateLoEFragGenesis) import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) -import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (ChainDbView, +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (CSJConfig (..), CSJEnabledConfig (..), ChainDbView, ChainSyncClientHandle, ChainSyncLoPBucketConfig (..), ChainSyncLoPBucketEnabledConfig (..), ChainSyncState (..), viewChainSyncState) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import Ouroboros.Consensus.Storage.ChainDB.API import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB -import qualified Ouroboros.Consensus.Storage.ChainDB.Impl as ChainDB.Impl -import Ouroboros.Consensus.Storage.ChainDB.Impl.Args - (ChainDbSpecificArgs (cdbsLoE), updateTracer) import Ouroboros.Consensus.Util.Condense (Condense (..)) import Ouroboros.Consensus.Util.IOLike import Ouroboros.Consensus.Util.ResourceRegistry -import Ouroboros.Network.AnchoredFragment (AnchoredFragment, - HasHeader) +import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.BlockFetch (FetchClientRegistry, bracketSyncWithFetchClient, newFetchClientRegistry) @@ -48,21 +47,21 @@ import Ouroboros.Network.Util.ShowProxy (ShowProxy) import qualified Test.Consensus.PeerSimulator.BlockFetch as BlockFetch import qualified Test.Consensus.PeerSimulator.ChainSync as ChainSync import Test.Consensus.PeerSimulator.Config +import Test.Consensus.PeerSimulator.NodeLifecycle import Test.Consensus.PeerSimulator.Resources import Test.Consensus.PeerSimulator.StateDiagram (peerSimStateDiagramSTMTracerDebug) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PeerSimulator.Trace -import qualified Test.Consensus.PointSchedule as PointSchedule import Test.Consensus.PointSchedule (BlockFetchTimeout, - GenesisTest (GenesisTest), GenesisTestFull, - LoPBucketParams (..), NodeState, PeersSchedule, - peersStatesRelative) + CSJParams (..), GenesisTest (..), GenesisTestFull, + LoPBucketParams (..), PeersSchedule, peersStatesRelative) +import Test.Consensus.PointSchedule.NodeState (NodeState) import Test.Consensus.PointSchedule.Peers (Peer (..), PeerId, getPeerIds) import 
Test.Util.ChainDB import Test.Util.Orphans.IOLike () -import Test.Util.TestBlock (Header (..), TestBlock, testInitExtLedger) +import Test.Util.TestBlock (TestBlock) -- | Behavior config for the scheduler. data SchedulerConfig = @@ -92,9 +91,17 @@ data SchedulerConfig = -- governor (GDD). , scEnableLoE :: Bool - -- | Whether to enable to LoP. The parameters of the LoP come from + -- | Whether to enable the LoP. The parameters of the LoP come from -- 'GenesisTest'. , scEnableLoP :: Bool + + -- | Enable node downtime if this is 'Just', using the value as minimum tick + -- duration to trigger it. + , scDowntime :: Maybe DiffTime + + -- | Whether to enable ChainSync Jumping. The parameters come from + -- 'GenesisTest'. + , scEnableCSJ :: Bool } -- | Default scheduler config @@ -107,7 +114,9 @@ defaultSchedulerConfig = scTrace = True, scTraceState = False, scEnableLoE = False, - scEnableLoP = False + scEnableLoP = False, + scDowntime = Nothing, + scEnableCSJ = False } -- | Enable debug tracing during a scheduler test. @@ -133,6 +142,7 @@ startChainSyncConnectionThread :: ChainSyncResources m blk -> ChainSyncTimeout -> ChainSyncLoPBucketConfig -> + CSJConfig -> StateViewTracers blk m -> StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> m (Thread m (), Thread m ()) @@ -146,6 +156,7 @@ startChainSyncConnectionThread ChainSyncResources {csrServer} chainSyncTimeouts_ chainSyncLoPBucketConfig + csjConfig tracers varHandles = do @@ -153,7 +164,7 @@ startChainSyncConnectionThread clientThread <- forkLinkedThread registry ("ChainSyncClient" <> condense srPeerId) $ bracketSyncWithFetchClient fetchClientRegistry srPeerId $ - ChainSync.runChainSyncClient tracer cfg chainDbView srPeerId chainSyncTimeouts_ chainSyncLoPBucketConfig tracers varHandles clientChannel + ChainSync.runChainSyncClient tracer cfg chainDbView srPeerId chainSyncTimeouts_ chainSyncLoPBucketConfig csjConfig tracers varHandles clientChannel serverThread <- forkLinkedThread registry ("ChainSyncServer" <> condense srPeerId) $ ChainSync.runChainSyncServer tracer srPeerId tracers csrServer serverChannel @@ -199,28 +210,53 @@ startBlockFetchConnectionThread dispatchTick :: forall m blk. IOLike m => Tracer m (TraceSchedulerEvent blk) -> - Tracer m () -> - ChainDB m blk -> StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> Map PeerId (PeerResources m blk) -> + NodeLifecycle blk m -> + LiveNode blk m -> (Int, (DiffTime, Peer (NodeState blk))) -> - m () -dispatchTick tracer stateTracer chainDb varHandles peers (number, (duration, Peer pid state)) = + m (LiveNode blk m) +dispatchTick tracer varHandles peers lifecycle node (number, (duration, Peer pid state)) = case peers Map.!? pid of Just PeerResources {prUpdateState} -> do traceNewTick atomically (prUpdateState state) - threadDelay duration - traceWith stateTracer () + newNode <- checkDowntime + traceWith (lnStateTracer newNode) () + pure newNode Nothing -> error "“The impossible happened,” as GHC would say." 
where + checkDowntime + | Just minInterval <- nlMinDuration + , duration > minInterval + = do + results <- nlShutdown node + threadDelay duration + nlStart results + | otherwise + = do + threadDelay duration + pure node + + NodeLifecycle {nlMinDuration, nlStart, nlShutdown} = lifecycle + traceNewTick :: m () traceNewTick = do - currentChain <- atomically $ ChainDB.getCurrentChain chainDb - csState <- atomically $ do + currentChain <- atomically $ ChainDB.getCurrentChain (lnChainDb node) + (csState, jumpingStates) <- atomically $ do m <- readTVar varHandles - traverse (readTVar . CSClient.cschState) (m Map.!? pid) - traceWith tracer $ TraceNewTick number duration (Peer pid state) currentChain (CSClient.csCandidate <$> csState) + csState <- traverse (readTVar . CSClient.cschState) (m Map.!? pid) + jumpingStates <- forM (Map.toList m) $ \(peer, h) -> do + st <- readTVar (CSClient.cschJumping h) + pure (peer, st) + pure (csState, jumpingStates) + traceWith tracer $ TraceNewTick + number + duration + (Peer pid state) + currentChain + (CSClient.csCandidate <$> csState) + jumpingStates -- | Iterate over a 'PointSchedule', sending each tick to the associated peer in turn, -- giving each peer a chunk of computation time, sequentially, until it satisfies the @@ -230,18 +266,19 @@ dispatchTick tracer stateTracer chainDb varHandles peers (number, (duration, Pee runScheduler :: IOLike m => Tracer m (TraceSchedulerEvent blk) -> - Tracer m () -> - ChainDB m blk -> StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> PeersSchedule blk -> Map PeerId (PeerResources m blk) -> - m () -runScheduler tracer stateTracer chainDb varHandles ps peers = do + NodeLifecycle blk m -> + m (ChainDB m blk, StateViewTracers blk m) +runScheduler tracer varHandles ps peers lifecycle@NodeLifecycle {nlStart} = do + node0 <- nlStart LiveIntervalResult {lirActive = Map.keysSet peers, lirPeerResults = []} traceWith tracer TraceBeginningOfTime - mapM_ - (dispatchTick tracer stateTracer chainDb varHandles peers) - (zip [0..] (peersStatesRelative ps)) + LiveNode {lnChainDb, lnStateViewTracers} <- foldM tick node0 (zip [0..] (peersStatesRelative ps)) traceWith tracer TraceEndOfTime + pure (lnChainDb, lnStateViewTracers) + where + tick = dispatchTick tracer varHandles peers lifecycle -- | Create the shared resource for the LoE if the feature is enabled in the config. -- This is used by the ChainDB and the GDD governor. @@ -255,84 +292,112 @@ mkLoEVar SchedulerConfig {scEnableLoE} | otherwise = pure LoEDisabled --- | Construct STM resources, set up ChainSync and BlockFetch threads, and --- send all ticks in a 'PointSchedule' to all given peers in turn. -runPointSchedule :: +mkStateTracer :: + IOLike m => + SchedulerConfig -> + GenesisTest TestBlock s -> + PeerSimulatorResources m TestBlock -> + ChainDB m TestBlock -> + m (Tracer m ()) +mkStateTracer schedulerConfig GenesisTest {gtBlockTree} PeerSimulatorResources {psrHandles, psrPeers} chainDb + | scTraceState schedulerConfig + , let getCandidates = viewChainSyncState psrHandles CSClient.csCandidate + getCurrentChain = ChainDB.getCurrentChain chainDb + getPoints = traverse readTVar (srCurrentState . prShared <$> psrPeers) + = peerSimStateDiagramSTMTracerDebug gtBlockTree getCurrentChain getCandidates getPoints + | otherwise + = pure nullTracer + +-- | Start all threads for ChainSync, BlockFetch and GDD, using the resources +-- for a single live interval. +-- Only start peers that haven't been disconnected in a previous interval, +-- provided by 'LiveIntervalResult'. 
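The downtime handling in 'dispatchTick' above boils down to one decision per tick: if downtimes are enabled ('nlMinDuration' is 'Just') and the tick lasts longer than that minimum, the node is shut down, the tick's duration elapses while it is down, and it is restarted from the persisted state. The following is a minimal, hypothetical sketch of that decision with the simulator types ('NodeLifecycle', 'LiveNode', 'LiveIntervalResult') replaced by plain placeholders; none of the names or values below are part of the patch itself.

import Control.Concurrent (threadDelay)

-- Stand-ins for 'NodeLifecycle', 'LiveNode' and 'LiveIntervalResult'.
data Lifecycle node result = Lifecycle
  { lcMinDuration :: Maybe Double         -- seconds; 'Nothing' disables downtimes
  , lcShutdown    :: node -> IO result    -- persist DB state, collect peer results
  , lcStart       :: result -> IO node    -- restart with the surviving peers
  }

-- Mirrors the decision made by 'checkDowntime' for a single tick.
tickWithPossibleDowntime :: Lifecycle node result -> node -> Double -> IO node
tickWithPossibleDowntime lc node duration
  | Just minD <- lcMinDuration lc
  , duration > minD = do
      result <- lcShutdown lc node
      threadDelay (round (duration * 1e6))  -- the node stays down for the whole tick
      lcStart lc result
  | otherwise = do
      threadDelay (round (duration * 1e6))
      pure node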
+startNode :: forall m. (IOLike m, MonadTime m, MonadTimer m) => SchedulerConfig -> GenesisTestFull TestBlock -> - Tracer m (TraceEvent TestBlock) -> - m (StateView TestBlock) -runPointSchedule schedulerConfig genesisTest tracer0 = - withRegistry $ \registry -> do - stateViewTracers <- defaultStateViewTracers - resources <- makePeerSimulatorResources tracer gtBlockTree (getPeerIds gtSchedule) - let - handles = psrHandles resources - getCandidates = viewChainSyncState handles CSClient.csCandidate - loEVar <- mkLoEVar schedulerConfig - chainDb <- mkChainDb tracer config registry (readTVarIO <$> loEVar) - fetchClientRegistry <- newFetchClientRegistry - let chainDbView = CSClient.defaultChainDbView chainDb - for_ (psrPeers resources) $ \PeerResources {prShared, prChainSync, prBlockFetch} -> do - forkLinkedThread registry ("Peer overview " ++ (show $ srPeerId prShared)) $ - -- The peerRegistry helps ensuring that if any thread fails, then - -- the registry is closed and all threads related to the peer are - -- killed. - withRegistry $ \peerRegistry -> do - (csClient, csServer) <- startChainSyncConnectionThread peerRegistry tracer config chainDbView fetchClientRegistry prShared prChainSync chainSyncTimeouts_ chainSyncLoPBucketConfig stateViewTracers (psrHandles resources) - BlockFetch.startKeepAliveThread peerRegistry fetchClientRegistry (srPeerId prShared) - (bfClient, bfServer) <- startBlockFetchConnectionThread peerRegistry tracer stateViewTracers fetchClientRegistry (pure Continue) prShared prBlockFetch blockFetchTimeouts_ - waitAnyThread [csClient, csServer, bfClient, bfServer] - -- The block fetch logic needs to be started after the block fetch clients - -- otherwise, an internal assertion fails because getCandidates yields more - -- peer fragments than registered clients. - let getCurrentChain = ChainDB.getCurrentChain chainDb - getPoints = traverse readTVar (srCurrentState . prShared <$> psrPeers resources) - mkStateTracer - | scTraceState schedulerConfig - = peerSimStateDiagramSTMTracerDebug gtBlockTree getCurrentChain getCandidates getPoints - | otherwise - = pure nullTracer - - gdd = updateLoEFragGenesis config (mkGDDTracerTestBlock tracer) (readTVar handles) - -- We make GDD rerun every time the anchor or the blocks of the - -- selection change. - gddTrigger = do - s <- viewChainSyncState handles (\ s -> (csLatestSlot s, csIdling s)) - c <- getCurrentChain - return (s, [AF.anchorToHash $ AF.headAnchor c]) - - stateTracer <- mkStateTracer - BlockFetch.startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates - for_ loEVar $ \ var -> - void $ forkLinkedThread registry "LoE updater background" $ - runGdd gdd var chainDb gddTrigger - runScheduler - (Tracer $ traceWith tracer . 
TraceSchedulerEvent) - stateTracer - chainDb - handles - gtSchedule - (psrPeers resources) - snapshotStateView stateViewTracers chainDb + LiveInterval TestBlock m -> + m () +startNode schedulerConfig genesisTest interval = do + let + handles = psrHandles lrPeerSim + getCandidates = viewChainSyncState handles CSClient.csCandidate + fetchClientRegistry <- newFetchClientRegistry + let chainDbView = CSClient.defaultChainDbView lnChainDb + activePeers = Map.restrictKeys (psrPeers lrPeerSim) (lirActive liveResult) + for_ activePeers $ \PeerResources {prShared, prChainSync, prBlockFetch} -> do + let pid = srPeerId prShared + forkLinkedThread lrRegistry ("Peer overview " ++ show pid) $ + -- The peerRegistry helps ensuring that if any thread fails, then + -- the registry is closed and all threads related to the peer are + -- killed. + withRegistry $ \peerRegistry -> do + (csClient, csServer) <- + startChainSyncConnectionThread + peerRegistry + tracer + lrConfig + chainDbView + fetchClientRegistry + prShared + prChainSync + chainSyncTimeouts_ + chainSyncLoPBucketConfig + csjConfig + lnStateViewTracers + handles + BlockFetch.startKeepAliveThread peerRegistry fetchClientRegistry pid + (bfClient, bfServer) <- + startBlockFetchConnectionThread + peerRegistry + tracer + lnStateViewTracers + fetchClientRegistry + (pure Continue) + prShared + prBlockFetch + blockFetchTimeouts_ + waitAnyThread [csClient, csServer, bfClient, bfServer] + -- The block fetch logic needs to be started after the block fetch clients + -- otherwise, an internal assertion fails because getCandidates yields more + -- peer fragments than registered clients. + let getCurrentChain = ChainDB.getCurrentChain lnChainDb + + gdd = updateLoEFragGenesis lrConfig (mkGDDTracerTestBlock lrTracer) (readTVar handles) + -- We make GDD rerun every time the anchor or the blocks of the + -- selection change. + gddTrigger = do + s <- viewChainSyncState handles (\ s -> (csLatestSlot s, csIdling s)) + c <- getCurrentChain + return (s, [AF.anchorToHash $ AF.headAnchor c]) + + BlockFetch.startBlockFetchLogic lrRegistry lrTracer lnChainDb fetchClientRegistry getCandidates + for_ lrLoEVar $ \ var -> do + forkLinkedThread lrRegistry "LoE updater background" $ + void $ runGdd gdd var lnChainDb gddTrigger where - GenesisTest { - PointSchedule.gtSecurityParam = k - , PointSchedule.gtBlockTree - , PointSchedule.gtSchedule - , PointSchedule.gtChainSyncTimeouts - , PointSchedule.gtBlockFetchTimeouts - , PointSchedule.gtLoPBucketParams = LoPBucketParams { lbpCapacity, lbpRate } - , PointSchedule.gtForecastRange - , PointSchedule.gtGenesisWindow + LiveResources {lrRegistry, lrTracer, lrConfig, lrPeerSim, lrLoEVar} = resources + + LiveInterval { + liResources = resources + , liResult = liveResult + , liNode = LiveNode {lnChainDb, lnStateViewTracers} + } = interval + + GenesisTest + { gtChainSyncTimeouts + , gtBlockFetchTimeouts + , gtLoPBucketParams = LoPBucketParams { lbpCapacity, lbpRate } + , gtCSJParams = CSJParams { csjpJumpSize } } = genesisTest - config = defaultCfg k gtForecastRange gtGenesisWindow + StateViewTracers{svtTraceTracer} = lnStateViewTracers -- FIXME: This type of configuration should move to `Trace.mkTracer`. 
- tracer = if scTrace schedulerConfig then tracer0 else nullTracer + tracer = if scTrace schedulerConfig + then Tracer (\evt -> traceWith lrTracer evt >> traceWith svtTraceTracer evt) + else svtTraceTracer chainSyncTimeouts_ = if scEnableChainSyncTimeouts schedulerConfig @@ -344,38 +409,79 @@ runPointSchedule schedulerConfig genesisTest tracer0 = then ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig { csbcCapacity = lbpCapacity, csbcRate = lbpRate } else ChainSyncLoPBucketDisabled + csjConfig = + if scEnableCSJ schedulerConfig + then CSJEnabled CSJEnabledConfig { csjcJumpSize = csjpJumpSize } + else CSJDisabled + blockFetchTimeouts_ = if scEnableBlockFetchTimeouts schedulerConfig then gtBlockFetchTimeouts else BlockFetch.blockFetchNoTimeouts --- | Create a ChainDB and start a BlockRunner that operate on the peers' --- candidate fragments. -mkChainDb :: - IOLike m => +-- | Set up all resources related to node start/shutdown. +nodeLifecycle :: + (IOLike m, MonadTime m, MonadTimer m) => + SchedulerConfig -> + GenesisTestFull TestBlock -> Tracer m (TraceEvent TestBlock) -> - TopLevelConfig TestBlock -> ResourceRegistry m -> - GetLoEFragment m TestBlock -> - m (ChainDB m TestBlock) -mkChainDb tracer nodeCfg registry cdbsLoE = do - chainDbArgs <- do - mcdbNodeDBs <- emptyNodeDBs - let args = updateTracer - (Tracer (traceWith tracer . TraceChainDBEvent)) - (fromMinimalChainDbArgs MinimalChainDbArgs { - mcdbTopLevelConfig = nodeCfg - , mcdbChunkInfo = mkTestChunkInfo nodeCfg - , mcdbInitLedger = testInitExtLedger - , mcdbRegistry = registry - , mcdbNodeDBs - } - ) - pure $ args { ChainDB.Impl.cdbsArgs = (ChainDB.Impl.cdbsArgs args) { cdbsLoE } } - (_, (chainDB, ChainDB.Impl.Internal{ChainDB.Impl.intAddBlockRunner})) <- - allocate - registry - (\_ -> ChainDB.Impl.openDBInternal chainDbArgs False) - (ChainDB.closeDB . fst) - _ <- forkLinkedThread registry "AddBlockRunner" intAddBlockRunner - pure chainDB + PeerSimulatorResources m TestBlock -> + m (NodeLifecycle TestBlock m) +nodeLifecycle schedulerConfig genesisTest lrTracer lrRegistry lrPeerSim = do + lrCdb <- emptyNodeDBs + lrLoEVar <- mkLoEVar schedulerConfig + let + resources = + LiveResources { + lrRegistry + , lrTracer + , lrSTracer = mkStateTracer schedulerConfig genesisTest lrPeerSim + , lrConfig + , lrPeerSim + , lrCdb + , lrLoEVar + } + pure NodeLifecycle { + nlMinDuration = scDowntime schedulerConfig + , nlStart = lifecycleStart (startNode schedulerConfig genesisTest) resources + , nlShutdown = lifecycleStop resources + } + where + lrConfig = defaultCfg k gtForecastRange gtGenesisWindow + + GenesisTest { + gtSecurityParam = k + , gtForecastRange + , gtGenesisWindow + } = genesisTest + +-- | Construct STM resources, set up ChainSync and BlockFetch threads, and +-- send all ticks in a 'PointSchedule' to all given peers in turn. +runPointSchedule :: + forall m. + (IOLike m, MonadTime m, MonadTimer m) => + SchedulerConfig -> + GenesisTestFull TestBlock -> + Tracer m (TraceEvent TestBlock) -> + m (StateView TestBlock) +runPointSchedule schedulerConfig genesisTest tracer0 = + withRegistry $ \registry -> do + peerSim <- makePeerSimulatorResources tracer gtBlockTree (getPeerIds gtSchedule) + lifecycle <- nodeLifecycle schedulerConfig genesisTest tracer registry peerSim + (chainDb, stateViewTracers) <- runScheduler + (Tracer $ traceWith tracer . 
TraceSchedulerEvent) + (psrHandles peerSim) + gtSchedule + (psrPeers peerSim) + lifecycle + snapshotStateView stateViewTracers chainDb + where + + GenesisTest { + gtBlockTree + , gtSchedule + } = genesisTest + + -- FIXME: This type of configuration should move to `Trace.mkTracer`. + tracer = if scTrace schedulerConfig then tracer0 else nullTracer diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledBlockFetchServer.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledBlockFetchServer.hs index d0092ed7d3..d3535f895d 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledBlockFetchServer.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledBlockFetchServer.hs @@ -18,7 +18,7 @@ import Ouroboros.Network.Protocol.BlockFetch.Server import Test.Consensus.PeerSimulator.ScheduledServer (ScheduledServer (..), awaitOnlineState, runHandler) import Test.Consensus.PeerSimulator.Trace -import Test.Consensus.PointSchedule (NodeState) +import Test.Consensus.PointSchedule.NodeState (NodeState) import Test.Consensus.PointSchedule.Peers (PeerId) -- | Return values for the 'handlerSendBlocks'. diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledChainSyncServer.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledChainSyncServer.hs index 1a173a5d8d..9120bb60f9 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledChainSyncServer.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledChainSyncServer.hs @@ -27,7 +27,7 @@ import Test.Consensus.PeerSimulator.ScheduledServer import Test.Consensus.PeerSimulator.Trace (TraceEvent (TraceScheduledChainSyncServerEvent), TraceScheduledChainSyncServerEvent (..)) -import Test.Consensus.PointSchedule (NodeState) +import Test.Consensus.PointSchedule.NodeState (NodeState) import Test.Consensus.PointSchedule.Peers (PeerId) -- | Pure representation of the messages produced by the handler for the @StNext@ diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateDiagram.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateDiagram.hs index 715cc9c464..937f4830ce 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateDiagram.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateDiagram.hs @@ -54,8 +54,8 @@ import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (HeaderHash) import Test.Consensus.BlockTree (BlockTree (btBranches, btTrunk), BlockTreeBranch (btbSuffix), prettyBlockTree) -import qualified Test.Consensus.PointSchedule as PS -import Test.Consensus.PointSchedule (NodeState, genesisNodeState) +import Test.Consensus.PointSchedule.NodeState (NodeState (..), + genesisNodeState) import Test.Consensus.PointSchedule.Peers (PeerId (..)) import Test.Util.TestBlock (TestBlock, TestHash (TestHash)) @@ -478,7 +478,7 @@ addPoints :: Map PeerId (NodeState TestBlock) -> TreeSlots -> TreeSlots addPoints peerPoints treeSlots = foldl' step treeSlots (Map.toList peerPoints) where - step z (pid, ap) = addTipPoint pid (PS.nsTip ap) z + step z (pid, ap) = addTipPoint pid (nsTip ap) z 
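The 'SchedulerConfig' extensions above ('scDowntime', 'scEnableCSJ') are how individual tests opt into node downtime and ChainSync Jumping on top of 'defaultSchedulerConfig'. A hypothetical configuration a test might use; the import is assumed to expose these names, and the concrete values are illustrative only, not taken from any test in this patch.

import Test.Consensus.PeerSimulator.Run (SchedulerConfig (..), defaultSchedulerConfig)

-- Enable ChainSync Jumping, LoE/LoP and node downtime for a single test run.
csjWithDowntimeConfig :: SchedulerConfig
csjWithDowntimeConfig = defaultSchedulerConfig
  { scEnableCSJ = True      -- jump size is taken from 'gtCSJParams' of the 'GenesisTest'
  , scDowntime  = Just 11   -- shut the node down around ticks longer than 11 seconds
  , scEnableLoE = True
  , scEnableLoP = True
  }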
---------------------------------------------------------------------------------------------------- -- Cells diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs index 3d83b5a4fa..9b56b6a0a4 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs @@ -15,10 +15,12 @@ module Test.Consensus.PeerSimulator.StateView ( , exceptionsByComponent , pscrToException , snapshotStateView + , stateViewTracersWithInitial ) where -import Control.Tracer (Tracer) +import Control.Tracer (Tracer, traceWith) import Data.Containers.ListUtils (nubOrd) +import Data.Foldable (for_) import Data.List (sort) import Data.Maybe (mapMaybe) import Network.TypedProtocol.Codec (AnyMessage) @@ -34,6 +36,7 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import Ouroboros.Network.Block (StandardHash, Tip) import Ouroboros.Network.Protocol.BlockFetch.Type (BlockFetch) import Ouroboros.Network.Protocol.ChainSync.Type (ChainSync) +import Test.Consensus.PeerSimulator.Trace (TraceEvent) import Test.Consensus.PointSchedule.Peers (PeerId) import Test.Util.TersePrinting (terseBlock, terseHFragment, terseMaybe) @@ -141,7 +144,9 @@ data StateView blk = StateView { svPeerSimulatorResults :: [PeerSimulatorResult blk], -- | This field holds the most recent point in the selection (incl. anchor) -- for which we have a full block (not just a header). - svTipBlock :: Maybe blk + svTipBlock :: Maybe blk, + -- | List of all TraceEvent that have been sent during the simulation. + svTrace :: [TraceEvent blk] } instance Condense (StateView TestBlock) where @@ -162,6 +167,8 @@ collectDisconnectedPeers stateView = nubOrd $ data StateViewTracers blk m = StateViewTracers { svtPeerSimulatorResultsTracer :: Tracer m (PeerSimulatorResult blk) , svtGetPeerSimulatorResults :: m [PeerSimulatorResult blk] + , svtTraceTracer :: Tracer m (TraceEvent blk) + , svtGetTracerTrace :: m [TraceEvent blk] } -- | Helper to get exceptions from a StateView. @@ -194,7 +201,23 @@ defaultStateViewTracers :: m (StateViewTracers blk m) defaultStateViewTracers = do (svtPeerSimulatorResultsTracer, svtGetPeerSimulatorResults) <- recordingTracerTVar - pure StateViewTracers {svtPeerSimulatorResultsTracer, svtGetPeerSimulatorResults} + (svtTraceTracer, svtGetTracerTrace) <- recordingTracerTVar + pure StateViewTracers + { svtPeerSimulatorResultsTracer + , svtGetPeerSimulatorResults + , svtTraceTracer + , svtGetTracerTrace + } + +-- | Call 'defaultStateViewTracers' and add the provided results. +stateViewTracersWithInitial :: + IOLike m => + [PeerSimulatorResult blk] -> + m (StateViewTracers blk m) +stateViewTracersWithInitial initial = do + svt <- defaultStateViewTracers + for_ initial (traceWith (svtPeerSimulatorResultsTracer svt)) + pure svt -- | Use the state view tracers as well as some extra information to produce a -- state view. 
This mostly consists in reading and storing the current state of @@ -204,8 +227,9 @@ snapshotStateView :: StateViewTracers blk m -> ChainDB m blk -> m (StateView blk) -snapshotStateView StateViewTracers{svtGetPeerSimulatorResults} chainDb = do +snapshotStateView StateViewTracers{svtGetPeerSimulatorResults, svtGetTracerTrace} chainDb = do svPeerSimulatorResults <- svtGetPeerSimulatorResults + svTrace <- svtGetTracerTrace svSelectedChain <- atomically $ ChainDB.getCurrentChain chainDb svTipBlock <- ChainDB.getTipBlock chainDb - pure StateView {svSelectedChain, svPeerSimulatorResults, svTipBlock} + pure StateView {svSelectedChain, svPeerSimulatorResults, svTipBlock, svTrace} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs index 3d64c55719..9a79bbab6e 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE ViewPatterns #-} -- | The scheduled ChainSync and BlockFetch servers are supposed to be linked, -- such that if one gets disconnected, then so does the other. This module @@ -47,29 +48,17 @@ prop_chainSyncKillsBlockFetch = do ( \_ stateView@StateView {svTipBlock} -> svTipBlock == Nothing && case exceptionsByComponent ChainSyncClient stateView of - [exn] -> - case fromException exn of - Just (ExceededTimeLimit _) -> True - _ -> False - _ -> False + [fromException -> Just (ExceededTimeLimit _)] -> True + _ -> False && case exceptionsByComponent BlockFetchClient stateView of - [exn] -> - case fromException exn of - Just (AsyncCancelled) -> True - _ -> False - _ -> False + [fromException -> Just AsyncCancelled] -> True + _ -> False && case exceptionsByComponent ChainSyncServer stateView of - [exn] -> - case fromException exn of - Just (AsyncCancelled) -> True - _ -> False - _ -> False + [fromException -> Just AsyncCancelled] -> True + _ -> False && case exceptionsByComponent BlockFetchServer stateView of - [exn] -> - case fromException exn of - Just (AsyncCancelled) -> True - _ -> False - _ -> False + [fromException -> Just AsyncCancelled] -> True + _ -> False ) where dullSchedule :: GenesisTest blk () -> DiffTime -> PeersSchedule blk diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs index 9570f62595..25f64ca973 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} module Test.Consensus.PeerSimulator.Tests.Timeouts (tests) where @@ -49,10 +50,7 @@ prop_timeouts mustTimeout = do case exceptionsByComponent ChainSyncClient stateView of [] -> counterexample ("result: " ++ condense (svSelectedChain stateView)) (not mustTimeout) - [exn] -> - case fromException exn of - Just (ExceededTimeLimit _) -> property mustTimeout - _ -> counterexample ("exception: " ++ show exn) False + [fromException -> Just (ExceededTimeLimit _)] -> property mustTimeout exns -> counterexample ("exceptions: " 
++ show exns) False ) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs index cc76540ad4..18eddfcced 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs @@ -1,8 +1,9 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE NumericUnderscores #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE ExistentialQuantification #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} -- | Helpers for tracing used by the peer simulator. module Test.Consensus.PeerSimulator.Trace ( @@ -14,6 +15,7 @@ module Test.Consensus.PeerSimulator.Trace ( , TraceScheduledServerHandlerEvent (..) , TraceSchedulerEvent (..) , mkGDDTracerTestBlock + , prettyDensityBounds , traceLinesWith , tracerTestBlock ) where @@ -28,6 +30,12 @@ import Ouroboros.Consensus.Genesis.Governor (DensityBounds (..), TraceGDDEvent (..)) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (TraceChainSyncClientEvent (..)) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping + (Instruction (..), JumpInstruction (..), JumpResult (..)) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State + (ChainSyncJumpingJumperState (..), + ChainSyncJumpingState (..), DynamoInitState (..), + JumpInfo (..)) import Ouroboros.Consensus.Storage.ChainDB.API (LoE (..)) import qualified Ouroboros.Consensus.Storage.ChainDB.Impl as ChainDB import Ouroboros.Consensus.Storage.ChainDB.Impl.Types @@ -36,14 +44,15 @@ import Ouroboros.Consensus.Util.Condense (condense) import Ouroboros.Consensus.Util.IOLike (IOLike, MonadMonotonicTime, Time (Time), atomically, getMonotonicTime, readTVarIO, uncheckedNewTVarM, writeTVar) -import Ouroboros.Network.AnchoredFragment (AnchoredFragment) +import Ouroboros.Network.AnchoredFragment (AnchoredFragment, + headPoint) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (SlotNo (SlotNo), Tip, castPoint) -import Test.Consensus.PointSchedule (NodeState) +import Test.Consensus.PointSchedule.NodeState (NodeState) import Test.Consensus.PointSchedule.Peers (Peer (Peer), PeerId) import Test.Util.TersePrinting (terseAnchor, terseBlock, terseFragment, terseHFragment, terseHeader, tersePoint, - terseRealPoint, terseTip) + terseRealPoint, terseTip, terseWithOrigin) import Test.Util.TestBlock (TestBlock) import Text.Printf (printf) @@ -56,9 +65,19 @@ data TraceSchedulerEvent blk | -- | Right after running the last tick of the schedule. TraceEndOfTime | -- | When beginning a new tick. Contains the tick number (counting from - -- @0@), the duration of the tick, the states, the current chain and the - -- candidate fragment. - TraceNewTick Int DiffTime (Peer (NodeState blk)) (AnchoredFragment (Header blk)) (Maybe (AnchoredFragment (Header blk))) + -- @0@), the duration of the tick, the states, the current chain, the + -- candidate fragment, and the jumping states. + forall m. 
TraceNewTick + Int + DiffTime + (Peer (NodeState blk)) + (AnchoredFragment (Header blk)) + (Maybe (AnchoredFragment (Header blk))) + [(PeerId, ChainSyncJumpingState m blk)] + | TraceNodeShutdownStart (WithOrigin SlotNo) + | TraceNodeShutdownComplete + | TraceNodeStartupStart + | TraceNodeStartupComplete (AnchoredFragment (Header blk)) type HandlerName = String @@ -177,7 +196,7 @@ traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case [ "╶──────────────────────────────────────────────────────────────────────────────╴", "Finished running point schedule" ] - TraceNewTick number duration (Peer pid state) currentChain mCandidateFrag -> do + TraceNewTick number duration (Peer pid state) currentChain mCandidateFrag jumpingStates -> do time <- getMonotonicTime setTickTime time traceLinesWith tracer0 @@ -189,8 +208,50 @@ traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case " peer: " ++ condense pid, " state: " ++ condense state, " current chain: " ++ terseHFragment currentChain, - " candidate fragment: " ++ maybe "Nothing" terseHFragment mCandidateFrag + " candidate fragment: " ++ maybe "Nothing" terseHFragment mCandidateFrag, + " jumping states:\n" ++ traceJumpingStates jumpingStates ] + TraceNodeShutdownStart immTip -> + traceWith tracer0 (" Initiating node shutdown with immutable tip at slot " ++ condense immTip) + TraceNodeShutdownComplete -> + traceWith tracer0 " Node shutdown complete" + TraceNodeStartupStart -> + traceWith tracer0 " Initiating node startup" + TraceNodeStartupComplete selection -> + traceWith tracer0 (" Node startup complete with selection " ++ terseHFragment selection) + + where + traceJumpingStates :: [(PeerId, ChainSyncJumpingState m TestBlock)] -> String + traceJumpingStates = unlines . map (\(pid, state) -> " " ++ condense pid ++ ": " ++ traceJumpingState state) + + traceJumpingState :: ChainSyncJumpingState m TestBlock -> String + traceJumpingState = \case + Dynamo initState lastJump -> + let showInitState = case initState of + DynamoStarting ji -> terseJumpInfo ji + DynamoStarted -> "DynamoStarted" + in unwords ["Dynamo", showInitState, terseWithOrigin show lastJump] + Objector initState goodJumpInfo badPoint -> unwords + [ "Objector" + , show initState + , terseJumpInfo goodJumpInfo + , tersePoint (castPoint badPoint) + ] + Disengaged initState -> "Disengaged " ++ show initState + Jumper _ st -> "Jumper _ " ++ traceJumperState st + + traceJumperState :: ChainSyncJumpingJumperState TestBlock -> String + traceJumperState = \case + Happy initState mGoodJumpInfo -> + "Happy " ++ show initState ++ " " ++ maybe "Nothing" terseJumpInfo mGoodJumpInfo + FoundIntersection initState goodJumpInfo point -> unwords + [ "(FoundIntersection" + , show initState + , terseJumpInfo goodJumpInfo + , tersePoint $ castPoint point, ")" + ] + LookingForIntersection goodJumpInfo badJumpInfo -> unwords + ["(LookingForIntersection", terseJumpInfo goodJumpInfo, terseJumpInfo badJumpInfo, ")"] traceScheduledServerHandlerEventTestBlockWith :: Tracer m String -> @@ -330,9 +391,33 @@ traceChainSyncClientEventTestBlockWith pid tracer = \case trace $ "Threw an exception: " ++ show exception TraceTermination result -> trace $ "Terminated with result: " ++ show result + TraceOfferJump point -> + trace $ "Offering jump to " ++ tersePoint point + TraceJumpResult (AcceptedJump (JumpTo ji)) -> + trace $ "Accepted jump to " ++ terseJumpInfo ji + TraceJumpResult (RejectedJump (JumpTo ji)) -> + trace $ "Rejected jump to " ++ terseJumpInfo ji + TraceJumpResult (AcceptedJump 
(JumpToGoodPoint ji)) -> + trace $ "Accepted jump to good point: " ++ terseJumpInfo ji + TraceJumpResult (RejectedJump (JumpToGoodPoint ji)) -> + trace $ "Rejected jump to good point: " ++ terseJumpInfo ji + TraceJumpingWaitingForNextInstruction -> + trace "Waiting for next instruction from the jumping governor" + TraceJumpingInstructionIs instr -> + trace $ "Received instruction: " ++ showInstr instr where trace = traceUnitWith tracer ("ChainSyncClient " ++ condense pid) + showInstr :: Instruction TestBlock -> String + showInstr = \case + JumpInstruction (JumpTo ji) -> "JumpTo " ++ terseJumpInfo ji + JumpInstruction (JumpToGoodPoint ji) -> "JumpToGoodPoint " ++ terseJumpInfo ji + RunNormally -> "RunNormally" + Restart -> "Restart" + +terseJumpInfo :: JumpInfo TestBlock -> String +terseJumpInfo ji = tersePoint (castPoint $ headPoint $ jTheirFragment ji) + traceChainSyncClientTerminationEventTestBlockWith :: PeerId -> Tracer m String -> @@ -363,22 +448,52 @@ traceBlockFetchClientTerminationEventTestBlockWith pid tracer = \case where trace = traceUnitWith tracer ("BlockFetchClient " ++ condense pid) +prettyDensityBounds :: Map.Map PeerId (DensityBounds TestBlock) -> [String] +prettyDensityBounds bounds = + showPeers (showBounds <$> bounds) + where + showBounds DensityBounds {clippedFragment, offersMoreThanK, lowerBound, upperBound, hasBlockAfter, latestSlot, idling} = + show lowerBound ++ "/" ++ show upperBound ++ "[" ++ more ++ "], " ++ + lastPoint ++ "latest: " ++ showLatestSlot latestSlot ++ block ++ showIdling + where + more = if offersMoreThanK then "+" else " " + + block = if hasBlockAfter then ", has header after sgen" else " " + + -- Note: At some point, I changed this to use @headPoint@ erroneously, so to be clear about what this signifies: + -- The first point after the anchor (which is returned by @lastPoint@, clearly) is used for the condition that + -- the density comparison should not be applied to two peers if they share any headers after the LoE fragment. + lastPoint = + "point: " ++ + tersePoint (castPoint @(Header TestBlock) @TestBlock (AF.lastPoint clippedFragment)) ++ + ", " + + showLatestSlot = \case + Origin -> "unknown" + NotOrigin (SlotNo slot) -> show slot + + showIdling | idling = ", idling" + | otherwise = "" + + showPeers :: Map.Map PeerId String -> [String] + showPeers = fmap (\ (peer, v) -> " " ++ condense peer ++ ": " ++ v) . Map.toList + -- * Other utilities terseGDDEvent :: TraceGDDEvent PeerId TestBlock -> String terseGDDEvent = \case TraceGDDEvent {sgen = GenesisWindow sgen, curChain, bounds, candidates, candidateSuffixes, losingPeers, loeHead} -> unlines $ [ - "GDD | Window: " ++ window sgen loeHead, + "GDG | Window: " ++ window sgen loeHead, " Selection: " ++ terseHFragment curChain, " Candidates:" ] ++ - showPeers (either (const "G") terseHeader . AF.head <$> candidates) ++ + showPeers (tersePoint . castPoint . AF.headPoint <$> candidates) ++ [ " Candidate suffixes (bounds):" ] ++ - showPeers (terseHFragment . fragment <$> bounds) ++ + showPeers (terseHFragment . clippedFragment <$> bounds) ++ [" Density bounds:"] ++ - showPeers (showBounds <$> bounds) ++ + prettyDensityBounds bounds ++ [" New candidate tips:"] ++ showPeers (tersePoint . 
castPoint <$> Map.map AF.headPoint candidateSuffixes) ++ [ @@ -386,25 +501,6 @@ terseGDDEvent = \case " Setting loeFrag: " ++ terseAnchor (AF.castAnchor loeHead) ] where - showBounds DensityBounds {fragment, offersMoreThanK, lowerBound, upperBound, hasBlockAfter, latestSlot, idling} = - show lowerBound ++ "/" ++ show upperBound ++ "[" ++ more ++ "], " ++ - lastPoint ++ "latest: " ++ showLatestSlot latestSlot ++ block ++ showIdling - where - more = if offersMoreThanK then "+" else " " - - block = if hasBlockAfter then ", has header after sgen" else " " - - lastPoint = - "point: " ++ - tersePoint (castPoint @(Header TestBlock) @TestBlock (AF.headPoint fragment)) ++ - ", " - - showLatestSlot = \case - Origin -> "unknown" - NotOrigin (SlotNo slot) -> show slot - - showIdling | idling = ", idling" - | otherwise = "" window sgen loeHead = show winStart ++ " -> " ++ show winEnd diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs index b22189635f..a883a4fc84 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs @@ -22,19 +22,19 @@ -- who then activates the next tick's peer. module Test.Consensus.PointSchedule ( BlockFetchTimeout (..) + , CSJParams (..) , ForecastRange (..) , GenesisTest (..) , GenesisTestFull , GenesisWindow (..) , LoPBucketParams (..) - , NodeState (..) , PeerSchedule , PeersSchedule , RunGenesisTestResult (..) , enrichedWith + , ensureScheduleDuration , genesisNodeState , longRangeAttack - , nsTipTip , peerSchedulesBlocks , peerStates , peersStates @@ -43,6 +43,7 @@ module Test.Consensus.PointSchedule ( , prettyPeersSchedule , stToGen , uniformPoints + , uniformPointsWithDowntime ) where import Cardano.Slotting.Time (SlotLength) @@ -52,7 +53,7 @@ import Control.Monad.ST (ST) import Data.Foldable (toList) import Data.Functor (($>)) import Data.List (mapAccumL, partition, scanl') -import Data.Maybe (mapMaybe) +import Data.Maybe (catMaybes, fromMaybe, mapMaybe) import Data.Time (DiffTime) import Data.Word (Word64) import Ouroboros.Consensus.Block.Abstract (WithOrigin (..), @@ -60,19 +61,20 @@ import Ouroboros.Consensus.Block.Abstract (WithOrigin (..), import Ouroboros.Consensus.Ledger.SupportsProtocol (GenesisWindow (..)) import Ouroboros.Consensus.Network.NodeToNode (ChainSyncTimeout (..)) -import Ouroboros.Consensus.Protocol.Abstract (SecurityParam, - maxRollbacks) -import Ouroboros.Consensus.Util.Condense (Condense (..), - CondenseList (..), PaddingDirection (..), - condenseListWithPadding, padListWith) +import Ouroboros.Consensus.Protocol.Abstract + (SecurityParam (SecurityParam), maxRollbacks) +import Ouroboros.Consensus.Util.Condense (CondenseList (..), + PaddingDirection (..), condenseListWithPadding) import qualified Ouroboros.Network.AnchoredFragment as AF -import Ouroboros.Network.Block (Tip (..), tipFromHeader) +import Ouroboros.Network.Block (SlotNo (..), blockSlot) import Ouroboros.Network.Point (withOrigin) import qualified System.Random.Stateful as Random import System.Random.Stateful (STGenM, StatefulGen, runSTGen_) import Test.Consensus.BlockTree (BlockTree (..), BlockTreeBranch (..), allFragments, prettyBlockTree) import Test.Consensus.PeerSimulator.StateView (StateView) +import Test.Consensus.PointSchedule.NodeState (NodeState (..), + genesisNodeState) import Test.Consensus.PointSchedule.Peers (Peer (..), Peers 
(..), mkPeers, peersList) import Test.Consensus.PointSchedule.SinglePeer @@ -84,53 +86,10 @@ import Test.Consensus.PointSchedule.SinglePeer.Indices import Test.Ouroboros.Consensus.ChainGenerator.Params (Delta (Delta)) import Test.QuickCheck (Gen, arbitrary) import Test.QuickCheck.Random (QCGen) -import Test.Util.TersePrinting (terseBlock, terseFragment, - terseWithOrigin) +import Test.Util.TersePrinting (terseFragment) import Test.Util.TestBlock (TestBlock) import Text.Printf (printf) ----------------------------------------------------------------------------------------------------- --- Data types ----------------------------------------------------------------------------------------------------- - --- | The state of a peer at a given point in time. -data NodeState blk = - NodeState { - nsTip :: WithOrigin blk, - nsHeader :: WithOrigin blk, - nsBlock :: WithOrigin blk - } - deriving (Eq, Show) - -nsTipTip :: AF.HasHeader blk => NodeState blk -> Tip blk -nsTipTip = withOrigin TipGenesis tipFromHeader . nsTip - -instance Condense (NodeState TestBlock) where - condense NodeState {nsTip, nsHeader, nsBlock} = - "TP " ++ terseWithOrigin terseBlock nsTip ++ - " | HP " ++ terseWithOrigin terseBlock nsHeader ++ - " | BP " ++ terseWithOrigin terseBlock nsBlock - -instance CondenseList (NodeState TestBlock) where - condenseList points = - zipWith3 - (\tip header block -> - "TP " ++ tip ++ - " | HP " ++ header ++ - " | BP " ++ block - ) - (padListWith PadRight $ map (terseWithOrigin terseBlock . nsTip) points) - (padListWith PadRight $ map (terseWithOrigin terseBlock . nsHeader) points) - (padListWith PadRight $ map (terseWithOrigin terseBlock . nsBlock) points) - -genesisNodeState :: NodeState blk -genesisNodeState = - NodeState { - nsTip = Origin, - nsHeader = Origin, - nsBlock = Origin - } - prettyPeersSchedule :: forall blk. (CondenseList (NodeState blk)) => @@ -306,6 +265,132 @@ uniformPoints BlockTree {btTrunk, btBranches} g = do rollbackProb = 0.2 +minusClamp :: (Ord a, Num a) => a -> a -> a +minusClamp a b | a <= b = 0 + | otherwise = a - b + +zipPadN :: forall a . [[a]] -> [[Maybe a]] +zipPadN = + spin [] + where + spin acc as + | all null as + = reverse acc + | let (h, t) = unzip (takeNext <$> as) + = spin (h : acc) t + + takeNext = \case + [] -> (Nothing, []) + h : t -> (Just h, t) + +isTip :: SchedulePoint blk -> Bool +isTip = \case + ScheduleTipPoint _ -> True + _ -> False + +tipTimes :: [(Time, SchedulePoint blk)] -> [Time] +tipTimes = + fmap fst . filter (isTip . snd) + +bumpTips :: [Time] -> [(Time, SchedulePoint blk)] -> [(Time, SchedulePoint blk)] +bumpTips tips = + snd . mapAccumL step tips + where + step (t0 : tn) (_, p) + | isTip p + = (tn, (t0, p)) + step ts a = (ts, a) + +syncTips :: [(Time, SchedulePoint blk)] -> [[(Time, SchedulePoint blk)]] -> ([(Time, SchedulePoint blk)], [[(Time, SchedulePoint blk)]]) +syncTips honest advs = + (bump honest, bump <$> advs) + where + bump = bumpTips earliestTips + earliestTips = chooseEarliest <$> zipPadN (tipTimes <$> scheds) + scheds = honest : advs + chooseEarliest times = minimum (fromMaybe (Time 0) <$> times) + +-- | This is a variant of 'uniformPoints' that uses multiple tip points, used to simulate node downtimes. +-- Ultimately, this should be replaced by a redesign of the peer schedule generator that is aware of node liveness +-- intervals. +-- +-- Chooses the first tip points somewhere in the middle of the honest chain: +-- The "pause slot" is half of the honest head slot, or the slot of the kth block, whichever is greater. 
+-- The last block smaller than the pause slot is then used as the first tip for each branch. +-- The second tip is the last block of each branch. +-- +-- Includes rollbacks in some schedules. +uniformPointsWithDowntime :: + (StatefulGen g m, AF.HasHeader blk) => + SecurityParam -> + BlockTree blk -> + g -> + m (PeersSchedule blk) +uniformPointsWithDowntime (SecurityParam k) BlockTree {btTrunk, btBranches} g = do + let + kSlot = withOrigin 0 (fromIntegral . unSlotNo) (AF.headSlot (AF.takeOldest (fromIntegral k) btTrunk)) + midSlot = (AF.length btTrunk) `div` 2 + lowerBound = max kSlot midSlot + pauseSlot <- SlotNo . fromIntegral <$> Random.uniformRM (lowerBound, AF.length btTrunk - 1) g + honestTip0 <- firstTip pauseSlot btTrunk + honest <- mkSchedule [(IsTrunk, [honestTip0, minusClamp (AF.length btTrunk) 1])] [] + advs <- takeBranches pauseSlot btBranches + let (honest', advs') = syncTips honest advs + pure (mkPeers honest' advs') + where + takeBranches pause = \case + [] -> pure [] + [b] -> pure <$> withoutRollback pause b + b1 : b2 : branches -> do + a <- Random.uniformDouble01M g + if a < rollbackProb + then do + this <- withRollback pause b1 b2 + rest <- takeBranches pause branches + pure (this : rest) + else do + this <- withoutRollback pause b1 + rest <- takeBranches pause (b2 : branches) + pure (this : rest) + + withoutRollback pause branch = do + tips <- mkTips pause branch + mkSchedule tips [btbSuffix branch] + + withRollback pause b1 b2 = do + firstTips <- mkTips pause b1 + let secondTips = [minusClamp (AF.length (btbSuffix b2)) 1] + mkSchedule (firstTips ++ [(IsBranch, secondTips)]) [btbSuffix b1, btbSuffix b2] + + mkSchedule tips branches = do + params <- mkParams + peerScheduleFromTipPoints g params tips btTrunk branches + + mkTips pause branch + | AF.length full == 0 = + error "empty branch" + | otherwise = do + tip0 <- firstTip pause (btbFull branch) + let (pre, post) = partition (< firstSuffixBlock) [tip0, fullLen - 1] + pure ((if null pre then [] else [(IsTrunk, pre)]) ++ [(IsBranch, shift <$> post)]) + where + shift i = i - firstSuffixBlock + firstSuffixBlock = fullLen - AF.length (btbSuffix branch) + fullLen = AF.length full + full = btbFull branch + + firstTip pause frag = pure (minusClamp (AF.length (AF.dropWhileNewest (\ b -> blockSlot b > pause) frag)) 1) + + mkParams = do + -- These values appear to be large enough to create pauses of 100 seconds and more. + tipL <- uniformRMDiffTime (0.5, 1) g + tipU <- uniformRMDiffTime (1, 2) g + headerL <- uniformRMDiffTime (0.018, 0.03) g + headerU <- uniformRMDiffTime (0.021, 0.04) g + pure defaultPeerScheduleParams {pspTipDelayInterval = (tipL, tipU), pspHeaderDelayInterval = (headerL, headerU)} + + rollbackProb = 0.2 + newtype ForecastRange = ForecastRange { unForecastRange :: Word64 } deriving (Show) @@ -315,6 +400,11 @@ data LoPBucketParams = LoPBucketParams { lbpRate :: Rational } +data CSJParams = CSJParams { + csjpJumpSize :: SlotNo + } + deriving Show + -- | Similar to 'ChainSyncTimeout' for BlockFetch. Only the states in which the -- server has agency are specified. REVIEW: Should it be upstreamed to -- ouroboros-network-protocols? 
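'CSJParams' above only carries the jump size; the peer simulator translates it into the ChainSync client's 'CSJConfig' (see 'csjConfig' in Test.Consensus.PeerSimulator.Run). A small sketch of that translation; 'toCSJConfig', 'exampleCSJConfig' and the jump size of 100 slots are illustrative only, while the imported modules and constructors are the ones used in this patch.

import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
         (CSJConfig (..), CSJEnabledConfig (..))
import Ouroboros.Network.Block (SlotNo (..))
import Test.Consensus.PointSchedule (CSJParams (..))

-- Mirrors the wiring in the peer simulator: either enable CSJ with the test's
-- jump size or disable it entirely.
toCSJConfig :: Bool -> CSJParams -> CSJConfig
toCSJConfig enabled params
  | enabled   = CSJEnabled CSJEnabledConfig { csjcJumpSize = csjpJumpSize params }
  | otherwise = CSJDisabled

exampleCSJConfig :: CSJConfig
exampleCSJConfig = toCSJConfig True CSJParams { csjpJumpSize = SlotNo 100 }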
@@ -333,6 +423,7 @@ data GenesisTest blk schedule = GenesisTest gtChainSyncTimeouts :: ChainSyncTimeout, gtBlockFetchTimeouts :: BlockFetchTimeout, gtLoPBucketParams :: LoPBucketParams, + gtCSJParams :: CSJParams, gtSlotLength :: SlotLength, gtSchedule :: schedule } @@ -353,6 +444,7 @@ prettyGenesisTest prettySchedule genesisTest = , " gtForecastRange: " ++ show (unForecastRange gtForecastRange) , " gtDelay: " ++ show delta , " gtSlotLength: " ++ show gtSlotLength + , " gtCSJParams: " ++ show gtCSJParams , " gtChainSyncTimeouts: " , " canAwait = " ++ show canAwaitTimeout , " intersect = " ++ show intersectTimeout @@ -381,6 +473,7 @@ prettyGenesisTest prettySchedule genesisTest = , gtBlockFetchTimeouts = BlockFetchTimeout{busyTimeout, streamingTimeout} , gtLoPBucketParams = LoPBucketParams{lbpCapacity, lbpRate} , gtSlotLength + , gtCSJParams , gtSchedule } = genesisTest @@ -397,3 +490,28 @@ stToGen :: stToGen gen = do seed :: QCGen <- arbitrary pure (runSTGen_ seed gen) + +duplicateLastPoint + :: DiffTime -> [(Time, SchedulePoint blk)] -> [(Time, SchedulePoint blk)] +duplicateLastPoint d [] = [(Time d, ScheduleTipPoint Origin)] +duplicateLastPoint d xs = + let (t, p) = last xs + in xs ++ [(addTime d t, p)] + +ensureScheduleDuration :: GenesisTest blk a -> PeersSchedule blk -> PeersSchedule blk +ensureScheduleDuration gt Peers {honest, others} = + Peers {honest = extendHonest, others} + where + extendHonest = duplicateLastPoint endingDelay <$> honest + + endingDelay = + let cst = gtChainSyncTimeouts gt + bft = gtBlockFetchTimeouts gt + in 1 + fromIntegral peerCount * maximum (0 : catMaybes + [ canAwaitTimeout cst + , intersectTimeout cst + , busyTimeout bft + , streamingTimeout bft + ]) + + peerCount = 1 + length others diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/NodeState.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/NodeState.hs new file mode 100644 index 0000000000..33cbbdd8dd --- /dev/null +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/NodeState.hs @@ -0,0 +1,57 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE NamedFieldPuns #-} + +module Test.Consensus.PointSchedule.NodeState ( + NodeState (..) + , genesisNodeState + , nsTipTip + ) where + +import Ouroboros.Consensus.Block.Abstract (WithOrigin (..)) +import Ouroboros.Consensus.Util.Condense (Condense (..), + CondenseList (..), PaddingDirection (..), padListWith) +import qualified Ouroboros.Network.AnchoredFragment as AF +import Ouroboros.Network.Block (Tip (..), tipFromHeader) +import Ouroboros.Network.Point (withOrigin) +import Test.Util.TersePrinting (terseBlock, terseWithOrigin) +import Test.Util.TestBlock (TestBlock) + +-- | The state of a peer at a given point in time. +data NodeState blk = + NodeState { + nsTip :: WithOrigin blk, + nsHeader :: WithOrigin blk, + nsBlock :: WithOrigin blk + } + deriving (Eq, Show) + +nsTipTip :: AF.HasHeader blk => NodeState blk -> Tip blk +nsTipTip = withOrigin TipGenesis tipFromHeader . nsTip + +instance Condense (NodeState TestBlock) where + condense NodeState {nsTip, nsHeader, nsBlock} = + "TP " ++ terseWithOrigin terseBlock nsTip ++ + " | HP " ++ terseWithOrigin terseBlock nsHeader ++ + " | BP " ++ terseWithOrigin terseBlock nsBlock + +instance CondenseList (NodeState TestBlock) where + condenseList points = + zipWith3 + (\tip header block -> + "TP " ++ tip ++ + " | HP " ++ header ++ + " | BP " ++ block + ) + (padListWith PadRight $ map (terseWithOrigin terseBlock . 
nsTip) points) + (padListWith PadRight $ map (terseWithOrigin terseBlock . nsHeader) points) + (padListWith PadRight $ map (terseWithOrigin terseBlock . nsBlock) points) + +genesisNodeState :: NodeState blk +genesisNodeState = + NodeState { + nsTip = Origin, + nsHeader = Origin, + nsBlock = Origin + } + + diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs index bacf1ea728..9d0a084c08 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE FlexibleContexts #-} @@ -38,6 +39,7 @@ import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.String (IsString (fromString)) import GHC.Generics (Generic) +import NoThunks.Class (NoThunks) import Ouroboros.Consensus.Util.Condense (Condense (..), CondenseList (..), PaddingDirection (..), condenseListWithPadding) @@ -47,7 +49,7 @@ data PeerId = HonestPeer | PeerId String - deriving (Eq, Generic, Show, Ord) + deriving (Eq, Generic, Show, Ord, NoThunks) instance IsString PeerId where fromString "honest" = HonestPeer @@ -108,6 +110,9 @@ data Peers a = instance Functor Peers where fmap f Peers {honest, others} = Peers {honest = f <$> honest, others = fmap f <$> others} +instance Foldable Peers where + foldMap f Peers {honest, others} = (f . value) honest <> foldMap (f . value) others + -- | A set of peers with only one honest peer carrying the given value. peersOnlyHonest :: a -> Peers a peersOnlyHonest value = @@ -127,12 +132,15 @@ getPeer pid peers | otherwise = others peers Map.! pid -updatePeer :: (a -> a) -> PeerId -> Peers a -> Peers a +updatePeer :: (a -> (a, b)) -> PeerId -> Peers a -> (Peers a, b) updatePeer f pid Peers {honest, others} | HonestPeer <- pid - = Peers {honest = f <$> honest, others} + , let (a, b) = f (value honest) + = (Peers {honest = a <$ honest, others}, b) | otherwise - = Peers {honest, others = Map.adjust (fmap f) pid others} + , let p = others Map.! pid + (a, b) = f (value p) + = (Peers {honest, others = Map.adjust (a <$) pid others}, b) -- | Convert 'Peers' to a list of 'Peer'. 
peersList :: Peers a -> NonEmpty (Peer a) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs index 9c7398b1d2..8332e62021 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs @@ -1,24 +1,29 @@ +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} module Test.Consensus.PointSchedule.Shrinking ( shrinkByRemovingAdversaries + -- | Exported only for testing (that is, checking the properties of the function) + , shrinkHonestPeer , shrinkPeerSchedules - , trimBlockTree' ) where +import Control.Monad.Class.MonadTime.SI (DiffTime, Time, addTime, + diffTime) import Data.Containers.ListUtils (nubOrd) import Data.Functor ((<&>)) import qualified Data.Map.Strict as Map -import Data.Maybe (mapMaybe) +import Data.Maybe (mapMaybe, maybeToList) import Ouroboros.Network.AnchoredFragment (AnchoredFragment, AnchoredSeq (Empty), takeWhileOldest) import Test.Consensus.BlockTree (BlockTree (..), BlockTreeBranch (..), addBranch', mkTrunk) import Test.Consensus.PeerSimulator.StateView (StateView) -import Test.Consensus.PointSchedule - (GenesisTest (gtBlockTree, gtSchedule), GenesisTestFull, - PeerSchedule, PeersSchedule, peerSchedulesBlocks) -import Test.Consensus.PointSchedule.Peers (Peers (..)) +import Test.Consensus.PointSchedule (GenesisTest (..), + GenesisTestFull, PeerSchedule, PeersSchedule, + peerSchedulesBlocks) +import Test.Consensus.PointSchedule.Peers (Peer (..), Peers (..)) +import Test.Consensus.PointSchedule.SinglePeer (SchedulePoint (..)) import Test.QuickCheck (shrinkList) import Test.Util.TestBlock (TestBlock, isAncestorOf, isStrictAncestorOf) @@ -32,9 +37,17 @@ shrinkPeerSchedules :: StateView TestBlock -> [GenesisTestFull TestBlock] shrinkPeerSchedules genesisTest _stateView = - shrinkOtherPeers shrinkPeerSchedule (gtSchedule genesisTest) <&> \shrunkSchedule -> - let trimmedBlockTree = trimBlockTree' shrunkSchedule (gtBlockTree genesisTest) - in genesisTest{gtSchedule = shrunkSchedule, gtBlockTree = trimmedBlockTree} + let trimmedBlockTree sch = trimBlockTree' sch (gtBlockTree genesisTest) + shrunkOthers = shrinkOtherPeers shrinkPeerSchedule (gtSchedule genesisTest) <&> + \shrunkSchedule -> genesisTest + { gtSchedule = shrunkSchedule + , gtBlockTree = trimmedBlockTree shrunkSchedule + } + shrunkHonest = shrinkHonestPeer + (gtSchedule genesisTest) + -- No need to update the tree here, shrinking the honest peer never discards blocks + <&> \shrunkSchedule -> genesisTest {gtSchedule = shrunkSchedule} + in shrunkOthers ++ shrunkHonest -- | Shrink a 'Peers PeerSchedule' by removing adversaries. This does not affect -- the honest peer; and it does not remove ticks from the schedules of the @@ -60,6 +73,68 @@ shrinkOtherPeers shrink Peers{honest, others} = map (Peers honest . Map.fromList) $ shrinkList (traverse (traverse shrink)) $ Map.toList others +-- | Shrinks an honest peer by removing ticks. +-- Because we are manipulating 'PeerSchedule' at that point, there is no proper +-- notion of a tick. Instead, we remove points of the honest 'PeerSchedule', +-- and move all other points sooner, including those on the adversarial schedule. +-- We check that this operation neither changes the final state of the honest peer, +-- nor that it removes points from the adversarial schedules. 
+shrinkHonestPeer :: Peers (PeerSchedule blk) -> [Peers (PeerSchedule blk)]
+shrinkHonestPeer Peers{honest, others} = do
+  (at, speedUpBy) <- splits
+  (honest', others') <- maybeToList $ do
+    honest' <- traverse (speedUpHonestSchedule at speedUpBy) honest
+    others' <- mapM (traverse (speedUpAdversarialSchedule at speedUpBy)) others
+    pure (honest', others')
+  pure $ Peers honest' others'
+  where
+    -- | A list of non-zero time intervals between successive points of the honest schedule
+    splits :: [(Time, DiffTime)]
+    splits = mapMaybe
+      (\((t1, _), (t2, _)) ->
+        if t1 == t2
+          then Nothing
+          else Just (t1, diffTime t2 t1)
+      )
+      (zip (value honest) (drop 1 $ value honest))
+
+-- | Speeds up an honest schedule after time `at`, by `speedUpBy`.
+-- This “speeding up” is done by subtracting @speedUpBy@ from all points after @at@,
+-- and removing those points if they end up falling before `at`. We check that the
+-- operation doesn't change the final state of the peer, i.e. that it doesn't remove
+-- all TP, HP, and BP in the sped-up part.
+speedUpHonestSchedule :: Time -> DiffTime -> PeerSchedule blk -> Maybe (PeerSchedule blk)
+speedUpHonestSchedule at speedUpBy sch =
+  if stillValid then Just $ beforeSplit ++ spedUpSchedule else Nothing
+  where
+    (beforeSplit, afterSplit) = span ((< at) . fst) sch
+    threshold = addTime speedUpBy at
+    spedUpSchedule = mapMaybe
+      (\(t, p) -> if t < threshold then Nothing else Just (addTime (-speedUpBy) t, p))
+      afterSplit
+    stillValid =
+      (hasTP spedUpSchedule == hasTP afterSplit)
+      && (hasHP spedUpSchedule == hasHP afterSplit)
+      && (hasBP spedUpSchedule == hasBP afterSplit)
+    hasTP = any (\case (_, ScheduleTipPoint _) -> True; _ -> False)
+    hasHP = any (\case (_, ScheduleHeaderPoint _) -> True; _ -> False)
+    hasBP = any (\case (_, ScheduleBlockPoint _) -> True; _ -> False)
+
+-- | Speeds up an adversarial schedule after time `at`, by `speedUpBy`.
+-- This “speeding up” is done by subtracting `speedUpBy` from all points after `at`.
+-- We check that the schedule had no points between `at` and `at + speedUpBy`.
+-- We also keep the last point where it is, so that the end time stays the same.
+speedUpAdversarialSchedule :: Time -> DiffTime -> PeerSchedule blk -> Maybe (PeerSchedule blk)
+speedUpAdversarialSchedule at speedUpBy sch =
+  if losesPoint then Nothing else Just $ beforeSplit ++ spedUpSchedule ++ lastPoint
+  where
+    (beforeSplit, afterSplit) = span ((< at) . fst) sch
+    spedUpSchedule = map (\(t, p) -> (addTime (-speedUpBy) t, p)) $ take (length afterSplit - 1) afterSplit
+    losesPoint = any ((< (addTime speedUpBy at)) . fst) afterSplit
+    lastPoint = case afterSplit of
+      [] -> []
+      as -> [last as]
+
 -- | Remove blocks from the given block tree that are not necessary for the
 -- given peer schedules. If entire branches are unused, they are removed. If the
 -- trunk is unused, then it remains as an empty anchored fragment.
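To make the two speed-up helpers above concrete, here is a minimal standalone sketch of the same transformation, using plain Doubles instead of 'Time'/'DiffTime' and omitting the final-state check that 'speedUpHonestSchedule' performs:

    import Data.Maybe (mapMaybe)

    -- Simplified stand-in for the speed-up step: points after 'at' are moved
    -- 'd' earlier, and dropped if that would put them before 'at'.
    speedUp :: Double -> Double -> [(Double, a)] -> [(Double, a)]
    speedUp at d sch = before ++ mapMaybe shift after
      where
        (before, after) = span ((< at) . fst) sch
        shift (t, p)
          | t < at + d = Nothing          -- would land before 'at': drop it
          | otherwise  = Just (t - d, p)  -- otherwise move it 'd' earlier

For instance, speedUp 2 1 [(1,'a'),(2.5,'b'),(4,'c')] yields [(1.0,'a'),(3.0,'c')]: the point at 2.5 is dropped and the point at 4 moves to 3. Dropping a point is exactly what the 'stillValid' check above guards against when the dropped point was the last tip, header or block point of the sped-up part.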
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs
new file mode 100644
index 0000000000..8332f7be83
--- /dev/null
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs
@@ -0,0 +1,107 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+-- | Test properties of the shrinking functions
+module Test.Consensus.PointSchedule.Shrinking.Tests (tests) where
+
+import Data.Foldable (toList)
+import Data.Map (keys)
+import Data.Maybe (mapMaybe)
+import Test.Consensus.Genesis.Setup (genChains)
+import Test.Consensus.Genesis.Tests.Uniform (genUniformSchedulePoints)
+import Test.Consensus.PointSchedule (PeerSchedule, PeersSchedule,
+    prettyPeersSchedule)
+import Test.Consensus.PointSchedule.Peers (Peer (..), Peers (..))
+import Test.Consensus.PointSchedule.Shrinking (shrinkHonestPeer)
+import Test.Consensus.PointSchedule.SinglePeer (SchedulePoint (..))
+import Test.QuickCheck (Property, conjoin, counterexample)
+import Test.Tasty
+import Test.Tasty.QuickCheck (choose, forAllBlind, testProperty)
+import Test.Util.TestBlock (TestBlock)
+
+tests :: TestTree
+tests =
+  testGroup "shrinking functions"
+    [ testGroup "honest peer shrinking"
+      [ testProperty "actually shortens the schedule" prop_shortens
+      , testProperty "preserves the final state of all peers" prop_preservesFinalStates
+      , testProperty "doesn't remove points of the adversarial schedule" prop_preserversAdversarial
+      ]
+    ]
+
+prop_shortens :: Property
+prop_shortens = checkShrinkProperty isShorterThan
+
+prop_preservesFinalStates :: Property
+prop_preservesFinalStates = checkShrinkProperty doesNotChangeFinalState
+
+prop_preserversAdversarial :: Property
+prop_preserversAdversarial = checkShrinkProperty doesNotRemoveAdversarialPoints
+
+-- | Apparently, `unsnoc` hasn't been invented yet, so we'll do this manually
+lastM :: [a] -> Maybe a
+lastM [] = Nothing
+lastM [a] = Just a
+lastM (_:ps) = lastM ps
+
+samePeers :: PeersSchedule blk -> PeersSchedule blk -> Bool
+samePeers sch1 sch2 = (keys $ others sch1) == (keys $ others sch2)
+
+-- | Checks whether at least one peer schedule in the second given peers schedule
+-- is shorter than its corresponding one in the first given peers schedule. “Shorter”
+-- here means that it executes in less time.
+isShorterThan :: PeersSchedule blk -> PeersSchedule blk -> Bool +isShorterThan original shrunk = + samePeers original shrunk + && (or $ zipWith + (\oldSch newSch -> (fst <$> lastM newSch) < (fst <$> lastM oldSch)) + (toList original) + (toList shrunk) + ) + +doesNotChangeFinalState :: Eq blk => PeersSchedule blk -> PeersSchedule blk -> Bool +doesNotChangeFinalState original shrunk = + samePeers original shrunk + && (and $ zipWith + (\oldSch newSch -> + lastTP oldSch == lastTP newSch && + lastHP oldSch == lastHP newSch && + lastBP oldSch == lastBP newSch + ) + (toList original) + (toList shrunk) + ) + where + lastTP :: PeerSchedule blk -> Maybe (SchedulePoint blk) + lastTP sch = lastM $ mapMaybe (\case (_, p@(ScheduleTipPoint _)) -> Just p ; _ -> Nothing) sch + lastHP :: PeerSchedule blk -> Maybe (SchedulePoint blk) + lastHP sch = lastM $ mapMaybe (\case (_, p@(ScheduleHeaderPoint _)) -> Just p ; _ -> Nothing) sch + lastBP :: PeerSchedule blk -> Maybe (SchedulePoint blk) + lastBP sch = lastM $ mapMaybe (\case (_, p@(ScheduleBlockPoint _)) -> Just p ; _ -> Nothing) sch + +doesNotRemoveAdversarialPoints :: Eq blk => PeersSchedule blk -> PeersSchedule blk -> Bool +doesNotRemoveAdversarialPoints original shrunk = + samePeers original shrunk + && (and $ zipWith + (\oldSch newSch -> fmap snd oldSch == fmap snd newSch) + (toList $ (fmap value) $ others original) + (toList $ (fmap value) $ others shrunk) + ) + +checkShrinkProperty :: (PeersSchedule TestBlock -> PeersSchedule TestBlock -> Bool) -> Property +checkShrinkProperty prop = + forAllBlind + (genChains (choose (1, 4)) >>= genUniformSchedulePoints) + (\schedule -> + conjoin $ map + (\shrunk -> + counterexample + ( "Original schedule:\n" + ++ unlines (map (" " ++) $ prettyPeersSchedule schedule) + ++ "\nShrunk schedule:\n" + ++ unlines (map (" " ++) $ prettyPeersSchedule shrunk) + ) + (prop schedule shrunk) + ) + (shrinkHonestPeer schedule) + ) diff --git a/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs b/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs index 0d8d54e395..e1e69b3da1 100644 --- a/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs +++ b/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs @@ -138,6 +138,7 @@ oneBenchRun , CSClient.setLatestSlot = \_ -> pure () , CSClient.idling = CSClient.noIdling , CSClient.loPBucket = CSClient.noLoPBucket + , CSClient.jumping = CSClient.noJumping } server :: ChainSyncServer H (Point B) (Tip B) IO () diff --git a/ouroboros-consensus/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md b/ouroboros-consensus/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md new file mode 100644 index 0000000000..de8c14c7ac --- /dev/null +++ b/ouroboros-consensus/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md @@ -0,0 +1,7 @@ +### Non-Breaking + +- Fixed GDD implementation. (still disabled by default) + +### Breaking + +- Implemented a first version of CSJ (ChainSync Jumping). 
(disabled by default) diff --git a/ouroboros-consensus/ouroboros-consensus.cabal b/ouroboros-consensus/ouroboros-consensus.cabal index 894fcb2c6b..93a5b01352 100644 --- a/ouroboros-consensus/ouroboros-consensus.cabal +++ b/ouroboros-consensus/ouroboros-consensus.cabal @@ -169,6 +169,8 @@ library Ouroboros.Consensus.MiniProtocol.BlockFetch.Server Ouroboros.Consensus.MiniProtocol.ChainSync.Client Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck + Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping + Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State Ouroboros.Consensus.MiniProtocol.ChainSync.Server Ouroboros.Consensus.MiniProtocol.LocalStateQuery.Server Ouroboros.Consensus.MiniProtocol.LocalTxMonitor.Server diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs index 2448dcd755..02cb70527d 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs @@ -169,7 +169,7 @@ runGdd loEUpdater varLoEFrag chainDb getTrigger = data DensityBounds blk = DensityBounds { - fragment :: AnchoredFragment (Header blk), + clippedFragment :: AnchoredFragment (Header blk), offersMoreThanK :: Bool, lowerBound :: Word64, upperBound :: Word64, @@ -208,7 +208,7 @@ densityDisconnect (GenesisWindow sgen) (SecurityParam k) states candidateSuffixe (losingPeers, densityBounds) where densityBounds = Map.fromList $ do - (peer, fragment) <- Map.toList competingFrags + (peer, clippedFragment) <- Map.toList clippedFrags state <- maybeToList (states Map.!? peer) -- Skip peers that haven't sent any headers yet. -- They should be disconnected by timeouts instead. @@ -222,25 +222,23 @@ densityDisconnect (GenesisWindow sgen) (SecurityParam k) states candidateSuffixe max (AF.headSlot candidateSuffix) latestSlot >= NotOrigin firstSlotAfterGenesisWindow - -- If the peer is idling, we assume it has no more headers to send. - -- -- If the slot of the latest header we know of is _after_ the end of -- the Genesis window (either because the candidate fragment extends -- beyond it or because we are waiting to validate a header beyond the -- forecast horizon that we already received), there can be no headers -- in between and 'potentialSlots' is 0. - -- - -- If the peer has more headers that it hasn't sent yet, each slot - -- between the latest header we know of and the end of the Genesis - -- window could contain a block, so the upper bound for the total - -- number of blocks in the window is the sum of the known blocks and - -- that number of remaining slots. potentialSlots = - if idling || hasBlockAfter then 0 - else sgen - totalBlockCount + if hasBlockAfter then 0 + else unknownTrailingSlots + + -- Number of trailing slots in the genesis window that could have + -- headers which haven't been sent yet + unknownTrailingSlots = unSlotNo $ + -- cannot underflow as the fragment is clipped to the genesis window + firstSlotAfterGenesisWindow - succWithOrigin (AF.headSlot clippedFragment) -- The number of blocks within the Genesis window we know with certainty - lowerBound = fromIntegral $ AF.length fragment + lowerBound = fromIntegral $ AF.length clippedFragment upperBound = lowerBound + potentialSlots @@ -252,20 +250,45 @@ densityDisconnect (GenesisWindow sgen) (SecurityParam k) states candidateSuffixe -- If not, it is not qualified to compete by density (yet). 
offersMoreThanK = totalBlockCount > k - pure (peer, DensityBounds {fragment, offersMoreThanK, lowerBound, upperBound, hasBlockAfter, latestSlot, idling}) + pure (peer, DensityBounds {clippedFragment, offersMoreThanK, lowerBound, upperBound, hasBlockAfter, latestSlot, idling}) - losingPeers = nubOrd $ do - (peer0 , DensityBounds {fragment = frag0, upperBound = ub0}) <- - Map.toList densityBounds - (_peer1, DensityBounds {fragment = frag1, offersMoreThanK, lowerBound = lb1 }) <- + losingPeers = nubOrd $ Map.toList densityBounds >>= \ + (peer0 , DensityBounds { clippedFragment = frag0 + , lowerBound = lb0 + , upperBound = ub0 + , hasBlockAfter = hasBlockAfter0 + , idling = idling0 + }) -> + -- If the density is 0, the peer should be disconnected. This affects + -- ChainSync jumping, where genesis windows with no headers prevent jumps + -- from happening. + if ub0 == 0 then pure peer0 else do + (_peer1, DensityBounds {clippedFragment = frag1, offersMoreThanK, lowerBound = lb1 }) <- Map.toList densityBounds + -- Don't disconnect peer0 if it sent no headers after the intersection yet + -- and it is not idling. + -- + -- See Note [Chain disagreement] + -- + -- Note: hasBlockAfter0 is False if frag0 is empty and ub0>0. + -- But we leave it here as a reminder that we care about it. + guard $ idling0 || not (AF.null frag0) || hasBlockAfter0 -- ensure that the two peer fragments don't share any -- headers after the LoE guard $ AF.lastPoint frag0 /= AF.lastPoint frag1 - -- peer1 offers more than k blocks - guard offersMoreThanK - -- peer1 definitely has higher density than peer0 - guard $ lb1 > ub0 + -- peer1 offers more than k blocks or peer0 has sent all headers in the + -- genesis window after the intersection (idling or not) + guard $ offersMoreThanK || lb0 == ub0 + -- peer1 has the same or better density than peer0 + -- If peer0 is idling, we assume no more headers will be sent. + -- + -- Having the same density is enough to disconnect peer0, as the honest + -- chain is expected to have a strictly higher density than all of the + -- other chains. + -- + -- This matters to ChainSync jumping, where adversarial dynamo and + -- objector could offer chains of equal density. + guard $ lb1 >= (if idling0 then lb0 else ub0) pure peer0 loeIntersectionSlot = AF.headSlot loeFrag @@ -276,9 +299,32 @@ densityDisconnect (GenesisWindow sgen) (SecurityParam k) states candidateSuffixe dropBeyondGenesisWindow = AF.takeWhileOldest ((< firstSlotAfterGenesisWindow) . blockSlot) - competingFrags = + clippedFrags = Map.map dropBeyondGenesisWindow candidateSuffixes +-- Note [Chain disagreement] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Imagine two peers serving the following chain: +-- +-- > k: 1 +-- > sgen: 2 +-- > +-- > 0 1 2 +-- > G---1-2 +-- +-- Say peer1 sent no headers yet and peer2 sent 2 headers. +-- The intersection of both is G, the density of peer2's chain is 2, +-- while the upperbound of the density of peer1 is also 2. +-- +-- For GDD to disconnect peer1 safely, it is essential that both chains +-- disagree after the intersection. +-- +-- To know if the chains will dissagree we defer disconnecting peer1 +-- until it declares to have no more headers, or until it sends one header +-- after the intersection. If both chains agree on the next header after +-- the intersection, we don't disconnect peer1 either. 
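As a standalone illustration of the comparison above, the core criterion by which peer1 rules out peer0 can be written as follows. This is a sketch only: it leaves out the zero-upper-bound shortcut, the check that the two candidate fragments actually disagree after the intersection, and the requirement that peer0 has already sent something or is idling.

    import Data.Word (Word64)

    -- Simplified per-peer bounds, mirroring the fields of 'DensityBounds'.
    data Bounds = Bounds
      { lb        :: Word64 -- ^ blocks known for certain in the genesis window
      , ub        :: Word64 -- ^ 'lb' plus slots that could still hold headers
      , idling    :: Bool   -- ^ the peer claims to have no more headers for now
      , moreThanK :: Bool   -- ^ the candidate offers more than k blocks
      }

    -- Does peer1's density rule out peer0?
    losesTo :: Bounds -> Bounds -> Bool
    losesTo peer0 peer1 =
         (moreThanK peer1 || lb peer0 == ub peer0)
      && lb peer1 >= (if idling peer0 then lb peer0 else ub peer0)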
+ data TraceGDDEvent peer blk = TraceGDDEvent { bounds :: Map peer (DensityBounds blk), diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index d33796b324..6d23553141 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs @@ -8,6 +8,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE Rank2Types #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -53,15 +54,19 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client ( , Consensus , Our (..) , Their (..) - -- * Trace events + -- * Genesis configuration + , CSJConfig (..) + , CSJEnabledConfig (..) , ChainSyncLoPBucketConfig (..) , ChainSyncLoPBucketEnabledConfig (..) + -- * Trace events , InvalidBlockReason , TraceChainSyncClientEvent (..) -- * State shared with other components , ChainSyncClientHandle (..) , ChainSyncState (..) , ChainSyncStateView (..) + , Jumping.noJumping , chainSyncStateFor , noIdling , noLoPBucket @@ -74,6 +79,7 @@ import Control.Tracer import Data.Kind (Type) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map +import Data.Maybe (fromMaybe) import Data.Proxy import Data.Typeable import Data.Word (Word64) @@ -94,6 +100,8 @@ import Ouroboros.Consensus.Ledger.Basics (LedgerState) import Ouroboros.Consensus.Ledger.Extended import Ouroboros.Consensus.Ledger.SupportsProtocol import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck +import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as Jumping +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State import Ouroboros.Consensus.Node.NetworkProtocolVersion import Ouroboros.Consensus.Protocol.Abstract import Ouroboros.Consensus.Storage.ChainDB (ChainDB, @@ -104,7 +112,7 @@ import Ouroboros.Consensus.Util.AnchoredFragment (cross) import Ouroboros.Consensus.Util.Assert (assertWithMsg) import Ouroboros.Consensus.Util.EarlyExit (WithEarlyExit, exitEarly) import qualified Ouroboros.Consensus.Util.EarlyExit as EarlyExit -import Ouroboros.Consensus.Util.IOLike +import Ouroboros.Consensus.Util.IOLike hiding (handle) import qualified Ouroboros.Consensus.Util.LeakyBucket as LeakyBucket import Ouroboros.Consensus.Util.STM (Fingerprint, Watcher (..), WithFingerprint (..), withWatcher) @@ -162,6 +170,36 @@ data ChainSyncLoPBucketConfig -- | Enable the leaky bucket. ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig +-- | Configuration of ChainSync Jumping +data CSJConfig + = + -- | Disable ChainSync Jumping. All clients will fully synchronize with + -- the chain of its peer. + CSJDisabled + | + -- | Enable ChainSync Jumping + CSJEnabled CSJEnabledConfig + +newtype CSJEnabledConfig = CSJEnabledConfig { + -- | The _ideal_ size for ChainSync jumps. Note that the algorithm + -- is best-effort: there might not be exactly `csjcJumpSize` slots between two + -- jumps, depending on the chain. + -- + -- There can be a few less slots between jumps if there is not a block exactly + -- at the boundary. Jumps are often made when a block is announced after the + -- jump boundary. 
+ -- + -- There can be even less slots if a dynamo is elected and it requires an + -- initial jump regardless of how far we are from the next jump boundary. + -- + -- csjcJumpSize must be greater than 0 and smaller or equal to the genesis + -- window size. The larger the jump size, the less jumps are made and peers + -- are less involved in the syncing. A jump size as large as the genesis + -- window has a higher change that dishonest peers can delay syncing by a + -- small margin (around 2 minutes per dishonest peer with mainnet parameters). + csjcJumpSize :: SlotNo +} + defaultChainDbView :: (IOLike m, LedgerSupportsProtocol blk) => ChainDB m blk -> ChainDbView m blk @@ -182,53 +220,6 @@ newtype Our a = Our { unOur :: a } deriving stock (Eq) deriving newtype (Show, NoThunks) --- | A ChainSync client's state that's used by other components, like the GDD. -data ChainSyncState blk = ChainSyncState { - - -- | The current candidate fragment. - csCandidate :: !(AnchoredFragment (Header blk)) - - -- | This ChainSync client should ensure that its peer sets this flag while - -- and only while both of the following conditions are satisfied: the - -- peer's latest message has been fully processed (especially that its - -- candidate has been updated; previous argument) and its latest message - -- did not claim that it already has headers that extend its candidate. - -- - -- It's more important that the flag is unset promptly than it is for the - -- flag to be set promptly, because of how this is used by the GSM to - -- determine that the node is done syncing. - , csIdling :: !Bool - - -- | When the client receives a new header, it updates this field before - -- processing it further, and the latest slot may refer to a header beyond - -- the forecast horizon while the candidate fragment isn't extended yet, to - -- signal to GDD that the density is known up to this slot. - , csLatestSlot :: !(Maybe (WithOrigin SlotNo)) - } - deriving stock (Generic) - -deriving anyclass instance ( - HasHeader blk, - NoThunks (Header blk) - ) => NoThunks (ChainSyncState blk) - --- | An interface to a ChainSync client that's used by other components, like --- the GDD governor. -data ChainSyncClientHandle m blk = ChainSyncClientHandle { - -- | Disconnects from the peer when the GDD considers it adversarial - cschGDDKill :: !(m ()) - - -- | Data shared between the client and external components like GDD. - , cschState :: !(StrictTVar m (ChainSyncState blk)) - } - deriving stock (Generic) - -deriving anyclass instance ( - IOLike m, - HasHeader blk, - NoThunks (Header blk) - ) => NoThunks (ChainSyncClientHandle m blk) - -- | Convenience function for reading a nested set of TVars and extracting some -- data from 'ChainSyncState'. viewChainSyncState :: @@ -310,6 +301,9 @@ data ChainSyncStateView m blk = ChainSyncStateView { -- | Control the 'LeakyBucket' for the LoP. , csvLoPBucket :: !(LoPBucket m) + + -- | Jumping-related API. + , csvJumping :: !(Jumping.Jumping m blk) } deriving stock (Generic) @@ -318,7 +312,8 @@ deriving anyclass instance ( HasHeader blk, NoThunks (Header blk) ) => NoThunks (ChainSyncStateView m blk) -bracketChainSyncClient :: + +bracketChainSyncClient :: forall m peer blk a. 
( IOLike m , Ord peer , LedgerSupportsProtocol blk @@ -331,6 +326,7 @@ bracketChainSyncClient :: -> peer -> NodeToNodeVersion -> ChainSyncLoPBucketConfig + -> CSJConfig -> (ChainSyncStateView m blk -> m a) -> m a bracketChainSyncClient @@ -340,46 +336,74 @@ bracketChainSyncClient peer version csBucketConfig + csjConfig body - = - bracket acquireHandle releaseHandle - $ \varState -> + = mkChainSyncClientHandleState >>= \csHandleState -> + withCSJCallbacks csHandleState csjConfig $ \csjCallbacks -> withWatcher "ChainSync.Client.rejectInvalidBlocks" - (invalidBlockWatcher varState) + (invalidBlockWatcher csHandleState) $ LeakyBucket.execAgainstBucket lopBucketConfig $ \lopBucket -> body ChainSyncStateView { csvSetCandidate = - modifyTVar varState . \ c s -> s {csCandidate = c} + modifyTVar csHandleState . \ c s -> s {csCandidate = c} , csvSetLatestSlot = - modifyTVar varState . \ ls s -> s {csLatestSlot = Just $! ls} + modifyTVar csHandleState . \ ls s -> s {csLatestSlot = Just $! ls} , csvIdling = Idling { - idlingStart = atomically $ modifyTVar varState $ \ s -> s {csIdling = True} - , idlingStop = atomically $ modifyTVar varState $ \ s -> s {csIdling = False} + idlingStart = atomically $ modifyTVar csHandleState $ \ s -> s {csIdling = True} + , idlingStop = atomically $ modifyTVar csHandleState $ \ s -> s {csIdling = False} } , csvLoPBucket = LoPBucket { lbPause = LeakyBucket.setPaused lopBucket True , lbResume = LeakyBucket.setPaused lopBucket False , lbGrantToken = void $ LeakyBucket.fill lopBucket 1 } + , csvJumping = csjCallbacks } where - acquireHandle = do - cschState <- newTVarIO $ ChainSyncState { - csCandidate = AF.Empty AF.AnchorGenesis - , csLatestSlot = Nothing - , csIdling = False - } + mkChainSyncClientHandleState = + newTVarIO ChainSyncState { + csCandidate = AF.Empty AF.AnchorGenesis + , csLatestSlot = Nothing + , csIdling = False + } + + withCSJCallbacks :: + StrictTVar m (ChainSyncState blk) -> + CSJConfig -> + (Jumping.Jumping m blk -> m a) -> + m a + withCSJCallbacks cschState CSJDisabled f = do + tid <- myThreadId + cschJumpInfo <- newTVarIO Nothing + cschJumping <- newTVarIO (Disengaged DisengagedDone) + let handle = ChainSyncClientHandle { + cschGDDKill = throwTo tid DensityTooLow + , cschState + , cschJumping + , cschJumpInfo + } + insertHandle = atomically $ modifyTVar varHandles $ Map.insert peer handle + deleteHandle = atomically $ modifyTVar varHandles $ Map.delete peer + bracket_ insertHandle deleteHandle $ f Jumping.noJumping + + withCSJCallbacks csHandleState (CSJEnabled csjEnabledConfig) f = + bracket (acquireContext csHandleState csjEnabledConfig) releaseContext $ \peerContext -> + f $ Jumping.mkJumping peerContext + acquireContext cschState (CSJEnabledConfig jumpSize) = do tid <- myThreadId atomically $ do - modifyTVar varHandles $ Map.insert peer ChainSyncClientHandle { - cschGDDKill = throwTo tid DensityTooLow + cschJumpInfo <- newTVar Nothing + context <- Jumping.makeContext varHandles jumpSize + Jumping.registerClient context peer cschState $ \cschJumping -> ChainSyncClientHandle + { cschGDDKill = throwTo tid DensityTooLow , cschState + , cschJumping + , cschJumpInfo } - pure cschState - releaseHandle _ = atomically $ modifyTVar varHandles $ Map.delete peer + releaseContext = atomically . Jumping.unregisterClient invalidBlockWatcher varState = invalidBlockRejector @@ -387,7 +411,7 @@ bracketChainSyncClient -- | Wrapper around 'LeakyBucket.execAgainstBucket' that handles the -- disabled bucket by running the given action with dummy handlers. 
- lopBucketConfig :: MonadThrow m => LeakyBucket.Config m + lopBucketConfig :: LeakyBucket.Config m lopBucketConfig = case csBucketConfig of ChainSyncLoPBucketDisabled -> LeakyBucket.dummyConfig @@ -713,6 +737,7 @@ data DynamicEnv m blk = DynamicEnv { , setLatestSlot :: WithOrigin SlotNo -> STM m () , idling :: Idling m , loPBucket :: LoPBucket m + , jumping :: Jumping.Jumping m blk } -- | General values collectively needed by the top-level entry points @@ -806,8 +831,7 @@ chainSyncClient cfgEnv dynEnv = } = chainDbView DynamicEnv { - idling, - loPBucket + idling } = dynEnv mkIntEnv :: @@ -853,7 +877,7 @@ chainSyncClient cfgEnv dynEnv = recvMsgRollForward = \_hdr _tip -> go n' s , recvMsgRollBackward = \_pt _tip -> go n' s } - in Stateful $ \s -> do (idlingStop idling >> lbResume loPBucket); go n0 s + in Stateful $ \s -> idlingStop idling >> go n0 s terminate :: ChainSyncClientResult @@ -943,6 +967,7 @@ findIntersectionTop cfgEnv dynEnv intEnv = DynamicEnv { setCandidate + , jumping } = dynEnv InternalEnv { @@ -1044,9 +1069,6 @@ findIntersectionTop cfgEnv dynEnv intEnv = disconnect $ InvalidIntersection intersection (ourTipFromChain ourFrag) theirTip - atomically $ do - setCandidate theirFrag - setLatestSlot dynEnv (AF.headSlot theirFrag) let kis = assertKnownIntersectionInvariants (configConsensus cfg) $ KnownIntersectionState { @@ -1056,6 +1078,10 @@ findIntersectionTop cfgEnv dynEnv intEnv = , theirHeaderStateHistory , kBestBlockNo = uBestBlockNo } + atomically $ do + updateJumpInfoSTM jumping kis + setCandidate theirFrag + setLatestSlot dynEnv (AF.headSlot theirFrag) continueWithState kis $ knownIntersectionStateTop cfgEnv dynEnv intEnv theirTip @@ -1093,6 +1119,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = , idling , loPBucket , setCandidate + , jumping } = dynEnv InternalEnv { @@ -1109,7 +1136,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = -- Request the next message (roll forward or backward). -- -- This is also the place where we checked whether we're asked to terminate - -- by the mux layer. + -- by the mux layer or to wait and perform a CSJ jump. nextStep :: MkPipelineDecision -> Nat n @@ -1117,18 +1144,130 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = -> Stateful m blk (KnownIntersectionState blk) (ClientPipelinedStIdle n) - nextStep mkPipelineDecision n theirTip = Stateful $ \kis -> do + nextStep mkPipelineDecision n theirTip = Stateful $ \kis -> atomically controlMessageSTM >>= \case -- We have been asked to terminate the client Terminate -> terminateAfterDrain n $ AskedToTerminate - _continue -> - return $ - requestNext - kis - mkPipelineDecision - n - theirTip - (AF.headBlockNo (theirFrag kis)) + _continue -> do + -- Wait until next jumping instruction, which can be either to + -- jump, to run normal ChainSync, or to restart the ChainSync + -- client. + -- Pause LoP while waiting, we'll resume it if we get `RunNormally` + -- or `Restart`. 
+ traceWith tracer TraceJumpingWaitingForNextInstruction + lbPause loPBucket + instruction <- Jumping.jgNextInstruction jumping + traceWith tracer $ TraceJumpingInstructionIs instruction + case instruction of + Jumping.JumpInstruction jumpInstruction -> + continueWithState kis + $ drainThePipe n + $ offerJump mkPipelineDecision jumpInstruction + Jumping.RunNormally -> do + lbResume loPBucket + continueWithState kis + $ nextStep' mkPipelineDecision n theirTip + Jumping.Restart -> do + lbResume loPBucket + continueWithState () + $ drainThePipe n + $ findIntersectionTop + cfgEnv + dynEnv + intEnv + (kBestBlockNo kis) + NoMoreIntersection + + nextStep' :: + MkPipelineDecision + -> Nat n + -> Their (Tip blk) + -> Stateful m blk + (KnownIntersectionState blk) + (ClientPipelinedStIdle n) + nextStep' mkPipelineDecision n theirTip = + Stateful $ \kis -> + return $ + requestNext + kis + mkPipelineDecision + n + theirTip + (AF.headBlockNo (theirFrag kis)) + + offerJump :: + MkPipelineDecision + -> Jumping.JumpInstruction blk + -> Stateful m blk + (KnownIntersectionState blk) + (ClientPipelinedStIdle Z) + offerJump mkPipelineDecision jump = Stateful $ \kis -> do + let jumpInfo = case jump of + Jumping.JumpTo ji -> ji + Jumping.JumpToGoodPoint ji -> ji + dynamoTipPt = castPoint $ AF.headPoint $ jTheirFragment jumpInfo + traceWith tracer $ TraceOfferJump dynamoTipPt + return $ + SendMsgFindIntersect [dynamoTipPt] $ + ClientPipelinedStIntersect { + recvMsgIntersectFound = \pt theirTip -> + if + | pt == dynamoTipPt -> do + Jumping.jgProcessJumpResult jumping $ Jumping.AcceptedJump jump + traceWith tracer $ TraceJumpResult $ Jumping.AcceptedJump jump + let kis' = case jump of + -- Since the updated kis is needed to validate headers, + -- we only update it if we are becoming a Dynamo or + -- an objector + Jumping.JumpToGoodPoint{} -> combineJumpInfo kis jumpInfo + _ -> kis + continueWithState kis' $ nextStep mkPipelineDecision Zero (Their theirTip) + | otherwise -> throwIO InvalidJumpResponse + , + recvMsgIntersectNotFound = \theirTip -> do + Jumping.jgProcessJumpResult jumping $ Jumping.RejectedJump jump + traceWith tracer $ TraceJumpResult $ Jumping.RejectedJump jump + continueWithState kis $ nextStep mkPipelineDecision Zero (Their theirTip) + } + where + combineJumpInfo :: + KnownIntersectionState blk + -> JumpInfo blk + -> KnownIntersectionState blk + combineJumpInfo kis ji = + let mRewoundHistory = + HeaderStateHistory.rewind + (AF.castPoint $ AF.headPoint $ jTheirFragment ji) + (jTheirHeaderStateHistory ji) + -- We assume the history is always possible to rewind. The case + -- where this wouldn't be true is if the original candidate + -- fragment provided by the dynamo contained headers that have + -- no corresponding header state. + rewoundHistory = + fromMaybe (error "offerJump: cannot rewind history") mRewoundHistory + -- If the tip of jTheirFragment does not match the tip of + -- jTheirHeaderStateHistory, then the history needs rewinding. + historyNeedsRewinding = + (/= AF.headPoint (jTheirFragment ji)) $ + castPoint $ + either headerStatePoint headerStatePoint $ + AF.head $ + HeaderStateHistory.unHeaderStateHistory $ + jTheirHeaderStateHistory ji + -- Recompute the intersection only if a suffix of the candidate + -- fragment was trimmed. 
+ intersection + | historyNeedsRewinding = case AF.intersectionPoint (jOurFragment ji) (jTheirFragment ji) of + Just po -> castPoint po + Nothing -> error "offerJump: the jumpInfo should have a valid intersection" + | otherwise = jMostRecentIntersection ji + in KnownIntersectionState + { mostRecentIntersection = intersection + , ourFrag = jOurFragment ji + , theirFrag = jTheirFragment ji + , theirHeaderStateHistory = rewoundHistory + , kBestBlockNo = max (fromWithOrigin 0 $ AF.headBlockNo $ jTheirFragment ji) (kBestBlockNo kis) + } requestNext :: KnownIntersectionState blk @@ -1147,7 +1286,8 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = theirTipBlockNo onMsgAwaitReply = idlingStart idling >> - lbPause loPBucket + lbPause loPBucket >> + Jumping.jgOnAwaitReply jumping in case (n, decision) of (Zero, (Request, mkPipelineDecision')) -> @@ -1241,6 +1381,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = checkKnownInvalid cfgEnv dynEnv intEnv hdr + Jumping.jgOnRollForward jumping (blockPoint hdr) atomically (setLatestSlot dynEnv (NotOrigin slotNo)) checkTime cfgEnv dynEnv intEnv kis arrival slotNo >>= \case @@ -1260,6 +1401,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = kis''' <- checkLoP cfgEnv dynEnv hdr kis'' atomically $ do + updateJumpInfoSTM jumping kis''' setCandidate (theirFrag kis''') atomically $ traceWith headerMetricsTracer (slotNo, arrivalTime) @@ -1349,12 +1491,28 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = , kBestBlockNo } atomically $ do + updateJumpInfoSTM jumping kis' setCandidate theirFrag' setLatestSlot dynEnv (pointSlot rollBackPoint) + Jumping.jgOnRollBackward jumping (pointSlot rollBackPoint) + continueWithState kis' $ nextStep mkPipelineDecision n theirTip +-- | Let ChainSync jumping know about an update to the 'KnownIntersectionState'. +updateJumpInfoSTM :: + Jumping.Jumping m blk + -> KnownIntersectionState blk + -> STM m () +updateJumpInfoSTM jumping kis@KnownIntersectionState{ourFrag} = + Jumping.jgUpdateJumpInfo jumping JumpInfo + { jMostRecentIntersection = mostRecentIntersection kis + , jOurFragment = ourFrag + , jTheirFragment = theirFrag kis + , jTheirHeaderStateHistory = theirHeaderStateHistory kis + } + {------------------------------------------------------------------------------- Header checks -------------------------------------------------------------------------------} @@ -1435,7 +1593,7 @@ checkTime :: -> SlotNo -> m (UpdatedIntersectionState blk (LedgerView (BlockProtocol blk))) checkTime cfgEnv dynEnv intEnv = - \kis arrival slotNo -> castEarlyExitIntersects $ do + \kis arrival slotNo -> pauseBucket $ castEarlyExitIntersects $ do Intersects kis2 lst <- checkArrivalTime kis arrival Intersects kis3 ledgerView <- case projectLedgerView slotNo lst of Just ledgerView -> pure $ Intersects kis2 ledgerView @@ -1443,10 +1601,7 @@ checkTime cfgEnv dynEnv intEnv = EarlyExit.lift $ traceWith (tracer cfgEnv) $ TraceWaitingBeyondForecastHorizon slotNo - -- Pause the bucket if LedgerView-Starved. - EarlyExit.lift $ lbPause (loPBucket dynEnv) res <- readLedgerState kis2 (projectLedgerView slotNo) - EarlyExit.lift $ lbResume (loPBucket dynEnv) EarlyExit.lift $ traceWith (tracer cfgEnv) $ TraceAccessingForecastHorizon slotNo @@ -1558,6 +1713,17 @@ checkTime cfgEnv dynEnv intEnv = -- 'intersectsWithCurrentChain' before it. Nothing + -- Pause the LoP bucket for the entire duration of 'checkTime'. It will + -- either execute very fast, or it will block on the time translation or + -- forecast horizon, waiting for our selection to advance. 
During this + -- period, we should not leak tokens as our peer is not responsible for this + -- waiting time. + pauseBucket :: m a -> m a + pauseBucket = + bracket_ + (lbPause (loPBucket dynEnv)) + (lbResume (loPBucket dynEnv)) + -- | Update the 'KnownIntersectionState' according to the header, if it's valid -- -- Crucially: disconnects if it isn't. @@ -1932,6 +2098,9 @@ data ChainSyncClientException = | EmptyBucket -- ^ The peer lost its race against the bucket. + | + InvalidJumpResponse + -- ^ When the peer responded incorrectly to a jump request. | DensityTooLow -- ^ The peer has been deemed unworthy by the GDD @@ -1963,6 +2132,9 @@ instance Eq ChainSyncClientException where (==) EmptyBucket EmptyBucket = True + (==) + InvalidJumpResponse InvalidJumpResponse + = True (==) DensityTooLow DensityTooLow = True @@ -1972,6 +2144,7 @@ instance Eq ChainSyncClientException where InvalidBlock{} == _ = False InFutureHeaderExceedsClockSkew{} == _ = False EmptyBucket == _ = False + InvalidJumpResponse == _ = False DensityTooLow == _ = False instance Exception ChainSyncClientException @@ -2014,6 +2187,19 @@ data TraceChainSyncClientEvent blk = -- ^ Whether we added a token to the LoP bucket of the peer. Also carries -- the considered header and the best block number known prior to this -- header. + | + TraceOfferJump (Point blk) + -- ^ ChainSync Jumping offering a point to jump to. + | + TraceJumpResult (Jumping.JumpResult blk) + -- ^ ChainSync Jumping -- reply. + | + TraceJumpingWaitingForNextInstruction + -- ^ ChainSync Jumping -- the ChainSync client is requesting the next + -- instruction. + | + TraceJumpingInstructionIs (Jumping.Instruction blk) + -- ^ ChainSync Jumping -- the ChainSync client got its next instruction. deriving instance ( BlockSupportsProtocol blk diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs new file mode 100644 index 0000000000..e1281a8416 --- /dev/null +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs @@ -0,0 +1,828 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UndecidableInstances #-} + +-- | ChainSync jumping (CSJ) is an optimization for the ChainSync protocol that +-- allows nodes to sync without downloading headers from all of the honest +-- peers. This load is undesirable as it slows down all the peers involved. +-- +-- The idea is to download the headers of a chain from a single peer (the +-- dynamo) and then ask periodically to the other peers (the jumpers) whether +-- they agree with the dynamo's chain. +-- +-- When the jumpers disagree with the dynamo, the jumper with the oldest +-- intersection is asked to compete with the dynamo in the GDD logic (becoming +-- an objector). If the dynamo is disconnected, a new dynamo is elected and the +-- objector is demoted to a jumper. +-- +-- If the objector is disconnected, the syncing process continues with the +-- dynamo and the remaining jumpers. 
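+--
+-- As a rough sketch of how an individual ChainSync client consumes these
+-- decisions (the 'Jumping' and 'Instruction' names are the ones defined below
+-- in this module; the right-hand sides are placeholders for behaviour
+-- implemented in the ChainSync client, not real code):
+--
+-- > loop = jgNextInstruction jumping >>= \case
+-- >   RunNormally          -> runChainSyncNormally >> loop
+-- >   Restart              -> redoFindIntersect >> loop
+-- >   JumpInstruction jump -> offerJumpAndReportResult jump >> loop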
+--
+-- The main property of the algorithm is that it never downloads headers from
+-- more than two plausibly honest peers at a time (a dynamo and an objector).
+-- All other peers are either waiting their turn to compete with the dynamo,
+-- or are in agreement with it, or are disengaged (see next section).
+--
+-- The algorithm might still download headers redundantly from peers that do
+-- historical rollbacks. These rollbacks, however, constitute dishonest
+-- behavior, and CSJ does not concern itself with avoiding load to dishonest
+-- peers. Avoiding the load induced by dishonest peers on the syncing node
+-- would additionally require disconnecting peers that do historical
+-- rollbacks. This is not done by CSJ.
+--
+-- Interactions with the Genesis Density Disconnection logic
+-- ---------------------------------------------------------
+--
+-- It is possible that neither the dynamo nor the objector is disconnected.
+-- This could happen if:
+-- 1. They both serve the same chain, or
+-- 2. They both claim to have no more headers.
+--
+-- To avoid (1), CSJ checks that the objector disagrees with the dynamo at the
+-- point it claimed to disagree as a jumper. If the objector agrees with the
+-- dynamo, it is disengaged. A disengaged peer is not asked to jump or act as
+-- dynamo or objector. Instead, it continues to offer headers for the rest of
+-- the syncing. When the objector is disengaged, a new objector is elected
+-- among the dissenting jumpers. If there are no dissenting jumpers left, the
+-- syncing continues with the dynamo and the remaining jumpers.
+--
+-- To prevent the dynamo from agreeing with the objector instead, the dynamo is
+-- not allowed to roll back before the last jump it requested. If the dynamo
+-- tries to roll back before the last jump, it is disengaged and a new dynamo
+-- is elected.
+--
+-- To avoid (2), CSJ disengages a peer as soon as it claims to have no more
+-- headers. Syncing continues with a newly elected dynamo or objector,
+-- depending on the disengaged peer's role.
+--
+-- CSJ finishes and is turned off when all peers have been disengaged.
+--
+-- Interactions with the ChainSync client
+-- --------------------------------------
+--
+-- The ChainSync client interacts with CSJ through callbacks that determine
+-- when the client should pause, download headers, or ask about agreement with
+-- a given point (jumping). See the 'Jumping' type for more details.
+--
+-- Interactions with the Limit on Patience
+-- ---------------------------------------
+--
+-- Jumpers don't leak the Limit on Patience (LoP) bucket until they are
+-- promoted to dynamos or objectors, and the leaking is stopped as soon as they
+-- are demoted.
+--
+-- If a jumper refrains from answering jumps, it will be disconnected by the
+-- 'intersectTimeout' (in 'ChainSyncTimeout').
+--
+-- A jumper answering just before the timeout will not delay the syncing
+-- process by a large amount. If it agrees with the dynamo, the dynamo will be
+-- busy downloading headers and validating blocks while the jumper answers. If
+-- the jumper disagrees with the dynamo, CSJ will look for the precise
+-- intersection with the dynamo's chain. This could take a few minutes, but it
+-- is a path that ends with either the dynamo or the jumper being disconnected
+-- or disengaged.
+--
+--
+-- Overview of the state transitions
+-- ---------------------------------
+--
+-- See 'ChainSyncJumpingState' for the implementation of the states.
+-- +-- > j ╔════════╗ +-- > ╭────────── ║ Dynamo ║ ◀─────────╮ +-- > │ ╚════════╝ │f +-- > ▼ ▲ │ +-- > ┌────────────┐ │ k ┌──────────┐ +-- > │ Disengaged │ ◀───────────│────────── │ Objector │ +-- > └────────────┘ ╭─────│────────── └──────────┘ +-- > │ │ ▲ ▲ │ +-- > g│ │e b │ │ │ +-- > │ │ ╭─────╯ i│ │c +-- > ╭╌╌╌╌╌╌╌▼╌╌╌╌╌╌╌╌╌╌╌╌╌│╌╌╌╌╌╌╌╌╌╌│╌▼╌╌╌╮ +-- > ┆ ╔═══════╗ a ┌──────┐ d ┌─────┐ | +-- > ┆ ║ Happy ║ ───▶ │ LFI* │ ───▶ │ FI* │ | +-- > ┆ ╚═══════╝ ◀─╮ └──────┘ └─────┘ | +-- > ┆ Jumper ╰─────┴────────────╯h | +-- > ╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯ +-- +-- *: LookingForIntersection and FoundIntersection, abbreviated for this +-- drawing only; this abbreviation will not be used elsewhere. +-- +-- A new peer starts as the dynamo if there is no other peer or as a Happy +-- jumper otherwise. The dynamo periodically requests jumps from happy +-- jumpers who, in the ideal case, accept them. +-- +-- In the event that a jumper rejects a jump, it goes from Happy to LFI* (a). +-- From there starts a back-and-forth of intersection search messages until +-- the exact point of disagreement with the dynamo is found. +-- +-- Once the exact point of disagreement is found, and if there is no objector +-- yet, the jumper becomes the objector (b). If there is an objector, then we +-- compare the intersections of the objector and the jumper. If the jumper's +-- intersection is strictly older, then the jumper replaces the objector (b+c). +-- Otherwise, the jumper is marked as FI* (d). +-- +-- If the dynamo disconnects or is disengaged, one peer is elected as the new +-- dynamo (e|f) and all other peers revert to being happy jumpers (g+h). +-- +-- If the objector disconnects or is disengaged, and there are FI* jumpers, then +-- the one with the oldest intersection with the dynamo gets elected (i). +-- +-- If the dynamo rolls back to a point older than the last jump it requested, it +-- is disengaged (j) and a new dynamo is elected (e|f). +-- +-- If the objector agrees with the dynamo, it is disengaged (k). If there are +-- FI* jumpers, then one of them gets elected as the new objector (i). +-- +-- If dynamo or objector claim to have no more headers, they are disengaged +-- (j|k). +-- +module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( + Context + , ContextWith (..) + , Instruction (..) + , JumpInstruction (..) + , JumpResult (..) + , Jumping (..) + , makeContext + , mkJumping + , noJumping + , registerClient + , unregisterClient + ) where + +import Cardano.Slotting.Slot (SlotNo (..), WithOrigin (..)) +import Control.Monad (forM, forM_, when) +import Data.List (sortOn) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Maybe (catMaybes, fromMaybe) +import GHC.Generics (Generic) +import Ouroboros.Consensus.Block (HasHeader (getHeaderFields), Header, + Point (..), castPoint, pointSlot, succWithOrigin) +import Ouroboros.Consensus.Ledger.SupportsProtocol + (LedgerSupportsProtocol) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State + (ChainSyncClientHandle (..), + ChainSyncJumpingJumperState (..), + ChainSyncJumpingState (..), ChainSyncState (..), + DisengagedInitState (..), DynamoInitState (..), + JumpInfo (..), JumperInitState (..), + ObjectorInitState (..)) +import Ouroboros.Consensus.Util.IOLike hiding (handle) +import qualified Ouroboros.Network.AnchoredFragment as AF + +-- | Hooks for ChainSync jumping. 
+data Jumping m blk = Jumping + { -- | Get the next instruction to execute, which can be either to run normal + -- ChainSync, to jump to a given point, or to restart ChainSync. When the + -- peer is a jumper and there is no jump request, 'jgNextInstruction' blocks + -- until a jump request is made. + jgNextInstruction :: !(m (Instruction blk)), + + -- | To be called whenever the peer claims to have no more headers. + jgOnAwaitReply :: !(m ()), + + -- | To be called whenever a header is received from the peer + -- before it is validated. + jgOnRollForward :: !(Point (Header blk) -> m ()), + + -- | To be called whenever a peer rolls back. + jgOnRollBackward :: !(WithOrigin SlotNo -> m ()), + + -- | Process the result of a jump, either accepted or rejected. + -- + -- The jump result is used to decide on the next jumps or whether to elect + -- an objector. + jgProcessJumpResult :: !(JumpResult blk -> m ()), + + -- | To be called to update the last known jump possible to the tip of + -- the peers candidate fragment. The ChainSync clients for all peers should + -- call this function in case they are or they become dynamos. + -- + -- JumpInfo is meant to be a snapshot of the @KnownIntersectionState@ of + -- the ChainSync client. See 'JumpInfo' for more details. + jgUpdateJumpInfo :: !(JumpInfo blk -> STM m ()) + } + deriving stock (Generic) + +deriving anyclass instance + ( IOLike m, + HasHeader blk, + NoThunks (Header blk) + ) => + NoThunks (Jumping m blk) + +-- | No-op implementation of CSJ +noJumping :: (MonadSTM m) => Jumping m blk +noJumping = + Jumping + { jgNextInstruction = pure RunNormally + , jgOnAwaitReply = pure () + , jgOnRollForward = const $ pure () + , jgOnRollBackward = const $ pure () + , jgProcessJumpResult = const $ pure () + , jgUpdateJumpInfo = const $ pure () + } + +-- | Create the callbacks for a given peer. +mkJumping :: + ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + PeerContext m peer blk -> + Jumping m blk +mkJumping peerContext = Jumping + { jgNextInstruction = atomically $ nextInstruction peerContext + , jgOnAwaitReply = atomically $ onAwaitReply peerContext + , jgOnRollForward = atomically . onRollForward peerContext + , jgOnRollBackward = atomically . onRollBackward peerContext + , jgProcessJumpResult = atomically . processJumpResult peerContext + , jgUpdateJumpInfo = updateJumpInfo peerContext + } + +-- | A context for ChainSync jumping +-- +-- Invariants: +-- +-- - If 'handlesVar' is not empty, then there is exactly one dynamo in it. +-- - There is at most one objector in 'handlesVar'. +-- - If there exist 'FoundIntersection' jumpers in 'handlesVar', then there +-- is an objector and the intersection of the objector with the dynamo is +-- at least as old as the oldest intersection of the `FoundIntersection` jumpers +-- with the dynamo. +data ContextWith peerField handleField m peer blk = Context + { peer :: !peerField, + handle :: !handleField, + handlesVar :: !(StrictTVar m (Map peer (ChainSyncClientHandle m blk))), + jumpSize :: !SlotNo + } + +-- | A non-specific, generic context for ChainSync jumping. +type Context = ContextWith () () + +-- | A peer-specific context for ChainSync jumping. This is a 'ContextWith' +-- pointing on the handler of the peer in question. +-- +-- Invariant: The binding from 'peer' to 'handle' is present in 'handlesVar'. 
+type PeerContext m peer blk = ContextWith peer (ChainSyncClientHandle m blk) m peer blk + +makeContext :: + MonadSTM m => + StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> + SlotNo -> + -- ^ The size of jumps, in number of slots. + STM m (Context m peer blk) +makeContext h jumpSize = do + pure $ Context () () h jumpSize + +-- | Get a generic context from a peer context by stripping away the +-- peer-specific fields. +stripContext :: PeerContext m peer blk -> Context m peer blk +stripContext context = context {peer = (), handle = ()} + +-- | Instruction from the jumping governor, either to run normal ChainSync, or +-- to jump to follow a dynamo with the given fragment, or to restart ChainSync. +data Instruction blk + = RunNormally + -- | The restart instruction restarts the ChainSync protocol. This is + -- necessary when disengaging a peer of which we know no point that we + -- could set the intersection of the ChainSync server to. + | Restart + | -- | Jump to the tip of the given fragment. + JumpInstruction !(JumpInstruction blk) + deriving (Generic) + +deriving instance (HasHeader (Header blk), Eq (Header blk)) => Eq (Instruction blk) +deriving instance (HasHeader (Header blk), Show (Header blk)) => Show (Instruction blk) +deriving anyclass instance + ( HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (Header blk) + ) => NoThunks (Instruction blk) + +data JumpInstruction blk + = JumpTo !(JumpInfo blk) + | -- | Used to set the intersection of the ChainSync servers of starting + -- objectors and dynamos. Otherwise, the ChainSync server wouldn't know + -- which headers to start serving. + JumpToGoodPoint !(JumpInfo blk) + deriving (Generic) + +deriving instance (HasHeader (Header blk), Eq (Header blk)) => Eq (JumpInstruction blk) +instance (HasHeader (Header blk), Show (Header blk)) => Show (JumpInstruction blk) where + showsPrec p = \case + JumpTo jumpInfo -> + showParen (p > 10) $ showString "JumpTo " . shows (AF.headPoint $ jTheirFragment jumpInfo) + JumpToGoodPoint jumpInfo -> + showParen (p > 10) $ showString "JumpToGoodPoint " . shows (AF.headPoint $ jTheirFragment jumpInfo) + +deriving anyclass instance + ( HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (Header blk) + ) => NoThunks (JumpInstruction blk) + +-- | The result of a jump request, either accepted or rejected. +data JumpResult blk + = AcceptedJump !(JumpInstruction blk) + | RejectedJump !(JumpInstruction blk) + deriving (Generic) + +deriving instance (HasHeader (Header blk), Eq (Header blk)) => Eq (JumpResult blk) +deriving instance (HasHeader (Header blk), Show (Header blk)) => Show (JumpResult blk) + +deriving anyclass instance + ( HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (Header blk) + ) => NoThunks (JumpResult blk) + +-- | Compute the next instruction for the given peer. In the majority of cases, +-- this consists in reading the peer's handle, having the dynamo and objector +-- run normally and the jumpers wait for the next jump. As such, this function +-- mostly only reads from and writes to the handle of the peer. For the dynamo, every once in a +-- while, we need to indicate to the jumpers that they need to jump, and this +-- requires writing to a TVar for every jumper. 
+nextInstruction :: + ( MonadSTM m ) => + PeerContext m peer blk -> + STM m (Instruction blk) +nextInstruction context = + readTVar (cschJumping (handle context)) >>= \case + Disengaged DisengagedDone -> pure RunNormally + Disengaged Disengaging -> do + writeTVar (cschJumping (handle context)) (Disengaged DisengagedDone) + pure Restart + Dynamo (DynamoStarting goodJumpInfo) lastJumpSlot -> do + writeTVar (cschJumping (handle context)) $ + Dynamo DynamoStarted lastJumpSlot + pure $ JumpInstruction $ JumpToGoodPoint goodJumpInfo + Dynamo DynamoStarted _ -> + pure RunNormally + Objector Starting goodJump badPoint -> do + writeTVar (cschJumping (handle context)) $ + Objector Started goodJump badPoint + pure $ JumpInstruction $ JumpToGoodPoint goodJump + Objector Started _ _ -> pure RunNormally + Jumper nextJumpVar jumperState -> do + readTVar nextJumpVar >>= \case + Nothing -> retry + Just jumpInfo -> do + writeTVar nextJumpVar Nothing + case jumperState of + Happy FreshJumper mGoodJumpInfo -> + writeTVar (cschJumping (handle context)) $ + Jumper nextJumpVar $ Happy StartedJumper mGoodJumpInfo + _ -> pure () + pure $ JumpInstruction $ JumpTo jumpInfo + +-- | This function is called when we receive a 'MsgRollForward' message before +-- validating it. +-- +-- We request jumpers to jump here if the next header received by the dynamo is +-- at least jump size slots after the last jump. Note that, since this function +-- runs before validating the next header, it will not be part of the fragment +-- considered for the jump. +-- +-- We also check that the Objector disagrees with the header sent at its +-- rejected jump. If it agrees to it, we disengage it. +-- +onRollForward :: forall m peer blk. + ( MonadSTM m, + LedgerSupportsProtocol blk + ) => + PeerContext m peer blk -> + Point (Header blk) -> + STM m () +onRollForward context point = + readTVar (cschJumping (handle context)) >>= \case + Objector _ _ badPoint + | badPoint == castPoint point -> do + disengage (handle context) + electNewObjector (stripContext context) + | otherwise -> pure () + Disengaged{} -> pure () + Jumper{} -> pure () + Dynamo _ lastJumpSlot + | let jumpBoundaryPlus1 = jumpSize context + succWithOrigin lastJumpSlot + , succWithOrigin (pointSlot point) > jumpBoundaryPlus1 -> do + mJumpInfo <- readTVar (cschJumpInfo (handle context)) + setJumps mJumpInfo + | otherwise -> pure () + where + setJumps Nothing = error "onRollForward: Dynamo without jump info" + setJumps (Just jumpInfo) = do + writeTVar (cschJumping (handle context)) $ + Dynamo DynamoStarted $ pointSlot $ AF.headPoint $ jTheirFragment jumpInfo + handles <- readTVar (handlesVar context) + forM_ (Map.elems handles) $ \h -> + readTVar (cschJumping h) >>= \case + Jumper nextJumpVar Happy{} -> writeTVar nextJumpVar (Just jumpInfo) + _ -> pure () + +-- | This function is called when we receive a 'MsgRollBackward' message. +-- +-- Here we check if the peer is trying to roll back to a point before the last +-- jump. If so, we disengage the peer. This prevents adversaries from sending +-- as objectors the same chain as the dynamo. +-- +onRollBackward :: forall m peer blk. 
+ ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + PeerContext m peer blk -> + WithOrigin SlotNo -> + STM m () +onRollBackward context slot = + readTVar (cschJumping (handle context)) >>= \case + Objector _ _ badPoint + | slot < pointSlot badPoint -> do + disengage (handle context) + electNewObjector (stripContext context) + | otherwise -> pure () + Disengaged{} -> pure () + Jumper{} -> pure () + Dynamo _ lastJumpSlot + | slot < lastJumpSlot -> do + disengage (handle context) + electNewDynamo (stripContext context) + | otherwise -> pure () + +-- | This function is called when we receive a 'MsgAwaitReply' message. +-- +-- If this is the dynamo, we need to elect a new dynamo as no more headers +-- are available. +onAwaitReply :: + ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + PeerContext m peer blk -> + STM m () +onAwaitReply context = + readTVar (cschJumping (handle context)) >>= \case + Dynamo{} -> do + disengage (handle context) + electNewDynamo (stripContext context) + Objector{} -> do + disengage (handle context) + electNewObjector (stripContext context) + Jumper{} -> + -- A jumper might be receiving a 'MsgAwaitReply' message if it was + -- previously an objector and a new dynamo was elected. + disengage (handle context) + Disengaged{} -> + pure () + +-- | Process the result of a jump. In the happy case, this only consists in +-- updating the peer's handle to take the new candidate fragment and the new +-- last jump point into account. When disagreeing with the dynamo, though, we +-- enter a phase of several jumps to pinpoint exactly where the disagreement +-- occurs. Once this phase is finished, we trigger the election of a new +-- objector, which might update many TVars. +processJumpResult :: forall m peer blk. + ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + PeerContext m peer blk -> + JumpResult blk -> + STM m () +processJumpResult context jumpResult = + readTVar (cschJumping (handle context)) >>= \case + Dynamo{} -> + case jumpResult of + AcceptedJump (JumpToGoodPoint jumpInfo) -> + updateChainSyncState (handle context) jumpInfo + RejectedJump JumpToGoodPoint{} -> do + startDisengaging (handle context) + electNewDynamo (stripContext context) + + -- Not interesting in the dynamo state + AcceptedJump JumpTo{} -> pure () + RejectedJump JumpTo{} -> pure () + + Disengaged{} -> pure () + Objector{} -> + case jumpResult of + AcceptedJump (JumpToGoodPoint jumpInfo) -> + updateChainSyncState (handle context) jumpInfo + RejectedJump JumpToGoodPoint{} -> do + -- If the objector rejects a good point, it is a sign of a rollback + -- to earlier than the last jump. + startDisengaging (handle context) + electNewObjector (stripContext context) + + -- Not interesting in the objector state + AcceptedJump JumpTo{} -> pure () + RejectedJump JumpTo{} -> pure () + + Jumper nextJumpVar jumperState -> + case jumpResult of + AcceptedJump (JumpTo goodJumpInfo) -> do + -- The jump was accepted; we set the jumper's candidate fragment to + -- the dynamo's candidate fragment up to the accepted point. + -- + -- The candidate fragments of jumpers don't grow otherwise, as only the + -- objector and the dynamo request further headers. + updateChainSyncState (handle context) goodJumpInfo + writeTVar (cschJumpInfo (handle context)) $ Just goodJumpInfo + case jumperState of + LookingForIntersection _goodJumpInfo badJumpInfo -> + -- @AF.headPoint fragment@ is in @badFragment@, as the jumper + -- looking for an intersection is the only client asking for its + -- jumps. 
+ lookForIntersection nextJumpVar goodJumpInfo badJumpInfo + Happy StartedJumper _mGoodJumpInfo -> + writeTVar (cschJumping (handle context)) $ + Jumper nextJumpVar $ Happy StartedJumper $ Just goodJumpInfo + Happy FreshJumper _mGoodJumpInfo -> + pure () + FoundIntersection{} -> + -- Only happy jumpers are asked to jump by the dynamo, and only + -- jumpers looking for an intersection are asked to jump by + -- themselves. + error "processJumpResult: Jumpers in state FoundIntersection shouldn't be further jumping." + + RejectedJump (JumpTo badJumpInfo) -> + -- The tip of @goodFragment@ is in @jTheirFragment jumpInfo@ or is + -- an ancestor of it. If the jump was requested by the dynamo, this + -- holds because the dynamo is not allowed to rollback before the + -- jumps that it requests. + -- + -- If the jump was requested by the jumper, this holds because the + -- jumper is looking for an intersection, and such jumper only asks + -- for jumps that meet this condition. + case jumperState of + LookingForIntersection goodJumpInfo _ -> + lookForIntersection nextJumpVar goodJumpInfo badJumpInfo + Happy StartedJumper mGoodJumpInfo -> + lookForIntersection nextJumpVar (mkGoodJumpInfo mGoodJumpInfo badJumpInfo) badJumpInfo + Happy FreshJumper _ -> + pure () + FoundIntersection{} -> + error "processJumpResult (rejected): Jumpers in state FoundIntersection shouldn't be further jumping." + + -- These aren't interesting in the case of jumpers. + AcceptedJump JumpToGoodPoint{} -> pure () + RejectedJump JumpToGoodPoint{} -> pure () + where + -- Avoid redundant constraint "HasHeader blk" reported by some ghc's + _ = getHeaderFields @blk + + updateChainSyncState :: ChainSyncClientHandle m blk -> JumpInfo blk -> STM m () + updateChainSyncState handle jump = do + let fragment = jTheirFragment jump + modifyTVar (cschState handle) $ \csState -> + csState {csCandidate = fragment, csLatestSlot = Just (AF.headSlot fragment) } + writeTVar (cschJumpInfo handle) $ Just jump + + mkGoodJumpInfo :: Maybe (JumpInfo blk) -> JumpInfo blk -> JumpInfo blk + mkGoodJumpInfo mGoodJumpInfo badJumpInfo = do + let badFragment = jTheirFragment badJumpInfo + -- use the jump info of the rejected jump if the good jump info is + -- not available (i.e. there were no accepted jumps) + badFragmentStart = AF.takeOldest 0 badFragment + in fromMaybe (badJumpInfo {jTheirFragment = badFragmentStart}) mGoodJumpInfo + + -- | Given a good point (where we know we agree with the dynamo) and a bad + -- fragment (where we know the tip disagrees with the dynamo), either decide + -- that we know the intersection for sure (if the bad point is the successor + -- of the good point) or program a jump somewhere in the middle to refine + -- those points. + -- + -- PRECONDITION: The good point is in the candidate fragment of + -- @badJumpInfo@ or it is an ancestor of it. + lookForIntersection nextJumpVar goodJumpInfo badJumpInfo = do + let badFragment = jTheirFragment badJumpInfo + -- If the good point is not in the bad fragment, the anchor of the bad + -- fragment should be a good point too. + searchFragment = + maybe badFragment snd $ + AF.splitAfterPoint badFragment (AF.headPoint $ jTheirFragment goodJumpInfo) + let len = AF.length searchFragment + if len <= 1 then do + -- If the fragment only contains the bad tip, we know the + -- intersection is the good point. + -- Clear any subsequent jumps requested by the dynamo. 
+ writeTVar nextJumpVar Nothing
+ maybeElectNewObjector nextJumpVar goodJumpInfo (AF.headPoint badFragment)
+ else do
+ let middlePoint = len `div` 2
+ theirFragment = AF.dropNewest middlePoint badFragment
+ writeTVar nextJumpVar $ Just
+ badJumpInfo { jTheirFragment = theirFragment }
+ writeTVar (cschJumping (handle context)) $
+ Jumper nextJumpVar (LookingForIntersection goodJumpInfo badJumpInfo)
+
+ maybeElectNewObjector nextJumpVar goodJumpInfo badPoint = do
+ findObjector (stripContext context) >>= \case
+ Nothing ->
+ -- There is no objector yet. Promote the jumper to objector.
+ writeTVar (cschJumping (handle context)) (Objector Starting goodJumpInfo badPoint)
+ Just (oInitState, oGoodJump, oPoint, oHandle)
+ | pointSlot oPoint <= pointSlot badPoint ->
+ -- The objector's intersection is still old enough. Keep it.
+ writeTVar (cschJumping (handle context)) $
+ Jumper nextJumpVar (FoundIntersection Starting goodJumpInfo badPoint)
+ | otherwise -> do
+ -- Found an earlier intersection. Demote the old objector and
+ -- promote the jumper to objector.
+ newJumper Nothing (FoundIntersection oInitState oGoodJump oPoint) >>=
+ writeTVar (cschJumping oHandle)
+ writeTVar (cschJumping (handle context)) (Objector Starting goodJumpInfo badPoint)
+
+updateJumpInfo ::
+ (MonadSTM m) =>
+ PeerContext m peer blk ->
+ JumpInfo blk ->
+ STM m ()
+updateJumpInfo context jumpInfo =
+ readTVar (cschJumping (handle context)) >>= \case
+ Disengaged{} -> pure ()
+ _ -> writeTVar (cschJumpInfo (handle context)) $ Just jumpInfo
+
+-- | Find the dynamo in a TVar containing a map of handles. Returns the handle
+-- of the dynamo, or 'Nothing' if there is none.
+getDynamo ::
+ (MonadSTM m) =>
+ StrictTVar m (Map peer (ChainSyncClientHandle m blk)) ->
+ STM m (Maybe (ChainSyncClientHandle m blk))
+getDynamo handlesVar = do
+ handles <- Map.elems <$> readTVar handlesVar
+ findM (\handle -> isDynamo <$> readTVar (cschJumping handle)) handles
+ where
+ isDynamo Dynamo{} = True
+ isDynamo _ = False
+
+-- | Disengage a peer, meaning that it will no longer be asked to jump or
+-- act as dynamo or objector.
+disengage :: MonadSTM m => ChainSyncClientHandle m blk -> STM m ()
+disengage = disengageWith DisengagedDone
+
+-- | Like 'disengage', but additionally restarts ChainSync.
+startDisengaging :: MonadSTM m => ChainSyncClientHandle m blk -> STM m ()
+startDisengaging = disengageWith Disengaging
+
+disengageWith ::
+ MonadSTM m =>
+ DisengagedInitState ->
+ ChainSyncClientHandle m blk ->
+ STM m ()
+disengageWith initState handle = do
+ writeTVar (cschJumping handle) (Disengaged initState)
+ writeTVar (cschJumpInfo handle) Nothing
+
+
+-- | Convenience function that, given an optional jump info and a jumper state,
+-- makes a fresh 'Jumper' state.
+newJumper ::
+ ( MonadSTM m,
+ LedgerSupportsProtocol blk
+ ) =>
+ Maybe (JumpInfo blk) ->
+ ChainSyncJumpingJumperState blk ->
+ STM m (ChainSyncJumpingState m blk)
+newJumper jumpInfo jumperState = do
+ nextJumpVar <- newTVar jumpInfo
+ pure $ Jumper nextJumpVar jumperState
+
+-- | Register a new ChainSync client with a context, returning a 'PeerContext' for
+-- that peer. If there is no dynamo, the peer starts as dynamo; otherwise, it
+-- starts as a jumper.
+registerClient ::
+ ( Ord peer,
+ LedgerSupportsProtocol blk,
+ IOLike m
+ ) =>
+ Context m peer blk ->
+ peer ->
+ StrictTVar m (ChainSyncState blk) ->
+ -- | A function to make a client handle from a jumping state.
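+ -- Typically this is a closure over the already-created 'cschGDDKill',
+ -- 'cschState' and 'cschJumpInfo' fields; 'registerClient' itself only
+ -- allocates the 'cschJumping' variable that it passes to this function.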
+ (StrictTVar m (ChainSyncJumpingState m blk) -> ChainSyncClientHandle m blk) ->
+ STM m (PeerContext m peer blk)
+registerClient context peer csState mkHandle = do
+ csjState <- getDynamo (handlesVar context) >>= \case
+ Nothing -> do
+ fragment <- csCandidate <$> readTVar csState
+ pure $ Dynamo DynamoStarted $ pointSlot $ AF.anchorPoint fragment
+ Just handle -> do
+ mJustInfo <- readTVar (cschJumpInfo handle)
+ newJumper mJustInfo (Happy FreshJumper Nothing)
+ cschJumping <- newTVar csjState
+ let handle = mkHandle cschJumping
+ modifyTVar (handlesVar context) $ Map.insert peer handle
+ pure $ context {peer, handle}
+
+-- | Unregister a client from a 'PeerContext'; this might trigger the election
+-- of a new dynamo or objector if the peer was one of these two.
+unregisterClient ::
+ ( MonadSTM m,
+ Ord peer,
+ LedgerSupportsProtocol blk
+ ) =>
+ PeerContext m peer blk ->
+ STM m ()
+unregisterClient context = do
+ modifyTVar (handlesVar context) $ Map.delete (peer context)
+ let context' = stripContext context
+ readTVar (cschJumping (handle context)) >>= \case
+ Disengaged{} -> pure ()
+ Jumper{} -> pure ()
+ Objector{} -> electNewObjector context'
+ Dynamo{} -> electNewDynamo context'
+
+-- | Choose an unspecified new non-disengaged dynamo and demote all other peers
+-- to jumpers.
+electNewDynamo ::
+ ( MonadSTM m,
+ Eq peer,
+ LedgerSupportsProtocol blk
+ ) =>
+ Context m peer blk ->
+ STM m ()
+electNewDynamo context = do
+ peerStates <- Map.toList <$> readTVar (handlesVar context)
+ mDynamo <- findNonDisengaged peerStates
+ case mDynamo of
+ Nothing -> pure ()
+ Just (dynId, dynamo) -> do
+ fragment <- csCandidate <$> readTVar (cschState dynamo)
+ mJumpInfo <- readTVar (cschJumpInfo dynamo)
+ -- If there is no jump info, the dynamo must be just starting and
+ -- there is no need to set the intersection of the ChainSync server.
+ let dynamoInitState = maybe DynamoStarted DynamoStarting mJumpInfo
+ writeTVar (cschJumping dynamo) $
+ Dynamo dynamoInitState $ pointSlot $ AF.headPoint fragment
+ -- Demote all other peers to jumpers
+ forM_ peerStates $ \(peer, st) ->
+ when (peer /= dynId) $ do
+ jumpingState <- readTVar (cschJumping st)
+ when (not (isDisengaged jumpingState)) $
+ newJumper mJumpInfo (Happy FreshJumper Nothing)
+ >>= writeTVar (cschJumping st)
+ where
+ findNonDisengaged =
+ findM $ \(_, st) -> not . isDisengaged <$> readTVar (cschJumping st)
+ isDisengaged Disengaged{} = True
+ isDisengaged _ = False
+
+findM :: Monad m => (a -> m Bool) -> [a] -> m (Maybe a)
+findM _ [] = pure Nothing
+findM p (x : xs) = p x >>= \case
+ True -> pure (Just x)
+ False -> findM p xs
+
+-- | Find the objector in a context, if there is one.
+findObjector ::
+ (MonadSTM m) =>
+ Context m peer blk ->
+ STM m (Maybe (ObjectorInitState, JumpInfo blk, Point (Header blk), ChainSyncClientHandle m blk))
+findObjector context = do
+ readTVar (handlesVar context) >>= go . Map.toList
+ where
+ go [] = pure Nothing
+ go ((_, handle):xs) =
+ readTVar (cschJumping handle) >>= \case
+ Objector initState goodJump badPoint ->
+ pure $ Just (initState, goodJump, badPoint, handle)
+ _ -> go xs
+
+-- | Look into all dissenting jumpers and promote the one with the oldest
+-- intersection with the dynamo as the new objector.
+electNewObjector ::
+ (MonadSTM m) =>
+ Context m peer blk ->
+ STM m ()
+electNewObjector context = do
+ peerStates <- Map.toList <$> readTVar (handlesVar context)
+ dissentingJumpers <- collectDissentingJumpers peerStates
+ let sortedJumpers = sortOn (pointSlot .
fst) dissentingJumpers + case sortedJumpers of + (badPoint, (initState, goodJumpInfo, handle)):_ -> + writeTVar (cschJumping handle) $ Objector initState goodJumpInfo badPoint + _ -> + pure () + where + collectDissentingJumpers peerStates = + fmap catMaybes $ + forM peerStates $ \(_, handle) -> + readTVar (cschJumping handle) >>= \case + Jumper _ (FoundIntersection initState goodJumpInfo badPoint) -> + pure $ Just (badPoint, (initState, goodJumpInfo, handle)) + _ -> + pure Nothing diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs new file mode 100644 index 0000000000..189ee361e5 --- /dev/null +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs @@ -0,0 +1,221 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UndecidableInstances #-} + +module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State ( + ChainSyncClientHandle (..) + , ChainSyncJumpingJumperState (..) + , ChainSyncJumpingState (..) + , ChainSyncState (..) + , DisengagedInitState (..) + , DynamoInitState (..) + , JumpInfo (..) + , JumperInitState (..) + , ObjectorInitState (..) + ) where + +import Cardano.Slotting.Slot (SlotNo, WithOrigin) +import Data.Function (on) +import Data.Typeable (Proxy (..), typeRep) +import GHC.Generics (Generic) +import Ouroboros.Consensus.Block (HasHeader, Header, Point) +import Ouroboros.Consensus.HeaderStateHistory (HeaderStateHistory) +import Ouroboros.Consensus.Ledger.SupportsProtocol + (LedgerSupportsProtocol) +import Ouroboros.Consensus.Util.IOLike (IOLike, NoThunks (..), + StrictTVar) +import Ouroboros.Network.AnchoredFragment (AnchoredFragment, + headPoint) + +-- | A ChainSync client's state that's used by other components, like the GDD or +-- the jumping governor. +data ChainSyncState blk = ChainSyncState { + + -- | The current candidate fragment. + csCandidate :: !(AnchoredFragment (Header blk)) + + -- | Whether the last message sent by the peer was MsgAwaitReply. + -- + -- This ChainSync client should ensure that its peer sets this flag while + -- and only while both of the following conditions are satisfied: the + -- peer's latest message has been fully processed (especially that its + -- candidate has been updated; previous argument) and its latest message + -- did not claim that it already has headers that extend its candidate. + -- + -- It's more important that the flag is unset promptly than it is for the + -- flag to be set promptly, because of how this is used by the GSM to + -- determine that the node is done syncing. + , csIdling :: !Bool + + -- | When the client receives a new header, it updates this field before + -- processing it further, and the latest slot may refer to a header beyond + -- the forecast horizon while the candidate fragment isn't extended yet, to + -- signal to GDD that the density is known up to this slot. + , csLatestSlot :: !(Maybe (WithOrigin SlotNo)) + } + deriving stock (Generic) + +deriving anyclass instance ( + HasHeader blk, + NoThunks (Header blk) + ) => NoThunks (ChainSyncState blk) + +-- | An interface to a ChainSync client that's used by other components, like +-- the GDD governor. 
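+--
+-- Roughly speaking: the ChainSync client keeps 'cschState' up to date, the
+-- GDD governor can disconnect the peer through 'cschGDDKill', and the
+-- ChainSync jumping governor steers the client through 'cschJumping' and
+-- 'cschJumpInfo'.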
+data ChainSyncClientHandle m blk = ChainSyncClientHandle {
+ -- | Disconnects from the peer when the GDD considers it adversarial.
+ cschGDDKill :: !(m ())
+
+ -- | Data shared between the client and external components like GDD.
+ , cschState :: !(StrictTVar m (ChainSyncState blk))
+
+ -- | The state of the peer with respect to ChainSync jumping.
+ , cschJumping :: !(StrictTVar m (ChainSyncJumpingState m blk))
+
+ -- | ChainSync state needed to jump to the tip of the candidate fragment of
+ -- the peer.
+ , cschJumpInfo :: !(StrictTVar m (Maybe (JumpInfo blk)))
+ }
+ deriving stock (Generic)
+
+deriving anyclass instance (
+ IOLike m,
+ HasHeader blk,
+ LedgerSupportsProtocol blk,
+ NoThunks (Header blk)
+ ) => NoThunks (ChainSyncClientHandle m blk)
+
+data DynamoInitState blk
+ = -- | The dynamo has not yet started jumping and we first need to jump to the
+ -- given jump info to set the intersection of the ChainSync server.
+ DynamoStarting !(JumpInfo blk)
+ | DynamoStarted
+ deriving (Generic)
+
+deriving anyclass instance
+ ( HasHeader blk,
+ LedgerSupportsProtocol blk,
+ NoThunks (Header blk)
+ ) => NoThunks (DynamoInitState blk)
+
+data ObjectorInitState
+ = -- | The objector still needs to set the intersection of the ChainSync
+ -- server before resuming retrieval of headers.
+ Starting
+ | Started
+ deriving (Generic, Show, NoThunks)
+
+data DisengagedInitState
+ = -- | The node is being disengaged and for that we need to restart the
+ -- ChainSync protocol.
+ Disengaging
+ | DisengagedDone
+ deriving (Generic, Show, NoThunks)
+
+data JumperInitState
+ = -- | The jumper hasn't been requested to jump yet
+ FreshJumper
+ | StartedJumper
+ deriving (Generic, Show, NoThunks)
+
+-- | State of a peer with respect to ChainSync jumping.
+data ChainSyncJumpingState m blk
+ = -- | The dynamo, of which there is exactly one unless there are no peers,
+ -- runs the normal ChainSync protocol and is morally supposed to give us
+ -- _the_ chain. This might not be true and the dynamo might not be
+ -- honest, but the goal of the algorithm is to eventually have an honest,
+ -- alert peer as dynamo.
+ Dynamo
+ (DynamoInitState blk)
+ -- | The last slot at which we triggered jumps for the jumpers.
+ !(WithOrigin SlotNo)
+ | -- | The objector, of which there is at most one, also runs normal
+ -- ChainSync. It is a former jumper that disagreed with the dynamo. When
+ -- that happened, we spun it up to let normal ChainSync and Genesis decide
+ -- which one to disconnect from.
+ Objector
+ ObjectorInitState
+ -- | The youngest point where the objector agrees with the dynamo.
+ !(JumpInfo blk)
+ -- | The point where the objector dissented from the dynamo when it was a
+ -- jumper.
+ !(Point (Header blk))
+ | -- | Headers continue to be downloaded from 'Disengaged' peers. They
+ -- are not requested to jump, nor elected as dynamos or objectors.
+ Disengaged DisengagedInitState
+ | -- | There can be any number of jumpers. They are queried regularly to
+ -- see if they agree with the chain that the dynamo is serving; otherwise,
+ -- they become candidates to be the objector. See
+ -- 'ChainSyncJumpingJumperState' for more details.
+ Jumper
+ -- | A TVar containing the next jump to be executed.
+ !(StrictTVar m (Maybe (JumpInfo blk)))
+ -- | More precisely, the state of the jumper.
+ !(ChainSyncJumpingJumperState blk) + deriving (Generic) + +deriving anyclass instance + ( IOLike m, + HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (Header blk) + ) => NoThunks (ChainSyncJumpingState m blk) + +-- | The ChainSync state required for jumps +-- +-- The jump info is mostly a snapshot of the @KnownIntersectionState@ of the +-- dynamo, with the difference that 'jTheirFragment' might be a proper prefix of +-- the original candidate fragment. +-- +-- This can happen if we need to look for an intersection when the jumper +-- rejects a jump. +data JumpInfo blk = JumpInfo + { jMostRecentIntersection :: !(Point blk) + , jOurFragment :: !(AnchoredFragment (Header blk)) + , jTheirFragment :: !(AnchoredFragment (Header blk)) + , jTheirHeaderStateHistory :: !(HeaderStateHistory blk) + } + deriving (Generic) + +instance (HasHeader (Header blk)) => Eq (JumpInfo blk) where + (==) = (==) `on` headPoint . jTheirFragment + +instance LedgerSupportsProtocol blk => NoThunks (JumpInfo blk) where + showTypeOf _ = show $ typeRep (Proxy @(JumpInfo blk)) + +-- | The specific state of a jumper peer. This state is to be understood as “to +-- the best of our knowledge”, that is “last time we asked them”. For instance, +-- a jumper might be marked as 'Happy' even though its chain has been differing +-- from the dynamo's for hundreds of blocks, if we haven't asked them to jump +-- since then. +data ChainSyncJumpingJumperState blk + = -- | The jumper is happy with the dynamo, and we hold the jump info of the + -- last accepted jump. + Happy JumperInitState !(Maybe (JumpInfo blk)) + | -- | The jumper disagrees with the dynamo and we are searching where exactly + -- that happens. All we know is a point where the jumper agrees with the + -- dynamo and a point where the jumper disagrees with the dynamo, carried by + -- this constructor. + -- + -- INVARIANT: The tip of the fragment in the good jump info (first argument) + -- is in the fragment of the bad jump info or is an ancestor of it. + LookingForIntersection !(JumpInfo blk) !(JumpInfo blk) + | -- | The jumper disagrees with the dynamo and we have determined the latest + -- point where dynamo and jumper agree. We store here the jump info of the + -- latest accepted jump and the point of the earliest rejected jump. + -- + -- The init state indicates the initialization to use for the objector in + -- case this jumper is promoted. + FoundIntersection ObjectorInitState !(JumpInfo blk) !(Point (Header blk)) + deriving (Generic) + +deriving anyclass instance + ( HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (Header blk) + ) => NoThunks (ChainSyncJumpingJumperState blk) diff --git a/ouroboros-consensus/src/unstable-consensus-testlib/Test/Util/Tracer.hs b/ouroboros-consensus/src/unstable-consensus-testlib/Test/Util/Tracer.hs index 69d6dffe3c..e2218e6e93 100644 --- a/ouroboros-consensus/src/unstable-consensus-testlib/Test/Util/Tracer.hs +++ b/ouroboros-consensus/src/unstable-consensus-testlib/Test/Util/Tracer.hs @@ -1,11 +1,14 @@ +{-# LANGUAGE ScopedTypeVariables #-} module Test.Util.Tracer ( recordingTracerIORef + , recordingTracerM , recordingTracerTVar ) where import Control.Tracer import Data.IORef import Ouroboros.Consensus.Util.IOLike +import System.IO.Unsafe (unsafePerformIO) -- | Create a 'Tracer' that stores all events in an 'IORef' that is atomically -- updated. 
The second return value lets you obtain the events recorded so far
@@ -24,3 +27,26 @@ recordingTracerTVar = uncheckedNewTVarM [] >>= \ref -> return
( Tracer $ \ev -> atomically $ modifyTVar ref (ev:)
, atomically $ reverse <$> readTVar ref
)
+
+-- | Like 'recordingTracerIORef', but lifts IO to an arbitrary monad.
+-- This is useful to record events without changing the scheduling during a
+-- test.
+recordingTracerM :: forall m ev. Monad m => m (Tracer m ev, m [ev])
+recordingTracerM = do
+ (tr, get) <- liftIOtoM recordingTracerIORef
+ pure (natTracer liftIOtoM tr, liftIOtoM get)
+ where
+ liftIOtoM :: IO a -> m a
+ liftIOtoM m = do
+ -- The fictitious state is only used to force unsafePerformIO to run @m@
+ -- every time @liftIOtoM m@ is evaluated.
+ s <- getStateM
+ pure $! snd $ unsafePerformIO $ do
+ r <- m
+ pure (s, r)
+
+ -- We mark this function as NOINLINE to ensure the compiler cannot reason
+ -- that two calls of @getStateM@ might yield the same value.
+ {-# NOINLINE getStateM #-}
+ getStateM :: m Int
+ getStateM = pure 0
diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs
index c10542d05b..ff05b517fb 100644
--- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs
+++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs
@@ -80,13 +80,13 @@ import qualified Ouroboros.Consensus.HeaderStateHistory as HeaderStateHistory
import Ouroboros.Consensus.Ledger.Abstract
import Ouroboros.Consensus.Ledger.Extended hiding (ledgerState)
import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
- (ChainDbView (..), ChainSyncClientException,
- ChainSyncClientResult (..), ChainSyncLoPBucketConfig (..),
- ChainSyncState (..), ChainSyncStateView (..),
- ConfigEnv (..), Consensus, DynamicEnv (..), Our (..),
- Their (..), TraceChainSyncClientEvent (..),
- bracketChainSyncClient, chainSyncClient, chainSyncStateFor,
- viewChainSyncState)
+ (CSJConfig (..), ChainDbView (..),
+ ChainSyncClientException, ChainSyncClientResult (..),
+ ChainSyncLoPBucketConfig (..), ChainSyncState (..),
+ ChainSyncStateView (..), ConfigEnv (..), Consensus,
+ DynamicEnv (..), Our (..), Their (..),
+ TraceChainSyncClientEvent (..), bracketChainSyncClient,
+ chainSyncClient, chainSyncStateFor, viewChainSyncState)
import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck
import Ouroboros.Consensus.Node.NetworkProtocolVersion
(NodeToNodeVersion)
@@ -404,11 +404,14 @@ runChainSync skew securityParam (ClientUpdates clientUpdates)
lopBucketConfig :: ChainSyncLoPBucketConfig
lopBucketConfig = ChainSyncLoPBucketDisabled
+ csjConfig :: CSJConfig
+ csjConfig = CSJDisabled
+
client :: ChainSyncStateView m TestBlock
-> Consensus ChainSyncClientPipelined
TestBlock
m
- client ChainSyncStateView {csvSetCandidate, csvSetLatestSlot, csvIdling, csvLoPBucket} =
+ client ChainSyncStateView {csvSetCandidate, csvSetLatestSlot, csvIdling, csvLoPBucket, csvJumping} =
chainSyncClient
ConfigEnv {
chainDbView
@@ -426,6 +429,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates)
, idling = csvIdling
, loPBucket = csvLoPBucket
, setLatestSlot = csvSetLatestSlot
+ , jumping = csvJumping
}
-- Set up the server
@@ -499,6 +503,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates)
serverId
maxBound
lopBucketConfig
+ csjConfig
$ \csState -> do
atomically $ do
handles <- readTVar
varHandles