diff --git a/crates/example-types/src/state_types.rs b/crates/example-types/src/state_types.rs
index 9b52e0c662..b30bc2603b 100644
--- a/crates/example-types/src/state_types.rs
+++ b/crates/example-types/src/state_types.rs
@@ -23,7 +23,7 @@ use rand::Rng;
 use serde::{Deserialize, Serialize};
 use vbs::version::Version;
 
-pub use crate::node_types::TestTypes;
+pub use crate::node_types::{TestTwoStakeTablesTypes, TestTypes};
 use crate::{
     block_types::{TestBlockPayload, TestTransaction},
     testable_delay::{DelayConfig, SupportedTraitTypesForAsyncDelay, TestableDelay},
diff --git a/crates/example-types/src/storage_types.rs b/crates/example-types/src/storage_types.rs
index b5f3610f21..49d44e9bb0 100644
--- a/crates/example-types/src/storage_types.rs
+++ b/crates/example-types/src/storage_types.rs
@@ -33,11 +33,11 @@ use jf_vid::VidScheme;
 
 use crate::testable_delay::{DelayConfig, SupportedTraitTypesForAsyncDelay, TestableDelay};
 
-type VidShares<TYPES> = HashMap<
+type VidShares<TYPES> = BTreeMap<
     <TYPES as NodeType>::View,
     HashMap<<TYPES as NodeType>::SignatureKey, Proposal<TYPES, VidDisperseShare<TYPES>>>,
 >;
-type VidShares2<TYPES> = HashMap<
+type VidShares2<TYPES> = BTreeMap<
     <TYPES as NodeType>::View,
     HashMap<<TYPES as NodeType>::SignatureKey, Proposal<TYPES, VidDisperseShare2<TYPES>>>,
 >;
@@ -61,8 +61,8 @@ impl<TYPES: NodeType> Default for TestStorageState<TYPES> {
     fn default() -> Self {
         Self {
-            vids: HashMap::new(),
-            vid2: HashMap::new(),
+            vids: BTreeMap::new(),
+            vid2: BTreeMap::new(),
             das: HashMap::new(),
             da2s: HashMap::new(),
             proposals: BTreeMap::new(),
@@ -127,6 +127,9 @@ impl<TYPES: NodeType> TestStorage<TYPES> {
     pub async fn last_actioned_epoch(&self) -> TYPES::Epoch {
         self.inner.read().await.epoch
     }
+    pub async fn vids_cloned(&self) -> VidShares2<TYPES> {
+        self.inner.read().await.vid2.clone()
+    }
 }
 
 #[async_trait]
diff --git a/crates/hotshot/src/lib.rs b/crates/hotshot/src/lib.rs
index ea213a6338..e6faf52cab 100644
--- a/crates/hotshot/src/lib.rs
+++ b/crates/hotshot/src/lib.rs
@@ -47,7 +47,7 @@ use hotshot_task_impls::{events::HotShotEvent, helpers::broadcast_event};
 /// Reexport error type
 pub use hotshot_types::error::HotShotError;
 use hotshot_types::{
-    consensus::{Consensus, ConsensusMetricsValue, OuterConsensus, View, ViewInner},
+    consensus::{Consensus, ConsensusMetricsValue, OuterConsensus, VidShares, View, ViewInner},
     constants::{EVENT_CHANNEL_SIZE, EXTERNAL_EVENT_CHANNEL_SIZE},
     data::{Leaf2, QuorumProposal, QuorumProposal2},
     event::{EventType, LeafInfo},
@@ -69,7 +69,6 @@ use hotshot_types::{
 pub use rand;
 use tokio::{spawn, time::sleep};
 use tracing::{debug, instrument, trace};
-
 // -- Rexports
 // External
 use crate::{
@@ -332,6 +331,7 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> SystemContext<TYPES, I, V> {
     undecided_state: BTreeMap<TYPES::View, View<TYPES>>,
     /// Proposals we have sent out to provide to others for catchup
     saved_proposals: BTreeMap<TYPES::View, Proposal<TYPES, QuorumProposal2<TYPES>>>,
+    /// Saved VID shares
+    saved_vid_shares: Option<VidShares<TYPES>>,
 }
 
 impl<TYPES: NodeType> HotShotInitializer<TYPES> {
@@ -1048,6 +1050,7 @@ impl<TYPES: NodeType> HotShotInitializer<TYPES> {
             undecided_leaves: Vec::new(),
             undecided_state: BTreeMap::new(),
             instance_state,
+            saved_vid_shares: None,
         })
     }
 
@@ -1072,6 +1075,7 @@
         decided_upgrade_certificate: Option<UpgradeCertificate<TYPES>>,
         undecided_leaves: Vec<Leaf2<TYPES>>,
         undecided_state: BTreeMap<TYPES::View, View<TYPES>>,
+        saved_vid_shares: Option<VidShares<TYPES>>,
     ) -> Self {
         Self {
             inner: anchor_leaf,
@@ -1087,6 +1091,7 @@
             decided_upgrade_certificate,
             undecided_leaves,
             undecided_state,
+            saved_vid_shares,
         }
     }
 }
diff --git a/crates/testing/src/spinning_task.rs b/crates/testing/src/spinning_task.rs
index 66438d701d..fac9856950 100644
--- a/crates/testing/src/spinning_task.rs
+++ b/crates/testing/src/spinning_task.rs
@@ -166,6 +166,7 @@ where
                     None,
                     Vec::new(),
                     BTreeMap::new(),
+                    None,
                 );
                 // We assign node's public key and stake value rather than read from config file since it's a test
                 let validator_config =
@@ -256,6 +257,7 @@ where
                     read_storage.decided_upgrade_certificate().await,
                     Vec::new(),
                     BTreeMap::new(),
+                    Some(read_storage.vids_cloned().await),
                 );
                 // We assign node's public key and stake value rather than read from config file since it's a test
                 let validator_config = ValidatorConfig::generated_from_seed_indexed(
diff --git a/crates/testing/tests/tests_1/test_with_failures_2.rs b/crates/testing/tests/tests_1/test_with_failures_2.rs
index cf9ac8c83f..aa4721e696 100644
--- a/crates/testing/tests/tests_1/test_with_failures_2.rs
+++ b/crates/testing/tests/tests_1/test_with_failures_2.rs
@@ -74,50 +74,6 @@ cross_tests!(
     }
 );
 
-cross_tests!(
-    TestName: test_with_failures_2_with_epochs,
-    Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl],
-    Types: [TestTwoStakeTablesTypes],
-    Versions: [EpochsTestVersions],
-    Ignore: false,
-    Metadata: {
-        let mut metadata = TestDescription::default_more_nodes();
-        metadata.num_nodes_with_stake = 12;
-        metadata.da_staked_committee_size = 12;
-        metadata.start_nodes = 12;
-        metadata.epoch_height = 10;
-        let dead_nodes = vec![
-            ChangeNode {
-                idx: 10,
-                updown: NodeAction::Down,
-            },
-            ChangeNode {
-                idx: 11,
-                updown: NodeAction::Down,
-            },
-        ];
-
-        metadata.spinning_properties = SpinningTaskDescription {
-            node_changes: vec![(5, dead_nodes)]
-        };
-
-        // 2 nodes fail triggering view sync, expect no other timeouts
-        metadata.overall_safety_properties.num_failed_views = 6;
-        // Make sure we keep committing rounds after the bad leaders, but not the full 50 because of the numerous timeouts
-        metadata.overall_safety_properties.num_successful_views = 20;
-        metadata.overall_safety_properties.expected_views_to_fail = HashMap::from([
-            (ViewNumber::new(5), false),
-            (ViewNumber::new(11), false),
-            (ViewNumber::new(17), false),
-            (ViewNumber::new(23), false),
-            (ViewNumber::new(29), false),
-            (ViewNumber::new(35), false),
-        ]);
-
-        metadata
-    }
-);
-
 cross_tests!(
     TestName: test_with_double_leader_failures,
     Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl],
diff --git a/crates/testing/tests/tests_3/test_with_failures_half_f.rs b/crates/testing/tests/tests_3/test_with_failures_half_f.rs
index 3dd8e3ceb7..b5fbdfd0a0 100644
--- a/crates/testing/tests/tests_3/test_with_failures_half_f.rs
+++ b/crates/testing/tests/tests_3/test_with_failures_half_f.rs
@@ -5,7 +5,7 @@
 // along with the HotShot repository. If not, see <https://mit-license.org/>.
 
 use hotshot_example_types::{
-    node_types::{EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, TestVersions},
+    node_types::{Libp2pImpl, MemoryImpl, PushCdnImpl, TestVersions},
     state_types::TestTypes,
 };
 use hotshot_macros::cross_tests;
@@ -53,42 +53,3 @@ cross_tests!(
         metadata
     }
 );
-cross_tests!(
-    TestName: test_with_failures_half_f_epochs,
-    Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl],
-    Types: [TestTypes],
-    Versions: [EpochsTestVersions],
-    Ignore: false,
-    Metadata: {
-        let mut metadata = TestDescription::default_more_nodes();
-        metadata.epoch_height = 0;
-        metadata.num_bootstrap_nodes = 17;
-        metadata.epoch_height = 10;
-        // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shutdown the
-        // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the
-        // following issue.
-        let dead_nodes = vec![
-            ChangeNode {
-                idx: 17,
-                updown: NodeAction::Down,
-            },
-            ChangeNode {
-                idx: 18,
-                updown: NodeAction::Down,
-            },
-            ChangeNode {
-                idx: 19,
-                updown: NodeAction::Down,
-            },
-        ];
-
-        metadata.spinning_properties = SpinningTaskDescription {
-            node_changes: vec![(5, dead_nodes)]
-        };
-
-        metadata.overall_safety_properties.num_failed_views = 3;
-        // Make sure we keep committing rounds after the bad leaders, but not the full 50 because of the numerous timeouts
-        metadata.overall_safety_properties.num_successful_views = 22;
-        metadata
-    }
-);
diff --git a/crates/testing/tests/tests_6/test_epochs.rs b/crates/testing/tests/tests_6/test_epochs.rs
index c902483860..95d9142a61 100644
--- a/crates/testing/tests/tests_6/test_epochs.rs
+++ b/crates/testing/tests/tests_6/test_epochs.rs
@@ -4,7 +4,7 @@
 // You should have received a copy of the MIT License
 // along with the HotShot repository. If not, see <https://mit-license.org/>.
 
-use std::time::Duration;
+use std::{collections::HashMap, time::Duration};
 
 use hotshot_example_types::{
     node_types::{
@@ -18,10 +18,12 @@ use hotshot_macros::cross_tests;
 use hotshot_testing::{
     block_builder::SimpleBuilderImplementation,
     completion_task::{CompletionTaskDescription, TimeBasedCompletionTaskDescription},
+    overall_safety_task::OverallSafetyPropertiesDescription,
     spinning_task::{ChangeNode, NodeAction, SpinningTaskDescription},
-    test_builder::TestDescription,
+    test_builder::{TestDescription, TimingData},
     view_sync_task::ViewSyncTaskDescription,
 };
+use hotshot_types::{data::ViewNumber, traits::node_implementation::ConsensusTime};
 
 cross_tests!(
     TestName: test_success_with_epochs,
@@ -238,3 +240,224 @@
         }
     },
 );
+
+cross_tests!(
+    TestName: test_with_failures_2_with_epochs,
+    Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl],
+    Types: [TestTwoStakeTablesTypes],
+    Versions: [EpochsTestVersions],
+    Ignore: false,
+    Metadata: {
+        let mut metadata = TestDescription::default_more_nodes();
+        metadata.num_nodes_with_stake = 12;
+        metadata.da_staked_committee_size = 12;
+        metadata.start_nodes = 12;
+        metadata.epoch_height = 10;
+        let dead_nodes = vec![
+            ChangeNode {
+                idx: 10,
+                updown: NodeAction::Down,
+            },
+            ChangeNode {
+                idx: 11,
+                updown: NodeAction::Down,
+            },
+        ];
+
+        metadata.spinning_properties = SpinningTaskDescription {
+            node_changes: vec![(5, dead_nodes)]
+        };
+
+        // 2 nodes fail triggering view sync, expect no other timeouts
+        metadata.overall_safety_properties.num_failed_views = 6;
+        // Make sure we keep committing rounds after the bad leaders, but not the full 50 because of the numerous timeouts
+        metadata.overall_safety_properties.num_successful_views = 20;
+        metadata.overall_safety_properties.expected_views_to_fail = HashMap::from([
+            (ViewNumber::new(5), false),
+            (ViewNumber::new(11), false),
+            (ViewNumber::new(17), false),
+            (ViewNumber::new(23), false),
+            (ViewNumber::new(29), false),
+            (ViewNumber::new(35), false),
+        ]);
+
+        metadata
+    }
+);
+
+cross_tests!(
+    TestName: test_with_double_leader_failures_with_epochs,
+    Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl],
+    Types: [TestConsecutiveLeaderTypes],
+    Versions: [EpochsTestVersions],
+    Ignore: false,
+    Metadata: {
+        let mut metadata = TestDescription::default_more_nodes();
+        metadata.num_nodes_with_stake = 12;
+        metadata.da_staked_committee_size = 12;
+        metadata.start_nodes = 12;
+        let dead_nodes = vec![
+            ChangeNode {
+                idx: 5,
+                updown: NodeAction::Down,
+            },
+        ];
+
+        // shutdown while node 5 is leader
+        // we want to trigger `ViewSyncTrigger` during epoch transition
+        // then ensure we do not fail again as next leader will be leader 2 views also
+        let view_spin_node_down = 9;
+        metadata.spinning_properties = SpinningTaskDescription {
+            node_changes: vec![(view_spin_node_down, dead_nodes)]
+        };
+
+        // node 5 is leader twice when we shut down
+        metadata.overall_safety_properties.num_failed_views = 2;
+        metadata.overall_safety_properties.expected_views_to_fail = HashMap::from([
+            // next views after turning node off
+            (ViewNumber::new(view_spin_node_down + 1), false),
+            (ViewNumber::new(view_spin_node_down + 2), false)
+        ]);
+        // Make sure we keep committing rounds after the bad leaders, but not the full 50 because of the numerous timeouts
+        metadata.overall_safety_properties.num_successful_views = 13;
+
+        // only turning off 1 node, so expected should be num_nodes_with_stake - 1
+        let expected_nodes_in_view_sync = metadata.num_nodes_with_stake - 1;
+        metadata.view_sync_properties = ViewSyncTaskDescription::Threshold(expected_nodes_in_view_sync, expected_nodes_in_view_sync);
+
+        metadata
+    }
+);
+
+cross_tests!(
+    TestName: test_with_failures_half_f_epochs,
+    Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl],
+    Types: [TestTypes, TestTwoStakeTablesTypes],
+    Versions: [EpochsTestVersions],
+    Ignore: false,
+    Metadata: {
+        let mut metadata = TestDescription::default_more_nodes();
+        metadata.epoch_height = 10;
+        // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shutdown the
+        // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the
+        // following issue.
+        let dead_nodes = vec![
+            ChangeNode {
+                idx: 17,
+                updown: NodeAction::Down,
+            },
+            ChangeNode {
+                idx: 18,
+                updown: NodeAction::Down,
+            },
+            ChangeNode {
+                idx: 19,
+                updown: NodeAction::Down,
+            },
+        ];
+
+        metadata.spinning_properties = SpinningTaskDescription {
+            node_changes: vec![(5, dead_nodes)]
+        };
+
+        metadata.overall_safety_properties.num_failed_views = 3;
+        // Make sure we keep committing rounds after the bad leaders, but not the full 50 because of the numerous timeouts
+        metadata.overall_safety_properties.num_successful_views = 19;
+        metadata
+    }
+);
+
+cross_tests!(
+    TestName: test_with_failures_f_epochs,
+    Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl],
+    Types: [TestTypes, TestTwoStakeTablesTypes],
+    Versions: [EpochsTestVersions],
+    Ignore: false,
+    Metadata: {
+        let mut metadata = TestDescription::default_more_nodes();
+        metadata.overall_safety_properties.num_failed_views = 6;
+        // Make sure we keep committing rounds after the bad leaders, but not the full 50 because of the numerous timeouts
+        metadata.overall_safety_properties.num_successful_views = 15;
+        let dead_nodes = vec![
+            ChangeNode {
+                idx: 14,
+                updown: NodeAction::Down,
+            },
+            ChangeNode {
+                idx: 15,
+                updown: NodeAction::Down,
+            },
+            ChangeNode {
+                idx: 16,
+                updown: NodeAction::Down,
+            },
+            ChangeNode {
+                idx: 17,
+                updown: NodeAction::Down,
+            },
+            ChangeNode {
+                idx: 18,
+                updown: NodeAction::Down,
+            },
+            ChangeNode {
+                idx: 19,
+                updown: NodeAction::Down,
+            },
+        ];
+
+        metadata.spinning_properties = SpinningTaskDescription {
+            node_changes: vec![(5, dead_nodes)]
+        };
+
+        metadata
+    }
+);
+
+cross_tests!(
+    TestName: test_all_restart_epochs,
+    Impls: [CombinedImpl, PushCdnImpl],
+    Types: [TestTypes, TestTypesRandomizedLeader],
+    Versions: [EpochsTestVersions],
+    Ignore: false,
+    Metadata: {
+        let timing_data = TimingData {
+            next_view_timeout: 2000,
+            ..Default::default()
+        };
+        let mut metadata = TestDescription::default();
+        let mut catchup_nodes = vec![];
+
+        for i in 0..20 {
+            catchup_nodes.push(ChangeNode {
+                idx: i,
+                updown: NodeAction::RestartDown(0),
+            })
+        }
+
+        metadata.timing_data = timing_data;
+        metadata.start_nodes = 20;
+        metadata.num_nodes_with_stake = 20;
+
+        metadata.spinning_properties = SpinningTaskDescription {
+            // Restart all the nodes in view 10
+            node_changes: vec![(10, catchup_nodes)],
+        };
+        metadata.view_sync_properties =
+            hotshot_testing::view_sync_task::ViewSyncTaskDescription::Threshold(0, 20);
+
+        metadata.completion_task_description =
+            CompletionTaskDescription::TimeBasedCompletionTaskBuilder(
+                TimeBasedCompletionTaskDescription {
+                    duration: Duration::from_secs(60),
+                },
+            );
+        metadata.overall_safety_properties = OverallSafetyPropertiesDescription {
+            // Make sure we keep committing rounds after the catchup, but not the full 50.
+            num_successful_views: 22,
+            num_failed_views: 15,
+            ..Default::default()
+        };
+
+        metadata
+    },
+);
diff --git a/crates/types/src/consensus.rs b/crates/types/src/consensus.rs
index d9978a2749..54a093be88 100644
--- a/crates/types/src/consensus.rs
+++ b/crates/types/src/consensus.rs
@@ -411,6 +411,7 @@ impl<TYPES: NodeType> Consensus<TYPES> {
     #[allow(clippy::too_many_arguments)]
     pub fn new(
         validated_state_map: BTreeMap<TYPES::View, View<TYPES>>,
+        vid_shares: Option<VidShares<TYPES>>,
         cur_view: TYPES::View,
         cur_epoch: TYPES::Epoch,
         locked_view: TYPES::View,
@@ -426,7 +427,7 @@
     ) -> Self {
         Consensus {
             validated_state_map,
-            vid_shares: BTreeMap::new(),
+            vid_shares: vid_shares.unwrap_or_default(),
             saved_da_certs: HashMap::new(),
             cur_view,
             cur_epoch,
@@ -535,9 +536,8 @@
         };
         let parent_vid = self
            .vid_shares()
-            .get(&parent_view_number)?
-            .get(public_key)
-            .cloned()
+            .get(&parent_view_number)
+            .and_then(|inner_map| inner_map.get(public_key).cloned())
             .map(|prop| prop.data);
 
         Some(LeafInfo {
@@ -861,6 +861,10 @@
     /// # Panics
     /// On inconsistent stored entries
     pub fn collect_garbage(&mut self, old_anchor_view: TYPES::View, new_anchor_view: TYPES::View) {
+        // Nothing to collect
+        if new_anchor_view <= old_anchor_view {
+            return;
+        }
        let gc_view = TYPES::View::new(new_anchor_view.saturating_sub(1));
        // state check
        let anchor_entry = self
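
A note on the HashMap-to-BTreeMap switch for VidShares/VidShares2: keying shares by view number in an ordered map lets garbage collection (which, with the new guard in collect_garbage, only runs once the anchor has actually advanced) prune everything below the anchor in one range split. A minimal sketch of that pruning idea, using hypothetical u64/String stand-ins rather than the real HotShot types:

    use std::collections::BTreeMap;

    // Hypothetical stand-ins for the real view and share types.
    type View = u64;
    type Share = String;

    // Drop every share for views strictly below the new anchor.
    fn prune_below(shares: &mut BTreeMap<View, Share>, anchor_view: View) {
        // split_off returns the entries with keys >= anchor_view; keeping only
        // those is an O(log n) range split that a HashMap cannot offer.
        *shares = shares.split_off(&anchor_view);
    }

    fn main() {
        let mut shares: BTreeMap<View, Share> =
            (1..=5).map(|v| (v, format!("share-{v}"))).collect();
        prune_below(&mut shares, 3);
        assert_eq!(shares.keys().copied().collect::<Vec<_>>(), vec![3, 4, 5]);
    }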
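
A note on the parent-VID lookup change in consensus.rs: with `.get(&parent_view_number)?`, a missing per-view map aborted construction of the whole return value, whereas `.get(..).and_then(..)` confines the miss to the one optional field. A self-contained sketch of the difference, with simplified stand-in types (not the real LeafInfo):

    use std::collections::BTreeMap;

    // view -> (replica key -> share), simplified stand-in types
    type Shares = BTreeMap<u64, BTreeMap<String, u32>>;

    struct Info {
        parent_vid: Option<u32>,
    }

    // Old shape: `?` aborts the whole construction if the view has no map.
    fn build_strict(shares: &Shares, view: u64, key: &str) -> Option<Info> {
        let parent_vid = shares.get(&view)?.get(key).copied();
        Some(Info { parent_vid })
    }

    // New shape: a missing view only leaves the optional field empty.
    fn build_lenient(shares: &Shares, view: u64, key: &str) -> Option<Info> {
        let parent_vid = shares.get(&view).and_then(|inner| inner.get(key).copied());
        Some(Info { parent_vid })
    }

    fn main() {
        let shares = Shares::new();
        assert!(build_strict(&shares, 7, "node-0").is_none());
        assert!(build_lenient(&shares, 7, "node-0").is_some());
    }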
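
A note on how the restart pieces fit together: the storage snapshot (vids_cloned) feeds the initializer's saved_vid_shares, which in turn seeds Consensus::new, so a restarted node comes back with the VID shares it had persisted. A condensed sketch of that flow with simplified types; the real code threads VidShares<TYPES> through HotShotInitializer::from_reload:

    use std::collections::BTreeMap;

    type VidShares = BTreeMap<u64, Vec<u8>>; // simplified stand-in for the real alias

    struct Storage {
        vid2: VidShares,
    }

    impl Storage {
        // Counterpart of TestStorage::vids_cloned: snapshot the saved shares.
        fn vids_cloned(&self) -> VidShares {
            self.vid2.clone()
        }
    }

    struct Initializer {
        saved_vid_shares: Option<VidShares>,
    }

    struct Consensus {
        vid_shares: VidShares,
    }

    impl Consensus {
        // A fresh node passes None; a restarted node passes its snapshot.
        fn new(vid_shares: Option<VidShares>) -> Self {
            Self {
                vid_shares: vid_shares.unwrap_or_default(),
            }
        }
    }

    fn main() {
        let storage = Storage { vid2: VidShares::from([(10, vec![1, 2, 3])]) };
        let init = Initializer { saved_vid_shares: Some(storage.vids_cloned()) };
        let consensus = Consensus::new(init.saved_vid_shares);
        assert!(consensus.vid_shares.contains_key(&10));
    }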