Skip to content

Commit

Permalink
Stake Table Catchup in Hotshot (#2623)
Browse files Browse the repository at this point in the history
Adds Catchup mechanism to Hotshot via the a wrapper around `Membership`
  • Loading branch information
bfish713 authored Mar 6, 2025
1 parent 3019354 commit 7608376
Show file tree
Hide file tree
Showing 66 changed files with 1,468 additions and 866 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 9 additions & 7 deletions hotshot-examples/infra/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ use hotshot_testing::block_builder::{
use hotshot_types::{
consensus::ConsensusMetricsValue,
data::{Leaf, TestableLeaf},
epoch_membership::EpochMembershipCoordinator,
event::{Event, EventType},
network::{BuilderType, NetworkConfig, NetworkConfigFile, NetworkConfigSource},
traits::{
Expand Down Expand Up @@ -388,13 +389,14 @@ pub trait RunDa<
// TODO: we need to pass a valid fallback builder url here somehow
fallback_builder_url: config.config.builder_urls.first().clone(),
};
let epoch_height = config.config.epoch_height;

SystemContext::init(
pk,
sk,
config.node_index,
config.config,
membership,
EpochMembershipCoordinator::new(membership, epoch_height),
Arc::from(network),
initializer,
ConsensusMetricsValue::default(),
Expand Down Expand Up @@ -524,15 +526,15 @@ pub trait RunDa<
}
}
}
// Panic if we don't have the genesis epoch, there is no recovery from that
let num_eligible_leaders = context
.hotshot
.memberships
.read()
.membership_coordinator
.membership_for_epoch(genesis_epoch_from_version::<V, TYPES>())
.await
.unwrap()
.committee_leaders(TYPES::View::genesis())
.await
.committee_leaders(
TYPES::View::genesis(),
genesis_epoch_from_version::<V, TYPES>(),
)
.len();
let consensus_lock = context.hotshot.consensus();
let consensus_reader = consensus_lock.read().await;
Expand Down
7 changes: 6 additions & 1 deletion hotshot-query-service/examples/simple-server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use hotshot_query_service::{
use hotshot_testing::block_builder::{SimpleBuilderImplementation, TestBuilderImplementation};
use hotshot_types::{
consensus::ConsensusMetricsValue,
epoch_membership::EpochMembershipCoordinator,
light_client::StateKeyPair,
signature_key::BLSPubKey,
traits::{election::Membership, network::Topic},
Expand Down Expand Up @@ -236,13 +237,17 @@ async fn init_consensus(
));

let storage: TestStorage<MockTypes> = TestStorage::default();
let coordinator = EpochMembershipCoordinator::new(
Arc::new(RwLock::new(membership)),
config.epoch_height,
);

SystemContext::init(
pub_keys[node_id],
priv_key,
node_id as u64,
config,
Arc::new(RwLock::new(membership)),
coordinator,
network,
HotShotInitializer::from_genesis::<MockVersions>(
TestInstanceState::default(),
Expand Down
5 changes: 4 additions & 1 deletion hotshot-query-service/src/testing/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ use hotshot_example_types::{
use hotshot_testing::block_builder::{SimpleBuilderImplementation, TestBuilderImplementation};
use hotshot_types::{
consensus::ConsensusMetricsValue,
epoch_membership::EpochMembershipCoordinator,
light_client::StateKeyPair,
signature_key::BLSPubKey,
traits::{election::Membership, network::Topic, signature_key::SignatureKey as _},
Expand Down Expand Up @@ -181,13 +182,15 @@ impl<D: DataSourceLifeCycle + UpdateStatusData> MockNetwork<D> {
));

let hs_storage: TestStorage<MockTypes> = TestStorage::default();
let memberships =
EpochMembershipCoordinator::new(membership, config.epoch_height);

let hotshot = SystemContext::init(
pub_keys[node_id],
priv_key,
node_id as u64,
config,
membership,
memberships,
network,
HotShotInitializer::from_genesis::<MockVersions>(
TestInstanceState::default(),
Expand Down
86 changes: 46 additions & 40 deletions hotshot-task-impls/src/consensus/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@ use chrono::Utc;
use hotshot_types::{
event::{Event, EventType},
simple_vote::{HasEpoch, QuorumVote2, TimeoutData2, TimeoutVote2},
traits::{
election::Membership,
node_implementation::{ConsensusTime, NodeImplementation, NodeType},
},
traits::node_implementation::{ConsensusTime, NodeImplementation, NodeType},
utils::EpochTransitionIndicator,
vote::{HasViewNumber, Vote},
};
Expand Down Expand Up @@ -47,12 +44,14 @@ pub(crate) async fn handle_quorum_vote_recv<
.read()
.await
.is_high_qc_for_last_block();
let we_are_leader = task_state
.membership
.read()
let epoch_membership = task_state
.membership_coordinator
.membership_for_epoch(vote.data.epoch)
.await
.leader(vote.view_number() + 1, vote.data.epoch)?
== task_state.public_key;
.context(warn!("No stake table for epoch"))?;

let we_are_leader =
epoch_membership.leader(vote.view_number() + 1).await? == task_state.public_key;
ensure!(
in_transition || we_are_leader,
info!(
Expand All @@ -70,8 +69,7 @@ pub(crate) async fn handle_quorum_vote_recv<
&mut task_state.vote_collectors,
vote,
task_state.public_key.clone(),
&task_state.membership,
vote.data.epoch,
&epoch_membership,
task_state.id,
&event,
sender,
Expand All @@ -80,20 +78,19 @@ pub(crate) async fn handle_quorum_vote_recv<
)
.await?;

if let Some(vote_epoch) = vote.epoch() {
if vote.epoch().is_some() {
// If the vote sender belongs to the next epoch, collect it separately to form the second QC
let has_stake = task_state
.membership
.read()
.await
.has_stake(&vote.signing_key(), Some(vote_epoch + 1));
let has_stake = epoch_membership
.next_epoch()
.await?
.has_stake(&vote.signing_key())
.await;
if has_stake {
handle_vote(
&mut task_state.next_epoch_vote_collectors,
&vote.clone().into(),
task_state.public_key.clone(),
&task_state.membership,
vote.data.epoch,
&epoch_membership.next_epoch().await?.clone(),
task_state.id,
&event,
sender,
Expand All @@ -118,14 +115,14 @@ pub(crate) async fn handle_timeout_vote_recv<
sender: &Sender<Arc<HotShotEvent<TYPES>>>,
task_state: &mut ConsensusTaskState<TYPES, I, V>,
) -> Result<()> {
let epoch_membership = task_state
.membership_coordinator
.membership_for_epoch(task_state.cur_epoch)
.await
.context(warn!("No stake table for epoch"))?;
// Are we the leader for this view?
ensure!(
task_state
.membership
.read()
.await
.leader(vote.view_number() + 1, task_state.cur_epoch)?
== task_state.public_key,
epoch_membership.leader(vote.view_number() + 1).await? == task_state.public_key,
info!(
"We are not the leader for view {:?}",
vote.view_number() + 1
Expand All @@ -136,8 +133,10 @@ pub(crate) async fn handle_timeout_vote_recv<
&mut task_state.timeout_vote_collectors,
vote,
task_state.public_key.clone(),
&task_state.membership,
vote.data.epoch,
&task_state
.membership_coordinator
.membership_for_epoch(vote.data.epoch)
.await?,
task_state.id,
&event,
sender,
Expand Down Expand Up @@ -201,10 +200,11 @@ pub async fn send_high_qc<TYPES: NodeType, V: Versions, I: NodeImplementation<TY
.await;
} else {
let leader = task_state
.membership
.read()
.await
.leader(new_view_number, task_state.cur_epoch)?;
.membership_coordinator
.membership_for_epoch(task_state.cur_epoch)
.await?
.leader(new_view_number)
.await?;
broadcast_event(
Arc::new(HotShotEvent::HighQcSend(
high_qc,
Expand Down Expand Up @@ -305,10 +305,12 @@ pub(crate) async fn handle_view_change<
std::mem::replace(&mut task_state.timeout_task, new_timeout_task).abort();

let old_view_leader_key = task_state
.membership
.read()
.membership_coordinator
.membership_for_epoch(task_state.cur_epoch)
.await
.leader(old_view_number, task_state.cur_epoch)?;
.context(warn!("No stake table for epoch"))?
.leader(old_view_number)
.await?;

let consensus_reader = task_state.consensus.read().await;
consensus_reader
Expand Down Expand Up @@ -367,10 +369,12 @@ pub(crate) async fn handle_timeout<TYPES: NodeType, I: NodeImplementation<TYPES>

ensure!(
task_state
.membership
.read()
.membership_coordinator
.membership_for_epoch(epoch)
.await
.has_stake(&task_state.public_key, epoch),
.context(warn!("No stake table for epoch"))?
.has_stake(&task_state.public_key)
.await,
debug!(
"We were not chosen for the consensus committee for view {:?}",
view_number
Expand Down Expand Up @@ -416,10 +420,12 @@ pub(crate) async fn handle_timeout<TYPES: NodeType, I: NodeImplementation<TYPES>
.await;

let leader = task_state
.membership
.read()
.membership_coordinator
.membership_for_epoch(task_state.cur_epoch)
.await
.leader(view_number, task_state.cur_epoch);
.context(warn!("No stake table for epoch"))?
.leader(view_number)
.await;

let consensus_reader = task_state.consensus.read().await;
consensus_reader.metrics.number_of_timeouts.add(1);
Expand Down
6 changes: 3 additions & 3 deletions hotshot-task-impls/src/consensus/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
// along with the HotShot repository. If not, see <https://mit-license.org/>.

use async_broadcast::{Receiver, Sender};
use async_lock::RwLock;
use async_trait::async_trait;
use hotshot_task::task::TaskState;
use hotshot_types::{
consensus::OuterConsensus,
epoch_membership::EpochMembershipCoordinator,
event::Event,
message::UpgradeLock,
simple_certificate::{NextEpochQuorumCertificate2, QuorumCertificate2, TimeoutCertificate2},
Expand Down Expand Up @@ -53,7 +53,7 @@ pub struct ConsensusTaskState<TYPES: NodeType, I: NodeImplementation<TYPES>, V:
pub network: Arc<I::Network>,

/// Membership for Quorum Certs/votes
pub membership: Arc<RwLock<TYPES::Membership>>,
pub membership_coordinator: EpochMembershipCoordinator<TYPES>,

/// A map of `QuorumVote` collector tasks.
pub vote_collectors: VoteCollectorsMap<TYPES, QuorumVote2<TYPES>, QuorumCertificate2<TYPES>, V>,
Expand Down Expand Up @@ -183,7 +183,7 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> ConsensusTaskSt
high_qc,
Some(next_epoch_high_qc),
&self.consensus,
&self.membership,
&self.membership_coordinator,
&self.upgrade_lock,
)
.await
Expand Down
Loading

0 comments on commit 7608376

Please sign in to comment.