From 02f68efe71ab1ab48ac6e6e59ade846e16b9e044 Mon Sep 17 00:00:00 2001 From: Karel Moravec Date: Fri, 19 Jul 2024 18:11:40 +0200 Subject: [PATCH] feat: address PR commnets --- Cargo.lock | 1 + fendermint/app/src/cmd/run.rs | 10 +++-- fendermint/app/src/metrics/mod.rs | 1 - fendermint/app/src/metrics/prometheus.rs | 41 +------------------ .../vm/interpreter/src/fvm/checkpoint.rs | 5 ++- fendermint/vm/interpreter/src/fvm/exec.rs | 12 ++++-- fendermint/vm/interpreter/src/fvm/observe.rs | 11 +++-- ipc/observability/Cargo.toml | 1 + ipc/observability/src/lib.rs | 2 + ipc/observability/src/macros.rs | 8 ++++ ipc/observability/src/observe.rs | 30 ++++++++++++++ 11 files changed, 71 insertions(+), 51 deletions(-) create mode 100644 ipc/observability/src/observe.rs diff --git a/Cargo.lock b/Cargo.lock index c55c1291f..b056f037c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5058,6 +5058,7 @@ dependencies = [ name = "ipc-observability" version = "0.1.0" dependencies = [ + "anyhow", "hex", "lazy_static", "prometheus", diff --git a/fendermint/app/src/cmd/run.rs b/fendermint/app/src/cmd/run.rs index 87c1e11cd..2dac3ef6a 100644 --- a/fendermint/app/src/cmd/run.rs +++ b/fendermint/app/src/cmd/run.rs @@ -11,6 +11,7 @@ use fendermint_crypto::SecretKey; use fendermint_rocksdb::{blockstore::NamespaceBlockstore, namespaces, RocksDb, RocksDbConfig}; use fendermint_vm_actor_interface::eam::EthAddress; use fendermint_vm_interpreter::chain::ChainEnv; +use fendermint_vm_interpreter::fvm::observe::register_metrics as register_interpreter_metrics; use fendermint_vm_interpreter::fvm::upgrades::UpgradeScheduler; use fendermint_vm_interpreter::{ bytes::{BytesMessageInterpreter, ProposalPrepareMode}, @@ -27,6 +28,7 @@ use fendermint_vm_topdown::voting::{publish_vote_loop, Error as VoteError, VoteT use fendermint_vm_topdown::{CachedFinalityProvider, IPCParentFinality, Toggle}; use fvm_shared::address::{current_network, Address, Network}; use ipc_ipld_resolver::{Event as ResolverEvent, VoteRecord}; +use ipc_observability::observe::register_metrics as register_default_metrics; use ipc_provider::config::subnet::{EVMSubnet, SubnetConfig}; use ipc_provider::IpcProvider; use libp2p::identity::secp256k1; @@ -38,6 +40,7 @@ use tracing::info; use crate::cmd::key::read_secret_key; use crate::{cmd, options::run::RunArgs, settings::Settings}; +use fendermint_app::observe::register_metrics as register_consensus_metrics; cmd! { RunArgs(self, settings) { @@ -70,10 +73,11 @@ async fn run(settings: Settings) -> anyhow::Result<()> { let metrics_registry = if settings.metrics.enabled { let registry = prometheus::Registry::new(); + register_default_metrics(®istry).context("failed to register default metrics")?; register_topdown_metrics(®istry).context("failed to register topdown metrics")?; - - fendermint_app::metrics::register_app_metrics(®istry) - .context("failed to register metrics")?; + register_interpreter_metrics(®istry) + .context("failed to register interpreter metrics")?; + register_consensus_metrics(®istry).context("failed to register consensus metrics")?; Some(registry) } else { diff --git a/fendermint/app/src/metrics/mod.rs b/fendermint/app/src/metrics/mod.rs index 34459e81c..6ba7a4af6 100644 --- a/fendermint/app/src/metrics/mod.rs +++ b/fendermint/app/src/metrics/mod.rs @@ -3,5 +3,4 @@ mod prometheus; -pub use prometheus::app::register_metrics as register_app_metrics; pub use prometheus::eth::register_metrics as register_eth_metrics; diff --git a/fendermint/app/src/metrics/prometheus.rs b/fendermint/app/src/metrics/prometheus.rs index 8be633924..9594a012c 100644 --- a/fendermint/app/src/metrics/prometheus.rs +++ b/fendermint/app/src/metrics/prometheus.rs @@ -2,42 +2,9 @@ // SPDX-License-Identifier: Apache-2.0, MIT //! Prometheus metrics -macro_rules! metrics { - ($($name:ident : $type:ty = $desc:literal);* $(;)?) => { - $( - paste! { - lazy_static! { - pub static ref $name: $type = $type::new(stringify!([< $name:lower >]), $desc).unwrap(); - } - } - )* - - pub fn register_metrics(registry: &Registry) -> anyhow::Result<()> { - $(registry.register(Box::new($name.clone()))?;)* - Ok(()) - } - }; - } - -/// Metrics emitted by endermint. -pub mod app { - use lazy_static::lazy_static; - use paste::paste; - use prometheus::{IntCounter, IntGauge, Registry}; - - metrics! { - BOTTOMUP_CKPT_BLOCK_HEIGHT: IntGauge = "Highest bottom-up checkpoint created"; - BOTTOMUP_CKPT_CONFIG_NUM: IntGauge = "Highest configuration number checkpointed"; - BOTTOMUP_CKPT_NUM_MSGS: IntCounter = "Number of bottom-up messages observed since start"; - - // This metrics is available in CometBFT as well, but it's something that should increase even without subnets, - // which can be a useful way to check if metrics work at all. - ABCI_COMMITTED_BLOCK_HEIGHT: IntGauge = "Highest committed block"; - } -} - /// Metrics emitted by the Ethereum API facade. pub mod eth { + // TODO - migrate these metrics to new observability architecture use fendermint_eth_api::apis::RPC_METHOD_CALL_LATENCY_SECONDS; pub fn register_metrics(registry: &prometheus::Registry) -> anyhow::Result<()> { @@ -48,12 +15,6 @@ pub mod eth { #[cfg(test)] mod tests { - #[test] - fn can_register_app_metrics() { - let r = prometheus::Registry::new(); - super::app::register_metrics(&r).unwrap(); - } - #[test] fn can_register_eth_metrics() { let r = prometheus::Registry::new(); diff --git a/fendermint/vm/interpreter/src/fvm/checkpoint.rs b/fendermint/vm/interpreter/src/fvm/checkpoint.rs index bf52eff94..00a07aaa5 100644 --- a/fendermint/vm/interpreter/src/fvm/checkpoint.rs +++ b/fendermint/vm/interpreter/src/fvm/checkpoint.rs @@ -24,7 +24,9 @@ use ipc_actors_abis::gateway_getter_facet as getter; use ipc_api::staking::ConfigurationNumber; use ipc_observability::{emit, serde::HexEncodableBlockHash}; -use super::observe::{CheckpointCreated, CheckpointFinalized, CheckpointSigned}; +use super::observe::{ + CheckpointCreated, CheckpointFinalized, CheckpointSigned, CheckpointSignedRole, +}; use super::state::ipc::tokens_to_burn; use super::{ broadcast::Broadcaster, @@ -256,6 +258,7 @@ where .context("failed to broadcast checkpoint signature")?; emit(CheckpointSigned { + role: CheckpointSignedRole::Own, height: height.value(), hash: HexEncodableBlockHash(cp.block_hash.to_vec()), validator: validator_ctx.public_key, diff --git a/fendermint/vm/interpreter/src/fvm/exec.rs b/fendermint/vm/interpreter/src/fvm/exec.rs index 77235229b..902f75be5 100644 --- a/fendermint/vm/interpreter/src/fvm/exec.rs +++ b/fendermint/vm/interpreter/src/fvm/exec.rs @@ -9,14 +9,14 @@ use fendermint_vm_actor_interface::{chainmetadata, cron, system}; use fvm::executor::ApplyRet; use fvm_ipld_blockstore::Blockstore; use fvm_shared::{address::Address, ActorID, MethodNum, BLOCK_GAS_LIMIT}; -use ipc_observability::{emit, measure_time}; +use ipc_observability::{emit, measure_time, observe::TracingError, Traceable}; use tendermint_rpc::Client; use crate::ExecInterpreter; use super::{ checkpoint::{self, PowerUpdates}, - observe::{MsgExec, MsgExecPurpose}, + observe::{CheckpointFinalized, MsgExec, MsgExecPurpose}, state::FvmExecState, FvmMessage, FvmMessageInterpreter, }; @@ -186,7 +186,13 @@ where } async fn end(&self, mut state: Self::State) -> anyhow::Result<(Self::State, Self::EndOutput)> { - checkpoint::emit_trace_if_check_checkpoint_finalized(&self.gateway, &mut state)?; + let _ = checkpoint::emit_trace_if_check_checkpoint_finalized(&self.gateway, &mut state) + .inspect_err(|e| { + emit(TracingError { + affected_event: CheckpointFinalized::name(), + reason: e.to_string(), + }); + }); let updates = if let Some((checkpoint, updates)) = checkpoint::maybe_create_checkpoint(&self.gateway, &mut state) diff --git a/fendermint/vm/interpreter/src/fvm/observe.rs b/fendermint/vm/interpreter/src/fvm/observe.rs index 687819836..d0304b879 100644 --- a/fendermint/vm/interpreter/src/fvm/observe.rs +++ b/fendermint/vm/interpreter/src/fvm/observe.rs @@ -82,9 +82,6 @@ impl_traceables!( CheckpointFinalized ); -/// Hex encoded hash. -pub type HashHex<'a> = &'a str; - #[derive(Debug)] pub struct CheckpointCreated { pub height: u64, @@ -102,8 +99,15 @@ impl Recordable for CheckpointCreated { } } +#[derive(Debug)] +pub enum CheckpointSignedRole { + Own, + Peer, +} + #[derive(Debug)] pub struct CheckpointSigned { + pub role: CheckpointSignedRole, pub height: u64, pub hash: HexEncodableBlockHash, pub validator: PublicKey, @@ -181,6 +185,7 @@ mod tests { let secret_key = SecretKey::random(&mut r); emit(CheckpointSigned { + role: CheckpointSignedRole::Own, height: 1, hash: HexEncodableBlockHash(hash.clone()), validator: secret_key.public_key(), diff --git a/ipc/observability/Cargo.toml b/ipc/observability/Cargo.toml index de39eba22..67679a858 100644 --- a/ipc/observability/Cargo.toml +++ b/ipc/observability/Cargo.toml @@ -13,3 +13,4 @@ tracing = { workspace = true } tracing-subscriber = { workspace = true } tracing-appender = { workspace = true } hex = { workspace = true } +anyhow = { workspace = true } diff --git a/ipc/observability/src/lib.rs b/ipc/observability/src/lib.rs index 504c256a9..57fe44d7a 100644 --- a/ipc/observability/src/lib.rs +++ b/ipc/observability/src/lib.rs @@ -5,6 +5,7 @@ pub mod macros; pub mod traces; mod tracing_layers; pub use lazy_static::lazy_static; +pub mod observe; pub mod serde; use std::fmt::Debug; @@ -19,6 +20,7 @@ pub trait Recordable { pub trait Traceable { fn trace_level(&self) -> TraceLevel; fn domain(&self) -> &'static str; + fn name() -> &'static str; } pub enum TraceLevel { diff --git a/ipc/observability/src/macros.rs b/ipc/observability/src/macros.rs index 5f87f1d23..d0100b098 100644 --- a/ipc/observability/src/macros.rs +++ b/ipc/observability/src/macros.rs @@ -22,6 +22,10 @@ macro_rules! register_metrics { macro_rules! impl_traceable { ($struct_name:ident<$lifetime:tt>, $trace_level:expr, $domain:expr) => { impl<$lifetime> Traceable for $struct_name<$lifetime> { + fn name() -> &'static str { + stringify!($struct_name) + } + fn trace_level(&self) -> TraceLevel { $trace_level } @@ -33,6 +37,10 @@ macro_rules! impl_traceable { }; ($struct_name:ident, $trace_level:expr, $domain:expr) => { impl Traceable for $struct_name { + fn name() -> &'static str { + stringify!($struct_name) + } + fn trace_level(&self) -> TraceLevel { $trace_level } diff --git a/ipc/observability/src/observe.rs b/ipc/observability/src/observe.rs new file mode 100644 index 000000000..80c04f49e --- /dev/null +++ b/ipc/observability/src/observe.rs @@ -0,0 +1,30 @@ +// Copyright 2022-2024 Protocol Labs +// SPDX-License-Identifier: Apache-2.0, MIT + +use crate::{ + impl_traceable, impl_traceables, lazy_static, register_metrics, Recordable, TraceLevel, + Traceable, +}; +use anyhow; +use prometheus::{register_int_counter_vec, IntCounterVec, Registry}; + +register_metrics! { + TRACING_ERRORS: IntCounterVec + = register_int_counter_vec!("tracing_errors", "Number of tracing errors", &["event"]); +} + +impl_traceables!(TraceLevel::Error, "TracingError", TracingError<'a>); + +#[derive(Debug)] +pub struct TracingError<'a> { + pub affected_event: &'a str, + pub reason: String, +} + +impl Recordable for TracingError<'_> { + fn record_metrics(&self) { + TRACING_ERRORS + .with_label_values(&[self.affected_event]) + .inc(); + } +}