diff --git a/src/commons/api/ca.rs b/src/commons/api/ca.rs index 43a0d2d1f..cbb3cfa74 100644 --- a/src/commons/api/ca.rs +++ b/src/commons/api/ca.rs @@ -1,11 +1,11 @@ //! Common data types for Certificate Authorities, defined here so that the //! CLI can have access without needing to depend on the full krill_ca module. +use std::{fmt, ops, str}; +use std::collections::hash_map; use std::collections::HashMap; -use std::ops::{self}; use std::str::FromStr; use std::sync::Arc; -use std::{fmt, str}; use base64::engine::general_purpose::STANDARD as BASE64_ENGINE; use base64::engine::Engine as _; @@ -962,9 +962,7 @@ impl ParentStatuses { self.0.get(parent) } - pub fn iter( - &self, - ) -> impl Iterator { + pub fn iter(&self) -> hash_map::Iter { self.0.iter() } @@ -1062,14 +1060,22 @@ impl ParentStatuses { impl IntoIterator for ParentStatuses { type Item = (ParentHandle, ParentStatus); - type IntoIter = - std::collections::hash_map::IntoIter; + type IntoIter = hash_map::IntoIter; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } } +impl<'a> IntoIterator for &'a ParentStatuses { + type Item = (&'a ParentHandle, &'a ParentStatus); + type IntoIter = hash_map::Iter<'a, ParentHandle, ParentStatus>; + + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} + impl fmt::Display for ParentStatuses { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for (parent, status) in self.0.iter() { diff --git a/src/daemon/http/metrics.rs b/src/daemon/http/metrics.rs new file mode 100644 index 000000000..18f83789d --- /dev/null +++ b/src/daemon/http/metrics.rs @@ -0,0 +1,604 @@ +//! Prometheus-format metrics. + +use std::fmt; +use std::collections::HashMap; +use std::fmt::Write; +use crate::constants::{ + KRILL_VERSION_MAJOR, KRILL_VERSION_MINOR, KRILL_VERSION_PATCH, +}; +use crate::ta::TA_NAME; +use super::{HttpResponse, Request, RoutingResult}; + + +pub async fn metrics(req: Request) -> RoutingResult { + if !req.is_get() || !req.path().segment().starts_with("metrics") { + return Err(req) + } + + let server = req.state(); + let mut target = Target::default(); + + target.single( + Metric::gauge( + "server_start", + "Unix timestamp of the last Krill server start", + ), + server.server_info().started() + ); + + target.single( + Metric::gauge( + "version_major", + "Krill server major version number", + ), + KRILL_VERSION_MAJOR + ); + target.single( + Metric::gauge( + "version_minor", + "Krill server minor version number", + ), + KRILL_VERSION_MINOR + ); + target.single( + Metric::gauge( + "version_patch", + "Krill server patch version number", + ), + KRILL_VERSION_PATCH + ); + + #[cfg(feature = "multi-user")] + target.single( + Metric::gauge( + "auth_session_cache_size", + "total number of cached login session tokens", + ), + server.login_session_cache_size(), + ); + + if let Ok(cas_stats) = server.cas_stats().await { + target.single( + Metric::gauge("cas", "number of CAs in Krill"), + cas_stats.len() + ); + + + if !server.config.metrics.metrics_hide_ca_details { + let mut ca_status_map = HashMap::new(); + + for ca in cas_stats.keys() { + if let Ok(ca_status) = server.ca_status(ca).await { + ca_status_map.insert(ca.clone(), ca_status); + } + } + + let metric = Metric::gauge( + "ca_parent_success", + "status of last CA to parent connection (1 .. success)", + ); + target.header(metric); + for (ca, status) in &ca_status_map { + if ca.as_str() == TA_NAME { + continue + } + + for (parent, status) in status.parents() { + if let Some(exchange) = status.last_exchange() { + target.multi(metric) + .label("ca", ca) + .label("parent", parent) + .value(i32::from(exchange.was_success())) + } + } + } + + let metric = Metric::gauge( + "ca_parent_last_success_time", + "Unix timestamp of last successful CA to parent connection", + ); + target.header(metric); + for (ca, status) in &ca_status_map { + if ca.as_str() == TA_NAME { + continue + } + for (parent, status) in status.parents() { + // Skip parents for which we don’t have had a successful + // connection at all. Most likely they were just added + // (in which case it will come) - or were never successful + // in which case the metric above will say that the status + // is 0 + if let Some(last_success) = status.last_success() { + target.multi(metric) + .label("ca", ca) + .label("parent", parent) + .value(last_success) + } + } + } + + let metric = Metric::gauge( + "ca_ps_success", + "status of last CA to Publication Server connection \ + (1 ..success)", + ); + target.header(metric); + for (ca, status) in &ca_status_map { + // Skip the ones for which we have no status yet, i.e + // it was really only just added + // and no attempt to connect has yet been made. + if let Some(exchange) = status.repo().last_exchange() { + target.multi(metric) + .label("ca", ca) + .value(i32::from(exchange.was_success())) + } + } + + let metric = Metric::gauge( + "ca_ps_last_success_time", + "unix timestamp of last successful CA to Publication Server \ + connection", + ); + target.header(metric); + for (ca, status) in &ca_status_map { + // Skip the ones for which we have no status yet, i.e + // it was really only just added + // and no attempt to connect has yet been made. + if let Some(last_success) = status.repo().last_success() { + target.multi(metric).label("ca", ca).value(last_success); + } + } + + // Do not show child metrics if none of the CAs has any + // children.. Many users do not delegate so, + // showing these metrics would just be confusing. + let any_children = cas_stats.values().any(|ca| { + ca.child_count() > 0 + }); + + if any_children + && !server.config.metrics.metrics_hide_child_details + { + let metric = Metric::gauge( + "cas_children", + "number of children for CA", + ); + target.header(metric); + for (ca, status) in &cas_stats { + target.multi(metric) + .label("ca", ca) + .value(status.child_count()) + } + + let metric = Metric::gauge( + "ca_child_success", + "status of last child to CA connection", + ); + target.header(metric); + for (ca, status) in ca_status_map.iter() { + // Skip the ones for which we have no status yet, i.e + // it was really only just added + // and no attempt to connect has yet been made. + for (child, status) in status.children() { + if let Some(exchange) = status.last_exchange() { + target.multi(metric) + .label("ca", ca) + .label("child", child) + .value(i32::from(exchange.was_success())) + } + } + } + + let metric = Metric::gauge( + "ca_child_state", + "child state (0=suspended, 1=active)", + ); + target.header(metric); + for (ca, status) in &ca_status_map { + for (child, status) in status.children() { + target.multi(metric) + .label("ca", ca) + .label("child", child) + .value(i32::from(status.suspended().is_none())) + } + } + + let metric = Metric::gauge( + "ca_child_last_connection", + "unix timestamp of last child to CA connection", + ); + target.header(metric); + for (ca, status) in &ca_status_map { + // Skip the ones for which we have no status yet, i.e + // it was really only just added + // and no attempt to connect has yet been made. + for (child, status) in status.children() { + if let Some(exchange) = status.last_exchange() { + target.multi(metric) + .label("ca", ca) + .label("child", child) + .value(exchange.timestamp()); + } + } + } + + let metric = Metric::gauge( + "ca_child_last_success", + "unix timestamp last successful child to CA connection", + ); + target.header(metric); + for (ca, status) in &ca_status_map { + // Skip the ones for which we have no status yet, i.e + // it was really only just added + // and no attempt to connect has yet been made. + for (child, status) in status.children() { + if let Some(time) = status.last_success() { + target.multi(metric) + .label("ca", ca) + .label("child", child) + .value(time); + } + } + } + + let metric = Metric::gauge( + "ca_child_agent_total", + "total children per user agent based on their last \ + connection", + ); + target.header(metric); + for (ca, status) in &ca_status_map { + // Skip the ones for which we have no status yet, i.e + // it was really only just added + // and no attempt to connect has yet been made. + + let mut user_agent_totals = HashMap::new(); + for status in status.children().values() { + if let Some(exchange) = status.last_exchange() { + + let agent = exchange + .user_agent().as_ref() + .map(|s| s.as_str()) + .unwrap_or(""); + if let Some(item) = + user_agent_totals.get_mut(agent) + { + *item += 1; + } + else { + user_agent_totals.insert(agent.to_string(), 1); + } + } + } + + for (ua, total) in &user_agent_totals { + target.multi(metric) + .label("ca", ca) + .label("user_agent", ua) + .value(total); + } + } + } + + if !server.config.metrics.metrics_hide_roa_details { + let metric = Metric::gauge( + "cas_bgp_announcements_valid", + "number of announcements seen for CA resources \ + with RPKI state VALID", + ); + target.header(metric); + for (ca, stats) in &cas_stats { + target.multi(metric) + .label("ca", ca) + .value(stats.bgp_stats().announcements_valid); + } + + let metric = Metric::gauge( + "cas_bgp_announcements_invalid_asn", + "number of announcements seen for CA resources with \ + RPKI state INVALID (ASN mismatch)", + ); + target.header(metric); + for (ca, stats) in &cas_stats { + target.multi(metric) + .label("ca", ca) + .value(stats.bgp_stats().announcements_invalid_asn); + } + + let metric = Metric::gauge( + "cas_bgp_announcements_invalid_length", + "number of announcements seen for CA resources with \ + RPKI state INVALID (prefix exceeds max length)" + ); + target.header(metric); + for (ca, stats) in &cas_stats { + target.multi(metric) + .label("ca", ca) + .value( + stats.bgp_stats().announcements_invalid_length + ); + } + + let metric = Metric::gauge( + "cas_bgp_announcements_not_found", + "number of announcements seen for CA resources with \ + RPKI state NOT FOUND (none of the CA's ROAs cover this)" + ); + target.header(metric); + for (ca, stats) in &cas_stats { + target.multi(metric) + .label("ca", ca) + .value(stats.bgp_stats().announcements_not_found); + } + + let metric = Metric::gauge( + "cas_bgp_roas_too_permissive", + "number of ROAs for this CA which allow excess \ + announcements (0 may also indicate that no BGP info \ + is available)" + ); + target.header(metric); + for (ca, stats) in &cas_stats { + target.multi(metric) + .label("ca", ca) + .value(stats.bgp_stats().roas_too_permissive); + } + + let metric = Metric::gauge( + "cas_bgp_roas_redundant", + "number of ROAs for this CA which are redundant (0 may \ + also indicate that no BGP info is available)" + ); + target.header(metric); + for (ca, stats) in &cas_stats { + target.multi(metric) + .label("ca", ca) + .value(stats.bgp_stats().roas_redundant); + } + + let metric = Metric::gauge( + "cas_bgp_roas_stale", + "number of ROAs for this CA for which no announcements \ + are seen (0 may also indicate that no BGP info is \ + available)" + ); + target.header(metric); + for (ca, stats) in &cas_stats { + target.multi(metric) + .label("ca", ca) + .value(stats.bgp_stats().roas_stale); + } + + let metric = Metric::gauge( + "cas_bgp_roas_total", + "total number of ROAs for this CA" + ); + target.header(metric); + for (ca, stats) in &cas_stats { + target.multi(metric) + .label("ca", ca) + .value(stats.bgp_stats().roas_total); + } + } + } + } + + if let Ok(stats) = server.repo_stats() { + let publishers = stats.get_publishers(); + + target.single( + Metric::gauge( + "repo_publisher", + "number of publishers in repository" + ), + publishers.len(), + ); + + if let Some(last_update) = stats.last_update() { + target.single( + Metric::gauge( + "repo_rrdp_last_update", + "unix timestamp of last update by any publisher" + ), + last_update.timestamp(), + ); + } + + target.single( + Metric::counter( + "repo_rrdp_serial", + "RRDP serial" + ), + stats.serial() + ); + + if !server.config.metrics.metrics_hide_publisher_details { + let metric = Metric::gauge( + "repo_objects", + "number of objects in repository for publisher" + ); + target.header(metric); + for (publisher, stats) in publishers { + target.multi(metric) + .label("publisher", publisher) + .value(stats.objects()) + } + + let metric = Metric::gauge( + "repo_size", + "size of objects in bytes in repository for publisher" + ); + target.header(metric); + for (publisher, stats) in publishers { + target.multi(metric) + .label("publisher", publisher) + .value(stats.size()); + } + + let metric = Metric::gauge( + "repo_last_update", + "unix timestamp of last update for publisher" + ); + target.header(metric); + for (publisher, stats) in publishers { + if let Some(last_update) = stats.last_update() { + target.multi(metric) + .label("publisher", publisher) + .value(last_update.timestamp()) + } + } + } + } + + Ok(target.into_response()) +} + + +//============ Metrics Infrastructure ======================================== +// +// This is currently copied from Routinator and should perhaps be moved to +// daemonbase. + + +//------------ Target -------------------------------------------------------- + +#[derive(Clone, Debug, Default)] +struct Target { + buf: String, +} + +impl Target { + pub fn into_response(self) -> HttpResponse { + HttpResponse::prometheus(self.buf.into()) + } + + pub fn single(&mut self, metric: Metric, value: impl fmt::Display) { + metric.header(self); + metric.single(self, value); + } + + pub fn header(&mut self, metric: Metric) { + metric.header(self) + } + + pub fn multi(&mut self, metric: Metric) -> LabelValue { + metric.multi(self) + } +} + + +//------------ Metric -------------------------------------------------------- + +#[derive(Clone, Copy, Debug)] +struct Metric { + prefix: &'static str, + name: &'static str, + help: (&'static str, &'static str), + mtype: MetricType, +} + +impl Metric { + pub fn new( + name: &'static str, help: &'static str, mtype: MetricType + ) -> Self { + Metric { + prefix: "", + name, + help: (help, ""), + mtype + } + } + + pub fn counter(name: &'static str, help: &'static str) -> Self { + Self::new(name, help, MetricType::Counter) + } + + pub fn gauge(name: &'static str, help: &'static str) -> Self { + Self::new(name, help, MetricType::Gauge) + } + + fn header(self, target: &mut Target) { + writeln!(&mut target.buf, + "# HELP krill{}_{} {}{}\n\ + # TYPE krill{}_{} {}", + self.prefix, self.name, self.help.0, self.help.1, + self.prefix, self.name, self.mtype, + ).expect("writing to string"); + } + + fn single(self, target: &mut Target, value: impl fmt::Display) { + writeln!(&mut target.buf, + "krill{}_{} {}", + self.prefix, self.name, value + ).expect("writing to string"); + } + + fn multi(self, target: &mut Target) -> LabelValue { + LabelValue::new(self, target) + } +} + + +//------------ LabelValue ---------------------------------------------------- + +struct LabelValue<'a> { + target: &'a mut Target, + first: bool, +} + +impl<'a> LabelValue<'a> { + fn new(metric: Metric, target: &'a mut Target) -> Self { + write!( + &mut target.buf, "krill{}_{}{{", metric.prefix, metric.name + ).expect("writing to string"); + LabelValue { target, first: true } + } + + pub fn label(mut self, name: &str, value: impl fmt::Display) -> Self { + if self.first { + self.first = false; + } + else { + self.target.buf.push_str(", "); + } + write!( + &mut self.target.buf, "{}=\"{}\"", name, value + ).expect("writing to string"); + self + } + + pub fn value(self, value: impl fmt::Display) { + writeln!( + &mut self.target.buf, "}} {}", value + ).expect("writing to string"); + } +} + + +//------------ MetricType ---------------------------------------------------- + +#[derive(Clone, Copy, Debug)] +enum MetricType { + Counter, + Gauge, + /* Not currently used: + Histogram, + Summary, + */ +} + +impl fmt::Display for MetricType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str( + match *self { + MetricType::Counter => "counter", + MetricType::Gauge => "gauge", + /* + MetricType::Histogram => "histogram", + MetricType::Summary => "summary", + */ + } + ) + } +} + diff --git a/src/daemon/http/mod.rs b/src/daemon/http/mod.rs index 94905a5a7..d00b54268 100644 --- a/src/daemon/http/mod.rs +++ b/src/daemon/http/mod.rs @@ -22,6 +22,7 @@ use crate::{ }; pub mod auth; +pub mod metrics; pub mod server; pub mod statics; pub mod testbed; @@ -41,6 +42,7 @@ enum ContentType { Rfc8181, Rfc6492, Text, + Prometheus, Xml, Html, Fav, @@ -59,6 +61,7 @@ impl AsRef for ContentType { ContentType::Rfc8181 => publication::CONTENT_TYPE, ContentType::Rfc6492 => provisioning::CONTENT_TYPE, ContentType::Text => "text/plain", + ContentType::Prometheus => "text/plain; version=0.0.4", ContentType::Xml => "application/xml", ContentType::Html => "text/html", @@ -144,8 +147,7 @@ impl io::Write for Response { } } -//------------ HttpResponse -//------------ --------------------------------------------------- +//------------ HttpResponse -------------------------------------------------- pub struct HttpResponse { response: HyperResponse, @@ -249,6 +251,10 @@ impl HttpResponse { ) } + pub fn prometheus(body: Vec) -> Self { + Self::ok_response(ContentType::Prometheus, body) + } + pub fn xml(body: Vec) -> Self { Self::ok_response(ContentType::Xml, body) } diff --git a/src/daemon/http/server.rs b/src/daemon/http/server.rs index a6c73c305..9991d6f52 100644 --- a/src/daemon/http/server.rs +++ b/src/daemon/http/server.rs @@ -1,5 +1,4 @@ //! Hyper based HTTP server for Krill. -use std::collections::HashMap; use std::fs::File; use std::io::Read; use std::net::SocketAddr; @@ -30,7 +29,7 @@ use tokio::sync::oneshot; use crate::{ commons::{ api::{ - ApiRepositoryContact, AspaDefinitionUpdates, BgpStats, + ApiRepositoryContact, AspaDefinitionUpdates, CommandHistoryCriteria, ParentCaReq, PublisherList, RepositoryContact, RoaConfigurationUpdates, RtaName, Token, }, @@ -40,13 +39,11 @@ use crate::{ util::file, }, constants::{ - KRILL_ENV_HTTP_LOG_INFO, KRILL_ENV_UPGRADE_ONLY, KRILL_VERSION_MAJOR, - KRILL_VERSION_MINOR, KRILL_VERSION_PATCH, NO_RESOURCE, + KRILL_ENV_HTTP_LOG_INFO, KRILL_ENV_UPGRADE_ONLY, NO_RESOURCE, }, daemon::{ auth::common::permissions::Permission, auth::{Auth, Handle}, - ca::CaStatus, config::Config, http::{ auth::auth, statics::statics, testbed::testbed, tls, tls_keys, @@ -56,7 +53,7 @@ use crate::{ krillserver::KrillServer, properties::PropertiesManager, }, - ta::{self, TA_NAME}, + ta, upgrades::{ finalise_data_migration, post_start_upgrade, prepare_upgrade_data_migrations, UpgradeError, UpgradeMode, @@ -366,7 +363,7 @@ async fn map_requests( res = health(req).await; } if let Err(req) = res { - res = metrics(req).await; + res = super::metrics::metrics(req).await; } if let Err(req) = res { res = stats(req).await; @@ -485,541 +482,6 @@ pub async fn health(req: Request) -> RoutingResult { } } -/// Produce prometheus style metrics -#[allow(clippy::format_push_string)] -pub async fn metrics(req: Request) -> RoutingResult { - if req.is_get() && req.path().segment().starts_with("metrics") { - let server = req.state(); - - struct AllBgpStats { - announcements_valid: HashMap, - announcements_invalid_asn: HashMap, - announcements_invalid_length: HashMap, - announcements_not_found: HashMap, - roas_too_permissive: HashMap, - roas_redundant: HashMap, - roas_stale: HashMap, - roas_total: HashMap, - } - - impl AllBgpStats { - fn add_ca(&mut self, ca: &CaHandle, stats: &BgpStats) { - self.announcements_valid - .insert(ca.clone(), stats.announcements_valid); - self.announcements_invalid_asn - .insert(ca.clone(), stats.announcements_invalid_asn); - self.announcements_invalid_length - .insert(ca.clone(), stats.announcements_invalid_length); - self.announcements_not_found - .insert(ca.clone(), stats.announcements_not_found); - self.roas_too_permissive - .insert(ca.clone(), stats.roas_too_permissive); - self.roas_redundant.insert(ca.clone(), stats.roas_redundant); - self.roas_stale.insert(ca.clone(), stats.roas_stale); - self.roas_total.insert(ca.clone(), stats.roas_total); - } - } - - let mut res = String::new(); - - let info = server.server_info(); - res.push_str("# HELP krill_server_start unix timestamp in seconds of last krill server start\n"); - res.push_str("# TYPE krill_server_start gauge\n"); - res.push_str(&format!("krill_server_start {}\n", info.started())); - res.push('\n'); - - res.push_str( - "# HELP krill_version_major krill server major version number\n", - ); - res.push_str("# TYPE krill_version_major gauge\n"); - res.push_str(&format!( - "krill_version_major {}\n", - KRILL_VERSION_MAJOR - )); - res.push('\n'); - - res.push_str( - "# HELP krill_version_minor krill server minor version number\n", - ); - res.push_str("# TYPE krill_version_minor gauge\n"); - res.push_str(&format!( - "krill_version_minor {}\n", - KRILL_VERSION_MINOR - )); - res.push('\n'); - - res.push_str( - "# HELP krill_version_patch krill server patch version number\n", - ); - res.push_str("# TYPE krill_version_patch gauge\n"); - res.push_str(&format!( - "krill_version_patch {}\n", - KRILL_VERSION_PATCH - )); - - #[cfg(feature = "multi-user")] - { - res.push('\n'); - res.push_str("# HELP krill_auth_session_cache_size total number of cached login session tokens\n"); - res.push_str("# TYPE krill_auth_session_cache_size gauge\n"); - res.push_str(&format!( - "krill_auth_session_cache_size {}\n", - server.login_session_cache_size() - )); - } - - if let Ok(cas_stats) = server.cas_stats().await { - let number_cas = cas_stats.len(); - - res.push('\n'); - res.push_str("# HELP krill_cas number of cas in krill\n"); - res.push_str("# TYPE krill_cas gauge\n"); - res.push_str(&format!("krill_cas {}\n", number_cas)); - - if !server.config.metrics.metrics_hide_ca_details { - // Show per CA details - - let mut ca_status_map: HashMap = - HashMap::new(); - - for ca in cas_stats.keys() { - if let Ok(ca_status) = server.ca_status(ca).await { - ca_status_map.insert(ca.clone(), ca_status); - } - } - - { - // CA -> Parent metrics - - // krill_ca_parent_success{{ca="ca", parent="parent"}} 1 - // krill_ca_parent_last_success_time{{ca="ca", - // parent="parent"}} 1630921599 // timestamp - - res.push('\n'); - res.push_str( - "# HELP krill_ca_parent_success status of last CA to parent connection (0=issue, 1=success)\n", - ); - res.push_str("# TYPE krill_ca_parent_success gauge\n"); - for (ca, status) in ca_status_map.iter() { - if ca.as_str() != TA_NAME { - for (parent, status) in status.parents().iter() { - // skip the ones for which we have no status - // yet, i.e it was really only just added - // and no attempt to connect has yet been - // made. - if let Some(exchange) = status.last_exchange() - { - res.push_str(&format!( - "krill_ca_parent_success{{ca=\"{}\", parent=\"{}\"}} {}\n", - ca, - parent, - i32::from(exchange.was_success()) - )); - } - } - } - } - - res.push('\n'); - res.push_str( - "# HELP krill_ca_parent_last_success_time unix timestamp in seconds of last successful CA to parent connection\n", - ); - res.push_str( - "# TYPE krill_ca_parent_last_success_time gauge\n", - ); - - for (ca, status) in ca_status_map.iter() { - if ca.as_str() != TA_NAME { - for (parent, status) in status.parents().iter() { - // skip the ones for which we have no - // successful connection at all. Most likely - // they were just added (in which case it will - // come) - or were never successful - // in which case the metric above will say - // that the status is 0 - if let Some(last_success) = - status.last_success() - { - res.push_str(&format!( - "krill_ca_parent_last_success_time{{ca=\"{}\", parent=\"{}\"}} {}\n", - ca, parent, last_success - )); - } - } - } - } - } - - { - // CA -> Publication Server status - - // krill_ca_repo_success{{ca="ca"}} 1 - // krill_ca_repo_last_success_time{{ca="ca"}} 1630921599 - // krill_ca_repo_next_before_time{{ca="ca"}} 1630921599 - - res.push('\n'); - res.push_str("# HELP krill_ca_ps_success status of last CA to Publication Server connection (0=issue, 1=success)\n"); - res.push_str("# TYPE krill_ca_ps_success gauge\n"); - for (ca, status) in ca_status_map.iter() { - // skip the ones for which we have no status yet, i.e - // it was really only just added - // and no attempt to connect has yet been made. - if let Some(exchange) = status.repo().last_exchange() - { - res.push_str(&format!( - "krill_ca_ps_success{{ca=\"{}\"}} {}\n", - ca, - i32::from(exchange.was_success()) - )); - } - } - - res.push('\n'); - res.push_str("# HELP krill_ca_ps_last_success_time unix timestamp in seconds of last successful CA to Publication Server connection\n"); - res.push_str( - "# TYPE krill_ca_ps_last_success_time gauge\n", - ); - for (ca, status) in ca_status_map.iter() { - // skip the ones for which we have no status yet, i.e - // it was really only just added - // and no attempt to connect has yet been made. - if let Some(last_success) = - status.repo().last_success() - { - res.push_str(&format!( - "krill_ca_ps_last_success_time{{ca=\"{}\"}} {}\n", - ca, last_success - )); - } - } - } - - // Do not show child metrics if none of the CAs has any - // children.. Many users do not delegate so, - // showing these metrics would just be confusing. - let any_children = - cas_stats.values().any(|ca| ca.child_count() > 0); - - if any_children - && !server.config.metrics.metrics_hide_child_details - { - // CA -> Children - - // krill_cas_children{ca="parent"} 11 // nr of children - // krill_ca_child_success{ca="parent", child="child"} 1 - // krill_ca_child_state{ca="parent", child="child"} 1 - // krill_ca_child_last_connection{ca="parent", - // child="child"} 1630921599 - // krill_ca_child_last_success{ca="parent", child="child"} - // 1630921599 - // krill_ca_child_agent_total{ca="parent", - // ua="krill/0.9.2"} 11 - - res.push('\n'); - res.push_str("# HELP krill_cas_children number of children for CA\n"); - res.push_str("# TYPE krill_cas_children gauge\n"); - for (ca, status) in cas_stats.iter() { - res.push_str(&format!( - "krill_cas_children{{ca=\"{}\"}} {}\n", - ca, - status.child_count() - )); - } - - res.push('\n'); - res.push_str( - "# HELP krill_ca_child_success status of last child to CA connection (0=issue, 1=success)\n", - ); - res.push_str("# TYPE krill_ca_child_success gauge\n"); - for (ca, status) in ca_status_map.iter() { - // skip the ones for which we have no status yet, i.e - // it was really only just added - // and no attempt to connect has yet been made. - for (child, status) in status.children().iter() { - if let Some(exchange) = status.last_exchange() { - res.push_str(&format!( - "krill_ca_child_success{{ca=\"{}\", child=\"{}\"}} {}\n", - ca, - child, - i32::from(exchange.was_success()) - )); - } - } - } - - res.push('\n'); - res.push_str( - "# HELP krill_ca_child_state child state (see 'suspend_child_after_inactive_hours' config) (0=suspended, 1=active)\n", - ); - res.push_str("# TYPE krill_ca_child_state gauge\n"); - for (ca, status) in ca_status_map.iter() { - for (child, status) in status.children().iter() { - res.push_str(&format!( - "krill_ca_child_state{{ca=\"{}\", child=\"{}\"}} {}\n", - ca, - child, - i32::from(status.suspended().is_none()) - )); - } - } - - res.push('\n'); - res.push_str("# HELP krill_ca_child_last_connection unix timestamp in seconds of last child to CA connection\n"); - res.push_str( - "# TYPE krill_ca_child_last_connection gauge\n", - ); - for (ca, status) in ca_status_map.iter() { - // skip the ones for which we have no status yet, i.e - // it was really only just added - // and no attempt to connect has yet been made. - for (child, status) in status.children().iter() { - if let Some(exchange) = status.last_exchange() { - let timestamp = exchange.timestamp(); - res.push_str(&format!( - "krill_ca_child_last_connection{{ca=\"{}\", child=\"{}\"}} {}\n", - ca, child, timestamp - )); - } - } - } - - res.push('\n'); - res.push_str( - "# HELP krill_ca_child_last_success unix timestamp in seconds of last successful child to CA connection\n", - ); - res.push_str( - "# TYPE krill_ca_child_last_success gauge\n", - ); - for (ca, status) in ca_status_map.iter() { - // skip the ones for which we have no status yet, i.e - // it was really only just added - // and no attempt to connect has yet been made. - for (child, status) in status.children().iter() { - if let Some(time) = status.last_success() { - res.push_str(&format!( - "krill_ca_child_last_success{{ca=\"{}\", child=\"{}\"}} {}\n", - ca, child, time - )); - } - } - } - - res.push('\n'); - res.push_str( - "# HELP krill_ca_child_agent_total total children per user agent based on their last connection\n", - ); - res.push_str("# TYPE krill_ca_child_agent_total gauge\n"); - for (ca, status) in ca_status_map.iter() { - // skip the ones for which we have no status yet, i.e - // it was really only just added - // and no attempt to connect has yet been made. - - let mut user_agent_totals: HashMap = - HashMap::new(); - for status in status.children().values() { - if let Some(exchange) = status.last_exchange() { - let agent = exchange - .user_agent() - .cloned() - .unwrap_or_else(|| "".to_string()); - *user_agent_totals - .entry(agent) - .or_insert(0) += 1; - } - } - - for (ua, total) in user_agent_totals.iter() { - res.push_str(&format!( - "krill_ca_child_agent_total{{ca=\"{}\", user_agent=\"{}\"}} {}\n", - ca, ua, total - )); - } - } - } - - if !server.config.metrics.metrics_hide_roa_details { - // BGP Announcement metrics - - // Aggregate ROA vs BGP stats per status - let mut all_bgp_stats = AllBgpStats { - announcements_valid: HashMap::new(), - announcements_invalid_asn: HashMap::new(), - announcements_invalid_length: HashMap::new(), - announcements_not_found: HashMap::new(), - roas_too_permissive: HashMap::new(), - roas_redundant: HashMap::new(), - roas_stale: HashMap::new(), - roas_total: HashMap::new(), - }; - - for (ca, ca_stats) in cas_stats.iter() { - all_bgp_stats.add_ca(ca, ca_stats.bgp_stats()); - } - - res.push('\n'); - res.push_str("# HELP krill_cas_bgp_announcements_valid number of announcements seen for CA resources with RPKI state VALID\n"); - res.push_str( - "# TYPE krill_cas_bgp_announcements_valid gauge\n", - ); - for (ca, nr) in all_bgp_stats.announcements_valid.iter() { - res.push_str(&format!("krill_cas_bgp_announcements_valid{{ca=\"{}\"}} {}\n", ca, nr)); - } - - res.push('\n'); - res.push_str("# HELP krill_cas_bgp_announcements_invalid_asn number of announcements seen for CA resources with RPKI state INVALID (ASN mismatch)\n"); - res.push_str("# TYPE krill_cas_bgp_announcements_invalid_asn gauge\n"); - for (ca, nr) in - all_bgp_stats.announcements_invalid_asn.iter() - { - res.push_str(&format!( - "krill_cas_bgp_announcements_invalid_asn{{ca=\"{}\"}} {}\n", - ca, nr - )); - } - - res.push('\n'); - res.push_str("# HELP krill_cas_bgp_announcements_invalid_length number of announcements seen for CA resources with RPKI state INVALID (prefix exceeds max length)\n"); - res.push_str("# TYPE krill_cas_bgp_announcements_invalid_length gauge\n"); - for (ca, nr) in - all_bgp_stats.announcements_invalid_length.iter() - { - res.push_str(&format!( - "krill_cas_bgp_announcements_invalid_length{{ca=\"{}\"}} {}\n", - ca, nr - )); - } - - res.push('\n'); - res.push_str("# HELP krill_cas_bgp_announcements_not_found number of announcements seen for CA resources with RPKI state NOT FOUND (none of the CA's ROAs cover this)\n"); - res.push_str("# TYPE krill_cas_bgp_announcements_not_found gauge\n"); - for (ca, nr) in - all_bgp_stats.announcements_not_found.iter() - { - res.push_str(&format!( - "krill_cas_bgp_announcements_not_found{{ca=\"{}\"}} {}\n", - ca, nr - )); - } - - res.push('\n'); - res.push_str("# HELP krill_cas_bgp_roas_too_permissive number of ROAs for this CA which allow excess announcements (0 may also indicate that no BGP info is available)\n"); - res.push_str( - "# TYPE krill_cas_bgp_roas_too_permissive gauge\n", - ); - for (ca, nr) in all_bgp_stats.roas_too_permissive.iter() { - res.push_str(&format!("krill_cas_bgp_roas_too_permissive{{ca=\"{}\"}} {}\n", ca, nr)); - } - - res.push('\n'); - res.push_str("# HELP krill_cas_bgp_roas_redundant number of ROAs for this CA which are redundant (0 may also indicate that no BGP info is available)\n"); - res.push_str( - "# TYPE krill_cas_bgp_roas_redundant gauge\n", - ); - for (ca, nr) in all_bgp_stats.roas_redundant.iter() { - res.push_str(&format!( - "krill_cas_bgp_roas_redundant{{ca=\"{}\"}} {}\n", - ca, nr - )); - } - - res.push('\n'); - res.push_str("# HELP krill_cas_bgp_roas_stale number of ROAs for this CA for which no announcements are seen (0 may also indicate that no BGP info is available)\n"); - res.push_str("# TYPE krill_cas_bgp_roas_stale gauge\n"); - for (ca, nr) in all_bgp_stats.roas_stale.iter() { - res.push_str(&format!( - "krill_cas_bgp_roas_stale{{ca=\"{}\"}} {}\n", - ca, nr - )); - } - - res.push('\n'); - res.push_str("# HELP krill_cas_bgp_roas_total total number of ROAs for this CA\n"); - res.push_str("# TYPE krill_cas_bgp_roas_total gauge\n"); - for (ca, nr) in all_bgp_stats.roas_total.iter() { - res.push_str(&format!( - "krill_cas_bgp_roas_total{{ca=\"{}\"}} {}\n", - ca, nr - )); - } - } - } - } - - if let Ok(stats) = server.repo_stats() { - let publishers = stats.get_publishers(); - - res.push('\n'); - res.push_str("# HELP krill_repo_publisher number of publishers in repository\n"); - res.push_str("# TYPE krill_repo_publisher gauge\n"); - res.push_str(&format!( - "krill_repo_publisher {}\n", - publishers.len() - )); - - if let Some(last_update) = stats.last_update() { - res.push('\n'); - res.push_str( - "# HELP krill_repo_rrdp_last_update unix timestamp in seconds of last update by any publisher\n", - ); - res.push_str("# TYPE krill_repo_rrdp_last_update gauge\n"); - res.push_str(&format!( - "krill_repo_rrdp_last_update {}\n", - last_update.timestamp() - )); - } - - res.push('\n'); - res.push_str("# HELP krill_repo_rrdp_serial RRDP serial\n"); - res.push_str("# TYPE krill_repo_rrdp_serial counter\n"); - res.push_str(&format!( - "krill_repo_rrdp_serial {}\n", - stats.serial() - )); - - if !server.config.metrics.metrics_hide_publisher_details { - res.push('\n'); - res.push_str("# HELP krill_repo_objects number of objects in repository for publisher\n"); - res.push_str("# TYPE krill_repo_objects gauge\n"); - for (publisher, stats) in publishers { - res.push_str(&format!( - "krill_repo_objects{{publisher=\"{}\"}} {}\n", - publisher, - stats.objects() - )); - } - - res.push('\n'); - res.push_str("# HELP krill_repo_size size of objects in bytes in repository for publisher\n"); - res.push_str("# TYPE krill_repo_size gauge\n"); - for (publisher, stats) in publishers { - res.push_str(&format!( - "krill_repo_size{{publisher=\"{}\"}} {}\n", - publisher, - stats.size() - )); - } - - res.push('\n'); - res.push_str("# HELP krill_repo_last_update unix timestamp in seconds of last update for publisher\n"); - res.push_str("# TYPE krill_repo_last_update gauge\n"); - for (publisher, stats) in publishers { - if let Some(last_update) = stats.last_update() { - res.push_str(&format!( - "krill_repo_last_update{{publisher=\"{}\"}} {}\n", - publisher, - last_update.timestamp() - )); - } - } - } - } - - Ok(HttpResponse::text(res.into_bytes())) - } else { - Err(req) - } -} - //------------ Publication --------------------------------------------------- /// Handle RFC8181 queries and return the appropriate response.