From 47743c1d98600a750cb6982e45f71b857d7e8022 Mon Sep 17 00:00:00 2001 From: evenyag Date: Thu, 1 Jun 2023 15:47:50 +0800 Subject: [PATCH 01/13] chore: print source error in mem-prof --- src/common/mem-prof/src/error.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/common/mem-prof/src/error.rs b/src/common/mem-prof/src/error.rs index 432c5289683b..1be016426955 100644 --- a/src/common/mem-prof/src/error.rs +++ b/src/common/mem-prof/src/error.rs @@ -23,7 +23,7 @@ pub type Result = std::result::Result; #[derive(Debug, Snafu)] #[snafu(visibility(pub))] pub enum Error { - #[snafu(display("Failed to read OPT_PROF"))] + #[snafu(display("Failed to read OPT_PROF, source: {}", source))] ReadOptProf { source: tikv_jemalloc_ctl::Error }, #[snafu(display("Memory profiling is not enabled"))] @@ -32,13 +32,17 @@ pub enum Error { #[snafu(display("Failed to build temp file from given path: {:?}", path))] BuildTempPath { path: PathBuf, location: Location }, - #[snafu(display("Failed to open temp file: {}", path))] + #[snafu(display("Failed to open temp file: {}, source: {}", path, source))] OpenTempFile { path: String, source: std::io::Error, }, - #[snafu(display("Failed to dump profiling data to temp file: {:?}", path))] + #[snafu(display( + "Failed to dump profiling data to temp file: {:?}, source: {}", + path, + source + ))] DumpProfileData { path: PathBuf, source: tikv_jemalloc_ctl::Error, From 81338d9eceb088b7e1aebc994a80089de6aceb00 Mon Sep 17 00:00:00 2001 From: evenyag Date: Thu, 1 Jun 2023 16:06:56 +0800 Subject: [PATCH 02/13] feat(common-pprof): add pprof crate --- Cargo.lock | 155 ++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/common/pprof/Cargo.toml | 16 ++++ src/common/pprof/src/lib.rs | 101 +++++++++++++++++++++++ 4 files changed, 273 insertions(+) create mode 100644 src/common/pprof/Cargo.toml create mode 100644 src/common/pprof/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index c733e5c41fcd..b700ef21ad5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1817,6 +1817,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "common-pprof" +version = "0.2.0" +dependencies = [ + "common-error", + "pprof", + "protobuf", + "snafu", + "tokio", +] + [[package]] name = "common-procedure" version = "0.2.0" @@ -2079,6 +2090,15 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +[[package]] +name = "cpp_demangle" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c76f98bdfc7f66172e6c7065f981ebb576ffc903fe4c0561d9f0c2509226dc6" +dependencies = [ + "cfg-if 1.0.0", +] + [[package]] name = "cpufeatures" version = "0.2.7" @@ -2656,6 +2676,15 @@ dependencies = [ "snafu", ] +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid", +] + [[package]] name = "der" version = "0.5.1" @@ -3100,6 +3129,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "findshlibs" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" +dependencies = [ + "cc", + "lazy_static", + "libc", + "winapi", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -4402,6 +4443,24 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" +[[package]] +name = "inferno" +version = "0.11.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fb7c1b80a1dfa604bb4a649a5c5aeef3d913f7c520cb42b40e534e8a61bcdfc" +dependencies = [ + "ahash 0.8.3", + "indexmap", + "is-terminal", + "itoa", + "log", + "num-format", + "once_cell", + "quick-xml 0.26.0", + "rgb", + "str_stack", +] + [[package]] name = "influxdb_line_protocol" version = "0.1.0" @@ -5643,6 +5702,16 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec", + "itoa", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -6519,6 +6588,29 @@ dependencies = [ "postgres-protocol", ] +[[package]] +name = "pprof" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "196ded5d4be535690899a4631cc9f18cdc41b7ebf24a79400f46f48e49a11059" +dependencies = [ + "backtrace", + "cfg-if 1.0.0", + "findshlibs", + "inferno", + "libc", + "log", + "nix 0.26.2", + "once_cell", + "parking_lot 0.12.1", + "protobuf", + "protobuf-codegen-pure", + "smallvec", + "symbolic-demangle", + "tempfile", + "thiserror", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -6796,6 +6888,16 @@ dependencies = [ "protobuf", ] +[[package]] +name = "protobuf-codegen-pure" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865" +dependencies = [ + "protobuf", + "protobuf-codegen", +] + [[package]] name = "ptr_meta" version = "0.1.4" @@ -6987,6 +7089,15 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "quick-xml" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" +dependencies = [ + "memchr", +] + [[package]] name = "quick-xml" version = "0.27.1" @@ -7342,6 +7453,15 @@ dependencies = [ "thiserror", ] +[[package]] +name = "rgb" +version = "0.8.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20ec2d3e3fc7a92ced357df9cebd5a10b6fb2aa1ee797bf7e9ce2f17dffc8f59" +dependencies = [ + "bytemuck", +] + [[package]] name = "ring" version = "0.16.20" @@ -8941,6 +9061,12 @@ dependencies = [ "optional", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -9048,6 +9174,12 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" +[[package]] +name = "str_stack" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" + [[package]] name = "streaming-stats" version = "0.2.3" @@ -9204,6 +9336,29 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "symbolic-common" +version = "10.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b55cdc318ede251d0957f07afe5fed912119b8c1bc5a7804151826db999e737" +dependencies = [ + "debugid", + "memmap2", + "stable_deref_trait", + "uuid", +] + +[[package]] +name = "symbolic-demangle" +version = "10.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79be897be8a483a81fff6a3a4e195b4ac838ef73ca42d348b3f722da9902e489" +dependencies = [ + "cpp_demangle", + "rustc-demangle", + "symbolic-common", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index 12c8bb884853..e4d124c0ba7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ members = [ "src/common/meta", "src/common/procedure", "src/common/procedure-test", + "src/common/pprof", "src/common/query", "src/common/recordbatch", "src/common/runtime", diff --git a/src/common/pprof/Cargo.toml b/src/common/pprof/Cargo.toml new file mode 100644 index 000000000000..d5202974e598 --- /dev/null +++ b/src/common/pprof/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "common-pprof" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +common-error = { path = "../error" } +pprof = { version = "0.11.1", features = [ + "flamegraph", + "protobuf-codec", + "protobuf", +] } +protobuf = "2" +snafu.workspace = true +tokio.workspace = true diff --git a/src/common/pprof/src/lib.rs b/src/common/pprof/src/lib.rs new file mode 100644 index 000000000000..bff5db26ddad --- /dev/null +++ b/src/common/pprof/src/lib.rs @@ -0,0 +1,101 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::any::Any; +use std::time::Duration; + +use common_error::prelude::{ErrorExt, StatusCode}; +use pprof::protos::Message; +use snafu::{ResultExt, Snafu}; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Failed to create profiler guard, source: {}", source))] + CreateGuard { source: pprof::Error }, + + #[snafu(display("Failed to create report, source: {}", source))] + CreateReport { source: pprof::Error }, + + #[snafu(display("Failed to create flamegraph, source: {}", source))] + CreateFlamegraph { source: pprof::Error }, + + #[snafu(display("Failed to create pprof report, source: {}", source))] + ReportPprof { source: pprof::Error }, + + #[snafu(display("Failed to write report, source: {}", source))] + WriteReport { source: protobuf::ProtobufError }, +} + +pub type Result = std::result::Result; + +impl ErrorExt for Error { + fn status_code(&self) -> StatusCode { + StatusCode::Unexpected + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +/// CPU profiler utility. +// Inspired by https://github.com/datafuselabs/databend/blob/67f445e83cd4eceda98f6c1c114858929d564029/src/common/base/src/base/profiling.rs +#[derive(Debug)] +pub struct Profiling { + /// Sample duration. + duration: Duration, + /// Sample frequency. + frequency: i32, +} + +impl Profiling { + /// Creates a new profiler. + pub fn new(duration: Duration, frequency: i32) -> Profiling { + Profiling { + duration, + frequency, + } + } + + /// Profiles and returns a generated pprof report. + pub async fn report(&self) -> Result { + let guard = pprof::ProfilerGuard::new(self.frequency).context(CreateGuardSnafu)?; + tokio::time::sleep(self.duration).await; + guard.report().build().context(CreateReportSnafu) + } + + /// Profiles and returns a generated flamegraph. + pub async fn dump_flamegraph(&self) -> Result> { + let mut body: Vec = Vec::new(); + + let report = self.report().await?; + report + .flamegraph(&mut body) + .context(CreateFlamegraphSnafu)?; + + Ok(body) + } + + /// Profiles and returns a generated proto. + pub async fn dump_proto(&self) -> Result> { + let mut body: Vec = Vec::new(); + + let report = self.report().await?; + // Generate google’s pprof format report. + let profile = report.pprof().context(ReportPprofSnafu)?; + profile.write_to_vec(&mut body).context(WriteReportSnafu)?; + + Ok(body) + } +} From cab56fa36190392ec3871db79451af1d801d2b9a Mon Sep 17 00:00:00 2001 From: evenyag Date: Thu, 1 Jun 2023 19:31:44 +0800 Subject: [PATCH 03/13] feat(servers): Add pprof handler to router refactor the mem_prof handler to avoid checking feature while registering router --- Cargo.lock | 1 + src/servers/Cargo.toml | 1 + src/servers/src/error.rs | 8 +++++ src/servers/src/http.rs | 42 +++++++++++++++++------- src/servers/src/http/mem_prof.rs | 18 +++++++--- src/servers/src/http/pprof.rs | 56 ++++++++++++++++++++++++++++++++ 6 files changed, 111 insertions(+), 15 deletions(-) create mode 100644 src/servers/src/http/pprof.rs diff --git a/Cargo.lock b/Cargo.lock index b700ef21ad5c..5a56f6220c2b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8534,6 +8534,7 @@ dependencies = [ "common-grpc", "common-grpc-expr", "common-mem-prof", + "common-pprof", "common-query", "common-recordbatch", "common-runtime", diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index 379b123e82ad..0b9eba400034 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -25,6 +25,7 @@ common-error = { path = "../common/error" } common-grpc = { path = "../common/grpc" } common-grpc-expr = { path = "../common/grpc-expr" } common-mem-prof = { path = "../common/mem-prof", optional = true } +common-pprof = { path = "../common/pprof" } common-query = { path = "../common/query" } common-recordbatch = { path = "../common/recordbatch" } common-runtime = { path = "../common/runtime" } diff --git a/src/servers/src/error.rs b/src/servers/src/error.rs index 57d3789c019f..1042b5ad33cc 100644 --- a/src/servers/src/error.rs +++ b/src/servers/src/error.rs @@ -266,6 +266,12 @@ pub enum Error { source: tokio::task::JoinError, location: Location, }, + + #[snafu(display("Failed to dump pprof data, source: {}", source))] + DumpPprof { + #[snafu(backtrace)] + source: common_pprof::Error, + }, } pub type Result = std::result::Result; @@ -341,6 +347,8 @@ impl ErrorExt for Error { StatusCode::Unknown } } + + DumpPprof { source, .. } => source.status_code(), } } diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index 2674e25c4825..320abf9c42de 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod admin; pub mod authorize; pub mod handler; pub mod influxdb; +pub mod mem_prof; pub mod opentsdb; +mod pprof; pub mod prometheus; pub mod script; -mod admin; #[cfg(feature = "dashboard")] mod dashboard; -#[cfg(feature = "mem-prof")] -pub mod mem_prof; use std::net::SocketAddr; use std::sync::Arc; @@ -503,14 +503,19 @@ impl HttpServer { ); } - // mem profiler - #[cfg(feature = "mem-prof")] - { - router = router.nest( - &format!("/{HTTP_API_VERSION}/prof"), - Router::new().route("/mem", routing::get(crate::http::mem_prof::mem_prof)), - ); - } + // prof routers + router = router.nest( + &format!("/{HTTP_API_VERSION}/prof"), + Router::new() + .route( + "/cpu", + routing::get(pprof::pprof_handler).post(pprof::pprof_handler), + ) + .route( + "/mem", + routing::get(mem_prof::mem_prof_handler).post(mem_prof::mem_prof_handler), + ), + ); if let Some(metrics_handler) = self.metrics_handler { router = router.nest("", self.route_metrics(metrics_handler)); @@ -609,6 +614,21 @@ impl HttpServer { .route("/flush", routing::post(flush)) .with_state(grpc_handler) } + + // fn route_prof(&self) -> Router { + // Router::new().route("/cpu", routing::get(crate::http::pprof::pprof)) + // // let mut router = Router::new(); + // // // cpu profiler + // // router = router.route("/cpu", routing::get(crate::http::pprof::pprof)); + + // // // mem profiler + // // #[cfg(feature = "mem-prof")] + // // { + // // router = router.route("/mem", routing::get(crate::http::mem_prof::mem_prof)); + // // } + + // // router + // } } /// A middleware to record metrics for HTTP. diff --git a/src/servers/src/http/mem_prof.rs b/src/servers/src/http/mem_prof.rs index 11d04d5094c2..7b21c40503b6 100644 --- a/src/servers/src/http/mem_prof.rs +++ b/src/servers/src/http/mem_prof.rs @@ -14,13 +14,14 @@ use axum::http::StatusCode; use axum::response::IntoResponse; -use snafu::ResultExt; - -use crate::error::DumpProfileDataSnafu; #[cfg(feature = "mem-prof")] #[axum_macros::debug_handler] -pub async fn mem_prof() -> crate::error::Result { +pub async fn mem_prof_handler() -> crate::error::Result { + use snafu::ResultExt; + + use crate::error::DumpProfileDataSnafu; + Ok(( StatusCode::OK, common_mem_prof::dump_profile() @@ -28,3 +29,12 @@ pub async fn mem_prof() -> crate::error::Result { .context(DumpProfileDataSnafu)?, )) } + +#[cfg(not(feature = "mem-prof"))] +#[axum_macros::debug_handler] +pub async fn mem_prof_handler() -> crate::error::Result { + Ok(( + StatusCode::NOT_IMPLEMENTED, + "The 'mem-prof' feature is disabled", + )) +} diff --git a/src/servers/src/http/pprof.rs b/src/servers/src/http/pprof.rs new file mode 100644 index 000000000000..00fe944d1c9e --- /dev/null +++ b/src/servers/src/http/pprof.rs @@ -0,0 +1,56 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::num::NonZeroI32; +use std::time::Duration; + +use axum::extract::Query; +use axum::http::StatusCode; +use axum::response::IntoResponse; +use common_pprof::Profiling; +use common_telemetry::logging; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use snafu::ResultExt; + +use crate::error::{DumpPprofSnafu, Result}; + +#[derive(Serialize, Deserialize, Debug, JsonSchema)] +#[serde(default)] +pub struct PprofQuery { + seconds: u64, + frequency: NonZeroI32, +} + +impl Default for PprofQuery { + fn default() -> PprofQuery { + PprofQuery { + seconds: 5, + // Safety: 99 is non zero. + frequency: NonZeroI32::new(99).unwrap(), + } + } +} + +#[axum_macros::debug_handler] +pub async fn pprof_handler(Query(req): Query) -> Result { + logging::info!("start pprof, request: {:?}", req); + + let profiling = Profiling::new(Duration::from_secs(req.seconds), req.frequency.into()); + let body = profiling.dump_proto().await.context(DumpPprofSnafu)?; + + logging::info!("finish pprof"); + + Ok((StatusCode::OK, body)) +} From d2cd96e657ea75e7eced583d50437842e1afa11a Mon Sep 17 00:00:00 2001 From: evenyag Date: Thu, 1 Jun 2023 20:40:46 +0800 Subject: [PATCH 04/13] feat(servers): pprof handler support different output type --- src/servers/src/http/pprof.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/servers/src/http/pprof.rs b/src/servers/src/http/pprof.rs index 00fe944d1c9e..e34d36d76629 100644 --- a/src/servers/src/http/pprof.rs +++ b/src/servers/src/http/pprof.rs @@ -26,11 +26,24 @@ use snafu::ResultExt; use crate::error::{DumpPprofSnafu, Result}; +/// Output format. +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum Output { + /// google’s pprof format report in protobuf. + Proto, + /// Simple text format. + Text, + /// svg flamegraph. + Flamegraph, +} + #[derive(Serialize, Deserialize, Debug, JsonSchema)] #[serde(default)] pub struct PprofQuery { seconds: u64, frequency: NonZeroI32, + output: Output, } impl Default for PprofQuery { @@ -39,6 +52,7 @@ impl Default for PprofQuery { seconds: 5, // Safety: 99 is non zero. frequency: NonZeroI32::new(99).unwrap(), + output: Output::Proto, } } } @@ -48,7 +62,14 @@ pub async fn pprof_handler(Query(req): Query) -> Result profiling.dump_proto().await.context(DumpPprofSnafu)?, + Output::Text => { + let report = profiling.report().await.context(DumpPprofSnafu)?; + format!("{:?}", report).into_bytes() + } + Output::Flamegraph => profiling.dump_flamegraph().await.context(DumpPprofSnafu)?, + }; logging::info!("finish pprof"); From 740269db65aca2cba0cd59e7438b1a093418d79c Mon Sep 17 00:00:00 2001 From: evenyag Date: Thu, 1 Jun 2023 20:52:40 +0800 Subject: [PATCH 05/13] docs(common-pprof): Add readme --- src/common/pprof/README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/common/pprof/README.md diff --git a/src/common/pprof/README.md b/src/common/pprof/README.md new file mode 100644 index 000000000000..98d62cf0b1e9 --- /dev/null +++ b/src/common/pprof/README.md @@ -0,0 +1,18 @@ +# Profiling CPU + + +## HTTP API +Sample at 99 Hertz, for 5 seconds, output report in protobuf format. +```bash +curl -s '0:4000/v1/prof/cpu' > /tmp/pprof.out +``` + +Sample at 99 Hertz, for 60 seconds, output report in flamegraph format. +```bash +curl -s '0:4000/v1/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg +``` + +Sample at 49 Hertz, for 10 seconds, output report in text format. +```bash +curl -s '0:4000/v1/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt +``` From a69389fe9849dc63e2524e2bef42b55569ca7a0d Mon Sep 17 00:00:00 2001 From: evenyag Date: Thu, 1 Jun 2023 21:03:38 +0800 Subject: [PATCH 06/13] feat(common-pprof): Build guard using code in pprof-rs's example --- src/common/pprof/src/lib.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/common/pprof/src/lib.rs b/src/common/pprof/src/lib.rs index bff5db26ddad..62ccdd543faf 100644 --- a/src/common/pprof/src/lib.rs +++ b/src/common/pprof/src/lib.rs @@ -70,7 +70,11 @@ impl Profiling { /// Profiles and returns a generated pprof report. pub async fn report(&self) -> Result { - let guard = pprof::ProfilerGuard::new(self.frequency).context(CreateGuardSnafu)?; + let guard = pprof::ProfilerGuardBuilder::default() + .frequency(self.frequency) + .blocklist(&["libc", "libgcc", "pthread", "vdso"]) + .build() + .context(CreateGuardSnafu)?; tokio::time::sleep(self.duration).await; guard.report().build().context(CreateReportSnafu) } From 4fde8ae17c152480b379a33e5d9b381125715f1c Mon Sep 17 00:00:00 2001 From: evenyag Date: Thu, 1 Jun 2023 21:09:37 +0800 Subject: [PATCH 07/13] feat(common-pprof): use prost --- Cargo.lock | 17 +++++------------ src/common/pprof/Cargo.toml | 4 ++-- src/common/pprof/src/lib.rs | 9 ++------- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a56f6220c2b..573705ad4c48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1823,7 +1823,7 @@ version = "0.2.0" dependencies = [ "common-error", "pprof", - "protobuf", + "prost", "snafu", "tokio", ] @@ -6603,8 +6603,11 @@ dependencies = [ "nix 0.26.2", "once_cell", "parking_lot 0.12.1", + "prost", + "prost-build", + "prost-derive", "protobuf", - "protobuf-codegen-pure", + "sha2", "smallvec", "symbolic-demangle", "tempfile", @@ -6888,16 +6891,6 @@ dependencies = [ "protobuf", ] -[[package]] -name = "protobuf-codegen-pure" -version = "2.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865" -dependencies = [ - "protobuf", - "protobuf-codegen", -] - [[package]] name = "ptr_meta" version = "0.1.4" diff --git a/src/common/pprof/Cargo.toml b/src/common/pprof/Cargo.toml index d5202974e598..0a29893097dc 100644 --- a/src/common/pprof/Cargo.toml +++ b/src/common/pprof/Cargo.toml @@ -8,9 +8,9 @@ license.workspace = true common-error = { path = "../error" } pprof = { version = "0.11.1", features = [ "flamegraph", - "protobuf-codec", + "prost-codec", "protobuf", ] } -protobuf = "2" +prost.workspace = true snafu.workspace = true tokio.workspace = true diff --git a/src/common/pprof/src/lib.rs b/src/common/pprof/src/lib.rs index 62ccdd543faf..b6f1af477e92 100644 --- a/src/common/pprof/src/lib.rs +++ b/src/common/pprof/src/lib.rs @@ -16,7 +16,7 @@ use std::any::Any; use std::time::Duration; use common_error::prelude::{ErrorExt, StatusCode}; -use pprof::protos::Message; +use prost::Message; use snafu::{ResultExt, Snafu}; #[derive(Debug, Snafu)] @@ -32,9 +32,6 @@ pub enum Error { #[snafu(display("Failed to create pprof report, source: {}", source))] ReportPprof { source: pprof::Error }, - - #[snafu(display("Failed to write report, source: {}", source))] - WriteReport { source: protobuf::ProtobufError }, } pub type Result = std::result::Result; @@ -93,12 +90,10 @@ impl Profiling { /// Profiles and returns a generated proto. pub async fn dump_proto(&self) -> Result> { - let mut body: Vec = Vec::new(); - let report = self.report().await?; // Generate google’s pprof format report. let profile = report.pprof().context(ReportPprofSnafu)?; - profile.write_to_vec(&mut body).context(WriteReportSnafu)?; + let body = profile.encode_to_vec(); Ok(body) } From e7f12602d7ad0520e020e09dc968d61fab26ccb4 Mon Sep 17 00:00:00 2001 From: evenyag Date: Fri, 2 Jun 2023 21:43:35 +0800 Subject: [PATCH 08/13] feat: don't add timeout to perf api --- src/servers/src/http.rs | 54 ++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index 320abf9c42de..2999dfdd1365 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -139,7 +139,7 @@ impl Default for HttpOptions { fn default() -> Self { Self { addr: "127.0.0.1:4000".to_string(), - timeout: Duration::from_secs(30), + timeout: Duration::from_secs(3), disable_dashboard: false, } } @@ -504,18 +504,18 @@ impl HttpServer { } // prof routers - router = router.nest( - &format!("/{HTTP_API_VERSION}/prof"), - Router::new() - .route( - "/cpu", - routing::get(pprof::pprof_handler).post(pprof::pprof_handler), - ) - .route( - "/mem", - routing::get(mem_prof::mem_prof_handler).post(mem_prof::mem_prof_handler), - ), - ); + // router = router.nest( + // &format!("/{HTTP_API_VERSION}/prof"), + // Router::new() + // .route( + // "/cpu", + // routing::get(pprof::pprof_handler).post(pprof::pprof_handler), + // ) + // .route( + // "/mem", + // routing::get(mem_prof::mem_prof_handler).post(mem_prof::mem_prof_handler), + // ), + // ); if let Some(metrics_handler) = self.metrics_handler { router = router.nest("", self.route_metrics(metrics_handler)); @@ -561,6 +561,19 @@ impl HttpServer { HttpAuth::::new(self.user_provider.clone()), )), ) + // Handlers for debug, we don't expect a timeout. + .nest( + &format!("/{HTTP_API_VERSION}/prof"), + Router::new() + .route( + "/cpu", + routing::get(pprof::pprof_handler).post(pprof::pprof_handler), + ) + .route( + "/mem", + routing::get(mem_prof::mem_prof_handler).post(mem_prof::mem_prof_handler), + ), + ) } fn route_metrics(&self, metrics_handler: MetricsHandler) -> Router { @@ -614,21 +627,6 @@ impl HttpServer { .route("/flush", routing::post(flush)) .with_state(grpc_handler) } - - // fn route_prof(&self) -> Router { - // Router::new().route("/cpu", routing::get(crate::http::pprof::pprof)) - // // let mut router = Router::new(); - // // // cpu profiler - // // router = router.route("/cpu", routing::get(crate::http::pprof::pprof)); - - // // // mem profiler - // // #[cfg(feature = "mem-prof")] - // // { - // // router = router.route("/mem", routing::get(crate::http::mem_prof::mem_prof)); - // // } - - // // router - // } } /// A middleware to record metrics for HTTP. From df393fb399c7cdd4d5fad654646e0f643eabdae6 Mon Sep 17 00:00:00 2001 From: evenyag Date: Mon, 5 Jun 2023 11:52:06 +0800 Subject: [PATCH 09/13] feat: add feature pprof --- src/common/pprof/README.md | 5 ++ src/servers/Cargo.toml | 3 +- src/servers/src/error.rs | 2 + src/servers/src/http.rs | 14 ---- src/servers/src/http/pprof.rs | 123 ++++++++++++++++++++-------------- 5 files changed, 81 insertions(+), 66 deletions(-) diff --git a/src/common/pprof/README.md b/src/common/pprof/README.md index 98d62cf0b1e9..71b6131a63ef 100644 --- a/src/common/pprof/README.md +++ b/src/common/pprof/README.md @@ -1,5 +1,10 @@ # Profiling CPU +## Build GreptimeDB with `pprof` feature + +```bash +cargo build --features=pprof +``` ## HTTP API Sample at 99 Hertz, for 5 seconds, output report in protobuf format. diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index 0b9eba400034..2523aabcf79d 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -5,6 +5,7 @@ edition.workspace = true license.workspace = true [features] +pprof = ["dep:common-pprof"] mem-prof = ["dep:common-mem-prof"] dashboard = [] @@ -25,7 +26,7 @@ common-error = { path = "../common/error" } common-grpc = { path = "../common/grpc" } common-grpc-expr = { path = "../common/grpc-expr" } common-mem-prof = { path = "../common/mem-prof", optional = true } -common-pprof = { path = "../common/pprof" } +common-pprof = { path = "../common/pprof", optional = true } common-query = { path = "../common/query" } common-recordbatch = { path = "../common/recordbatch" } common-runtime = { path = "../common/runtime" } diff --git a/src/servers/src/error.rs b/src/servers/src/error.rs index 1042b5ad33cc..02adec1782ea 100644 --- a/src/servers/src/error.rs +++ b/src/servers/src/error.rs @@ -267,6 +267,7 @@ pub enum Error { location: Location, }, + #[cfg(feature = "pprof")] #[snafu(display("Failed to dump pprof data, source: {}", source))] DumpPprof { #[snafu(backtrace)] @@ -348,6 +349,7 @@ impl ErrorExt for Error { } } + #[cfg(feature = "pprof")] DumpPprof { source, .. } => source.status_code(), } } diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index 2999dfdd1365..894380cc7e5e 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -503,20 +503,6 @@ impl HttpServer { ); } - // prof routers - // router = router.nest( - // &format!("/{HTTP_API_VERSION}/prof"), - // Router::new() - // .route( - // "/cpu", - // routing::get(pprof::pprof_handler).post(pprof::pprof_handler), - // ) - // .route( - // "/mem", - // routing::get(mem_prof::mem_prof_handler).post(mem_prof::mem_prof_handler), - // ), - // ); - if let Some(metrics_handler) = self.metrics_handler { router = router.nest("", self.route_metrics(metrics_handler)); } diff --git a/src/servers/src/http/pprof.rs b/src/servers/src/http/pprof.rs index e34d36d76629..1a5f651078fd 100644 --- a/src/servers/src/http/pprof.rs +++ b/src/servers/src/http/pprof.rs @@ -12,66 +12,87 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::num::NonZeroI32; -use std::time::Duration; +#[cfg(feature = "pprof")] +pub mod handler { + use std::num::NonZeroI32; + use std::time::Duration; -use axum::extract::Query; -use axum::http::StatusCode; -use axum::response::IntoResponse; -use common_pprof::Profiling; -use common_telemetry::logging; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use snafu::ResultExt; + use axum::extract::Query; + use axum::http::StatusCode; + use axum::response::IntoResponse; + use common_pprof::Profiling; + use common_telemetry::logging; + use schemars::JsonSchema; + use serde::{Deserialize, Serialize}; + use snafu::ResultExt; -use crate::error::{DumpPprofSnafu, Result}; + use crate::error::{DumpPprofSnafu, Result}; -/// Output format. -#[derive(Debug, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub enum Output { - /// google’s pprof format report in protobuf. - Proto, - /// Simple text format. - Text, - /// svg flamegraph. - Flamegraph, -} + /// Output format. + #[derive(Debug, Serialize, Deserialize, JsonSchema)] + #[serde(rename_all = "snake_case")] + pub enum Output { + /// google’s pprof format report in protobuf. + Proto, + /// Simple text format. + Text, + /// svg flamegraph. + Flamegraph, + } -#[derive(Serialize, Deserialize, Debug, JsonSchema)] -#[serde(default)] -pub struct PprofQuery { - seconds: u64, - frequency: NonZeroI32, - output: Output, -} + #[derive(Serialize, Deserialize, Debug, JsonSchema)] + #[serde(default)] + pub struct PprofQuery { + seconds: u64, + frequency: NonZeroI32, + output: Output, + } -impl Default for PprofQuery { - fn default() -> PprofQuery { - PprofQuery { - seconds: 5, - // Safety: 99 is non zero. - frequency: NonZeroI32::new(99).unwrap(), - output: Output::Proto, + impl Default for PprofQuery { + fn default() -> PprofQuery { + PprofQuery { + seconds: 5, + // Safety: 99 is non zero. + frequency: NonZeroI32::new(99).unwrap(), + output: Output::Proto, + } } } -} -#[axum_macros::debug_handler] -pub async fn pprof_handler(Query(req): Query) -> Result { - logging::info!("start pprof, request: {:?}", req); + #[axum_macros::debug_handler] + pub async fn pprof_handler(Query(req): Query) -> Result { + logging::info!("start pprof, request: {:?}", req); - let profiling = Profiling::new(Duration::from_secs(req.seconds), req.frequency.into()); - let body = match req.output { - Output::Proto => profiling.dump_proto().await.context(DumpPprofSnafu)?, - Output::Text => { - let report = profiling.report().await.context(DumpPprofSnafu)?; - format!("{:?}", report).into_bytes() - } - Output::Flamegraph => profiling.dump_flamegraph().await.context(DumpPprofSnafu)?, - }; + let profiling = Profiling::new(Duration::from_secs(req.seconds), req.frequency.into()); + let body = match req.output { + Output::Proto => profiling.dump_proto().await.context(DumpPprofSnafu)?, + Output::Text => { + let report = profiling.report().await.context(DumpPprofSnafu)?; + format!("{:?}", report).into_bytes() + } + Output::Flamegraph => profiling.dump_flamegraph().await.context(DumpPprofSnafu)?, + }; - logging::info!("finish pprof"); + logging::info!("finish pprof"); - Ok((StatusCode::OK, body)) + Ok((StatusCode::OK, body)) + } } + +#[cfg(not(feature = "pprof"))] +pub mod handler { + use axum::http::StatusCode; + use axum::response::IntoResponse; + + use crate::error::Result; + + #[axum_macros::debug_handler] + pub async fn pprof_handler() -> Result { + Ok(( + StatusCode::NOT_IMPLEMENTED, + "The 'pprof' feature is disabled", + )) + } +} + +pub use handler::pprof_handler; From 00aec2449e4d930e20e358202c425665cd371313 Mon Sep 17 00:00:00 2001 From: evenyag Date: Mon, 5 Jun 2023 11:57:00 +0800 Subject: [PATCH 10/13] feat: update readme --- src/common/pprof/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/common/pprof/README.md b/src/common/pprof/README.md index 71b6131a63ef..b8393ae3f598 100644 --- a/src/common/pprof/README.md +++ b/src/common/pprof/README.md @@ -7,11 +7,16 @@ cargo build --features=pprof ``` ## HTTP API -Sample at 99 Hertz, for 5 seconds, output report in protobuf format. +Sample at 99 Hertz, for 5 seconds, output report in [protobuf format](https://github.com/google/pprof/blob/master/proto/profile.proto). ```bash curl -s '0:4000/v1/prof/cpu' > /tmp/pprof.out ``` +Then you can use `pprof` command with the protobuf file. +```bash +go tool pprof -top /tmp/pprof.out +``` + Sample at 99 Hertz, for 60 seconds, output report in flamegraph format. ```bash curl -s '0:4000/v1/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg From 2eff64dac7dfd1853b5fb67f64dd5ec5dc1006d7 Mon Sep 17 00:00:00 2001 From: evenyag Date: Mon, 5 Jun 2023 18:51:20 +0800 Subject: [PATCH 11/13] test: fix tests --- src/servers/src/http.rs | 2 +- src/storage/src/manifest/region.rs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index 894380cc7e5e..cfe9f668a5ec 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -139,7 +139,7 @@ impl Default for HttpOptions { fn default() -> Self { Self { addr: "127.0.0.1:4000".to_string(), - timeout: Duration::from_secs(3), + timeout: Duration::from_secs(30), disable_dashboard: false, } } diff --git a/src/storage/src/manifest/region.rs b/src/storage/src/manifest/region.rs index 337b32077fef..59381348706c 100644 --- a/src/storage/src/manifest/region.rs +++ b/src/storage/src/manifest/region.rs @@ -341,12 +341,13 @@ mod tests { builder.root(&tmp_dir.path().to_string_lossy()); let object_store = ObjectStore::new(builder).unwrap().finish(); + let test_gc_duration = Duration::from_millis(50); let manifest = RegionManifest::with_checkpointer( "/manifest/", object_store, manifest_compress_type(compress), None, - Some(Duration::from_millis(50)), + Some(test_gc_duration), ); manifest.start().await.unwrap(); @@ -492,7 +493,7 @@ mod tests { ); // wait for gc - tokio::time::sleep(Duration::from_millis(60)).await; + tokio::time::sleep(test_gc_duration * 3).await; for v in checkpoint_versions { if v < 4 { From e243124c846a5fd34f04b0532953e62cf0201f4e Mon Sep 17 00:00:00 2001 From: evenyag Date: Tue, 6 Jun 2023 11:10:44 +0800 Subject: [PATCH 12/13] feat: close region in TestBase --- src/storage/src/region/tests.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/storage/src/region/tests.rs b/src/storage/src/region/tests.rs index 629a0c364c72..bd9faf844f61 100644 --- a/src/storage/src/region/tests.rs +++ b/src/storage/src/region/tests.rs @@ -94,6 +94,7 @@ impl TesterBase { } pub async fn close(&self) { + self.region.close(&CloseContext::default()).await.unwrap(); self.region.inner.wal.close().await.unwrap(); } From 5d46b9c89c718a39eec322e20bc2fa12f7d4cd02 Mon Sep 17 00:00:00 2001 From: evenyag Date: Tue, 6 Jun 2023 16:46:53 +0800 Subject: [PATCH 13/13] feat(pprof): addres comments --- src/common/pprof/Cargo.toml | 2 +- src/common/pprof/src/lib.rs | 48 +++++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/src/common/pprof/Cargo.toml b/src/common/pprof/Cargo.toml index 0a29893097dc..e002733fda04 100644 --- a/src/common/pprof/Cargo.toml +++ b/src/common/pprof/Cargo.toml @@ -6,7 +6,7 @@ license.workspace = true [dependencies] common-error = { path = "../error" } -pprof = { version = "0.11.1", features = [ +pprof = { version = "0.11", features = [ "flamegraph", "prost-codec", "protobuf", diff --git a/src/common/pprof/src/lib.rs b/src/common/pprof/src/lib.rs index b6f1af477e92..81c0077f00f9 100644 --- a/src/common/pprof/src/lib.rs +++ b/src/common/pprof/src/lib.rs @@ -17,21 +17,45 @@ use std::time::Duration; use common_error::prelude::{ErrorExt, StatusCode}; use prost::Message; -use snafu::{ResultExt, Snafu}; +use snafu::{Location, ResultExt, Snafu}; #[derive(Debug, Snafu)] pub enum Error { - #[snafu(display("Failed to create profiler guard, source: {}", source))] - CreateGuard { source: pprof::Error }, - - #[snafu(display("Failed to create report, source: {}", source))] - CreateReport { source: pprof::Error }, - - #[snafu(display("Failed to create flamegraph, source: {}", source))] - CreateFlamegraph { source: pprof::Error }, - - #[snafu(display("Failed to create pprof report, source: {}", source))] - ReportPprof { source: pprof::Error }, + #[snafu(display( + "Failed to create profiler guard, source: {}, location: {}", + source, + location + ))] + CreateGuard { + source: pprof::Error, + location: Location, + }, + + #[snafu(display("Failed to create report, source: {}, location: {}", source, location))] + CreateReport { + source: pprof::Error, + location: Location, + }, + + #[snafu(display( + "Failed to create flamegraph, source: {}, location: {}", + source, + location + ))] + CreateFlamegraph { + source: pprof::Error, + location: Location, + }, + + #[snafu(display( + "Failed to create pprof report, source: {}, location: {}", + source, + location + ))] + ReportPprof { + source: pprof::Error, + location: Location, + }, } pub type Result = std::result::Result;