From 8dbfede6dea2d17712e74896ba08d271ce70e4b9 Mon Sep 17 00:00:00 2001 From: Levi Zim Date: Tue, 7 Jan 2025 23:12:37 +0800 Subject: [PATCH] Replace arcstr cache with weak hash set of arc str --- Cargo.lock | 106 +++++++++++++++++++++++++--- Cargo.toml | 8 ++- fixtures/exec-stress.rs | 33 +++++++++ src/bpf.rs | 11 ++- src/cache.rs | 152 ++++++++++++++++++++++++++++++++-------- src/event.rs | 2 +- src/export.rs | 2 +- src/printer.rs | 2 +- src/proc.rs | 2 +- src/regex.rs | 10 +-- src/tracer.rs | 2 +- src/tracer/inspect.rs | 2 +- src/tracer/state.rs | 2 +- 13 files changed, 273 insertions(+), 61 deletions(-) create mode 100644 fixtures/exec-stress.rs diff --git a/Cargo.lock b/Cargo.lock index 1e091f42..773f9f83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -17,6 +17,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -103,15 +114,6 @@ dependencies = [ "x11rb", ] -[[package]] -name = "arcstr" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03918c3dbd7701a85c6b9887732e2921175f26c350b4563841d0958c21d57e6d" -dependencies = [ - "serde", -] - [[package]] name = "arrayvec" version = "0.7.6" @@ -206,6 +208,12 @@ dependencies = [ "serde", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.9.0" @@ -1390,6 +1398,15 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + [[package]] name = "predicates" version = "3.1.3" @@ -1466,6 +1483,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "ratatui" version = "0.29.0" @@ -2052,7 +2099,6 @@ name = "tracexec" version = "0.8.0" dependencies = [ "arboard", - "arcstr", "assert_cmd", "atoi", "better-panic", @@ -2079,6 +2125,7 @@ dependencies = [ "nix 0.29.0", "paste", "predicates", + "rand", "ratatui", "regex-cursor", "rstest 0.24.0", @@ -2105,6 +2152,7 @@ dependencies = [ "unicode-segmentation", "unicode-width 0.1.14", "vt100", + "weak-table", ] [[package]] @@ -2313,6 +2361,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "vsprintf" version = "2.0.0" @@ -2444,6 +2498,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "weak-table" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "323f4da9523e9a669e1eaf9c6e763892769b1d38c623913647bfdc1532fe4549" +dependencies = [ + "ahash", +] + [[package]] name = "winapi" version = "0.3.9" @@ -2665,3 +2728,24 @@ name = "yansi" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index ff0682b9..b2c60f18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,7 +64,6 @@ tui-prompts = "0.5.0" unicode-segmentation = "1.11.0" unicode-width = "0.1.12" serial_test = { version = "3.1.1", features = ["file_locks"] } -arcstr = { version = "1.2.0", features = ["serde"] } clap_complete = "4.5.2" regex-cursor = { version = "0.1.4", default-features = false } shell-words = "1.1.0" @@ -78,12 +77,15 @@ libbpf-rs = { version = "0.24.6", optional = true, default-features = false } # libbpf-sys exists here because we want to control its features libbpf-sys = { version = "1", optional = true, default-features = false } libseccomp = { version = "0.3.0", optional = true } +weak-table = { version = "0.3.2", default-features = false, features = ["ahash"] } +rand = "0.8.5" # tui-prompts = { version = "0.3.11", path = "../../contrib/tui-prompts" } # tui-popup = { version = "0.3.0", path = "../../contrib/tui-popup" } [dev-dependencies] assert_cmd = "2.0.14" predicates = "3.1.0" + rstest = "0.24.0" tracing-test = "0.2.4" @@ -126,3 +128,7 @@ path = "fixtures/empty-argv.rs" name = "corrupted-envp" path = "fixtures/corrupted-envp.rs" + +[[bin]] +name = "exec-stress" +path = "fixtures/exec-stress.rs" diff --git a/fixtures/exec-stress.rs b/fixtures/exec-stress.rs new file mode 100644 index 00000000..4de5d526 --- /dev/null +++ b/fixtures/exec-stress.rs @@ -0,0 +1,33 @@ +use std::ffi::CString; + +use nix::unistd::execv; +use rand::{distributions::Alphanumeric, Rng}; + +// A stress test. +// It will exec itself with random strings as arguments for n times +fn main() { + let mut args = std::env::args().skip(1); + let n: u64 = args + .next() + .expect("missing n") + .parse() + .expect("cannot parse n"); + if n == 0 { + // nix::sys::signal::raise(nix::sys::signal::SIGSTOP); + return; + } + let mut rand_args = vec![ + CString::new("Hello").unwrap(), + CString::new((n - 1).to_string()).unwrap(), + ]; + rand_args.extend((0..10).map(|_| unsafe { + CString::from_vec_unchecked( + rand::thread_rng() + .sample_iter(&Alphanumeric) + .take(512) + .chain(Some(0)) + .collect::>(), + ) + })); + execv(c"/proc/self/exe", &rand_args).unwrap(); +} diff --git a/src/bpf.rs b/src/bpf.rs index 81deb420..bee01cca 100644 --- a/src/bpf.rs +++ b/src/bpf.rs @@ -13,12 +13,12 @@ use std::{ process, sync::{ atomic::{AtomicBool, Ordering}, - Arc, OnceLock, RwLock, + Arc, LazyLock, RwLock, }, time::Duration, }; -use arcstr::ArcStr; +use crate::cache::ArcStr; use color_eyre::{eyre::eyre, Section}; use enumflags2::{BitFlag, BitFlags}; use event::EventStorage; @@ -818,11 +818,10 @@ fn utf8_lossy_cow_from_bytes_with_nul(data: &[u8]) -> Cow { } fn cached_cow(cow: Cow) -> ArcStr { - let cache = CACHE.get_or_init(|| Arc::new(RwLock::new(StringCache::new()))); match cow { - Cow::Borrowed(s) => cache.write().unwrap().get_or_insert(s), - Cow::Owned(s) => cache.write().unwrap().get_or_insert_owned(s), + Cow::Borrowed(s) => CACHE.write().unwrap().get_or_insert(s), + Cow::Owned(s) => CACHE.write().unwrap().get_or_insert_owned(s), } } -static CACHE: OnceLock>> = OnceLock::new(); +static CACHE: LazyLock> = LazyLock::new(|| RwLock::new(StringCache::new())); diff --git a/src/cache.rs b/src/cache.rs index 35983905..5d77c134 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,52 +1,142 @@ +use serde::{Serialize, Serializer}; use std::{ - collections::HashSet, - ops::{Deref, DerefMut}, + borrow::Cow, + fmt::{Debug, Display}, + ops::Deref, + sync::{Arc, LazyLock, Weak}, }; +use weak_table::WeakHashSet; -use arcstr::ArcStr; +#[derive(Clone)] +#[repr(transparent)] +pub struct ArcStr(Arc); + +impl ArcStr { + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl Deref for ArcStr { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl AsRef for ArcStr { + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl PartialEq for ArcStr +where + T: AsRef, +{ + fn eq(&self, other: &T) -> bool { + &*self.0 == other.as_ref() + } +} + +impl Eq for ArcStr {} + +impl From> for ArcStr { + fn from(value: Arc) -> Self { + Self(value) + } +} + +impl From for Cow<'_, str> { + fn from(value: ArcStr) -> Self { + Self::Owned(value.to_string()) + } +} + +impl Serialize for ArcStr { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&self.0) + } +} + +impl std::hash::Hash for ArcStr { + fn hash(&self, state: &mut H) { + self.0.hash(state); + } +} + +impl PartialOrd for ArcStr { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.0.cmp(&other.0)) + } +} + +impl Ord for ArcStr { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.cmp(&other.0) + } +} + +impl Display for ArcStr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.0, f) + } +} + +impl Debug for ArcStr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Debug::fmt(&self.0, f) + } +} + +impl From<&str> for ArcStr { + fn from(value: &str) -> Self { + Arc::::from(value).into() + } +} + +impl Default for ArcStr { + fn default() -> Self { + DEFAULT_ARCSTR.clone() + } +} + +static DEFAULT_ARCSTR: LazyLock = LazyLock::new(|| "".into()); pub struct StringCache { - cache: HashSet, + inner: WeakHashSet>, } impl StringCache { pub fn new() -> Self { Self { - cache: HashSet::new(), + inner: WeakHashSet::new(), } } - pub fn get_or_insert_owned(&mut self, s: String) -> ArcStr { - if let Some(s) = self.cache.get(s.as_str()) { - s.clone() + pub fn get_or_insert(&mut self, s: &str) -> ArcStr { + if let Some(s) = self.inner.get(s) { + s.into() } else { - let arc = ArcStr::from(s); - self.cache.insert(arc.clone()); - arc + let arc: Arc = Arc::from(s); + self.inner.insert(arc.clone()); + arc.into() } } - pub fn get_or_insert(&mut self, s: &str) -> ArcStr { - if let Some(s) = self.cache.get(s) { - s.clone() + // Probably this is not necessary. + // I don't think we can find a way to turn a String into Arc in-place. + pub fn get_or_insert_owned(&mut self, s: String) -> ArcStr { + if let Some(s) = self.inner.get(s.as_str()) { + s.into() } else { - let arc = ArcStr::from(s); - self.cache.insert(arc.clone()); - arc + let arc: Arc = Arc::from(s); + self.inner.insert(arc.clone()); + arc.into() } } } - -impl Deref for StringCache { - type Target = HashSet; - - fn deref(&self) -> &Self::Target { - &self.cache - } -} - -impl DerefMut for StringCache { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.cache - } -} diff --git a/src/event.rs b/src/event.rs index 173a2f4c..afaf6824 100644 --- a/src/event.rs +++ b/src/event.rs @@ -7,7 +7,7 @@ use std::{ sync::{atomic::AtomicU64, Arc}, }; -use arcstr::ArcStr; +use crate::cache::ArcStr; use clap::ValueEnum; use crossterm::event::KeyEvent; use either::Either; diff --git a/src/export.rs b/src/export.rs index d3d3ae36..1f36fd5a 100644 --- a/src/export.rs +++ b/src/export.rs @@ -1,7 +1,7 @@ //! Data structures for export command use std::{error::Error, sync::Arc}; -use arcstr::ArcStr; +use crate::cache::ArcStr; use nix::libc::pid_t; use serde::Serialize; diff --git a/src/printer.rs b/src/printer.rs index b1c2de54..6dd67217 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -16,7 +16,7 @@ use crate::{ tracer::state::{ExecData, ProcessState}, }; -use arcstr::ArcStr; +use crate::cache::ArcStr; use itertools::chain; use nix::{fcntl::OFlag, libc::ENOENT, unistd::Pid}; use owo_colors::{OwoColorize, Style}; diff --git a/src/proc.rs b/src/proc.rs index bb138a03..74b0f787 100644 --- a/src/proc.rs +++ b/src/proc.rs @@ -12,7 +12,7 @@ use std::{ sync::{Arc, LazyLock, RwLock}, }; -use arcstr::ArcStr; +use crate::cache::ArcStr; use filedescriptor::AsRawFileDescriptor; use owo_colors::OwoColorize; diff --git a/src/regex.rs b/src/regex.rs index a5e6b7b3..b364951e 100644 --- a/src/regex.rs +++ b/src/regex.rs @@ -1,3 +1,5 @@ +use std::sync::LazyLock; + /// Regex and regex-cursor related code use regex_cursor::Cursor; @@ -302,7 +304,7 @@ pub struct ArgvCursor<'a, T> { offset: usize, } -pub const SPACE: OutputMsg = OutputMsg::Ok(arcstr::literal!(" ")); +pub static SPACE: LazyLock = LazyLock::new(|| OutputMsg::Ok(" ".into())); impl<'a, T> ArgvCursor<'a, T> where @@ -380,14 +382,12 @@ where #[cfg(test)] mod cursor_tests { use super::*; - use arcstr::ArcStr; - - pub const SPACE: ArcStr = arcstr::literal!(" "); + use crate::cache::ArcStr; #[test] fn smoke_test() { let single = vec![ArcStr::from("abc")]; - let separator = SPACE; + let separator = " ".into(); let mut cursor = ArgvCursor::new(single.as_slice(), &separator); assert_eq!(cursor.chunk(), "abc".as_bytes()); assert!(!cursor.advance()); diff --git a/src/tracer.rs b/src/tracer.rs index d6e57e61..7203d8d6 100644 --- a/src/tracer.rs +++ b/src/tracer.rs @@ -9,7 +9,7 @@ use std::{ time::Duration, }; -use arcstr::ArcStr; +use crate::cache::ArcStr; use cfg_if::cfg_if; use either::Either; use enumflags2::BitFlags; diff --git a/src/tracer/inspect.rs b/src/tracer/inspect.rs index fb5a8bf3..b02445b1 100644 --- a/src/tracer/inspect.rs +++ b/src/tracer/inspect.rs @@ -1,6 +1,6 @@ use std::{collections::BTreeMap, ffi::CString}; -use arcstr::ArcStr; +use crate::cache::ArcStr; use nix::{ errno::Errno, sys::ptrace::{self, AddressType}, diff --git a/src/tracer/state.rs b/src/tracer/state.rs index 9fefe473..fe40952c 100644 --- a/src/tracer/state.rs +++ b/src/tracer/state.rs @@ -5,7 +5,7 @@ use std::{ sync::Arc, }; -use arcstr::ArcStr; +use crate::cache::ArcStr; use nix::unistd::Pid; use regex_cursor::engines::pikevm::{self, PikeVM}; use strum::IntoStaticStr;