diff --git a/Cargo.toml b/Cargo.toml index a025aeba60..99e20896b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -124,7 +124,7 @@ uuid = { version = "1.10.0", features = ["serde", "v4"] } which = "6.0.3" windows = "0.59.0" z3 = "0.12.1" - +fs2 = "0.4.3" # Used by OnDisk Corpus for file locking [workspace.lints.rust] # Deny diff --git a/fuzzers/baby/tutorial/src/input.rs b/fuzzers/baby/tutorial/src/input.rs index f21eaa4199..c46e85d59b 100644 --- a/fuzzers/baby/tutorial/src/input.rs +++ b/fuzzers/baby/tutorial/src/input.rs @@ -2,10 +2,7 @@ use std::hash::Hash; use lain::prelude::*; -use libafl::{ - corpus::CorpusId, - inputs::{HasTargetBytes, Input}, -}; +use libafl::inputs::{HasTargetBytes, Input}; use libafl_bolts::{ownedref::OwnedSlice, HasLen}; use serde::{Deserialize, Serialize}; @@ -48,15 +45,7 @@ pub enum PacketType { Reset = 0x2, } -impl Input for PacketData { - fn generate_name(&self, id: Option) -> String { - if let Some(id) = id { - format!("id_{}", id.0) - } else { - "id_unknown".into() - } - } -} +impl Input for PacketData {} impl HasTargetBytes for PacketData { #[inline] diff --git a/fuzzers/structure_aware/baby_fuzzer_custom_input/src/input.rs b/fuzzers/structure_aware/baby_fuzzer_custom_input/src/input.rs index 184f27f7b3..d531e584ca 100644 --- a/fuzzers/structure_aware/baby_fuzzer_custom_input/src/input.rs +++ b/fuzzers/structure_aware/baby_fuzzer_custom_input/src/input.rs @@ -2,14 +2,13 @@ use core::num::NonZeroUsize; use std::{borrow::Cow, hash::Hash}; use libafl::{ - corpus::CorpusId, generators::{Generator, RandBytesGenerator}, inputs::{BytesInput, HasTargetBytes, Input}, mutators::{MutationResult, Mutator}, state::HasRand, Error, SerdeAny, }; -use libafl_bolts::{generic_hash_std, rands::Rand, Named}; +use libafl_bolts::{rands::Rand, Named}; use serde::{Deserialize, Serialize}; /// The custom [`Input`] type used in this example, consisting of a byte array part, a byte array that is not always present, and a boolean @@ -28,11 +27,7 @@ pub struct CustomInput { } /// Hash-based implementation -impl Input for CustomInput { - fn generate_name(&self, _id: Option) -> String { - format!("{:016x}", generic_hash_std(self)) - } -} +impl Input for CustomInput {} impl CustomInput { /// Returns a mutable reference to the byte array diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index 40d22236a2..a56fa6224e 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -60,6 +60,7 @@ std = [ "libafl_bolts/std", "typed-builder", "fastbloom", + "fs2", ] ## Tracks the Feedbacks and the Objectives that were interesting for a Testcase @@ -290,6 +291,8 @@ const_panic = { version = "0.2.9", default-features = false } # similarly, for f pyo3 = { workspace = true, optional = true } regex-syntax = { version = "0.8.4", optional = true } # For nautilus +fs2 = { workspace = true, optional = true } # used by OnDisk Corpus for file locking + # optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable) serial_test = { workspace = true, optional = true, default-features = false, features = [ "logging", diff --git a/libafl/src/corpus/inmemory_ondisk.rs b/libafl/src/corpus/inmemory_ondisk.rs index 24fc87ab7d..1e8d469bba 100644 --- a/libafl/src/corpus/inmemory_ondisk.rs +++ b/libafl/src/corpus/inmemory_ondisk.rs @@ -4,16 +4,17 @@ //! For a lower memory footprint, consider using [`crate::corpus::CachedOnDiskCorpus`] //! which only stores a certain number of [`Testcase`]s and removes additional ones in a FIFO manner. -use alloc::string::String; +use alloc::string::{String, ToString}; use core::cell::{Ref, RefCell, RefMut}; use std::{ fs, fs::{File, OpenOptions}, io, - io::Write, + io::{Read, Seek, SeekFrom, Write}, path::{Path, PathBuf}, }; +use fs2::FileExt; #[cfg(feature = "gzip")] use libafl_bolts::compress::GzipCompressor; use serde::{Deserialize, Serialize}; @@ -33,7 +34,11 @@ use crate::{ /// If the create fails for _any_ reason, including, but not limited to, a preexisting existing file of that name, /// it will instead return the respective [`io::Error`]. fn create_new>(path: P) -> Result { - OpenOptions::new().write(true).create_new(true).open(path) + OpenOptions::new() + .write(true) + .read(true) + .create_new(true) + .open(path) } /// Tries to create the given `path` and returns `None` _only_ if the file already existed. @@ -85,7 +90,7 @@ where fn add(&mut self, testcase: Testcase) -> Result { let id = self.inner.add(testcase)?; let testcase = &mut self.get(id).unwrap().borrow_mut(); - self.save_testcase(testcase, id)?; + self.save_testcase(testcase, Some(id))?; *testcase.input_mut() = None; Ok(id) } @@ -95,7 +100,7 @@ where fn add_disabled(&mut self, testcase: Testcase) -> Result { let id = self.inner.add_disabled(testcase)?; let testcase = &mut self.get_from_all(id).unwrap().borrow_mut(); - self.save_testcase(testcase, id)?; + self.save_testcase(testcase, Some(id))?; *testcase.input_mut() = None; Ok(id) } @@ -106,7 +111,7 @@ where let entry = self.inner.replace(id, testcase)?; self.remove_testcase(&entry)?; let testcase = &mut self.get(id).unwrap().borrow_mut(); - self.save_testcase(testcase, id)?; + self.save_testcase(testcase, Some(id))?; *testcase.input_mut() = None; Ok(entry) } @@ -309,12 +314,19 @@ impl InMemoryOnDiskCorpus { /// Sets the filename for a [`Testcase`]. /// If an error gets returned from the corpus (i.e., file exists), we'll have to retry with a different filename. + /// Renaming testcases will most likely cause duplicate testcases to not be handled correctly + /// if testcases with the same input are not given the same filename. + /// Only rename when you know what you are doing. #[inline] pub fn rename_testcase( &self, testcase: &mut Testcase, filename: String, - ) -> Result<(), Error> { + id: Option, + ) -> Result<(), Error> + where + I: Input, + { if testcase.filename().is_some() { // We are renaming! @@ -327,36 +339,10 @@ impl InMemoryOnDiskCorpus { return Ok(()); } - if self.locking { - let new_lock_filename = format!(".{new_filename}.lafl_lock"); - - // Try to create lock file for new testcases - if let Err(err) = create_new(self.dir_path.join(&new_lock_filename)) { - *testcase.filename_mut() = Some(old_filename); - return Err(Error::illegal_state(format!( - "Unable to create lock file {new_lock_filename} for new testcase: {err}" - ))); - } - } - let new_file_path = self.dir_path.join(&new_filename); - - fs::rename(testcase.file_path().as_ref().unwrap(), &new_file_path)?; - - let new_metadata_path = { - if let Some(old_metadata_path) = testcase.metadata_path() { - // We have metadata. Let's rename it. - let new_metadata_path = self.dir_path.join(format!(".{new_filename}.metadata")); - fs::rename(old_metadata_path, &new_metadata_path)?; - - Some(new_metadata_path) - } else { - None - } - }; - - *testcase.metadata_path_mut() = new_metadata_path; + self.remove_testcase(testcase)?; *testcase.filename_mut() = Some(new_filename); + self.save_testcase(testcase, id)?; *testcase.file_path_mut() = Some(new_file_path); Ok(()) @@ -367,34 +353,38 @@ impl InMemoryOnDiskCorpus { } } - fn save_testcase(&self, testcase: &mut Testcase, id: CorpusId) -> Result<(), Error> + fn save_testcase(&self, testcase: &mut Testcase, id: Option) -> Result<(), Error> where I: Input, { - let file_name_orig = testcase.filename_mut().take().unwrap_or_else(|| { + let file_name = testcase.filename_mut().take().unwrap_or_else(|| { // TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL) - testcase.input().as_ref().unwrap().generate_name(Some(id)) + testcase.input().as_ref().unwrap().generate_name(id) }); - // New testcase, we need to save it. - let mut file_name = file_name_orig.clone(); - - let mut ctr = 2; - let file_name = if self.locking { - loop { - let lockfile_name = format!(".{file_name}.lafl_lock"); - let lockfile_path = self.dir_path.join(lockfile_name); - - if try_create_new(lockfile_path)?.is_some() { - break file_name; - } + let mut ctr = String::new(); + if self.locking { + let lockfile_name = format!(".{file_name}"); + let lockfile_path = self.dir_path.join(lockfile_name); + + let mut lockfile = try_create_new(&lockfile_path)?.unwrap_or( + OpenOptions::new() + .write(true) + .read(true) + .open(&lockfile_path)?, + ); + lockfile.lock_exclusive()?; + + lockfile.read_to_string(&mut ctr)?; + ctr = if ctr.is_empty() { + String::from("1") + } else { + (ctr.trim().parse::()? + 1).to_string() + }; - file_name = format!("{file_name_orig}-{ctr}"); - ctr += 1; - } - } else { - file_name - }; + lockfile.seek(SeekFrom::Start(0))?; + lockfile.write_all(ctr.as_bytes())?; + } if testcase.file_path().is_none() { *testcase.file_path_mut() = Some(self.dir_path.join(&file_name)); @@ -402,7 +392,15 @@ impl InMemoryOnDiskCorpus { *testcase.filename_mut() = Some(file_name); if self.meta_format.is_some() { - let metafile_name = format!(".{}.metadata", testcase.filename().as_ref().unwrap()); + let metafile_name = if self.locking { + format!( + ".{}_{}.metadata", + testcase.filename().as_ref().unwrap(), + ctr + ) + } else { + format!(".{}.metadata", testcase.filename().as_ref().unwrap()) + }; let metafile_path = self.dir_path.join(&metafile_name); let mut tmpfile_path = metafile_path.clone(); tmpfile_path.set_file_name(format!(".{metafile_name}.tmp",)); @@ -445,15 +443,36 @@ impl InMemoryOnDiskCorpus { fn remove_testcase(&self, testcase: &Testcase) -> Result<(), Error> { if let Some(filename) = testcase.filename() { + let mut ctr = String::new(); + if self.locking { + let lockfile_path = self.dir_path.join(format!(".{filename}")); + let mut lockfile = OpenOptions::new() + .write(true) + .read(true) + .open(&lockfile_path)?; + + lockfile.lock_exclusive()?; + lockfile.read_to_string(&mut ctr)?; + ctr = ctr.trim().to_string(); + + if ctr == "1" { + FileExt::unlock(&lockfile)?; + drop(fs::remove_file(lockfile_path)); + } else { + lockfile.seek(SeekFrom::Start(0))?; + lockfile.write_all(&(ctr.parse::()? - 1).to_le_bytes())?; + return Ok(()); + } + } + fs::remove_file(self.dir_path.join(filename))?; if self.meta_format.is_some() { - fs::remove_file(self.dir_path.join(format!(".{filename}.metadata")))?; + if self.locking { + fs::remove_file(self.dir_path.join(format!(".{filename}_{ctr}.metadata")))?; + } else { + fs::remove_file(self.dir_path.join(format!(".{filename}.metadata")))?; + } } - // also try to remove the corresponding `.lafl_lock` file if it still exists - // (even though it shouldn't exist anymore, at this point in time) - drop(fs::remove_file( - self.dir_path.join(format!(".{filename}.lafl_lock")), - )); } Ok(()) } diff --git a/libafl/src/corpus/testcase.rs b/libafl/src/corpus/testcase.rs index 8a86ff6d7a..996cbaf0b8 100644 --- a/libafl/src/corpus/testcase.rs +++ b/libafl/src/corpus/testcase.rs @@ -264,6 +264,8 @@ impl Testcase { } /// Create a new Testcase instance given an input and a `filename` + /// If locking is enabled, make sure that testcases with the same input have the same filename + /// to prevent ending up with duplicate testcases #[inline] pub fn with_filename(input: I, filename: String) -> Self { Self { diff --git a/libafl/src/inputs/mod.rs b/libafl/src/inputs/mod.rs index a63866434d..e572124d63 100644 --- a/libafl/src/inputs/mod.rs +++ b/libafl/src/inputs/mod.rs @@ -28,21 +28,23 @@ pub mod nautilus; use alloc::{ boxed::Box, - string::{String, ToString}, + string::String, vec::{Drain, Splice, Vec}, }; use core::{ clone::Clone, fmt::Debug, + hash::Hash, marker::PhantomData, ops::{DerefMut, RangeBounds}, }; #[cfg(feature = "std")] -use std::{fs::File, hash::Hash, io::Read, path::Path}; +use std::{fs::File, io::Read, path::Path}; #[cfg(feature = "std")] use libafl_bolts::fs::write_file_atomic; use libafl_bolts::{ + generic_hash_std, ownedref::{OwnedMutSlice, OwnedSlice}, subrange::{SubRangeMutSlice, SubRangeSlice}, Error, HasLen, @@ -55,7 +57,7 @@ use crate::corpus::CorpusId; /// An input for the target #[cfg(not(feature = "std"))] -pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { +pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug + Hash { /// Write this input to the file fn to_file

(&self, _path: P) -> Result<(), Error> { Err(Error::not_implemented("Not supported in no_std")) @@ -67,12 +69,14 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { } /// Generate a name for this input - fn generate_name(&self, id: Option) -> String; + fn generate_name(&self, _id: Option) -> String { + format!("{:016x}", generic_hash_std(self)) + } } /// An input for the target #[cfg(feature = "std")] -pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { +pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug + Hash { /// Write this input to the file fn to_file

(&self, path: P) -> Result<(), Error> where @@ -93,7 +97,9 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { } /// Generate a name for this input, the user is responsible for making each name of testcase unique. - fn generate_name(&self, id: Option) -> String; + fn generate_name(&self, _id: Option) -> String { + format!("{:016x}", generic_hash_std(self)) + } } /// Convert between two input types with a state @@ -127,12 +133,7 @@ impl NopInput { } } -impl Input for NopInput { - fn generate_name(&self, _id: Option) -> String { - "nop-input".to_string() - } -} - +impl Input for NopInput {} impl HasTargetBytes for NopInput { fn target_bytes(&self) -> OwnedSlice { OwnedSlice::from(vec![0]) diff --git a/libafl/src/inputs/multi.rs b/libafl/src/inputs/multi.rs index ce7007de21..9583a40a12 100644 --- a/libafl/src/inputs/multi.rs +++ b/libafl/src/inputs/multi.rs @@ -16,7 +16,7 @@ use crate::{corpus::CorpusId, inputs::Input}; /// An input composed of multiple parts. Use in situations where subcomponents are not necessarily /// related, or represent distinct parts of the input. -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize, Hash)] pub struct MultipartInput { parts: Vec, names: Vec, diff --git a/libafl/src/inputs/nautilus.rs b/libafl/src/inputs/nautilus.rs index 4fade05a1d..e088296429 100644 --- a/libafl/src/inputs/nautilus.rs +++ b/libafl/src/inputs/nautilus.rs @@ -1,7 +1,7 @@ //! Input for the [`Nautilus`](https://github.com/RUB-SysSec/nautilus) grammar fuzzer methods //! //! -use alloc::{rc::Rc, string::String, vec::Vec}; +use alloc::{rc::Rc, vec::Vec}; use core::cell::RefCell; use std::hash::{Hash, Hasher}; @@ -15,7 +15,6 @@ use crate::{ rule::RuleIdOrCustom, tree::{Tree, TreeLike}, }, - corpus::CorpusId, generators::nautilus::NautilusContext, inputs::{BytesInput, Input, InputConverter}, Error, @@ -28,23 +27,7 @@ pub struct NautilusInput { pub tree: Tree, } -impl Input for NautilusInput { - /// Generate a name for this input - #[must_use] - fn generate_name(&self, id: Option) -> String { - /*let mut hasher = AHasher::new_with_keys(0, 0); - for term in &self.terms { - hasher.write(term.symbol.as_bytes()); - } - format!("{:016x}", hasher.finish())*/ - - if let Some(id) = id { - format!("id_{}", id.0) - } else { - "id_unknown".into() - } - } -} +impl Input for NautilusInput {} /// Rc Ref-cell from Input impl From for Rc> { diff --git a/libafl/src/inputs/value.rs b/libafl/src/inputs/value.rs index f2db679831..4948112431 100644 --- a/libafl/src/inputs/value.rs +++ b/libafl/src/inputs/value.rs @@ -1,9 +1,9 @@ //! Newtype pattern style wrapper for [`super::Input`]s -use alloc::{string::String, vec::Vec}; +use alloc::vec::Vec; use core::{fmt::Debug, hash::Hash}; -use libafl_bolts::{generic_hash_std, rands::Rand}; +use libafl_bolts::rands::Rand; use serde::{Deserialize, Serialize}; #[cfg(feature = "std")] use { @@ -12,7 +12,7 @@ use { }; use super::Input; -use crate::{corpus::CorpusId, mutators::numeric::Numeric}; +use crate::mutators::numeric::Numeric; /// Newtype pattern wrapper around an underlying structure to implement inputs /// @@ -56,11 +56,7 @@ impl Copy for ValueInput {} macro_rules! impl_input_for_value_input { ($($t:ty => $name:ident),+ $(,)?) => { $( - impl Input for ValueInput<$t> { - fn generate_name(&self, _id: Option) -> String { - format!("{:016x}", generic_hash_std(self)) - } - } + impl Input for ValueInput<$t> {} /// Input wrapping a <$t> pub type $name = ValueInput<$t>; @@ -86,10 +82,6 @@ impl_input_for_value_input!( /// manually implemented because files can be written more efficiently impl Input for ValueInput> { - fn generate_name(&self, _id: Option) -> String { - format!("{:016x}", generic_hash_std(self)) - } - /// Write this input to the file #[cfg(feature = "std")] fn to_file

(&self, path: P) -> Result<(), Error> diff --git a/libafl_libfuzzer/runtime/src/corpus.rs b/libafl_libfuzzer/runtime/src/corpus.rs index 448c7e4b0d..4712c4a0f3 100644 --- a/libafl_libfuzzer/runtime/src/corpus.rs +++ b/libafl_libfuzzer/runtime/src/corpus.rs @@ -119,7 +119,7 @@ where "The testcase, when added to the corpus, must have an input present!", ) })?; - let name = input.generate_name(Some(id)); + let name = input.generate_name(); let path = self.corpus_dir.join(&name); match input.to_file(&path) { diff --git a/libafl_libfuzzer/runtime/src/feedbacks.rs b/libafl_libfuzzer/runtime/src/feedbacks.rs index c0b018db98..f08090ae97 100644 --- a/libafl_libfuzzer/runtime/src/feedbacks.rs +++ b/libafl_libfuzzer/runtime/src/feedbacks.rs @@ -104,7 +104,7 @@ impl LibfuzzerCrashCauseFeedback { let base = if let Some(filename) = testcase.filename() { filename.clone() } else { - let name = testcase.input().as_ref().unwrap().generate_name(None); + let name = testcase.input().as_ref().unwrap().generate_name(); name }; let file_path = self.artifact_prefix.dir().join(format!( diff --git a/libafl_targets/src/drcov.rs b/libafl_targets/src/drcov.rs index 28056ad14d..8b9fa30221 100644 --- a/libafl_targets/src/drcov.rs +++ b/libafl_targets/src/drcov.rs @@ -255,7 +255,7 @@ fn parse_path(s: &str) -> PathBuf { let s = s.trim(); // If first and last character is a quote, let's remove them - let s = if s.starts_with('\"') && s.ends_with('\"'){ + let s = if s.starts_with('\"') && s.ends_with('\"') { &s[1..s.len() - 1] } else { s