Skip to content

Commit

Permalink
feat(torture): accept custom supervisor workdir
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriele-0201 committed Feb 24, 2025
1 parent 964b1f9 commit 63a0156
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 23 deletions.
7 changes: 7 additions & 0 deletions torture/src/supervisor/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,11 @@ pub struct WorkloadParams {
#[clap(default_value = "false")]
#[arg(long = "trickfs")]
pub trickfs: bool,

/// Folder that will be used as the working directory by the Supervisor.
/// It will contain all workload folders.
///
/// It does not work in conjunction with `trickfs` option.
#[arg(long = "workdir", conflicts_with = "trickfs")]
pub workdir: Option<String>,
}
74 changes: 64 additions & 10 deletions torture/src/supervisor/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
//! The supervisor part. Spawns and manages agents. Assigns work to agents.
use std::{path::PathBuf, process::exit};
use std::{
path::{Path, PathBuf},
process::exit,
};

use anyhow::Result;
use clap::Parser;
use cli::{Cli, WorkloadParams};
use rand::Rng;
use tempfile::TempDir;
use tokio::{
signal::unix::{signal, SignalKind},
task::{self, JoinHandle},
Expand Down Expand Up @@ -157,6 +162,31 @@ pub struct InvestigationFlag {
reason: anyhow::Error,
}

/// Directory dedicated to a single workload.
///
/// Each workload has its own directory in which to store the NOMT instance
/// and any other data. It is either a temporary directory or a directory
/// inside the manually specified `workdir`.
#[derive(Debug)]
pub enum WorkloadDir {
    /// A temporary directory, held through its `TempDir` handle.
    TempDir(TempDir),
    /// A regular directory identified by its path.
    Dir(PathBuf),
}

impl WorkloadDir {
fn path(&self) -> PathBuf {
match self {
WorkloadDir::TempDir(temp_dir) => temp_dir.path().into(),
WorkloadDir::Dir(p) => p.clone(),
}
}

// Will persist the TempDir in memory if called.
fn ensure_stable_path(self) -> PathBuf {
match self {
WorkloadDir::TempDir(temp_dir) => temp_dir.into_path(),
WorkloadDir::Dir(p) => p,
}
}
}

/// Run the workload until it either finishes, errors or gets cancelled.
///
/// Returns `None` if the investigation is not required (i.e. cancelled or succeeded), otherwise,
Expand All @@ -167,20 +197,44 @@ async fn run_workload(
workload_params: &WorkloadParams,
workload_id: u64,
) -> Result<Option<InvestigationFlag>> {
// This creates a temp dir for the working dir of the workload.
let workdir = tempfile::Builder::new()
.prefix("torture-")
.suffix(format!("-workload-{}", workload_id).as_str())
.tempdir()
.expect("Failed to create a temp dir");
let mut workload = Workload::new(seed, workdir, workload_params, workload_id)?;
// Creates a temp or user specified dir for the working dir of the workload.
let mut to_clean_workload_dir = false;
let workload_dir = if workload_params.workdir.is_some() {
let workdir = workload_params.workdir.clone().unwrap();
let rand_chars: String = rand::thread_rng()
.sample_iter(&rand::distributions::Alphanumeric)
.take(6)
.map(char::from)
.collect();
let workload_path = Path::new(&workdir)
.join(format!("torture-{}-workload-{}", rand_chars, workload_id).as_str());
to_clean_workload_dir = true;
std::fs::create_dir_all(workload_path.clone()).unwrap();
WorkloadDir::Dir(workload_path.into())
} else {
let tempdir = tempfile::Builder::new()
.prefix("torture-")
.suffix(format!("-workload-{}", workload_id).as_str())
.tempdir()
.expect("Failed to create a temp dir");
WorkloadDir::TempDir(tempdir)
};
let workload_dir_path = workload_dir.path();

let mut workload = Workload::new(seed, workload_dir, workload_params, workload_id)?;
let result = workload.run(cancel_token).await;
match result {
Ok(()) => Ok(None),
Ok(()) => {
if to_clean_workload_dir {
// Clean the persistent db if the workload succeeded.
std::fs::remove_dir_all(workload_dir_path).unwrap();
}
Ok(None)
}
Err(err) => Ok(Some(InvestigationFlag {
seed,
workload_id,
workdir: workload.into_workdir().into_path(),
workdir: workload.into_workload_dir().ensure_stable_path(),
reason: err,
})),
}
Expand Down
27 changes: 14 additions & 13 deletions torture/src/supervisor/workload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use anyhow::Result;
use imbl::OrdMap;
use rand::{distributions::WeightedIndex, prelude::*};
use std::time::Duration;
use tempfile::TempDir;
use tokio::time::{error::Elapsed, timeout};
use tokio_util::sync::CancellationToken;
use tracing::{info, trace, trace_span, Instrument as _};
Expand All @@ -13,7 +12,7 @@ use crate::{
cli::WorkloadParams,
comms,
controller::{self, SpawnedAgentController},
pbt,
pbt, WorkloadDir,
},
};

Expand Down Expand Up @@ -240,8 +239,8 @@ impl WorkloadState {
/// arises from the fact that as part of the workload we need to crash the agent to check how
/// it behaves.
pub struct Workload {
/// Working directory for this particular workload.
workdir: TempDir,
/// Directory used by this workload.
workload_dir: WorkloadDir,
/// The handle to the trickfs FUSE FS.
///
/// `Some` until the workload is torn down.
Expand Down Expand Up @@ -295,7 +294,7 @@ struct ScheduledRollback {
impl Workload {
pub fn new(
seed: u64,
workdir: TempDir,
workload_dir: WorkloadDir,
workload_params: &WorkloadParams,
workload_id: u64,
) -> anyhow::Result<Self> {
Expand Down Expand Up @@ -325,14 +324,14 @@ impl Workload {
#[cfg(target_os = "linux")]
let trick_handle = workload_params
.trickfs
.then(|| trickfs::spawn_trick(&workdir.path()))
.then(|| trickfs::spawn_trick(&workload_dir.path()))
.transpose()?;

#[cfg(not(target_os = "linux"))]
let trick_handle = None;

Ok(Self {
workdir,
workload_dir,
trick_handle,
agent: None,
rr: None,
Expand Down Expand Up @@ -842,12 +841,14 @@ impl Workload {
assert!(self.agent.is_none());
controller::spawn_agent_into(&mut self.agent).await?;
self.rr = Some(self.agent.as_ref().unwrap().rr().clone());
let workdir = self.workdir.path().display().to_string();
let outcome = self
.agent
.as_mut()
.unwrap()
.init(workdir, self.workload_id)
.init(
self.workload_dir.path().display().to_string(),
self.workload_id,
)
.await?;
if let InitOutcome::Success = outcome {
()
Expand Down Expand Up @@ -915,7 +916,7 @@ impl Workload {
async fn collect_and_display_backtrace(&self) {
if let Some(agent) = self.agent.as_ref() {
if let Some(agent_pid) = agent.pid() {
let filename = self.workdir.path().join("backtrace.txt");
let filename = &self.workload_dir.path().join("backtrace.txt");
match pbt::collect_process_backtrace(&filename, agent_pid).await {
Ok(()) => {
tracing::info!("Backtrace collected in {}", filename.display());
Expand All @@ -941,9 +942,9 @@ impl Workload {
}
}

/// Return the working directory.
pub fn into_workdir(self) -> TempDir {
self.workdir
/// Consume the workload and return the directory it was using.
///
/// Ownership of the `WorkloadDir` is handed to the caller.
pub fn into_workload_dir(self) -> WorkloadDir {
self.workload_dir
}
}

Expand Down

0 comments on commit 63a0156

Please sign in to comment.