diff --git a/crates/goose-cli/src/commands/bench.rs b/crates/goose-cli/src/commands/bench.rs index 44cf0e51c..1b5df58ae 100644 --- a/crates/goose-cli/src/commands/bench.rs +++ b/crates/goose-cli/src/commands/bench.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use chrono::Local; use goose::config::Config; use goose::message::Message; -use goose_bench::eval_suites::{BenchAgent, EvaluationSuiteFactory}; +use goose_bench::eval_suites::{BenchAgent, EvaluationMetric, EvaluationSuiteFactory}; use goose_bench::work_dir::WorkDir; #[async_trait] @@ -15,6 +15,22 @@ impl BenchAgent for Session { } } +async fn run_eval(mut evaluation: Box) -> anyhow::Result> { + let _ = WorkDir::work_from(format!("./{}", &evaluation.name())); + let session = build_session(None, false, Vec::new(), Vec::new()).await; + let report = evaluation.run(Box::new(session))?.await; + report +} + +async fn run_suite(suite: &str, current_time: &String, current_date: &String) -> anyhow::Result<()> { + let _ = WorkDir::work_from(format!("./{}", &suite))?; + let _ = WorkDir::work_from(format!("./{}-{}", ¤t_date, current_time))?; + for Some(evaluation) in EvaluationSuiteFactory::create(suite) { + run_eval(evaluation)?.await; + } + Ok(()) +} + pub async fn run_benchmark(suites: Vec) { let suites = EvaluationSuiteFactory::available_evaluations() .into_iter() @@ -29,25 +45,8 @@ pub async fn run_benchmark(suites: Vec) { let current_time = Local::now().format("%H:%M:%S").to_string(); let current_date = Local::now().format("%Y-%m-%d").to_string(); - if let Ok(_) = WorkDir::work_from(format!("./benchmark-{}", &provider_name)) { - for suite in suites { - if let Ok(_) = WorkDir::work_from(format!("./{}", &suite)) { - let evaluations = match EvaluationSuiteFactory::create(suite) { - Some(evaluations) => evaluations, - None => continue, - }; - if let Ok(_) = WorkDir::work_from(format!("./{}-{}", ¤t_date, current_time)) { - for evaluation in evaluations { - if let Ok(_) = WorkDir::work_from(format!("./{}", &evaluation.name())) { - let session = build_session(None, false, Vec::new(), Vec::new()).await; - let _ = match evaluation.run(Box::new(session)).await { - Ok(report) => report, - _ => continue, - }; - } - } - } - } - } + let _ = WorkDir::work_from(format!("./benchmark-{}", &provider_name))?; + for suite in suites { + run_suite(suite, ¤t_time, ¤t_date)?.await; } }