From d76f8b4c418c164ca8e50fdc1748fe98a06d8488 Mon Sep 17 00:00:00 2001
From: Marcelle Bonterre
Date: Thu, 20 Feb 2025 09:40:05 -0500
Subject: [PATCH] bugfix the flattening

---
 crates/goose-cli/src/commands/bench.rs | 38 ++++++++++++++++----------
 crates/goose-cli/src/main.rs           |  2 +-
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/crates/goose-cli/src/commands/bench.rs b/crates/goose-cli/src/commands/bench.rs
index 1b5df58ae..b7826b866 100644
--- a/crates/goose-cli/src/commands/bench.rs
+++ b/crates/goose-cli/src/commands/bench.rs
@@ -4,7 +4,7 @@ use async_trait::async_trait;
 use chrono::Local;
 use goose::config::Config;
 use goose::message::Message;
-use goose_bench::eval_suites::{BenchAgent, EvaluationMetric, EvaluationSuiteFactory};
+use goose_bench::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, EvaluationSuiteFactory};
 use goose_bench::work_dir::WorkDir;

 #[async_trait]
@@ -15,23 +15,31 @@ impl BenchAgent for Session {
     }
 }

-async fn run_eval(mut evaluation: Box<dyn Evaluation>) -> anyhow::Result<Vec<EvaluationMetric>> {
-    let _ = WorkDir::work_from(format!("./{}", &evaluation.name()));
-    let session = build_session(None, false, Vec::new(), Vec::new()).await;
-    let report = evaluation.run(Box::new(session))?.await;
-    report
+async fn run_eval(evaluation: Box<dyn Evaluation>) -> anyhow::Result<Vec<EvaluationMetric>> {
+    if let Ok(_) = WorkDir::work_from(format!("./{}", &evaluation.name())) {
+        let session = build_session(None, false, Vec::new(), Vec::new()).await;
+        let report = evaluation.run(Box::new(session)).await;
+        report
+    }else{
+        Ok(vec![])
+    }
 }

 async fn run_suite(suite: &str, current_time: &String, current_date: &String) -> anyhow::Result<()> {
-    let _ = WorkDir::work_from(format!("./{}", &suite))?;
-    let _ = WorkDir::work_from(format!("./{}-{}", &current_date, current_time))?;
-    for Some(evaluation) in EvaluationSuiteFactory::create(suite) {
-        run_eval(evaluation)?.await;
+    if let Ok(_) = WorkDir::work_from(format!("./{}", &suite)) {
+        if let Ok(_) = WorkDir::work_from(format!("./{}-{}", &current_date, current_time)) {
+            if let Some(evals) = EvaluationSuiteFactory::create(suite) {
+                for eval in evals {
+                    run_eval(eval).await?;
+                }
+            }
+        }
     }

+    Ok(())
 }

-pub async fn run_benchmark(suites: Vec<String>) {
+pub async fn run_benchmark(suites: Vec<String>) -> anyhow::Result<()> {
     let suites = EvaluationSuiteFactory::available_evaluations()
         .into_iter()
         .filter(|&s| suites.contains(&s.to_string()))
@@ -45,8 +53,10 @@ pub async fn run_benchmark(suites: Vec<String>) {
     let current_time = Local::now().format("%H:%M:%S").to_string();
     let current_date = Local::now().format("%Y-%m-%d").to_string();

-    let _ = WorkDir::work_from(format!("./benchmark-{}", &provider_name))?;
-    for suite in suites {
-        run_suite(suite, &current_time, &current_date)?.await;
+    if let Ok(_) = WorkDir::work_from(format!("./benchmark-{}", &provider_name)) {
+        for suite in suites {
+            run_suite(suite, &current_time, &current_date).await?;
+        }
     }
+    Ok(())
 }
diff --git a/crates/goose-cli/src/main.rs b/crates/goose-cli/src/main.rs
index e53883b83..becb1d93d 100644
--- a/crates/goose-cli/src/main.rs
+++ b/crates/goose-cli/src/main.rs
@@ -227,7 +227,7 @@ async fn main() -> Result<()> {
             } else {
                 suites
             };
-            run_benchmark(suites).await;
+            let _ = run_benchmark(suites).await;
             return Ok(());
         }
         None => {