From 5d8757363e295ef34bfa768edf6656d95513fd38 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Wed, 30 Oct 2024 11:48:41 +0000 Subject: [PATCH] Use 95% confidence intervals instead of the range for error. --- reporter/Cargo.toml | 1 + reporter/src/main.rs | 44 ++++++++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/reporter/Cargo.toml b/reporter/Cargo.toml index 0f94683..f22eeb5 100644 --- a/reporter/Cargo.toml +++ b/reporter/Cargo.toml @@ -6,4 +6,5 @@ edition = "2021" [dependencies] chrono = "0.4.38" plotters = "0.3.6" +stats-ci = "0.1.1" walkdir = "2.5.0" diff --git a/reporter/src/main.rs b/reporter/src/main.rs index 57e6727..3b6e4ec 100644 --- a/reporter/src/main.rs +++ b/reporter/src/main.rs @@ -4,9 +4,13 @@ use reporter::{ parser::parse, plot::{plot, Line, PlotConfig, Point}, }; +use stats_ci::Confidence; use std::{collections::HashMap, ffi::OsStr, io::Write, path::PathBuf}; use walkdir::{DirEntry, WalkDir}; +/// The confidence level used for computing Y-value confidence intervals. +static CONFIDENCE_LEVEL: f64 = 0.95; + /// Benchmarks to plot. const BENCHES_TO_PLOT: [(&str, &str); 15] = [ // The awfy suite @@ -68,12 +72,14 @@ fn process_file( .and_local_timezone(Local) .unwrap(); // Compute points for the absolute times plot. + let confidence: Confidence = Confidence::new(CONFIDENCE_LEVEL); for (vm, exec_times) in &exec_times { let yval = exec_times.iter().sum::() / (exec_times.len() as f64); let line = abs_lines .entry(vm.to_string()) .or_insert(Line::new(line_colours[vm.as_str()])); - let y_err = (f64_min(exec_times), f64_max(exec_times)); + let ci = stats_ci::mean::Arithmetic::ci(confidence, exec_times).unwrap(); + let y_err = (*ci.left().unwrap(), *ci.right().unwrap()); line.push(Point::new(xval, yval, y_err)); } // Compute Y values for the normalised plot. @@ -83,10 +89,11 @@ fn process_file( .map(|(lua, yklua)| yklua / lua) .collect::>(); let yval = norm_extimes.iter().sum::() / (norm_extimes.len() as f64); + let ci = stats_ci::mean::Arithmetic::ci(confidence, norm_extimes).unwrap(); norm_line.push(Point::new( xval, yval, - (f64_min(norm_extimes), f64_max(norm_extimes)), + (*ci.left().unwrap(), *ci.right().unwrap()), )); // Record what we need to compute a geometric mean speedup over all benchmarks. @@ -121,27 +128,15 @@ fn geomean(vs: &[f64]) -> f64 { prod.powf(1.0 / vs.len() as f64) } -/// Compute the minimum value of 64-bit floats. -/// -/// Panics if the comparison is invalid. -fn f64_min(vs: &[f64]) -> f64 { - *vs.iter().min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap() -} - -/// Compute the maximum value of 64-bit floats. -/// -/// Panics if the comparison is invalid. -fn f64_max(vs: &[f64]) -> f64 { - *vs.iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap() -} - fn compute_geomean_line(geo_data: &HashMap, Vec>) -> Line { let mut line = Line::new(MAGENTA); + let confidence: Confidence = Confidence::new(CONFIDENCE_LEVEL); for (date, yvals) in geo_data { + let ci = stats_ci::mean::Geometric::ci(confidence, yvals).unwrap(); line.push(Point::new( *date, geomean(yvals), - (f64_min(yvals), f64_max(yvals)), + (*ci.left().unwrap(), *ci.right().unwrap()), )); } line @@ -206,12 +201,16 @@ fn main() { write!(html, "

{bm_name}({bm_arg})

").unwrap(); // Plot aboslute times. + let wallclock_ylabel = format!( + "Wallclock time (ms) with error ({}% CI)", + CONFIDENCE_LEVEL * 100.0 + ); let mut output_path = out_dir.clone(); output_path.push(format!("{bm_name}_{bm_arg}_vs_yklua.png")); let config = PlotConfig::new( "Benchmark performance over time", "Date", - "Wallclock time (ms) with error (min/max)", + &wallclock_ylabel, abs_lines, output_path, ); @@ -231,10 +230,11 @@ fn main() { // Plot data normalised to yklua. let mut output_path = out_dir.clone(); output_path.push(format!("{bm_name}_{bm_arg}_norm_yklua.png")); + let speedup_ylabel = format!("Speedup with error ({}% CI)", CONFIDENCE_LEVEL * 100.0); let config = PlotConfig::new( "Benchmark performance over time, normalised to regular Lua", "Date", - "Speedup with error (min/max)", + &speedup_ylabel, HashMap::from([("Norm".into(), norm_line)]), output_path, ); @@ -249,10 +249,14 @@ fn main() { // Plot the geomean summary. let geo_norm_line = compute_geomean_line(&geo_data); + let geospeedup_ylabel = format!( + "Geometric mean speedup with error ({}% CI)", + CONFIDENCE_LEVEL * 100.0 + ); let config = PlotConfig::new( "Benchmark performance over time, normalised to regular Lua (over all benchmarks)", "Date", - "Geometric mean speedup with error (min/max)", + &geospeedup_ylabel, HashMap::from([("Norm".into(), geo_norm_line)]), geoabs_output_path, );