diff --git a/reporter/Cargo.toml b/reporter/Cargo.toml index 0f94683..f22eeb5 100644 --- a/reporter/Cargo.toml +++ b/reporter/Cargo.toml @@ -6,4 +6,5 @@ edition = "2021" [dependencies] chrono = "0.4.38" plotters = "0.3.6" +stats-ci = "0.1.1" walkdir = "2.5.0" diff --git a/reporter/src/main.rs b/reporter/src/main.rs index eacbce8..3b6e4ec 100644 --- a/reporter/src/main.rs +++ b/reporter/src/main.rs @@ -4,9 +4,13 @@ use reporter::{ parser::parse, plot::{plot, Line, PlotConfig, Point}, }; +use stats_ci::Confidence; use std::{collections::HashMap, ffi::OsStr, io::Write, path::PathBuf}; use walkdir::{DirEntry, WalkDir}; +/// The confidence level used for computing Y-value confidence intervals. +static CONFIDENCE_LEVEL: f64 = 0.95; + /// Benchmarks to plot. const BENCHES_TO_PLOT: [(&str, &str); 15] = [ // The awfy suite @@ -68,12 +72,14 @@ fn process_file( .and_local_timezone(Local) .unwrap(); // Compute points for the absolute times plot. + let confidence: Confidence = Confidence::new(CONFIDENCE_LEVEL); for (vm, exec_times) in &exec_times { let yval = exec_times.iter().sum::() / (exec_times.len() as f64); let line = abs_lines .entry(vm.to_string()) .or_insert(Line::new(line_colours[vm.as_str()])); - let y_err = (f64_min(exec_times), f64_max(exec_times)); + let ci = stats_ci::mean::Arithmetic::ci(confidence, exec_times).unwrap(); + let y_err = (*ci.left().unwrap(), *ci.right().unwrap()); line.push(Point::new(xval, yval, y_err)); } // Compute Y values for the normalised plot. @@ -83,10 +89,11 @@ fn process_file( .map(|(lua, yklua)| yklua / lua) .collect::>(); let yval = norm_extimes.iter().sum::() / (norm_extimes.len() as f64); + let ci = stats_ci::mean::Arithmetic::ci(confidence, norm_extimes).unwrap(); norm_line.push(Point::new( xval, yval, - (f64_min(norm_extimes), f64_max(norm_extimes)), + (*ci.left().unwrap(), *ci.right().unwrap()), )); // Record what we need to compute a geometric mean speedup over all benchmarks. 
@@ -121,27 +128,15 @@ fn geomean(vs: &[f64]) -> f64 { prod.powf(1.0 / vs.len() as f64) } -/// Compute the minimum value of 64-bit floats. -/// -/// Panics if the comparison is invalid. -fn f64_min(vs: &[f64]) -> f64 { - *vs.iter().min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap() -} - -/// Compute the maximum value of 64-bit floats. -/// -/// Panics if the comparison is invalid. -fn f64_max(vs: &[f64]) -> f64 { - *vs.iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap() -} - fn compute_geomean_line(geo_data: &HashMap, Vec>) -> Line { let mut line = Line::new(MAGENTA); + let confidence: Confidence = Confidence::new(CONFIDENCE_LEVEL); for (date, yvals) in geo_data { + let ci = stats_ci::mean::Geometric::ci(confidence, yvals).unwrap(); line.push(Point::new( *date, geomean(yvals), - (f64_min(yvals), f64_max(yvals)), + (*ci.left().unwrap(), *ci.right().unwrap()), )); } line @@ -206,16 +201,25 @@ fn main() { write!(html, "

{bm_name}({bm_arg})

").unwrap(); // Plot aboslute times. + let wallclock_ylabel = format!( + "Wallclock time (ms) with error ({}% CI)", + CONFIDENCE_LEVEL * 100.0 + ); let mut output_path = out_dir.clone(); output_path.push(format!("{bm_name}_{bm_arg}_vs_yklua.png")); let config = PlotConfig::new( "Benchmark performance over time", "Date", - "Wallclock time (ms) with error (min/max)", + &wallclock_ylabel, abs_lines, output_path, ); - plot(&config); + + let last_x = plot(&config); + + // Indicate when the last data point was collected. + write!(html, "

Last X value is {}

", last_x).unwrap(); + write!( html, "", @@ -226,10 +230,11 @@ fn main() { // Plot data normalised to yklua. let mut output_path = out_dir.clone(); output_path.push(format!("{bm_name}_{bm_arg}_norm_yklua.png")); + let speedup_ylabel = format!("Speedup with error ({}% CI)", CONFIDENCE_LEVEL * 100.0); let config = PlotConfig::new( "Benchmark performance over time, normalised to regular Lua", "Date", - "Speedup with error (min/max)", + &speedup_ylabel, HashMap::from([("Norm".into(), norm_line)]), output_path, ); @@ -244,10 +249,14 @@ fn main() { // Plot the geomean summary. let geo_norm_line = compute_geomean_line(&geo_data); + let geospeedup_ylabel = format!( + "Geometric mean speedup with error ({}% CI)", + CONFIDENCE_LEVEL * 100.0 + ); let config = PlotConfig::new( "Benchmark performance over time, normalised to regular Lua (over all benchmarks)", "Date", - "Geometric mean speedup with error (min/max)", + &geospeedup_ylabel, HashMap::from([("Norm".into(), geo_norm_line)]), geoabs_output_path, ); diff --git a/reporter/src/plot.rs b/reporter/src/plot.rs index 235344e..942a1c4 100644 --- a/reporter/src/plot.rs +++ b/reporter/src/plot.rs @@ -116,7 +116,11 @@ fn find_plot_extents(lines: &HashMap) -> (Range>, } /// Plot some data into a SVG file. -pub fn plot(config: &PlotConfig) { +/// +/// If we are plotting more than one line, then they are assumed to contain the same x-values. +/// +/// Returns the last (rightmost) X value. +pub fn plot(config: &PlotConfig) -> DateTime { let (x_extent, y_extent) = find_plot_extents(&config.lines); let drawing = BitMapBackend::new(&config.output_path, (850, 600)).into_drawing_area(); @@ -145,11 +149,18 @@ pub fn plot(config: &PlotConfig) { .draw() .unwrap(); + let mut last_x = None; for (vm, line) in &config.lines { let colour = line.colour; // Sort the points so that the line doesn't zig-zag back and forth across the X-axis. 
let mut sorted_points = line.points.iter().map(|p| (p.x, p.y)).collect::>(); sorted_points.sort_by(|p1, p2| p1.0.partial_cmp(&p2.0).unwrap()); + + // Cache the rightmost X value. + if last_x.is_none() { + last_x = Some(sorted_points.last().unwrap().0); + } + // Draw line. chart .draw_series(LineSeries::new(sorted_points, colour)) @@ -175,4 +186,6 @@ pub fn plot(config: &PlotConfig) { .unwrap(); drawing.present().unwrap(); + + last_x.unwrap() }