Add CPU vs GPU comparison plots per algebra-plugin

acts-project · Jan 7, 2025 · f458d0c · f458d0c
1 parent c7cb49b
commit f458d0c
Show file tree

Hide file tree

Showing 3 changed files with 200 additions and 115 deletions.
diff --git a/tests/tools/python/impl/__init__.py b/tests/tools/python/impl/__init__.py
@@ -1,7 +1,9 @@
 from .plot_benchmark_results import (
     read_benchmark_data,
     add_track_multiplicity_column,
+    prepare_data,
     plot_benchmark,
+    generate_plot_series,
 )
 from .plot_navigation_validation import (
     read_scan_data,

diff --git a/tests/tools/python/impl/plot_benchmark_results.py b/tests/tools/python/impl/plot_benchmark_results.py
@@ -10,9 +10,10 @@
 # python includes
 from collections import namedtuple
 import json
-import numpy as np
+import itertools
 import os
 import pandas as pd
+import sys
 
 # How to label plots
 label_data = namedtuple("label_data", "title label x_axis y_axis")
@@ -34,7 +35,7 @@
 """ Read google benchmark data from json file """
 
 
-def read_benchmark_data(input_path, logging, benchmark_file):
+def read_benchmark_data(logging, input_path, benchmark_file):
 
     # Input data directory
     input_dir = os.fsencode(input_path)
@@ -76,6 +77,35 @@ def add_track_multiplicity_column(df):
     df["x"] = df["run_name"].apply(find_track_multiplicity)
 
 
+""" Read the benchmark data and prepare it for plotting """
+
+
+def prepare_data(logging, input_dir, file):
+
+    # Convert benchmark timings to 'ms'
+    unit_conversion = {"ns": 10**-6, "um": 10**-3, "ms": 1, "s": 10**3}
+
+    # Read the data part into a pandas frame
+    context, data = read_benchmark_data(logging, input_dir, file)
+    # If the cpu data could not be loaded, quit
+    if context is None or data is None:
+        logging.warning(f"Failed to read data in file: {file}")
+        sys.exit(1)
+
+    # Add the number of tracks per benchmark case as new column 'x'
+    # A column called 'x' is expected by the 'plot_benchmark' method
+    add_track_multiplicity_column(data)
+
+    # Convert timings to 'ms'
+    bench_time_unit = data["time_unit"][0]
+    to_milliseconds = lambda x: (x * unit_conversion[bench_time_unit])
+
+    data["real_time"] = data["real_time"].apply(to_milliseconds)
+    data["cpu_time"] = data["cpu_time"].apply(to_milliseconds)
+
+    return context, data
+
+
 """ Plot the benchmark latency for different hardware and algebra plugins """
 
 
@@ -144,3 +174,92 @@ def plot_benchmark(
         )
 
     return plot_data
+
+
+""" Plot the data of all benchmark files given in 'file_list' """
+
+
+def generate_plot_series(
+    logging,
+    input_dir,
+    det_name,
+    file_list,
+    label_list,
+    title,
+    plot_series_name,
+    plot_factory,
+    out_format,
+):
+
+    # Cylce through marker style per plot
+    marker_styles = ["o", "x", "*", "v", "s", "^", "<", ">"]
+
+    # Plot types for benchmarks
+    benchmark_plots = namedtuple("benchmark_plots", "latency throughput")
+
+    # Save the different plots per hardware backend
+    plots = benchmark_plots(None, None)
+    marker_style_cycle = itertools.cycle(marker_styles)
+
+    # Go through all benchmark data files for this hardware backend type
+    for i, file in enumerate(file_list):
+        # Benchmark results for the next algebra plugin
+        context, data = prepare_data(logging, input_dir, file)
+        marker = next(marker_style_cycle)
+
+        # Initialize plots
+        if i == 0:
+
+            # Plot the data against the number of tracks
+            latency_plot = plot_benchmark(
+                context=context,
+                df=data,
+                plot_factory=plot_factory,
+                label=label_list[i],
+                data_type="real_time",
+                marker=marker,
+                title=title,
+            )
+
+            throughput_plot = plot_benchmark(
+                context=context,
+                df=data,
+                plot_factory=plot_factory,
+                label=label_list[i],
+                data_type="TracksPropagated",
+                marker=marker,
+                title=title,
+            )
+
+            plots = benchmark_plots(latency_plot, throughput_plot)
+
+        # Add new data to plots
+        else:
+            plot_benchmark(
+                context=context,
+                df=data,
+                plot_factory=plot_factory,
+                label=label_list[i],
+                data_type="real_time",
+                marker=marker,
+                plot=plots.latency,
+            )
+
+            plot_benchmark(
+                context=context,
+                df=data,
+                plot_factory=plot_factory,
+                label=label_list[i],
+                data_type="TracksPropagated",
+                marker=marker,
+                plot=plots.throughput,
+            )
+
+    # Write to disk
+    plot_factory.write_plot(
+        plots.latency, f"{det_name}_{plot_series_name}_latency", out_format
+    )
+
+    plot_factory.write_plot(
+        plots.throughput, f"{det_name}_{plot_series_name}_throughput", out_format
+    )