Skip to content

Commit

Permalink
Add CPU vs GPU comparison plots per algebra-plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
niermann999 committed Jan 8, 2025
1 parent 8f5a937 commit be882e8
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 120 deletions.
2 changes: 2 additions & 0 deletions tests/tools/python/impl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from .plot_benchmark_results import (
read_benchmark_data,
add_track_multiplicity_column,
prepare_data,
plot_benchmark,
generate_plot_series,
)
from .plot_navigation_validation import (
read_scan_data,
Expand Down
127 changes: 123 additions & 4 deletions tests/tools/python/impl/plot_benchmark_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
# python includes
from collections import namedtuple
import json
import numpy as np
import itertools
import os
import pandas as pd
import sys

# How to label plots: a small record with the fields 'title', 'label',
# 'x_axis' and 'y_axis'.
# NOTE(review): the field meanings are presumed from their names (plot title,
# series label, axis captions) - confirm against the plotting code.
label_data = namedtuple("label_data", "title label x_axis y_axis")
Expand All @@ -34,7 +35,7 @@
""" Read google benchmark data from json file """


def read_benchmark_data(input_path, logging, benchmark_file):
def read_benchmark_data(logging, input_path, benchmark_file):

# Input data directory
input_dir = os.fsencode(input_path)
Expand Down Expand Up @@ -65,8 +66,8 @@ def read_benchmark_data(input_path, logging, benchmark_file):

def add_track_multiplicity_column(df):

assert (
str(df["run_name"][0]).find("_TRACKS") != -1
assert "_TRACKS" in str(
df["run_name"][0]
), "Benchmark case name not correctly formatted: (BM_PROPAGATION_<detector name>_<#tracks>_TRACKS)"

# The number of tracks is the second last part of the benchmark name
Expand All @@ -76,6 +77,35 @@ def add_track_multiplicity_column(df):
df["x"] = df["run_name"].apply(find_track_multiplicity)


""" Read the benchmark data and prepare it for plotting """


def prepare_data(logging, input_dir, file):
    """Read one benchmark file and rescale its timings to milliseconds.

    Args:
        logging: logger-like object; forwarded to 'read_benchmark_data' and
            used here to emit a warning when reading fails.
        input_dir: input location, forwarded unchanged to
            'read_benchmark_data'.
        file: benchmark file identifier, forwarded unchanged to
            'read_benchmark_data'.

    Returns:
        The '(context, data)' pair obtained from 'read_benchmark_data'.
        'data' is modified in place: it gains the column 'x' (added by
        'add_track_multiplicity_column') and its 'real_time' and 'cpu_time'
        columns are converted to milliseconds.

    Raises:
        SystemExit: with exit code 1 if either the context or the data
            could not be read.
        KeyError: if the time unit reported in the data is not known.
    """

    # Factors that convert benchmark timings to 'ms'.
    # Google Benchmark reports microseconds as "us"; the former "um" key is
    # kept so that inputs which relied on it continue to work.
    unit_conversion = {
        "ns": 10**-6,
        "us": 10**-3,
        "um": 10**-3,
        "ms": 1,
        "s": 10**3,
    }

    # Read the data part into a pandas frame
    context, data = read_benchmark_data(logging, input_dir, file)
    # If the benchmark data could not be loaded, quit
    if context is None or data is None:
        logging.warning(f"Failed to read data in file: {file}")
        sys.exit(1)

    # Add the number of tracks per benchmark case as new column 'x'
    # A column called 'x' is expected by the 'plot_benchmark' method
    add_track_multiplicity_column(data)

    # Convert timings to 'ms'. The unit is taken from the first entry only,
    # i.e. all rows are assumed to share the same time unit.
    bench_time_unit = data["time_unit"][0]
    # Resolve the factor once so that an unknown unit fails immediately
    to_milliseconds = unit_conversion[bench_time_unit]

    data["real_time"] = data["real_time"] * to_milliseconds
    data["cpu_time"] = data["cpu_time"] * to_milliseconds

    return context, data


""" Plot the benchmark latency for different hardware and algebra plugins """


Expand Down Expand Up @@ -144,3 +174,92 @@ def plot_benchmark(
)

return plot_data


""" Plot the data of all benchmark files given in 'data_files' """


def generate_plot_series(
    logging,
    input_dir,
    det_name,
    file_list,
    label_list,
    title,
    plot_series_name,
    plot_factory,
    out_format,
):
    """Overlay the benchmark results of several files and write the plots.

    Every entry of 'file_list' is loaded with 'prepare_data' and drawn via
    'plot_benchmark' into two shared plots: one for the 'real_time' column
    (written out as "latency") and one for the 'TracksPropagated' column
    (written out as "throughput"). The first file creates both plots, all
    following files are added to them.

    Args:
        logging: logger-like object, forwarded to 'prepare_data'.
        input_dir: input location, forwarded to 'prepare_data'.
        det_name: first part of the output plot names.
        file_list: benchmark files to plot, one data series per file.
        label_list: series labels, indexed in step with 'file_list'.
        title: title handed to 'plot_benchmark' when the plots are created.
        plot_series_name: second part of the output plot names.
        plot_factory: object forwarded to 'plot_benchmark'; its 'write_plot'
            method is used to write the results.
        out_format: output format handed to 'write_plot'.
    """

    # Cycle through the marker styles, one per data series
    marker_cycle = itertools.cycle(["o", "x", "*", "v", "s", "^", "<", ">"])

    # The two plots shared by all data series; they stay 'None' when
    # 'file_list' is empty
    latency_plot = None
    throughput_plot = None

    for index, (file, marker) in enumerate(zip(file_list, marker_cycle)):
        # Benchmark results for the next data series
        context, data = prepare_data(logging, input_dir, file)

        # Arguments that are identical for both plot types of this series
        series_args = {
            "context": context,
            "df": data,
            "plot_factory": plot_factory,
            "label": label_list[index],
            "marker": marker,
        }

        if index > 0:
            # The plots already exist: add the new data to them
            plot_benchmark(
                data_type="real_time", plot=latency_plot, **series_args
            )
            plot_benchmark(
                data_type="TracksPropagated",
                plot=throughput_plot,
                **series_args,
            )
            continue

        # First series: create the plots against the number of tracks
        latency_plot = plot_benchmark(
            data_type="real_time", title=title, **series_args
        )
        throughput_plot = plot_benchmark(
            data_type="TracksPropagated", title=title, **series_args
        )

    # Write to disk
    latency_name = f"{det_name}_{plot_series_name}_latency"
    plot_factory.write_plot(latency_plot, latency_name, out_format)

    throughput_name = f"{det_name}_{plot_series_name}_throughput"
    plot_factory.write_plot(throughput_plot, throughput_name, out_format)
Loading

0 comments on commit be882e8

Please sign in to comment.