diff --git a/tests/tools/python/impl/plot_benchmark_results.py b/tests/tools/python/impl/plot_benchmark_results.py
index 55802c334..3f2cc69af 100644
--- a/tests/tools/python/impl/plot_benchmark_results.py
+++ b/tests/tools/python/impl/plot_benchmark_results.py
@@ -34,7 +34,7 @@
 """ Read google benchmark data from json file """


-def read_benchmark_data(input_path, logging, det_name, plugin):
+def read_benchmark_data(input_path, logging, benchmark_file):

     # Input data directory
     input_dir = os.fsencode(input_path)
@@ -43,11 +43,11 @@ def read_benchmark_data(input_path, logging, det_name, plugin):
     for file_item in os.listdir(input_dir):
         file_name = os.fsdecode(file_item)

-        if (file_name.find(det_name + "_benchmark_data_") != -1) and (
-            file_name.find(plugin) != -1
-        ):
+        if file_name == os.path.basename(benchmark_file):
             file_path = input_path + file_name
             with open(file_path, "r") as file:
+                logging.debug(f"Reading file '{file_path}'")
+
                 results = json.load(file)

                 context = results["context"]
@@ -55,9 +55,7 @@ def read_benchmark_data(input_path, logging, det_name, plugin):

                 return context, data

-    logging.error(
-        f"Could not find benchmark results for '{det_name}' and plugin '{plugin}'"
-    )
+    logging.error(f"Could not find file: {benchmark_file}")

     return None, None

diff --git a/tests/tools/python/material_validation.py b/tests/tools/python/material_validation.py
index b2f20c767..12deaba1c 100644
--- a/tests/tools/python/material_validation.py
+++ b/tests/tools/python/material_validation.py
@@ -20,6 +20,7 @@
     parse_detector_io_options,
     parse_plotting_options,
 )
+from utils import read_detector_name

 # python includes
 import argparse
@@ -144,10 +145,7 @@ def __main__():

     logging.info("Generating data plots...\n")

-    geo_file = open(args.geometry_file)
-    json_geo = json.loads(geo_file.read())
-
-    det_name = json_geo["header"]["common"]["detector"]
+    det_name = read_detector_name(args.geometry_file, logging)
     logging.debug("Detector: " + det_name)

     df_scan, df_cpu, df_cuda = read_material_data(in_dir, logging, det_name, args.cuda)
diff --git a/tests/tools/python/navigation_validation.py b/tests/tools/python/navigation_validation.py
index 87989e27e..8e3802037 100644
--- a/tests/tools/python/navigation_validation.py
+++ b/tests/tools/python/navigation_validation.py
@@ -21,6 +21,7 @@
     parse_plotting_options,
 )
 from plotting import pyplot_factory as plt_factory
+from utils import read_detector_name

 # python imports
 import argparse
@@ -182,10 +183,7 @@ def __main__():

     logging.info("Generating data plots...\n")

-    geo_file = open(args.geometry_file)
-    json_geo = json.loads(geo_file.read())
-
-    det_name = json_geo["header"]["common"]["detector"]
+    det_name = read_detector_name(args.geometry_file, logging)
     logging.debug("Detector: " + det_name)

     # Check the data path (should have been created when running the validation)
diff --git a/tests/tools/python/plotting/pyplot_factory.py b/tests/tools/python/plotting/pyplot_factory.py
index e66f709b3..98fefffd9 100644
--- a/tests/tools/python/plotting/pyplot_factory.py
+++ b/tests/tools/python/plotting/pyplot_factory.py
@@ -113,7 +113,6 @@ def graph(
         y_min=None,
         y_max=None,
         color=None,
-        alpha=0.75,
         marker=".",
         set_log_x=False,
         set_log_y=False,
@@ -186,8 +185,7 @@ def add_graph(
         y_errors=None,
         label="",
         marker="+",
-        color="tab:orange",
-        alpha=0.75,
+        color=None,
     ):
         # Nothing left to do
         if len(y) == 0 or plot.data is None:
diff --git a/tests/tools/python/propagation_benchmarks.py b/tests/tools/python/propagation_benchmarks.py
index 60aa7a34b..53926c421 100644
--- a/tests/tools/python/propagation_benchmarks.py
+++ b/tests/tools/python/propagation_benchmarks.py
@@ -20,10 +20,12 @@
     parse_plotting_options,
 )
 from plotting import pyplot_factory as plt_factory
+from utils import read_detector_name

 # python imports
 import argparse
-import json
+from collections import namedtuple
+import copy
 import itertools
 import os
 import platform
@@ -33,6 +35,12 @@
 # Convert benchmark timings to 'ms'
 unit_conversion = {"ns": 10**-6, "um": 10**-3, "ms": 1, "s": 10**3}

+# Known processor types
+proc_types = ["cpu", "cuda", "sycl"]
+
+# Patterns to be removed from processor names for simplicity
+proc_patterns = ["CPU", "(TM)", "GHz", "@"]
+

 def __main__():

@@ -98,6 +106,14 @@ def __main__():
         default=["array"],
         type=str,
     )
+    parser.add_argument(
+        "--data_files",
+        "-f",
+        nargs="*",
+        help=("Read the benchmark results from a Google benchmark json file instead."),
+        default=[],
+        type=str,
+    )

     # Parse options
     args = parser.parse_args()
@@ -109,34 +125,69 @@ def __main__():
     # Check bin path
     bindir = args.bindir.strip("/")

-    # Gather and check benchmark executables for every plugin
-    cpu_benchmarks = []
-    cuda_benchmarks = []
+    # Get detector name
+    det_name = read_detector_name(args.geometry_file, logging)
+    logging.debug("Detector: " + det_name)
+
+    # Unique set of algebra plugins to be included in the plots
     algebra_plugins = set(args.algebra_plugins)
-    if "array" not in algebra_plugins:
-        algebra_plugins.insert(0, "array")

-    for plugin in algebra_plugins:
-        cpu_benchmark = bindir + "/detray_propagation_benchmark_cpu_" + plugin
-        cuda_benchmark = bindir + "/detray_detector_validation_cuda_" + plugin
+    # Check user provided benchmark result files
+    input_data_files = []
+    for file in args.data_files:
+        if not os.path.isfile(file):
+            logging.error(f"File not found! ({file})")
+            sys.exit(1)
+
+        file_name, file_extension = os.path.splitext(file)
+        format_msg = f"Benchmark data file name needs to be of the form <det_name>_benchmark_data_<proc>_<plugin>.json: e.g. 'toy_detector_benchmark_data_cpu_eigen.json' ({file})"

-        if not os.path.isdir(bindir) or not os.path.isfile(cpu_benchmark):
-            logging.error(
-                f"Propagation benchmark binary was not found! ({cpu_benchmark})"
-            )
+        if file_name.find(f"{det_name}_benchmark_data") == -1:
+            logging.error("Wrong prefix: " + format_msg)
+            sys.exit(1)
+        if file_extension != ".json":
+            logging.error("Wrong file extension. Should be '.json': " + format_msg)
+            sys.exit(1)
+        if not any(p in file_name for p in proc_types):
+            logging.error("No processor type found (cpu|cuda|sycl): " + format_msg)
+            sys.exit(1)
+        if not any(p in file_name for p in algebra_plugins):
+            logging.error("No algebra-plugin name found: " + format_msg)
             sys.exit(1)

-        cpu_benchmarks.append(cpu_benchmark)
-        if os.path.isfile(cuda_benchmark):
-            cuda_benchmarks.append(cuda_benchmark)
-        else:
-            logging.error(
-                f"Could not find CUDA propagation benchmark executable: {cuda_benchmark}"
-            )
+        input_data_files.append(file)

+    # Gather and check benchmark executables and resulting data files for every
+    # processor type and algebra plugin
+    benchmark_files = namedtuple("benchmark_files", "bin data_files")
+    benchmarks = {"cpu": benchmark_files([], [])}
+    if args.cuda:
+        benchmarks["cuda"] = benchmark_files([], [])
     if args.sycl:
+        # benchmarks["sycl"] = benchmark_files([], [])
         logging.error("SYCL propagation benchmark is not implemented")

+    for proc, files in benchmarks.items():
+        for plugin in algebra_plugins:
+            binary = f"{bindir}/detray_propagation_benchmark_{proc}_{plugin}"
+            data_file = f"{det_name}_benchmark_data_{proc}_{plugin}.json"
+
+            # If the results should not be read from file, run the benchmark
+            if data_file not in (os.path.basename(f) for f in input_data_files):
+                # Register binary if it exists
+                if not os.path.isdir(bindir) or not os.path.isfile(binary):
+                    logging.warning(
+                        f"Propagation benchmark binary not found! ({binary})"
+                    )
+                else:
+                    files.bin.append(binary)
+                    files.data_files.append(data_file)
+            else:
+                for f in input_data_files:
+                    if data_file == os.path.basename(f):
+                        # Add result file with custom path to be plotted
+                        files.data_files.append(f)
+
     # -----------------------------------------------------------------------run

     # Pass on the options for the validation tools
@@ -191,40 +242,22 @@ def __main__():
     if args.material_file:
         args_list = args_list + ["--material_file", args.material_file]

-    geo_file = open(args.geometry_file)
-    json_geo = json.loads(geo_file.read())
-    det_name = json_geo["header"]["common"]["detector"]
-
-    # Run the host benchmark
-    logging.debug("Running CPU Propagation Benchmarks")
-
-    for benchmark in cpu_benchmarks:
-        plugin = benchmark.split("benchmark_cpu_")[-1]
-        subprocess.run(
-            [
-                benchmark,
-                f"--benchmark_context=Plugin={plugin}",
-                f"--benchmark_out=./{det_name}_benchmark_data_cpu_{plugin}.json",
-            ]
-            + benchmark_options
-            + args_list
-        )
+    # Run the benchmarks
+    for proc, files in benchmarks.items():

-    # Run the device benchmark (if it has been built)
-    if args.cuda:
-        logging.debug("Running CUDA Propagation Benchmarks")
-
-        # Try to get the GPU name
-        gpu = str(subprocess.check_output(["nvidia-smi", "-L"]))
-        benchmark_options.append(f"--benchmark_context=GPU={gpu}")
+        if args.cuda or args.sycl:
+            # Try to get the GPU name
+            gpu = "Unknown"
+            # gpu = str(subprocess.check_output(["nvidia-smi", "-L"]))
+            benchmark_options.append(f"--benchmark_context=GPU={gpu}")

-        for benchmark in cuda_benchmarks:
-            plugin = benchmark.split("_")[-1]
+        for binary in files.bin:
+            plugin = binary.split(f"benchmark_{proc}_")[-1]
             subprocess.run(
                 [
-                    benchmark,
+                    binary,
                     f"--benchmark_context=Plugin={plugin}",
-                    f"--benchmark_out=./{det_name}_benchmark_data_cuda_{plugin}.json",
+                    f"--benchmark_out=./{det_name}_benchmark_data_{proc}_{plugin}.json",
                 ]
                 + benchmark_options
                 + args_list
@@ -232,26 +265,27 @@ def __main__():

     # ----------------------------------------------------------------------plot

-    logging.info("\nGenerating plots...\n")
-    logging.debug("Detector: " + det_name)
+    logging.info("Generating plots...\n")

     plot_factory = plt_factory(out_dir, logging)
     marker_styles = itertools.cycle(["o", "x", "*", "v", "s", "^", "<", ">"])

-    # Read the beanchmark data and prepare it for plotting
-    def __prep_data(logging, input_dir, det_name, plugin):
+    # Read the benchmark data and prepare it for plotting
+    def __prep_data(logging, input_dir, file):

         # Read the data part into a pandas frame
-        context, data = read_benchmark_data(input_dir, logging, det_name, plugin)
-        # If the data could not be loaded, quit
-        if context is None or data is None:
+        context, data = read_benchmark_data(input_dir, logging, file)
+        # If the cpu data could not be loaded, quit
+        if proc == "cpu" and (context is None or data is None):
+            logging.warning(f"Failed to read data in file: {file}")
             sys.exit(1)

         # Add the number of tracks per benchmark case as new column 'x'
-        # This column is expected by the 'plot_benchmark' method
+        # A column called 'x' is expected by the 'plot_benchmark' method
         add_track_multiplicity_column(data)

         # Convert timings to 'ms'
-        to_milliseconds = lambda x: (x * unit_conversion[data["time_unit"][0]])
+        bench_time_unit = data["time_unit"][0]
+        to_milliseconds = lambda x: (x * unit_conversion[bench_time_unit])
         data["real_time"] = data["real_time"].apply(to_milliseconds)
         data["cpu_time"] = data["cpu_time"].apply(to_milliseconds)
@@ -259,82 +293,90 @@ def __prep_data(logging, input_dir, det_name, plugin):
         return context, data

     # Simpler processor tag
-    def __compactify_proc_name(name):
-        syllables = name.split(" ")
+    def __compactify_proc_name(name, patterns=proc_patterns):
         out = ""
-        for s in syllables:
-            if (
-                (s.find("CPU") != -1)
-                or (s.find("(TM)") != -1)
-                or (s.find("GHz") != -1)
-                or (s.find("@") != -1)
-            ):
+        for sub_string in name.split(" "):
+            if any(p in sub_string for p in patterns):
                 continue
-            out = f"{out} {s}"
-        return out
-
-    # 'array' plugin always exists: use it to intialize the plots)
-    context, data = __prep_data(logging, input_dir, det_name, "array")
-    proc = __compactify_proc_name(context["CPU"])
-    marker = next(marker_styles)
-
-    # Plot the data against the number of tracks
-    latency_plot = benchmark_plotter.plot_benchmark(
-        context=context,
-        df=data,
-        plot_factory=plot_factory,
-        title=proc,
-        label=f"array",
-        data_type="real_time",
-        marker=marker,
-    )
+            out = f"{out} {sub_string}"

-    throughput_plot = benchmark_plotter.plot_benchmark(
-        context=context,
-        df=data,
-        plot_factory=plot_factory,
-        title=proc,
-        label=f"array",
-        data_type="TracksPropagated",
-        marker=marker,
-    )
+        return out

-    # Add plots for the other algebra plugins
-    for plugin in algebra_plugins:
-        if plugin == "array":
-            continue
-
-        # Benchmark results for the next algebra plugin
-        context, data = __prep_data(logging, input_dir, det_name, plugin)
-        marker = next(marker_styles)
-
-        # Add new data to plots
-        benchmark_plotter.plot_benchmark(
-            context=context,
-            df=data,
-            plot_factory=plot_factory,
-            label=f"{plugin}",
-            data_type="real_time",
-            marker=marker,
-            plot=latency_plot,
+    benchmark_plots = namedtuple("benchmark_plots", "latency throughput")
+    plot_dict = {}
+
+    for proc, benchmark_data in benchmarks.items():
+
+        # Go through all benchmark data files for this processor type
+        for i, data_file in enumerate(benchmark_data.data_files):
+            file_stem, extension = os.path.splitext(data_file)
+            plugin = file_stem.split(f"{det_name}_benchmark_data_{proc}_")[-1]
+
+            # Benchmark results for the next algebra plugin
+            context, data = __prep_data(logging, input_dir, data_file)
+            marker = next(marker_styles)
+
+            # Initialize plots
+            if i == 0:
+                proc_name = __compactify_proc_name(
+                    context["CPU" if proc == "cpu" else "GPU"]
+                )
+
+                # Plot the data against the number of tracks
+                latency_plot = benchmark_plotter.plot_benchmark(
+                    context=context,
+                    df=data,
+                    plot_factory=plot_factory,
+                    label=f"{plugin}",
+                    data_type="real_time",
+                    marker=marker,
+                    title=proc_name,
+                )
+
+                throughput_plot = benchmark_plotter.plot_benchmark(
+                    context=context,
+                    df=data,
+                    plot_factory=plot_factory,
+                    label=f"{plugin}",
+                    data_type="TracksPropagated",
+                    marker=marker,
+                    title=proc_name,
+                )
+
+                plot_dict[proc] = benchmark_plots(latency_plot, throughput_plot)
+
+            # Add new data to plots
+            else:
+                benchmark_plotter.plot_benchmark(
+                    context=context,
+                    df=data,
+                    plot_factory=plot_factory,
+                    label=f"{plugin}",
+                    data_type="real_time",
+                    marker=marker,
+                    plot=plot_dict[proc].latency,
+                )
+
+                benchmark_plotter.plot_benchmark(
+                    context=context,
+                    df=data,
+                    plot_factory=plot_factory,
+                    label=f"{plugin}",
+                    data_type="TracksPropagated",
+                    marker=marker,
+                    plot=plot_dict[proc].throughput,
+                )
+
+        # Write to disk
+        plot_factory.write_plot(
+            plot_dict[proc].latency, f"{det_name}_prop_latency_{proc}", out_format
         )

-        benchmark_plotter.plot_benchmark(
-            context=context,
-            df=data,
-            plot_factory=plot_factory,
-            label=f"{plugin}",
-            data_type="TracksPropagated",
-            marker=marker,
-            plot=throughput_plot,
+        plot_factory.write_plot(
+            plot_dict[proc].throughput, f"{det_name}_prop_throughput_{proc}", out_format
         )

-    # Write to disk
-    plot_factory.write_plot(latency_plot, det_name + "_prop_latency", out_format)
-
-    plot_factory.write_plot(throughput_plot, det_name + "_prop_throughput", out_format)
-
 # ------------------------------------------------------------------------------

diff --git a/tests/tools/python/utils/__init__.py b/tests/tools/python/utils/__init__.py
new file mode 100644
index 000000000..3778bd72a
--- /dev/null
+++ b/tests/tools/python/utils/__init__.py
@@ -0,0 +1 @@
+from .io_utils import read_detector_name
diff --git a/tests/tools/python/utils/io_utils.py b/tests/tools/python/utils/io_utils.py
new file mode 100644
index 000000000..59948b449
--- /dev/null
+++ b/tests/tools/python/utils/io_utils.py
@@ -0,0 +1,23 @@
+# Detray library, part of the ACTS project (R&D line)
+#
+# (c) 2025 CERN for the benefit of the ACTS project
+#
+# Mozilla Public License Version 2.0
+
+import json
+import os
+
+
+""" Read the detector name from geometry json file """
+
+
+def read_detector_name(geometry_file_name, logging):
+    if not os.path.isfile(geometry_file_name):
+        logging.error(f"Geometry json file not found! ({geometry_file_name})")
+        return "unknown_detector"
+
+    with open(geometry_file_name) as geo_file:
+        json_geo = json.loads(geo_file.read())
+        det_name = json_geo["header"]["common"]["detector"]
+
+    return det_name
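
For reference, a minimal usage sketch of the two helpers reworked by this patch: the new utils.read_detector_name and the file-based read_benchmark_data signature. It assumes the snippet is run from tests/tools/python/; the import path for read_benchmark_data, the geometry file name and the cpu/array example file below are illustrative assumptions, not part of the patch.

import logging

from utils import read_detector_name
from impl.plot_benchmark_results import read_benchmark_data  # assumed import path

logging.basicConfig(level=logging.DEBUG)

# Falls back to "unknown_detector" and logs an error if the geometry file is missing
det_name = read_detector_name("toy_detector_geometry.json", logging)

# The reader now takes the concrete result file name instead of (det_name, plugin)
benchmark_file = f"{det_name}_benchmark_data_cpu_array.json"
context, data = read_benchmark_data("./", logging, benchmark_file)

if context is not None:
    print(f"Benchmark context: {context}")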