diff --git a/tests/tools/python/impl/__init__.py b/tests/tools/python/impl/__init__.py
index c8f04fac7..e8abc8b4a 100644
--- a/tests/tools/python/impl/__init__.py
+++ b/tests/tools/python/impl/__init__.py
@@ -1,7 +1,9 @@
 from .plot_benchmark_results import (
     read_benchmark_data,
     add_track_multiplicity_column,
+    prepare_data,
     plot_benchmark,
+    generate_plot_series,
 )
 from .plot_navigation_validation import (
     read_scan_data,
diff --git a/tests/tools/python/impl/plot_benchmark_results.py b/tests/tools/python/impl/plot_benchmark_results.py
index 3f2cc69af..1754a8f3b 100644
--- a/tests/tools/python/impl/plot_benchmark_results.py
+++ b/tests/tools/python/impl/plot_benchmark_results.py
@@ -10,9 +10,10 @@
 # python includes
 from collections import namedtuple
 import json
-import numpy as np
+import itertools
 import os
 import pandas as pd
+import sys
 
 # How to label plots
 label_data = namedtuple("label_data", "title label x_axis y_axis")
@@ -34,7 +35,7 @@
 """ Read google benchmark data from json file """
 
 
-def read_benchmark_data(input_path, logging, benchmark_file):
+def read_benchmark_data(logging, input_path, benchmark_file):
 
     # Input data directory
     input_dir = os.fsencode(input_path)
@@ -65,8 +66,8 @@ def read_benchmark_data(input_path, logging, benchmark_file):
 
 
 def add_track_multiplicity_column(df):
-    assert (
-        str(df["run_name"][0]).find("_TRACKS") != -1
+    assert "_TRACKS" in str(
+        df["run_name"][0]
     ), "Benchmark case name not correctly formatted: (BM_PROPAGATION__<#tracks>_TRACKS)"
 
     # The number of tracks is the second last part of the benchmark name
@@ -76,6 +77,35 @@ def add_track_multiplicity_column(df):
     df["x"] = df["run_name"].apply(find_track_multiplicity)
 
 
+""" Read the benchmark data and prepare it for plotting """
+
+
+def prepare_data(logging, input_dir, file):
+
+    # Convert benchmark timings to 'ms'
+    unit_conversion = {"ns": 10**-6, "us": 10**-3, "ms": 1, "s": 10**3}
+
+    # Read the data part into a pandas frame
+    context, data = read_benchmark_data(logging, input_dir, file)
+    # If the data could not be loaded, quit
+    if context is None or data is None:
+        logging.warning(f"Failed to read data in file: {file}")
+        sys.exit(1)
+
+    # Add the number of tracks per benchmark case as new column 'x'
+    # A column called 'x' is expected by the 'plot_benchmark' method
+    add_track_multiplicity_column(data)
+
+    # Convert timings to 'ms'
+    bench_time_unit = data["time_unit"][0]
+    to_milliseconds = lambda x: (x * unit_conversion[bench_time_unit])
+
+    data["real_time"] = data["real_time"].apply(to_milliseconds)
+    data["cpu_time"] = data["cpu_time"].apply(to_milliseconds)
+
+    return context, data
+
+
 """ Plot the benchmark latency for different hardware and algebra plugins """
 
 
@@ -144,3 +174,92 @@ def plot_benchmark(
     )
 
     return plot_data
+
+
+""" Plot the data of all benchmark files given in 'data_files' """
+
+
+def generate_plot_series(
+    logging,
+    input_dir,
+    det_name,
+    file_list,
+    label_list,
+    title,
+    plot_series_name,
+    plot_factory,
+    out_format,
+):
+
+    # Cycle through marker style per plot
+    marker_styles = ["o", "x", "*", "v", "s", "^", "<", ">"]
+
+    # Plot types for benchmarks
+    benchmark_plots = namedtuple("benchmark_plots", "latency throughput")
+
+    # Save the different plots per hardware backend
+    plots = benchmark_plots(None, None)
+    marker_style_cycle = itertools.cycle(marker_styles)
+
+    # Go through all benchmark data files for this hardware backend type
+    for i, file in enumerate(file_list):
+        # Benchmark results for the next algebra plugin
+        context, data = 
prepare_data(logging, input_dir, file)
+        marker = next(marker_style_cycle)
+
+        # Initialize plots
+        if i == 0:
+
+            # Plot the data against the number of tracks
+            latency_plot = plot_benchmark(
+                context=context,
+                df=data,
+                plot_factory=plot_factory,
+                label=label_list[i],
+                data_type="real_time",
+                marker=marker,
+                title=title,
+            )
+
+            throughput_plot = plot_benchmark(
+                context=context,
+                df=data,
+                plot_factory=plot_factory,
+                label=label_list[i],
+                data_type="TracksPropagated",
+                marker=marker,
+                title=title,
+            )
+
+            plots = benchmark_plots(latency_plot, throughput_plot)
+
+        # Add new data to plots
+        else:
+            plot_benchmark(
+                context=context,
+                df=data,
+                plot_factory=plot_factory,
+                label=label_list[i],
+                data_type="real_time",
+                marker=marker,
+                plot=plots.latency,
+            )
+
+            plot_benchmark(
+                context=context,
+                df=data,
+                plot_factory=plot_factory,
+                label=label_list[i],
+                data_type="TracksPropagated",
+                marker=marker,
+                plot=plots.throughput,
+            )
+
+    # Write to disk
+    plot_factory.write_plot(
+        plots.latency, f"{det_name}_{plot_series_name}_latency", out_format
+    )
+
+    plot_factory.write_plot(
+        plots.throughput, f"{det_name}_{plot_series_name}_throughput", out_format
+    )
diff --git a/tests/tools/python/propagation_benchmarks.py b/tests/tools/python/propagation_benchmarks.py
index 53926c421..277904cfe 100644
--- a/tests/tools/python/propagation_benchmarks.py
+++ b/tests/tools/python/propagation_benchmarks.py
@@ -6,7 +6,7 @@
 
 # detray imports
 from impl import plot_benchmark_results as benchmark_plotter
-from impl import read_benchmark_data, add_track_multiplicity_column
+from impl import read_benchmark_data, generate_plot_series
 from options import (
     common_options,
     detector_io_options,
@@ -26,20 +26,16 @@
 import argparse
 from collections import namedtuple
 import copy
-import itertools
 import os
 import platform
 import subprocess
 import sys
 
-# Convert benchmark timings to 'ms'
-unit_conversion = {"ns": 10**-6, "um": 10**-3, "ms": 1, "s": 10**3}
-
-# Known processor types
-proc_types = ["cpu", "cuda", "sycl"]
+# Known hardware backend types
+bknd_types = ["cpu", "cuda", "sycl"]
 
 # Patterns to be removed from processor names for simplicity
-proc_patterns = ["CPU", "(TM)", "GHz", "@"]
+bknd_patterns = ["CPU", "(TM)", "GHz", "@"]
 
 
 def __main__():
@@ -142,14 +138,16 @@
         file_name, file_extension = os.path.splitext(file)
         format_msg = f"Benchmark data file name needs to be of the form <detector>_benchmark_data_<backend>_<algebra-plugin>.json: e.g. 'toy_detector_benchmark_data_cpu_eigen.json' ({file})"
 
-        if file_name.find(f"{det_name}_benchmark_data") == -1:
+        if f"{det_name}_benchmark_data" not in file_name:
             logging.error("Wrong prefix: " + format_msg)
             sys.exit(1)
         if file_extension != ".json":
             logging.error("Wrong file extension. 
Should be '.json': " + format_msg)
             sys.exit(1)
-        if not any(p in file_name for p in proc_types):
-            logging.error("No processor type found (cpu|cuda|sycl): " + format_msg)
+        if not any(p in file_name for p in bknd_types):
+            logging.error(
+                "No hardware backend type found (cpu|cuda|sycl): " + format_msg
+            )
             sys.exit(1)
         if not any(p in file_name for p in algebra_plugins):
             logging.error("No algebra-plugin name found: " + format_msg)
@@ -158,7 +156,7 @@
             input_data_files.append(file)
 
     # Gather and check benchmark executables and resulting data files for every
-    # processor type and algebra plugin
+    # hardware backend type and algebra plugin
     benchmark_files = namedtuple("benchmark_files", "bin data_files")
     benchmarks = {"cpu": benchmark_files([], [])}
     if args.cuda:
@@ -167,10 +165,10 @@
         # benchmarks["sycl"] = benchmark_files([], [])
         logging.error("SYCL propagation benchmark is not implemented")
 
-    for proc, files in benchmarks.items():
-        for plugin in algebra_plugins:
-            binary = f"{bindir}/detray_propagation_benchmark_{proc}_{plugin}"
-            data_file = f"{det_name}_benchmark_data_{proc}_{plugin}.json"
+    for bknd, files in benchmarks.items():
+        for algebra in algebra_plugins:
+            binary = f"{bindir}/detray_propagation_benchmark_{bknd}_{algebra}"
+            data_file = f"{det_name}_benchmark_data_{bknd}_{algebra}.json"
 
             # If the results should not be read from file, run the benchmark
             if data_file not in (os.path.basename(f) for f in input_data_files):
@@ -243,21 +241,25 @@
         args_list = args_list + ["--material_file", args.material_file]
 
     # Run the benchmarks
-    for proc, files in benchmarks.items():
+    for bknd, files in benchmarks.items():
         if args.cuda or args.sycl:
             # Try to get the GPU name
-            gpu = "Unknown"
-            # gpu = str(subprocess.check_output(["nvidia-smi", "-L"]))
+            gpu = ""
+            try:
+                gpu = str(subprocess.check_output(["nvidia-smi", "-L"]))
+            except (subprocess.CalledProcessError, OSError):
+                gpu = "Unknown"
+
             benchmark_options.append(f"--benchmark_context=GPU={gpu}")
 
         for binary in files.bin:
-            plugin = binary.split(f"benchmark_{proc}_")[-1]
+            algebra = binary.split(f"benchmark_{bknd}_")[-1]
 
             subprocess.run(
                 [
                     binary,
-                    f"--benchmark_context=Plugin={plugin}",
-                    f"--benchmark_out=./{det_name}_benchmark_data_{proc}_{plugin}.json",
+                    f"--benchmark_context=Algebra={algebra}",
+                    f"--benchmark_out=./{det_name}_benchmark_data_{bknd}_{algebra}.json",
                 ]
                 + benchmark_options
                 + args_list
@@ -268,32 +270,9 @@
     logging.info("Generating plots...\n")
     plot_factory = plt_factory(out_dir, logging)
-    marker_styles = itertools.cycle(["o", "x", "*", "v", "s", "^", "<", ">"])
-
-    # Read the benchmark data and prepare it for plotting
-    def __prep_data(logging, input_dir, file):
-        # Read the data part into a pandas frame
-        context, data = read_benchmark_data(input_dir, logging, file)
-        # If the cpu data could not be loaded, quit
-        if proc == "cpu" and (context is None or data is None):
-            logging.warning(f"Failed to read data in file: {file}")
-            sys.exit(1)
-
-        # Add the number of tracks per benchmark case as new column 'x'
-        # A column called 'x' is expected by the 'plot_benchmark' method
-        add_track_multiplicity_column(data)
-
-        # Convert timings to 'ms'
-        bench_time_unit = data["time_unit"][0]
-        to_milliseconds = lambda x: (x * unit_conversion[bench_time_unit])
-
-        data["real_time"] = data["real_time"].apply(to_milliseconds)
-        data["cpu_time"] = data["cpu_time"].apply(to_milliseconds)
-
-        return context, data
 
     # Simpler processor tag
-    def __compactify_proc_name(name, patterns=proc_patterns):
+    def 
__compactify_bknd_name(name, patterns=bknd_patterns): out = "" for sub_string in name.split(" "): if any(p in sub_string for p in patterns): @@ -303,78 +282,61 @@ def __compactify_proc_name(name, patterns=proc_patterns): return out - benchmark_plots = namedtuple("benchmark_plots", "latency throughput") - plot_dict = {} - - for proc, benchmark_data in benchmarks.items(): - - # Go through all benchmark data files for this processor type - for i, data_file in enumerate(benchmark_data.data_files): - file_stem, extension = os.path.splitext(data_file) - plugin = file_stem.split(f"{det_name}_benchmark_data_{proc}_")[-1] - - # Benchmark results for the next algebra plugin - context, data = __prep_data(logging, input_dir, data_file) - marker = next(marker_styles) - - # Initialize plots - if i == 0: - proc_name = __compactify_proc_name( - context["CPU" if proc == "cpu" else "GPU"] - ) - - # Plot the data against the number of tracks - latency_plot = benchmark_plotter.plot_benchmark( - context=context, - df=data, - plot_factory=plot_factory, - label=f"{plugin}", - data_type="real_time", - marker=marker, - title=proc_name, - ) - - throughput_plot = benchmark_plotter.plot_benchmark( - context=context, - df=data, - plot_factory=plot_factory, - label=f"{plugin}", - data_type="TracksPropagated", - marker=marker, - title=proc_name, - ) - - plot_dict[proc] = benchmark_plots(latency_plot, throughput_plot) - - # Add new data to plots - else: - benchmark_plotter.plot_benchmark( - context=context, - df=data, - plot_factory=plot_factory, - label=f"{plugin}", - data_type="real_time", - marker=marker, - plot=plot_dict[proc].latency, - ) - - benchmark_plotter.plot_benchmark( - context=context, - df=data, - plot_factory=plot_factory, - label=f"{plugin}", - data_type="TracksPropagated", - marker=marker, - plot=plot_dict[proc].throughput, - ) - - # Write to disk - plot_factory.write_plot( - plot_dict[proc].latency, f"{det_name}_prop_latency_{proc}", out_format + # Plot all data files per hardware backend (different algebra-plugins) + for bknd, benchmark_data in benchmarks.items(): + # Peek into the benchmark context to get the name of the backend + context, _ = read_benchmark_data( + logging, input_dir, benchmark_data.data_files[0] + ) + bknd_name = __compactify_bknd_name(context["CPU" if "cpu" in bknd else "GPU"]) + + # Generate plot labels + plot_labels = [] + for file in benchmark_data.data_files: + # Get hardware backend and algebra-plugin from file name + file_stem, _ = os.path.splitext(file) + algebra = file_stem.split(f"{det_name}_benchmark_data_{bknd}_")[-1] + plot_labels.append(algebra) + + generate_plot_series( + logging, + input_dir, + det_name, + benchmark_data.data_files, + plot_labels, + f"hardware backend: {bknd.upper()} ({bknd_name} )", + f"prop_benchmark_algebra-plugin_comparison_{bknd}", + plot_factory, + out_format, ) - plot_factory.write_plot( - plot_dict[proc].throughput, f"{det_name}_prop_throughput_{proc}", out_format + # Plot results for different hardware backends using the same algebra plugin + for algebra in algebra_plugins: + data_files_per_plugin = [] + plot_labels = [] + for bknd, benchmark_data in benchmarks.items(): + # Peek into the benchmark context to get the name of the backend + context, _ = read_benchmark_data( + logging, input_dir, benchmark_data.data_files[0] + ) + bknd_name = __compactify_bknd_name( + context["CPU" if "cpu" in bknd else "GPU"] + ) + for data_file in benchmark_data.data_files: + if algebra in data_file: + data_files_per_plugin.append(data_file) + 
plot_labels.append(f"{bknd}: {bknd_name}") + + generate_plot_series( + logging, + input_dir, + det_name, + data_files_per_plugin, + plot_labels, + f"algebra-plugin: {algebra}", + f"prop_benchmark_backend_comparison_{algebra}", + plot_factory, + out_format, )
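Reviewer note: a minimal usage sketch (not part of the patch) for driving the refactored helper on its own. It assumes the working directory is tests/tools/python and that a benchmark JSON such as the hypothetical 'toy_detector_benchmark_data_cpu_eigen.json' below has already been produced by one of the propagation benchmark binaries:

# Hypothetical stand-alone driver for the new prepare_data helper
import logging
import sys

from impl import prepare_data

logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Assumed inputs: adjust to an existing benchmark data file
input_dir = "./"
data_file = "toy_detector_benchmark_data_cpu_eigen.json"

# Returns the google benchmark context plus a pandas frame with the timings
# converted to 'ms' and the track multiplicity in the extra column 'x'
context, data = prepare_data(logging, input_dir, data_file)
print(data[["x", "real_time", "cpu_time"]])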