Move data generation into plotting function

EdmundGoodman · Mar 2, 2024 · bde1be6
1 parent 4ed7f9e
commit bde1be6
Show file tree

Hide file tree

Showing 2 changed files with 47 additions and 38 deletions.
diff --git a/src/hpc_multibench/analysis.py b/src/hpc_multibench/analysis.py
@@ -5,9 +5,9 @@
 from enum import Enum, auto
 from re import search as re_search
 
-from hpc_multibench.yaml_model import RooflinePlotModel, LinePlotModel, BarChartModel
-from hpc_multibench.run_configuration import RunConfiguration
 from hpc_multibench.roofline_model import RooflineDataModel
+from hpc_multibench.run_configuration import RunConfiguration
+from hpc_multibench.yaml_model import BarChartModel, LinePlotModel, RooflinePlotModel
 
 
 class PlotBackend(Enum):
@@ -26,18 +26,22 @@ class PlotBackend(Enum):
     import plotext as plt
 else:
     import matplotlib.pyplot as plt
+
     # from labellines import labelLines
 
     if PLOT_BACKEND == PlotBackend.SEABORN:
+        from functools import reduce
+
         import pandas as pd
         import seaborn as sns
-        from functools import reduce
+
         sns.set_theme()
 
 
 def extract_metrics(
     output: str, metric_definitions: dict[str, str]
 ) -> dict[str, str] | None:
+    """Extract a set of specified metrics from an output file's contents."""
     metrics: dict[str, str] = {}
     for metric, regex in metric_definitions.items():
         metric_search = re_search(regex, output)
@@ -51,9 +55,9 @@ def extract_metrics(
 def get_line_plot_data(
     plot: LinePlotModel,
     run_outputs: dict[int, tuple[RunConfiguration, str | None]],
-    metric_definitions: dict[str, str]
+    metric_definitions: dict[str, str],
 ) -> dict[tuple[str, ...], list[tuple[float, float]]]:
-    """Draw a specified line plot for a set of run outputs."""
+    """Get the data needed to plot a specified line plot for a set of runs."""
     data: dict[tuple[str, ...], list[tuple[float, float]]] = {}
 
     # Extract the outputs into the data format needed for the line plot
@@ -93,9 +97,12 @@ def get_line_plot_data(
 
 def draw_line_plot(
     plot: LinePlotModel,
-    data: dict[tuple[str, ...], list[tuple[float, float]]]
+    run_outputs: dict[int, tuple[RunConfiguration, str | None]],
+    metric_definitions: dict[str, str],
 ) -> None:
-    """."""
+    """Draw a specified line plot for a set of run outputs."""
+    data = get_line_plot_data(plot, run_outputs, metric_definitions)
+
     if PLOT_BACKEND == PlotBackend.SEABORN:
         dataframes = []
         for name, results in data.items():
@@ -142,7 +149,7 @@ def get_bar_chart_data(
     run_outputs: dict[int, tuple[RunConfiguration, str | None]],
     metric_definitions: dict[str, str],
 ) -> dict[tuple[str, ...], float]:
-    """Draw a specified bar chart for a set of run outputs."""
+    """Get the data needed to plot a specified bar chart for a set of runs."""
     data: dict[tuple[str, ...], float] = {}  # {("a", "b"): 1.0, ("a", "c"): 2.0}
 
     # Extract the outputs into the data format needed for the line plot
@@ -173,9 +180,12 @@ def get_bar_chart_data(
 
 def draw_bar_chart(
     plot: BarChartModel,
-    data: dict[tuple[str, ...], float]
+    run_outputs: dict[int, tuple[RunConfiguration, str | None]],
+    metric_definitions: dict[str, str],
 ) -> None:
-    """."""
+    """Draw a specified bar chart for a set of run outputs."""
+    data = get_bar_chart_data(plot, run_outputs, metric_definitions)
+
     if PLOT_BACKEND == PlotBackend.SEABORN:
         dataframe = pd.DataFrame(
             {
@@ -198,9 +208,7 @@ def draw_bar_chart(
             [(", ".join(name), metric) for name, metric in data.items()],
             key=lambda x: x[1],
         )
-        plt.bar(
-            *zip(*shaped_data, strict=True), orientation="horizontal", width=3 / 5
-        )
+        plt.bar(*zip(*shaped_data, strict=True), orientation="horizontal", width=3 / 5)
         plt.ylabel(plot.y)
         plt.theme(PLOTEXT_THEME)
     else:
@@ -222,24 +230,26 @@ def get_roofline_plot_data(
     _run_outputs: dict[int, tuple[RunConfiguration, str | None]],
     _metric_definitions: dict[str, str],
 ) -> tuple[RooflineDataModel, dict[str, tuple[float, float]]]:
-    """Draw a specified roofline plots for a set of run outputs."""
+    """Get the data needed to plot a specified roofline plot."""
     roofline_data = RooflineDataModel.from_json(plot.ert_json)
     return (roofline_data, {})
 
 
 def draw_roofline_plot(
     plot: RooflinePlotModel,
-    data: tuple[RooflineDataModel, dict[str, tuple[float, float]]],
+    run_outputs: dict[int, tuple[RunConfiguration, str | None]],
+    metric_definitions: dict[str, str],
 ) -> None:
-    """."""
+    """Draw a specified roofline plots for a set of run outputs."""
+    data = get_roofline_plot_data(plot, run_outputs, metric_definitions)
 
     if PLOT_BACKEND == PlotBackend.PLOTEXT:
         plt.clear_figure()
         for label, memory_bound_data in data[0].memory_bound_ceilings.items():
             plt.plot(
                 *zip(*memory_bound_data, strict=True),
                 label=label,
-                marker=PLOTEXT_MARKER
+                marker=PLOTEXT_MARKER,
             )
         for label, compute_bound_data in data[0].compute_bound_ceilings.items():
             plt.plot(

diff --git a/src/hpc_multibench/test_bench.py b/src/hpc_multibench/test_bench.py
@@ -11,23 +11,21 @@
 from pickle import dumps as pickle_dumps  # nosec
 from pickle import loads as pickle_loads  # nosec
 from shutil import rmtree
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from typing_extensions import Self
 
-from hpc_multibench.run_configuration import RunConfiguration
-from hpc_multibench.analysis import (
-    get_line_plot_data,
-    draw_line_plot,
-    get_bar_chart_data,
+from hpc_multibench.analysis import (  # extract_metrics,
     draw_bar_chart,
-    get_roofline_plot_data,
-    draw_roofline_plot
+    draw_line_plot,
+    draw_roofline_plot,
 )
 from hpc_multibench.yaml_model import BenchModel, RunConfigurationModel
 
-BASE_OUTPUT_DIRECTORY = Path("results/")
+if TYPE_CHECKING:
+    from hpc_multibench.run_configuration import RunConfiguration
 
+BASE_OUTPUT_DIRECTORY = Path("results/")
 
 
 @dataclass(frozen=True)
@@ -239,23 +237,24 @@ def report(self) -> None:
             for job_id, run_configuration in reconstructed_run_configurations.items()
         }
 
+        # # Extract the metrics from the outputs of the jobs
+        # run_metrics: list[tuple[RunConfiguration, dict[str, str] | None]] = [
+        #     (
+        #         run_configuration,
+        #         extract_metrics(output, self.bench_model.analysis.metrics),
+        #     )
+        #     for run_configuration, output in run_outputs.values()
+        #     if output is not None
+        # ]
+
         # Draw the specified line plots
         for line_plot in self.bench_model.analysis.line_plots:
-            line_plot_data = get_line_plot_data(
-                line_plot,
-                run_outputs,
-                self.bench_model.analysis.metrics
-            )
-            draw_line_plot(line_plot, line_plot_data)
+            draw_line_plot(line_plot, run_outputs, self.bench_model.analysis.metrics)
 
         for bar_chart in self.bench_model.analysis.bar_charts:
-            bar_chart_data = get_bar_chart_data(
-                bar_chart, run_outputs, self.bench_model.analysis.metrics
-            )
-            draw_bar_chart(bar_chart, bar_chart_data)
+            draw_bar_chart(bar_chart, run_outputs, self.bench_model.analysis.metrics)
 
         for roofline_plot in self.bench_model.analysis.roofline_plots:
-            roofline_plot_data = get_roofline_plot_data(
+            draw_roofline_plot(
                 roofline_plot, run_outputs, self.bench_model.analysis.metrics
             )
-            draw_roofline_plot(roofline_plot, roofline_plot_data)