diff --git a/.gitignore b/.gitignore
index e71392ff8..4cd211734 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,10 @@ euphonic.egg-info/
 .tox
 tests_and_analysis/test/reports/
 tests_and_analysis/static_code_analysis/reports/
+tests_and_analysis/performance_benchmarking/reports/
 .coverage
 *.pyd
+*.log
+*.err
 *.so
+venv
diff --git a/tests_and_analysis/performance_benchmarking/Jenkinsfile b/tests_and_analysis/performance_benchmarking/Jenkinsfile
new file mode 100644
index 000000000..89b1f5bb5
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/Jenkinsfile
@@ -0,0 +1,30 @@
+#!groovy
+
+pipeline {
+
+    agent { label "SCARF" }
+
+    triggers {
+        cron("0 0 * * 0")
+    }
+
+    stages {
+
+        stage("Benchmark"){
+            steps {
+                checkout scm
+                sh """
+                    cd tests_and_analysis/performance_benchmarking &&
+                    sbatch run_benchmark_tests.sbatch
+                """
+            }
+        }
+
+    }
+
+    post {
+        cleanup {
+            deleteDir()
+        }
+    }
+}
diff --git a/tests_and_analysis/performance_benchmarking/__init__.py b/tests_and_analysis/performance_benchmarking/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests_and_analysis/performance_benchmarking/data/La2Zr2O7.castep_bin b/tests_and_analysis/performance_benchmarking/data/La2Zr2O7.castep_bin
new file mode 100644
index 000000000..e46c289ce
Binary files /dev/null and b/tests_and_analysis/performance_benchmarking/data/La2Zr2O7.castep_bin differ
diff --git a/tests_and_analysis/performance_benchmarking/data/Nb-242424-s0.25.castep_bin b/tests_and_analysis/performance_benchmarking/data/Nb-242424-s0.25.castep_bin
new file mode 100644
index 000000000..26caf9d3e
Binary files /dev/null and b/tests_and_analysis/performance_benchmarking/data/Nb-242424-s0.25.castep_bin differ
diff --git a/tests_and_analysis/performance_benchmarking/data/qpts_10000.npy b/tests_and_analysis/performance_benchmarking/data/qpts_10000.npy
new file mode 100644
index 000000000..a91735b7c
Binary files /dev/null and b/tests_and_analysis/performance_benchmarking/data/qpts_10000.npy differ
diff --git a/tests_and_analysis/performance_benchmarking/data/quartz.castep_bin b/tests_and_analysis/performance_benchmarking/data/quartz.castep_bin
new file mode 100644
index 000000000..72ac4963e
Binary files /dev/null and b/tests_and_analysis/performance_benchmarking/data/quartz.castep_bin differ
diff --git a/tests_and_analysis/performance_benchmarking/requirements.txt b/tests_and_analysis/performance_benchmarking/requirements.txt
new file mode 100644
index 000000000..51ef1cce9
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/requirements.txt
@@ -0,0 +1,5 @@
+pytest==5.4.1
+pytest-benchmark[histogram]==3.2.3
+numpy
+pandas==1.0.3
+matplotlib
diff --git a/tests_and_analysis/performance_benchmarking/run_benchmark_tests.py b/tests_and_analysis/performance_benchmarking/run_benchmark_tests.py
new file mode 100644
index 000000000..07ac0f251
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/run_benchmark_tests.py
@@ -0,0 +1,16 @@
+import os
+import pytest
+
+if __name__ == "__main__":
+
+    test_dir = os.path.dirname(os.path.abspath(__file__))
+    reports_dir = os.path.join(test_dir, "reports")
+    if not os.path.exists(reports_dir):
+        os.mkdir(reports_dir)
+
+    os.chdir(reports_dir)
+    test_exit_code = pytest.main([
+        test_dir,
+        "--benchmark-json=performance_benchmarks.json"
+    ])
+    os.chdir("..")
diff --git a/tests_and_analysis/performance_benchmarking/speedups.py b/tests_and_analysis/performance_benchmarking/speedups.py
new file mode 100644
index 000000000..1b1d1bbd7
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/speedups.py
@@ -0,0 +1,131 @@
+import argparse
+import json
+from typing import Dict
+import os
+
+
+def get_file_or_dir() -> str:
+    """
+    Get the file or directory to calculate speedups for, as
+    specified on the command line.
+
+    Returns
+    -------
+    str
+        The file or directory to calculate speedups for.
+    """
+    parser = argparse.ArgumentParser()
+    dir_file_group = parser.add_mutually_exclusive_group()
+    dir_file_group.add_argument("-f", action="store", dest="filename",
+                                help="The file to calculate speedups for")
+    dir_file_group.add_argument("-d", action="store", dest="dirname",
+                                help="The directory containing files"
+                                     " to calculate speedups for",
+                                default="reports")
+    args_parsed = parser.parse_args()
+    if args_parsed.filename:
+        return args_parsed.filename
+    else:
+        return args_parsed.dirname
+
+
+def median_value(benchmark: Dict) -> float:
+    """
+    Extract the median value from the benchmark dictionary.
+
+    Parameters
+    ----------
+    benchmark : Dict
+        A benchmark dictionary containing a median value
+
+    Returns
+    -------
+    float
+        The median time taken value from the benchmark data
+    """
+    return benchmark["stats"]["median"]
+
+
+def calculate_speedups(filename: str) -> Dict[str, Dict[str, Dict[int, float]]]:
+    """
+    Calculate speedups for the tests that are parameterised to
+    use a number of different threads.
+
+    Parameters
+    ----------
+    filename : str
+        The file to calculate speedups for
+
+    Returns
+    -------
+    Dict[str, Dict[str, Dict[int, float]]]
+        The keys of the top level dictionary are the names of the tests.
+        The keys of the next level of the dictionary are the seednames
+        used in the tests.
+        The keys of the next level dictionary are the number of threads used.
+        The values are the speedups for the given test and number of threads.
+    """
+    data = json.load(open(filename))
+    data["benchmarks"].sort(key=median_value)
+    # Extract the time taken for all the tests at the various numbers of threads
+    # and format the data to easily calculate speedups
+    speed_at_threads = {}
+    for benchmark in data["benchmarks"]:
+        # Filter out the tests that haven't used different numbers of threads
+        if "use_c" in benchmark["params"] and \
+                benchmark["params"]["use_c"] is True:
+            # Initialise performance data structure
+            test = benchmark["name"].split("[")[0]
+            if test not in speed_at_threads:
+                speed_at_threads[test] = {}
+            seedname = benchmark["params"]["seedname"]
+            if seedname not in speed_at_threads[test]:
+                speed_at_threads[test][seedname] = {}
+            # At the given test and number of threads extract the
+            # median time taken
+            speed_at_threads[test][seedname][benchmark["params"]["n_threads"]] \
+                = benchmark["stats"]["median"]
+    # Calculate the speedups from the formatted data
+    speedups = {}
+    for test in speed_at_threads:
+        speedups[test] = {}
+        for seedname in speed_at_threads[test]:
+            speedups[test][seedname] = {}
+            sequential_speed = speed_at_threads[test][seedname][1]
+            for n_threads in speed_at_threads[test][seedname]:
+                speedups[test][seedname][n_threads] = \
+                    sequential_speed / speed_at_threads[test][seedname][n_threads]
+    return speedups
+
+
+def write_speedups(filename: str, speedups: Dict[str, Dict[str, Dict[int, float]]]):
+    """
+    Write the calculated speedups to the given json file in
+    the "speedups" entry.
+
+    Parameters
+    ----------
+    filename : str
+        The file to write the speedups to
+    speedups : Dict[str, Dict[str, Dict[int, float]]]
+        The calculated speedups to write to file.
+    """
+    # Load in the data and update with the speedups
+    data = json.load(open(filename))
+    data["speedups"] = speedups
+    # Format the data nicely when overwriting to the file
+    json.dump(data, open(filename, "w+"), indent=4, sort_keys=True)
+
+
+if __name__ == "__main__":
+    path: str = get_file_or_dir()
+    if os.path.isdir(path):
+        for filename in os.listdir(path):
+            filepath = os.path.join(path, filename)
+            speedups: Dict[str, Dict[str, Dict[int, float]]] = \
+                calculate_speedups(filepath)
+            write_speedups(filepath, speedups)
+    elif os.path.isfile(path):
+        speedups: Dict[str, Dict[str, Dict[int, float]]] = \
+            calculate_speedups(path)
+        write_speedups(path, speedups)
diff --git a/tests_and_analysis/performance_benchmarking/test_benchmark_fc.py b/tests_and_analysis/performance_benchmarking/test_benchmark_fc.py
new file mode 100644
index 000000000..e9feac0d5
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/test_benchmark_fc.py
@@ -0,0 +1,53 @@
+import os
+import pytest
+from utils import get_data_path, get_seednames,\
+    get_qpts, get_threads
+
+from euphonic import ureg, ForceConstants
+
+
+@pytest.mark.parametrize("seedname", get_seednames())
+@pytest.mark.parametrize("use_c", [True, False])
+@pytest.mark.parametrize("n_threads", get_threads())
+def test_calculate_qpoint_phonon_modes(seedname, use_c, n_threads, benchmark):
+    # Set up
+    fc = ForceConstants.from_castep(
+        os.path.join(get_data_path(), f'{seedname}.castep_bin'))
+    qpts = get_qpts()
+    # Benchmark
+    if use_c:
+        benchmark(
+            fc.calculate_qpoint_phonon_modes,
+            qpts, use_c=True,
+            fall_back_on_python=False,
+            n_threads=n_threads,
+            asr='reciprocal', eta_scale=0.75
+        )
+    elif n_threads == 1:
+        benchmark(
+            fc.calculate_qpoint_phonon_modes,
+            qpts, use_c=False,
+            asr='reciprocal', eta_scale=0.75
+        )
+
+
+@pytest.mark.parametrize("seedname", get_seednames())
+def test_calculate_structure_factor(seedname, benchmark):
+    # Set up
+    qpts = get_qpts()
+    fc = ForceConstants.from_castep(
+        os.path.join(get_data_path(), f'{seedname}.castep_bin'))
+    phonons = fc.calculate_qpoint_phonon_modes(
+        qpts, use_c=True, fall_back_on_python=False, n_threads=5
+    )
+    fm = ureg('fm')
+    scattering_lengths = {
+        'La': 8.24*fm, 'Zr': 7.16*fm, 'O': 5.803*fm, 'C': 6.646*fm,
+        'Si': 4.1491*fm, 'H': -3.7390*fm, 'N': 9.36*fm, 'S': 2.847*fm,
+        'Nb': 7.054*fm
+    }
+    # Benchmark
+    benchmark(
+        phonons.calculate_structure_factor,
+        scattering_lengths=scattering_lengths
+    )
diff --git a/tests_and_analysis/performance_benchmarking/utils.py b/tests_and_analysis/performance_benchmarking/utils.py
new file mode 100644
index 000000000..4aca323db
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/utils.py
@@ -0,0 +1,44 @@
+import numpy as np
+import os
+from typing import List
+
+
+def get_data_path() -> str:
+    """
+    Returns
+    -------
+    str
+        The path to the data files for use in performance benchmarking
+    """
+    return os.path.join(os.path.dirname(__file__), "data")
+
+
+def get_seednames() -> List[str]:
+    """
+    Returns
+    -------
+    List[str]
+        A list of the seednames to test with
+    """
+    return ["Nb-242424-s0.25", "quartz", "La2Zr2O7"]
+
+
+def get_threads() -> List[int]:
+    """
+    Returns
+    -------
+    List[int]
+        A list of the numbers of threads to test with
+    """
+    return [1, 2, 4, 8, 12, 16, 24]
+
+
+def get_qpts() -> np.ndarray:
+    """
+    Returns
+    -------
+    np.ndarray
+        A numpy array of 10,000 q-points
+    """
+    qpts_npy_file = os.path.join(get_data_path(), "qpts_10000.npy")
+    return np.load(qpts_npy_file)
diff --git a/tests_and_analysis/performance_benchmarking/visualise.py b/tests_and_analysis/performance_benchmarking/visualise.py
new file mode 100644
index 000000000..d5249dc24
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/visualise.py
@@ -0,0 +1,47 @@
+import argparse
+import matplotlib.pyplot as plt
+from visualise.performance_over_time import plot_median_values
+from visualise.speedups_over_time import plot_speedups_over_time
+from visualise.speedups import plot_speedups_for_file
+
+
+def get_parser() -> argparse.ArgumentParser:
+    """
+    Build the command line argument parser for the visualisation options.
+
+    Returns
+    -------
+    argparse.ArgumentParser
+        The parser for the visualisation command line arguments.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-st", "--speedup-over-time", action="store",
+                        dest="speedup_over_time_dir",
+                        help="Plot and show how the speedups data has changed"
+                             " over time for the files in the directory you"
+                             " have specified as part of this argument")
+    parser.add_argument("-p", "--performance", action="store",
+                        dest="performance_dir",
+                        help="Plot and show how performance data has changed"
+                             " over time for the files in the directory you"
+                             " have specified as part of this argument")
+    parser.add_argument("-sf", "--speedup-file", action="store",
+                        dest="speedup_file",
+                        help="Plot and show how using more threads affects the"
+                             " performance of functions across multiple"
+                             " different materials for the specified file")
+    return parser
+
+
+if __name__ == "__main__":
+    parser = get_parser()
+    args_parsed = parser.parse_args()
+    if args_parsed.speedup_over_time_dir:
+        figure_index = plot_speedups_over_time(
+            args_parsed.speedup_over_time_dir
+        )
+    if args_parsed.performance_dir:
+        figure_index = plot_median_values(args_parsed.performance_dir)
+    if args_parsed.speedup_file:
+        plot_speedups_for_file(args_parsed.speedup_file)
+    plt.show()
diff --git a/tests_and_analysis/performance_benchmarking/visualise/__init__.py b/tests_and_analysis/performance_benchmarking/visualise/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests_and_analysis/performance_benchmarking/visualise/figures.py b/tests_and_analysis/performance_benchmarking/visualise/figures.py
new file mode 100644
index 000000000..59e953fe1
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/visualise/figures.py
@@ -0,0 +1,74 @@
+from abc import ABC, abstractmethod
+from typing import Dict
+import os
+
+# Adapted from:
+# https://matplotlib.org/3.1.0/gallery/lines_bars_and_markers/linestyles.html
+linestyle_tuple = [
+    'solid', 'dotted', 'dashed', 'dashdot'
+]
+
+
+def json_files(directory: str):
+    """
+    A generator for all JSON files stored in the directory and its subdirectories.
+
+    Parameters
+    ----------
+    directory: str
+        The top level directory to start the search from.
+
+    Yields
+    -------
+    str
+        The path to a JSON file found under the directory
+    """
+    for subdir, _, files in os.walk(directory):
+        for file in files:
+            if file.lower().endswith(".json"):
+                yield os.path.join(subdir, file)
+
+
+class Figure(ABC):
+
+    def __init__(self, machine_info: str):
+        # The details of the machine the tests have been run on
+        self.machine_info: str = machine_info
+        # The tests for which we are recording performance over time
+        self.tests: Dict[str, Dict] = {}
+
+    def add_test(self, test: str):
+        """
+        The figure has a set of tests (keys of the tests dict)
+        for which we are recording the performance
+        over time. Add to this set of tests.
+
+        Parameters
+        ----------
+        test : str
+            The test to add to the set of tests we are recording.
+        """
+        if test not in self.tests:
+            self.tests[test] = {}
+
+    @abstractmethod
+    def plot(self):
+        """
+        Plot the test performance over time held by this figure.
+        """
+        raise NotImplementedError
+
+
+class Figures(ABC):
+
+    def __init__(self):
+        # A dictionary of figures with keys as the machine information
+        # and the figure as the value
+        self.figures: Dict[str, Figure] = {}
+
+    def plot(self):
+        """
+        Plot the figures currently held in this object.
+        """
+        for figure in self.figures:
+            self.figures[figure].plot()
diff --git a/tests_and_analysis/performance_benchmarking/visualise/performance_over_time.py b/tests_and_analysis/performance_benchmarking/visualise/performance_over_time.py
new file mode 100644
index 000000000..ba2c5faa5
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/visualise/performance_over_time.py
@@ -0,0 +1,241 @@
+from .figures import Figures, Figure, linestyle_tuple, json_files
+from utils import get_seednames
+from datetime import datetime
+from typing import Dict, List
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.dates as dates
+import json
+
+
+class MedianMachineFigure(Figure):
+
+    def add_time_taken(self, test: str, date: datetime.date, time_taken: float):
+        """
+        Add data on the time taken to run the given test on the given date.
+
+        Parameters
+        ----------
+        test : str
+            The test being benchmarked.
+        date : datetime.date
+            The date the benchmarking took place.
+        time_taken : float
+            The time taken in seconds
+        """
+        test_name = test.split("[")[0]
+        test_params = test.split("[")[1][:-1]
+        self.add_test_params(test_name, test_params)
+        self.tests[test_name][test_params][date] = time_taken
+
+    def add_test_params(self, test: str, params: str):
+        """
+        Add parameters used for a test.
+
+        Parameters
+        ----------
+        test : str
+            The test being benchmarked.
+        params : str
+            The parameter values for an execution of the test.
+        """
+        self.add_test(test)
+        if params not in self.tests[test]:
+            self.tests[test][params] = {}
+
+    def plot(self):
+        """
+        Plot the (possibly multiple) figures. One figure for each test with
+        a line for each set of parameters of the test and a point
+        on each line for each date it has been run.
+
+        The data held in self.tests is first gathered into one pandas
+        DataFrame per test (see build_dataframes), then each DataFrame
+        is plotted on its own matplotlib figure (see plot_dataframes).
+
+        Returns
+        -------
+        None
+            The figures are displayed later by a call to
+            matplotlib.pyplot.show().
+        """
+        dataframes: Dict[str, pd.DataFrame] = self.build_dataframes()
+        self.plot_dataframes(dataframes)
+
+    def build_dataframes(self) -> Dict[str, pd.DataFrame]:
+        """
+        Build a dataframe for each test. Each set of params contributes
+        its own column of y-axis points to the dataframe.
+
+        Returns
+        -------
+        Dict[str, pd.DataFrame]
+            The key is what will be the title of the plots.
+            The value is the dataframe to create a figure for.
+        """
+        dataframes: Dict[str, pd.DataFrame] = {}
+        for test in self.tests:
+            x_axis: List[datetime.date] = []
+            y_axes: Dict[str, List[float]] = {}
+            # Create a y_axis and overwrite the x_axis (will be the
+            # same as the last) for each set of parameters used with the test
+            for params in self.tests[test]:
+                y_axis: List[float] = []
+                new_x_axis: List[datetime.date] = []
+                # Sort the entries of the time taken by the date the test
+                # was executed on
+                for key, value in self.tests[test][params].items():
+                    # Search through the list and find the two elements between
+                    # which the new date fits in order
+                    index: int = 0
+                    while index < len(new_x_axis) and key > new_x_axis[index]:
+                        index += 1
+                    # Insert the date into the list in between the two elements
+                    # either side in the order
+                    new_x_axis = new_x_axis[:index] + [key] + new_x_axis[index:]
+                    # Maintain the order found for the x_axis in
+                    # the y_axis elements
+                    y_axis = y_axis[:index] + [value] + y_axis[index:]
+                x_axis = new_x_axis
+                y_axes[params] = y_axis
+            # Create a new dataframe for each test
+            panda_dataframe: pd.DataFrame = \
+                self.create_dataframe(x_axis, y_axes)
+            title: str = "Performance over time\n {}\n {}".format(
+                self.machine_info, test)
+            dataframes[title] = panda_dataframe
+        return dataframes
+
+    def create_dataframe(self, x_axis: List[datetime.date],
+                         y_axes: Dict[str, List[float]]) -> pd.DataFrame:
+        """
+        Create a single dataframe from an x_axis and multiple y_axes.
+
+        Parameters
+        ----------
+        x_axis : List[datetime.date]
+            The dates to plot on the x axis of the dataframe.
+        y_axes : Dict[str, List[float]]
+            The key is the parameters used when recording the time taken and
+            the values are the times taken at the corresponding x_axis dates.
+
+        Returns
+        -------
+        The pandas dataframe containing the data.
+        """
+        dataframe = {'x': x_axis}
+        dataframe.update(y_axes)
+        return pd.DataFrame(dataframe)
+
+    def plot_dataframes(self, dataframes: Dict[str, pd.DataFrame]):
+        """
+        Plot a figure for each dataframe and use the dictionary
+        key as the title. Each dataframe has an x axis with the key 'x'
+        and a number of y axis traces.
+
+        Parameters
+        ----------
+        dataframes : Dict[str, pd.DataFrame]
+            The key is the title of the figure to be created and the
+            value is the dataframe to plot for the figure.
+        """
+        # A plot for each test (dataframe)
+        for title, dataframe in dataframes.items():
+            fig, subplots = plt.subplots()
+            # A trace for each combination of the test parameters,
+            # varying the linestyle for each trace
+            for i, key in enumerate(dataframe.keys()):
+                if key != "x":
+                    subplots.plot(
+                        'x', key,
+                        data=dataframe,
+                        linestyle=linestyle_tuple[i % len(get_seednames()) - 1]
+                    )
+            # Set figure display details
+            subplots.set_title(title)
+            subplots.xaxis.set_major_formatter(dates.DateFormatter('%Y-%m-%d'))
+            subplots.xaxis.set_major_locator(dates.DayLocator())
+            subplots.set_xlabel("Date")
+            subplots.set_ylabel("Time taken (seconds)")
+            subplots.legend(
+                title="Params",
+                loc='center left',
+                bbox_to_anchor=(1, 0.5),
+                fontsize="small"
+            )
+            fig.tight_layout()
+            fig.autofmt_xdate()
+
+
+class MedianMachineFigures(Figures):
+
+    def __init__(self):
+        super().__init__()
+        # A dictionary of figures with keys as the machine information
+        # and the figure as the value
+        self.figures: Dict[str, MedianMachineFigure] = {}
+
+    def add_figure(self, machine_info: str):
+        """
+        Add a MedianMachineFigure to the figures stored in this object
+        with the given machine info (does not add another figure if there
+        is already one with the given machine information).
+
+        Parameters
+        ----------
+        machine_info : str
+            The information describing the machine the tests have run on.
+        """
+        if machine_info not in self.figures:
+            self.figures[machine_info] = MedianMachineFigure(machine_info)
+
+    def get_figure(self, machine_info: str) -> MedianMachineFigure:
+        """
+        Get the figure with the given machine information.
+
+        Parameters
+        ----------
+        machine_info : str
+            The machine information the tests of the returned
+            figure have been run on.
+
+        Returns
+        -------
+        MedianMachineFigure
+            The figure which contains benchmark data that has been run
+            on the machine with the given information.
+        """
+        return self.figures[machine_info]
+
+
+def plot_median_values(directory: str):
+    """
+    Plot and show a graph for each test displaying performance changes over
+    time with a trace for each combination of parameters the test has run on.
+    If the tests have been run on multiple different types of machines there
+    will be a separate figure for each type of machine. Data is taken from
+    json files under the given directory.
+
+    Parameters
+    ----------
+    directory : str
+        The directory under which the json files are stored.
+    """
+    plots = MedianMachineFigures()
+    for file in json_files(directory):
+        data = json.load(open(file))
+        if "benchmarks" in data:
+            plots.add_figure(data["machine_info"]["cpu"]["brand"])
+            for benchmark in data["benchmarks"]:
+                test = benchmark["name"]
+                plots.get_figure(
+                    data["machine_info"]["cpu"]["brand"]
+                ).add_time_taken(
+                    test,
+                    datetime.strptime(
+                        data["datetime"].split("T")[0],
+                        '%Y-%m-%d'
+                    ).date(),
+                    benchmark["stats"]["median"]
+                )
+    plots.plot()
diff --git a/tests_and_analysis/performance_benchmarking/visualise/speedups.py b/tests_and_analysis/performance_benchmarking/visualise/speedups.py
new file mode 100644
index 000000000..3dbcf09a3
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/visualise/speedups.py
@@ -0,0 +1,36 @@
+import json
+import matplotlib.pyplot as plt
+
+
+def plot_speedups_for_file(filename: str):
+    """
+    Plot a figure for each test that has had speedups calculated for it in
+    filename. There is a trace for each seedname used in the test.
+
+    Parameters
+    ----------
+    filename : str
+        The file to get the calculated speedups from
+    """
+    data = json.load(open(filename))
+    if "speedups" in data:
+        for test in data["speedups"]:
+            fig, subplots = plt.subplots()
+            for seedname in data["speedups"][test]:
+                subplots.plot(
+                    list(data["speedups"][test][seedname].keys()),
+                    list(data["speedups"][test][seedname].values()),
+                    label=seedname
+                )
+            subplots.set_xlabel("Number of threads")
+            subplots.set_ylabel("Speedup (Ts/Tp)")
+            subplots.set_title("Speedups for {}\n {}".format(filename, test))
+            # Create the legend to the right of the figure and shrink the
+            # figure to account for that
+            subplots.legend(
+                title="Seedname",
+                loc='center left',
+                bbox_to_anchor=(1, 0.5),
+                fontsize="small"
+            )
+            fig.tight_layout()
diff --git a/tests_and_analysis/performance_benchmarking/visualise/speedups_over_time.py b/tests_and_analysis/performance_benchmarking/visualise/speedups_over_time.py
new file mode 100644
index 000000000..d2c406e30
--- /dev/null
+++ b/tests_and_analysis/performance_benchmarking/visualise/speedups_over_time.py
@@ -0,0 +1,184 @@
+from .figures import Figure, Figures, json_files
+from typing import Dict
+from datetime import datetime
+import matplotlib.pyplot as plt
+import matplotlib.dates as dates
+import json
+
+
+class SpeedupMachineFigure(Figure):
+
+    def add_speedup(self, test: str, seedname: str,
+                    n_threads: int, date: datetime.date,
+                    speedup: float):
+        """
+        Add speedup data for a specific test, date of test
+        and number of threads.
+        If the number of threads and test haven't been added yet, add them.
+
+        Parameters
+        ----------
+        test : str
+            The name of the test this data is associated with
+        seedname : str
+            The material the data comes from
+        n_threads : int
+            The number of threads the test was run with
+            and the speedup calculated with
+        date : datetime.date
+            The date the test was run on
+        speedup : float
+            The speedup value calculated as:
+            time taken on 1 thread / time taken on n_threads
+        """
+        self.add_n_threads(test, seedname, n_threads)
+        self.tests[test][seedname][n_threads][date] = speedup
+
+    def add_seedname(self, test: str, seedname: str):
+        """
+        Add an entry for the seedname in the recorded data.
+
+        Parameters
+        ----------
+        test : str
+            The name of the test run with the seedname
+        seedname : str
+            The name of the material run with the test
+        """
+        self.add_test(test)
+        if seedname not in self.tests[test]:
+            self.tests[test][seedname] = {}
+
+    def add_n_threads(self, test: str, seedname: str, n_threads: int):
+        """
+        The given test has been run with the given number of
+        threads (n_threads).
+        Add n_threads as a key for the given test to record
+        speedup data against.
+        If the test is not already present, add it.
+
+        Parameters
+        ----------
+        test : str
+            The test that has been run with the given number
+            of threads (n_threads)
+        seedname : str
+            The material the data comes from
+        n_threads : int
+            A number of threads the test has been run with.
+        """
+        self.add_seedname(test, seedname)
+        if n_threads not in self.tests[test][seedname]:
+            self.tests[test][seedname][n_threads] = {}
+
+    def plot(self):
+        """
+        Plot the speedup performance over time across a range of threads.
+        """
+        # Plot a figure for each test and seedname combination
+        for test in self.tests:
+            for seedname in self.tests[test]:
+                fig, subplots = plt.subplots()
+                # Plot a line on the figure for each number of threads
+                for n_threads in self.tests[test][seedname]:
+                    subplots.plot(
+                        list(self.tests[test][seedname][n_threads].keys()),
+                        list(self.tests[test][seedname][n_threads].values()),
+                        label=str(n_threads)
+                    )
+                # Format x axis to use dates
+                subplots.xaxis.set_major_formatter(
+                    dates.DateFormatter('%Y-%m-%d')
+                )
+                subplots.xaxis.set_major_locator(dates.DayLocator())
+                # Label axes, title and legend correctly
+                subplots.set_xlabel("Date")
+                subplots.set_ylabel("Speedup value (Ts/Tn)")
+                subplots.set_title("Speedups over time\n {}\n {}, {}".format(
+                    self.machine_info, test, seedname
+                ))
+                subplots.legend(
+                    title="Threads",
+                    loc='center left',
+                    bbox_to_anchor=(1, 0.5),
+                    fontsize="small"
+                )
+                fig.tight_layout()
+                fig.autofmt_xdate()
+
+
+class SpeedupMachineFigures(Figures):
+
+    def __init__(self):
+        super().__init__()
+        # A dictionary of figures with keys as the machine information
+        # and the figure as the value
+        self.figures: Dict[str, SpeedupMachineFigure] = {}
+
+    def add_figure(self, machine_info: str):
+        """
+        Add a SpeedupMachineFigure to the figures to plot associated
+        with the given machine information.
+
+        Parameters
+        ----------
+        machine_info : str
+            A string describing the machine the speedup benchmarks
+            have been run on.
+        """
+        if machine_info not in self.figures:
+            self.figures[machine_info] = SpeedupMachineFigure(machine_info)
+
+    def get_figure(self, machine_info: str) -> SpeedupMachineFigure:
+        """
+        Get the figure associated with the given machine information.
+
+        Parameters
+        ----------
+        machine_info : str
+            The machine information associated with the figure to get
+
+        Returns
+        -------
+        SpeedupMachineFigure
+            The figure associated with the given machine information
+        """
+        return self.figures[machine_info]
+
+
+def plot_speedups_over_time(directory: str):
+    """
+    Plot and show graphs displaying how speedups on different numbers of threads
+    have changed over time using the data from the json files under the given
+    directory.
+
+    Parameters
+    ----------
+    directory : str
+        The directory under which the json files are stored.
+    """
+    figures = SpeedupMachineFigures()
+    for file in json_files(directory):
+        data = json.load(open(file))
+        if "speedups" in data:
+            # Add a new figure for each different machine the tests
+            # have been run on
+            figures.add_figure(data["machine_info"]["cpu"]["brand"])
+            for test in data["speedups"]:
+                for seedname in data["speedups"][test]:
+                    for n_threads in data["speedups"][test][seedname]:
+                        # Add speedup data for each test and the number of
+                        # threads that have been used for the test
+                        figures.get_figure(
+                            data["machine_info"]["cpu"]["brand"]
+                        ).add_speedup(
+                            test,
+                            seedname,
+                            n_threads,
+                            datetime.strptime(
+                                data["datetime"].split("T")[0],
+                                '%Y-%m-%d'
+                            ).date(),
+                            data["speedups"][test][seedname][n_threads]
+                        )
+    figures.plot()