122 performance benchmarking (#59)
* Basic performance benchmarking

* Remove accidental file add

* Initial benchmarks and Jenkinsfile

* Some prototyping of sbatch

* Improve sbatch

* sbatch file passes successfully

* Remove unnecessary slashes and add more files

* Better doc

* Add structure factor benchmark

* Initial benchmark testing

* Fix overwrite of data

* Corrected bug where args were the wrong way around

* Write more easily readable json

* Give 10% on time upper bound

* Account for flaky testing and better generation script output

* Add turbo disable, data generating sbatch and control SCARF turbo

* prep SCARF jenkinsfile

* Send email on failure

* Better message

* Better email message

* Ready for running on scarf

* Write reports to xml

* Added newline

* Make lines less than 79 chars and correct numpy returns doc strings

* Move to pytest-benchmark

* Remove unused code

* Try running from different label

* steps

* Correct cron timing

* remove histogram and qpts limit

* Create json to location

* Remove qpts limit

* Add speedup calculation

* Add speedups call to sbatch

* Speed up plots

* Working performance visualisation

* Tidying visualisation

* Further tidying

* Refactor and document visualisation

* Remove qpoint limit

* Remove unused sbatch file, correct doc

* Specify directory on command line

* Documenting, commenting and making nicer to read

* Take into account seednames in speedups calculations

* Take into account seedname in visualisation

* Move into different files and refactor to use different line styles

* Add visualisation of speedups over the amount of CPUs for a specific file and refactor how directories and files are specified

* Correct docstring indents

* Place legend to the right

* Add docstring

* Only change linestyle every 5 lines

* Refactor to use subplots

* Update scripts for new API

* Plot each material with a different linestyle

* Move threads into utils.py

Co-authored-by: James King <[email protected]>
Co-authored-by: Rebecca Fair <[email protected]>
3 people authored May 18, 2020
1 parent 6ca7dd6 commit 0d441a1
Showing 18 changed files with 865 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -7,6 +7,10 @@ euphonic.egg-info/
.tox
tests_and_analysis/test/reports/
tests_and_analysis/static_code_analysis/reports/
tests_and_analysis/performance_benchmarking/reports/
.coverage
*.pyd
*.log
*.err
*.so
venv
30 changes: 30 additions & 0 deletions tests_and_analysis/performance_benchmarking/Jenkinsfile
@@ -0,0 +1,30 @@
#!groovy

pipeline {

agent { label "SCARF" }

triggers {
cron("0 0 * * 0")
}

stages {

stage("Benchmark"){
steps {
checkout scm
sh """
cd tests_and_analysis/performance_benchmarking &&
sbatch run_benchmark_tests.sbatch
"""
}
}

}

post {
cleanup {
deleteDir()
}
}
}
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
5 changes: 5 additions & 0 deletions tests_and_analysis/performance_benchmarking/requirements.txt
@@ -0,0 +1,5 @@
pytest==5.4.1
pytest-benchmark[histogram]==3.2.3
numpy
pandas==1.0.3
matplotlib
16 changes: 16 additions & 0 deletions tests_and_analysis/performance_benchmarking/run_benchmark_tests.py
@@ -0,0 +1,16 @@
import os
import pytest

if __name__ == "__main__":

test_dir = os.path.dirname(os.path.abspath(__file__))
reports_dir = os.path.join(test_dir, "reports")
if not os.path.exists(reports_dir):
os.mkdir(reports_dir)

os.chdir(reports_dir)
test_exit_code = pytest.main([
test_dir,
"--benchmark-json=performance_benchmarks.json"
])
os.chdir("..")
131 changes: 131 additions & 0 deletions tests_and_analysis/performance_benchmarking/speedups.py
@@ -0,0 +1,131 @@
import argparse
import json
from typing import Dict
import os


def get_file_or_dir() -> str:
"""
Get the file or directory to calculate speedups for,
as specified on the command line.
Returns
-------
str
The path of the file or directory to calculate speedups for.
"""
parser = argparse.ArgumentParser()
dir_file_group = parser.add_mutually_exclusive_group()
dir_file_group.add_argument("-f", action="store", dest="filename",
help="The file to calculate speedups for")
dir_file_group.add_argument("-d", action="store", dest="dirname",
help="The directory containing files"
" to calculate speedups for",
default="reports")
args_parsed = parser.parse_args()
if args_parsed.filename:
return args_parsed.filename
else:
return args_parsed.dirname


def median_value(benchmark: Dict) -> float:
"""
Extract the median value from the benchmark dictionary.
Parameters
----------
benchmark : Dict
A dictionary of benchmark data containing a median value
Returns
-------
float
The median time taken value from the benchmark data
"""
return benchmark["stats"]["median"]


def calculate_speedups(filename: str) -> Dict[str, Dict[str, Dict[int, float]]]:
"""
Calculate speedups for the tests that are parameterised to
use a number of different threads.
Parameters
----------
filename : str
The file to calculate speedups for
Returns
-------
Dict[str, Dict[str, Dict[int, float]]]
The keys of the top level dictionary are the name of the test.
The keys of the next level of the dictionary are the seednames
used in the tests.
The keys of the next level dictionary are the number of threads used.
The values are the speedups for the given test and number of threads.
"""
data = json.load(open(filename))
data["benchmarks"].sort(key=median_value)
# Extract the time taken for all the tests at the various numbers of threads
# and format the data to easily calculate speedups
speed_at_threads = {}
for benchmark in data["benchmarks"]:
# Filter out the tests that haven't used different numbers of threads
if "use_c" in benchmark["params"] and \
benchmark["params"]["use_c"] is True:
# Initialise performance data structure
test = benchmark["name"].split("[")[0]
if test not in speed_at_threads:
speed_at_threads[test] = {}
seedname = benchmark["params"]["seedname"]
if seedname not in speed_at_threads[test]:
speed_at_threads[test][seedname] = {}
# At the given test and number of threads extract the
# median time taken
speed_at_threads[test][seedname][benchmark["params"]["n_threads"]] \
= benchmark["stats"]["median"]
# Calculate the speedups from the formatted data
speedups = {}
for test in speed_at_threads:
speedups[test] = {}
for seedname in speed_at_threads[test]:
speedups[test][seedname] = {}
sequential_speed = speed_at_threads[test][seedname][1]
for n_threads in speed_at_threads[test][seedname]:
speedups[test][seedname][n_threads] = \
sequential_speed / speed_at_threads[test][seedname][n_threads]
return speedups


def write_speedups(filename: str, speedups: Dict[str, Dict[str, Dict[int, float]]]):
"""
Write the calculated speedups to the given json file in
the "speedups" entry.
Parameters
----------
filename : str
The file to write the speedups to
speedups : Dict[str, Dict[str, Dict[int, float]]]
The calculated speedups to write to file.
"""
# Load in the data and update with the speedups
data = json.load(open(filename))
data["speedups"] = speedups
# Format the data nicely when overwriting to the file
json.dump(data, open(filename, "w+"), indent=4, sort_keys=True)


if __name__ == "__main__":
path: str = get_file_or_dir()
if os.path.isdir(path):
for filename in os.listdir(path):
filepath = os.path.join(path, filename)
speedups: Dict[str, Dict[str, Dict[int, float]]] = \
calculate_speedups(filepath)
write_speedups(filepath, speedups)
elif os.path.isfile(path):
speedups: Dict[str, Dict[str, Dict[int, float]]] = \
calculate_speedups(path)
write_speedups(path, speedups)
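The speedup at n threads is the 1-thread median time divided by the n-thread median time. A toy example of calculate_speedups applied to a hand-written report that mimics the pytest-benchmark entries it consumes (the file name toy_report.json is made up for illustration, and the import assumes this is run from the performance_benchmarking directory):

import json
from speedups import calculate_speedups

toy_report = {"benchmarks": [
    {"name": "test_calculate_qpoint_phonon_modes[1-True-quartz]",
     "params": {"use_c": True, "seedname": "quartz", "n_threads": 1},
     "stats": {"median": 12.0}},
    {"name": "test_calculate_qpoint_phonon_modes[4-True-quartz]",
     "params": {"use_c": True, "seedname": "quartz", "n_threads": 4},
     "stats": {"median": 4.0}},
]}
with open("toy_report.json", "w") as toy_file:
    json.dump(toy_report, toy_file)

print(calculate_speedups("toy_report.json"))
# -> test_calculate_qpoint_phonon_modes / quartz: speedup 1.0 at 1 thread,
#    3.0 at 4 threads (12.0 s / 4.0 s)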
53 changes: 53 additions & 0 deletions tests_and_analysis/performance_benchmarking/test_benchmark_fc.py
@@ -0,0 +1,53 @@
import os
import pytest
from utils import get_data_path, get_seednames,\
get_qpts, get_threads

from euphonic import ureg, ForceConstants


@pytest.mark.parametrize("seedname", get_seednames())
@pytest.mark.parametrize("use_c", [True, False])
@pytest.mark.parametrize("n_threads", get_threads())
def test_calculate_qpoint_phonon_modes(seedname, use_c, n_threads, benchmark):
# Set up
fc = ForceConstants.from_castep(
os.path.join(get_data_path(), f'{seedname}.castep_bin'))
qpts = get_qpts()
# Benchmark
if use_c:
benchmark(
fc.calculate_qpoint_phonon_modes,
qpts, use_c=True,
fall_back_on_python=False,
n_threads=n_threads,
asr='reciprocal', eta_scale=0.75
)
elif n_threads == 1:
benchmark(
fc.calculate_qpoint_phonon_modes,
qpts, use_c=False,
asr='reciprocal', eta_scale=0.75
)


@pytest.mark.parametrize("seedname", get_seednames())
def test_calculate_structure_factor(seedname, benchmark):
# Set up
qpts = get_qpts()
fc = ForceConstants.from_castep(
os.path.join(get_data_path(), f'{seedname}.castep_bin'))
phonons = fc.calculate_qpoint_phonon_modes(
qpts, use_c=True, fall_back_on_python=False, n_threads=5
)
fm = ureg('fm')
scattering_lengths = {
'La': 8.24*fm, 'Zr': 7.16*fm, 'O': 5.803*fm, 'C': 6.646*fm,
'Si': 4.1491*fm, 'H': -3.7390*fm, 'N': 9.36*fm, 'S': 2.847*fm,
'Nb': 7.054*fm
}
# Benchmark
benchmark(
phonons.calculate_structure_factor,
scattering_lengths=scattering_lengths
)
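The benchmark fixture used above comes from pytest-benchmark: benchmark(callable, *args, **kwargs) runs the callable repeatedly, records timing statistics (min, max, mean, median, ...) for the JSON report, and returns the callable's result. A minimal self-contained sketch of the same pattern, using a trivial placeholder function rather than the Euphonic calls:

import numpy as np
import pytest


@pytest.mark.parametrize("n", [10_000, 100_000])
def test_summed_squares(n, benchmark):
    def summed_squares():
        # Trivial stand-in for the real ForceConstants calculations
        return np.sum(np.arange(n, dtype=np.float64) ** 2)

    result = benchmark(summed_squares)
    assert result > 0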
44 changes: 44 additions & 0 deletions tests_and_analysis/performance_benchmarking/utils.py
@@ -0,0 +1,44 @@
import numpy as np
import os
from typing import List


def get_data_path() -> str:
"""
Returns
-------
str
The path to the data files for use in performance benchmarking
"""
return os.path.join(os.path.dirname(__file__), "data")


def get_seednames() -> List[str]:
"""
Returns
-------
List[str]
A list of the seednames to test with
"""
return ["Nb-242424-s0.25", "quartz", "La2Zr2O7"]


def get_threads() -> List[int]:
"""
Returns
-------
List[int]
A list of the number of threads to test with
"""
return [1, 2, 4, 8, 12, 16, 24]


def get_qpts() -> np.ndarray:
"""
Returns
-------
np.ndarray
A numpy array of 10,000 q-points
"""
qpts_npy_file = os.path.join(get_data_path(), "qpts_10000.npy")
return np.load(qpts_npy_file)
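qpts_10000.npy is one of the binary data files added by this commit; the script that generates it is not shown in this diff. A hypothetical sketch of how such a file could be produced, assuming 10,000 random fractional q-points are used for benchmarking:

import numpy as np

# Hypothetical generation of the benchmark q-point set; the commit's actual
# data-generating script is not part of this diff.
rng = np.random.default_rng(seed=0)
qpts = rng.random((10_000, 3)) - 0.5  # fractional coordinates in [-0.5, 0.5)
np.save("qpts_10000.npy", qpts)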
47 changes: 47 additions & 0 deletions tests_and_analysis/performance_benchmarking/visualise.py
@@ -0,0 +1,47 @@
import argparse
import matplotlib.pyplot as plt
from visualise.performance_over_time import plot_median_values
from visualise.speedups_over_time import plot_speedups_over_time
from visualise.speedups import plot_speedups_for_file


def get_parser() -> argparse.ArgumentParser:
"""
Build the argument parser for the visualisation command line options.
Returns
-------
argparse.ArgumentParser
The parser for the visualisation command line arguments
"""
parser = argparse.ArgumentParser()
parser.add_argument("-st", "--speedup-over-time", action="store",
dest="speedup_over_time_dir",
help="Plot and show how the speedups data has changed"
" over time for the files in the directory you"
" have specified as part of this argument")
parser.add_argument("-p", "--performance", action="store",
dest="performance_dir",
help="Plot and show how performance data has changed"
" over time for the files in the directory you"
" have specified as part of this argument")
parser.add_argument("-sf", "--speedup-file", action="store",
dest="speedup_file",
help="Plot and show how using more threads affects the"
" performance of functions across multiple"
" different materials for the specified file")
return parser


if __name__ == "__main__":
parser = get_parser()
args_parsed = parser.parse_args()
if args_parsed.speedup_over_time_dir:
figure_index = plot_speedups_over_time(
args_parsed.speedup_over_time_dir
)
if args_parsed.performance_dir:
figure_index = plot_median_values(args_parsed.performance_dir)
if args_parsed.speedup_file:
plot_speedups_for_file(args_parsed.speedup_file)
plt.show()
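The three plotting helpers imported at the top of visualise.py live in a visualise/ package whose diff is not expanded here. As a hypothetical sketch (not the repository's actual implementation) of what a plot_speedups_for_file-style helper could look like, based on the behaviour described in the commit messages (speedup against number of threads, one line per test and material, varying linestyles, legend placed to the right) and on the "speedups" entry written by speedups.py:

import itertools
import json

import matplotlib.pyplot as plt


def plot_speedups_for_file(filename: str):
    # "speedups" maps test name -> seedname -> n_threads -> speedup,
    # as written by speedups.py (JSON keys round-trip as strings)
    with open(filename) as speedups_file:
        speedups = json.load(speedups_file)["speedups"]
    fig, ax = plt.subplots()
    linestyles = itertools.cycle(["-", "--", "-.", ":"])
    for test, by_seedname in speedups.items():
        for seedname, by_threads in by_seedname.items():
            pairs = sorted((int(n), s) for n, s in by_threads.items())
            ax.plot([n for n, _ in pairs], [s for _, s in pairs],
                    linestyle=next(linestyles),
                    label=f"{test} ({seedname})")
    ax.set_xlabel("Number of threads")
    ax.set_ylabel("Speedup relative to 1 thread")
    ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
    fig.tight_layout()
    return fig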
Empty file.