
Commit

Merge branch 'main' of github.com:scitools/iris into enable-type-checking
bouweandela committed May 22, 2024
2 parents af76a1e + 22c98e8 commit 538115e
Showing 13 changed files with 440 additions and 35 deletions.
10 changes: 8 additions & 2 deletions benchmarks/README.md
@@ -43,11 +43,17 @@ if it is not already. You can achieve this by either:

* `OVERRIDE_TEST_DATA_REPOSITORY` - required - some benchmarks use
`iris-test-data` content, and your local `site.cfg` is not available for
-  benchmark scripts.
+  benchmark scripts. The benchmark runner defers to any value already set in
+  the shell, but will otherwise download `iris-test-data` and set the variable
+  accordingly.
* `DATA_GEN_PYTHON` - required - path to a Python executable that can be
used to generate benchmark test objects/files; see
[Data generation](#data-generation). The benchmark runner sets this
-  automatically, but will defer to any value already set in the shell.
+  automatically, but will defer to any value already set in the shell. Note that
+  [Mule](https://github.com/metomi/mule) will be automatically installed into
+  this environment, and sometimes
+  [iris-test-data](https://github.com/SciTools/iris-test-data) (see
+  `OVERRIDE_TEST_DATA_REPOSITORY`).
* `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic
test data, which the benchmark scripts will create if it doesn't already
exist. Defaults to `<root>/benchmarks/.data/` if not set. Note that some of
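The deferral behaviour described above can be pictured as a small sketch
(hypothetical paths; the real logic lives in the benchmark runner):

```python
import os

# Keep any value already exported in the shell; otherwise fall back to a
# runner-chosen default (the paths here are illustrative placeholders).
os.environ.setdefault("OVERRIDE_TEST_DATA_REPOSITORY", "/tmp/iris-test-data/test_data")
os.environ.setdefault("DATA_GEN_PYTHON", "/tmp/data-gen-env/bin/python")
os.environ.setdefault("BENCHMARK_DATA", "./benchmarks/.data")
```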
54 changes: 45 additions & 9 deletions benchmarks/benchmarks/__init__.py
@@ -5,7 +5,9 @@
"""Common code for benchmarks."""

from os import environ
-import resource
+import tracemalloc

+import numpy as np


def disable_repeat_between_setup(benchmark_object):
@@ -61,27 +63,34 @@ class TrackAddedMemoryAllocation:
AVD's detection threshold and be treated as 'signal'. Results
smaller than this value will therefore be returned as equal to this
value, ensuring fractionally small noise / no noise at all.
        Defaults to 0.2
RESULT_ROUND_DP : int
Number of decimal places of rounding on result values (in Mb).
Defaults to 1
"""

-    RESULT_MINIMUM_MB = 5.0
-
-    @staticmethod
-    def process_resident_memory_mb():
-        return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
+    RESULT_MINIMUM_MB = 0.2
+    RESULT_ROUND_DP = 1  # i.e. to the nearest 0.1 Mb

def __enter__(self):
-        self.mb_before = self.process_resident_memory_mb()
+        tracemalloc.start()
return self

def __exit__(self, *_):
-        self.mb_after = self.process_resident_memory_mb()
+        _, peak_mem_bytes = tracemalloc.get_traced_memory()
+        tracemalloc.stop()
+        # Save peak-memory allocation, scaled from bytes to Mb.
+        self._peak_mb = peak_mem_bytes * (2.0**-20)

def addedmem_mb(self):
"""Return measured memory growth, in Mb."""
-        result = self.mb_after - self.mb_before
+        result = self._peak_mb
        # Small results are too vulnerable to noise being interpreted as signal.
        result = max(self.RESULT_MINIMUM_MB, result)
+        # Rounding makes results easier to read.
+        result = np.round(result, self.RESULT_ROUND_DP)
return result
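
    # A minimal usage sketch of the context manager above (hypothetical
    # workload; not part of this commit):
    #
    #     with TrackAddedMemoryAllocation() as tracker:
    #         data = [bytearray(2**20) for _ in range(8)]  # allocate ~8 Mb
    #     print(tracker.addedmem_mb())  # peak traced growth, e.g. 8.0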

@staticmethod
@@ -105,6 +114,33 @@ def _wrapper(*args, **kwargs):
decorated_func.unit = "Mb"
return _wrapper

@staticmethod
def decorator_repeating(repeats=3):
"""Benchmark to track growth in resident memory during execution.
Tracks memory for repeated calls of decorated function.
Intended for use on ASV ``track_`` benchmarks. Applies the
:class:`TrackAddedMemoryAllocation` context manager to the benchmark
code, sets the benchmark ``unit`` attribute to ``Mb``.
"""

def decorator(decorated_func):
def _wrapper(*args, **kwargs):
assert decorated_func.__name__[:6] == "track_"
# Run the decorated benchmark within the added memory context
# manager.
with TrackAddedMemoryAllocation() as mb:
for _ in range(repeats):
decorated_func(*args, **kwargs)
return mb.addedmem_mb()

decorated_func.unit = "Mb"
return _wrapper

return decorator

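# A hypothetical application of the repeating decorator above (names are
# illustrative; not part of this commit). The decorated name must begin with
# "track_", and the reported value is peak memory growth across all repeats:
#
#     class SomeBenchmark:
#         @TrackAddedMemoryAllocation.decorator_repeating(repeats=5)
#         def track_addedmem_workload(self):
#             run_workload()  # hypothetical benchmark body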

def on_demand_benchmark(benchmark_object):
"""Disable these benchmark(s) unless ON_DEMAND_BENCHARKS env var is set.
Expand Down
212 changes: 212 additions & 0 deletions benchmarks/benchmarks/aggregate_collapse.py
@@ -0,0 +1,212 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Benchmarks relating to :meth:`iris.cube.CubeList.merge` and ``concatenate``."""

import warnings

import numpy as np

from iris import analysis, coords
from iris.warnings import IrisVagueMetadataWarning

from .generate_data.stock import realistic_4d_w_everything


class AggregationMixin:
params = [[False, True]]
param_names = ["Lazy operations"]

def setup(self, lazy_run: bool):
warnings.filterwarnings("ignore", message="Ignoring a datum")
warnings.filterwarnings("ignore", category=IrisVagueMetadataWarning)
cube = realistic_4d_w_everything(lazy=lazy_run)

for cm in cube.cell_measures():
cube.remove_cell_measure(cm)
for av in cube.ancillary_variables():
cube.remove_ancillary_variable(av)

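        # Build seven groups of ten repeated values (70 points in total) to
        # match the model_level_number dimension length, giving
        # ``aggregated_by`` duplicate coordinate points to group over.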
agg_mln_data = np.arange(0, 70, 10)
agg_mln_repeat = np.repeat(agg_mln_data, 10)

cube = cube[..., :10, :10]

self.mln_aux = "aggregatable"
self.mln = "model_level_number"
agg_mln_coord = coords.AuxCoord(points=agg_mln_repeat, long_name=self.mln_aux)

if lazy_run:
agg_mln_coord.points = agg_mln_coord.lazy_points()
cube.add_aux_coord(agg_mln_coord, 1)
self.cube = cube


class Aggregation(AggregationMixin):
def time_aggregated_by_MEAN(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.MEAN).data

def time_aggregated_by_COUNT(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.COUNT, function=lambda values: values > 280
).data

def time_aggregated_by_GMEAN(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.GMEAN).data

def time_aggregated_by_HMEAN(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.HMEAN).data

def time_aggregated_by_MAX_RUN(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.MAX_RUN, function=lambda values: values > 280
).data

def time_aggregated_by_MAX(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.MAX).data

def time_aggregated_by_MEDIAN(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.MEDIAN).data

def time_aggregated_by_MIN(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.MIN).data

def time_aggregated_by_PEAK(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.PEAK).data

def time_aggregated_by_PERCENTILE(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.PERCENTILE, percent=[10, 50, 90]
).data

def time_aggregated_by_FAST_PERCENTILE(self, _):
_ = self.cube.aggregated_by(
self.mln_aux,
analysis.PERCENTILE,
mdtol=0,
percent=[10, 50, 90],
fast_percentile_method=True,
).data

def time_aggregated_by_PROPORTION(self, _):
_ = self.cube.aggregated_by(
self.mln_aux,
analysis.PROPORTION,
function=lambda values: values > 280,
).data

def time_aggregated_by_STD_DEV(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.STD_DEV).data

def time_aggregated_by_VARIANCE(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.VARIANCE).data

def time_aggregated_by_RMS(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.RMS).data

def time_collapsed_by_MEAN(self, _):
_ = self.cube.collapsed(self.mln, analysis.MEAN).data

def time_collapsed_by_COUNT(self, _):
_ = self.cube.collapsed(
self.mln, analysis.COUNT, function=lambda values: values > 280
).data

def time_collapsed_by_GMEAN(self, _):
_ = self.cube.collapsed(self.mln, analysis.GMEAN).data

def time_collapsed_by_HMEAN(self, _):
_ = self.cube.collapsed(self.mln, analysis.HMEAN).data

def time_collapsed_by_MAX_RUN(self, _):
_ = self.cube.collapsed(
self.mln, analysis.MAX_RUN, function=lambda values: values > 280
).data

def time_collapsed_by_MAX(self, _):
_ = self.cube.collapsed(self.mln, analysis.MAX).data

def time_collapsed_by_MEDIAN(self, _):
_ = self.cube.collapsed(self.mln, analysis.MEDIAN).data

def time_collapsed_by_MIN(self, _):
_ = self.cube.collapsed(self.mln, analysis.MIN).data

def time_collapsed_by_PEAK(self, _):
_ = self.cube.collapsed(self.mln, analysis.PEAK).data

def time_collapsed_by_PERCENTILE(self, _):
_ = self.cube.collapsed(
self.mln, analysis.PERCENTILE, percent=[10, 50, 90]
).data

def time_collapsed_by_FAST_PERCENTILE(self, _):
_ = self.cube.collapsed(
self.mln,
analysis.PERCENTILE,
mdtol=0,
percent=[10, 50, 90],
fast_percentile_method=True,
).data

def time_collapsed_by_PROPORTION(self, _):
_ = self.cube.collapsed(
self.mln, analysis.PROPORTION, function=lambda values: values > 280
).data

def time_collapsed_by_STD_DEV(self, _):
_ = self.cube.collapsed(self.mln, analysis.STD_DEV).data

def time_collapsed_by_VARIANCE(self, _):
_ = self.cube.collapsed(self.mln, analysis.VARIANCE).data

def time_collapsed_by_RMS(self, _):
_ = self.cube.collapsed(self.mln, analysis.RMS).data


class WeightedAggregation(AggregationMixin):
def setup(self, lazy_run):
super().setup(lazy_run)

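        # Broadcast a (70,) ramp over model_level_number up to the full cube
        # shape: (70,) -> (6, 70), then (transposed) through the reversed
        # shape (10, 10, 70, 6), and back to ``self.cube.shape`` (6, 70, 10, 10).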
weights = np.linspace(0, 1, 70)
weights = np.broadcast_to(weights, self.cube.shape[:2])
weights = np.broadcast_to(weights.T, self.cube.shape[::-1])
weights = weights.T

self.weights = weights

    # Currently has problems with indexing weights.
# def time_w_aggregated_by_WPERCENTILE(self, _):
# _ = self.cube.aggregated_by(
# self.mln_aux, analysis.WPERCENTILE, weights=self.weights, percent=[10, 50, 90]
# ).data

def time_w_aggregated_by_SUM(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.SUM, weights=self.weights
).data

def time_w_aggregated_by_RMS(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.RMS, weights=self.weights
).data

def time_w_aggregated_by_MEAN(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.MEAN, weights=self.weights
).data

def time_w_collapsed_by_WPERCENTILE(self, _):
_ = self.cube.collapsed(
self.mln, analysis.WPERCENTILE, weights=self.weights, percent=[10, 50, 90]
).data

def time_w_collapsed_by_SUM(self, _):
_ = self.cube.collapsed(self.mln, analysis.SUM, weights=self.weights).data

def time_w_collapsed_by_RMS(self, _):
_ = self.cube.collapsed(self.mln, analysis.RMS, weights=self.weights).data

def time_w_collapsed_by_MEAN(self, _):
_ = self.cube.collapsed(self.mln, analysis.MEAN, weights=self.weights).data
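
For context, a minimal sketch of the two operations these benchmarks time, on
a toy cube (assumed standard `iris` usage; not part of this commit):

```python
import numpy as np

import iris.analysis
import iris.coords
import iris.cube

cube = iris.cube.Cube(np.arange(6.0), long_name="temperature")
# A coordinate with repeated points defines the groups for aggregated_by.
cube.add_aux_coord(iris.coords.AuxCoord([0, 0, 0, 1, 1, 1], long_name="group"), 0)

print(cube.aggregated_by("group", iris.analysis.MEAN).data)  # [1. 4.]
print(cube.collapsed("group", iris.analysis.MEAN).data)      # 2.5
```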
8 changes: 1 addition & 7 deletions benchmarks/benchmarks/experimental/ugrid/regions_combine.py
@@ -18,7 +18,7 @@
from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD
from iris.experimental.ugrid.utils import recombine_submeshes

-from ... import TrackAddedMemoryAllocation, on_demand_benchmark
+from ... import TrackAddedMemoryAllocation
from ...generate_data.ugrid import make_cube_like_2d_cubesphere


@@ -182,8 +182,6 @@ class CombineRegionsComputeRealData(MixinCombineRegions):
def time_compute_data(self, n_cubesphere):
_ = self.recombined_cube.data

-    # Vulnerable to noise, so disabled by default.
-    @on_demand_benchmark
@TrackAddedMemoryAllocation.decorator
def track_addedmem_compute_data(self, n_cubesphere):
_ = self.recombined_cube.data
@@ -203,8 +201,6 @@ def time_save(self, n_cubesphere):
# Save to disk, which must compute data + stream it to file.
save(self.recombined_cube, "tmp.nc")

-    # Vulnerable to noise, so disabled by default.
-    @on_demand_benchmark
@TrackAddedMemoryAllocation.decorator
def track_addedmem_save(self, n_cubesphere):
save(self.recombined_cube, "tmp.nc")
@@ -233,8 +229,6 @@ def time_stream_file2file(self, n_cubesphere):
# Save to disk, which must compute data + stream it to file.
save(self.recombined_cube, "tmp.nc")

-    # Vulnerable to noise, so disabled by default.
-    @on_demand_benchmark
@TrackAddedMemoryAllocation.decorator
def track_addedmem_stream_file2file(self, n_cubesphere):
save(self.recombined_cube, "tmp.nc")
3 changes: 2 additions & 1 deletion benchmarks/benchmarks/generate_data/stock.py
@@ -13,6 +13,7 @@
from pathlib import Path

import iris
+from iris import cube
from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD, load_mesh

from . import BENCHMARK_DATA, REUSE_DATA, load_realised, run_function_elsewhere
@@ -153,7 +154,7 @@ def _external(sample_mesh_kwargs_, save_path_):
return source_mesh.to_MeshCoord(location=location, axis=axis)


-def realistic_4d_w_everything(w_mesh=False, lazy=False):
+def realistic_4d_w_everything(w_mesh=False, lazy=False) -> iris.cube.Cube:
"""Run :func:`iris.tests.stock.realistic_4d_w_everything` in ``DATA_GEN_PYTHON``.
Parameters