
Commit

Merge branch 'main' of github.com:scitools/iris into enable-type-checking
bouweandela committed May 22, 2024
2 parents af76a1e + 22c98e8 commit 538115e
Showing 13 changed files with 440 additions and 35 deletions.
10 changes: 8 additions & 2 deletions benchmarks/README.md
@@ -43,11 +43,17 @@ if it is not already. You can achieve this by either:

* `OVERRIDE_TEST_DATA_REPOSITORY` - required - some benchmarks use
`iris-test-data` content, and your local `site.cfg` is not available for
-  benchmark scripts.
+  benchmark scripts. The benchmark runner defers to any value already set in
+  the shell, but will otherwise download `iris-test-data` and set the variable
+  accordingly.
* `DATA_GEN_PYTHON` - required - path to a Python executable that can be
used to generate benchmark test objects/files; see
[Data generation](#data-generation). The benchmark runner sets this
-  automatically, but will defer to any value already set in the shell.
+  automatically, but will defer to any value already set in the shell. Note that
+  [Mule](https://github.com/metomi/mule) will be automatically installed into
+  this environment, and sometimes
+  [iris-test-data](https://github.com/SciTools/iris-test-data) (see
+  `OVERRIDE_TEST_DATA_REPOSITORY`).
* `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic
test data, which the benchmark scripts will create if it doesn't already
exist. Defaults to `<root>/benchmarks/.data/` if not set. Note that some of
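The deferral behaviour described above can be pictured as a small sketch
(hypothetical paths; the real logic lives in the benchmark runner):

```python
import os

# Keep any value already exported in the shell; otherwise fall back to a
# runner-chosen default (the paths here are illustrative placeholders).
os.environ.setdefault("OVERRIDE_TEST_DATA_REPOSITORY", "/tmp/iris-test-data/test_data")
os.environ.setdefault("DATA_GEN_PYTHON", "/tmp/data-gen-env/bin/python")
os.environ.setdefault("BENCHMARK_DATA", "./benchmarks/.data")
```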
54 changes: 45 additions & 9 deletions benchmarks/benchmarks/__init__.py
@@ -5,7 +5,9 @@
"""Common code for benchmarks."""

from os import environ
-import resource
+import tracemalloc

+import numpy as np


def disable_repeat_between_setup(benchmark_object):
@@ -61,27 +63,34 @@ class TrackAddedMemoryAllocation:
AVD's detection threshold and be treated as 'signal'. Results
smaller than this value will therefore be returned as equal to this
value, ensuring fractionally small noise / no noise at all.
        Defaults to 0.2
RESULT_ROUND_DP : int
Number of decimal places of rounding on result values (in Mb).
Defaults to 1
"""

-    RESULT_MINIMUM_MB = 5.0
-
-    @staticmethod
-    def process_resident_memory_mb():
-        return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
+    RESULT_MINIMUM_MB = 0.2
+    RESULT_ROUND_DP = 1  # i.e. to the nearest 0.1 Mb

def __enter__(self):
-        self.mb_before = self.process_resident_memory_mb()
+        tracemalloc.start()
return self

def __exit__(self, *_):
-        self.mb_after = self.process_resident_memory_mb()
+        _, peak_mem_bytes = tracemalloc.get_traced_memory()
+        tracemalloc.stop()
+        # Save peak-memory allocation, scaled from bytes to Mb.
+        self._peak_mb = peak_mem_bytes * (2.0**-20)

def addedmem_mb(self):
"""Return measured memory growth, in Mb."""
-        result = self.mb_after - self.mb_before
+        result = self._peak_mb
        # Small results are too vulnerable to noise being interpreted as signal.
        result = max(self.RESULT_MINIMUM_MB, result)
+        # Rounding makes results easier to read.
+        result = np.round(result, self.RESULT_ROUND_DP)
return result
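
    # A minimal usage sketch of the context manager above (hypothetical
    # workload; not part of this commit):
    #
    #     with TrackAddedMemoryAllocation() as tracker:
    #         data = [bytearray(2**20) for _ in range(8)]  # allocate ~8 Mb
    #     print(tracker.addedmem_mb())  # peak traced growth, e.g. 8.0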

@staticmethod
@@ -105,6 +114,33 @@ def _wrapper(*args, **kwargs):
decorated_func.unit = "Mb"
return _wrapper

@staticmethod
def decorator_repeating(repeats=3):
"""Benchmark to track growth in resident memory during execution.
Tracks memory for repeated calls of decorated function.
Intended for use on ASV ``track_`` benchmarks. Applies the
:class:`TrackAddedMemoryAllocation` context manager to the benchmark
code, sets the benchmark ``unit`` attribute to ``Mb``.
"""

def decorator(decorated_func):
def _wrapper(*args, **kwargs):
assert decorated_func.__name__[:6] == "track_"
# Run the decorated benchmark within the added memory context
# manager.
with TrackAddedMemoryAllocation() as mb:
for _ in range(repeats):
decorated_func(*args, **kwargs)
return mb.addedmem_mb()

decorated_func.unit = "Mb"
return _wrapper

return decorator

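# A hypothetical application of the repeating decorator above (names are
# illustrative; not part of this commit). The decorated name must begin with
# "track_", and the reported value is peak memory growth across all repeats:
#
#     class SomeBenchmark:
#         @TrackAddedMemoryAllocation.decorator_repeating(repeats=5)
#         def track_addedmem_workload(self):
#             run_workload()  # hypothetical benchmark body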

def on_demand_benchmark(benchmark_object):
"""Disable these benchmark(s) unless ON_DEMAND_BENCHARKS env var is set.
Expand Down
212 changes: 212 additions & 0 deletions benchmarks/benchmarks/aggregate_collapse.py
@@ -0,0 +1,212 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Benchmarks relating to :meth:`iris.cube.CubeList.merge` and ``concatenate``."""

import warnings

import numpy as np

from iris import analysis, coords
from iris.warnings import IrisVagueMetadataWarning

from .generate_data.stock import realistic_4d_w_everything


class AggregationMixin:
params = [[False, True]]
param_names = ["Lazy operations"]

def setup(self, lazy_run: bool):
warnings.filterwarnings("ignore", message="Ignoring a datum")
warnings.filterwarnings("ignore", category=IrisVagueMetadataWarning)
cube = realistic_4d_w_everything(lazy=lazy_run)

for cm in cube.cell_measures():
cube.remove_cell_measure(cm)
for av in cube.ancillary_variables():
cube.remove_ancillary_variable(av)

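        # Build seven groups of ten repeated values (70 points in total) to
        # match the model_level_number dimension length, giving
        # ``aggregated_by`` duplicate coordinate points to group over.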
agg_mln_data = np.arange(0, 70, 10)
agg_mln_repeat = np.repeat(agg_mln_data, 10)

cube = cube[..., :10, :10]

self.mln_aux = "aggregatable"
self.mln = "model_level_number"
agg_mln_coord = coords.AuxCoord(points=agg_mln_repeat, long_name=self.mln_aux)

if lazy_run:
agg_mln_coord.points = agg_mln_coord.lazy_points()
cube.add_aux_coord(agg_mln_coord, 1)
self.cube = cube


class Aggregation(AggregationMixin):
def time_aggregated_by_MEAN(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.MEAN).data

def time_aggregated_by_COUNT(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.COUNT, function=lambda values: values > 280
).data

def time_aggregated_by_GMEAN(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.GMEAN).data

def time_aggregated_by_HMEAN(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.HMEAN).data

def time_aggregated_by_MAX_RUN(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.MAX_RUN, function=lambda values: values > 280
).data

def time_aggregated_by_MAX(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.MAX).data

def time_aggregated_by_MEDIAN(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.MEDIAN).data

def time_aggregated_by_MIN(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.MIN).data

def time_aggregated_by_PEAK(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.PEAK).data

def time_aggregated_by_PERCENTILE(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.PERCENTILE, percent=[10, 50, 90]
).data

def time_aggregated_by_FAST_PERCENTILE(self, _):
_ = self.cube.aggregated_by(
self.mln_aux,
analysis.PERCENTILE,
mdtol=0,
percent=[10, 50, 90],
fast_percentile_method=True,
).data

def time_aggregated_by_PROPORTION(self, _):
_ = self.cube.aggregated_by(
self.mln_aux,
analysis.PROPORTION,
function=lambda values: values > 280,
).data

def time_aggregated_by_STD_DEV(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.STD_DEV).data

def time_aggregated_by_VARIANCE(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.VARIANCE).data

def time_aggregated_by_RMS(self, _):
_ = self.cube.aggregated_by(self.mln_aux, analysis.RMS).data

def time_collapsed_by_MEAN(self, _):
_ = self.cube.collapsed(self.mln, analysis.MEAN).data

def time_collapsed_by_COUNT(self, _):
_ = self.cube.collapsed(
self.mln, analysis.COUNT, function=lambda values: values > 280
).data

def time_collapsed_by_GMEAN(self, _):
_ = self.cube.collapsed(self.mln, analysis.GMEAN).data

def time_collapsed_by_HMEAN(self, _):
_ = self.cube.collapsed(self.mln, analysis.HMEAN).data

def time_collapsed_by_MAX_RUN(self, _):
_ = self.cube.collapsed(
self.mln, analysis.MAX_RUN, function=lambda values: values > 280
).data

def time_collapsed_by_MAX(self, _):
_ = self.cube.collapsed(self.mln, analysis.MAX).data

def time_collapsed_by_MEDIAN(self, _):
_ = self.cube.collapsed(self.mln, analysis.MEDIAN).data

def time_collapsed_by_MIN(self, _):
_ = self.cube.collapsed(self.mln, analysis.MIN).data

def time_collapsed_by_PEAK(self, _):
_ = self.cube.collapsed(self.mln, analysis.PEAK).data

def time_collapsed_by_PERCENTILE(self, _):
_ = self.cube.collapsed(
self.mln, analysis.PERCENTILE, percent=[10, 50, 90]
).data

def time_collapsed_by_FAST_PERCENTILE(self, _):
_ = self.cube.collapsed(
self.mln,
analysis.PERCENTILE,
mdtol=0,
percent=[10, 50, 90],
fast_percentile_method=True,
).data

def time_collapsed_by_PROPORTION(self, _):
_ = self.cube.collapsed(
self.mln, analysis.PROPORTION, function=lambda values: values > 280
).data

def time_collapsed_by_STD_DEV(self, _):
_ = self.cube.collapsed(self.mln, analysis.STD_DEV).data

def time_collapsed_by_VARIANCE(self, _):
_ = self.cube.collapsed(self.mln, analysis.VARIANCE).data

def time_collapsed_by_RMS(self, _):
_ = self.cube.collapsed(self.mln, analysis.RMS).data


class WeightedAggregation(AggregationMixin):
def setup(self, lazy_run):
super().setup(lazy_run)

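        # Broadcast a (70,) ramp over model_level_number up to the full cube
        # shape: (70,) -> (6, 70), then (transposed) through the reversed
        # shape (10, 10, 70, 6), and back to ``self.cube.shape`` (6, 70, 10, 10).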
weights = np.linspace(0, 1, 70)
weights = np.broadcast_to(weights, self.cube.shape[:2])
weights = np.broadcast_to(weights.T, self.cube.shape[::-1])
weights = weights.T

self.weights = weights

    # Currently has problems with indexing weights.
# def time_w_aggregated_by_WPERCENTILE(self, _):
# _ = self.cube.aggregated_by(
# self.mln_aux, analysis.WPERCENTILE, weights=self.weights, percent=[10, 50, 90]
# ).data

def time_w_aggregated_by_SUM(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.SUM, weights=self.weights
).data

def time_w_aggregated_by_RMS(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.RMS, weights=self.weights
).data

def time_w_aggregated_by_MEAN(self, _):
_ = self.cube.aggregated_by(
self.mln_aux, analysis.MEAN, weights=self.weights
).data

def time_w_collapsed_by_WPERCENTILE(self, _):
_ = self.cube.collapsed(
self.mln, analysis.WPERCENTILE, weights=self.weights, percent=[10, 50, 90]
).data

def time_w_collapsed_by_SUM(self, _):
_ = self.cube.collapsed(self.mln, analysis.SUM, weights=self.weights).data

def time_w_collapsed_by_RMS(self, _):
_ = self.cube.collapsed(self.mln, analysis.RMS, weights=self.weights).data

def time_w_collapsed_by_MEAN(self, _):
_ = self.cube.collapsed(self.mln, analysis.MEAN, weights=self.weights).data
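
For context, a minimal sketch of the two operations these benchmarks time, on
a toy cube (assumed standard `iris` usage; not part of this commit):

```python
import numpy as np

import iris.analysis
import iris.coords
import iris.cube

cube = iris.cube.Cube(np.arange(6.0), long_name="temperature")
# A coordinate with repeated points defines the groups for aggregated_by.
cube.add_aux_coord(iris.coords.AuxCoord([0, 0, 0, 1, 1, 1], long_name="group"), 0)

print(cube.aggregated_by("group", iris.analysis.MEAN).data)  # [1. 4.]
print(cube.collapsed("group", iris.analysis.MEAN).data)      # 2.5
```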
8 changes: 1 addition & 7 deletions benchmarks/benchmarks/experimental/ugrid/regions_combine.py
@@ -18,7 +18,7 @@
from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD
from iris.experimental.ugrid.utils import recombine_submeshes

-from ... import TrackAddedMemoryAllocation, on_demand_benchmark
+from ... import TrackAddedMemoryAllocation
from ...generate_data.ugrid import make_cube_like_2d_cubesphere


@@ -182,8 +182,6 @@ class CombineRegionsComputeRealData(MixinCombineRegions):
def time_compute_data(self, n_cubesphere):
_ = self.recombined_cube.data

-    # Vulnerable to noise, so disabled by default.
-    @on_demand_benchmark
@TrackAddedMemoryAllocation.decorator
def track_addedmem_compute_data(self, n_cubesphere):
_ = self.recombined_cube.data
@@ -203,8 +201,6 @@ def time_save(self, n_cubesphere):
# Save to disk, which must compute data + stream it to file.
save(self.recombined_cube, "tmp.nc")

-    # Vulnerable to noise, so disabled by default.
-    @on_demand_benchmark
@TrackAddedMemoryAllocation.decorator
def track_addedmem_save(self, n_cubesphere):
save(self.recombined_cube, "tmp.nc")
@@ -233,8 +229,6 @@ def time_stream_file2file(self, n_cubesphere):
# Save to disk, which must compute data + stream it to file.
save(self.recombined_cube, "tmp.nc")

-    # Vulnerable to noise, so disabled by default.
-    @on_demand_benchmark
@TrackAddedMemoryAllocation.decorator
def track_addedmem_stream_file2file(self, n_cubesphere):
save(self.recombined_cube, "tmp.nc")
3 changes: 2 additions & 1 deletion benchmarks/benchmarks/generate_data/stock.py
@@ -13,6 +13,7 @@
from pathlib import Path

import iris
+from iris import cube
from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD, load_mesh

from . import BENCHMARK_DATA, REUSE_DATA, load_realised, run_function_elsewhere
@@ -153,7 +154,7 @@ def _external(sample_mesh_kwargs_, save_path_):
return source_mesh.to_MeshCoord(location=location, axis=axis)


-def realistic_4d_w_everything(w_mesh=False, lazy=False):
+def realistic_4d_w_everything(w_mesh=False, lazy=False) -> iris.cube.Cube:
"""Run :func:`iris.tests.stock.realistic_4d_w_everything` in ``DATA_GEN_PYTHON``.
Parameters