From bd9b23c572e0b3cb21335a2bf9f8cdc5023dd90d Mon Sep 17 00:00:00 2001
From: Joep Vanlier
Date: Fri, 9 Aug 2024 17:03:30 +0200
Subject: [PATCH] caching: handle maximum size in bytes

Since cached items can vary in size, it makes more sense to express the
maximum cache size in `bytes`. Ultimately, memory usage is what impacts
the user's experience the most.
---
 lumicks/pylake/channel.py                      | 17 +++++++++++++----
 lumicks/pylake/tests/test_file/test_caching.py |  4 ++++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/lumicks/pylake/channel.py b/lumicks/pylake/channel.py
index 26106fb16..34e83ccea 100644
--- a/lumicks/pylake/channel.py
+++ b/lumicks/pylake/channel.py
@@ -2,10 +2,10 @@
 import numbers
 from typing import Union
-from functools import lru_cache
 
 import numpy as np
 import numpy.typing as npt
+from cachetools import LRUCache, cached
 
 from .detail.plotting import _annotate
 from .detail.timeindex import to_seconds, to_timestamp
@@ -13,20 +13,25 @@
 from .nb_widgets.range_selector import SliceRangeSelectorWidget
 
 
-@lru_cache(maxsize=100)
+@cached(LRUCache(maxsize=1 << 30, getsizeof=lambda x: x.nbytes), info=True)  # 1 GB of cache
 def _get_array(cache_object):
     return cache_object.read_array()
 
 
 class LazyCache:
-    def __init__(self, location, dset):
+    def __init__(self, location, dset, nbytes):
         """A lazy globally cached wrapper around an object that is convertible to a numpy array"""
         self._location = location
         self._dset = dset
+        self._nbytes = nbytes
 
     def __len__(self):
         return len(self._dset)
 
+    @property
+    def nbytes(self):
+        return self._nbytes
+
     def __hash__(self):
         return hash(self._location)
 
@@ -36,7 +41,11 @@ def from_h5py_dset(dset, field=None):
         if field:
             location = f"{location}.{field}"
             dset = dset.fields(field)
-        return LazyCache(location, dset)
+            item_size = dset.read_dtype.itemsize
+        else:
+            item_size = dset.dtype.itemsize
+
+        return LazyCache(location, dset, nbytes=item_size * len(dset))
 
     def read_array(self):
         # Note, we deliberately do _not_ allow additional arguments to asarray since we would
diff --git a/lumicks/pylake/tests/test_file/test_caching.py b/lumicks/pylake/tests/test_file/test_caching.py
index 963ca8468..ff7024649 100644
--- a/lumicks/pylake/tests/test_file/test_caching.py
+++ b/lumicks/pylake/tests/test_file/test_caching.py
@@ -15,6 +15,7 @@ def test_global_cache_continuous(h5_file):
     # These should point to the same data
     assert id(f1x1.data) == id(f1x2.data)
     assert _get_array.cache_info().hits == 1
+    assert _get_array.cache_info().currsize == 40
 
     with pytest.raises(ValueError, match="assignment destination is read-only"):
         f1x1.data[5:100] = 3
@@ -33,8 +34,10 @@ def test_global_cache_timeseries(h5_file):
     # These should point to the same data
     assert id(f1x1.data) == id(f1x2.data)
     assert _get_array.cache_info().hits == 1
+    assert _get_array.cache_info().currsize == 16
     assert id(f1x1.timestamps) == id(f1x2.timestamps)
     assert _get_array.cache_info().hits == 2
+    assert _get_array.cache_info().currsize == 32
 
     with pytest.raises(ValueError, match="assignment destination is read-only"):
         f1x1.data[5:100] = 3
@@ -53,6 +56,7 @@ def test_global_cache_timetags(h5_file):
     # These should point to the same data
     assert id(tags1.data) == id(tags2.data)
     assert _get_array.cache_info().hits == 1
+    assert _get_array.cache_info().currsize == 72
 
     with pytest.raises(ValueError, match="assignment destination is read-only"):
         tags1.data[5:100] = 3
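
Reviewer note, not part of the patch: below is a minimal, standalone sketch of the
byte-based eviction this change relies on. The `load_array` function and the 1 MiB
budget are invented for illustration; only `LRUCache`, `cached`, `getsizeof` and
`info` are the cachetools API actually used in channel.py.

import numpy as np
from cachetools import LRUCache, cached

# Illustrative example only; the real cache in channel.py wraps
# LazyCache.read_array with a 1 GB budget. `getsizeof` is evaluated once per
# cached value, so `maxsize` and `currsize` are measured in bytes rather than
# in number of entries.
_byte_cache = LRUCache(maxsize=1 << 20, getsizeof=lambda arr: arr.nbytes)


@cached(_byte_cache, info=True)  # info=True requires cachetools >= 5.3
def load_array(n):
    """Stand-in for an expensive read from disk."""
    return np.arange(n, dtype=np.float64)


load_array(1000)                # miss: 8000 bytes enter the cache
load_array(1000)                # hit: the same array object is returned
print(load_array.cache_info())  # CacheInfo(hits=1, misses=1, maxsize=1048576, currsize=8000)

This is also why the new `currsize` assertions in test_caching.py count bytes rather
than entries. A value larger than the configured budget is simply not retained:
cachetools skips caching values that do not fit, so the call still succeeds, it just
is not memoized.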