Skip to content

Commit

Permalink
implement changes suggested in PR #162
Browse files Browse the repository at this point in the history
  • Loading branch information
bbean23 committed Nov 6, 2024
1 parent 3c217fa commit fe3cd68
Show file tree
Hide file tree
Showing 5 changed files with 337 additions and 94 deletions.
107 changes: 88 additions & 19 deletions opencsp/common/lib/cv/CacheableImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,15 @@ class CacheableImage:
priority order for the data that is returned from various methods:
(1) in-memory array, (2) numpy cache file, (3) image source file.
Only one of the inputs (array, cache_path, source_path) are required. The
"array" parameter should be the raw image data, the cache_path should be a
string to a .npy file, and the source_path should be a string to an image
file. Note that the cache_path file doesn't need to exist yet, but can
instead be an indicator for where to cache the image to.
Parameters
----------
Only one of the inputs (array, cache_path, source_path) are required in the
constructor. In fact, there is a method :py:meth:`from_single_source` that
tries to guess which input is being provided. The "array" parameter should
be the raw image data, the cache_path should be a string to a .npy file, and
the source_path should be a string to an image file. Note that the
cache_path file doesn't need to exist yet, but can instead be an indicator
for where to cache the image to.
The following table determines how images are retrieved based on the given
parameters ([X] = given and file exists, [.] = given and file doesn't
Expand Down Expand Up @@ -72,6 +76,29 @@ class CacheableImage:
+-------+---------+----------+
| | | [X] |
+-------+---------+----------+
sys.getsizeof
-------------
This class overrides the default __sizeof__ dunder method, meaning that the
size returned by sys.getsizeof(cacheable_image) is not just the size of all
variables tracked by the instance. Rather, the size of the Numpy array and
Pillow image are returned. This metric better represents the
memory-conserving use case that is intended for this class.
__sizeof__ returns close to 0 if the array and image attributes have been
set to None (aka the cache() method has been executed). Note that this does
not depend on the state of the garbage collector, which might not actually
free the memory for some time after it is no longer being tracked by this
class. An attempt at freeing the memory can be done immediately with
`gc.collect()` but the results of this are going to be implementation
specific.
The size of the source path, image path, and all other attributes are not
included in the return value of __sizeof__. This decision was made for
simplicity. Also the additional memory has very little impact. For example a
256 character path uses ~0.013% as much memory as a 1920x1080 monochrome
image.
"""

_cacheable_images_registry: weakref.WeakKeyDictionary["CacheableImage", int] = {}
Expand All @@ -81,6 +108,11 @@ class CacheableImage:
# Like _cacheable_images_registry, but for instances that have been deregistered and not reregistered.
_cacheable_images_last_access_index: int = 0
# The last value used in the _cacheable_images_registry for maintaining access order.
_expected_cached_size: int = (48 * 2) * 2
# Upper bound on the anticipated return value from __sizeof__ after cache()
# has just been evaluated. Each python variable uses ~"48" bytes, there are
# "*2" variables included (array and image), and we don't care about
# specifics so add some buffer "*2".

def __init__(self, array: np.ndarray = None, cache_path: str = None, source_path: str = None):
"""
Expand Down Expand Up @@ -425,11 +457,10 @@ def _does_source_image_match(self, nparray: np.ndarray):
except Exception:
return False

enforce_source_image_matches = True
# Debugging: check that the programmer didn't misuse CacheableImage
# by setting the source_path to a file that doesn't actually match
# the numpy array or cache path.
if enforce_source_image_matches and not arrays_are_equal:
# Check that the programmer didn't misuse CacheableImage by setting
# the source_path to a file that doesn't actually match the numpy
# array or cache path.
if not arrays_are_equal:
try:
import os
import opencsp.common.lib.opencsp_path.opencsp_root_path as orp
Expand Down Expand Up @@ -465,8 +496,8 @@ def cache(self, cache_path: str = None):
Parameters
----------
cache_path : str, optional
The path to cache to, as necessary. Can be None if either cache_path
or source_path are already set. By default None.
The path/name.ext to cache to, as necessary. Can be None if either
cache_path or source_path are already set. By default None.
"""
# use either self.cache_path or cache_path, depending on:
# 1. self.cache_path exists
Expand Down Expand Up @@ -516,6 +547,8 @@ def cache(self, cache_path: str = None):
# This instance has already been cached to disk at least once. Don't
# need to cache it again. We check the size to make sure that the
# file was actually cached and doesn't just exist as a placeholder.
# I chose '> 10' instead of '> 0' because I'm paranoid that
# getsize() will return a small number of bytes on some systems.
pass
else:
# Write the numpy array to disk.
Expand All @@ -533,10 +566,15 @@ def cache(self, cache_path: str = None):

def save_image(self, image_path_name_ext: str):
"""
Saves this image as an image file to the given file.
Saves this image as an image file to the given file. This method is best
used when an image is intended to be kept after a computation, in which
case the newly saved image file can be the on-disk reference instead of
an on-disk cache file.
Note: this replaces the internal reference to source_path, if any, with
the newly given path.
the newly given path. It is therefore suggested to not use this method
unless you are using this class as part of an end-use application, in
order to avoid unintended side effects.
Parameters
----------
Expand All @@ -550,8 +588,37 @@ def save_image(self, image_path_name_ext: str):
def cache_images_to_disk_as_necessary(
memory_limit_bytes: int, tmp_path_generator: Callable[[], str], log_level=lt.log.DEBUG
):
"""Check memory usage and convert images to files (aka file path
strings) as necessary in order to reduce memory usage."""
"""
Check memory usage and convert images to files (aka file path strings)
as necessary in order to reduce memory usage.
Note that due to the small amount of necessary memory used by each
CacheableImage instance, all instances can be cached and still be above
the `memory_limit_bytes` threshold. This can happen either when
memory_limit_bytes is sufficiently small, or the number of live
CacheableImages is sufficiently large. In these cases, this method may
not be able to lower the amount of memory in use.
Parameters
----------
memory_limit_bytes : int
The total number of bytes of memory that all CacheableImages are
allowed to use for their in-memory arrays and images, in sum. Note
that each CachableImage instance will still use some small amount of
memory even after it has been cached.
tmp_path_generator : Callable[[], str]
A function that returns a path/name.ext for a file that does not
exist yet. This file will be used to save the numpy array out to.
log_level : int, optional
The level to print out status messages to, including the amount of
memory in use before and after caching images. By default
lt.log.DEBUG.
"""
# By providing the memory_limit_bytes as a parameter, we're effectively
# enabling the user to choose a lower memory threshold than is the
# default. There's also the benefit of requiring the user to think about
# how much memory they want to use, which is going to be system and
# application specific.
total_mem_size = CacheableImage.all_cacheable_images_size()
if total_mem_size <= memory_limit_bytes:
return
Expand All @@ -571,12 +638,14 @@ def cache_images_to_disk_as_necessary(

# free the LRU instance's memory by cacheing it to disk
cacheable_image_size = sys.getsizeof(cacheable_image)
if cacheable_image_size == 0:
pass # already cached to disk
if cacheable_image_size <= cacheable_image._expected_cached_size:
continue # already cached to disk, probably
if cacheable_image.cache_path is not None:
cacheable_image.cache(None)
else: # cache_path is None
cacheable_image.cache(tmp_path_generator())
total_mem_size -= cacheable_image_size

bytes_cached_to_disk = cacheable_image_size - sys.getsizeof(cacheable_image)
total_mem_size -= bytes_cached_to_disk

log_method(f"New total memory size after cacheing images: {int(total_mem_size / 1024 / 1024)}MB")
20 changes: 20 additions & 0 deletions opencsp/common/lib/cv/spot_analysis/SpotAnalysisOperable.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,26 @@ def get_all_images(self, primary=True, supporting=True, visualization=True, algo
"""
Get a list of all images tracked by this operable including all primary
images, supporting images, visualization, and algorithm images.
Parameters
----------
primary : bool, optional
True to include the primary image in the list of returned images. By
default True.
supporting : bool, optional
True to include the supporting images, if any, in the list of
returned images. By default True.
visualization : bool, optional
True to include the visualization images in the list of returned
images. By default True.
algorithm : bool, optional
True to include the algorithm images, if any, in the list of
returned images. By default True.
Returns
-------
list[CacheableImage]
The images tracked by this operable.
"""
ret: list[CacheableImage] = []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,15 @@ def assign_inputs(
'AbstractSpotAnalysisImagesProcessor', list[SpotAnalysisOperable], Iterator[SpotAnalysisOperable]
],
):
"""Register the input operables to be processed either with the run() method, or as an iterator."""
"""
Register the input operables to be processed either with the run()
method, or as an iterator.
Parameters
----------
operables : Union[ AbstractSpotAnalysisImagesProcessor, list[SpotAnalysisOperable], Iterator[SpotAnalysisOperable] ]
The operables to be processed.
"""
# initialize the state for a new set of inputs
self.input_operables = operables
self._num_images_processed = 0
Expand Down Expand Up @@ -192,12 +200,33 @@ def _get_tmp_path(self) -> str:
self._tmp_images_saved += 1
return path_name_ext

def _save_image(self, im: CacheableImage, idx_list: list[int], dir: str, name_prefix: str = None, ext="jpg"):
def _save_image(self, im: CacheableImage, idx_list: list[int], dir: str, name_prefix: str = None, ext="jpg") -> str:
# Saves the given image to the given path.
#
# Parameters
# ----------
# im : CacheableImage
# The image to be saved.
# idx_list : list[int]
# Length-1 list where idx_list[0] is the count of images saved with
# this method. Used for naming the saved images. This value is updated
# as part of the execution of this method.
# dir : str
# The directory to save the image to.
# name_prefix : str, optional
# A prefix to prepend to the image name, by default empty string
# ext : str, optional
# The extension/type to save the image with, by default "jpg"
#
# Returns
# -------
# str
# The path/name.ext of the newly saved image.
idx = idx_list[0]
image_name = ("" if name_prefix == None else f"{name_prefix}_") + f"SA_preprocess_{self.name}{idx}"
image_path_name_ext = os.path.join(dir, image_name + "." + ext)
lt.debug("Saving SpotAnalysis processed image to " + image_path_name_ext)
im.save_image(image_path_name_ext)
im.to_image().save(image_path_name_ext)
idx_list[0] = idx + 1
return image_path_name_ext

Expand All @@ -212,7 +241,26 @@ def run(
| Union['AbstractSpotAnalysisImagesProcessor']
),
) -> list[SpotAnalysisOperable]:
"""Performs image processing on the input images."""
"""
Performs image processing on the input operables and returns the results.
This is provided as a convenience method. The more typical way to use
this class is to create a SpotAnalysis instance, assign this image
processor to that instance, and then iterate over the results.
See also: :py:meth:`process_images` as another convenience method.
Parameters
----------
operables : ImagesIterable | ImagesStream | SpotAnalysisImagesStream | list[SpotAnalysisOperable] | Iterator[SpotAnalysisOperable] | Union[AbstractSpotAnalysisImagesProcessor]
The input operables to be processed. If these are images, then they
will be wrapped in a SpotAnalysisOperablesStream.
Returns
-------
list[SpotAnalysisOperable]
The resulting operables after processing.
"""
if isinstance(operables, (ImagesIterable, ImagesStream)):
operables = SpotAnalysisImagesStream(operables)
if isinstance(operables, SpotAnalysisImagesStream):
Expand Down Expand Up @@ -304,6 +352,16 @@ def process_images(self, images: list[CacheableImage | np.ndarray | Image.Image]
spot_analysis = SpotAnalysis("Log Scale Images", processors)
spot_analysis.set_primary_images(images)
results = [result for result in spot_analysis]
Parameters
----------
images : list[CacheableImage | np.ndarray | Image.Image]
The images to be processed.
Returns
-------
list[CacheableImage]
The resulting images after processing.
"""
# import here to avoid cyclic imports
from opencsp.common.lib.cv.SpotAnalysis import SpotAnalysis
Expand Down Expand Up @@ -379,7 +437,7 @@ def __iter__(self):
# We must have already finished processing all input images, either
# through the run() method or by simply having iterated through them
# all.
raise StopIteration
return self
elif self.input_iter != None:
# We must be iterating through the input images already.
return self
Expand Down
Loading

0 comments on commit fe3cd68

Please sign in to comment.