diff --git a/3rdparty/LICENSE.pba+ b/3rdparty/LICENSE.pba+ new file mode 100644 index 000000000..9d0b4030a --- /dev/null +++ b/3rdparty/LICENSE.pba+ @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 School of Computing, National University of Singapore + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 387d53820..18d0f992e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,51 @@ +# cuCIM 22.08.00 (17 Aug 2022) + +## 🚨 Breaking Changes + +- Stain extraction: use a less strict condition across channels when thresholding ([#316](https://github.com/rapidsai/cucim/pull/316)) [@grlee77](https://github.com/grlee77) + +## 🐛 Bug Fixes + +- create SimilarityTransform using CuPy 9.x-compatible indexing ([#365](https://github.com/rapidsai/cucim/pull/365)) [@grlee77](https://github.com/grlee77) +- Add `__init__.py` in `cucim.core` ([#359](https://github.com/rapidsai/cucim/pull/359)) [@jakirkham](https://github.com/jakirkham) +- Stain extraction: use a less strict condition across channels when thresholding ([#316](https://github.com/rapidsai/cucim/pull/316)) [@grlee77](https://github.com/grlee77) +- Incorporate bug fixes from skimage 0.19.3 ([#312](https://github.com/rapidsai/cucim/pull/312)) [@grlee77](https://github.com/grlee77) +- fix RawKernel bug for canny filter when quantiles are used ([#310](https://github.com/rapidsai/cucim/pull/310)) [@grlee77](https://github.com/grlee77) + +## 📖 Documentation + +- Defer loading of `custom.js` ([#383](https://github.com/rapidsai/cucim/pull/383)) [@galipremsagar](https://github.com/galipremsagar) +- add cucim.core.morphology to API docs + other docstring fixes ([#367](https://github.com/rapidsai/cucim/pull/367)) [@grlee77](https://github.com/grlee77) +- Update README.md ([#361](https://github.com/rapidsai/cucim/pull/361)) [@HesAnEasyCoder](https://github.com/HesAnEasyCoder) +- remove unimplemented functions from See Also and fix version numbers in deprecation warnings ([#356](https://github.com/rapidsai/cucim/pull/356)) [@grlee77](https://github.com/grlee77) +- Forward-merge branch-22.06 to branch-22.08 ([#344](https://github.com/rapidsai/cucim/pull/344)) [@grlee77](https://github.com/grlee77) +- Update README.md ([#315](https://github.com/rapidsai/cucim/pull/315)) 
[@HesAnEasyCoder](https://github.com/HesAnEasyCoder) +- Update index.rst ([#314](https://github.com/rapidsai/cucim/pull/314)) [@HesAnEasyCoder](https://github.com/HesAnEasyCoder) +- Update PyPI package documentation for v22.06.00 ([#311](https://github.com/rapidsai/cucim/pull/311)) [@gigony](https://github.com/gigony) + +## 🚀 New Features + +- Add segmentation with the Chan-Vese active contours method ([#343](https://github.com/rapidsai/cucim/pull/343)) [@grlee77](https://github.com/grlee77) +- Add cucim.skimage.morphology.medial_axis ([#342](https://github.com/rapidsai/cucim/pull/342)) [@grlee77](https://github.com/grlee77) +- Add cucim.skimage.segmentation.expand_labels ([#341](https://github.com/rapidsai/cucim/pull/341)) [@grlee77](https://github.com/grlee77) +- Add Euclidean distance transform for images/volumes ([#318](https://github.com/rapidsai/cucim/pull/318)) [@grlee77](https://github.com/grlee77) + +## 🛠️ Improvements + +- Revert "Allow CuPy 11" ([#362](https://github.com/rapidsai/cucim/pull/362)) [@galipremsagar](https://github.com/galipremsagar) +- Fix issues with day & night modes in python docs ([#360](https://github.com/rapidsai/cucim/pull/360)) [@galipremsagar](https://github.com/galipremsagar) +- Allow CuPy 11 ([#357](https://github.com/rapidsai/cucim/pull/357)) [@jakirkham](https://github.com/jakirkham) +- more efficient separable convolution ([#355](https://github.com/rapidsai/cucim/pull/355)) [@grlee77](https://github.com/grlee77) +- Support resolution and spacing metadata ([#349](https://github.com/rapidsai/cucim/pull/349)) [@gigony](https://github.com/gigony) +- Performance optimizations to morphological segmentation functions ([#340](https://github.com/rapidsai/cucim/pull/340)) [@grlee77](https://github.com/grlee77) +- benchmarks: avoid use of deprecated pandas API ([#339](https://github.com/rapidsai/cucim/pull/339)) [@grlee77](https://github.com/grlee77) +- Reduce memory overhead and improve performance of normalize_colors_pca 
([#328](https://github.com/rapidsai/cucim/pull/328)) [@grlee77](https://github.com/grlee77) +- Protect against obscure divide by zero error in edge case of `normalize_colors_pca` ([#327](https://github.com/rapidsai/cucim/pull/327)) [@grlee77](https://github.com/grlee77) +- complete parametrization of cucim.skimage benchmarks ([#324](https://github.com/rapidsai/cucim/pull/324)) [@grlee77](https://github.com/grlee77) +- parameterization of `filters` and `features` benchmarks (v2) ([#322](https://github.com/rapidsai/cucim/pull/322)) [@grlee77](https://github.com/grlee77) +- Add a fast histogram-based median filter ([#317](https://github.com/rapidsai/cucim/pull/317)) [@grlee77](https://github.com/grlee77) +- Remove custom compiler environment variables ([#307](https://github.com/rapidsai/cucim/pull/307)) [@ajschmidt8](https://github.com/ajschmidt8) + # cuCIM 22.06.00 (7 Jun 2022) ## 🚨 Breaking Changes @@ -17,6 +65,7 @@ ## 🚀 New Features - add missing `cucim.skimage.segmentation.clear_border` function ([#267](https://github.com/rapidsai/cucim/pull/267)) [@grlee77](https://github.com/grlee77) +- add `cucim.core.operations.color.stain_extraction_pca` and `cucim.core.operations.color.normalize_colors_pca` for digital pathology H&E stain extraction and normalization ([#273](https://github.com/rapidsai/cucim/pull/273)) [@grlee77](https://github.com/grlee77), [@drbeh](https://github.com/drbeh) ## 🛠️ Improvements @@ -27,6 +76,7 @@ - Promote small integer types to single rather than double precision ([#278](https://github.com/rapidsai/cucim/pull/278)) [@grlee77](https://github.com/grlee77) - improve efficiency of histogram-based thresholding functions ([#276](https://github.com/rapidsai/cucim/pull/276)) [@grlee77](https://github.com/grlee77) - Remove unused dependencies in GPU tests job ([#268](https://github.com/rapidsai/cucim/pull/268)) [@Ethyling](https://github.com/Ethyling) +- Enable footprint decomposition for morphology 
([#274](https://github.com/rapidsai/cucim/pull/274)) [@grlee77](https://github.com/grlee77) - Use conda compilers ([#232](https://github.com/rapidsai/cucim/pull/232)) [@Ethyling](https://github.com/Ethyling) - Build packages using mambabuild ([#216](https://github.com/rapidsai/cucim/pull/216)) [@Ethyling](https://github.com/Ethyling) diff --git a/LICENSE-3rdparty.md b/LICENSE-3rdparty.md index 0a3789af7..984d0e952 100644 --- a/LICENSE-3rdparty.md +++ b/LICENSE-3rdparty.md @@ -281,3 +281,9 @@ StainTools - https://github.com/Peter554/StainTools/blob/master/LICENSE.txt - Copyright: Peter Byfield - Usage: reference for stain color normalization algorithm + +PBA+ +- License: MIT License + - https://github.com/orzzzjq/Parallel-Banding-Algorithm-plus/blob/master/LICENSE +- Copyright: School of Computing, National University of Singapore +- Usage: PBA+ is used to implement the Euclidean distance transform. diff --git a/README.md b/README.md index 9f31f70ce..612fda219 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,43 @@ #
 cuCIM
+[RAPIDS](https://rapids.ai) cuCIM is an open-source, accelerated computer vision and image processing software library for multidimensional images used in biomedical, geospatial, material and life science, and remote sensing use cases. -[RAPIDS](https://rapids.ai) cuCIM is an extensible toolkit designed to provide GPU accelerated I/O, computer vision & image processing primitives for N-Dimensional images with a focus on biomedical imaging. +cuCIM offers: + +- Enhanced Image Processing Capabilities for large and n-dimensional tag image file format (TIFF) files +- Accelerated performance through Graphics Processing Unit (GPU)-based image processing and computer vision primitives +- A Straightforward Pythonic Interface with Matching Application Programming Interface (API) for Openslide + +cuCIM supports the following formats: + +- Aperio ScanScope Virtual Slide (SVS) +- Philips TIFF +- Generic Tiled, Multi-resolution RGB TIFF files with the following compression schemes: + - No Compression + - JPEG + - JPEG2000 + - Lempel-Ziv-Welch (LZW) + - Deflate **NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cucim/blob/main/README.md) ensure you are on the `main` branch. 
-- [GTC 2021 cuCIM: A GPU Image I/O and Processing Toolkit [S32194]](https://www.nvidia.com/en-us/on-demand/search/?facet.mimetype[]=event%20session&layout=list&page=1&q=cucim&sort=date) - - [video](https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s32194/) +- [GTC 2022 Accelerating Storage IO to GPUs with Magnum IO [S41347]](https://events.rainfocus.com/widget/nvidia/gtcspring2022/sessioncatalog/session/1634960000577001Etxp) + - cuCIM's GDS API examples: - [SciPy 2021 cuCIM - A GPU image I/O and processing library](https://www.scipy2021.scipy.org/) - [video](https://youtu.be/G46kOOM9xbQ) +- [GTC 2021 cuCIM: A GPU Image I/O and Processing Toolkit [S32194]](https://www.nvidia.com/en-us/on-demand/search/?facet.mimetype[]=event%20session&layout=list&page=1&q=cucim&sort=date) + - [video](https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s32194/) + +**Blogs** +- [Enhanced Image Analysis with Multidimensional Image Processing](https://developer.nvidia.com/blog/enhanced-image-analysis-with-multidimensional-image-processing/) +- [Accelerating Scikit-Image API with cuCIM: n-Dimensional Image Processing and IO on GPUs](https://developer.nvidia.com/blog/cucim-rapid-n-dimensional-image-processing-and-i-o-on-gpus/) +- [Accelerating Digital Pathology Pipelines with NVIDIA Claraβ„’ Deploy](https://developer.nvidia.com/blog/accelerating-digital-pathology-pipelines-with-nvidia-clara-deploy-2/) + +**Webinars** + +- [cuCIM: a GPU Image IO and Processing Library](https://www.youtube.com/watch?v=G46kOOM9xbQ) + +**[Documentation](https://docs.rapids.ai/api/cucim/stable)** **Release notes** are available on our [wiki page](https://github.com/rapidsai/cucim/wiki/Release-Notes). 
@@ -39,7 +68,7 @@ pip install scipy scikit-image cupy-cuda110 ### Notebooks -Please check out our [Welcome](notebooks/Welcome.ipynb) notebook ([NBViewer](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.06/notebooks/Welcome.ipynb)) +Please check out our [Welcome](notebooks/Welcome.ipynb) notebook ([NBViewer](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.08/notebooks/Welcome.ipynb)) #### Downloading sample images diff --git a/VERSION b/VERSION index ddc883c42..231fac64b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -22.06.00 +22.08.00 diff --git a/benchmarks/skimage/_image_bench.py b/benchmarks/skimage/_image_bench.py index c7d5abfd8..854fae290 100644 --- a/benchmarks/skimage/_image_bench.py +++ b/benchmarks/skimage/_image_bench.py @@ -36,6 +36,7 @@ def __init__( module_cpu=scipy.ndimage, module_gpu=cupyx.scipy.ndimage, function_is_generator=False, + run_cpu=True ): self.shape = shape @@ -67,9 +68,12 @@ def gen_gpu(*args, **kwargs): self.module_name_cpu = module_cpu.__name__ self.module_name_gpu = module_gpu.__name__ + self.run_cpu = run_cpu + def set_args(self, dtype): if np.dtype(dtype).kind in "iu": im1 = skimage.data.camera() + im1 = im1.astype(dtype) else: im1 = skimage.data.camera() / 255.0 im1 = im1.astype(dtype) @@ -166,17 +170,20 @@ def run_benchmark(self, duration=3, verbose=True): rep_kwargs_gpu = self.get_reps( self.func_gpu, self.args_gpu, kw_gpu, duration, cpu=False ) - perf = repeat(self.func_cpu, self.args_cpu, kw_cpu, **rep_kwargs_cpu) + print("Number of Repetitions : ", rep_kwargs_gpu) perf_gpu = repeat(self.func_gpu, self.args_gpu, kw_gpu, **rep_kwargs_gpu) - df.at[index, "GPU accel"] = perf.cpu_times.mean() / perf_gpu.gpu_times.mean() + df.at[index, "shape"] = f"{self.shape}" # df.at[index, "description"] = index df.at[index, "function_name"] = self.function_name df.at[index, "dtype"] = np.dtype(dtype).name df.at[index, "ndim"] = len(self.shape) - df.at[index, "CPU: host (mean)"] = perf.cpu_times.mean() - 
df.at[index, "CPU: host (std)"] = perf.cpu_times.std() + if self.run_cpu == True: + perf = repeat(self.func_cpu, self.args_cpu, kw_cpu, **rep_kwargs_cpu) + df.at[index, "GPU accel"] = perf.cpu_times.mean() / perf_gpu.gpu_times.mean() + df.at[index, "CPU: host (mean)"] = perf.cpu_times.mean() + df.at[index, "CPU: host (std)"] = perf.cpu_times.std() df.at[index, "GPU: host (mean)"] = perf_gpu.cpu_times.mean() df.at[index, "GPU: host (std)"] = perf_gpu.cpu_times.std() diff --git a/benchmarks/skimage/bench_convolve.py b/benchmarks/skimage/bench_convolve.py new file mode 100644 index 000000000..60007b8a3 --- /dev/null +++ b/benchmarks/skimage/bench_convolve.py @@ -0,0 +1,139 @@ +""" +Benchmark locally modified ndimage functions vs. their CuPy counterparts +""" +import cupy as cp +import cupyx.scipy.ndimage as ndi +import pytest +from cupyx.profiler import benchmark + +from cucim.skimage._vendored.ndimage import ( + convolve1d, correlate1d, gaussian_filter, gaussian_filter1d, + gaussian_gradient_magnitude, gaussian_laplace, laplace, prewitt, sobel, + uniform_filter, uniform_filter1d, +) + +d = cp.cuda.Device() + + +def _get_image(shape, dtype, seed=123): + + rng = cp.random.default_rng(seed) + dtype = cp.dtype(dtype) + if dtype.kind == 'b': + image = rng.integers(0, 1, shape, dtype=cp.uint8).astype(bool) + elif dtype.kind in 'iu': + image = rng.integers(0, 128, shape, dtype=dtype) + elif dtype.kind in 'c': + real_dtype = cp.asarray([], dtype=dtype).real.dtype + image = rng.standard_normal(shape, dtype=real_dtype) + image = image + 1j * rng.standard_normal(shape, dtype=real_dtype) + else: + if dtype == cp.float16: + image = rng.standard_normal(shape).astype(dtype) + else: + image = rng.standard_normal(shape, dtype=dtype) + return image + + +def _compare_implementations( + shape, kernel_size, axis, dtype, mode, cval=0.0, origin=0, + output_dtype=None, kernel_dtype=None, output_preallocated=False, + function=convolve1d, max_duration=1 +): + dtype = cp.dtype(dtype) + if 
kernel_dtype is None: + kernel_dtype = dtype + image = _get_image(shape, dtype) + kernel = _get_image((kernel_size,), kernel_dtype) + kwargs = dict(axis=axis, mode=mode, cval=cval, origin=origin) + if output_dtype is not None: + output_dtype = cp.dtype(output_dtype) + function_ref = getattr(ndi, function.__name__) + if output_preallocated: + if output_dtype is None: + output_dtype = image.dtype + output1 = cp.empty(image.shape, dtype=output_dtype) + output2 = cp.empty(image.shape, dtype=output_dtype) + kwargs.update(dict(output=output1)) + perf1 = benchmark(function_ref, (image, kernel), kwargs=kwargs, n_warmup=10, n_repeat=10000, max_duration=max_duration) + kwargs.update(dict(output=output2, algorithm='shared_memory')) + perf2 = benchmark(function, (image, kernel), kwargs=kwargs, n_warmup=10, n_repeat=10000, max_duration=max_duration) + return perf1, perf2 + kwargs.update(dict(output=output_dtype)) + perf1 = benchmark(function_ref, (image, kernel), kwargs=kwargs, n_warmup=10, n_repeat=10000, max_duration=max_duration) + kwargs.update(dict(output=output_dtype, algorithm='shared_memory')) + perf2 = benchmark(function, (image, kernel), kwargs=kwargs, n_warmup=10, n_repeat=10000, max_duration=max_duration) + return perf1, perf2 + + +def _compare_implementations_other( + shape, dtype, mode, cval=0.0, + output_dtype=None, kernel_dtype=None, output_preallocated=False, + function=convolve1d, func_kwargs={}, max_duration=1, +): + dtype = cp.dtype(dtype) + image = _get_image(shape, dtype) + kwargs = dict(mode=mode, cval=cval) + if func_kwargs: + kwargs.update(func_kwargs) + if output_dtype is not None: + output_dtype = cp.dtype(output_dtype) + function_ref = getattr(ndi, function.__name__) + if output_preallocated: + if output_dtype is None: + output_dtype = image.dtype + output1 = cp.empty(image.shape, dtype=output_dtype) + output2 = cp.empty(image.shape, dtype=output_dtype) + kwargs.update(dict(output=output1)) + perf1 = benchmark(function_ref, (image,), kwargs=kwargs, 
n_warmup=10, n_repeat=10000, max_duration=max_duration) + kwargs.update(dict(output=output1, algorithm='shared_memory')) + perf2 = benchmark(function, (image,), kwargs=kwargs, n_warmup=10, n_repeat=10000, max_duration=max_duration) + return perf1, perf2 + kwargs.update(dict(output=output_dtype)) + perf1 = benchmark(function_ref, (image,), kwargs=kwargs, n_warmup=10, n_repeat=10000, max_duration=max_duration) + kwargs.update(dict(output=output_dtype, algorithm='shared_memory')) + perf2 = benchmark(function, (image,), kwargs=kwargs, n_warmup=10, n_repeat=10000, max_duration=max_duration) + return perf1, perf2 + + +print("\n\n") +print("function | shape | dtype | mode | kernel size | preallocated | axis | dur (ms), CuPy | dur (ms), cuCIM | acceleration ") +print("---------|-------|-------|------|-------------|--------------|------|----------------|-----------------|--------------") +for function in [convolve1d]: + for shape in [(512, 512), (3840, 2160), (64, 64, 64), (256, 256, 256)]: + for dtype in [cp.float32, cp.uint8]: + for mode in ['nearest']: + for kernel_size in [3, 7, 11, 41]: + for output_preallocated in [False]: # , True]: + for axis in range(len(shape)): + output_dtype = dtype + perf1, perf2 = _compare_implementations(shape=shape, kernel_size=kernel_size, mode=mode, axis=axis, dtype=dtype, output_dtype=output_dtype, output_preallocated=output_preallocated, function=function) + t_elem = perf1.gpu_times * 1000. + t_shared = perf2.gpu_times * 1000. 
+ print(f"{function.__name__} | {shape} | {cp.dtype(dtype).name} | {mode} | {kernel_size=} | prealloc={output_preallocated} | {axis=} | {t_elem.mean():0.3f} +/- {t_elem.std():0.3f} | {t_shared.mean():0.3f} +/- {t_shared.std():0.3f} | {t_elem.mean() / t_shared.mean():0.3f}") + + +print("function | kwargs | shape | dtype | mode | preallocated | dur (ms), CuPy | dur (ms), cuCIM | acceleration ") +print("---------|--------|-------|-------|------|--------------|----------------|-----------------|--------------") +for function, func_kwargs in [ + # (gaussian_filter1d, dict(sigma=1.0, axis=0)), + # (gaussian_filter1d, dict(sigma=1.0, axis=-1)), + # (gaussian_filter1d, dict(sigma=4.0, axis=0)), + # (gaussian_filter1d, dict(sigma=4.0, axis=-1)), + (gaussian_filter, dict(sigma=1.0)), + (gaussian_filter, dict(sigma=4.0)), + (uniform_filter, dict(size=11)), + (prewitt, dict(axis=0)), + (sobel, dict(axis=0)), + (prewitt, dict(axis=-1)), + (sobel, dict(axis=-1)), +]: + for shape in [(512, 512), (3840, 2160), (64, 64, 64), (256, 256, 256)]: + for (dtype, output_dtype) in [(cp.float32, cp.float32), (cp.uint8, cp.float32)]: + for mode in ['nearest']: + for output_preallocated in [False, True]: + perf1, perf2 = _compare_implementations_other(shape=shape, mode=mode, dtype=dtype, output_dtype=output_dtype, output_preallocated=output_preallocated, function=function, func_kwargs=func_kwargs) + t_elem = perf1.gpu_times * 1000. + t_shared = perf2.gpu_times * 1000. 
+ print(f"{function.__name__} | {func_kwargs} | {shape} | {cp.dtype(dtype).name} | {mode} | {output_preallocated} | {t_elem.mean():0.3f} +/- {t_elem.std():0.3f} | {t_shared.mean():0.3f} +/- {t_shared.std():0.3f} | {t_elem.mean() / t_shared.mean():0.3f}") + diff --git a/benchmarks/skimage/cucim_color_bench.py b/benchmarks/skimage/cucim_color_bench.py index 66ce6b74c..87955dd0a 100644 --- a/benchmarks/skimage/cucim_color_bench.py +++ b/benchmarks/skimage/cucim_color_bench.py @@ -1,3 +1,4 @@ +import argparse import os import pickle @@ -14,6 +15,7 @@ from _image_bench import ImageBench +func_name_choices = ['convert_colorspace', 'rgb2hed', 'hed2rgb', 'lab2lch', 'lch2lab', 'xyz2lab', 'lab2xyz', 'rgba2rgb', 'label2rgb'] class ColorBench(ImageBench): def set_args(self, dtype): @@ -47,6 +49,7 @@ def __init__( index_str=None, module_cpu=scipy.ndimage, module_gpu=cupyx.scipy.ndimage, + run_cpu=True, ): self.contiguous_labels = contiguous_labels super().__init__( @@ -58,6 +61,7 @@ def __init__( index_str=index_str, module_cpu=module_cpu, module_gpu=module_gpu, + run_cpu=run_cpu, ) def set_args(self, dtype): @@ -70,14 +74,12 @@ def set_args(self, dtype): ], dtype=int, ) - tiling = tuple(s // a_s for s, a_s in zip(shape, a.shape)) + tiling = tuple(s // a_s for s, a_s in zip(self.shape, a.shape)) if self.contiguous_labels: label = np.kron(a, np.ones(tiling, dtype=a.dtype)) else: label = np.tile(a, tiling) labeld = cp.asarray(label) - if self.shape[-1] != 3: - raise ValueError("shape must be 3 on the last axis") imaged = cupy.testing.shaped_random(labeld.shape, xp=cp, dtype=dtype, scale=1.0) image = cp.asnumpy(imaged) self.args_cpu = ( @@ -90,96 +92,125 @@ def set_args(self, dtype): ) -pfile = "cucim_color_results.pickle" -if os.path.exists(pfile): - with open(pfile, "rb") as f: - all_results = pickle.load(f) -else: - all_results = pd.DataFrame() -dtypes = [np.float32] -all_colorspaces = True - -for shape in [(256, 256, 3), (3840, 2160, 3), (192, 192, 192, 3)]: - ndim = 
len(shape) +def main(args): - if all_colorspaces: - color_spaces = ["RGB", "HSV", "RGB CIE", "XYZ", "YUV", "YIQ", "YPbPr", "YCbCr", "YDbDr"] + pfile = "cucim_color_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) else: - color_spaces = ["RGB", "HSV", "YUV", "XYZ"] - for fromspace in color_spaces: - for tospace in color_spaces: - if fromspace == tospace: - continue + all_results = pd.DataFrame() + + dtypes = [np.dtype(args.dtype)] + # image sizes/shapes + shape = tuple(list(map(int,(args.img_size.split(','))))) + run_cpu = not args.no_cpu + + all_colorspaces = False + ndim = len(shape) + + for function_name in func_name_choices: + + if function_name != args.func_name: + continue + + if function_name == 'convert_colorspace': + if all_colorspaces: + color_spaces = ["RGB", "HSV", "RGB CIE", "XYZ", "YUV", "YIQ", "YPbPr", "YCbCr", "YDbDr"] + else: + color_spaces = ["RGB", "HSV", "YUV", "XYZ"] + for fromspace in color_spaces: + for tospace in color_spaces: + if fromspace == tospace: + continue + + B = ColorBench( + function_name="convert_colorspace", + shape=shape + (3,), + dtypes=dtypes, + fixed_kwargs=dict(fromspace=fromspace, tospace=tospace), + var_kwargs={}, + index_str=f"{fromspace.lower()}2{tospace.lower()}", + module_cpu=skimage.color, + module_gpu=cucim.skimage.color, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + elif function_name == 'rgba2rgb': + B = RGBABench( + function_name="rgba2rgb", + shape=shape[:-1] + (4,), + dtypes=dtypes, + fixed_kwargs={}, + var_kwargs={}, + module_cpu=skimage.color, + module_gpu=cucim.skimage.color, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + elif function_name == 'label2rgb': + + for contiguous_labels in [True, False]: + if contiguous_labels: + index_str = "contiguous" + else: + index_str = None 
+ B = LabelBench( + function_name="label2rgb", + shape=shape, + dtypes=dtypes, + contiguous_labels=contiguous_labels, + index_str=index_str, + fixed_kwargs=dict(bg_label=0), + var_kwargs=dict(kind=["avg", "overlay"]), + module_cpu=skimage.color, + module_gpu=cucim.skimage.color, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + elif function_name in [ + 'rgb2hed', 'hed2rgb', 'lab2lch', 'lch2lab', 'xyz2lab', 'lab2xyz' + ]: B = ColorBench( - function_name="convert_colorspace", - shape=shape, + function_name=function_name, + shape=shape + (3,), dtypes=dtypes, - fixed_kwargs=dict(fromspace=fromspace, tospace=tospace), + fixed_kwargs={}, var_kwargs={}, - index_str=f"{fromspace.lower()}2{tospace.lower()}", module_cpu=skimage.color, module_gpu=cucim.skimage.color, + run_cpu=run_cpu, ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - # rgb2hed and hed2rgb test combine_stains and separate_stains and all other - # stains should have equivalent performance. - # - # Probably not necessary to benchmark combine_stains and separate_stains - # e.g. 
- # ihc_hdx = separate_stains(ihc, hdx_from_rgb) - # ihc = combine_stains(ihc_hdx, rgb_from_hdx) - # - - for fname in ["rgb2hed", "hed2rgb", "lab2lch", "lch2lab", "xyz2lab", - "lab2xyz"]: - B = ColorBench( - function_name=fname, - shape=shape, - dtypes=dtypes, - fixed_kwargs={}, - var_kwargs={}, - module_cpu=skimage.color, - module_gpu=cucim.skimage.color, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - B = RGBABench( - function_name="rgba2rgb", - shape=shape[:-1] + (4,), - dtypes=dtypes, - fixed_kwargs={}, - var_kwargs={}, - module_cpu=skimage.color, - module_gpu=cucim.skimage.color, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - for contiguous_labels in [True, False]: - if contiguous_labels: - index_str = "contiguous" - else: - index_str = None - B = LabelBench( - function_name="label2rgb", - shape=shape, - dtypes=dtypes, - contiguous_labels=contiguous_labels, - index_str=index_str, - fixed_kwargs=dict(bg_label=0), - var_kwargs=dict(kind=["avg", "overlay"]), - module_cpu=skimage.color, - module_gpu=cucim.skimage.color, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + all_results.to_pickle(pfile) + try: + import tabular + + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + except ImportError: + pass + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM color conversion functions') + dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 
'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True) + parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True) + parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) + + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cucim_exposure_bench.py b/benchmarks/skimage/cucim_exposure_bench.py index 9f7ffbec2..d0fb4dbda 100644 --- a/benchmarks/skimage/cucim_exposure_bench.py +++ b/benchmarks/skimage/cucim_exposure_bench.py @@ -1,3 +1,4 @@ +import argparse import os import pickle @@ -27,6 +28,7 @@ def set_args(self, dtype): class MatchHistogramBench(ImageBench): def set_args(self, dtype): + if np.dtype(dtype).kind in "iu": scale = 256 else: @@ -39,95 +41,101 @@ def set_args(self, dtype): self.args_gpu = (imaged, imaged2) -pfile = "cucim_exposure_results.pickle" -if os.path.exists(pfile): - with open(pfile, "rb") as f: - all_results = pickle.load(f) -else: - all_results = pd.DataFrame() -dtypes = [np.uint8, np.float32] - -exposure_config = { - "equalize_adapthist": dict( - fixed_kwargs=dict(clip_limit=0.01, nbins=256), - variable_kwargs=dict(), - color_required=False, - grayscale_only=False, - dtypes=None, - shapes=None, - ), - "histogram": dict( - fixed_kwargs=dict(source_range="image"), - variable_kwargs=dict(nbins=[16, 256], normalize=[True, False]), - color_required=False, - grayscale_only=True, - dtypes=None, - shapes=None, - ), -} - -for function_name, fixed_kwargs, var_kwargs, allow_color in [ - ("equalize_adapthist", dict(clip_limit=0.01, nbins=256), dict(), True), - ( - "histogram", - 
dict(source_range="image"), - dict(nbins=[16, 256], normalize=[True, False]), - False, - ), - ("cumulative_distribution", dict(), dict(nbins=[16, 256]), False), - ("equalize_hist", dict(mask=None), dict(nbins=[16, 256]), False), - ("rescale_intensity", dict(in_range="image", out_range="dtype"), dict(), False), - ("adjust_gamma", dict(), dict(), False), - ("adjust_log", dict(), dict(), False), - ("adjust_sigmoid", dict(), dict(inv=[False, True]), False), - ("is_low_contrast", dict(), dict(), False), -]: - - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: - ndim = len(shape) - if shape[-1] == 3 and not allow_color: +def main(args): + + pfile = "cucim_exposure_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) + else: + all_results = pd.DataFrame() + + dtypes = [np.dtype(args.dtype)] + # image sizes/shapes + shape = tuple(list(map(int,(args.img_size.split(','))))) + run_cpu = not args.no_cpu + + for function_name, fixed_kwargs, var_kwargs, allow_color in [ + ("equalize_adapthist", dict(clip_limit=0.01, nbins=256), dict(), True), + ( + "histogram", + dict(source_range="image"), + dict(nbins=[16, 256], normalize=[True, False]), + False, + ), + ("cumulative_distribution", dict(), dict(nbins=[16, 256]), False), + ("equalize_hist", dict(mask=None), dict(nbins=[16, 256]), False), + ("rescale_intensity", dict(in_range="image", out_range="dtype"), dict(), False), + ("adjust_gamma", dict(), dict(), False), + ("adjust_log", dict(), dict(), False), + ("adjust_sigmoid", dict(), dict(inv=[False, True]), False), + ("is_low_contrast", dict(), dict(), False), + ]: + + if function_name != args.func_name: continue - if function_name == "equalize_adapthist": - # TODO: fix equalize_adapthist for size (3840, 2160) and kernel_size = [16, 16] - size_factors = [4, 8, 16] - kernel_sizes = [] - for size_factor in size_factors: - kernel_sizes.append([max(s // size_factor, 1) for s in shape if s != 3]) - 
var_kwargs.update(dict(kernel_size=kernel_sizes)) - - B = ExposureBench( - function_name=function_name, - shape=shape, - dtypes=dtypes, - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.exposure, - module_gpu=cucim.skimage.exposure, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: - ndim = len(shape) - - channel_axis = -1 if shape[-1] in [3, 4] else None - - B = MatchHistogramBench( - function_name="match_histograms", - shape=shape, - dtypes=dtypes, - fixed_kwargs=dict(channel_axis=channel_axis), - var_kwargs=dict(), - module_cpu=skimage.exposure, - module_gpu=cucim.skimage.exposure, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + ndim = len(shape) + + if function_name == 'match_histograms': + + channel_axis = -1 if shape[-1] in [3, 4] else None + + B = MatchHistogramBench( + function_name="match_histograms", + shape=shape, + dtypes=dtypes, + fixed_kwargs=dict(channel_axis=channel_axis), + var_kwargs=dict(), + module_cpu=skimage.exposure, + module_gpu=cucim.skimage.exposure, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + else: + + if shape[-1] == 3 and not allow_color: + continue + + if function_name == "equalize_adapthist": + # TODO: fix equalize_adapthist for size (3840, 2160) and kernel_size = [16, 16] + size_factors = [4, 8, 16] + kernel_sizes = [] + for size_factor in size_factors: + kernel_sizes.append([max(s // size_factor, 1) for s in shape if s != 3]) + var_kwargs.update(dict(kernel_size=kernel_sizes)) + + B = ExposureBench( + function_name=function_name, + shape=shape, + dtypes=dtypes, + 
fixed_kwargs=fixed_kwargs, + var_kwargs=var_kwargs, + module_cpu=skimage.exposure, + module_gpu=cucim.skimage.exposure, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + all_results.to_pickle(pfile) + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM exposure functions') + func_name_choices = ['equalize_adapthist', 'cumulative_distribution', 'equalize_hist', 'rescale_intensity', 'adjust_gamma', 'adjust_log', 'adjust_sigmoid', 'is_low_contrast', 'match_histograms'] + dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True) + parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices=func_name_choices, required=True) + parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) + + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cucim_feature_bench.py b/benchmarks/skimage/cucim_feature_bench.py index 800086f7a..05f61493f 100644 --- a/benchmarks/skimage/cucim_feature_bench.py +++ b/benchmarks/skimage/cucim_feature_bench.py @@ -1,5 +1,6 @@ import os import pickle +import argparse import cucim.skimage import cucim.skimage.feature @@ -26,102 +27,119 @@ def set_args(self, dtype): self.args_cpu = (image, template) self.args_gpu = (imaged, templated) - -pfile = "cucim_feature_results.pickle" -if os.path.exists(pfile): - with open(pfile, 
"rb") as f: - all_results = pickle.load(f) -else: - all_results = pd.DataFrame() -dtypes = [np.float32] - -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - ("multiscale_basic_features", dict(edges=True), dict(texture=[True, False]), True, True), - ("canny", dict(sigma=1.8), dict(), False, False), - # reduced default rings, histograms, orientations to fit daisy at (3840, 2160) into GPU memory - ( - "daisy", - dict(step=4, radius=15, rings=2, histograms=5, orientations=4), - dict(normalization=["l1", "l2", "daisy"]), - False, - False, - ), - ("structure_tensor", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True), - ("hessian_matrix", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True), - ("hessian_matrix_det", dict(sigma=1, approximate=False), dict(), False, True), - ("shape_index", dict(sigma=1, mode="reflect"), dict(), False, False), - ("corner_kitchen_rosenfeld", dict(mode="reflect"), dict(), False, False), - ("corner_harris", dict(k=0.05, eps=1e-6, sigma=1), dict(method=["k", "eps"]), False, False), - ("corner_shi_tomasi", dict(sigma=1), dict(), False, False), - ("corner_foerstner", dict(sigma=1), dict(), False, False), - ("corner_peaks", dict(), dict(min_distance=(2, 3, 5)), False, True), -]: - - for shape in [(128, 128, 128), (512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: - - if function_name in ["corner_peaks", "peak_local_max"] and np.prod(shape) > 1000000: - # skip any large sizes that take too long - continue - ndim = len(shape) - if not allow_nd: - if not allow_color: - if ndim > 2: - continue - else: - if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): - continue - if shape[-1] == 3 and not allow_color: +def main(args): + + pfile = "cucim_feature_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) + else: + all_results = pd.DataFrame() + + dtypes = [np.dtype(args.dtype)] + + for function_name, fixed_kwargs, var_kwargs, allow_color, 
allow_nd in [ + ("multiscale_basic_features", dict(edges=True), dict(texture=[True, False]), True, True), + ("canny", dict(sigma=1.8), dict(), False, False), + # reduced default rings, histograms, orientations to fit daisy at (3840, 2160) into GPU memory + ( + "daisy", + dict(step=4, radius=15, rings=2, histograms=5, orientations=4), + dict(normalization=["l1", "l2", "daisy"]), + False, + False, + ), + ("structure_tensor", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True), + ("hessian_matrix", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True), + ("hessian_matrix_det", dict(sigma=1, approximate=False), dict(), False, True), + ("shape_index", dict(sigma=1, mode="reflect"), dict(), False, False), + ("corner_kitchen_rosenfeld", dict(mode="reflect"), dict(), False, False), + ("corner_harris", dict(k=0.05, eps=1e-6, sigma=1), dict(method=["k", "eps"]), False, False), + ("corner_shi_tomasi", dict(sigma=1), dict(), False, False), + ("corner_foerstner", dict(sigma=1), dict(), False, False), + ("corner_peaks", dict(), dict(min_distance=(2, 3, 5)), False, True), + ("match_template", dict(), dict(pad_input=[False], mode=["reflect"]), False, True) + ]: + + if function_name == args.func_name: + shape = tuple(list(map(int,(args.img_size.split(','))))) + else: continue - if function_name == "multiscale_basic_features": - fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None - if ndim == 3 and shape[-1] != 3: - # Omit texture=True case to avoid excessive GPU memory usage - var_kwargs["texture"] = [False] - - B = ImageBench( - function_name=function_name, - shape=shape, - dtypes=dtypes, - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.feature, - module_gpu=cucim.skimage.feature, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - ("match_template", dict(), dict(pad_input=[False], mode=["reflect"]), 
False, True), -]: - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: - + #if function_name in ["corner_peaks", "peak_local_max"] and np.prod(shape) > 1000000: + # skip any large sizes that take too long ndim = len(shape) - if not allow_nd: - if allow_color: - if ndim > 2: - continue - else: - if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): - continue - if shape[-1] == 3 and not allow_color: - continue - - B = MatchTemplateBench( - function_name=function_name, - shape=shape, - dtypes=dtypes, - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.feature, - module_gpu=cucim.skimage.feature, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + run_cpu = not args.no_cpu + + if function_name != "match_template": + if not allow_nd: + if not allow_color: + if ndim > 2: + continue + else: + if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): + continue + + if shape[-1] == 3 and not allow_color: + continue + + if function_name == "multiscale_basic_features": + fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None + if ndim == 3 and shape[-1] != 3: + # Omit texture=True case to avoid excessive GPU memory usage + var_kwargs["texture"] = [False] + + B = ImageBench( + function_name=function_name, + shape=shape, + dtypes=dtypes, + fixed_kwargs=fixed_kwargs, + var_kwargs=var_kwargs, + module_cpu=skimage.feature, + module_gpu=cucim.skimage.feature, + run_cpu=run_cpu, + ) + else: + if not allow_nd: + if allow_color: + if ndim > 2: + continue + else: + if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): + continue + if shape[-1] == 3 and not allow_color: + continue + + B = MatchTemplateBench( + function_name=function_name, + shape=shape, + dtypes=dtypes, + fixed_kwargs=fixed_kwargs, + 
var_kwargs=var_kwargs, + module_cpu=skimage.feature, + module_gpu=cucim.skimage.feature, + run_cpu=run_cpu, + ) + + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + all_results.to_pickle(pfile) + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM Feature') + func_name_choices = ["multiscale_basic_features","canny","daisy","structure_tensor","hessian_matrix","hessian_matrix_det","shape_index","corner_kitchen_rosenfeld","corner_harris","corner_shi_tomasi","corner_foerstner","corner_peaks","match_template"] + dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True) + parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices=func_name_choices, required=True) + parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) + + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cucim_filters_bench.py b/benchmarks/skimage/cucim_filters_bench.py index c60c27816..e12a9a9e8 100644 --- a/benchmarks/skimage/cucim_filters_bench.py +++ b/benchmarks/skimage/cucim_filters_bench.py @@ -1,5 +1,6 @@ import os import pickle +import argparse import cucim.skimage import cucim.skimage.filters @@ -10,107 +11,117 @@ from _image_bench import ImageBench -pfile = "cucim_filters_results.pickle" -if os.path.exists(pfile): - with open(pfile, "rb") as f: - all_results = pickle.load(f) 
-else: - all_results = pd.DataFrame() -dtypes = [np.float32] - -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # _gabor.py - ( - "gabor", - dict(n_stds=3), - dict(frequency=[0.075, 0.1, 0.2, 0.3]), - False, - False, - ), - # _gaussian.py - ( - "gaussian", - dict(truncate=4.0, preserve_range=True), - dict(sigma=[0.25, 1, 4]), - True, - True, - ), - # _median.py - ("median", dict(mode="nearest"), dict(), False, True), - # _rank_order.py - ("rank_order", dict(), dict(), False, True), - # _unsharp_mask.py - ( - "unsharp_mask", - dict(), - dict(radius=[0.5, 1.0, 2.0, 3.0]), - True, - True, - ), - # edges.py - ("sobel", dict(), dict(axis=[None, 0, -1]), False, True), - ("prewitt", dict(), dict(axis=[None, 0, -1]), False, True), - ("scharr", dict(), dict(axis=[None, 0, -1]), False, True), - ("roberts", dict(), dict(), False, False), - ("roberts_pos_diag", dict(), dict(), False, False), - ("roberts_neg_diag", dict(), dict(), False, False), - ("farid", dict(), dict(), False, False), - ("laplace", dict(ksize=3), dict(), False, True), - # lpi_filter.py - # TODO: benchmark wiener - # ridges.py - # TODO: had to set meijering, etc allow_nd to False just due to insufficient GPU memory - ( - "meijering", - dict(sigmas=range(1, 10, 2), alpha=None), - dict(black_ridges=[True, False], mode=["reflect"]), - False, - False, - ), - ( - "sato", - dict(sigmas=range(1, 10, 2)), - dict(black_ridges=[True, False], mode=["reflect"]), - False, - False, - ), - ( - "frangi", - dict(sigmas=range(1, 10, 2)), - dict(black_ridges=[True, False], mode=["reflect"]), - False, - False, - ), - ( - "hessian", - dict(sigmas=range(1, 10, 2)), - dict(black_ridges=[True, False], mode=["reflect"]), - False, - False, - ), - # thresholding.py - ("threshold_isodata", dict(), dict(nbins=[64, 256]), False, True), - ("threshold_otsu", dict(), dict(nbins=[64, 256]), False, True), - ("threshold_yen", dict(), dict(nbins=[64, 256]), False, True), - # TODO: threshold_local should support 
n-dimensional data - ( - "threshold_local", - dict(), - dict(block_size=[5, 15], method=["gaussian", "mean", "median"]), - False, - False, - ), - ("threshold_li", dict(), dict(), False, True), - ("threshold_minimum", dict(), dict(nbins=[64, 256]), False, True), - ("threshold_mean", dict(), dict(), False, True), - ("threshold_triangle", dict(), dict(nbins=[64, 256]), False, True), - ("threshold_niblack", dict(), dict(window_size=[7, 15, 65]), False, True), - ("threshold_sauvola", dict(), dict(window_size=[7, 15, 65]), False, True), - ("apply_hysteresis_threshold", dict(low=0.15, high=0.6), dict(), False, True), - ("threshold_multiotsu", dict(), dict(nbins=[64, 256], classes=[3]), False, True), -]: - - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: +def main(args): + + pfile = "cucim_filters_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) + else: + all_results = pd.DataFrame() + dtypes = [np.dtype(args.dtype)] + + for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ + # _gabor.py + ( + "gabor", + dict(n_stds=3), + dict(frequency=[0.075, 0.1, 0.2, 0.3]), + False, + False, + ), + # _gaussian.py + ( + "gaussian", + dict(truncate=4.0, preserve_range=True), + dict(sigma=[0.25, 1, 4]), + True, + True, + ), + # _median.py + ("median", dict(mode="nearest"), dict(), False, True), + # _rank_order.py + ("rank_order", dict(), dict(), False, True), + # _unsharp_mask.py + ( + "unsharp_mask", + dict(), + dict(radius=[0.5, 1.0, 2.0, 3.0]), + True, + True, + ), + # edges.py + ("sobel", dict(), dict(axis=[None, 0, -1]), False, True), + ("prewitt", dict(), dict(axis=[None, 0, -1]), False, True), + ("scharr", dict(), dict(axis=[None, 0, -1]), False, True), + ("roberts", dict(), dict(), False, False), + ("roberts_pos_diag", dict(), dict(), False, False), + ("roberts_neg_diag", dict(), dict(), False, False), + ("farid", dict(), dict(), False, False), + ("laplace", dict(ksize=3), 
dict(), False, True), + # lpi_filter.py + # TODO: benchmark wiener + # ridges.py + # TODO: had to set meijering, etc allow_nd to False just due to insufficient GPU memory + ( + "meijering", + dict(sigmas=range(1, 10, 2), alpha=None), + dict(black_ridges=[True, False], mode=["reflect"]), + False, + False, + ), + ( + "sato", + dict(sigmas=range(1, 10, 2)), + dict(black_ridges=[True, False], mode=["reflect"]), + False, + False, + ), + ( + "frangi", + dict(sigmas=range(1, 10, 2)), + dict(black_ridges=[True, False], mode=["reflect"]), + False, + False, + ), + ( + "hessian", + dict(sigmas=range(1, 10, 2)), + dict(black_ridges=[True, False], mode=["reflect"]), + False, + False, + ), + # thresholding.py + ("threshold_isodata", dict(), dict(nbins=[64, 256]), False, True), + ("threshold_otsu", dict(), dict(nbins=[64, 256]), False, True), + ("threshold_yen", dict(), dict(nbins=[64, 256]), False, True), + # TODO: threshold_local should support n-dimensional data + ( + "threshold_local", + dict(), + dict(block_size=[5, 15], method=["gaussian", "mean", "median"]), + False, + False, + ), + ("threshold_li", dict(), dict(), False, True), + ("threshold_minimum", dict(), dict(nbins=[64, 256]), False, True), + ("threshold_mean", dict(), dict(), False, True), + ("threshold_triangle", dict(), dict(nbins=[64, 256]), False, True), + ("threshold_niblack", dict(), dict(window_size=[7, 15, 65]), False, True), + ("threshold_sauvola", dict(), dict(window_size=[7, 15, 65]), False, True), + ("apply_hysteresis_threshold", dict(low=0.15, high=0.6), dict(), False, True), + ("threshold_multiotsu", dict(), dict(nbins=[64, 256], classes=[3]), False, True), + ]: + if function_name != args.func_name: + continue + else: + # image sizes/shapes + shape = tuple(list(map(int,(args.img_size.split(','))))) + + # for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: + + if function_name in ["gaussian", "unsharp_mask"]: + fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None ndim = 
len(shape) if not allow_nd: @@ -131,7 +142,7 @@ footprints = [] ndim = len(shape) footprint_sizes = [3, 5, 7, 9] if ndim == 2 else [3, 5, 7] - for footprint_size in [3, 5, 7, 9]: + for footprint_size in footprint_sizes: footprints.append( np.ones((footprint_size,) * ndim, dtype=bool) ) @@ -148,12 +159,27 @@ var_kwargs=var_kwargs, module_cpu=skimage.filters, module_gpu=cucim.skimage.filters, + run_cpu=not args.no_cpu, ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + all_results.to_pickle(pfile) + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM Filters') + func_name_choices = ['gabor', 'gaussian', 'median', 'rank_order', 'unsharp_mask', 'sobel', 'prewitt', 'scharr', 'roberts', 'roberts_pos_diag', 'roberts_neg_diag', 'farid', 'laplace', 'meijering', 'sato', 'frangi', 'hessian', 'threshold_isodata', 'threshold_otsu', 'threshold_yen', 'threshold_local', 'threshold_li', 'threshold_minimum', 'threshold_mean', 'threshold_triangle', 'threshold_niblack', 'threshold_sauvola', 'apply_hysteresis_threshold', 'threshold_multiotsu'] + dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True) + parser.add_argument('-f','--func_name', type=str, help='function to benchmark', 
choices=func_name_choices, required=True) + parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) + + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cucim_measure_bench.py b/benchmarks/skimage/cucim_measure_bench.py index badb53e73..c5f207b64 100644 --- a/benchmarks/skimage/cucim_measure_bench.py +++ b/benchmarks/skimage/cucim_measure_bench.py @@ -1,3 +1,4 @@ +import argparse import math import os import pickle @@ -25,6 +26,7 @@ def __init__( index_str=None, module_cpu=skimage.measure, module_gpu=cucim.skimage.measure, + run_cpu=True, ): self.contiguous_labels = contiguous_labels @@ -38,6 +40,7 @@ def __init__( index_str=index_str, module_cpu=module_cpu, module_gpu=module_gpu, + run_cpu=run_cpu, ) def set_args(self, dtype): @@ -49,7 +52,7 @@ def set_args(self, dtype): [0, 0, 0, 0, 0, 5, 0, 0], ] ) - tiling = tuple(s // a_s for s, a_s in zip(shape, a.shape)) + tiling = tuple(s // a_s for s, a_s in zip(self.shape, a.shape)) if self.contiguous_labels: image = np.kron(a, np.ones(tiling, dtype=a.dtype)) else: @@ -71,6 +74,7 @@ def __init__( index_str=None, module_cpu=skimage.measure, module_gpu=cucim.skimage.measure, + run_cpu=True, ): self.contiguous_labels = contiguous_labels @@ -84,6 +88,7 @@ def __init__( index_str=index_str, module_cpu=module_cpu, module_gpu=module_gpu, + run_cpu=run_cpu, ) def set_args(self, dtype): @@ -95,7 +100,7 @@ def set_args(self, dtype): [0, 0, 0, 0, 0, 5, 0, 0], ] ) - tiling = tuple(s // a_s for s, a_s in zip(shape, a.shape)) + tiling = tuple(s // a_s for s, a_s in zip(self.shape, a.shape)) if self.contiguous_labels: image = np.kron(a, np.ones(tiling, dtype=a.dtype)) else: @@ -127,32 +132,70 @@ def set_args(self, dtype): self.args_gpu = (imaged,) -pfile = "cucim_measure_results.pickle" -if os.path.exists(pfile): - with open(pfile, "rb") as f: - all_results = pickle.load(f) 
-else: - all_results = pd.DataFrame() -dtypes = [np.float32] - -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # _gaussian.py - ( - "label", - dict(return_num=False, background=0), - dict(connectivity=[1, 2]), - False, - True, - ), - # regionprops.py - ("regionprops", dict(), dict(), False, True), -]: - - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: +def main(args): + + pfile = "cucim_measure_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) + else: + all_results = pd.DataFrame() + + dtypes = [np.dtype(args.dtype)] + # image sizes/shapes + shape = tuple(list(map(int,(args.img_size.split(','))))) + run_cpu = not args.no_cpu + + for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ + # _gaussian.py + ( + "label", + dict(return_num=False, background=0), + dict(connectivity=[1, 2]), + False, + True, + ), + # regionprops.py + ("regionprops", dict(), dict(), False, True), + # _moments.py + ("moments", dict(), dict(order=[1, 2, 3, 4]), False, False), + ("moments_central", dict(), dict(order=[1, 2, 3]), False, True), + # omited from benchmarks (only tiny arrays): moments_normalized, moments_hu + ("centroid", dict(), dict(), False, True), + ("inertia_tensor", dict(), dict(), False, True), + ("inertia_tensor_eigvals", dict(), dict(), False, True), + # _polygon.py + # TODO: approximate_polygon, subdivide_polygon + # block.py + ( + "block_reduce", + dict(), + dict( + func=[ + cp.sum, + ] + ), + True, + True, + ), # variable block_size configured below + # entropy.py + ("shannon_entropy", dict(base=2), dict(), True, True), + # profile.py + ( + "profile_line", + dict(src=(5, 7)), + dict(reduce_func=[cp.mean], linewidth=[1, 2, 4], order=[1, 3]), + True, + False, + ), # variable block_size configured below + ]: + + if function_name != args.func_name: + continue ndim = len(shape) if not allow_nd: - if allow_color: + if not allow_color: if ndim > 2: 
continue else: @@ -161,118 +204,89 @@ def set_args(self, dtype): if shape[-1] == 3 and not allow_color: continue - Tester = LabelBench if function_name == "label" else RegionpropsBench + if function_name in ['label', 'regionprops']: + + Tester = LabelBench if function_name == "label" else RegionpropsBench + + for contiguous_labels in [True, False]: + if contiguous_labels: + index_str = f"contiguous" + else: + index_str = None + B = Tester( + function_name=function_name, + shape=shape, + dtypes=dtypes, + contiguous_labels=contiguous_labels, + index_str=index_str, + fixed_kwargs=fixed_kwargs, + var_kwargs=var_kwargs, + module_cpu=skimage.measure, + module_gpu=cucim.skimage.measure, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + else: + - for contiguous_labels in [True, False]: - if contiguous_labels: - index_str = f"contiguous" - else: - index_str = None - B = Tester( + if function_name == "gabor" and np.prod(shape) > 1000000: + # avoid cases that are too slow on the CPU + var_kwargs["frequency"] = [f for f in var_kwargs["frequency"] if f >= 0.1] + + if function_name == "block_reduce": + ndim = len(shape) + if shape[-1] == 3: + block_sizes = [(b,) * (ndim - 1) + (3,) for b in (16, 32, 64)] + else: + block_sizes = [(b,) * ndim for b in (16, 32, 64)] + var_kwargs["block_size"] = block_sizes + + if function_name == "profile_line": + fixed_kwargs["dst"] = (shape[0] - 32, shape[1] + 9) + + if function_name == "median": + footprints = [] + ndim = len(shape) + footprint_sizes = [3, 5, 7, 9] if ndim == 2 else [3, 5, 7] + for footprint_size in [3, 5, 7, 9]: + footprints.append( + np.ones((footprint_sizes,) * ndim, dtype=bool) + ) + var_kwargs["footprint"] = footprints + + if function_name in ["gaussian", "unsharp_mask"]: + fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None + + B = FiltersBench( function_name=function_name, shape=shape, dtypes=dtypes, - 
contiguous_labels=contiguous_labels, - index_str=index_str, fixed_kwargs=fixed_kwargs, var_kwargs=var_kwargs, module_cpu=skimage.measure, module_gpu=cucim.skimage.measure, + run_cpu=run_cpu, ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # _moments.py - ("moments", dict(), dict(order=[1, 2, 3, 4]), False, False), - ("moments_central", dict(), dict(order=[1, 2, 3]), False, True), - # omited from benchmarks (only tiny arrays): moments_normalized, moments_hu - ("centroid", dict(), dict(), False, True), - ("inertia_tensor", dict(), dict(), False, True), - ("inertia_tensor_eigvals", dict(), dict(), False, True), - # _polygon.py - # TODO: approximate_polygon, subdivide_polygon - # block.py - ( - "block_reduce", - dict(), - dict( - func=[ - cp.sum, - ] - ), - True, - True, - ), # variable block_size configured below - # entropy.py - ("shannon_entropy", dict(base=2), dict(), True, True), - # profile.py - ( - "profile_line", - dict(src=(5, 7)), - dict(reduce_func=[cp.mean], linewidth=[1, 2, 4], order=[1, 3]), - True, - False, - ), # variable block_size configured below -]: - - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: - - ndim = len(shape) - if not allow_nd: - if not allow_color: - if ndim > 2: - continue - else: - if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): - continue - if shape[-1] == 3 and not allow_color: - continue - - if function_name == "gabor" and np.prod(shape) > 1000000: - # avoid cases that are too slow on the CPU - var_kwargs["frequency"] = [f for f in var_kwargs["frequency"] if f >= 0.1] - - if function_name == "block_reduce": - ndim = len(shape) - if shape[-1] == 3: - block_sizes = [(b,) * (ndim - 1) + (3,) for b in (16, 32, 64)] - else: - block_sizes = [(b,) * ndim for b in (16, 32, 64)] - var_kwargs["block_size"] = block_sizes - - if function_name == "profile_line": - fixed_kwargs["dst"] = 
(shape[0] - 32, shape[1] + 9) - - if function_name == "median": - footprints = [] - ndim = len(shape) - footprint_sizes = [3, 5, 7, 9] if ndim == 2 else [3, 5, 7] - for footprint_size in [3, 5, 7, 9]: - footprints.append( - np.ones((footprint_sizes,) * ndim, dtype=bool) - ) - var_kwargs["footprint"] = footprints - - if function_name in ["gaussian", "unsharp_mask"]: - fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None - - B = FiltersBench( - function_name=function_name, - shape=shape, - dtypes=dtypes, - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.measure, - module_gpu=cucim.skimage.measure, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + all_results.to_pickle(pfile) + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM measure functions') + func_name_choices = ['label', 'regionprops', 'moments', 'moments_central', 'centroid', 'inertia_tensor', 'inertia_tensor_eigvals', 'block_reduce', 'shannon_entropy', 'profile_line'] + dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True) + parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices=func_name_choices, required=True) + 
parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) + + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cucim_metrics_bench.py b/benchmarks/skimage/cucim_metrics_bench.py index 713c78fbd..af6b94814 100644 --- a/benchmarks/skimage/cucim_metrics_bench.py +++ b/benchmarks/skimage/cucim_metrics_bench.py @@ -1,3 +1,4 @@ +import argparse import os import pickle @@ -17,44 +18,51 @@ def set_args(self, dtype): imaged = cp.testing.shaped_arange(self.shape, dtype=dtype) imaged2 = cp.testing.shaped_arange(self.shape, dtype=dtype) imaged2 = imaged2 + 0.05 * cp.random.standard_normal(self.shape) - imaged /= imaged.max() - imaged2 /= imaged2.max() + imaged = imaged / imaged.max() + imaged2 = imaged2 / imaged2.max() imaged2 = imaged2.clip(0, 1.0) self.args_cpu = (cp.asnumpy(imaged), cp.asnumpy(imaged2)) self.args_gpu = (imaged, imaged2) -pfile = "cucim_metrics_results.pickle" -if os.path.exists(pfile): - with open(pfile, "rb") as f: - all_results = pickle.load(f) -else: - all_results = pd.DataFrame() -dtypes = [np.float32] - - -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # _structural_similarity.py - ( - "structural_similarity", - dict(data_range=1.0), - dict(gradient=[False, True], gaussian_weights=[False, True]), - True, - True, - ), - # simple_metrics.py - ("mean_squared_error", dict(), dict(), True, True), - ( - "normalized_root_mse", - dict(), - dict(normalization=["euclidean", "min-max", "mean"]), - True, - True, - ), - ("peak_signal_noise_ratio", dict(data_range=1.0), dict(), True, True), -]: - - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: +def main(args): + + pfile = "cucim_metrics_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) + else: + all_results = pd.DataFrame() + + dtypes = 
[np.dtype(args.dtype)] + # image sizes/shapes + shape = tuple(list(map(int,(args.img_size.split(','))))) + run_cpu = not args.no_cpu + + for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ + # _structural_similarity.py + ( + "structural_similarity", + dict(data_range=1.0), + dict(gradient=[False, True], gaussian_weights=[False, True]), + True, + True, + ), + # simple_metrics.py + ("mean_squared_error", dict(), dict(), True, True), + ( + "normalized_root_mse", + dict(), + dict(normalization=["euclidean", "min-max", "mean"]), + True, + True, + ), + ("peak_signal_noise_ratio", dict(data_range=1.0), dict(), True, True), + ("normalized_mutual_information", dict(bins=100), dict(), True, True), + + ]: + if function_name != args.func_name: + continue ndim = len(shape) if not allow_nd: @@ -78,13 +86,28 @@ def set_args(self, dtype): var_kwargs=var_kwargs, module_cpu=skimage.metrics, module_gpu=cucim.skimage.metrics, + run_cpu=run_cpu, ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + all_results.to_pickle(pfile) + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM metrics functions') + func_name_choices = ['structural_similarity', 'mean_squared_error', 'normalized_root_mse', 'peak_signal_noise_ratio', 'normalized_mutual_information'] + dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True) + parser.add_argument('-f','--func_name', 
type=str, help='function to benchmark', choices=func_name_choices, required=True) + parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cucim_morphology_bench.py b/benchmarks/skimage/cucim_morphology_bench.py index dd8fbed4c..8fe1d6a95 100644 --- a/benchmarks/skimage/cucim_morphology_bench.py +++ b/benchmarks/skimage/cucim_morphology_bench.py @@ -1,3 +1,4 @@ +import argparse import copy import functools import math @@ -30,6 +31,7 @@ def __init__( var_kwargs={}, module_cpu=skimage.morphology, module_gpu=cucim.skimage.morphology, + run_cpu=True, ): array_kwargs = dict(footprint=footprint) @@ -47,6 +49,7 @@ def __init__( index_str=index_str, module_cpu=module_cpu, module_gpu=module_gpu, + run_cpu=run_cpu, ) def set_args(self, dtype): @@ -104,169 +107,169 @@ def set_args(self, dtype): self.args_gpu = (a, 5) -pfile = "cucim_morphology_results.pickle" -if os.path.exists(pfile): - with open(pfile, "rb") as f: - all_results = pickle.load(f) -else: - all_results = pd.DataFrame() -dtypes_gray = [np.float32] - - -for function_name, fixed_kwargs, var_kwargs, allow_nd in [ - ("thin", dict(), dict(), True), -]: - - for shape in [(512, 512), (3840, 2160)]: - - ndim = len(shape) - if ndim != 2: - raise ValueError("only 2d benchmark data has been implemented") - - if not allow_nd and ndim > 2: +def main(args): + + pfile = "cucim_morphology_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) + else: + all_results = pd.DataFrame() + + dtypes = [np.dtype(args.dtype)] + # image sizes/shapes + shape = 
tuple(list(map(int,(args.img_size.split(','))))) + run_cpu = not args.no_cpu + + for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ + # binary.py + ("binary_erosion", dict(), dict(), False, True), + ("binary_dilation", dict(), dict(), False, True), + ("binary_opening", dict(), dict(), False, True), + ("binary_closing", dict(), dict(), False, True), + # misc.py + ("remove_small_objects", dict(), dict(), False, True), + ("remove_small_holes", dict(), dict(), False, True), + # gray.py + ("erosion", dict(), dict(), False, True), + ("dilation", dict(), dict(), False, True), + ("opening", dict(), dict(), False, True), + ("closing", dict(), dict(), False, True), + ("white_tophat", dict(), dict(), False, True), + ("black_tophat", dict(), dict(), False, True), + # _skeletonize.py + ("thin", dict(), dict(), False, True), + # grayreconstruct.py + ("reconstruction", dict(), dict(), False, True), + # footprints.py + # OMIT the functions from this file (each creates a structuring element) + ]: + + if function_name != args.func_name: continue - - B = SkeletonizeBench( - function_name=function_name, - shape=shape, - dtypes=[bool], - fixed_kwargs={}, - var_kwargs=var_kwargs, - module_cpu=skimage.morphology, - module_gpu=cucim.skimage.morphology, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -for function_name, fixed_kwargs, var_kwargs, allow_nd in [ - ("binary_erosion", dict(), dict(), True), - ("binary_dilation", dict(), dict(), True), - ("binary_opening", dict(), dict(), True), - ("binary_closing", dict(), dict(), True), -]: - - for shape in [(512, 512), (3840, 2160), (192, 192, 192)]: - ndim = len(shape) - if not allow_nd and ndim > 2: - continue + if function_name == 'thin': + if ndim != 2: + raise ValueError("only 2d benchmark data has been implemented") - for connectivity in range(1, ndim + 1): - index_str = f"conn={connectivity}" - footprint = ndi.generate_binary_structure(ndim, connectivity) + if not 
allow_nd and ndim > 2: + continue - B = BinaryMorphologyBench( + B = SkeletonizeBench( function_name=function_name, shape=shape, dtypes=[bool], - footprint=footprint, fixed_kwargs={}, var_kwargs=var_kwargs, - index_str=index_str, module_cpu=skimage.morphology, module_gpu=cucim.skimage.morphology, + run_cpu=run_cpu, ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - -for function_name, fixed_kwargs, var_kwargs, allow_nd in [ - # misc.py - ("remove_small_objects", dict(), dict(), True), - ("remove_small_holes", dict(), dict(), True), -]: + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + elif function_name.startswith('binary_'): + + if not allow_nd and ndim > 2: + continue + + for connectivity in range(1, ndim + 1): + index_str = f"conn={connectivity}" + footprint = ndi.generate_binary_structure(ndim, connectivity) + + B = BinaryMorphologyBench( + function_name=function_name, + shape=shape, + dtypes=[bool], + footprint=footprint, + fixed_kwargs={}, + var_kwargs=var_kwargs, + index_str=index_str, + module_cpu=skimage.morphology, + module_gpu=cucim.skimage.morphology, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) - for shape in [(512, 512), (3840, 2160), (192, 192, 192)]: - ndim = len(shape) - if not allow_nd and ndim > 2: - continue + elif function_name in ['remove_small_holes', 'remove_small_objects']: + if not allow_nd and ndim > 2: + continue - if function_name == "remove_small_objects": - TestClass = RemoveSmallObjectsBench - elif function_name == "remove_small_holes": - TestClass = RemoveSmallHolesBench - else: - raise ValueError(f"unknown function: {function_name}") - B = TestClass( - function_name=function_name, - shape=shape, - dtypes=[bool], - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.morphology, - module_gpu=cucim.skimage.morphology, - ) 
- results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # grey.py - ("erosion", dict(), dict(), False, True), - ("dilation", dict(), dict(), False, True), - ("opening", dict(), dict(), False, True), - ("closing", dict(), dict(), False, True), - ("white_tophat", dict(), dict(), False, True), - ("black_tophat", dict(), dict(), False, True), - # greyreconstruct.py - ("reconstruction", dict(), dict(), False, True), - # footprints.py - # OMIT the functions from this file (each creates a structuring element) -]: - - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: + if function_name == "remove_small_objects": + TestClass = RemoveSmallObjectsBench + elif function_name == "remove_small_holes": + TestClass = RemoveSmallHolesBench - ndim = len(shape) - if not allow_nd: - if not allow_color: - if ndim > 2: - continue else: - if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): - continue - if shape[-1] == 3 and not allow_color: - continue - - if function_name == "gabor" and np.prod(shape) > 1000000: - # avoid cases that are too slow on the CPU - var_kwargs["frequency"] = [f for f in var_kwargs["frequency"] if f >= 0.1] - - if function_name == "median": - footprints = [] - ndim = len(shape) - footprint_sizes = [3, 5, 7, 9] if ndim == 2 else [3, 5, 7] - for footprint_size in [3, 5, 7, 9]: - footprints.append( - np.ones((footprint_sizes,) * ndim, dtype=bool) - ) - var_kwargs["footprint"] = footprints - - if function_name in ["gaussian", "unsharp_mask"]: - fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None + raise ValueError(f"unknown function: {function_name}") + B = TestClass( + function_name=function_name, + shape=shape, + dtypes=[bool], + fixed_kwargs=fixed_kwargs, + var_kwargs=var_kwargs, + module_cpu=skimage.morphology, + module_gpu=cucim.skimage.morphology, + run_cpu=run_cpu, + ) + results = 
B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) - if function_name == "reconstruction": - TestClass = ReconstructionBench else: - TestClass = ImageBench - B = TestClass( - function_name=function_name, - shape=shape, - dtypes=dtypes_gray, - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.morphology, - module_gpu=cucim.skimage.morphology, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + if not allow_nd: + if not allow_color: + if ndim > 2: + continue + else: + if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): + continue + + if shape[-1] == 3 and not allow_color: + continue + if function_name == "reconstruction": + TestClass = ReconstructionBench + else: + TestClass = ImageBench -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + B = TestClass( + function_name=function_name, + shape=shape, + dtypes=dtypes, + fixed_kwargs=fixed_kwargs, + var_kwargs=var_kwargs, + module_cpu=skimage.morphology, + module_gpu=cucim.skimage.morphology, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + all_results.to_pickle(pfile) + try: + import tabular + + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + except ImportError: + pass + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM morphology functions') + func_name_choices = ['binary_erosion', 'binary_dilation', 'binary_opening', 'binary_closing', 'remove_small_objects', 'remove_small_holes', 'erosion', 'dilation', 'opening', 'closing', 'white_tophat', 'black_tophat', 'thin', 'reconstruction'] + dtype_choices = ['float16', 'float32', 'float64', 'int8', 
'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True) + parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True) + parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) + + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cucim_registration_bench.py b/benchmarks/skimage/cucim_registration_bench.py index d90bd16a4..6d275dcec 100644 --- a/benchmarks/skimage/cucim_registration_bench.py +++ b/benchmarks/skimage/cucim_registration_bench.py @@ -1,3 +1,4 @@ +import argparse import math import os import pickle @@ -33,48 +34,93 @@ def set_args(self, dtype): self.args_gpu = (imaged, imaged2) -pfile = "cucim_registration_results.pickle" -if os.path.exists(pfile): - with open(pfile, "rb") as f: - all_results = pickle.load(f) -else: - all_results = pd.DataFrame() -dtypes = [np.float32] - - -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # _phase_cross_correlation.py - ("phase_cross_correlation", dict(), dict(), False, True), -]: +def main(args): + + pfile = "cucim_registration_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) + else: + all_results = pd.DataFrame() + + dtypes = [np.dtype(args.dtype)] + # image sizes/shapes + shape = tuple(list(map(int,(args.img_size.split(','))))) + run_cpu = not args.no_cpu + + for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ + # _phase_cross_correlation.py + ("phase_cross_correlation", dict(), dict(), False, True), + # optical flow functions 
+ ("optical_flow_tvl1", dict(), dict(num_iter=[10], num_warp=[5]), False, True), + ( + "optical_flow_ilk", + dict(), + dict(radius=[3, 7], num_warp=[10], gaussian=[False, True], prefilter=[False, True]), + False, + True, + ), + + ]: + + if function_name != args.func_name: + continue - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: + if function_name == 'phase_cross_correlation': + + ndim = len(shape) + if not allow_nd: + if not allow_color: + if ndim > 2: + continue + else: + if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): + continue + if shape[-1] == 3 and not allow_color: + continue + + for masked in [True, False]: + + index_str = f"masked={masked}" + if masked: + moving_mask = cp.ones(shape, dtype=bool) + moving_mask[20:-20, :] = 0 + moving_mask[:, 20:-20] = 0 + reference_mask = cp.ones(shape, dtype=bool) + reference_mask[80:-80, :] = 0 + reference_mask[:, 80:-80] = 0 + fixed_kwargs["moving_mask"] = moving_mask + fixed_kwargs["reference_mask"] = reference_mask + else: + fixed_kwargs["moving_mask"] = None + fixed_kwargs["reference_mask"] = None + + B = RegistrationBench( + function_name=function_name, + shape=shape, + dtypes=dtypes, + fixed_kwargs=fixed_kwargs, + var_kwargs=var_kwargs, + index_str=index_str, + module_cpu=skimage.registration, + module_gpu=cucim.skimage.registration, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) - ndim = len(shape) - if not allow_nd: - if not allow_color: - if ndim > 2: - continue - else: - if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): - continue - if shape[-1] == 3 and not allow_color: - continue + else: - for masked in [True, False]: - - index_str = f"masked={masked}" - if masked: - moving_mask = cp.ones(shape, dtype=bool) - moving_mask[20:-20, :] = 0 - moving_mask[:, 20:-20] = 0 - reference_mask = cp.ones(shape, dtype=bool) - reference_mask[80:-80, :] = 0 - reference_mask[:, 80:-80] = 0 - 
fixed_kwargs["moving_mask"] = moving_mask - fixed_kwargs["reference_mask"] = reference_mask - else: - fixed_kwargs["moving_mask"] = None - fixed_kwargs["reference_mask"] = None + ndim = len(shape) + if not allow_nd: + if not allow_color: + if ndim > 2: + continue + else: + if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): + continue + if shape[-1] == 3 and not allow_color: + continue B = RegistrationBench( function_name=function_name, @@ -82,54 +128,35 @@ def set_args(self, dtype): dtypes=dtypes, fixed_kwargs=fixed_kwargs, var_kwargs=var_kwargs, - index_str=index_str, module_cpu=skimage.registration, module_gpu=cucim.skimage.registration, + run_cpu=run_cpu, ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # _phase_cross_correlation.py - ("optical_flow_tvl1", dict(), dict(num_iter=[10], num_warp=[5]), False, True), - ( - "optical_flow_ilk", - dict(), - dict(radius=[3, 7], num_warp=[10], gaussian=[False, True], prefilter=[False, True]), - False, - True, - ), -]: - - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: - - ndim = len(shape) - if not allow_nd: - if not allow_color: - if ndim > 2: - continue - else: - if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): - continue - if shape[-1] == 3 and not allow_color: - continue - - B = RegistrationBench( - function_name=function_name, - shape=shape, - dtypes=dtypes, - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.registration, - module_gpu=cucim.skimage.registration, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, 
results["full"]]) + + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + all_results.to_pickle(pfile) + try: + import tabular + + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + except ImportError: + pass + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM registration functions') + func_name_choices = ['phase_cross_correlation', 'optical_flow_tvl1', 'optical_flow_ilk'] + dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True) + parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True) + parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) + + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cucim_restoration_bench.py b/benchmarks/skimage/cucim_restoration_bench.py index e20be1b9f..a1523dbae 100644 --- a/benchmarks/skimage/cucim_restoration_bench.py +++ b/benchmarks/skimage/cucim_restoration_bench.py @@ -1,3 +1,4 @@ +import argparse import math import os import pickle @@ -97,65 +98,35 @@ def set_args(self, dtype): self.args_gpu = (imaged, psfd) -pfile = "cucim_restoration_results.pickle" -if os.path.exists(pfile): - with open(pfile, "rb") as f: - all_results = pickle.load(f) -else: - all_results = pd.DataFrame() -dtypes = [np.float32] +def main(args): -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # _denoise.py - ("denoise_tv_chambolle", dict(), dict(weight=[0.02]), True, True), - # 
j_invariant.py - ("calibrate_denoiser", dict(), dict(), False, True), -]: + pfile = "cucim_restoration_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) + else: + all_results = pd.DataFrame() - for shape in [(512, 512), (1980, 1080), (1980, 1080, 3), (128, 128, 128)]: + dtypes = [np.dtype(args.dtype)] + # image sizes/shapes + shape = tuple(list(map(int,(args.img_size.split(','))))) + run_cpu = not args.no_cpu - ndim = len(shape) - if not allow_nd: - if not allow_color: - if ndim > 2: - continue - else: - if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): - continue - if shape[-1] == 3 and not allow_color: - continue - if function_name == "denoise_tv_chambolle": - fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None + for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ + # _denoise.py + ("denoise_tv_chambolle", dict(), dict(weight=[0.02]), True, True), + # j_invariant.py + ("calibrate_denoiser", dict(), dict(), False, True), + # deconvolution.py + ("wiener", dict(balance=100.0), dict(), False, False), + ("unsupervised_wiener", dict(), dict(), False, False), + ("richardson_lucy", dict(), dict(num_iter=[5]), False, True), + ]: - if function_name == "calibrate_denoiser": - denoise_class = CalibratedDenoiseBench - else: - denoise_class = DenoiseBench - - B = denoise_class( - function_name=function_name, - shape=shape, - dtypes=dtypes, - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.restoration, - module_gpu=cucim.skimage.restoration, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -# function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd = ('unsupervised_wiener', dict(), dict(), False, True) -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # deconvolution.py - ("wiener", dict(balance=100.0), dict(), False, False), - ("unsupervised_wiener", dict(), dict(), False, False), 
- ("richardson_lucy", dict(), dict(num_iter=[5]), False, True), -]: - - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: + if function_name != args.func_name: + continue ndim = len(shape) if not allow_nd: @@ -168,21 +139,65 @@ def set_args(self, dtype): if shape[-1] == 3 and not allow_color: continue - B = DeconvolutionBench( - function_name=function_name, - shape=shape, - dtypes=dtypes, - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.restoration, - module_gpu=cucim.skimage.restoration, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + if function_name in ['denoise_tv_chambolle', 'calibrate_denoiser']: + + if function_name == "denoise_tv_chambolle": + fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None + + if function_name == "calibrate_denoiser": + denoise_class = CalibratedDenoiseBench + else: + denoise_class = DenoiseBench + + B = denoise_class( + function_name=function_name, + shape=shape, + dtypes=dtypes, + fixed_kwargs=fixed_kwargs, + var_kwargs=var_kwargs, + module_cpu=skimage.restoration, + module_gpu=cucim.skimage.restoration, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + elif function_name in ['wiener', 'unsupervised_wiener', 'richardson_lucy']: + + B = DeconvolutionBench( + function_name=function_name, + shape=shape, + dtypes=dtypes, + fixed_kwargs=fixed_kwargs, + var_kwargs=var_kwargs, + module_cpu=skimage.restoration, + module_gpu=cucim.skimage.restoration, + run_cpu=run_cpu, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + 
all_results.to_pickle(pfile) + try: + import tabular + + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + except ImportError: + pass + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM restoration functions') + func_name_choices = ['denoise_tv_chambolle', 'calibrate_denoiser', 'wiener', 'unsupervised_wiener', 'richardson_lucy'] + dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True) + parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True) + parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) + + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cucim_segmentation_bench.py b/benchmarks/skimage/cucim_segmentation_bench.py index b770cdb36..382349a9b 100644 --- a/benchmarks/skimage/cucim_segmentation_bench.py +++ b/benchmarks/skimage/cucim_segmentation_bench.py @@ -1,8 +1,11 @@ +import argparse import math import os import pickle import cucim.skimage +import cucim.skimage.data +import cucim.skimage.exposure import cucim.skimage.segmentation import cupy as cp import numpy as np @@ -25,6 +28,7 @@ def __init__( index_str=None, module_cpu=skimage.measure, module_gpu=cucim.skimage.measure, + run_cpu=True, ): self.contiguous_labels = contiguous_labels @@ -38,6 +42,7 @@ def __init__( index_str=index_str, module_cpu=module_cpu, module_gpu=module_gpu, + run_cpu=run_cpu, ) def set_args(self, dtype): @@ -50,14 +55,38 @@ def set_args(self, dtype): ], 
dtype=dtype, ) - tiling = tuple(s // a_s for s, a_s in zip(shape, a.shape)) + tiling = tuple(s // a_s for s, a_s in zip(self.shape, a.shape)) if self.contiguous_labels: - image = np.kron(a, np.ones(tiling, dtype=a.dtype)) + labels = np.kron(a, np.ones(tiling, dtype=a.dtype)) else: - image = np.tile(a, tiling) - imaged = cp.asarray(image) - self.args_cpu = (image,) - self.args_gpu = (imaged,) + labels = np.tile(a, tiling) + labels_d = cp.asarray(labels) + self.args_cpu = (labels,) + self.args_gpu = (labels_d,) + + +class LabelAndImageBench(LabelBench): + + def set_args(self, dtype): + a = np.array( + [ + [0, 0, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 4, 0], + [2, 2, 0, 0, 3, 0, 4, 4], + [0, 0, 0, 0, 0, 5, 0, 0], + ], + dtype=dtype, + ) + tiling = tuple(s // a_s for s, a_s in zip(self.shape, a.shape)) + if self.contiguous_labels: + labels = np.kron(a, np.ones(tiling, dtype=a.dtype)) + else: + labels = np.tile(a, tiling) + labels_d = cp.asarray(labels) + image_d = cp.random.standard_normal(labels.shape).astype(np.float32) + image = cp.asnumpy(image_d) + self.args_cpu = (image, labels) + self.args_gpu = (image_d, labels_d) class MorphGeodesicBench(ImageBench): @@ -75,7 +104,9 @@ def set_args(self, dtype): imaged = cp.tile(im1, n_tile)[slices] # need this preprocessing for morphological_geodesic_active_contour - imaged = skimage.segmentation.inverse_gaussian_gradient(imaged) + imaged = cp.array( + skimage.segmentation.inverse_gaussian_gradient(cp.asnumpy(imaged)) + ) image = cp.asnumpy(imaged) assert imaged.dtype == dtype @@ -84,29 +115,118 @@ def set_args(self, dtype): self.args_gpu = (imaged,) -pfile = "cucim_segmentation_results.pickle" -if os.path.exists(pfile): - with open(pfile, "rb") as f: - all_results = pickle.load(f) -else: - all_results = pd.DataFrame() -dtypes = [np.int32] +class RandomWalkerBench(ImageBench): + + + def set_args(self, dtype): + # Note: dtype only used for merkers array, data is hard-coded as float32 + + if np.dtype(dtype).kind not in 'iu': + 
raise ValueError("random_walker markers require integer dtype") + + n_dim = len(self.shape) + data = cucim.skimage.img_as_float( + cucim.skimage.data.binary_blobs( + length=max(self.shape), n_dim=n_dim, seed=1 + ) + ) + data = data[tuple(slice(s) for s in self.shape)] + sigma = 0.35 + rng = np.random.default_rng(5) + data += cp.array(rng.normal(loc=0, scale=sigma, size=data.shape)) + data = cucim.skimage.exposure.rescale_intensity( + data, in_range=(-sigma, 1 + sigma), out_range=(-1, 1) + ) + data = data.astype(cp.float32) + data_cpu = cp.asnumpy(data) + + # The range of the binary image spans over (-1, 1). + # We choose the hottest and the coldest pixels as markers. + markers = cp.zeros(data.shape, dtype=dtype) + markers[data < -0.95] = 1 + markers[data > 0.95] = 2 + markers_cpu = cp.asnumpy(markers) + self.args_cpu = (data_cpu, markers_cpu) + self.args_gpu = (data, markers) + + + +def main(args): + + pfile = "cucim_segmentation_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) + else: + all_results = pd.DataFrame() + dtypes = [np.dtype(args.dtype)] + dtypes_label = [np.dtype(args.dtype_label)] + # image sizes/shapes + shape = tuple(list(map(int,(args.img_size.split(','))))) + run_cpu = not args.no_cpu -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # _denoise.py - ( - "find_boundaries", - dict(), - dict(connectivity=[1], mode=["thick", "inner", "outer", "subpixel"]), - False, - True, - ), -]: + for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ + # _clear_border.py + ( + "clear_border", + dict(), + dict(), + False, + True, + ), + # _join.py + ( + "relabel_sequential", + dict(offset=5), + dict(), + False, + True, + ), + # boundaries.py + ( + "find_boundaries", + dict(), + dict(connectivity=[1], mode=["thick", "inner", "outer", "subpixel"]), + False, + True, + ), + ( + "mark_boundaries", + dict(), + dict(), + False, + True, + ), + ( + "random_walker", + 
dict(beta=4, tol=1.e-4, prob_tol=1.e-2), + dict(mode=['cg', 'cg_j']), + False, + True, + ), + # morphsnakes.py + ("inverse_gaussian_gradient", dict(), dict(), False, True), + ( + "morphological_geodesic_active_contour", + dict(), + dict(num_iter=[16], init_level_set=["checkerboard", "disk"]), + False, + False, + ), + ( + "morphological_chan_vese", + dict(), + dict(num_iter=[16], init_level_set=["checkerboard", "disk"]), + False, + False, + ), + # omit: disk_level_set (simple array generation function) + # omit: checkerboard_level_set (simple array generation function) + ]: - for shape in [ - (64, 64), - ]: # (512, 512), (1980, 1080), (1980, 1080, 3), (128, 128, 128)]: + if function_name != args.func_name: + continue ndim = len(shape) if not allow_nd: @@ -119,73 +239,72 @@ def set_args(self, dtype): if shape[-1] == 3 and not allow_color: continue - B = LabelBench( - function_name=function_name, - shape=shape, - dtypes=dtypes, - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.segmentation, - module_gpu=cucim.skimage.segmentation, - ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) - - -dtypes = [np.float32] -# function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd = ('unsupervised_wiener', dict(), dict(), False, True) -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # morphsnakes.py - ("inverse_gaussian_gradient", dict(), dict(), False, True), - ( - "morphological_geodesic_active_contour", - dict(), - dict(num_iter=[16], init_level_set=["checkerboard", "disk"]), - False, - False, - ), - ( - "morphological_chan_vese", - dict(), - dict(num_iter=[16], init_level_set=["checkerboard", "disk"]), - False, - False, - ), -]: - - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: + if function_name in ["clear_border", "relabel_sequential", "find_boundaries", "mark_boundaries", "random_walker"]: + if function_name == 'random_walker': + 
fixed_kwargs['channel_axis'] = -1 if shape[-1] == 3 else None - ndim = len(shape) - if not allow_nd: - if not allow_color: - if ndim > 2: - continue + if function_name == 'mark_boundaries': + bench_func = LabelAndImageBench + elif function_name == 'random_walker': + bench_func = RandomWalkerBench else: - if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]): - continue - if shape[-1] == 3 and not allow_color: - continue + bench_func = LabelBench - if function_name == "morphological_geodesic_active_contour": - bench_class = MorphGeodesicBench - else: - bench_class = ImageBench + B = bench_func( + function_name=function_name, + shape=shape, + dtypes=dtypes_label, + fixed_kwargs=fixed_kwargs, + var_kwargs=var_kwargs, + module_cpu=skimage.segmentation, + module_gpu=cucim.skimage.segmentation, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + + elif function_name in ["inverse_gaussian_gradient", "morphological_geodesic_active_contour", "morphological_chan_vese"]: + + if function_name == "morphological_geodesic_active_contour": + bench_class = MorphGeodesicBench + else: + bench_class = ImageBench + + B = bench_class( + function_name=function_name, + shape=shape, + dtypes=dtypes, + fixed_kwargs=fixed_kwargs, + var_kwargs=var_kwargs, + module_cpu=skimage.segmentation, + module_gpu=cucim.skimage.segmentation, + ) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + all_results.to_pickle(pfile) + try: + import tabular + + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + except ImportError: + pass - B = ImageBench( - function_name=function_name, - shape=shape, - dtypes=dtypes, - fixed_kwargs=fixed_kwargs, - var_kwargs=var_kwargs, - module_cpu=skimage.segmentation, - module_gpu=cucim.skimage.segmentation, - ) - results = B.run_benchmark(duration=1) - 
all_results = all_results.append(results["full"]) +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM segmentation functions') + func_name_choices = ["clear_border", "relabel_sequential", "find_boundaries", "mark_boundaries", "random_walker", "inverse_gaussian_gradient", "morphological_geodesic_active_contour", "morphological_chan_vese"] + label_dtype_choices = ['int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True) + parser.add_argument('--dtype_label', type=str, help='Dtype of input image', choices = label_dtype_choices, required=False, default='uint8') + parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True) + parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cucim_transform_bench.py b/benchmarks/skimage/cucim_transform_bench.py index 534a6a225..b6aa315f9 100644 --- a/benchmarks/skimage/cucim_transform_bench.py +++ b/benchmarks/skimage/cucim_transform_bench.py @@ -1,3 +1,4 @@ +import argparse import os import pickle @@ -10,75 +11,83 @@ from _image_bench import ImageBench -pfile = "cucim_transform_results.pickle" -if os.path.exists(pfile): - with open(pfile, 
"rb") as f: - all_results = pickle.load(f) -else: - all_results = pd.DataFrame() -dtypes = [np.float32] - -for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ - # _warps.py - ( - "resize", - dict(preserve_range=True), - dict(order=[0, 1, 3], mode=["reflect"], anti_aliasing=[True]), - True, - True, - ), # scale handled in loop below - ( - "resize_local_mean", - dict(preserve_range=True), - {}, - True, - True, - ), # scale handled in loop below - ( - "rescale", - dict(preserve_range=True), - dict(order=[0, 1, 3], mode=["reflect"], anti_aliasing=[True]), - True, - True, - ), # output_shape handled in loop below - ( - "rotate", - dict(angle=15, preserve_range=True), - dict(order=[0, 1, 3], mode=["reflect"], resize=[False, True]), - False, - False, - ), - ("downscale_local_mean", dict(), dict(), True, True), # factors handled in loop below - ( - "swirl", - dict(strength=1, preserve_range=True), - dict(order=[0, 1, 3], mode=["reflect"]), - False, - False, - ), - # TODO : warp? 
already indirectly benchmarked via swirl, etc - ("warp_polar", dict(), dict(scaling=["linear", "log"]), True, False), - # integral.py - ("integral_image", dict(), dict(), False, True), - # TODO: integrate - # pyramids.py - ( - "pyramid_gaussian", - dict(max_layer=6, downscale=2, preserve_range=True), - dict(order=[0, 1, 3]), - True, - True, - ), - ( - "pyramid_laplacian", - dict(max_layer=6, downscale=2, preserve_range=True), - dict(order=[0, 1, 3]), - True, - True, - ), -]: - - for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]: + +def main(args): + + pfile = "cucim_transform_results.pickle" + if os.path.exists(pfile): + with open(pfile, "rb") as f: + all_results = pickle.load(f) + else: + all_results = pd.DataFrame() + + dtypes = [np.dtype(args.dtype)] + # image sizes/shapes + shape = tuple(list(map(int,(args.img_size.split(','))))) + run_cpu = not args.no_cpu + + for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [ + # _warps.py + ( + "resize", + dict(preserve_range=True), + dict(order=[0, 1, 3], mode=["reflect"], anti_aliasing=[True]), + True, + True, + ), # scale handled in loop below + ( + "resize_local_mean", + dict(preserve_range=True), + {}, + True, + True, + ), # scale handled in loop below + ( + "rescale", + dict(preserve_range=True), + dict(order=[0, 1, 3], mode=["reflect"], anti_aliasing=[True]), + True, + True, + ), # output_shape handled in loop below + ( + "rotate", + dict(angle=15, preserve_range=True), + dict(order=[0, 1, 3], mode=["reflect"], resize=[False, True]), + False, + False, + ), + ("downscale_local_mean", dict(), dict(), True, True), # factors handled in loop below + ( + "swirl", + dict(strength=1, preserve_range=True), + dict(order=[0, 1, 3], mode=["reflect"]), + False, + False, + ), + # TODO : warp? 
already indirectly benchmarked via swirl, etc + ("warp_polar", dict(), dict(scaling=["linear", "log"]), True, False), + # integral.py + ("integral_image", dict(), dict(), False, True), + # TODO: integrate + # pyramids.py + ( + "pyramid_gaussian", + dict(max_layer=6, downscale=2, preserve_range=True), + dict(order=[0, 1, 3]), + True, + True, + ), + ( + "pyramid_laplacian", + dict(max_layer=6, downscale=2, preserve_range=True), + dict(order=[0, 1, 3]), + True, + True, + ), + ]: + + if function_name != args.func_name: + continue ndim = len(shape) if not allow_nd: @@ -131,12 +140,30 @@ module_gpu=cucim.skimage.transform, function_is_generator=function_is_generator, ) - results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + results = B.run_benchmark(duration=args.duration) + all_results = pd.concat([all_results, results["full"]]) + + fbase = os.path.splitext(pfile)[0] + all_results.to_csv(fbase + ".csv") + all_results.to_pickle(pfile) + try: + import tabulate + + with open(fbase + ".md", "wt") as f: + f.write(all_results.to_markdown()) + except ImportError: + pass + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Benchmarking cuCIM transform functions') + func_name_choices = ['resize', 'resize_local_mean', 'rescale', 'rotate', 'downscale_local_mean', 'warp_polar', 'integral_image', 'pyramid_gaussian', 'pyramid_laplacian'] + dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'] + parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True) + parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True) + parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True) + parser.add_argument('-t','--duration', type=int, help='time to run benchmark', 
required=True) + parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False) -fbase = os.path.splitext(pfile)[0] -all_results.to_csv(fbase + ".csv") -all_results.to_pickle(pfile) -with open(fbase + ".md", "wt") as f: - f.write(all_results.to_markdown()) + args = parser.parse_args() + main(args) diff --git a/benchmarks/skimage/cupyx_scipy_ndimage_filter_bench.py b/benchmarks/skimage/cupyx_scipy_ndimage_filter_bench.py index 4c5143246..e09f3fe1d 100644 --- a/benchmarks/skimage/cupyx_scipy_ndimage_filter_bench.py +++ b/benchmarks/skimage/cupyx_scipy_ndimage_filter_bench.py @@ -102,7 +102,7 @@ def set_args(self, dtype): var_kwargs=var_kwargs, ) results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + all_results = pd.concat([all_results, results["full"]]) for fname, wshape, var_kwargs in [ ("convolve", weights_shape, dict(mode=modes)), @@ -119,7 +119,7 @@ def set_args(self, dtype): var_kwargs=var_kwargs, ) results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + all_results = pd.concat([all_results, results["full"]]) fbase = os.path.splitext(pfile)[0] all_results.to_csv(fbase + ".csv") diff --git a/benchmarks/skimage/cupyx_scipy_ndimage_fourier_bench.py b/benchmarks/skimage/cupyx_scipy_ndimage_fourier_bench.py index bfc6b448d..06eb19fc2 100644 --- a/benchmarks/skimage/cupyx_scipy_ndimage_fourier_bench.py +++ b/benchmarks/skimage/cupyx_scipy_ndimage_fourier_bench.py @@ -43,7 +43,7 @@ def set_args(self, dtype): var_kwargs=var_kwargs, ) results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + all_results = pd.concat([all_results, results["full"]]) fbase = os.path.splitext(pfile)[0] diff --git a/benchmarks/skimage/cupyx_scipy_ndimage_interp_bench.py b/benchmarks/skimage/cupyx_scipy_ndimage_interp_bench.py index 294c33d5e..de518ab82 100644 --- a/benchmarks/skimage/cupyx_scipy_ndimage_interp_bench.py +++ 
b/benchmarks/skimage/cupyx_scipy_ndimage_interp_bench.py @@ -56,7 +56,7 @@ def set_args(self, dtype): var_kwargs=dict(mode=modes, order=orders), ) results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + all_results = pd.concat([all_results, results["full"]]) for fname, fixed_kwargs, var_kwargs in [ ( @@ -139,7 +139,7 @@ def set_args(self, dtype): var_kwargs=var_kwargs, ) results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + all_results = pd.concat([all_results, results["full"]]) fbase = os.path.splitext(pfile)[0] all_results.to_csv(fbase + ".csv") diff --git a/benchmarks/skimage/cupyx_scipy_ndimage_measurements_bench.py b/benchmarks/skimage/cupyx_scipy_ndimage_measurements_bench.py index 9f6e531af..bb1d479b1 100644 --- a/benchmarks/skimage/cupyx_scipy_ndimage_measurements_bench.py +++ b/benchmarks/skimage/cupyx_scipy_ndimage_measurements_bench.py @@ -145,7 +145,7 @@ def set_args(self, dtype): var_kwargs=var_kwargs, ) results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + all_results = pd.concat([all_results, results["full"]]) for fname in [ "sum", @@ -180,7 +180,7 @@ def set_args(self, dtype): var_kwargs=var_kwargs, ) results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + all_results = pd.concat([all_results, results["full"]]) fbase = os.path.splitext(pfile)[0] all_results.to_csv(fbase + ".csv") diff --git a/benchmarks/skimage/cupyx_scipy_ndimage_morphology_bench.py b/benchmarks/skimage/cupyx_scipy_ndimage_morphology_bench.py index 54d58d667..7fbe9e7c6 100644 --- a/benchmarks/skimage/cupyx_scipy_ndimage_morphology_bench.py +++ b/benchmarks/skimage/cupyx_scipy_ndimage_morphology_bench.py @@ -126,7 +126,7 @@ def set_args(self, dtype): var_kwargs=var_kwargs, ) results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + all_results = pd.concat([all_results, results["full"]]) iterations = [1, 
10, 30] for fname, var_kwargs in [ @@ -152,7 +152,7 @@ def set_args(self, dtype): var_kwargs=var_kwargs, ) results = B.run_benchmark(duration=1) - all_results = all_results.append(results["full"]) + all_results = pd.concat([all_results, results["full"]]) fbase = os.path.splitext(pfile)[0] all_results.to_csv(fbase + ".csv") diff --git a/benchmarks/skimage/requirements-bench.txt b/benchmarks/skimage/requirements-bench.txt new file mode 100644 index 000000000..2cc6ac30e --- /dev/null +++ b/benchmarks/skimage/requirements-bench.txt @@ -0,0 +1,2 @@ +pandas>=1.0 +tabulate>=0.8.7 diff --git a/benchmarks/skimage/run-nv-bench-color.sh b/benchmarks/skimage/run-nv-bench-color.sh new file mode 100755 index 000000000..14a0809ec --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-color.sh @@ -0,0 +1,11 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 192,192,192) +param_filt=(convert_colorspace rgb2hed hed2rgb lab2lch lch2lab xyz2lab lab2xyz rgba2rgb label2rgb) +param_dt=(float32 uint8) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + python cucim_color_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/benchmarks/skimage/run-nv-bench-exposure.sh b/benchmarks/skimage/run-nv-bench-exposure.sh new file mode 100755 index 000000000..4ed9e0748 --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-exposure.sh @@ -0,0 +1,11 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 3840,2160,3 192,192,192) +param_filt=(equalize_adapthist cumulative_distribution equalize_hist rescale_intensity adjust_gamma adjust_log adjust_sigmoid is_low_contrast match_histograms) +param_dt=(float32 uint8) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + python cucim_exposure_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/benchmarks/skimage/run-nv-bench-feature.sh b/benchmarks/skimage/run-nv-bench-feature.sh new file mode 100755 index 
000000000..fbab913c6 --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-feature.sh @@ -0,0 +1,11 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 3840,2160,3 192,192,192) +param_filt=(multiscale_basic_features canny daisy structure_tensor hessian_matrix hessian_matrix_det shape_index corner_kitchen_rosenfeld corner_harris corner_shi_tomasi corner_foerstner corner_peaks match_template) +param_dt=(float64 float32 float16) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + python cucim_feature_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/benchmarks/skimage/run-nv-bench-filters.sh b/benchmarks/skimage/run-nv-bench-filters.sh new file mode 100755 index 000000000..37388e9e9 --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-filters.sh @@ -0,0 +1,12 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 3840,2160,3 192,192,192) +param_filt=(gabor gaussian median rank_order unsharp_mask sobel prewitt scharr roberts roberts_pos_diag roberts_neg_diag farid laplace meijering sato frangi hessian threshold_isodata threshold_otsu threshold_yen threshold_local threshold_li threshold_minimum threshold_mean threshold_triangle threshold_niblack threshold_sauvola apply_hysteresis_threshold threshold_multiotsu) +# param_filt=(rank_order ) +param_dt=(float64 float32 float16) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + python cucim_filters_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/benchmarks/skimage/run-nv-bench-measure.sh b/benchmarks/skimage/run-nv-bench-measure.sh new file mode 100755 index 000000000..5c2b4f413 --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-measure.sh @@ -0,0 +1,11 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 3840,2160,3 192,192,192) +param_filt=(label regionprops moments moments_central centroid inertia_tensor inertia_tensor_eigvals block_reduce shannon_entropy profile_line) 
+param_dt=(float32 uint8) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + python cucim_measure_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/benchmarks/skimage/run-nv-bench-metrics.sh b/benchmarks/skimage/run-nv-bench-metrics.sh new file mode 100755 index 000000000..b24ba775b --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-metrics.sh @@ -0,0 +1,11 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 3840,2160,3 192,192,192) +param_filt=(structural_similarity mean_squared_error normalized_root_mse peak_signal_noise_ratio normalized_mutual_information) +param_dt=(float32 uint8) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + python cucim_metrics_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/benchmarks/skimage/run-nv-bench-morphology.sh b/benchmarks/skimage/run-nv-bench-morphology.sh new file mode 100755 index 000000000..4981a58ad --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-morphology.sh @@ -0,0 +1,12 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 3840,2160,3 192,192,192) +param_filt=(binary_erosion binary_dilation binary_opening binary_closing remove_small_objects remove_small_holes erosion dilation opening closing white_tophat black_tophat thin reconstruction) +# Note: user-specified dtype ignored for binary_* functions and thin (these only accept binary input) +param_dt=(float32 uint8) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + python cucim_morphology_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/benchmarks/skimage/run-nv-bench-registration.sh b/benchmarks/skimage/run-nv-bench-registration.sh new file mode 100755 index 000000000..18f572b53 --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-registration.sh @@ -0,0 +1,11 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 3840,2160,3 
192,192,192) +param_filt=(phase_cross_correlation optical_flow_tvl1 optical_flow_ilk) +param_dt=(float32 uint8) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + python cucim_registration_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/benchmarks/skimage/run-nv-bench-restoration.sh b/benchmarks/skimage/run-nv-bench-restoration.sh new file mode 100755 index 000000000..df7c9141f --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-restoration.sh @@ -0,0 +1,11 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 3840,2160,3 192,192,192) +param_filt=(denoise_tv_chambolle calibrate_denoiser wiener unsupervised_wiener richardson_lucy) +param_dt=(float32 uint8) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + python cucim_restoration_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/benchmarks/skimage/run-nv-bench-segmentation.sh b/benchmarks/skimage/run-nv-bench-segmentation.sh new file mode 100755 index 000000000..4723f0c60 --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-segmentation.sh @@ -0,0 +1,27 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 3840,2160,3 192,192,192) + +# these require an integer-valued label image +param_filt=(clear_border relabel_sequential find_boundaries mark_boundaries random_walker) +param_dt=(float32) +param_dt_label=(uint8 uint32) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + for dt_label in "${param_dt_label[@]}"; do + python cucim_segmentation_bench.py -f $filt -i $shape -d $dt --dtype_label $dt_label -t 10 + done + done + done +done + +# these do not require an integer-valued input image +param_filt=(inverse_gaussian_gradient morphological_geodesic_active_contour morphological_chan_vese) +param_dt=(float32) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in 
"${param_dt[@]}"; do + python cucim_segmentation_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/benchmarks/skimage/run-nv-bench-transform.sh b/benchmarks/skimage/run-nv-bench-transform.sh new file mode 100755 index 000000000..c880cb699 --- /dev/null +++ b/benchmarks/skimage/run-nv-bench-transform.sh @@ -0,0 +1,11 @@ +#!/bin/bash +param_shape=(512,512 3840,2160 3840,2160,3 192,192,192) +param_filt=(resize resize_local_mean rescale rotate downscale_local_mean warp_polar integral_image pyramid_gaussian pyramid_laplacian) +param_dt=(float32 uint8) +for shape in "${param_shape[@]}"; do + for filt in "${param_filt[@]}"; do + for dt in "${param_dt[@]}"; do + python cucim_transform_bench.py -f $filt -i $shape -d $dt -t 10 + done + done +done diff --git a/conda/environments/env.yml b/conda/environments/env.yml index 895329fa6..0cee99491 100644 --- a/conda/environments/env.yml +++ b/conda/environments/env.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - cupy>=9 - - scikit-image>=0.18.1 + - scikit-image>=0.19.0 - openslide - zlib - jpeg diff --git a/conda/recipes/cucim/build.sh b/conda/recipes/cucim/build.sh index 77e9b7aef..28b1112c2 100644 --- a/conda/recipes/cucim/build.sh +++ b/conda/recipes/cucim/build.sh @@ -4,14 +4,6 @@ CUCIM_BUILD_TYPE=${CUCIM_BUILD_TYPE:-release} echo "CC : ${CC}" echo "CXX : ${CXX}" -echo "CUDAHOSTCXX : ${CUDAHOSTCXX}" - -# For now CUDAHOSTCXX is set to `/usr/bin/g++` by -# https://github.com/rapidsai/docker/blob/161b200157206660d88fb02cf69fe58d363ac95e/generated-dockerfiles/rapidsai-core_ubuntu18.04-devel.Dockerfile -# To use GCC-9 in conda build environment, need to set it to $CXX (=$BUILD_PREFIX/bin/x86_64-conda-linux-gnu-c++) -# This can be removed once we switch to use gcc-9 -# : https://docs.rapids.ai/notices/rdn0002/ -export CUDAHOSTCXX=${CXX} # CUDA needs to include $PREFIX/include as system include path export CUDAFLAGS="-isystem $BUILD_PREFIX/include -isystem $PREFIX/include " diff --git 
a/conda/recipes/cucim/meta.yaml b/conda/recipes/cucim/meta.yaml index fa9377b93..479d17e83 100644 --- a/conda/recipes/cucim/meta.yaml +++ b/conda/recipes/cucim/meta.yaml @@ -32,7 +32,7 @@ requirements: - cupy >=9,<11.0.0a0 - numpy 1.19 - scipy - - scikit-image >=0.18.1,<0.20.0a0 + - scikit-image >=0.19.0,<0.20.0a0 run: - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} - python {{ python_version }}.* @@ -41,7 +41,7 @@ requirements: - cupy >=9,<11.0.0a0 - {{ pin_compatible('numpy') }} - scipy - - scikit-image >=0.18.1,<0.20.0a0 + - scikit-image >=0.19.0,<0.20.0a0 # - openslide # skipping here but benchmark binary would needs openslide library tests: # [linux64] diff --git a/conda/recipes/libcucim/build.sh b/conda/recipes/libcucim/build.sh index 9d2217bbc..7d21aebe3 100644 --- a/conda/recipes/libcucim/build.sh +++ b/conda/recipes/libcucim/build.sh @@ -4,16 +4,8 @@ CUCIM_BUILD_TYPE=${CUCIM_BUILD_TYPE:-release} echo "CC : ${CC}" echo "CXX : ${CXX}" -echo "CUDAHOSTCXX : ${CUDAHOSTCXX}" echo "CUDA : ${CUDA}" -# For now CUDAHOSTCXX is set to `/usr/bin/g++` by -# https://github.com/rapidsai/docker/blob/161b200157206660d88fb02cf69fe58d363ac95e/generated-dockerfiles/rapidsai-core_ubuntu18.04-devel.Dockerfile -# To use GCC-9 in conda build environment, need to set it to $CXX (=$BUILD_PREFIX/bin/x86_64-conda-linux-gnu-c++) -# This can be removed once we switch to use gcc-9 -# : https://docs.rapids.ai/notices/rdn0002/ -export CUDAHOSTCXX=${CXX} - # CUDA needs to include $PREFIX/include as system include path export CUDAFLAGS="-isystem $BUILD_PREFIX/include -isystem $PREFIX/include " export LD_LIBRARY_PATH="$BUILD_PREFIX/lib:$PREFIX/lib:$LD_LIBRARY_PATH" diff --git a/cpp/plugins/cucim.kit.cumed/VERSION b/cpp/plugins/cucim.kit.cumed/VERSION index ddc883c42..231fac64b 100644 --- a/cpp/plugins/cucim.kit.cumed/VERSION +++ b/cpp/plugins/cucim.kit.cumed/VERSION @@ -1 +1 @@ -22.06.00 +22.08.00 diff --git a/cpp/plugins/cucim.kit.cuslide/VERSION 
b/cpp/plugins/cucim.kit.cuslide/VERSION index ddc883c42..231fac64b 100644 --- a/cpp/plugins/cucim.kit.cuslide/VERSION +++ b/cpp/plugins/cucim.kit.cuslide/VERSION @@ -1 +1 @@ -22.06.00 +22.08.00 diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/cuslide.cpp b/cpp/plugins/cucim.kit.cuslide/src/cuslide/cuslide.cpp index b284e0f71..0b7cbeb21 100644 --- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/cuslide.cpp +++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/cuslide.cpp @@ -171,16 +171,46 @@ static bool CUCIM_ABI parser_parse(CuCIMFileHandle_ptr handle_ptr, cucim::io::fo channel_names.emplace_back(std::string_view{ "A" }); } - // TODO: Set correct spacing value + // Spacing units + std::pmr::vector spacing_units(&resource); + spacing_units.reserve(ndim); + std::pmr::vector spacing(&resource); spacing.reserve(ndim); - spacing.insert(spacing.end(), ndim, 1.0); + const auto resolution_unit = level0_ifd->resolution_unit(); + const auto x_resolution = level0_ifd->x_resolution(); + const auto y_resolution = level0_ifd->y_resolution(); + + switch (resolution_unit) + { + case 1: // no absolute unit of measurement + spacing.emplace_back(y_resolution); + spacing.emplace_back(x_resolution); + spacing.emplace_back(1.0f); + + spacing_units.emplace_back(std::string_view{ "" }); + spacing_units.emplace_back(std::string_view{ "" }); + break; + case 2: // inch + spacing.emplace_back(y_resolution != 0 ? 25400 / y_resolution : 1.0f); + spacing.emplace_back(x_resolution != 0 ? 25400 / x_resolution : 1.0f); + spacing.emplace_back(1.0f); + + spacing_units.emplace_back(std::string_view{ "micrometer" }); + spacing_units.emplace_back(std::string_view{ "micrometer" }); + break; + case 3: // centimeter + spacing.emplace_back(y_resolution != 0 ? 10000 / y_resolution : 1.0f); + spacing.emplace_back(x_resolution != 0 ? 
10000 / x_resolution : 1.0f); + spacing.emplace_back(1.0f); + + spacing_units.emplace_back(std::string_view{ "micrometer" }); + spacing_units.emplace_back(std::string_view{ "micrometer" }); + break; + default: + spacing.insert(spacing.end(), ndim, 1.0f); + } - // TODO: Set correct spacing units - std::pmr::vector spacing_units(&resource); - spacing_units.reserve(ndim); - spacing_units.emplace_back(std::string_view{ "micrometer" }); - spacing_units.emplace_back(std::string_view{ "micrometer" }); spacing_units.emplace_back(std::string_view{ "color" }); std::pmr::vector origin({ 0.0, 0.0, 0.0 }, &resource); diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp index 52a66cd80..9bcacb25d 100644 --- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp +++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp @@ -63,6 +63,9 @@ IFD::IFD(TIFF* tiff, uint16_t index, ifd_offset_t offset) : tiff_(tiff), ifd_ind model_ = std::string(model_char_ptr ? model_char_ptr : ""); TIFFGetField(tif, TIFFTAG_IMAGEDESCRIPTION, &model_char_ptr); image_description_ = std::string(model_char_ptr ? 
model_char_ptr : ""); + TIFFGetField(tif, TIFFTAG_RESOLUTIONUNIT, &resolution_unit_); + TIFFGetField(tif, TIFFTAG_XRESOLUTION, &x_resolution_); + TIFFGetField(tif, TIFFTAG_YRESOLUTION, &y_resolution_); TIFFDirectory& tif_dir = tif->tif_dir; flags_ = tif->tif_flags; @@ -451,6 +454,18 @@ std::string& IFD::image_description() { return image_description_; } +uint16_t IFD::resolution_unit() const +{ + return resolution_unit_; +} +float IFD::x_resolution() const +{ + return x_resolution_; +} +float IFD::y_resolution() const +{ + return y_resolution_; +} uint32_t IFD::width() const { return width_; diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.h b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.h index e15724782..5737d82d5 100644 --- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.h +++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.h @@ -72,6 +72,9 @@ class EXPORT_VISIBLE IFD : public std::enable_shared_from_this std::string& software(); std::string& model(); std::string& image_description(); + uint16_t resolution_unit() const; + float x_resolution() const; + float y_resolution() const; uint32_t width() const; uint32_t height() const; uint32_t tile_width() const; @@ -109,6 +112,10 @@ class EXPORT_VISIBLE IFD : public std::enable_shared_from_this std::string software_; std::string model_; std::string image_description_; + uint16_t resolution_unit_ = 1; // 1 = No absolute unit of measurement, 2 = Inch, 3 = Centimeter + float x_resolution_ = 1.0f; + float y_resolution_ = 1.0f; + uint32_t flags_ = 0; uint32_t width_ = 0; uint32_t height_ = 0; diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/tiff.cpp b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/tiff.cpp index cf9d38e3e..1f1ac6ab6 100644 --- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/tiff.cpp +++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/tiff.cpp @@ -387,6 +387,9 @@ void TIFF::resolve_vendor_format() auto& first_ifd = ifds_[0]; std::string& model = first_ifd->model(); 
std::string& software = first_ifd->software(); + const uint16_t resolution_unit = first_ifd->resolution_unit(); + const float x_resolution = first_ifd->x_resolution(); + const float y_resolution = first_ifd->y_resolution(); // Detect Aperio SVS format { @@ -416,6 +419,20 @@ void TIFF::resolve_vendor_format() tiff_metadata.emplace("model", model); tiff_metadata.emplace("software", software); + switch (resolution_unit) + { + case 2: + tiff_metadata.emplace("resolution_unit", "inch"); + break; + case 3: + tiff_metadata.emplace("resolution_unit", "centimeter"); + break; + default: + tiff_metadata.emplace("resolution_unit", ""); + break; + } + tiff_metadata.emplace("x_resolution", x_resolution); + tiff_metadata.emplace("y_resolution", y_resolution); (*json_metadata).emplace("tiff", std::move(tiff_metadata)); } diff --git a/cpp/src/cuimage.cpp b/cpp/src/cuimage.cpp index 3ba8fc18b..d9c0ee4dc 100644 --- a/cpp/src/cuimage.cpp +++ b/cpp/src/cuimage.cpp @@ -680,10 +680,11 @@ CuImage CuImage::read_region(std::vector&& location, location.emplace_back(0); location.emplace_back(0); } + + const ResolutionInfo& res_info = resolutions(); // If `size` is not specified, size would be (width, height) of the image at the specified `level`. 
if (size.empty()) { - const ResolutionInfo& res_info = resolutions(); const auto level_count = res_info.level_count(); if (level_count == 0) { @@ -853,19 +854,36 @@ CuImage CuImage::read_region(std::vector&& location, // The first dimension is for 'batch' ('N') spacing_units.emplace_back(std::string_view{ "batch" }); } + const auto& level_downsample = res_info.level_downsample(level); for (; index < ndim; ++index) { - int64_t dim_char = dim_indices_.index(dims[index]); + int64_t dim_index = dim_indices_.index(dims[index]); + if (dim_index < 0) + { + throw std::runtime_error(fmt::format("[Error] Invalid dimension name: {}", dims[index])); + } - const char* str_ptr = image_metadata_->spacing_units[dim_char]; - size_t str_len = strlen(image_metadata_->spacing_units[dim_char]); + const char* str_ptr = image_metadata_->spacing_units[dim_index]; + size_t str_len = strlen(image_metadata_->spacing_units[dim_index]); char* spacing_unit = static_cast(resource.allocate(str_len + 1)); memcpy(spacing_unit, str_ptr, str_len); spacing_unit[str_len] = '\0'; - // std::pmr::string spacing_unit{ image_metadata_->spacing_units[dim_char], &resource }; + // std::pmr::string spacing_unit{ image_metadata_->spacing_units[dim_index], &resource }; spacing_units.emplace_back(std::string_view{ spacing_unit }); + + // Update spacing based on level_downsample + char dim_char = image_metadata_->dims[dim_index]; + switch (dim_char) + { + case 'X': + case 'Y': + spacing[index] /= level_downsample; + break; + default: + break; + } } std::pmr::vector origin(&resource); diff --git a/cucim.code-workspace b/cucim.code-workspace index ac73eea68..a73035b60 100644 --- a/cucim.code-workspace +++ b/cucim.code-workspace @@ -33,7 +33,7 @@ "CUCIM_TESTDATA_FOLDER": "${workspaceDirectory}/test_data", // Add cuslide plugin's library path to LD_LIBRARY_PATH "LD_LIBRARY_PATH": 
"${workspaceDirectory}/build-debug/lib:${workspaceDirectory}/cpp/plugins/cucim.kit.cuslide/build-debug/lib:${workspaceDirectory}/temp/cuda/lib64:${os_env:LD_LIBRARY_PATH}", - "CUCIM_TEST_PLUGIN_PATH": "cucim.kit.cuslide@22.06.00.so" + "CUCIM_TEST_PLUGIN_PATH": "cucim.kit.cuslide@22.08.00.so" }, "cwd": "${workspaceDirectory}", "catch2": { @@ -226,7 +226,7 @@ }, { "name": "CUCIM_TEST_PLUGIN_PATH", - "value": "cucim.kit.cuslide@22.06.00.so" + "value": "cucim.kit.cuslide@22.08.00.so" } ], "console": "externalTerminal", @@ -254,7 +254,7 @@ }, { "name": "CUCIM_TEST_PLUGIN_PATH", - "value": "cucim.kit.cuslide@22.06.00.so" + "value": "cucim.kit.cuslide@22.08.00.so" } ], "console": "externalTerminal", @@ -286,7 +286,7 @@ }, { "name": "CUCIM_TEST_PLUGIN_PATH", - "value": "cucim.kit.cuslide@22.06.00.so" + "value": "cucim.kit.cuslide@22.08.00.so" } ], "console": "externalTerminal", diff --git a/docs/Makefile b/docs/Makefile index aeb3540aa..2f6cf8a2b 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -4,7 +4,7 @@ # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build -SPHINXPROJ = cuImage +SPHINXPROJ = cuCIM SOURCEDIR = source BUILDDIR = build diff --git a/docs/source/_static/params.css b/docs/source/_static/params.css deleted file mode 100644 index c080d3669..000000000 --- a/docs/source/_static/params.css +++ /dev/null @@ -1,30 +0,0 @@ -/* Mirrors the change in: - * https://github.com/sphinx-doc/sphinx/pull/5976 - * which is not showing up in our theme. 
- */ -.classifier:before { - font-style: normal; - margin: 0.5em; - content: ":"; -} - -:root { - - --pst-color-active-navigation: 114, 83, 237; - --pst-color-navbar-link: 77, 77, 77; - --pst-color-navbar-link-hover: var(--pst-color-active-navigation); - --pst-color-navbar-link-active: var(--pst-color-active-navigation); - --pst-color-sidebar-link: 77, 77, 77; - --pst-color-sidebar-link-hover: var(--pst-color-active-navigation); - --pst-color-sidebar-link-active: var(--pst-color-active-navigation); - --pst-color-sidebar-expander-background-hover: 244, 244, 244; - --pst-color-sidebar-caption: 77, 77, 77; - --pst-color-toc-link: 119, 117, 122; - --pst-color-toc-link-hover: var(--pst-color-active-navigation); - --pst-color-toc-link-active: var(--pst-color-active-navigation); - -} - -.special-table td, .special-table th { - border: 1px solid #dee2e6; -} diff --git a/docs/source/api.rst b/docs/source/api.rst index 04cfda78c..059886c7e 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -55,6 +55,13 @@ intensity :members: :undoc-members: +morphology +---------- + +.. automodule:: cucim.core.operations.morphology + :members: + :undoc-members: + spatial ------- diff --git a/docs/source/conf.py b/docs/source/conf.py index dfad46b01..e3957382d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,6 +39,7 @@ 'sphinx.ext.intersphinx', 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', + 'sphinx.ext.mathjax', 'numpydoc', 'doi_role', 'IPython.sphinxext.ipython_console_highlighting', @@ -71,9 +72,9 @@ # built documents. # # The short X.Y version. -version = '22.06' +version = '22.08' # The full version, including alpha/beta/rc tags. -release = '22.06.00' +release = '22.08.00' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -172,8 +173,14 @@ ] -# Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {'https://docs.python.org/': None} +# Configuration for intersphinx: refer to other projects documentation. +intersphinx_mapping = { + 'python': ('https://docs.python.org/', None), + 'cupy': ('https://docs.cupy.dev/en/stable/', None), + 'numpy': ('https://numpy.org/doc/stable', None), + 'scipy': ('https://docs.scipy.org/doc/scipy/', None), + 'skimage': ('https://scikit-image.org/docs/stable/', None), +} # Config numpydoc @@ -182,5 +189,5 @@ def setup(app): - app.add_css_file('params.css') app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") + app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer") diff --git a/docs/source/index.rst b/docs/source/index.rst index 5c8dd9fcc..1fd04ad19 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,18 +1,81 @@ Welcome to cuCIM's documentation! ==================================== -cuCIM is a an extensible toolkit designed to provide GPU-accelearted I/O, -computer vision and image processing primitives for N-Dimensional images -with a focus on biomedical imaging. Our API mirrors `scikit-image +cuCIM (Compute Unified Device Architecture Clara IMage) is an open-source, accelerated computer vision and image processing software library for multidimensional images used in biomedical, geospatial, material and life science, and remote sensing use cases. 
+ +cuCIM provides GPU-accelerated I/O, +computer vision and image processing primitives for N-Dimensional images including: + +- color conversion +- exposure +- feature extraction +- filters +- measure +- metrics +- morphology +- registration +- restoration +- segmentation +- transforms + +cuCIM supports the following formats: + +- Aperio ScanScope Virtual Slide (SVS) +- Philips TIFF +- Generic Tiled, Multi-resolution RGB TIFF files with the following + compression schemes: + + - No Compression + - JPEG + - JPEG2000 + - Lempel-Ziv-Welch (LZW) + - Deflate + +Our API mirrors `scikit-image `_ for image manipulation and `OpenSlide `_ for image loading. -cuCIM is fully open sourced under the Apache-2.0 license, and the Clara and RAPIDS teams welcomes new and seasoned -contributors, users and hobbyists! Thank you for your wonderful support! +cuCIM is interoperable with the following workflows: + +- Albumentations +- CuPy +- Data Loading Library (DALI) +- JFX +- MONAI +- Numba +- NumPy +- PyTorch +- TensorFlow +- Triton + +cuCIM is fully open sourced under the Apache-2.0 license, and the Clara +and RAPIDS teams welcome new and seasoned contributors, users and +hobbyists! You may download cuCIM via Anaconda `Conda`_ or `PyPI`_. Thank +you for your wonderful support! Below, we provide some resources to help +get you started. + +**Blogs** + +- `Accelerating Scikit-Image API with cuCIM: n-Dimensional Image + Processing and IO on GPUs`_ +- `Accelerating Digital Pathology Pipelines with NVIDIA Clara™ Deploy`_ + +**Webinars** + +- `cuCIM: a GPU Image IO and Processing Library`_ + +.. _Conda: https://anaconda.org/rapidsai-nightly/cucim +.. _PyPI: https://pypi.org/project/cucim/ +.. _`Accelerating Scikit-Image API with cuCIM: n-Dimensional Image Processing and IO on GPUs`: https://developer.nvidia.com/blog/cucim-rapid-n-dimensional-image-processing-and-i-o-on-gpus/ +.. 
_Accelerating Digital Pathology Pipelines with NVIDIA Clara™ Deploy: https://developer.nvidia.com/blog/accelerating-digital-pathology-pipelines-with-nvidia-clara-deploy-2/ +.. _`cuCIM: a GPU Image IO and Processing Library`: https://www.youtube.com/watch?v=G46kOOM9xbQ + + +Contents +================== .. toctree:: :maxdepth: 4 - :caption: Contents: api.rst diff --git a/python/cucim/CHANGELOG.md b/python/cucim/CHANGELOG.md index d29d31b41..81d223e82 100644 --- a/python/cucim/CHANGELOG.md +++ b/python/cucim/CHANGELOG.md @@ -1,6 +1,30 @@ # Changelog (See [Release Notes](https://github.com/rapidsai/cucim/wiki/Release-Notes)) +## [22.06.00](https://github.com/rapidsai/cucim/wiki/release_notes_v22.06.00) + +- [Update/Breaking] Promote small integer types to single rather than double precision ([#278](https://github.com/rapidsai/cucim/pull/278)) [@grlee77](https://github.com/grlee77) +- [Bug] Populate correct channel names for RGBA image ([#294](https://github.com/rapidsai/cucim/pull/294)) [@gigony](https://github.com/gigony) +- [Bug] Merge branch-22.04 into branch-22.06 ([#258](https://github.com/rapidsai/cucim/pull/258)) [@jakirkham](https://github.com/jakirkham) +- [New] add missing `cucim.skimage.segmentation.clear_border` function ([#267](https://github.com/rapidsai/cucim/pull/267)) [@grlee77](https://github.com/grlee77) +- [New] add `cucim.core.operations.color.stain_extraction_pca` and `cucim.core.operations.color.normalize_colors_pca` for digital pathology H&E stain extraction and normalization ([#273](https://github.com/rapidsai/cucim/pull/273)) [@grlee77](https://github.com/grlee77), [@drbeh](https://github.com/drbeh) +- [Update] Update to use DLPack v0.6 ([#295](https://github.com/rapidsai/cucim/pull/295)) [@gigony](https://github.com/gigony) +- [Update] Remove plugin-related messages temporarily ([#291](https://github.com/rapidsai/cucim/pull/291)) [@gigony](https://github.com/gigony) +- [Update] Simplify recipes 
([#286](https://github.com/rapidsai/cucim/pull/286)) [@Ethyling](https://github.com/Ethyling) +- [Update] Use cupy.fuse to improve efficiency hessian_matrix_eigvals ([#280](https://github.com/rapidsai/cucim/pull/280)) [@grlee77](https://github.com/grlee77) +- [Update] improve efficiency of histogram-based thresholding functions ([#276](https://github.com/rapidsai/cucim/pull/276)) [@grlee77](https://github.com/grlee77) +- [Update] Remove unused dependencies in GPU tests job ([#268](https://github.com/rapidsai/cucim/pull/268)) [@Ethyling](https://github.com/Ethyling) +- [Update] Enable footprint decomposition for morphology ([#274](https://github.com/rapidsai/cucim/pull/274)) [@grlee77](https://github.com/grlee77) +- [Update] Use conda compilers ([#232](https://github.com/rapidsai/cucim/pull/232)) [@Ethyling](https://github.com/Ethyling) +- [Update] Build packages using mambabuild ([#216](https://github.com/rapidsai/cucim/pull/216)) [@Ethyling](https://github.com/Ethyling) +- [Doc] update outdated links to example data ([#289](https://github.com/rapidsai/cucim/pull/289)) [@grlee77](https://github.com/grlee77) +- [Doc] Add missing API docs ([#275](https://github.com/rapidsai/cucim/pull/275)) [@grlee77](https://github.com/grlee77) + +## [22.02.04](https://github.com/rapidsai/cucim/wiki/release_notes_v22.04.00) + +- [Bug] Fix ImportError from vendored code ([#252](https://github.com/rapidsai/cucim/pull/252)) [@grlee77](https://github.com/grlee77) +- [Bug] Fix wrong dimension in metadata ([#248](https://github.com/rapidsai/cucim/pull/248)) [@gigony](https://github.com/gigony) + ## [22.02.01](https://github.com/rapidsai/cucim/wiki/release_notes_v22.02.01) - [Bug] Check nullptr of handler in CuFileDriver::close() ([#229](https://github.com/rapidsai/cucim/pull/229)) [@gigony](https://github.com/gigony) diff --git a/python/cucim/README.md b/python/cucim/README.md index 94556f23c..8f1413137 100644 --- a/python/cucim/README.md +++ b/python/cucim/README.md @@ -6,10 +6,13 @@ 
**NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cucim/blob/main/README.md) ensure you are on the `main` branch. -- [GTC 2021 cuCIM: A GPU Image I/O and Processing Toolkit [S32194]](https://www.nvidia.com/en-us/on-demand/search/?facet.mimetype[]=event%20session&layout=list&page=1&q=cucim&sort=date) - - [video](https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s32194/) +- [GTC 2022 Accelerating Storage IO to GPUs with Magnum IO [S41347]](https://events.rainfocus.com/widget/nvidia/gtcspring2022/sessioncatalog/session/1634960000577001Etxp) + - cuCIM's GDS API examples: - [SciPy 2021 cuCIM - A GPU image I/O and processing library](https://www.scipy2021.scipy.org/) - [video](https://youtu.be/G46kOOM9xbQ) +- [GTC 2021 cuCIM: A GPU Image I/O and Processing Toolkit [S32194]](https://www.nvidia.com/en-us/on-demand/search/?facet.mimetype[]=event%20session&layout=list&page=1&q=cucim&sort=date) + - [video](https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s32194/) + ## Quick Start @@ -24,7 +27,7 @@ pip install scipy scikit-image cupy-cuda110 ### Jupyter Notebooks -Please check out our [Welcome](https://github.com/rapidsai/cucim/blob/branch-22.06/notebooks/Welcome.ipynb) notebook. +Please check out our [Welcome](https://github.com/rapidsai/cucim/blob/branch-22.08/notebooks/Welcome.ipynb) notebook. ### Open Image @@ -97,15 +100,15 @@ visualize(region) Aperio SVS (.svs) image format is supported since cuCIM [v21.10.01](https://github.com/rapidsai/cucim/wiki/release_notes_v21.10.01#support-aperio-svs-svs). -Please check [this notebook](https://nbviewer.org/github/rapidsai/cucim/blob/branch-22.06/notebooks/Supporting_Aperio_SVS_Format.ipynb) to see how to use the feature. +Please check [this notebook](https://nbviewer.org/github/rapidsai/cucim/blob/branch-22.08/notebooks/Supporting_Aperio_SVS_Format.ipynb) to see how to use the feature. 
### Using Cache -Please look at this [notebook](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.06/notebooks/Using_Cache.ipynb). +Please look at this [notebook](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.08/notebooks/Using_Cache.ipynb). ### Accessing File with GDS -Please look at this [notebook](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.06/notebooks/Accessing_File_with_GDS.ipynb). +Please look at this [notebook](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.08/notebooks/Accessing_File_with_GDS.ipynb). ### NVTX Support for Performance Analysis @@ -178,4 +181,4 @@ is used in this project. Apache-2.0 License (see `LICENSE` file). -Copyright (c) 2020-2021, NVIDIA CORPORATION. +Copyright (c) 2020-2022, NVIDIA CORPORATION. diff --git a/python/cucim/VERSION b/python/cucim/VERSION index ddc883c42..231fac64b 100644 --- a/python/cucim/VERSION +++ b/python/cucim/VERSION @@ -1 +1 @@ -22.06.00 +22.08.00 diff --git a/python/cucim/docs/getting_started/index.md b/python/cucim/docs/getting_started/index.md index 200c9fc3b..cede687cf 100644 --- a/python/cucim/docs/getting_started/index.md +++ b/python/cucim/docs/getting_started/index.md @@ -14,15 +14,15 @@ ## Installation -Please download the latest SDK package (`cuCIM-v22.06.00-linux.tar.gz`). +Please download the latest SDK package (`cuCIM-v22.08.00-linux.tar.gz`). Untar the downloaded file. ```bash -mkdir -p cuCIM-v22.06.00 -tar -xzvf cuCIM-v22.06.00-linux.tar.gz -C cuCIM-v22.06.00 +mkdir -p cuCIM-v22.08.00 +tar -xzvf cuCIM-v22.08.00-linux.tar.gz -C cuCIM-v22.08.00 -cd cuCIM-v22.06.00 +cd cuCIM-v22.08.00 ``` ## Run command @@ -147,7 +147,7 @@ Its execution would show some metadata information and create two files -- `outp ``` $ ./bin/tiff_image notebooks/input/image.tif . [Plugin: cucim.kit.cuslide] Loading... 
-[Plugin: cucim.kit.cuslide] Loading the dynamic library from: cucim.kit.cuslide@22.06.00.so +[Plugin: cucim.kit.cuslide] Loading the dynamic library from: cucim.kit.cuslide@22.08.00.so [Plugin: cucim.kit.cuslide] loaded successfully. Version: 0 Initializing plugin: cucim.kit.cuslide (interfaces: [cucim::io::IImageFormat v0.1]) (impl: cucim.kit.cuslide) is_loaded: true diff --git a/python/cucim/docs/index.md b/python/cucim/docs/index.md index 4f03922c3..e9c4df2a4 100644 --- a/python/cucim/docs/index.md +++ b/python/cucim/docs/index.md @@ -18,7 +18,7 @@ development/index --> # cuCIM Documentation -Current latest version is [Version 22.06.00](release_notes/v22.06.00.md). +Current latest version is [Version 22.08.00](release_notes/v22.08.00.md). **cuCIM** a toolkit to provide GPU accelerated I/O, image processing & computer vision primitives for N-Dimensional images with a focus on biomedical imaging. diff --git a/python/cucim/requirements-test.txt b/python/cucim/requirements-test.txt index ffdcf7092..6d62d5a29 100644 --- a/python/cucim/requirements-test.txt +++ b/python/cucim/requirements-test.txt @@ -1,8 +1,8 @@ -GPUtil==1.4.0 -imagecodecs==2021.6.8 -openslide-python==1.1.2 -psutil==5.8.0 -pytest==6.2.4 -pytest-cov==2.12.1 -pytest-lazy-fixture==0.6.3 -tifffile==2021.7.2 +GPUtil>=1.4.0 +imagecodecs>=2021.6.8 +openslide-python>=1.1.2 +psutil>=5.8.0 +pytest>=6.2.4 +pytest-cov>=2.12.1 +pytest-lazy-fixture>=0.6.3 +tifffile>=2022.7.28 diff --git a/python/cucim/setup.cfg b/python/cucim/setup.cfg index 517f33fff..7743540c2 100644 --- a/python/cucim/setup.cfg +++ b/python/cucim/setup.cfg @@ -65,6 +65,6 @@ line_length = 80 known_first_party = cucim default_section = THIRDPARTY forced_separate = test_cucim -skip = .tox,.eggs,ci/templates,build,dist,versioneer.py +skip = .tox,.eggs,ci/templates,build,dist,versioneer.py,ndimage.py sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER multi_line_output = GRID diff --git a/python/cucim/src/cucim/core/__init__.py 
b/python/cucim/src/cucim/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/cucim/src/cucim/core/operations/color/stain_normalizer.py b/python/cucim/src/cucim/core/operations/color/stain_normalizer.py index f5141ede2..3e82b26b8 100644 --- a/python/cucim/src/cucim/core/operations/color/stain_normalizer.py +++ b/python/cucim/src/cucim/core/operations/color/stain_normalizer.py @@ -13,7 +13,6 @@ # limitations under the License. import math -import warnings from typing import Union import cupy as cp @@ -56,12 +55,9 @@ def image_to_absorbance(image, source_intensity=255.0, dtype=cp.float32): ----- If `image` has an integer dtype it will be clipped to range ``[1, source_intensity]``, while float image inputs are clipped to range - ``[source_intensity/255, source_intensity]. The minimum is to avoid log(0). - Absorbance is then given by - - .. math:: - - absorbance = \\log{\\frac{image}{source_intensity}}. + ``[source_intensity/255, source_intensity]``. + The minimum is to avoid log(0). Absorbance is then given by + ``absorbance = log(image / source_intensity)``. """ dtype = cp.dtype(dtype) if dtype.kind != "f": @@ -209,9 +205,7 @@ def _covariance(a): ddof = 1 fact = X.shape[1] - ddof if fact <= 0: - warnings.warn("Degrees of freedom <= 0 for slice", - RuntimeWarning, stacklevel=2) - fact = 0.0 + raise RuntimeError("Degrees of freedom <= 0") X -= X.mean(axis=1, keepdims=True) if not X.flags.f_contiguous: @@ -314,10 +308,10 @@ def stain_extraction_pca(image, source_intensity=240, alpha=1, beta=0.345, ) # remove transparent pixels - absorbance = absorbance[:, cp.all(absorbance > beta, axis=0)] - if absorbance.size == 0: + absorbance = absorbance[:, cp.any(absorbance > beta, axis=0)] + if absorbance.size == 0 or absorbance.shape[1] <= 1: raise ValueError( - "All pixels of the input image are below the threshold." + "Multiple pixels of the input must be above the `beta` threshold." 
) # compute eigenvectors (do small 3x3 matrix calculations on the host) @@ -383,7 +377,10 @@ def _get_raw_concentrations(src_stain_coeff, absorbance): def _normalized_from_concentrations(conc_raw, max_percentile, ref_stain_coeff, ref_max_conc, source_intensity, original_shape, channel_axis): - """Determine normalized image from concentrations.""" + """Determine normalized image from concentrations. + + Note: This function will also modify conc_raw in-place. + """ # verify conc_raw is shape (2, n_pixels) if conc_raw.ndim != 2 or conc_raw.shape[0] != 2: @@ -410,10 +407,10 @@ def _normalized_from_concentrations(conc_raw, max_percentile, ref_stain_coeff, for ch_raw in conc_raw] ) normalization_factors = ref_max_conc / max_conc - conc_norm = conc_raw * normalization_factors[:, cp.newaxis] + conc_raw *= normalization_factors[:, cp.newaxis] # reconstruct the image based on the reference stain matrix - absorbance_norm = ref_stain_coeff.dot(conc_norm) + absorbance_norm = ref_stain_coeff.dot(conc_raw) image_norm = absorbance_to_image( absorbance_norm, source_intensity=source_intensity, dtype=np.uint8 ) @@ -527,8 +524,8 @@ def normalize_colors_pca( image_norm = _normalized_from_concentrations( conc_raw=conc_raw, max_percentile=100 - alpha, - ref_max_conc=cp.asarray(ref_max_conc), - ref_stain_coeff=cp.asarray(ref_stain_coeff), + ref_max_conc=cp.asarray(ref_max_conc, dtype=conc_raw.dtype), + ref_stain_coeff=cp.asarray(ref_stain_coeff, dtype=conc_raw.dtype), source_intensity=source_intensity, channel_axis=channel_axis, original_shape=image.shape, diff --git a/python/cucim/src/cucim/core/operations/morphology/__init__.py b/python/cucim/src/cucim/core/operations/morphology/__init__.py new file mode 100644 index 000000000..a9c676edb --- /dev/null +++ b/python/cucim/src/cucim/core/operations/morphology/__init__.py @@ -0,0 +1,3 @@ +from ._distance_transform import distance_transform_edt + +__all__ = ["distance_transform_edt"] diff --git 
a/python/cucim/src/cucim/core/operations/morphology/_distance_transform.py b/python/cucim/src/cucim/core/operations/morphology/_distance_transform.py new file mode 100644 index 000000000..70523c5bf --- /dev/null +++ b/python/cucim/src/cucim/core/operations/morphology/_distance_transform.py @@ -0,0 +1,183 @@ +import numpy as np + +from ._pba_2d import _pba_2d +from ._pba_3d import _pba_3d + +# TODO: support sampling distances +# support the distances and indices output arguments +# support chamfer, chessboard and l1/manhattan distances too? + + +def distance_transform_edt(image, sampling=None, return_distances=True, + return_indices=False, distances=None, indices=None, + *, block_params=None, float64_distances=False): + r"""Exact Euclidean distance transform. + + This function calculates the distance transform of the `input`, by + replacing each foreground (non-zero) element, with its shortest distance to + the background (any zero-valued element). + + In addition to the distance transform, the feature transform can be + calculated. In this case the index of the closest background element to + each foreground element is returned in a separate array. + + Parameters + ---------- + image : array_like + Input data to transform. Can be any type but will be converted into + binary: 1 wherever image equates to True, 0 elsewhere. + sampling : float, or sequence of float, optional + Spacing of elements along each dimension. If a sequence, must be of + length equal to the image rank; if a single number, this is used for + all axes. If not specified, a grid spacing of unity is implied. + return_distances : bool, optional + Whether to calculate the distance transform. + return_indices : bool, optional + Whether to calculate the feature transform. + distances : float32 cupy.ndarray, optional + An output array to store the calculated distance transform, instead of + returning it. `return_distances` must be True. It must be the same + shape as `image`. 
+ indices : int32 cupy.ndarray, optional + An output array to store the calculated feature transform, instead of + returning it. `return_indices` must be True. Its shape must be + `(image.ndim,) + image.shape`. + + Other Parameters + ---------------- + block_params : 3-tuple of int + The m1, m2, m3 algorithm parameters as described in [2]_. If None, + suitable defaults will be chosen. Note: This parameter is specific to + cuCIM and does not exist in SciPy. + float64_distances : bool, optional + If True, use double precision in the distance computation (to match + SciPy behavior). Otherwise, single precision will be used for + efficiency. Note: This parameter is specific to cuCIM and does not + exist in SciPy. + + Returns + ------- + distances : float64 ndarray, optional + The calculated distance transform. Returned only when + `return_distances` is True and `distances` is not supplied. It will + have the same shape as `image`. + indices : int32 ndarray, optional + The calculated feature transform. It has an image-shaped array for each + dimension of the image. See example below. Returned only when + `return_indices` is True and `indices` is not supplied. + + Notes + ----- + The Euclidean distance transform gives values of the Euclidean distance. + + .. math:: + + y_i = \sqrt{\sum_{i}^{n} (x[i] - b[i])^2} + + where :math:`b[i]` is the background point (value 0) with the smallest + Euclidean distance to input points :math:`x[i]`, and :math:`n` is the + number of dimensions. + + Note that the `indices` output may differ from the one given by + :func:`scipy.ndimage.distance_transform_edt` in the case of input pixels + that are equidistant from multiple background points. + + The parallel banding algorithm implemented here was originally described in + [1]_. The kernels used here correspond to the revised PBA+ implementation + that is described on the author's website [2]_. The source code of the + author's PBA+ implementation is available at [3]_. 
+ + References + ---------- + .. [1] Thanh-Tung Cao, Ke Tang, Anis Mohamed, and Tiow-Seng Tan. 2010. + Parallel Banding Algorithm to compute exact distance transform with the + GPU. In Proceedings of the 2010 ACM SIGGRAPH symposium on Interactive + 3D Graphics and Games (I3D ’10). Association for Computing Machinery, + New York, NY, USA, 83–90. + DOI:https://doi.org/10.1145/1730804.1730818 + .. [2] https://www.comp.nus.edu.sg/~tants/pba.html + .. [3] https://github.com/orzzzjq/Parallel-Banding-Algorithm-plus + + Examples + -------- + >>> import cupy as cp + >>> from cucim.core.operations import morphology + >>> a = cp.array(([0,1,1,1,1], + ... [0,0,1,1,1], + ... [0,1,1,1,1], + ... [0,1,1,1,0], + ... [0,1,1,0,0])) + >>> morphology.distance_transform_edt(a) + array([[ 0. , 1. , 1.4142, 2.2361, 3. ], + [ 0. , 0. , 1. , 2. , 2. ], + [ 0. , 1. , 1.4142, 1.4142, 1. ], + [ 0. , 1. , 1.4142, 1. , 0. ], + [ 0. , 1. , 1. , 0. , 0. ]]) + + With a sampling of 2 units along x, 1 along y: + + >>> morphology.distance_transform_edt(a, sampling=[2,1]) + array([[ 0. , 1. , 2. , 2.8284, 3.6056], + [ 0. , 0. , 1. , 2. , 3. ], + [ 0. , 1. , 2. , 2.2361, 2. ], + [ 0. , 1. , 2. , 1. , 0. ], + [ 0. , 1. , 1. , 0. , 0. 
]]) + + Asking for indices as well: + + >>> edt, inds = morphology.distance_transform_edt(a, return_indices=True) + >>> inds + array([[[0, 0, 1, 1, 3], + [1, 1, 1, 1, 3], + [2, 2, 1, 3, 3], + [3, 3, 4, 4, 3], + [4, 4, 4, 4, 4]], + [[0, 0, 1, 1, 4], + [0, 1, 1, 1, 4], + [0, 0, 1, 4, 4], + [0, 0, 3, 3, 4], + [0, 0, 3, 3, 4]]]) + + """ + if distances is not None: + raise NotImplementedError( + "preallocated distances image is not supported" + ) + if indices is not None: + raise NotImplementedError( + "preallocated indices image is not supported" + ) + scalar_sampling = None + if sampling is not None: + sampling = np.unique(np.atleast_1d(sampling)) + if len(sampling) == 1: + scalar_sampling = float(sampling) + sampling = None + else: + raise NotImplementedError( + "non-uniform values in sampling is not currently supported" + ) + + if image.ndim == 3: + pba_func = _pba_3d + elif image.ndim == 2: + pba_func = _pba_2d + else: + raise NotImplementedError( + "Only 2D and 3D distance transforms are supported.") + + vals = pba_func( + image, + sampling=sampling, + return_distances=return_distances, + return_indices=return_indices, + block_params=block_params + ) + + if return_distances and scalar_sampling is not None: + vals = (vals[0] * scalar_sampling,) + vals[1:] + + if len(vals) == 1: + vals = vals[0] + + return vals diff --git a/python/cucim/src/cucim/core/operations/morphology/_pba_2d.py b/python/cucim/src/cucim/core/operations/morphology/_pba_2d.py new file mode 100644 index 000000000..edb10a983 --- /dev/null +++ b/python/cucim/src/cucim/core/operations/morphology/_pba_2d.py @@ -0,0 +1,300 @@ +import math +import os + +import cupy + +pba2d_defines_template = """ + +// MARKER is used to mark blank pixels in the texture. +// Any uncolored pixels will have x = MARKER. 
+// Input texture should have x = MARKER for all pixels other than sites +#define MARKER {marker} +#define BLOCKSIZE {block_size_2d} +#define pixel_int2_t {pixel_int2_t} // typically short2 (int2 for images with > 32k pixels per side) +#define make_pixel(x, y) {make_pixel_func}(x, y) // typically make_short2 (make_int2 images with > 32k pixels per side + +""" # noqa + + +def _init_marker(int_dtype): + """use a minimum value that is appropriate to the integer dtype""" + if int_dtype == cupy.int16: + # marker = cupy.iinfo(int_dtype).min + marker = -32768 + elif int_dtype == cupy.int32: + # divide by two so we don't have to promote other intermediate int + # variables to 64-bit int + marker = -2147483648 // 2 + else: + raise ValueError( + "expected int_dtype to be either cupy.int16 or cupy.int32" + ) + return marker + + +@cupy.memoize(True) +def get_pba2d_src(block_size_2d=64, marker=-32768, pixel_int2_t='short2'): + make_pixel_func = 'make_' + pixel_int2_t + + pba2d_code = pba2d_defines_template.format( + block_size_2d=block_size_2d, + marker=marker, + pixel_int2_t=pixel_int2_t, + make_pixel_func=make_pixel_func + ) + kernel_directory = os.path.join(os.path.dirname(__file__), 'cuda') + with open(os.path.join(kernel_directory, 'pba_kernels_2d.h'), 'rt') as f: + pba2d_kernels = '\n'.join(f.readlines()) + + pba2d_code += pba2d_kernels + return pba2d_code + + +def _get_block_size(check_warp_size=False): + if check_warp_size: + dev = cupy.cuda.runtime.getDevice() + device_properties = cupy.cuda.runtime.getDeviceProperties(dev) + return int(device_properties['warpSize']) + else: + return 32 + + +def _pack_int2(arr, marker=-32768, int_dtype=cupy.int16): + if arr.ndim != 2: + raise ValueError("only 2d arr suppported") + input_x = cupy.zeros(arr.shape, dtype=int_dtype) + input_y = cupy.zeros(arr.shape, dtype=int_dtype) + # TODO: create custom kernel for setting values in input_x, input_y + cond = arr == 0 + y, x = cupy.where(cond) + input_x[cond] = x + mask = arr != 0 + 
input_x[mask] = marker # 1 << 32 + input_y[cond] = y + input_y[mask] = marker # 1 << 32 + int2_dtype = cupy.dtype({'names': ['x', 'y'], 'formats': [int_dtype] * 2}) + # in C++ code x is the contiguous axis and corresponds to width + # y is the non-contiguous axis and corresponds to height + # given that, store input_x as the last axis here + return cupy.squeeze( + cupy.stack((input_x, input_y), axis=-1).view(int2_dtype) + ) + + +def _unpack_int2(img, make_copy=False, int_dtype=cupy.int16): + temp = img.view(int_dtype).reshape(img.shape + (2,)) + if make_copy: + temp = temp.copy() + return temp + + +def _determine_padding(shape, padded_size, block_size): + # all kernels assume equal size along both axes, so pad up to equal size if + # shape is not isotropic + orig_sy, orig_sx = shape + if orig_sx != padded_size or orig_sy != padded_size: + padding_width = ( + (0, padded_size - orig_sy), (0, padded_size - orig_sx) + ) + else: + padding_width = None + return padding_width + + +def _pba_2d(arr, sampling=None, return_distances=True, return_indices=False, + block_params=None, check_warp_size=False, *, + float64_distances=False): + + # input_arr: a 2D image + # For each site at (x, y), the pixel at coordinate (x, y) should contain + # the pair (x, y). Pixels that are not sites should contain the pair + # (MARKER, MARKER) + + # Note: could query warp size here, but for now just assume 32 to avoid + # overhead of querying properties + block_size = _get_block_size(check_warp_size) + + if sampling is not None: + raise NotImplementedError("sampling not yet supported") + # if len(sampling) != 2: + # raise ValueError("sampling must be a sequence of two values.") + + padded_size = math.ceil(max(arr.shape) / block_size) * block_size + if block_params is None: + # should be <= size / 64. 
sy must be a multiple of m1 + m1 = max(1, min(padded_size // block_size, 32)) + + # size must be a multiple of m2 + m2 = max(1, min(padded_size // block_size, 32)) + # m2 must also be a power of two + m2 = 2**math.floor(math.log2(m2)) + if padded_size % m2 != 0: + raise RuntimeError("error in setting default m2") + + # should be <= 64. texture size must be a multiple of m3 + m3 = min(min(m1, m2), 2) + else: + m1, m2, m3 = block_params + + if m1 > padded_size // block_size: + raise ValueError("m1 too large. must be <= arr.shape[0] // 32") + if m2 > padded_size // block_size: + raise ValueError("m2 too large. must be <= arr.shape[1] // 32") + for m in (m1, m2, m3): + if padded_size % m != 0: + raise ValueError( + f"Largest dimension of image ({padded_size}) must be evenly " + f"disivible by each element of block_params: {(m1, m2, m3)}." + ) + + shape_max = max(arr.shape) + if shape_max <= 32768: + int_dtype = cupy.int16 + pixel_int2_type = 'short2' + else: + if shape_max > (1 << 24): + # limit to coordinate range to 2**24 due to use of __mul24 in + # coordinate TOID macro + raise ValueError( + f"maximum axis size of {1 << 24} exceeded, for image with " + f"shape {arr.shape}" + ) + int_dtype = cupy.int32 + pixel_int2_type = 'int2' + + marker = _init_marker(int_dtype) + + orig_sy, orig_sx = arr.shape + padding_width = _determine_padding(arr.shape, padded_size, block_size) + if padding_width is not None: + arr = cupy.pad(arr, padding_width, mode='constant', constant_values=1) + size = arr.shape[0] + + input_arr = _pack_int2(arr, marker=marker, int_dtype=int_dtype) + output = cupy.zeros_like(input_arr) + + int2_dtype = cupy.dtype({'names': ['x', 'y'], 'formats': [int_dtype] * 2}) + margin = cupy.empty((2 * m1 * size,), dtype=int2_dtype) + + # phase 1 of PBA. 
m1 must divide texture size and be <= 64 + pba2d = cupy.RawModule( + code=get_pba2d_src( + block_size_2d=block_size, + marker=marker, + pixel_int2_t=pixel_int2_type, + ) + ) + kernelFloodDown = pba2d.get_function('kernelFloodDown') + kernelFloodUp = pba2d.get_function('kernelFloodUp') + kernelPropagateInterband = pba2d.get_function('kernelPropagateInterband') + kernelUpdateVertical = pba2d.get_function('kernelUpdateVertical') + kernelProximatePoints = pba2d.get_function('kernelProximatePoints') + kernelCreateForwardPointers = pba2d.get_function( + 'kernelCreateForwardPointers' + ) + kernelMergeBands = pba2d.get_function('kernelMergeBands') + kernelDoubleToSingleList = pba2d.get_function('kernelDoubleToSingleList') + kernelColor = pba2d.get_function('kernelColor') + + block = (block_size, 1, 1) + grid = (math.ceil(size / block[0]), m1, 1) + bandSize1 = size // m1 + # kernelFloodDown modifies input_arr in-place + kernelFloodDown( + grid, + block, + (input_arr, input_arr, size, bandSize1), + ) + # kernelFloodUp modifies input_arr in-place + kernelFloodUp( + grid, + block, + (input_arr, input_arr, size, bandSize1), + ) + # kernelFloodUp fills values into margin + kernelPropagateInterband( + grid, + block, + (input_arr, margin, size, bandSize1), + ) + # kernelUpdateVertical stores output into an intermediate array of + # transposed shape + kernelUpdateVertical( + grid, + block, + (input_arr, margin, output, size, bandSize1), + ) + + # phase 2 + block = (block_size, 1, 1) + grid = (math.ceil(size / block[0]), m2, 1) + bandSize2 = size // m2 + kernelProximatePoints( + grid, + block, + (output, input_arr, size, bandSize2), + ) + kernelCreateForwardPointers( + grid, + block, + (input_arr, input_arr, size, bandSize2), + ) + # Repeatly merging two bands into one + noBand = m2 + while noBand > 1: + grid = (math.ceil(size / block[0]), noBand // 2) + kernelMergeBands( + grid, + block, + (output, input_arr, input_arr, size, size // noBand), + ) + noBand //= 2 + # Replace the 
forward link with the X coordinate of the seed to remove + # the need of looking at the other texture. We need it for coloring. + grid = (math.ceil(size / block[0]), size) + kernelDoubleToSingleList( + grid, + block, + (output, input_arr, input_arr, size), + ) + + # Phase 3 of PBA + block = (block_size, m3, 1) + grid = (math.ceil(size / block[0]), 1, 1) + kernelColor( + grid, + block, + (input_arr, output, size), + ) + + output = _unpack_int2(output, make_copy=False, int_dtype=int_dtype) + # make sure to crop any padding that was added here! + x = output[:orig_sy, :orig_sx, 0] + y = output[:orig_sy, :orig_sx, 1] + + # raise NotImplementedError("TODO") + vals = () + if return_distances: + # TODO: custom kernel for more efficient distance computation + y0, x0 = cupy.meshgrid( + *( + cupy.arange(s, dtype=cupy.int32) + for s in (orig_sy, orig_sx) + ), + indexing='ij', + sparse=True, + ) + tmp = (x - x0) + dist = tmp * tmp + tmp = (y - y0) + dist += tmp * tmp + if float64_distances: + dist = cupy.sqrt(dist) + else: + dist = dist.astype(cupy.float32) + cupy.sqrt(dist, out=dist) + vals = vals + (dist,) + if return_indices: + indices = cupy.stack((y, x), axis=0) + vals = vals + (indices,) + return vals diff --git a/python/cucim/src/cucim/core/operations/morphology/_pba_3d.py b/python/cucim/src/cucim/core/operations/morphology/_pba_3d.py new file mode 100644 index 000000000..dab484e90 --- /dev/null +++ b/python/cucim/src/cucim/core/operations/morphology/_pba_3d.py @@ -0,0 +1,311 @@ +import functools +import math +import numbers +import os + +import cupy +import numpy as np + +from ._pba_2d import _get_block_size + +try: + # math.lcm was introduced in Python 3.9 + from math import lcm +except ImportError: + + """Fallback implementation of least common multiple (lcm) + + TODO: remove once minimum Python requirement is >= 3.9 + """ + + def _lcm(a, b): + return abs(b * (a // math.gcd(a, b))) + + @functools.lru_cache() + def lcm(*args): + nargs = len(args) + if not 
all(isinstance(a, numbers.Integral) for a in args): + raise TypeError("all arguments must be integers") + if nargs == 0: + return 1 + res = int(args[0]) + if nargs == 1: + return abs(res) + for i in range(1, nargs): + x = int(args[i]) + res = _lcm(res, x) + return res + + +pba3d_defines_template = """ + +#define MARKER {marker} +#define MAX_INT {max_int} +#define BLOCKSIZE {block_size_3d} + +""" + +# For efficiency, the original PBA+ packs three 10-bit integers and two binary +# flags into a single 32-bit integer. The defines in +# `pba3d_defines_encode_32bit` handle this format. +pba3d_defines_encode_32bit = """ +// Sites : ENCODE(x, y, z, 0, 0) +// Not sites : ENCODE(0, 0, 0, 1, 0) or MARKER +#define ENCODED_INT_TYPE int +#define ZERO 0 +#define ONE 1 +#define ENCODE(x, y, z, a, b) (((x) << 20) | ((y) << 10) | (z) | ((a) << 31) | ((b) << 30)) +#define DECODE(value, x, y, z) \ + x = ((value) >> 20) & 0x3ff; \ + y = ((value) >> 10) & 0x3ff; \ + z = (value) & 0x3ff + +#define NOTSITE(value) (((value) >> 31) & 1) +#define HASNEXT(value) (((value) >> 30) & 1) + +#define GET_X(value) (((value) >> 20) & 0x3ff) +#define GET_Y(value) (((value) >> 10) & 0x3ff) +#define GET_Z(value) ((NOTSITE((value))) ? MAX_INT : ((value) & 0x3ff)) + +""" # noqa + + +# 64bit version of ENCODE/DECODE to allow a 20-bit integer per coordinate axis. +pba3d_defines_encode_64bit = """ +// Sites : ENCODE(x, y, z, 0, 0) +// Not sites : ENCODE(0, 0, 0, 1, 0) or MARKER +#define ENCODED_INT_TYPE long long +#define ZERO 0L +#define ONE 1L +#define ENCODE(x, y, z, a, b) (((x) << 40) | ((y) << 20) | (z) | ((a) << 61) | ((b) << 60)) +#define DECODE(value, x, y, z) \ + x = ((value) >> 40) & 0xfffff; \ + y = ((value) >> 20) & 0xfffff; \ + z = (value) & 0xfffff + +#define NOTSITE(value) (((value) >> 61) & 1) +#define HASNEXT(value) (((value) >> 60) & 1) + +#define GET_X(value) (((value) >> 40) & 0xfffff) +#define GET_Y(value) (((value) >> 20) & 0xfffff) +#define GET_Z(value) ((NOTSITE((value))) ? 
@cupy.memoize(True)
def get_pba3d_src(block_size_3d=32, marker=-2147483648, max_int=2147483647,
                  size_max=1024):
    """Assemble the CUDA source code for the 3D PBA+ kernels.

    The result is the concatenation of the ``#define`` preamble generated
    from the arguments, the 32-bit or 64-bit ENCODE/DECODE macro set, and the
    contents of ``cuda/pba_kernels_3d.h`` located next to this module.

    Parameters
    ----------
    block_size_3d : int
        Value substituted for ``BLOCKSIZE``.
    marker : int
        Value substituted for ``MARKER``.
    max_int : int
        Value substituted for ``MAX_INT``.
    size_max : int
        Largest array extent. Values above 1024 select the 64-bit macros
        (20 bits per coordinate); otherwise the 32-bit macros (10 bits per
        coordinate) are used.

    Returns
    -------
    str
        The complete kernel source.
    """
    pba3d_code = pba3d_defines_template.format(
        block_size_3d=block_size_3d, marker=marker, max_int=max_int
    )
    if size_max > 1024:
        pba3d_code += pba3d_defines_encode_64bit
    else:
        pba3d_code += pba3d_defines_encode_32bit
    kernel_directory = os.path.join(os.path.dirname(__file__), 'cuda')
    with open(os.path.join(kernel_directory, 'pba_kernels_3d.h'), 'rt') as f:
        # Read the header verbatim. readlines() keeps each line terminator,
        # so joining its result with '\n' would put a blank line after every
        # source line, including after backslash line continuations.
        pba3d_code += f.read()
    return pba3d_code
def _determine_padding(shape, block_size, m1, m2, m3, blockx, blocky):
    """Compute the trailing padding that makes a 3D shape kernel-compatible.

    Every extent is rounded up to a multiple of the least common multiple of
    ``block_size``, ``m1``, ``m2``, ``m3``, ``blockx`` and ``blocky``, and all
    three axes are then grown to the largest rounded extent, giving a cube.

    Returns
    -------
    tuple of (int, int) pairs or None
        ``((0, pz), (0, py), (0, px))`` amounts to append along each axis, or
        ``None`` when the shape is already a cube whose side is a multiple of
        the LCM.
    """
    # TODO: can possibly revise to consider only particular factors for LCM on
    # a given axis
    multiple = lcm(block_size, m1, m2, m3, blockx, blocky)
    depth, height, width = shape
    original = (depth, height, width)

    def _round_up(extent):
        # round up size to a multiple of the band size
        if extent % multiple == 0:
            return extent
        return multiple * math.ceil(extent / multiple)

    rounded = tuple(_round_up(extent) for extent in original)

    already_aligned = rounded == original
    is_cube = len(set(rounded)) == 1
    if already_aligned and is_cube:
        return None

    target = max(rounded)
    return tuple((0, target - extent) for extent in original)
min(arr.shape) + if s_min <= 4: + blockx = 4 + elif s_min <= 8: + blockx = 8 + elif s_min <= 16: + blockx = 16 + else: + blockx = 32 + blocky = 4 + + block_size = _get_block_size(check_warp_size) + + orig_sz, orig_sy, orig_sx = arr.shape + padding_width = _determine_padding( + arr.shape, block_size, m1, m2, m3, blockx, blocky + ) + if padding_width is not None: + arr = cupy.pad(arr, padding_width, mode='constant', constant_values=1) + size = arr.shape[0] + + # pba algorithm was implemented to use 32-bit integer to store compressed + # coordinates. input_arr will be C-contiguous, int32 + size_max = max(arr.shape) + input_arr = encode3d(arr, size_max=size_max) + buffer_idx = 0 + output = cupy.zeros_like(input_arr) + pba_images = [input_arr, output] + + block = (blockx, blocky, 1) + grid = (size // block[0], size // block[1], 1) + pba3d = cupy.RawModule( + code=get_pba3d_src(block_size_3d=block_size, size_max=size_max) + ) + + kernelFloodZ = pba3d.get_function('kernelFloodZ') + kernelMaurerAxis = pba3d.get_function('kernelMaurerAxis') + kernelColorAxis = pba3d.get_function('kernelColorAxis') + + kernelFloodZ( + grid, + block, + (pba_images[buffer_idx], pba_images[1 - buffer_idx], size) + ) + buffer_idx = 1 - buffer_idx + + block = (blockx, blocky, 1) + grid = (size // block[0], size // block[1], 1) + kernelMaurerAxis( + grid, + block, + (pba_images[buffer_idx], pba_images[1 - buffer_idx], size), + ) + + block = (block_size, m3, 1) + grid = (size // block[0], size, 1) + kernelColorAxis( + grid, + block, + (pba_images[1 - buffer_idx], pba_images[buffer_idx], size), + ) + + block = (blockx, blocky, 1) + grid = (size // block[0], size // block[1], 1) + kernelMaurerAxis( + grid, + block, + (pba_images[buffer_idx], pba_images[1 - buffer_idx], size), + ) + + block = (block_size, m3, 1) + grid = (size // block[0], size, 1) + kernelColorAxis( + grid, + block, + (pba_images[1 - buffer_idx], pba_images[buffer_idx], size), + ) + + output = pba_images[buffer_idx] + if 
return_distances or return_indices: + x, y, z = decode3d(output[:orig_sz, :orig_sy, :orig_sx], + size_max=size_max) + + vals = () + if return_distances: + # TODO: custom kernel for more efficient distance computation + orig_shape = (orig_sz, orig_sy, orig_sx) + z0, y0, x0 = cupy.meshgrid( + *(cupy.arange(s, dtype=cupy.int32) for s in orig_shape), + indexing='ij', + sparse=True + ) + tmp = (x - x0) + dist = tmp * tmp + tmp = (y - y0) + dist += tmp * tmp + tmp = (z - z0) + dist += tmp * tmp + if float64_distances: + dist = cupy.sqrt(dist) + else: + dist = dist.astype(cupy.float32) + cupy.sqrt(dist, out=dist) + vals = vals + (dist,) + if return_indices: + indices = cupy.stack((z, y, x), axis=0) + vals = vals + (indices,) + return vals diff --git a/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_2d.h b/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_2d.h new file mode 100644 index 000000000..61677c682 --- /dev/null +++ b/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_2d.h @@ -0,0 +1,451 @@ +// Euclidean Distance Transform +// +// Kernels for the 2D version of the Parallel Banding Algorithm (PBA+). +// +// MIT license: see 3rdparty/LICENSE.pba+ +// Copyright: (c) 2019 School of Computing, National University of Singapore +// +// Modifications by Gregory Lee (2022) (NVIDIA) +// - add user-defined pixel_int2_t to enable +// - replace __mul24 operations with standard multiplication operator + + +// START OF DEFINITIONS OVERRIDDEN BY THE PYTHON SCRIPT + +// The values included in this header file are those defined in the original +// PBA+ implementation + +// However, the Python code generation can potentially generate a different +// ENCODE/DECODE that use 20 bits per coordinates instead of 10 bits per +// coordinate with ENCODED_INT_TYPE as `long long`. 
// Bottom-to-top sweep inside one horizontal band.
//
// Each thread handles one column `tx` of the band selected by blockIdx.y. It
// starts at the last row of that band and walks towards the first row
// (`id -= size`). At every row it keeps whichever of the carried pixel and
// the pixel read from `input` has the smaller row distance |pixel.y - row|,
// and writes the winner to `output`.
//
//   input, output : size x size arrays addressed through TOID(x, y, size)
//   size          : row pitch used by TOID
//   bandSize      : number of rows per band
__global__ void kernelFloodUp(pixel_int2_t *input, pixel_int2_t *output, int size, int bandSize)
{
    int tx = blockIdx.x * blockDim.x + threadIdx.x;
    // last row of this band
    int ty = (blockIdx.y+1) * bandSize - 1;
    int id = TOID(tx, ty, size);

    pixel_int2_t pixel1, pixel2;
    int dist1, dist2;

    // nothing carried yet
    pixel1 = make_pixel(MARKER, MARKER);

    for (int i = 0; i < bandSize; i++, id -= size) {
        // the row being processed is (ty - i)
        dist1 = abs(pixel1.y - ty + i);

        pixel2 = input[id];
        dist2 = abs(pixel2.y - ty + i);

        // strict '<': on a tie the carried pixel is kept
        if (dist2 < dist1)
            pixel1 = pixel2;

        output[id] = pixel1;
    }
}
// Combine the per-band result in `color` with the two margin candidates of
// the band, then write the outcome transposed into `output`.
//
// For column `tx` and band blockIdx.y the candidates are read from `margin`:
//   top    = margin[blockIdx.y * size + tx]
//   bottom = margin[(blockIdx.y + size / bandSize) * size + tx]
// For every row of the band, the pixel with the smallest row distance
// |pixel.y - ty| among color/top/bottom wins. The winner is stored with its
// x and y components swapped, staged in a shared tile, and the tile is
// copied to the transposed position of `output`.
//
// NOTE(review): the tile is indexed with threadIdx.x and offsets below
// blockDim.x, so this presumably requires blockDim.x <= BLOCKSIZE and
// bandSize to be a multiple of blockDim.x -- confirm against the launcher.
__global__ void kernelUpdateVertical(pixel_int2_t *color, pixel_int2_t *margin, pixel_int2_t *output, int size, int bandSize)
{
    __shared__ pixel_int2_t block[BLOCKSIZE][BLOCKSIZE];

    int tx = blockIdx.x * blockDim.x + threadIdx.x;
    int ty = blockIdx.y * bandSize;

    pixel_int2_t top = margin[blockIdx.y * size + tx];
    pixel_int2_t bottom = margin[(blockIdx.y + size / bandSize) * size + tx];
    pixel_int2_t pixel;

    int dist, myDist;

    int id = TOID(tx, ty, size);

    // the band is processed in chunks of blockDim.x rows
    int n_step = bandSize / blockDim.x;
    for(int step = 0; step < n_step; ++step) {
        int y_start = blockIdx.y * bandSize + step * blockDim.x;
        int y_end = y_start + blockDim.x;

        for (ty = y_start; ty < y_end; ++ty, id += size) {
            pixel = color[id];
            myDist = abs(pixel.y - ty);

            dist = abs(top.y - ty);
            if (dist < myDist) { myDist = dist; pixel = top; }

            dist = abs(bottom.y - ty);
            if (dist < myDist) pixel = bottom;

            // temporary result is stored in block
            // (x and y components are swapped on store)
            block[threadIdx.x][ty - y_start] = make_pixel(pixel.y, pixel.x);
        }

        // every column of the tile must be complete before it is read
        __syncthreads();

        // block is written to a transposed location in the output

        int tid = TOID(blockIdx.y * bandSize + step * blockDim.x + threadIdx.x, \
                       blockIdx.x * blockDim.x, size);

        for(int i = 0; i < blockDim.x; ++i, tid += size) {
            output[tid] = block[i][threadIdx.x];
        }

        // the tile is overwritten by the next chunk
        __syncthreads();
    }
}
band1 + 1; + int firsty, lasty; + pixel_int2_t last1, last2, current; + // last1 and last2: x component store the x coordinate of the site, + // y component store the backward pointer + // current: y component store the x coordinate of the site, + // x component store the forward pointer + + // Get the two last items of the first list + lasty = band2 * bandSize - 1; + last2 = make_pixel(color[TOID(tx, lasty, size)].x, + link[TOID(tx, lasty, size)].y); + + if (last2.x == MARKER) { + lasty = last2.y; + + if (lasty >= 0) + last2 = make_pixel(color[TOID(tx, lasty, size)].x, + link[TOID(tx, lasty, size)].y); + else + last2 = make_pixel(MARKER, MARKER); + } + + if (last2.y >= 0) { + // Second item at the top of the stack + last1 = make_pixel(color[TOID(tx, last2.y, size)].x, + link[TOID(tx, last2.y, size)].y); + } + + // Get the first item of the second band + firsty = band2 * bandSize; + current = make_pixel(link[TOID(tx, firsty, size)].x, + color[TOID(tx, firsty, size)].x); + + if (current.y == MARKER) { + firsty = current.x; + + if (firsty >= 0) + current = make_pixel(link[TOID(tx, firsty, size)].x, + color[TOID(tx, firsty, size)].x); + else + current = make_pixel(MARKER, MARKER); + } + + // Count the number of item in the second band that survive so far. + // Once it reaches 2, we can stop. 
+ int top = 0; + + while (top < 2 && current.y >= 0) { + // While there's still something on the left + while (last2.y >= 0) { + + if (!dominate(last1.x, last2.y, last2.x, \ + lasty, current.y, firsty, tx)) + break; + + lasty = last2.y; last2 = last1; + top--; + + if (last1.y >= 0) + last1 = make_pixel(color[TOID(tx, last1.y, size)].x, + link[TOID(tx, last1.y, size)].y); + } + + // Update the current pointer + output[TOID(tx, firsty, size)] = make_pixel(current.x, lasty); + + if (lasty >= 0) + output[TOID(tx, lasty, size)] = make_pixel(firsty, last2.y); + + last1 = last2; last2 = make_pixel(current.y, lasty); lasty = firsty; + firsty = current.x; + + top = max(1, top + 1); + + // Advance the current pointer to the next one + if (firsty >= 0) + current = make_pixel(link[TOID(tx, firsty, size)].x, + color[TOID(tx, firsty, size)].x); + else + current = make_pixel(MARKER, MARKER); + } + + // Update the head and tail pointer. + firsty = band1 * bandSize; + lasty = band2 * bandSize; + current = link[TOID(tx, firsty, size)]; + + if (current.y == MARKER && current.x < 0) { // No head? + last1 = link[TOID(tx, lasty, size)]; + + if (last1.y == MARKER) + current.x = last1.x; + else + current.x = lasty; + + output[TOID(tx, firsty, size)] = current; + } + + firsty = band1 * bandSize + bandSize - 1; + lasty = band2 * bandSize + bandSize - 1; + current = link[TOID(tx, lasty, size)]; + + if (current.x == MARKER && current.y < 0) { // No tail? 
// Final colouring pass: assign a site to every pixel of column `tx` by
// walking the per-column list stored in `input`, and write the result
// transposed into `output`.
//
// The walk starts from the entry at the last row (size - 1). If that entry
// has x == MARKER, its y component is used as the row of the entry to start
// from instead. An entry's y component is followed as the link to the next
// entry while it is >= 0.
//
// Rows are visited from high to low in chunks of blockDim.x rows. Within a
// chunk the threads of a column that differ in threadIdx.y take rows
// blockDim.y apart. For each row the walk advances while the next entry is
// not farther away (squared Euclidean distance) than the current best.
// Results are staged in a shared tile and copied to the transposed location
// by the threads with threadIdx.y == 0.
//
// NOTE(review): when the entry at row size - 1 has x == MARKER, the
// following read uses last2.y as a row index without checking that it is
// non-negative. Confirm that callers guarantee a valid link there.
// NOTE(review): the tile indexing presumably requires blockDim.x <= BLOCKSIZE
// and size to be a multiple of blockDim.x -- confirm against the launcher.
__global__ void kernelColor(pixel_int2_t *input, pixel_int2_t *output, int size)
{
    __shared__ pixel_int2_t block[BLOCKSIZE][BLOCKSIZE];

    int col = threadIdx.x;
    int tid = threadIdx.y;
    int tx = blockIdx.x * blockDim.x + col;
    int dx, dy, lasty;
    unsigned int best, dist;
    pixel_int2_t last1, last2;

    lasty = size - 1;

    last2 = input[TOID(tx, lasty, size)];

    if (last2.x == MARKER) {
        lasty = last2.y;
        last2 = input[TOID(tx, lasty, size)];
    }

    // last1 is only read inside the while loop below, which runs under the
    // same last2.y >= 0 condition
    if (last2.y >= 0)
        last1 = input[TOID(tx, last2.y, size)];

    int y_start, y_end, n_step = size / blockDim.x;
    for(int step = 0; step < n_step; ++step) {
        y_start = size - step * blockDim.x - 1;
        y_end = size - (step + 1) * blockDim.x;

        for (int ty = y_start - tid; ty >= y_end; ty -= blockDim.y) {
            dx = last2.x - tx; dy = lasty - ty;
            best = dist = dx * dx + dy * dy;

            while (last2.y >= 0) {
                dx = last1.x - tx; dy = last2.y - ty;
                dist = dx * dx + dy * dy;

                // the next entry is farther away: keep the current one
                if (dist > best)
                    break;

                best = dist; lasty = last2.y; last2 = last1;

                if (last2.y >= 0)
                    last1 = input[TOID(tx, last2.y, size)];
            }

            block[threadIdx.x][ty - y_end] = make_pixel(lasty, last2.x);
        }

        __syncthreads();

        // note: transposes back to original shape here
        if(!threadIdx.y) {
            int id = TOID(y_end + threadIdx.x, blockIdx.x * blockDim.x, size);
            for(int i = 0; i < blockDim.x; ++i, id+=size) {
                output[id] = block[i][threadIdx.x];
            }
        }

        __syncthreads();
    }
}
a/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_3d.h b/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_3d.h new file mode 100644 index 000000000..c09f4b51f --- /dev/null +++ b/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_3d.h @@ -0,0 +1,237 @@ +// Euclidean Distance Transform +// +// Kernels for the 3D version of the Parallel Banding Algorithm (PBA+). +// +// MIT license: see 3rdparty/LICENSE.pba+ +// +// Modifications by Gregory Lee (2022) (NVIDIA) +// - allow user-defined ENCODED_INT_TYPE, ENCODE, DECODE + + +// START OF DEFINITIONS OVERRIDDEN BY THE PYTHON SCRIPT + +// The values included in this header file are those defined in the original +// PBA+ implementation + +// However, the Python code generation can potentially generate a different +// ENCODE/DECODE that use 20 bits per coordinates instead of 10 bits per +// coordinate with ENCODED_INT_TYPE as `long long`. + + +#ifndef MARKER +#define MARKER -2147483648 +#endif // MARKER + +#ifndef MAX_INT +#define MAX_INT 2147483647 +#endif + +#ifndef BLOCKSIZE +#define BLOCKSIZE 32 +#endif + +#ifndef ENCODE + +// Sites : ENCODE(x, y, z, 0, 0) +// Not sites : ENCODE(0, 0, 0, 1, 0) or MARKER +#define ENCODED_INT_TYPE int +#define ZERO 0 +#define ONE 1 +#define ENCODE(x, y, z, a, b) (((x) << 20) | ((y) << 10) | (z) | ((a) << 31) | ((b) << 30)) +#define DECODE(value, x, y, z) \ + x = ((value) >> 20) & 0x3ff; \ + y = ((value) >> 10) & 0x3ff; \ + z = (value) & 0x3ff + +#define NOTSITE(value) (((value) >> 31) & 1) +#define HASNEXT(value) (((value) >> 30) & 1) + +#define GET_X(value) (((value) >> 20) & 0x3ff) +#define GET_Y(value) (((value) >> 10) & 0x3ff) +#define GET_Z(value) ((NOTSITE((value))) ? 
// Two-pass flood along z for the column (tx, ty) of a size^3 volume.
//
// Pass 1 walks z = 0 .. size-1 and writes, for every voxel, the most recent
// site seen so far (or the "not a site" code if none has been seen).
// Pass 2 walks z = size-2 .. 0 over the values written by pass 1 and keeps,
// for every voxel, whichever of the carried value and the stored value has
// the smaller |GET_Z(value) - z|. GET_Z yields MAX_INT for non-sites.
//
// NOTE(review): `id` and `plane` are plain `int`. The largest linear index
// is size^3 - 1, which no longer fits a 32-bit int once size >= 1291.
// Confirm the maximum size callers pass when the 64-bit encoding is used.
__global__ void kernelFloodZ(ENCODED_INT_TYPE *input, ENCODED_INT_TYPE *output, int size)
{

    int tx = blockIdx.x * blockDim.x + threadIdx.x;
    int ty = blockIdx.y * blockDim.y + threadIdx.y;
    int tz = 0;

    // linear offset between consecutive z slices
    int plane = size * size;
    int id = TOID(tx, ty, tz, size);
    ENCODED_INT_TYPE pixel1, pixel2;

    // start with the "not a site" code
    pixel1 = ENCODE(ZERO,ZERO,ZERO,ONE,ZERO);

    // Sweep down
    for (int i = 0; i < size; i++, id += plane) {
        pixel2 = input[id];

        if (!NOTSITE(pixel2))
            pixel1 = pixel2;

        output[id] = pixel1;
    }

    ENCODED_INT_TYPE dist1, dist2, nz;

    // the loop left `id` one plane past the end; step back to z = size - 2.
    // pixel1 still holds the value written at z = size - 1.
    id -= plane + plane;

    // Sweep up
    for (int i = size - 2; i >= 0; i--, id -= plane) {
        nz = GET_Z(pixel1);
        dist1 = abs(nz - (tz + i));

        pixel2 = output[id];
        nz = GET_Z(pixel2);
        dist2 = abs(nz - (tz + i));

        // strict '<': on a tie the carried value is kept
        if (dist2 < dist1)
            pixel1 = pixel2;

        output[id] = pixel1;
    }
}
ty < size; ++ty, id += size) { + p = input[id]; + + if (!NOTSITE(p)) { + + while (HASNEXT(s2)) { + DECODE(s1, x1, y1, z1); + DECODE(s2, x2, y2, z2); + DECODE(p, nx, ny, nz); + + if (!dominate(x1, y2, z1, x2, lasty, z2, nx, ty, nz, tx, tz)) + break; + + lasty = y2; s2 = s1; y2 = y1; + + if (HASNEXT(s2)) + s1 = stack[TOID(tx, y2, tz, size)]; + } + + DECODE(p, nx, ny, nz); + s1 = s2; + s2 = ENCODE(nx, lasty, nz, ZERO, flag); + y2 = lasty; + lasty = ty; + + stack[id] = s2; + + flag = ONE; + } + } + + if (NOTSITE(p)) + stack[TOID(tx, ty - 1, tz, size)] = ENCODE(ZERO, lasty, ZERO, ONE, flag); +} + +__global__ void kernelColorAxis(ENCODED_INT_TYPE *input, ENCODED_INT_TYPE *output, int size) +{ + __shared__ ENCODED_INT_TYPE block[BLOCKSIZE][BLOCKSIZE]; + + int col = threadIdx.x; + int tid = threadIdx.y; + int tx = blockIdx.x * blockDim.x + col; + int tz = blockIdx.y; + + ENCODED_INT_TYPE x1, y1, z1, x2, y2, z2; + ENCODED_INT_TYPE last1 = ENCODE(ZERO,ZERO,ZERO,ONE,ZERO), last2 = ENCODE(ZERO,ZERO,ZERO,ONE,ZERO), lasty; + long long dx, dy, dz, best, dist; + + lasty = size - 1; + + last2 = input[TOID(tx, lasty, tz, size)]; + DECODE(last2, x2, y2, z2); + + if (NOTSITE(last2)) { + lasty = y2; + if(HASNEXT(last2)) { + last2 = input[TOID(tx, lasty, tz, size)]; + DECODE(last2, x2, y2, z2); + } + } + + if (HASNEXT(last2)) { + last1 = input[TOID(tx, y2, tz, size)]; + DECODE(last1, x1, y1, z1); + } + + int y_start, y_end, n_step = size / blockDim.x; + for(int step = 0; step < n_step; ++step) { + y_start = size - step * blockDim.x - 1; + y_end = size - (step + 1) * blockDim.x; + + for (int ty = y_start - tid; ty >= y_end; ty -= blockDim.y) { + dx = x2 - tx; dy = lasty - ty; dz = z2 - tz; + best = dx * dx + dy * dy + dz * dz; + + while (HASNEXT(last2)) { + dx = x1 - tx; dy = y2 - ty; dz = z1 - tz; + dist = dx * dx + dy * dy + dz * dz; + + if(dist > best) break; + + best = dist; lasty = y2; last2 = last1; + DECODE(last2, x2, y2, z2); + + if (HASNEXT(last2)) { + last1 = input[TOID(tx, y2, 
def assert_percentile_equal(arr1, arr2, pct=95):
    """Assert that at least pct% of the entries in arr1 and arr2 are equal.

    Parameters
    ----------
    arr1, arr2 : array-like
        Arrays of identical shape; both are transferred to the host with
        ``cp.asnumpy`` before comparison.
    pct : int or float, optional
        Minimum percentage (0-100) of element-wise equal entries.

    Raises
    ------
    AssertionError
        If the shapes differ or fewer than ``pct`` percent of entries match.
    """
    arr1 = cp.asnumpy(arr1)
    arr2 = cp.asnumpy(arr2)
    # Without this check a shape mismatch would be silently broadcast by the
    # element-wise comparison below.
    assert arr1.shape == arr2.shape, (
        f"shape mismatch: {arr1.shape} vs {arr2.shape}"
    )
    if arr1.size == 0:
        # nothing to compare (also avoids a division by zero)
        return
    n_mismatch = int(np.count_nonzero(arr1 != arr2))
    # "at least pct% equal" means "at most (100 - pct)% different". The
    # comparison is inclusive so that pct=100 passes for identical arrays,
    # and it is done on scaled counts to avoid float rounding at the boundary.
    assert n_mismatch * 100 <= (100 - pct) * arr1.size, (
        f"{n_mismatch} of {arr1.size} entries differ; "
        f"expected at least {pct}% to be equal"
    )
@pytest.mark.parametrize('value', [0, 1, 3])
@pytest.mark.parametrize('ndim', [2, 3])
def test_distance_transform_edt_uniform_valued(value, ndim):
    """Compare with SciPy on an image filled with a single value.

    The image has 48 elements per axis and is filled with ``value``; one
    central element is then set to 0, so for ``value=0`` the image is zero
    everywhere.
    """
    img = cp.full((48, ) * ndim, value, dtype=cp.uint8)
    # ensure there is at least 1 pixel at background intensity
    img[(slice(24, 25),) * ndim] = 0
    out = distance_transform_edt(img)
    # reference result computed on the host by scipy.ndimage
    expected = ndi_cpu.distance_transform_edt(cp.asnumpy(img))
    cp.testing.assert_allclose(out, expected)
def _axis_index(ndim, axis, index):
    """Return an index tuple that applies `index` on `axis` and `:` elsewhere."""
    selection = [slice(None)] * ndim
    selection[axis] = index
    return tuple(selection)


def gradient(f, axis=None, output_as_array=False):
    """Return the gradient of an N-dimensional array.

    The gradient is computed with unit spacing using second order accurate
    central differences in the interior points and first order accurate
    one-sided (forward or backward) differences at the boundaries.
    The returned gradient hence has the same shape as the input array.

    Args:
        f (cupy.ndarray): An N-dimensional array containing samples of a scalar
            function. Integer input is converted to the floating point dtype
            returned by ``_supported_float_type``.
        axis (None or int or tuple of ints, optional): The gradient is
            calculated only along the given axis or axes. The default
            (axis = None) is to calculate the gradient for all the axes of the
            input array. axis may be negative, in which case it counts from the
            last to the first axis.
        output_as_array (bool, optional): If True, the derivatives are written
            into one array of shape ``(len(axes),) + f.shape`` (one leading
            entry per requested axis, in the order requested) instead of
            being returned as a list of separate arrays.

    Returns:
        gradient (cupy.ndarray or list of cupy.ndarray): A set of ndarrays
        (or a single ndarray if only one axis was requested) corresponding
        to the derivatives of f with respect to each dimension. Each
        derivative has the same shape as f.

    Raises:
        ValueError: If an axis is out of range for ``f`` or ``f`` has fewer
            than 2 elements along a requested axis.

    """
    ndim = f.ndim  # number of dimensions
    if axis is None:
        axes = tuple(range(ndim))
    else:
        if cupy.isscalar(axis):
            axis = (axis,)
        for ax in axis:
            # valid axes are -ndim, ..., ndim - 1
            if ax < -ndim or ax >= ndim:
                raise ValueError(f"invalid axis: {ax}")
        axes = tuple(ax + ndim if ax < 0 else ax for ax in axis)
    len_axes = len(axes)

    otype = f.dtype
    if not cupy.issubdtype(otype, cupy.inexact):
        # All other types convert to floating point.
        float_dtype = _supported_float_type(otype)
        if cupy.issubdtype(otype, cupy.integer):
            f = f.astype(float_dtype)
        otype = float_dtype

    if output_as_array:
        # One leading slot per *requested* axis, so every slot gets filled
        # even when only a subset of the axes is requested.
        outvals = cupy.empty((len_axes,) + f.shape, dtype=otype)
    else:
        outvals = []

    for position, ax in enumerate(axes):
        if f.shape[ax] < 2:
            raise ValueError(
                "Shape of array too small to calculate a numerical gradient, "
                "at least 2 elements are required."
            )
        # result allocation: a view into the stacked output, or a new array
        if output_as_array:
            out = outvals[position]
        else:
            out = cupy.empty_like(f, dtype=otype)

        # Numerical differentiation: 2nd order interior
        # 1D equivalent -- out[1:-1] = (f[2:] - f[:-2]) / 2
        out[_axis_index(ndim, ax, slice(1, -1))] = (
            f[_axis_index(ndim, ax, slice(2, None))]
            - f[_axis_index(ndim, ax, slice(None, -2))]
        ) / 2.0

        # Numerical differentiation: 1st order edges
        # 1D equivalent -- out[0] = (f[1] - f[0]) / (x[1] - x[0])
        out[_axis_index(ndim, ax, 0)] = (
            f[_axis_index(ndim, ax, 1)] - f[_axis_index(ndim, ax, 0)]
        )
        # 1D equivalent -- out[-1] = (f[-1] - f[-2]) / (x[-1] - x[-2])
        out[_axis_index(ndim, ax, -1)] = (
            f[_axis_index(ndim, ax, -1)] - f[_axis_index(ndim, ax, -2)]
        )

        if not output_as_array:
            outvals.append(out)

    if len_axes == 1:
        return outvals[0]
    return outvals
a/python/cucim/src/cucim/skimage/_shared/filters.py b/python/cucim/src/cucim/skimage/_shared/filters.py index 2f68221b4..56cee8000 100644 --- a/python/cucim/src/cucim/skimage/_shared/filters.py +++ b/python/cucim/src/cucim/skimage/_shared/filters.py @@ -7,7 +7,8 @@ from collections.abc import Iterable import cupy as cp -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi + from .._shared import utils from .._shared.utils import _supported_float_type, convert_to_float diff --git a/python/cucim/src/cucim/skimage/_shared/tests/test_utils.py b/python/cucim/src/cucim/skimage/_shared/tests/test_utils.py index f7135a0e3..c7d817b84 100644 --- a/python/cucim/src/cucim/skimage/_shared/tests/test_utils.py +++ b/python/cucim/src/cucim/skimage/_shared/tests/test_utils.py @@ -66,13 +66,13 @@ def bar(arg0, arg1=0, arg2=1): def test_deprecate_kwarg(): - @deprecate_kwarg({'old_arg1': 'new_arg1'}, '0.19') + @deprecate_kwarg({'old_arg1': 'new_arg1'}, '22.02.00') def foo(arg0, new_arg1=1, arg2=None): """Expected docstring""" return arg0, new_arg1, arg2 @deprecate_kwarg({'old_arg1': 'new_arg1'}, - deprecated_version='0.19', + deprecated_version='22.02.00', warning_msg="Custom warning message") def bar(arg0, new_arg1=1, arg2=None): """Expected docstring""" @@ -116,7 +116,7 @@ def bar(arg0, new_arg1=1, arg2=None): old_arg1 : DEPRECATED Deprecated in favor of `new_arg1`. - .. deprecated:: 0.19 + .. 
deprecated:: 22.02.00 """ assert len(recorded) == 0 diff --git a/python/cucim/src/cucim/skimage/_shared/utils.py b/python/cucim/src/cucim/skimage/_shared/utils.py index 6798628c7..1e014ef3a 100644 --- a/python/cucim/src/cucim/skimage/_shared/utils.py +++ b/python/cucim/src/cucim/skimage/_shared/utils.py @@ -95,10 +95,11 @@ def __call__(self, func): parameters = inspect.signature(func).parameters arg_idx = list(parameters.keys()).index(self.arg_name) warning_msg = ( - f"{self.arg_name} argument is deprecated and will be removed " - f"in version {self.changed_version}. To avoid this warning, " - f"please do not use the {self.arg_name} argument. Please " - f"see {func.__name__} documentation for more details.") + f"{self.arg_name} argument is deprecated in upstream scikit-image " + f"and will be removed in cuCIM {self.changed_version}. To avoid " + f"this warning, please do not use the {self.arg_name} argument. " + f"Please see {func.__name__} documentation for more details." + ) if self.help_msg is not None: warning_msg += f" {self.help_msg}" @@ -206,7 +207,7 @@ def __init__(self, kwarg_mapping, deprecated_version, warning_msg=None, self.warning_msg = ("`{old_arg}` is a deprecated argument name " "for `{func_name}`. ") if removed_version is not None: - self.warning_msg += (f'It will be removed in ' + self.warning_msg += (f'It will be removed in cuCIM ' f'version {removed_version}.') self.warning_msg += "Please use `{new_arg}` instead." 
else: @@ -248,10 +249,11 @@ class deprecate_multichannel_kwarg(deprecate_kwarg): """ - def __init__(self, removed_version='1.0', multichannel_position=None): + def __init__(self, removed_version='2023.02.00', + multichannel_position=None): super().__init__( kwarg_mapping={'multichannel': 'channel_axis'}, - deprecated_version='0.19', + deprecated_version='22.02.00', warning_msg=None, removed_version=removed_version) self.position = multichannel_position @@ -293,7 +295,7 @@ def fixed_func(*args, **kwargs): if func.__doc__ is not None: newdoc = docstring_add_deprecated( - func, {'multichannel': 'channel_axis'}, '0.19') + func, {'multichannel': 'channel_axis'}, '22.02.00') fixed_func.__doc__ = newdoc return fixed_func diff --git a/python/cucim/src/cucim/skimage/_vendored/_internal.py b/python/cucim/src/cucim/skimage/_vendored/_internal.py index b6d163a36..196486d94 100644 --- a/python/cucim/src/cucim/skimage/_vendored/_internal.py +++ b/python/cucim/src/cucim/skimage/_vendored/_internal.py @@ -1,3 +1,7 @@ +import math +from functools import reduce +from operator import mul + import cupy import numpy @@ -61,3 +65,11 @@ def _normalize_axis_indices(axes, ndim): # NOQA res.append(axis) return tuple(sorted(res)) + + +if hasattr(math, 'prod'): + prod = math.prod +else: + + def prod(iterable, *, start=1): + return reduce(mul, iterable, start) diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters.py index 1ce475371..e4dc1311d 100644 --- a/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters.py +++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters.py @@ -1,8 +1,224 @@ """A vendored subset of cupyx.scipy.ndimage._filters""" +import warnings import cupy +import numpy +from cucim.skimage._vendored import _internal as internal from cucim.skimage._vendored import _ndimage_filters_core as _filters_core +from cucim.skimage._vendored import _ndimage_util as _util + + +def correlate(input, 
weights, output=None, mode='reflect', cval=0.0, origin=0): + """Multi-dimensional correlate. + + The array is correlated with the given kernel. + + Args: + input (cupy.ndarray): The input array. + weights (cupy.ndarray): Array of weights, same number of dimensions as + input + output (cupy.ndarray, dtype or None): The array in which to place the + output. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``constant``. Default is ``0.0``. + origin (scalar or tuple of scalar): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of 0 is equivalent to + ``(0,)*input.ndim``. + + Returns: + cupy.ndarray: The result of correlate. + + .. seealso:: :func:`scipy.ndimage.correlate` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. + """ + return _correlate_or_convolve(input, weights, output, mode, cval, origin) + + +def convolve(input, weights, output=None, mode='reflect', cval=0.0, origin=0): + """Multi-dimensional convolution. + + The array is convolved with the given kernel. + + Args: + input (cupy.ndarray): The input array. + weights (cupy.ndarray): Array of weights, same number of dimensions as + input + output (cupy.ndarray, dtype or None): The array in which to place the + output. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``constant``. Default is ``0.0``. 
+ origin (scalar or tuple of scalar): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of 0 is equivalent to + ``(0,)*input.ndim``. + + Returns: + cupy.ndarray: The result of convolution. + + .. seealso:: :func:`scipy.ndimage.convolve` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. + """ + return _correlate_or_convolve(input, weights, output, mode, cval, origin, + True) + + +def correlate1d(input, weights, axis=-1, output=None, mode="reflect", cval=0.0, + origin=0, *, algorithm=None): + """One-dimensional correlate. + + The array is correlated with the given kernel. + + Args: + input (cupy.ndarray): The input array. + weights (cupy.ndarray): One-dimensional array of weights + axis (int): The axis of input along which to calculate. Default is -1. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + origin (int): The origin parameter controls the placement of the + filter, relative to the center of the current element of the + input. Default is ``0``. + + Returns: + cupy.ndarray: The result of the 1D correlation. + + .. seealso:: :func:`scipy.ndimage.correlate1d` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. 
+ """ + return _correlate_or_convolve1d( + input, weights, axis, output, mode, cval, origin, False, algorithm + ) + + +def convolve1d(input, weights, axis=-1, output=None, mode="reflect", cval=0.0, + origin=0, *, algorithm=None): + """One-dimensional convolution. + + The array is convolved with the given kernel. + + Args: + input (cupy.ndarray): The input array. + weights (cupy.ndarray): One-dimensional array of weights + axis (int): The axis of input along which to calculate. Default is -1. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + origin (int): The origin parameter controls the placement of the + filter, relative to the center of the current element of the + input. Default is ``0``. + Returns: + cupy.ndarray: The result of the 1D convolution. + + .. seealso:: :func:`scipy.ndimage.convolve1d` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. 
+ """ + return _correlate_or_convolve1d( + input, weights, axis, output, mode, cval, origin, True, algorithm + ) + + +def _correlate_or_convolve(input, weights, output, mode, cval, origin, + convolution=False): + origins, int_type = _filters_core._check_nd_args(input, weights, + mode, origin) + if weights.size == 0: + return cupy.zeros_like(input) + + _util._check_cval(mode, cval, _util._is_integer_output(output, input)) + + if convolution: + weights = weights[tuple([slice(None, None, -1)] * weights.ndim)] + origins = list(origins) + for i, wsize in enumerate(weights.shape): + origins[i] = -origins[i] + if wsize % 2 == 0: + origins[i] -= 1 + origins = tuple(origins) + elif weights.dtype.kind == "c": + # numpy.correlate conjugates weights rather than input. + weights = weights.conj() + weights_dtype = _util._get_weights_dtype(input, weights, use_cucim_casting=True) # noqa + offsets = _filters_core._origins_to_offsets(origins, weights.shape) + kernel = _get_correlate_kernel(mode, weights.shape, int_type, + offsets, cval) + output = _filters_core._call_kernel(kernel, input, weights, output, + weights_dtype=weights_dtype) + return output + + +def _correlate_or_convolve1d(input, weights, axis, output, mode, cval, origin, + convolution=False, algorithm=None): + # Calls fast shared-memory convolution when possible, otherwise falls back + # to the vendored elementwise _correlate_or_convolve + if algorithm is None: + if input.ndim == 2 and weights.size <= 256: + algorithm = 'shared_memory' + else: + algorithm = 'elementwise' + elif algorithm not in ['shared_memory', 'elementwise']: + raise ValueError( + "algorithm must be 'shared_memory', 'elementwise' or None" + ) + if mode == 'wrap': + mode = 'grid-wrap' + if algorithm == 'shared_memory': + from cucim.skimage.filters._separable_filtering import ( + ResourceLimitError, _shmem_convolve1d) + if input.ndim not in [2, 3]: + raise NotImplementedError( + f"shared_memory not implemented for ndim={input.ndim}" + ) + try: + out = 
_shmem_convolve1d(input, weights, axis=axis, output=output, + mode=mode, cval=cval, origin=origin, + convolution=convolution) + return out + except ResourceLimitError: + # fallback to elementwise if inadequate shared memory available + warnings.warn( + "Inadequate resources for algorithm='shared_memory: " + "falling back to the elementwise implementation" + ) + algorithm = 'elementwise' + if algorithm == 'elementwise': + weights, origins = _filters_core._convert_1d_args( + input.ndim, weights, origin, axis + ) + return _correlate_or_convolve( + input, weights, output, mode, cval, origins, convolution + ) @cupy.memoize(for_each_device=True) @@ -13,3 +229,949 @@ def _get_correlate_kernel(mode, w_shape, int_type, offsets, cval): 'sum += cast({value}) * wval;', 'y = cast(sum);', mode, w_shape, int_type, offsets, cval, ctype='W') + + +def _run_1d_correlates(input, params, get_weights, output, mode, cval, + origin=0, **filter_kwargs): + """ + Enhanced version of _run_1d_filters that uses correlate1d as the filter + function. The params are a list of values to pass to the get_weights + callable given. If duplicate param values are found, the weights are + reused from the first invocation of get_weights. The get_weights callable + must return a 1D array of weights to give to correlate1d. + """ + wghts = {} + for param in params: + if param not in wghts: + wghts[param] = get_weights(param) + wghts = [wghts[param] for param in params] + return _filters_core._run_1d_filters( + [None if w is None else correlate1d for w in wghts], + input, wghts, output, mode, cval, origin, **filter_kwargs) + + +def uniform_filter1d(input, size, axis=-1, output=None, mode="reflect", + cval=0.0, origin=0, *, algorithm=None): + """One-dimensional uniform filter along the given axis. + + The lines of the array along the given axis are filtered with a uniform + filter of the given size. + + Args: + input (cupy.ndarray): The input array. + size (int): Length of the uniform filter. 
+ axis (int): The axis of input along which to calculate. Default is -1. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + origin (int): The origin parameter controls the placement of the + filter, relative to the center of the current element of the + input. Default is ``0``. + + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.uniform_filter1d` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. + """ + weights_dtype = cupy.promote_types(input.dtype, cupy.float32) + weights = cupy.full(size, 1 / size, dtype=weights_dtype) + return correlate1d(input, weights, axis, output, mode, cval, + origin, algorithm=algorithm) + + +def uniform_filter(input, size=3, output=None, mode="reflect", cval=0.0, + origin=0, *, algorithm=None): + """Multi-dimensional uniform filter. + + Args: + input (cupy.ndarray): The input array. + size (int or sequence of int): Lengths of the uniform filter for each + dimension. A single value applies to all axes. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. 
+ origin (int or sequence of int): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of ``0`` is equivalent to + ``(0,)*input.ndim``. + + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.uniform_filter` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. + """ + sizes = _util._fix_sequence_arg(size, input.ndim, 'size', int) + weights_dtype = cupy.promote_types(input.dtype, cupy.float32) + + def get(size): + return None if size <= 1 else cupy.full(size, 1 / size, dtype=weights_dtype) # noqa + + return _run_1d_correlates( + input, sizes, get, output, mode, cval, origin, algorithm=algorithm + ) + + +def gaussian_filter1d(input, sigma, axis=-1, order=0, output=None, + mode="reflect", cval=0.0, truncate=4.0, *, + algorithm=None): + """One-dimensional Gaussian filter along the given axis. + + The lines of the array along the given axis are filtered with a Gaussian + filter of the given standard deviation. + + Args: + input (cupy.ndarray): The input array. + sigma (scalar): Standard deviation for Gaussian kernel. + axis (int): The axis of input along which to calculate. Default is -1. + order (int): An order of ``0``, the default, corresponds to convolution + with a Gaussian kernel. A positive order corresponds to convolution + with that derivative of a Gaussian. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. 
+ truncate (float): Truncate the filter at this many standard deviations. + Default is ``4.0``. + + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.gaussian_filter1d` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. + """ + radius = int(float(truncate) * float(sigma) + 0.5) + weights_dtype = cupy.promote_types(input.dtype, cupy.float32) + weights = _gaussian_kernel1d(sigma, int(order), radius, weights_dtype) + return correlate1d( + input, weights, axis, output, mode, cval, algorithm=algorithm + ) + + +def gaussian_filter(input, sigma, order=0, output=None, mode="reflect", + cval=0.0, truncate=4.0, *, algorithm=None): + """Multi-dimensional Gaussian filter. + + Args: + input (cupy.ndarray): The input array. + sigma (scalar or sequence of scalar): Standard deviations for each axis + of Gaussian kernel. A single value applies to all axes. + order (int or sequence of scalar): An order of ``0``, the default, + corresponds to convolution with a Gaussian kernel. A positive order + corresponds to convolution with that derivative of a Gaussian. A + single value applies to all axes. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + truncate (float): Truncate the filter at this many standard deviations. + Default is ``4.0``. + + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.gaussian_filter` + + .. 
note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. + """ + sigmas = _util._fix_sequence_arg(sigma, input.ndim, 'sigma', float) + orders = _util._fix_sequence_arg(order, input.ndim, 'order', int) + truncate = float(truncate) + weights_dtype = cupy.promote_types(input, cupy.float32) + + def get(param, dtype=weights_dtype): + sigma, order = param + radius = int(truncate * float(sigma) + 0.5) + if radius <= 0: + return None + return _gaussian_kernel1d(sigma, order, radius, dtype) + + return _run_1d_correlates(input, list(zip(sigmas, orders)), get, output, + mode, cval, 0, algorithm=algorithm) + + +def _gaussian_kernel1d(sigma, order, radius, dtype=cupy.float64): + """ + Computes a 1-D Gaussian correlation kernel. + """ + if order < 0: + raise ValueError('order must be non-negative') + sigma2 = sigma * sigma + x = numpy.arange(-radius, radius + 1) + phi_x = numpy.exp(-0.5 / sigma2 * x ** 2) + phi_x /= phi_x.sum() + + if order == 0: + return cupy.asarray(phi_x) + + # f(x) = q(x) * phi(x) = q(x) * exp(p(x)) + # f'(x) = (q'(x) + q(x) * p'(x)) * phi(x) + # p'(x) = -1 / sigma ** 2 + # Implement q'(x) + q(x) * p'(x) as a matrix operator and apply to the + # coefficients of q(x) + exponent_range = numpy.arange(order + 1) + q = numpy.zeros(order + 1) + q[0] = 1 + D = numpy.diag(exponent_range[1:], 1) # D @ q(x) = q'(x) + P = numpy.diag(numpy.ones(order) / -sigma2, -1) # P @ q(x) = q(x) * p'(x) + Q_deriv = D + P + for _ in range(order): + q = Q_deriv.dot(q) + q = (x[:, None] ** exponent_range).dot(q) + return cupy.asarray((q * phi_x)[::-1], order='C', dtype=dtype) + + +def prewitt(input, axis=-1, output=None, mode="reflect", cval=0.0, *, + algorithm=None): + """Compute a Prewitt filter along the given axis. + + Args: + input (cupy.ndarray): The input array. 
+ axis (int): The axis of input along which to calculate. Default is -1. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.prewitt` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. + """ + weights_dtype = cupy.promote_types(input.dtype, cupy.float32) + smooth = cupy.ones(3, dtype=weights_dtype) + return _prewitt_or_sobel( + input, axis, output, mode, cval, smooth, algorithm + ) + + +def sobel(input, axis=-1, output=None, mode="reflect", cval=0.0, *, + algorithm=None): + """Compute a Sobel filter along the given axis. + + Args: + input (cupy.ndarray): The input array. + axis (int): The axis of input along which to calculate. Default is -1. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.sobel` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. 
+ """ + weights_dtype = cupy.promote_types(input.dtype, cupy.float32) + smooth = cupy.array([1, 2, 1], dtype=weights_dtype) + return _prewitt_or_sobel( + input, axis, output, mode, cval, smooth, algorithm + ) + + +def _prewitt_or_sobel(input, axis, output, mode, cval, weights, algorithm): + axis = internal._normalize_axis_index(axis, input.ndim) + + weights_dtype = cupy.promote_types(input.dtype, cupy.float32) + + def get(is_diff, dtype=weights_dtype): + return cupy.array([-1, 0, 1], dtype=dtype) if is_diff else weights # noqa + + return _run_1d_correlates(input, [a == axis for a in range(input.ndim)], + get, output, mode, cval, algorithm=algorithm) + + +def generic_laplace(input, derivative2, output=None, mode="reflect", + cval=0.0, extra_arguments=(), extra_keywords=None): + """Multi-dimensional Laplace filter using a provided second derivative + function. + + Args: + input (cupy.ndarray): The input array. + derivative2 (callable): Function or other callable with the following + signature that is called once per axis:: + + derivative2(input, axis, output, mode, cval, + *extra_arguments, **extra_keywords) + + where ``input`` and ``output`` are ``cupy.ndarray``, ``axis`` is an + ``int`` from ``0`` to the number of dimensions, and ``mode``, + ``cval``, ``extra_arguments``, ``extra_keywords`` are the values + given to this function. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + extra_arguments (sequence, optional): + Sequence of extra positional arguments to pass to ``derivative2``. + extra_keywords (dict, optional): + dict of extra keyword arguments to pass ``derivative2``. 
+ + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.generic_laplace` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. + """ + if extra_keywords is None: + extra_keywords = {} + ndim = input.ndim + modes = _util._fix_sequence_arg(mode, ndim, 'mode', + _util._check_mode) + output = _util._get_output(output, input) + if ndim == 0: + output[:] = input + return output + derivative2(input, 0, output, modes[0], cval, + *extra_arguments, **extra_keywords) + if ndim > 1: + tmp = _util._get_output(output.dtype, input) + for i in range(1, ndim): + derivative2(input, i, tmp, modes[i], cval, + *extra_arguments, **extra_keywords) + output += tmp + return output + + +def laplace(input, output=None, mode="reflect", cval=0.0, *, algorithm=None): + """Multi-dimensional Laplace filter based on approximate second + derivatives. + + Args: + input (cupy.ndarray): The input array. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.laplace` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. 
+ """ + weights_dtype = cupy.promote_types(input.dtype, cupy.float32) + weights = cupy.array([1, -2, 1], dtype=weights_dtype) + + def derivative2(input, axis, output, mode, cval): + return correlate1d( + input, weights, axis, output, mode, cval, algorithm=algorithm + ) + + return generic_laplace(input, derivative2, output, mode, cval) + + +def gaussian_laplace(input, sigma, output=None, mode="reflect", + cval=0.0, *, algorithm=None, **kwargs): + """Multi-dimensional Laplace filter using Gaussian second derivatives. + + Args: + input (cupy.ndarray): The input array. + sigma (scalar or sequence of scalar): Standard deviations for each axis + of Gaussian kernel. A single value applies to all axes. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + kwargs (dict, optional): + dict of extra keyword arguments to pass ``gaussian_filter()``. + + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.gaussian_laplace` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. 
+ """ + def derivative2(input, axis, output, mode, cval): + order = [0] * input.ndim + order[axis] = 2 + return gaussian_filter(input, sigma, order, output, mode, cval, + algorithm=algorithm, **kwargs) + return generic_laplace(input, derivative2, output, mode, cval) + + +def generic_gradient_magnitude(input, derivative, output=None, + mode="reflect", cval=0.0, + extra_arguments=(), extra_keywords=None): + """Multi-dimensional gradient magnitude filter using a provided derivative + function. + + Args: + input (cupy.ndarray): The input array. + derivative (callable): Function or other callable with the following + signature that is called once per axis:: + + derivative(input, axis, output, mode, cval, + *extra_arguments, **extra_keywords) + + where ``input`` and ``output`` are ``cupy.ndarray``, ``axis`` is an + ``int`` from ``0`` to the number of dimensions, and ``mode``, + ``cval``, ``extra_arguments``, ``extra_keywords`` are the values + given to this function. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + extra_arguments (sequence, optional): + Sequence of extra positional arguments to pass to ``derivative2``. + extra_keywords (dict, optional): + dict of extra keyword arguments to pass ``derivative2``. + + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.generic_gradient_magnitude` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. 
+ """ + if extra_keywords is None: + extra_keywords = {} + ndim = input.ndim + modes = _util._fix_sequence_arg(mode, ndim, 'mode', + _util._check_mode) + output = _util._get_output(output, input) + if ndim == 0: + output[:] = input + return output + derivative(input, 0, output, modes[0], cval, + *extra_arguments, **extra_keywords) + output *= output + if ndim > 1: + tmp = _util._get_output(output.dtype, input) + for i in range(1, ndim): + derivative(input, i, tmp, modes[i], cval, + *extra_arguments, **extra_keywords) + tmp *= tmp + output += tmp + return cupy.sqrt(output, output, casting='unsafe') + + +def gaussian_gradient_magnitude(input, sigma, output=None, mode="reflect", + cval=0.0, *, algorithm=None, **kwargs): + """Multi-dimensional gradient magnitude using Gaussian derivatives. + + Args: + input (cupy.ndarray): The input array. + sigma (scalar or sequence of scalar): Standard deviations for each axis + of Gaussian kernel. A single value applies to all axes. + output (cupy.ndarray, dtype or None): The array in which to place the + output. Default is is same dtype as the input. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``'constant'``. Default is ``0.0``. + kwargs (dict, optional): + dict of extra keyword arguments to pass ``gaussian_filter()``. + + Returns: + cupy.ndarray: The result of the filtering. + + .. seealso:: :func:`scipy.ndimage.gaussian_gradient_magnitude` + + .. note:: + When the output data type is integral (or when no output is provided + and input is integral) the results may not perfectly match the results + from SciPy due to floating-point rounding of intermediate results. 
def minimum_filter(input, size=None, footprint=None, output=None,
                   mode="reflect", cval=0.0, origin=0):
    """Apply a multi-dimensional minimum filter.

    Args:
        input (cupy.ndarray): The input array.
        size (int or sequence of int): One of ``size`` or ``footprint`` must be
            provided. If ``footprint`` is given, ``size`` is ignored. Otherwise
            ``footprint = cupy.ones(size)`` with ``size`` automatically made to
            match the number of dimensions in ``input``.
        footprint (cupy.ndarray): a boolean array which specifies which of the
            elements within this shape will get passed to the filter function.
        output (cupy.ndarray, dtype or None): The array in which to place the
            output. Default is the same dtype as the input.
        mode (str): The array borders are handled according to the given mode
            (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
            ``'wrap'``). Default is ``'reflect'``.
        cval (scalar): Value to fill past edges of input if mode is
            ``'constant'``. Default is ``0.0``.
        origin (int or sequence of int): The origin parameter controls the
            placement of the filter, relative to the center of the current
            element of the input. Default of 0 is equivalent to
            ``(0,)*input.ndim``.

    Returns:
        cupy.ndarray: The result of the filtering.

    .. seealso:: :func:`scipy.ndimage.minimum_filter`
    """
    # The plain minimum filter never supplies a structuring element.
    no_structure = None
    return _min_or_max_filter(input, size, footprint, no_structure, output,
                              mode, cval, origin, 'min')
def _min_or_max_filter(input, size, ftprnt, structure, output, mode, cval,
                       origin, func):
    """Shared implementation of the n-dimensional minimum/maximum filters.

    Args:
        input (cupy.ndarray): The input array.
        size (int, sequence of int or None): Filter size; validated together
            with ``ftprnt`` and ``structure``.
        ftprnt (array-like or None): Boolean footprint of the filter.
        structure (array-like or None): Non-flat structuring element. Only
            the grey morphology callers pass one; the regular min/max filters
            pass ``None``.
        output (cupy.ndarray, dtype or None): Where to place the result.
        mode (str): Border handling mode.
        cval (scalar): Fill value for ``'constant'`` mode. NaN is rejected.
        origin (int or sequence of int): Placement of the filter.
        func (str): ``'min'`` or ``'max'``.

    Returns:
        cupy.ndarray: The result of the filtering.

    Raises:
        NotImplementedError: If ``cval`` is NaN.
        RuntimeError: If ``structure`` has a different number of dimensions
            than ``input``.
    """
    # structure is used by morphology.grey_erosion() and grey_dilation()
    # and not by the regular min/max filters

    sizes, ftprnt, structure = _filters_core._check_size_footprint_structure(
        input.ndim, size, ftprnt, structure)
    # NaN is the only value that compares unequal to itself. An identity test
    # against ``cupy.nan`` would miss NaNs created elsewhere, for example
    # ``float('nan')``.
    if cval != cval:
        raise NotImplementedError("NaN cval is unsupported")

    if sizes is not None:
        # Separable filter, run as a series of 1D filters
        fltr = minimum_filter1d if func == 'min' else maximum_filter1d
        return _filters_core._run_1d_filters(
            [fltr if width > 1 else None for width in sizes],
            input, sizes, output, mode, cval, origin)

    origins, int_type = _filters_core._check_nd_args(input, ftprnt,
                                                     mode, origin, 'footprint')
    if structure is not None and structure.ndim != input.ndim:
        raise RuntimeError('structure array has incorrect shape')

    if ftprnt.size == 0:
        return cupy.zeros_like(input)
    offsets = _filters_core._origins_to_offsets(origins, ftprnt.shape)
    # ``has_central_value`` records whether the footprint covers the pixel
    # being processed, which decides how the kernel seeds its running value.
    kernel = _get_min_or_max_kernel(mode, ftprnt.shape, func,
                                    offsets, float(cval), int_type,
                                    has_structure=structure is not None,
                                    has_central_value=bool(ftprnt[offsets]))
    return _filters_core._call_kernel(kernel, input, ftprnt, output,
                                      structure, weights_dtype=bool)


def minimum_filter1d(input, size, axis=-1, output=None, mode="reflect",
                     cval=0.0, origin=0):
    """Compute the minimum filter along a single axis.

    Args:
        input (cupy.ndarray): The input array.
        size (int): Length of the minimum filter.
        axis (int): The axis of input along which to calculate. Default is -1.
        output (cupy.ndarray, dtype or None): The array in which to place the
            output. Default is the same dtype as the input.
        mode (str): The array borders are handled according to the given mode
            (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
            ``'wrap'``). Default is ``'reflect'``.
        cval (scalar): Value to fill past edges of input if mode is
            ``'constant'``. Default is ``0.0``.
        origin (int): The origin parameter controls the placement of the
            filter, relative to the center of the current element of the
            input. Default is ``0``.

    Returns:
        cupy.ndarray: The result of the filtering.

    .. seealso:: :func:`scipy.ndimage.minimum_filter1d`
    """
    return _min_or_max_1d(input, size, axis, output, mode, cval, origin, 'min')
def _min_or_max_1d(input, size, axis=-1, output=None, mode="reflect", cval=0.0,
                   origin=0, func='min'):
    """Run a minimum or maximum filter along a single axis.

    Args:
        input (cupy.ndarray): The input array.
        size (int): Length of the filter.
        axis (int): Axis of ``input`` to filter along. Default is -1.
        output (cupy.ndarray, dtype or None): Where to place the result.
        mode (str): Border handling mode. Default is ``'reflect'``.
        cval (scalar): Fill value for ``'constant'`` mode. Default is ``0.0``.
        origin (int): Placement of the filter. Default is ``0``.
        func (str): ``'min'`` or ``'max'``. Default is ``'min'``.

    Returns:
        cupy.ndarray: The result of the filtering.
    """
    # An all-True line, reshaped by the core helper so that it spans ``axis``
    # of an ``input.ndim``-dimensional footprint.
    line = cupy.ones(size, dtype=bool)
    nd_footprint, nd_origin = _filters_core._convert_1d_args(
        input.ndim, line, origin, axis)
    origins, int_type = _filters_core._check_nd_args(
        input, nd_footprint, mode, nd_origin, 'footprint')
    kernel_shape = nd_footprint.shape
    kernel = _get_min_or_max_kernel(
        mode, kernel_shape, func,
        _filters_core._origins_to_offsets(origins, kernel_shape),
        float(cval), int_type, has_weights=False)
    # The kernel was generated without weights, so no footprint array is
    # handed to it.
    return _filters_core._call_kernel(kernel, input, None, output,
                                      weights_dtype=bool)
def rank_filter(input, rank, size=None, footprint=None, output=None,
                mode="reflect", cval=0.0, origin=0):
    """Apply a multi-dimensional rank filter.

    Args:
        input (cupy.ndarray): The input array.
        rank (int): The rank of the element to get. Can be negative to count
            from the largest value, e.g. ``-1`` indicates the largest value.
        size (int or sequence of int): One of ``size`` or ``footprint`` must be
            provided. If ``footprint`` is given, ``size`` is ignored. Otherwise
            ``footprint = cupy.ones(size)`` with ``size`` automatically made to
            match the number of dimensions in ``input``.
        footprint (cupy.ndarray): a boolean array which specifies which of the
            elements within this shape will get passed to the filter function.
        output (cupy.ndarray, dtype or None): The array in which to place the
            output. Default is the same dtype as the input.
        mode (str): The array borders are handled according to the given mode
            (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
            ``'wrap'``). Default is ``'reflect'``.
        cval (scalar): Value to fill past edges of input if mode is
            ``'constant'``. Default is ``0.0``.
        origin (int or sequence of int): The origin parameter controls the
            placement of the filter, relative to the center of the current
            element of the input. Default of 0 is equivalent to
            ``(0,)*input.ndim``.

    Returns:
        cupy.ndarray: The result of the filtering.

    .. seealso:: :func:`scipy.ndimage.rank_filter`
    """
    rank = int(rank)

    def resolve_rank(footprint_size):
        # Negative ranks count back from the end of the sorted footprint.
        if rank < 0:
            return rank + footprint_size
        return rank

    return _rank_filter(input, resolve_rank,
                        size, footprint, output, mode, cval, origin)
def percentile_filter(input, percentile, size=None, footprint=None,
                      output=None, mode="reflect", cval=0.0, origin=0):
    """Multi-dimensional percentile filter.

    Args:
        input (cupy.ndarray): The input array.
        percentile (scalar): The percentile of the element to get (from ``0``
            to ``100``). Can be negative, thus ``-20`` equals ``80``.
        size (int or sequence of int): One of ``size`` or ``footprint`` must be
            provided. If ``footprint`` is given, ``size`` is ignored. Otherwise
            ``footprint = cupy.ones(size)`` with ``size`` automatically made to
            match the number of dimensions in ``input``.
        footprint (cupy.ndarray): a boolean array which specifies which of the
            elements within this shape will get passed to the filter function.
        output (cupy.ndarray, dtype or None): The array in which to place the
            output. Default is the same dtype as the input.
        mode (str): The array borders are handled according to the given mode
            (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
            ``'wrap'``). Default is ``'reflect'``.
        cval (scalar): Value to fill past edges of input if mode is
            ``'constant'``. Default is ``0.0``.
        origin (int or sequence of int): The origin parameter controls the
            placement of the filter, relative to the center of the current
            element of the input. Default of 0 is equivalent to
            ``(0,)*input.ndim``.

    Returns:
        cupy.ndarray: The result of the filtering.

    Raises:
        RuntimeError: If ``percentile`` is outside ``[-100, 100]``.

    .. seealso:: :func:`scipy.ndimage.percentile_filter`
    """
    percentile = float(percentile)
    if percentile < 0.0:
        percentile += 100.0
    if percentile < 0.0 or percentile > 100.0:
        raise RuntimeError('invalid percentile')
    if percentile == 100.0:
        # 100% must map to the last element rather than one past the end.
        def get_rank(fs):
            return fs - 1
    else:
        def get_rank(fs):
            return int(float(fs) * percentile / 100.0)
    return _rank_filter(input, get_rank,
                        size, footprint, output, mode, cval, origin)


def _rank_filter(input, get_rank, size=None, footprint=None, output=None,
                 mode="reflect", cval=0.0, origin=0):
    """Shared implementation of the rank, median and percentile filters.

    Args:
        input (cupy.ndarray): The input array.
        get_rank (callable): Called with the number of active footprint
            elements; returns the 0-based rank to select.
        size (int or sequence of int): Filter size, used when ``footprint``
            is not given.
        footprint (cupy.ndarray): Boolean footprint of the filter.
        output (cupy.ndarray, dtype or None): Where to place the result.
        mode (str): Border handling mode. Default is ``'reflect'``.
        cval (scalar): Fill value for ``'constant'`` mode. NaN is rejected.
        origin (int or sequence of int): Placement of the filter.

    Returns:
        cupy.ndarray: The result of the filtering.

    Raises:
        NotImplementedError: If ``cval`` is NaN.
        RuntimeError: If the rank falls outside the footprint size.
    """
    _, footprint, _ = _filters_core._check_size_footprint_structure(
        input.ndim, size, footprint, None, force_footprint=True)
    # NaN is the only value that compares unequal to itself. An identity test
    # against ``cupy.nan`` would miss NaNs created elsewhere, for example
    # ``float('nan')``.
    if cval != cval:
        raise NotImplementedError("NaN cval is unsupported")
    origins, int_type = _filters_core._check_nd_args(input, footprint,
                                                     mode, origin, 'footprint')
    if footprint.size == 0:
        return cupy.zeros_like(input)
    filter_size = int(footprint.sum())
    rank = get_rank(filter_size)
    if rank < 0 or rank >= filter_size:
        raise RuntimeError('rank not within filter footprint size')
    # The extreme ranks are plain min/max filters, so reuse those.
    if rank == 0:
        return _min_or_max_filter(input, None, footprint, None, output, mode,
                                  cval, origins, 'min')
    if rank == filter_size - 1:
        return _min_or_max_filter(input, None, footprint, None, output, mode,
                                  cval, origins, 'max')
    offsets = _filters_core._origins_to_offsets(origins, footprint.shape)
    kernel = _get_rank_kernel(filter_size, rank, mode, footprint.shape,
                              offsets, float(cval), int_type)
    return _filters_core._call_kernel(kernel, input, footprint, output,
                                      weights_dtype=bool)


# Device-side shell sort used by the rank kernel. The doubled braces survive
# ``str.format`` as literal braces; ``{gap}`` is the only substituted field.
__SHELL_SORT = '''
__device__ void sort(X *array, int size) {{
    int gap = {gap};
    while (gap > 1) {{
        gap /= 3;
        for (int i = gap; i < size; ++i) {{
            X value = array[i];
            int j = i - gap;
            while (j >= 0 && value < array[j]) {{
                array[j + gap] = array[j];
                j -= gap;
            }}
            array[j + gap] = value;
        }}
    }}
}}'''
@cupy._util.memoize(for_each_device=True)
def _get_rank_kernel(filter_size, rank, mode, w_shape, offsets, cval,
                     int_type):
    """Build the kernel that selects the ``rank``-th smallest footprint value.

    The result is memoized per device on the argument values.

    Args:
        filter_size (int): Number of values collected per output element.
        rank (int): 0-based rank of the value to return.
        mode (str): Border handling mode, forwarded to the kernel generator.
        w_shape (tuple of int): Shape of the footprint.
        offsets (tuple of int): Per-axis footprint offsets.
        cval (float): Fill value, forwarded to the kernel generator.
        int_type (str): Index type, forwarded to the kernel generator.

    Returns:
        The kernel object produced by ``_filters_core._generate_nd_kernel``.
    """
    # Distance of the requested rank from the nearer end of the sorted values.
    s_rank = min(rank, filter_size - rank - 1)
    # The threshold was set based on the measurements on a V100
    # TODO(leofang, anaruse): Use Optuna to automatically tune the threshold,
    # as it may vary depending on the GPU in use, compiler version, dtype,
    # filter size, etc.
    if s_rank <= 80:
        # When s_rank is small and register usage is low, this partial
        # selection sort approach is faster than general sorting approach
        # using shell sort.
        if s_rank == rank:
            comp_op = '<'
        else:
            comp_op = '>'
        # Only ``s_rank + 2`` values are ever kept per output element.
        array_size = s_rank + 2
        # This snippet is formatted twice (here and again by the kernel
        # generator), hence the quadrupled braces.
        found_post = '''
            if (iv > {rank} + 1) {{{{
                int target_iv = 0;
                X target_val = values[0];
                for (int jv = 1; jv <= {rank} + 1; jv++) {{{{
                    if (target_val {comp_op} values[jv]) {{{{
                        target_val = values[jv];
                        target_iv = jv;
                    }}}}
                }}}}
                if (target_iv <= {rank}) {{{{
                    values[target_iv] = values[{rank} + 1];
                }}}}
                iv = {rank} + 1;
            }}}}'''.format(rank=s_rank, comp_op=comp_op)
        # NOTE(review): upstream CuPy calls the casting helper with an explicit
        # template argument; confirm nothing was dropped from ``cast(`` here.
        post = '''
            X target_val = values[0];
            for (int jv = 1; jv <= {rank}; jv++) {{
                if (target_val {comp_op} values[jv]) {{
                    target_val = values[jv];
                }}
            }}
            y=cast(target_val);'''.format(rank=s_rank, comp_op=comp_op)
        sorter = ''
    else:
        # General path: collect every footprint value, sort, index by rank.
        array_size = filter_size
        found_post = ''
        post = 'sort(values,{});\ny=cast(values[{}]);'.format(
            filter_size, rank)
        sorter = __SHELL_SORT.format(gap=_get_shell_gap(filter_size))

    return _filters_core._generate_nd_kernel(
        'rank_{}_{}'.format(filter_size, rank),
        'int iv = 0;\nX values[{}];'.format(array_size),
        'values[iv++] = {value};' + found_post, post,
        mode, w_shape, int_type, offsets, cval, preamble=sorter)
def _origins_to_offsets(origins, w_shape):
    """Convert per-axis origins into offsets within the filter window.

    Each offset is half the window width along that axis (floor division)
    plus the origin for that axis.
    """
    offsets = []
    for width, origin in zip(w_shape, origins):
        offsets.append(width // 2 + origin)
    return tuple(offsets)


def _check_size_footprint_structure(ndim, size, footprint, structure,
                                    stacklevel=3, force_footprint=False):
    """Validate and normalise the size/footprint/structure arguments.

    Returns:
        tuple: ``(sizes, footprint, structure)``. ``sizes`` is set (and the
        other two are ``None``) when the filter is a full box and
        ``force_footprint`` is false; otherwise ``sizes`` is ``None`` and a
        boolean footprint is returned, together with the structure if one
        was given.

    Raises:
        RuntimeError: If none of the three arguments is provided, or if an
            all-True footprint does not have ``ndim`` dimensions.
        ValueError: If the footprint contains no True element.
    """
    if footprint is None and structure is None:
        if size is None:
            raise RuntimeError("no footprint or filter size provided")
        sizes = _util._fix_sequence_arg(size, ndim, 'size', int)
        if not force_footprint:
            return sizes, None, None
        return None, cupy.ones(sizes, bool), None

    if size is not None:
        # Name whichever argument takes precedence over ``size``.
        winner = 'footprint' if footprint is not None else 'structure'
        warnings.warn("ignoring size because {} is set".format(winner),
                      UserWarning, stacklevel=stacklevel + 1)

    if footprint is not None:
        footprint = cupy.array(footprint, bool, True, 'C')
        if not footprint.any():
            raise ValueError("all-zero footprint is not supported")

    if structure is not None:
        structure = cupy.ascontiguousarray(structure)
        if footprint is None:
            footprint = cupy.ones(structure.shape, bool)
        return None, footprint, structure

    # Footprint only: an all-True footprint is reported as plain sizes unless
    # the caller insists on a footprint array.
    if force_footprint or not footprint.all():
        return None, footprint, None
    if footprint.ndim != ndim:
        raise RuntimeError("size must have length equal to input rank")
    return footprint.shape, None, None
def _check_nd_args(input, weights, mode, origin, wghts_name='filter weights'):
    """Validate the arguments of an n-dimensional filter.

    Args:
        input (cupy.ndarray): The input array.
        weights (cupy.ndarray): Filter weights or footprint.
        mode (str): Border handling mode.
        origin (int or sequence of int): Placement of the filter.
        wghts_name (str): Name used for ``weights`` in error messages.

    Returns:
        tuple: The per-axis origins as a tuple, and the index type chosen by
        ``_util._get_inttype`` for ``input``.

    Raises:
        RuntimeError: If ``weights`` occupies 2 GiB or more, or if its number
            of non-zero-length axes differs from ``input.ndim``.
    """
    _util._check_mode(mode)
    # Weights must always be less than 2 GiB
    two_gib = 1 << 31
    if weights.nbytes >= two_gib:
        raise RuntimeError('weights must be 2 GiB or less, use FFTs instead')
    # Zero-length axes are left out of the dimensionality check.
    nonzero_dims = [extent for extent in weights.shape if extent != 0]
    n_dims = len(nonzero_dims)
    if n_dims != input.ndim:
        raise RuntimeError('{} array has incorrect shape'.format(wghts_name))
    origins = _util._fix_sequence_arg(origin, n_dims, 'origin', int)
    for axis_origin, width in zip(origins, nonzero_dims):
        _util._check_origin(axis_origin, width)
    return tuple(origins), _util._get_inttype(input)
def _call_kernel(kernel, input, weights, output, structure=None,
                 weights_dtype=numpy.float64, structure_dtype=numpy.float64):
    """
    Calls a constructed ElementwiseKernel. The kernel must take an input image,
    an optional array of weights, an optional array for the structure, and an
    output array.

    weights and structure can be given as None (structure defaults to None) in
    which case they are not passed to the kernel at all. If the output is given
    as None then it will be allocated in this function.

    This function deals with making sure that the weights and structure are
    contiguous and float64 (or bool for weights that are footprints)*, that the
    output is allocated and appropriately shaped. This also deals with the
    situation that the input and output arrays overlap in memory.

    * weights is always cast to float64 or bool in order to get an output
    compatible with SciPy, though float32 might be sufficient when input dtype
    is low precision. If weights_dtype is passed as weights.dtype then no
    dtype conversion will occur. The input and output are never converted.

    Returns:
        cupy.ndarray: The array holding the result. When ``output`` was given
        as an array, that same array is returned.
    """
    args = [input]
    complex_output = input.dtype.kind == 'c'
    if weights is not None:
        weights = cupy.ascontiguousarray(weights, weights_dtype)
        complex_output = complex_output or weights.dtype.kind == 'c'
        args.append(weights)
    if structure is not None:
        structure = cupy.ascontiguousarray(structure, structure_dtype)
        args.append(structure)
    output = _util._get_output(output, input, None, complex_output)  # noqa
    needs_temp = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS')
    if needs_temp:
        # The kernel reads neighbouring input elements, so it must not write
        # into memory that overlaps the input. Run it on a scratch buffer and
        # keep the caller's array aside in ``temp``.
        output, temp = _util._get_output(output.dtype, input, None, complex_output), output  # noqa
    args.append(output)
    kernel(*args)
    if needs_temp:
        # Copy the finished result from the scratch buffer into the caller's
        # array and hand that array back.
        temp[...] = output
        output = temp
    return output
def _get_coord_map(ndim, nprepad=0):
    """Extract target coordinate from coords array (for map_coordinates).

    Parameters
    ----------
    ndim : int
        Number of coordinate variables to emit.
    nprepad : int, optional
        When positive, ``+ (W)nprepad`` is appended to every coordinate.

    Returns
    -------
    list of str
        Device-code snippets: the ``ncoords`` declaration followed by one
        assignment per axis.

    Notes
    -----
    Assumes the following variables have been initialized on the device::

        coords (ndarray): array of shape (ncoords, ndim) containing the target
            coordinates.
        c_j: variables to hold the target coordinates

    computes::

        c_j = coords[i + j * ncoords];

    ncoords is determined by the size of the output array, y.
    y will be indexed by the CIndexer, _ind.
    Thus ncoords = _ind.size();

    """
    offset = f" + (W){nprepad}" if nprepad > 0 else ''
    snippets = ['ptrdiff_t ncoords = _ind.size();']
    snippets.extend(
        f'''
    W c_{axis} = coords[i + {axis} * ncoords]{offset};'''
        for axis in range(ndim))
    return snippets
def _get_coord_zoom_and_shift_grid(ndim, nprepad=0):
    """Compute target coordinate based on a shift followed by a zoom.

    This version zooms from the outer edges of the grid pixels.

    Parameters
    ----------
    ndim : int
        Number of coordinate variables to emit.
    nprepad : int, optional
        When positive, ``+ (W)nprepad`` is appended to every coordinate.

    Returns
    -------
    list of str
        One device-code assignment per axis.

    Notes
    -----
    Assumes the following variables have been initialized on the device::

        in_coord[ndim]: array containing the source coordinate
        zoom[ndim]: array containing the zoom for each axis
        shift[ndim]: array containing the shift for each axis

    computes::

        c_j = zoom[j] * (in_coord[j] - shift[j] + 0.5) - 0.5

    """
    ops = []
    pre = f" + (W){nprepad}" if nprepad > 0 else ''
    for j in range(ndim):
        # Every index is interpolated from the Python loop variable; a literal
        # ``shift[j]`` would reference a name the generated code never
        # declares.
        ops.append(f'''
    W c_{j} = zoom[{j}] * ((W)in_coord[{j}] - shift[{j}] + 0.5) - 0.5{pre};''')
    return ops


def _get_coord_zoom(ndim, nprepad=0):
    """Compute target coordinate based on a zoom.

    This version zooms from the center of the edge pixels.

    Parameters
    ----------
    ndim : int
        Number of coordinate variables to emit.
    nprepad : int, optional
        When positive, ``+ (W)nprepad`` is appended to every coordinate.

    Returns
    -------
    list of str
        One device-code assignment per axis.

    Notes
    -----
    Assumes the following variables have been initialized on the device::

        in_coord[ndim]: array containing the source coordinate
        zoom[ndim]: array containing the zoom for each axis

    computes::

        c_j = zoom[j] * in_coord[j]

    """
    ops = []
    pre = f" + (W){nprepad}" if nprepad > 0 else ''
    for j in range(ndim):
        ops.append(f'''
    W c_{j} = zoom[{j}] * (W)in_coord[{j}]{pre};''')
    return ops
def _get_coord_shift(ndim, nprepad=0):
    """Compute target coordinate based on a shift.

    Parameters
    ----------
    ndim : int
        Number of coordinate variables to emit.
    nprepad : int, optional
        When positive, ``+ (W)nprepad`` is appended to every coordinate.

    Returns
    -------
    list of str
        One device-code assignment per axis.

    Notes
    -----
    Assumes the following variables have been initialized on the device::

        in_coord[ndim]: array containing the source coordinate
        shift[ndim]: array containing the shift for each axis

    computes::

        c_j = in_coord[j] - shift[j]

    """
    offset = f" + (W){nprepad}" if nprepad > 0 else ''
    return [
        f'''
    W c_{axis} = (W)in_coord[{axis}] - shift[{axis}]{offset};'''
        for axis in range(ndim)
    ]


def _get_coord_affine(ndim, nprepad=0):
    """Compute target coordinate based on a homogeneous transformation matrix.

    The homogeneous matrix has shape (ndim, ndim + 1). It corresponds to
    affine matrix where the last row of the affine is assumed to be:
    ``[0] * ndim + [1]``.

    Parameters
    ----------
    ndim : int
        Number of coordinate variables to emit.
    nprepad : int, optional
        When positive, ``+ (W)nprepad`` is appended to the translation term
        of every coordinate.

    Returns
    -------
    list of str
        Device-code snippets; for each axis an initialisation, one
        accumulation per input axis, and the translation term.

    Notes
    -----
    Assumes the following variables have been initialized on the device::

        mat(array): array containing the (ndim, ndim + 1) transform matrix.
        in_coord(array): coordinates of the input

    For example, in 2D:

        c_0 = mat[0] * in_coord[0] + mat[1] * in_coord[1] + mat[2];
        c_1 = mat[3] * in_coord[0] + mat[4] * in_coord[1] + mat[5];

    """
    offset = f" + (W){nprepad}" if nprepad > 0 else ''
    row_len = ndim + 1
    snippets = []
    for row in range(ndim):
        # Flat index of the first matrix entry of this row.
        base = row_len * row
        snippets.append(f'''
    W c_{row} = (W)0.0;''')
        snippets.extend(
            f'''
    c_{row} += mat[{base + col}] * (W)in_coord[{col}];'''
            for col in range(ndim))
        snippets.append(f'''
    c_{row} += mat[{base + ndim}]{offset};''')
    return snippets


def _unravel_loop_index(shape, uint_t='unsigned int'):
    """
    Declare a multi-index array ``in_coord`` and unravel the 1D index ``i``
    into it. The emitted code assumes that the array is C-ordered.

    ``shape`` gives the extent of each axis and ``uint_t`` the name of the
    integer type used for the declarations. Returns the code as one string.
    """
    ndim = len(shape)
    pieces = [f'''
        {uint_t} in_coord[{ndim}];
        {uint_t} s, t, idx = i;''']
    # Peel off axes from the last one down to axis 1; what remains of the
    # index afterwards belongs to axis 0.
    for axis in reversed(range(1, ndim)):
        extent = shape[axis]
        pieces.append(f'''
        s = {extent};
        t = idx / s;
        in_coord[{axis}] = idx - t * s;
        idx = t;''')
    pieces.append('''
        in_coord[0] = idx;''')
    return '\n'.join(pieces)
+ + Returns: + operation (str): code body for the ElementwiseKernel + name (str): name for the ElementwiseKernel + """ + + ops = [] + internal_dtype = 'double' if integer_output else 'Y' + ops.append(f'{internal_dtype} out = 0.0;') + + if large_int: + uint_t = 'size_t' + int_t = 'ptrdiff_t' + else: + uint_t = 'unsigned int' + int_t = 'int' + + # determine strides for x along each axis + for j in range(ndim): + ops.append(f'const {int_t} xsize_{j} = x.shape()[{j}];') + ops.append(f'const {uint_t} sx_{ndim - 1} = 1;') + for j in range(ndim - 1, 0, -1): + ops.append(f'const {uint_t} sx_{j - 1} = sx_{j} * xsize_{j};') + + if not omit_in_coord: + # create in_coords array to store the unraveled indices + ops.append(_unravel_loop_index(yshape, uint_t)) + + # compute the transformed (target) coordinates, c_j + ops = ops + coord_func(ndim, nprepad) + + if cval is numpy.nan: + cval = '(Y)CUDART_NAN' + elif cval == numpy.inf: + cval = '(Y)CUDART_INF' + elif cval == -numpy.inf: + cval = '(Y)(-CUDART_INF)' + else: + cval = f'({internal_dtype}){cval}' + + if mode == 'constant': + # use cval if coordinate is outside the bounds of x + _cond = ' || '.join( + [f'(c_{j} < 0) || (c_{j} > xsize_{j} - 1)' for j in range(ndim)]) + ops.append(f''' + if ({_cond}) + {{ + out = {cval}; + }} + else + {{''') + + if order == 0: + if mode == 'wrap': + ops.append('double dcoord;') # mode 'wrap' requires this to work + for j in range(ndim): + # determine nearest neighbor + if mode == 'wrap': + ops.append(f''' + dcoord = c_{j};''') + else: + ops.append(f''' + {int_t} cf_{j} = ({int_t})floor((double)c_{j} + 0.5);''') + + # handle boundary + if mode != 'constant': + if mode == 'wrap': + ixvar = 'dcoord' + float_ix = True + else: + ixvar = f'cf_{j}' + float_ix = False + ops.append( + _util._generate_boundary_condition_ops( + mode, ixvar, f'xsize_{j}', int_t, float_ix)) + if mode == 'wrap': + ops.append(f''' + {int_t} cf_{j} = ({int_t})floor(dcoord + 0.5);''') + + # sum over ic_j will give the raveled 
coordinate in the input + ops.append(f''' + {int_t} ic_{j} = cf_{j} * sx_{j};''') + _coord_idx = ' + '.join([f'ic_{j}' for j in range(ndim)]) + if mode == 'grid-constant': + _cond = ' || '.join([f'(ic_{j} < 0)' for j in range(ndim)]) + ops.append(f''' + if ({_cond}) {{ + out = {cval}; + }} else {{ + out = ({internal_dtype})x[{_coord_idx}]; + }}''') + else: + ops.append(f''' + out = ({internal_dtype})x[{_coord_idx}];''') + + elif order == 1: + for j in range(ndim): + # get coordinates for linear interpolation along axis j + ops.append(f''' + {int_t} cf_{j} = ({int_t})floor((double)c_{j}); + {int_t} cc_{j} = cf_{j} + 1; + {int_t} n_{j} = (c_{j} == cf_{j}) ? 1 : 2; // points needed + ''') + + if mode == 'wrap': + ops.append(f''' + double dcoordf = c_{j}; + double dcoordc = c_{j} + 1;''') + else: + # handle boundaries for extension modes. + ops.append(f''' + {int_t} cf_bounded_{j} = cf_{j}; + {int_t} cc_bounded_{j} = cc_{j};''') + + if mode != 'constant': + if mode == 'wrap': + ixvar = 'dcoordf' + float_ix = True + else: + ixvar = f'cf_bounded_{j}' + float_ix = False + ops.append( + _util._generate_boundary_condition_ops( + mode, ixvar, f'xsize_{j}', int_t, float_ix)) + + ixvar = 'dcoordc' if mode == 'wrap' else f'cc_bounded_{j}' + ops.append( + _util._generate_boundary_condition_ops( + mode, ixvar, f'xsize_{j}', int_t, float_ix)) + if mode == 'wrap': + ops.append( + f''' + {int_t} cf_bounded_{j} = ({int_t})floor(dcoordf);; + {int_t} cc_bounded_{j} = ({int_t})floor(dcoordf + 1);; + ''' + ) + + ops.append(f''' + for (int s_{j} = 0; s_{j} < n_{j}; s_{j}++) + {{ + W w_{j}; + {int_t} ic_{j}; + if (s_{j} == 0) + {{ + w_{j} = (W)cc_{j} - c_{j}; + ic_{j} = cf_bounded_{j} * sx_{j}; + }} else + {{ + w_{j} = c_{j} - (W)cf_{j}; + ic_{j} = cc_bounded_{j} * sx_{j}; + }}''') + elif order > 1: + if mode == 'grid-constant': + spline_mode = 'constant' + elif mode == 'nearest': + spline_mode = 'nearest' + else: + spline_mode = _spline_prefilter_core._get_spline_mode(mode) + + # wx, wy 
are temporary variables used during spline weight computation + ops.append(f''' + W wx, wy; + {int_t} start;''') + for j in range(ndim): + # determine weights along the current axis + ops.append(f''' + W weights_{j}[{order + 1}];''') + ops.append(spline_weights_inline[order].format(j=j, order=order)) + + # get starting coordinate for spline interpolation along axis j + if mode in ['wrap']: + ops.append(f'double dcoord = c_{j};') + coord_var = 'dcoord' + ops.append( + _util._generate_boundary_condition_ops( + mode, coord_var, f'xsize_{j}', int_t, True)) + else: + coord_var = f'(double)c_{j}' + + if order & 1: + op_str = ''' + start = ({int_t})floor({coord_var}) - {order_2};''' + else: + op_str = ''' + start = ({int_t})floor({coord_var} + 0.5) - {order_2};''' + ops.append( + op_str.format( + int_t=int_t, coord_var=coord_var, order_2=order // 2 + )) + + # set of coordinate values within spline footprint along axis j + ops.append(f'''{int_t} ci_{j}[{order + 1}];''') + for k in range(order + 1): + ixvar = f'ci_{j}[{k}]' + ops.append(f''' + {ixvar} = start + {k};''') + ops.append( + _util._generate_boundary_condition_ops( + spline_mode, ixvar, f'xsize_{j}', int_t)) + + # loop over the order + 1 values in the spline filter + ops.append(f''' + W w_{j}; + {int_t} ic_{j}; + for (int k_{j} = 0; k_{j} <= {order}; k_{j}++) + {{ + w_{j} = weights_{j}[k_{j}]; + ic_{j} = ci_{j}[k_{j}] * sx_{j}; + ''') + + if order > 0: + + _weight = ' * '.join([f'w_{j}' for j in range(ndim)]) + _coord_idx = ' + '.join([f'ic_{j}' for j in range(ndim)]) + if mode == 'grid-constant' or (order > 1 and mode == 'constant'): + _cond = ' || '.join([f'(ic_{j} < 0)' for j in range(ndim)]) + ops.append(f''' + if ({_cond}) {{ + out += {cval} * ({internal_dtype})({_weight}); + }} else {{ + {internal_dtype} val = ({internal_dtype})x[{_coord_idx}]; + out += val * ({internal_dtype})({_weight}); + }}''') + else: + ops.append(f''' + {internal_dtype} val = ({internal_dtype})x[{_coord_idx}]; + out += val * 
({internal_dtype})({_weight});''') + + ops.append('}' * ndim) + + if mode == 'constant': + ops.append('}') + + if integer_output: + ops.append('y = (Y)rint((double)out);') + else: + ops.append('y = (Y)out;') + operation = '\n'.join(ops) + + mode_str = mode.replace('-', '_') # avoid hyphen in kernel name + name = 'cupyx_scipy_ndimage_interpolate_{}_order{}_{}_{}d_y{}'.format( + name, order, mode_str, ndim, '_'.join([f'{j}' for j in yshape]), + ) + if uint_t == 'size_t': + name += '_i64' + return operation, name + + +@cupy._util.memoize(for_each_device=True) +def _get_map_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1, + integer_output=False, nprepad=0): + in_params = 'raw X x, raw W coords' + out_params = 'Y y' + operation, name = _generate_interp_custom( + coord_func=_get_coord_map, + ndim=ndim, + large_int=large_int, + yshape=yshape, + mode=mode, + cval=cval, + order=order, + name='map', + integer_output=integer_output, + nprepad=nprepad, + omit_in_coord=True, # input image coordinates are not needed + ) + return cupy.ElementwiseKernel(in_params, out_params, operation, name, + preamble=math_constants_preamble) + + +@cupy._util.memoize(for_each_device=True) +def _get_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1, + integer_output=False, nprepad=0): + in_params = 'raw X x, raw W shift' + out_params = 'Y y' + operation, name = _generate_interp_custom( + coord_func=_get_coord_shift, + ndim=ndim, + large_int=large_int, + yshape=yshape, + mode=mode, + cval=cval, + order=order, + name='shift', + integer_output=integer_output, + nprepad=nprepad, + ) + return cupy.ElementwiseKernel(in_params, out_params, operation, name, + preamble=math_constants_preamble) + + +@cupy._util.memoize(for_each_device=True) +def _get_zoom_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1, + integer_output=False, grid_mode=False, nprepad=0): + in_params = 'raw X x, raw W shift, raw W zoom' + out_params = 'Y y' + if grid_mode: + zoom_shift_func = 
_get_coord_zoom_and_shift_grid + else: + zoom_shift_func = _get_coord_zoom_and_shift + operation, name = _generate_interp_custom( + coord_func=zoom_shift_func, + ndim=ndim, + large_int=large_int, + yshape=yshape, + mode=mode, + cval=cval, + order=order, + name="zoom_shift_grid" if grid_mode else "zoom_shift", + integer_output=integer_output, + nprepad=nprepad, + ) + return cupy.ElementwiseKernel(in_params, out_params, operation, name, + preamble=math_constants_preamble) + + +@cupy._util.memoize(for_each_device=True) +def _get_zoom_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1, + integer_output=False, grid_mode=False, nprepad=0): + in_params = 'raw X x, raw W zoom' + out_params = 'Y y' + operation, name = _generate_interp_custom( + coord_func=_get_coord_zoom_grid if grid_mode else _get_coord_zoom, + ndim=ndim, + large_int=large_int, + yshape=yshape, + mode=mode, + cval=cval, + order=order, + name="zoom_grid" if grid_mode else "zoom", + integer_output=integer_output, + nprepad=nprepad, + ) + return cupy.ElementwiseKernel(in_params, out_params, operation, name, + preamble=math_constants_preamble) + + +@cupy._util.memoize(for_each_device=True) +def _get_affine_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1, + integer_output=False, nprepad=0): + in_params = 'raw X x, raw W mat' + out_params = 'Y y' + operation, name = _generate_interp_custom( + coord_func=_get_coord_affine, + ndim=ndim, + large_int=large_int, + yshape=yshape, + mode=mode, + cval=cval, + order=order, + name='affine', + integer_output=integer_output, + nprepad=nprepad, + ) + return cupy.ElementwiseKernel(in_params, out_params, operation, name, + preamble=math_constants_preamble) diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_interpolation.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_interpolation.py new file mode 100644 index 000000000..18275213b --- /dev/null +++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_interpolation.py @@ -0,0 +1,757 @@ +import 
cmath +import math +import warnings + +import cupy +import numpy +from cupy import _core +from cupy.cuda import runtime + +from cucim.skimage._vendored import _ndimage_interp_kernels as _interp_kernels +from cucim.skimage._vendored import \ + _ndimage_spline_prefilter_core as _spline_prefilter_core +from cucim.skimage._vendored import _ndimage_util as _util +from cucim.skimage._vendored._internal import _normalize_axis_index, prod + + +def _check_parameter(func_name, order, mode): + if order is None: + warnings.warn(f'Currently the default order of {func_name} is 1. In a ' + 'future release this may change to 3 to match ' + 'scipy.ndimage ') + elif order < 0 or 5 < order: + raise ValueError('spline order is not supported') + + if mode not in ('constant', 'grid-constant', 'nearest', 'mirror', + 'reflect', 'grid-mirror', 'wrap', 'grid-wrap', 'opencv', + '_opencv_edge'): + raise ValueError('boundary mode ({}) is not supported'.format(mode)) + + +def _get_spline_output(input, output): + """Create workspace array, temp, and the final dtype for the output. + + Differs from SciPy by not always forcing the internal floating point dtype + to be double precision. 
+ """ + complex_data = input.dtype.kind == 'c' + if complex_data: + min_float_dtype = cupy.complex64 + else: + min_float_dtype = cupy.float32 + if isinstance(output, cupy.ndarray): + if complex_data and output.dtype.kind != 'c': + raise ValueError( + 'output must have complex dtype for complex inputs' + ) + float_dtype = cupy.promote_types(output.dtype, min_float_dtype) + output_dtype = output.dtype + else: + if output is None: + output = output_dtype = input.dtype + else: + output_dtype = cupy.dtype(output) + float_dtype = cupy.promote_types(output, min_float_dtype) + + if (isinstance(output, cupy.ndarray) + and output.dtype == float_dtype == output_dtype + and output.flags.c_contiguous): + if output is not input: + _core.elementwise_copy(input, output) + temp = output + else: + temp = input.astype(float_dtype, copy=False) + temp = cupy.ascontiguousarray(temp) + if cupy.shares_memory(temp, input, 'MAY_SHARE_BOUNDS'): + temp = temp.copy() + return temp, float_dtype, output_dtype + + +def spline_filter1d(input, order=3, axis=-1, output=cupy.float64, + mode='mirror'): + """ + Calculate a 1-D spline filter along the given axis. + + The lines of the array along the given axis are filtered by a + spline filter. The order of the spline must be >= 2 and <= 5. + + Args: + input (cupy.ndarray): The input array. + order (int): The order of the spline interpolation, default is 3. Must + be in the range 0-5. + axis (int): The axis along which the spline filter is applied. Default + is the last axis. + output (cupy.ndarray or dtype, optional): The array in which to place + the output, or the dtype of the returned array. Default is + ``numpy.float64``. + mode (str): Points outside the boundaries of the input are filled + according to the given mode (``'constant'``, ``'nearest'``, + ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``, + ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``). + + Returns: + cupy.ndarray: The result of prefiltering the input. + + .. 
seealso:: :func:`scipy.spline_filter1d` + """ + if order < 0 or order > 5: + raise RuntimeError('spline order not supported') + x = input + ndim = x.ndim + axis = _normalize_axis_index(axis, ndim) + + # order 0, 1 don't require reshaping as no CUDA kernel will be called + # scalar or size 1 arrays also don't need to be filtered + run_kernel = not (order < 2 or x.ndim == 0 or x.shape[axis] == 1) + if not run_kernel: + output = _util._get_output(output, input) + _core.elementwise_copy(x, output) + return output + + temp, data_dtype, output_dtype = _get_spline_output(x, output) + data_type = cupy._core._scalar.get_typename(temp.dtype) + pole_type = cupy._core._scalar.get_typename(temp.real.dtype) + + index_type = _util._get_inttype(input) + index_dtype = cupy.int32 if index_type == 'int' else cupy.int64 + + n_samples = x.shape[axis] + n_signals = x.size // n_samples + info = cupy.array((n_signals, n_samples) + x.shape, dtype=index_dtype) + + # empirical choice of block size that seemed to work well + block_size = max(2 ** math.ceil(numpy.log2(n_samples / 32)), 8) + kern = _spline_prefilter_core.get_raw_spline1d_kernel( + axis, + ndim, + mode, + order=order, + index_type=index_type, + data_type=data_type, + pole_type=pole_type, + block_size=block_size, + ) + + # Due to recursive nature, a given line of data must be processed by a + # single thread. n_signals lines will be processed in total. 
+ block = (block_size,) + grid = ((n_signals + block[0] - 1) // block[0],) + + # apply prefilter gain + poles = _spline_prefilter_core.get_poles(order=order) + temp *= _spline_prefilter_core.get_gain(poles) + + # apply caual + anti-causal IIR spline filters + kern(grid, block, (temp, info)) + + if isinstance(output, cupy.ndarray) and temp is not output: + # copy kernel output into the user-provided output array + _core.elementwise_copy(temp, output) + return output + return temp.astype(output_dtype, copy=False) + + +def spline_filter(input, order=3, output=cupy.float64, mode='mirror'): + """Multidimensional spline filter. + + Args: + input (cupy.ndarray): The input array. + order (int): The order of the spline interpolation, default is 3. Must + be in the range 0-5. + output (cupy.ndarray or dtype, optional): The array in which to place + the output, or the dtype of the returned array. Default is + ``numpy.float64``. + mode (str): Points outside the boundaries of the input are filled + according to the given mode (``'constant'``, ``'nearest'``, + ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``, + ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``). + + Returns: + cupy.ndarray: The result of prefiltering the input. + + .. 
seealso:: :func:`scipy.spline_filter1d` + """ + if order < 2 or order > 5: + raise RuntimeError('spline order not supported') + + x = input + temp, data_dtype, output_dtype = _get_spline_output(x, output) + if order not in [0, 1] and input.ndim > 0: + for axis in range(x.ndim): + spline_filter1d(x, order, axis, output=temp, mode=mode) + x = temp + if isinstance(output, cupy.ndarray): + _core.elementwise_copy(temp, output) + else: + output = temp + if output.dtype != output_dtype: + output = output.astype(output_dtype) + return output + + +def _check_coordinates(coordinates, order, allow_float32=True): + if coordinates.dtype.kind == 'f': + if allow_float32: + coord_dtype = cupy.promote_types(coordinates.dtype, cupy.float32) + else: + coord_dtype = cupy.promote_types(coordinates.dtype, cupy.float64) + coordinates = coordinates.astype(coord_dtype, copy=False) + elif coordinates.dtype.kind in 'iu': + if order > 1: + # order > 1 (spline) kernels require floating-point coordinates + if allow_float32: + coord_dtype = cupy.promote_types( + coordinates.dtype, cupy.float32 + ) + else: + coord_dtype = cupy.promote_types( + coordinates.dtype, cupy.float64 + ) + coordinates = coordinates.astype(coord_dtype) + else: + raise ValueError('coordinates should have floating point dtype') + if not coordinates.flags.c_contiguous: + coordinates = cupy.ascontiguousarray(coordinates) + return coordinates + + +def _prepad_for_spline_filter(input, mode, cval): + if mode in ['nearest', 'grid-constant']: + # these modes need padding to get accurate boundary values + npad = 12 # empirical factor chosen by SciPy + if mode == 'grid-constant': + kwargs = dict(mode='constant', constant_values=cval) + else: + kwargs = dict(mode='edge') + padded = cupy.pad(input, npad, **kwargs) + else: + npad = 0 + padded = input + return padded, npad + + +def _filter_input(image, prefilter, mode, cval, order): + """Perform spline prefiltering when needed. 
+ + Spline orders > 1 need a prefiltering stage to preserve resolution. + + For boundary modes without analytical spline boundary conditions, some + prepadding of the input with cupy.pad is used to maintain accuracy. + ``npad`` is an integer corresponding to the amount of padding at each edge + of the array. + """ + if not prefilter or order < 2: + return (cupy.ascontiguousarray(image), 0) + padded, npad = _prepad_for_spline_filter(image, mode, cval) + float_dtype = cupy.promote_types(image.dtype, cupy.float32) + filtered = spline_filter(padded, order, output=float_dtype, mode=mode) + return cupy.ascontiguousarray(filtered), npad + + +def map_coordinates(input, coordinates, output=None, order=3, + mode='constant', cval=0.0, prefilter=True): + """Map the input array to new coordinates by interpolation. + + The array of coordinates is used to find, for each point in the output, the + corresponding coordinates in the input. The value of the input at those + coordinates is determined by spline interpolation of the requested order. + + The shape of the output is derived from that of the coordinate array by + dropping the first axis. The values of the array along the first axis are + the coordinates in the input array at which the output value is found. + + Args: + input (cupy.ndarray): The input array. + coordinates (array_like): The coordinates at which ``input`` is + evaluated. + output (cupy.ndarray or ~cupy.dtype): The array in which to place the + output, or the dtype of the returned array. + order (int): The order of the spline interpolation, default is 3. Must + be in the range 0-5. + mode (str): Points outside the boundaries of the input are filled + according to the given mode (``'constant'``, ``'nearest'``, + ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``, + ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``). + cval (scalar): Value used for points outside the boundaries of + the input if ``mode='constant'`` or ``mode='opencv'``. 
Default is + 0.0 + prefilter (bool): It is not used yet. It just exists for compatibility + with :mod:`scipy.ndimage`. + + Returns: + cupy.ndarray: + The result of transforming the input. The shape of the output is + derived from that of ``coordinates`` by dropping the first axis. + + .. seealso:: :func:`scipy.ndimage.map_coordinates` + """ + + _check_parameter('map_coordinates', order, mode) + + if mode == 'opencv' or mode == '_opencv_edge': + input = cupy.pad(input, [(1, 1)] * input.ndim, 'constant', + constant_values=cval) + coordinates = cupy.add(coordinates, 1) + mode = 'constant' + + ret = _util._get_output(output, input, coordinates.shape[1:]) + integer_output = ret.dtype.kind in 'iu' + _util._check_cval(mode, cval, integer_output) + + if input.dtype.kind in 'iu': + input = input.astype(cupy.float32) + coordinates = _check_coordinates(coordinates, order) + filtered, nprepad = _filter_input(input, prefilter, mode, cval, order) + large_int = max(prod(input.shape), coordinates.shape[0]) > 1 << 31 + kern = _interp_kernels._get_map_kernel( + input.ndim, large_int, yshape=coordinates.shape, mode=mode, cval=cval, + order=order, integer_output=integer_output, nprepad=nprepad) + kern(filtered, coordinates, ret) + return ret + + +def affine_transform(input, matrix, offset=0.0, output_shape=None, output=None, + order=3, mode='constant', cval=0.0, prefilter=True, *, + texture_memory=False): + """Apply an affine transformation. + + Given an output image pixel index vector ``o``, the pixel value is + determined from the input image at position + ``cupy.dot(matrix, o) + offset``. + + Args: + input (cupy.ndarray): The input array. + matrix (cupy.ndarray): The inverse coordinate transformation matrix, + mapping output coordinates to input coordinates. If ``ndim`` is the + number of dimensions of ``input``, the given matrix must have one + of the following shapes: + + - ``(ndim, ndim)``: the linear transformation matrix for each + output coordinate. 
+ - ``(ndim,)``: assume that the 2D transformation matrix is + diagonal, with the diagonal specified by the given value. + - ``(ndim + 1, ndim + 1)``: assume that the transformation is + specified using homogeneous coordinates. In this case, any + value passed to ``offset`` is ignored. + - ``(ndim, ndim + 1)``: as above, but the bottom row of a + homogeneous transformation matrix is always + ``[0, 0, ..., 1]``, and may be omitted. + + offset (float or sequence): The offset into the array where the + transform is applied. If a float, ``offset`` is the same for each + axis. If a sequence, ``offset`` should contain one value for each + axis. + output_shape (tuple of ints): Shape of the output array. Defaults to + ``input.shape``. + output (cupy.ndarray or ~cupy.dtype): The array in which to place the + output, or the dtype of the returned array. + order (int): The order of the spline interpolation, default is 3. Must + be in the range 0-5. + mode (str): Points outside the boundaries of the input are filled + according to the given mode (``'constant'``, ``'nearest'``, + ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``, + ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``). + cval (scalar): Value used for points outside the boundaries of + the input if ``mode='constant'`` or ``mode='opencv'``. Default is + 0.0. + prefilter (bool): Whether to prefilter ``input`` with + :func:`spline_filter` before interpolation. Only has an effect + when ``order`` is 2 or greater. Default is True. + texture_memory (bool): If True, uses GPU texture memory. Supports only: + + - 2D and 3D float32 arrays as input + - ``(ndim + 1, ndim + 1)`` homogeneous float32 transformation + matrix + - ``mode='constant'`` and ``mode='nearest'`` + - ``order=0`` (nearest neighbor) and ``order=1`` (linear + interpolation) + - NVIDIA CUDA GPUs + + Returns: + cupy.ndarray: + The transformed input. If ``output`` is given as an array, the + result is written into it. + + .. 
seealso:: :func:`scipy.ndimage.affine_transform` + """ + + if texture_memory: + # _texture only available in CuPy 10.x so delay the import + # We do not use this texture-based implementation in cuCIM. + from cucim.skimage._vendored import _texture + + if runtime.is_hip: + raise RuntimeError( + 'HIP currently does not support texture acceleration') + tm_interp = 'linear' if order > 0 else 'nearest' + return _texture.affine_transformation(data=input, + transformation_matrix=matrix, + output_shape=output_shape, + output=output, + interpolation=tm_interp, + mode=mode, + border_value=cval) + + _check_parameter('affine_transform', order, mode) + + offset = _util._fix_sequence_arg(offset, input.ndim, 'offset', float) + + if matrix.ndim not in [1, 2] or matrix.shape[0] < 1: + raise RuntimeError('no proper affine matrix provided') + if matrix.ndim == 2: + if matrix.shape[0] == matrix.shape[1] - 1: + offset = matrix[:, -1] + matrix = matrix[:, :-1] + elif matrix.shape[0] == input.ndim + 1: + offset = matrix[:-1, -1] + matrix = matrix[:-1, :-1] + if matrix.shape != (input.ndim, input.ndim): + raise RuntimeError('improper affine shape') + + if mode == 'opencv': + m = cupy.zeros((input.ndim + 1, input.ndim + 1)) + m[:-1, :-1] = matrix + m[:-1, -1] = offset + m[-1, -1] = 1 + m = cupy.linalg.inv(m) + m[:2] = cupy.roll(m[:2], 1, axis=0) + m[:2, :2] = cupy.roll(m[:2, :2], 1, axis=1) + matrix = m[:-1, :-1] + offset = m[:-1, -1] + + if output_shape is None: + output_shape = input.shape + + if mode == 'opencv' or mode == '_opencv_edge': + if matrix.ndim == 1: + matrix = cupy.diag(matrix) + coordinates = cupy.indices(output_shape, dtype=cupy.float64) + coordinates = cupy.dot(matrix, coordinates.reshape((input.ndim, -1))) + coordinates += cupy.expand_dims(cupy.asarray(offset), -1) + ret = _util._get_output(output, input, shape=output_shape) + ret[:] = map_coordinates(input, coordinates, ret.dtype, order, mode, + cval, prefilter).reshape(output_shape) + return ret + + matrix = 
matrix.astype(cupy.float64, copy=False) + ndim = input.ndim + output = _util._get_output(output, input, shape=output_shape) + if input.dtype.kind in 'iu': + input = input.astype(cupy.float32) + filtered, nprepad = _filter_input(input, prefilter, mode, cval, order) + + integer_output = output.dtype.kind in 'iu' + _util._check_cval(mode, cval, integer_output) + large_int = max(prod(input.shape), prod(output_shape)) > 1 << 31 + if matrix.ndim == 1: + offset = cupy.asarray(offset, dtype=cupy.float64) + offset = -offset / matrix + kern = _interp_kernels._get_zoom_shift_kernel( + ndim, large_int, output_shape, mode, cval=cval, order=order, + integer_output=integer_output, nprepad=nprepad) + kern(filtered, offset, matrix, output) + else: + kern = _interp_kernels._get_affine_kernel( + ndim, large_int, output_shape, mode, cval=cval, order=order, + integer_output=integer_output, nprepad=nprepad) + m = cupy.zeros((ndim, ndim + 1), dtype=cupy.float64) + m[:, :-1] = matrix + m[:, -1] = cupy.asarray(offset, dtype=cupy.float64) + kern(filtered, m, output) + return output + + +def _minmax(coor, minc, maxc): + if coor[0] < minc[0]: + minc[0] = coor[0] + elif coor[0] > maxc[0]: + maxc[0] = coor[0] + if coor[1] < minc[1]: + minc[1] = coor[1] + elif coor[1] > maxc[1]: + maxc[1] = coor[1] + return minc, maxc + + +def rotate(input, angle, axes=(1, 0), reshape=True, output=None, order=3, + mode='constant', cval=0.0, prefilter=True): + """Rotate an array. + + The array is rotated in the plane defined by the two axes given by the + ``axes`` parameter using spline interpolation of the requested order. + + Args: + input (cupy.ndarray): The input array. + angle (float): The rotation angle in degrees. + axes (tuple of 2 ints): The two axes that define the plane of rotation. + Default is the first two axes. + reshape (bool): If ``reshape`` is True, the output shape is adapted so + that the input array is contained completely in the output. Default + is True. 
+ output (cupy.ndarray or ~cupy.dtype): The array in which to place the + output, or the dtype of the returned array. + order (int): The order of the spline interpolation, default is 3. Must + be in the range 0-5. + mode (str): Points outside the boundaries of the input are filled + according to the given mode (``'constant'``, ``'nearest'``, + ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``, + ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``). + cval (scalar): Value used for points outside the boundaries of + the input if ``mode='constant'`` or ``mode='opencv'``. Default is + 0.0 + prefilter (bool): It is not used yet. It just exists for compatibility + with :mod:`scipy.ndimage`. + + Returns: + cupy.ndarray or None: + The rotated input. + + .. seealso:: :func:`scipy.ndimage.rotate` + """ + + _check_parameter('rotate', order, mode) + + if mode == 'opencv': + mode = '_opencv_edge' + + input_arr = input + axes = list(axes) + if axes[0] < 0: + axes[0] += input_arr.ndim + if axes[1] < 0: + axes[1] += input_arr.ndim + if axes[0] > axes[1]: + axes = [axes[1], axes[0]] + if axes[0] < 0 or input_arr.ndim <= axes[1]: + raise ValueError('invalid rotation plane specified') + + ndim = input_arr.ndim + rad = math.radians(angle) + sincos = cmath.rect(1, rad) + cos, sin = sincos.real, sincos.imag + + # determine offsets and output shape as in scipy.ndimage.rotate + rot_matrix = numpy.array([[cos, sin], + [-sin, cos]]) + + img_shape = numpy.asarray(input_arr.shape) + in_plane_shape = img_shape[axes] + if reshape: + # Compute transformed input bounds + iy, ix = in_plane_shape + out_bounds = rot_matrix @ [[0, 0, iy, iy], + [0, ix, 0, ix]] + # Compute the shape of the transformed input plane + out_plane_shape = (out_bounds.ptp(axis=1) + 0.5).astype(cupy.int64) + else: + out_plane_shape = img_shape[axes] + + out_center = rot_matrix @ ((out_plane_shape - 1) / 2) + in_center = (in_plane_shape - 1) / 2 + + output_shape = img_shape + output_shape[axes] = out_plane_shape + 
output_shape = tuple(output_shape) + + matrix = numpy.identity(ndim) + matrix[axes[0], axes[0]] = cos + matrix[axes[0], axes[1]] = sin + matrix[axes[1], axes[0]] = -sin + matrix[axes[1], axes[1]] = cos + + offset = numpy.zeros(ndim, dtype=cupy.float64) + offset[axes] = in_center - out_center + + matrix = cupy.asarray(matrix) + offset = cupy.asarray(offset) + + return affine_transform(input, matrix, offset, output_shape, output, order, + mode, cval, prefilter) + + +def shift(input, shift, output=None, order=3, mode='constant', cval=0.0, + prefilter=True): + """Shift an array. + + The array is shifted using spline interpolation of the requested order. + Points outside the boundaries of the input are filled according to the + given mode. + + Args: + input (cupy.ndarray): The input array. + shift (float or sequence): The shift along the axes. If a float, + ``shift`` is the same for each axis. If a sequence, ``shift`` + should contain one value for each axis. + output (cupy.ndarray or ~cupy.dtype): The array in which to place the + output, or the dtype of the returned array. + order (int): The order of the spline interpolation, default is 3. Must + be in the range 0-5. + mode (str): Points outside the boundaries of the input are filled + according to the given mode (``'constant'``, ``'nearest'``, + ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``, + ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``). + cval (scalar): Value used for points outside the boundaries of + the input if ``mode='constant'`` or ``mode='opencv'``. Default is + 0.0 + prefilter (bool): It is not used yet. It just exists for compatibility + with :mod:`scipy.ndimage`. + + Returns: + cupy.ndarray or None: + The shifted input. + + .. 
seealso:: :func:`scipy.ndimage.shift` + """ + + _check_parameter('shift', order, mode) + + shift = _util._fix_sequence_arg(shift, input.ndim, 'shift', float) + + if mode == 'opencv': + mode = '_opencv_edge' + + output = affine_transform( + input, + cupy.ones(input.ndim, input.dtype), + cupy.negative(cupy.asarray(shift)), + None, + output, + order, + mode, + cval, + prefilter, + ) + else: + output = _util._get_output(output, input) + if input.dtype.kind in 'iu': + input = input.astype(cupy.float32) + filtered, nprepad = _filter_input(input, prefilter, mode, cval, order) + integer_output = output.dtype.kind in 'iu' + _util._check_cval(mode, cval, integer_output) + large_int = prod(input.shape) > 1 << 31 + kern = _interp_kernels._get_shift_kernel( + input.ndim, large_int, input.shape, mode, cval=cval, order=order, + integer_output=integer_output, nprepad=nprepad) + shift = cupy.asarray(shift, dtype=cupy.float64, order='C') + if shift.ndim != 1: + raise ValueError('shift must be 1d') + if shift.size != filtered.ndim: + raise ValueError('len(shift) must equal input.ndim') + kern(filtered, shift, output) + return output + + +def zoom(input, zoom, output=None, order=3, mode='constant', cval=0.0, + prefilter=True, *, grid_mode=False): + """Zoom an array. + + The array is zoomed using spline interpolation of the requested order. + + Args: + input (cupy.ndarray): The input array. + zoom (float or sequence): The zoom factor along the axes. If a float, + ``zoom`` is the same for each axis. If a sequence, ``zoom`` should + contain one value for each axis. + output (cupy.ndarray or ~cupy.dtype): The array in which to place the + output, or the dtype of the returned array. + order (int): The order of the spline interpolation, default is 3. Must + be in the range 0-5. 
+ mode (str): Points outside the boundaries of the input are filled + according to the given mode (``'constant'``, ``'nearest'``, + ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``, + ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``). + cval (scalar): Value used for points outside the boundaries of + the input if ``mode='constant'`` or ``mode='opencv'``. Default is + 0.0. + prefilter (bool): Whether to prefilter ``input`` with + :func:`spline_filter` before interpolation. Only has an effect + when ``order`` is 2 or greater. Default is True. + grid_mode (bool, optional): If False, the distance from the pixel + centers is zoomed. Otherwise, the distance including the full pixel + extent is used. For example, a 1d signal of length 5 is considered + to have length 4 when ``grid_mode`` is False, but length 5 when + ``grid_mode`` is True. See the following visual illustration: + + .. code-block:: text + + | pixel 1 | pixel 2 | pixel 3 | pixel 4 | pixel 5 | + |<-------------------------------------->| + vs. + |<----------------------------------------------->| + + The starting point of the arrow in the diagram above corresponds to + coordinate location 0 in each mode. + + Returns: + cupy.ndarray: + The zoomed input. + + .. 
seealso:: :func:`scipy.ndimage.zoom` + """ + + _check_parameter('zoom', order, mode) + + zoom = _util._fix_sequence_arg(zoom, input.ndim, 'zoom', float) + + output_shape = [] + for s, z in zip(input.shape, zoom): + output_shape.append(int(round(s * z))) + output_shape = tuple(output_shape) + + if mode == 'opencv': + zoom = [] + offset = [] + for in_size, out_size in zip(input.shape, output_shape): + if out_size > 1: + zoom.append(float(in_size) / out_size) + offset.append((zoom[-1] - 1) / 2.0) + else: + zoom.append(0) + offset.append(0) + mode = 'nearest' + + output = affine_transform( + input, + cupy.asarray(zoom), + offset, + output_shape, + output, + order, + mode, + cval, + prefilter, + ) + else: + if grid_mode: + + # warn about modes that may have surprising behavior + suggest_mode = None + if mode == 'constant': + suggest_mode = 'grid-constant' + elif mode == 'wrap': + suggest_mode = 'grid-wrap' + if suggest_mode is not None: + warnings.warn( + f'It is recommended to use mode = {suggest_mode} instead ' + f'of {mode} when grid_mode is True.') + + zoom = [] + for in_size, out_size in zip(input.shape, output_shape): + if grid_mode and out_size > 0: + zoom.append(in_size / out_size) + elif out_size > 1: + zoom.append((in_size - 1) / (out_size - 1)) + else: + zoom.append(0) + + output = _util._get_output(output, input, shape=output_shape) + if input.dtype.kind in 'iu': + input = input.astype(cupy.float32) + filtered, nprepad = _filter_input(input, prefilter, mode, cval, order) + integer_output = output.dtype.kind in 'iu' + _util._check_cval(mode, cval, integer_output) + large_int = max(prod(input.shape), prod(output_shape)) > 1 << 31 + kern = _interp_kernels._get_zoom_kernel( + input.ndim, large_int, output_shape, mode, order=order, + integer_output=integer_output, grid_mode=grid_mode, + nprepad=nprepad) + zoom = cupy.asarray(zoom, dtype=cupy.float64) + kern(filtered, zoom, output) + return output diff --git 
a/python/cucim/src/cucim/skimage/_vendored/_ndimage_morphology.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_morphology.py new file mode 100644 index 000000000..5c6462cff --- /dev/null +++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_morphology.py @@ -0,0 +1,1016 @@ +import operator +import warnings + +import cupy +import numpy +from cupy import _core + +from cucim.skimage._vendored import _ndimage_filters as _filters +from cucim.skimage._vendored import _ndimage_filters_core as _filters_core +from cucim.skimage._vendored import _ndimage_util as _util + + +@cupy.memoize(for_each_device=True) +def _get_binary_erosion_kernel( + w_shape, int_type, offsets, center_is_true, border_value, invert, masked, + all_weights_nonzero +): + if invert: + border_value = int(not border_value) + true_val = 0 + false_val = 1 + else: + true_val = 1 + false_val = 0 + + if masked: + pre = """ + bool mv = (bool)mask[i]; + bool _in = (bool)x[i]; + if (!mv) {{ + y = cast(_in); + return; + }} else if ({center_is_true} && _in == {false_val}) {{ + y = cast(_in); + return; + }}""".format(center_is_true=int(center_is_true), + false_val=false_val) + else: + pre = """ + bool _in = (bool)x[i]; + if ({center_is_true} && _in == {false_val}) {{ + y = cast(_in); + return; + }}""".format(center_is_true=int(center_is_true), + false_val=false_val) + pre = pre + """ + y = cast({true_val});""".format(true_val=true_val) + + # {{{{ required because format is called again within _generate_nd_kernel + found = """ + if ({{cond}}) {{{{ + if (!{border_value}) {{{{ + y = cast({false_val}); + return; + }}}} + }}}} else {{{{ + bool nn = {{value}} ? 
{true_val} : {false_val}; + if (!nn) {{{{ + y = cast({false_val}); + return; + }}}} + }}}}""".format(true_val=int(true_val), + false_val=int(false_val), + border_value=int(border_value),) + + name = 'binary_erosion' + if false_val: + name += '_invert' + return _filters_core._generate_nd_kernel( + name, + pre, + found, + '', + 'constant', w_shape, int_type, offsets, 0, ctype='Y', has_weights=True, + has_structure=False, has_mask=masked, binary_morphology=True, + all_weights_nonzero=all_weights_nonzero) + + +def _center_is_true(structure, origin): + coor = tuple([oo + ss // 2 for ss, oo in zip(structure.shape, origin)]) + return bool(structure[coor]) # device synchronization + + +def iterate_structure(structure, iterations, origin=None): + """Iterate a structure by dilating it with itself. + + Args: + structure(array_like): Structuring element (an array of bools, + for example), to be dilated with itself. + iterations(int): The number of dilations performed on the structure + with itself. + origin(int or tuple of int, optional): If origin is None, only the + iterated structure is returned. If not, a tuple of the iterated + structure and the modified origin is returned. + + Returns: + cupy.ndarray: A new structuring element obtained by dilating + ``structure`` (``iterations`` - 1) times with itself. + + .. 
seealso:: :func:`scipy.ndimage.iterate_structure` + """ + if iterations < 2: + return structure.copy() + ni = iterations - 1 + shape = [ii + ni * (ii - 1) for ii in structure.shape] + pos = [ni * (structure.shape[ii] // 2) for ii in range(len(shape))] + slc = tuple( + slice(pos[ii], pos[ii] + structure.shape[ii], None) + for ii in range(len(shape)) + ) + out = cupy.zeros(shape, bool) + out[slc] = structure != 0 + out = binary_dilation(out, structure, iterations=ni) + if origin is None: + return out + else: + origin = _util._fix_sequence_arg(origin, structure.ndim, 'origin', int) + origin = [iterations * o for o in origin] + return out, origin + + +def generate_binary_structure(rank, connectivity): + """Generate a binary structure for binary morphological operations. + + Args: + rank(int): Number of dimensions of the array to which the structuring + element will be applied, as returned by ``np.ndim``. + connectivity(int): ``connectivity`` determines which elements of the + output array belong to the structure, i.e., are considered as + neighbors of the central element. Elements up to a squared distance + of ``connectivity`` from the center are considered neighbors. + ``connectivity`` may range from 1 (no diagonal elements are + neighbors) to ``rank`` (all elements are neighbors). + + Returns: + cupy.ndarray: Structuring element which may be used for binary + morphological operations, with ``rank`` dimensions and all + dimensions equal to 3. + + .. 
seealso:: :func:`scipy.ndimage.generate_binary_structure` + """ + if connectivity < 1: + connectivity = 1 + if rank < 1: + return cupy.asarray(True, dtype=bool) + output = numpy.fabs(numpy.indices([3] * rank) - 1) + output = numpy.add.reduce(output, 0) + output = output <= connectivity + return cupy.asarray(output) + + +def _binary_erosion(input, structure, iterations, mask, output, border_value, + origin, invert, brute_force=True): + try: + iterations = operator.index(iterations) + except TypeError: + raise TypeError('iterations parameter should be an integer') + + if input.dtype.kind == 'c': + raise TypeError('Complex type not supported') + if structure is None: + structure = generate_binary_structure(input.ndim, 1) + all_weights_nonzero = input.ndim == 1 + center_is_true = True + default_structure = True + else: + structure = structure.astype(dtype=bool, copy=False) + # transfer to CPU for use in determining if it is fully dense + # structure_cpu = cupy.asnumpy(structure) + default_structure = False + if structure.ndim != input.ndim: + raise RuntimeError('structure and input must have same dimensionality') + if not structure.flags.c_contiguous: + structure = cupy.ascontiguousarray(structure) + if structure.size < 1: + raise RuntimeError('structure must not be empty') + + if mask is not None: + if mask.shape != input.shape: + raise RuntimeError('mask and input must have equal sizes') + if not mask.flags.c_contiguous: + mask = cupy.ascontiguousarray(mask) + masked = True + else: + masked = False + origin = _util._fix_sequence_arg(origin, input.ndim, 'origin', int) + + if isinstance(output, cupy.ndarray): + if output.dtype.kind == 'c': + raise TypeError('Complex output type not supported') + else: + output = bool + output = _util._get_output(output, input) + temp_needed = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS') + if temp_needed: + # input and output arrays cannot share memory + temp = output + output = _util._get_output(output.dtype, input) + if 
structure.ndim == 0: + # kernel doesn't handle ndim=0, so special case it here + if float(structure): + output[...] = cupy.asarray(input, dtype=bool) + else: + output[...] = ~cupy.asarray(input, dtype=bool) + return output + origin = tuple(origin) + int_type = _util._get_inttype(input) + offsets = _filters_core._origins_to_offsets(origin, structure.shape) + if not default_structure: + # synchronize required to determine if all weights are non-zero + nnz = int(cupy.count_nonzero(structure)) + all_weights_nonzero = nnz == structure.size + if all_weights_nonzero: + center_is_true = True + else: + center_is_true = _center_is_true(structure, origin) + + erode_kernel = _get_binary_erosion_kernel( + structure.shape, int_type, offsets, center_is_true, border_value, + invert, masked, all_weights_nonzero, + ) + + if iterations == 1: + if masked: + output = erode_kernel(input, structure, mask, output) + else: + output = erode_kernel(input, structure, output) + elif center_is_true and not brute_force: + raise NotImplementedError( + 'only brute_force iteration has been implemented' + ) + else: + if cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS'): + raise ValueError('output and input may not overlap in memory') + tmp_in = cupy.empty_like(input, dtype=output.dtype) + tmp_out = output + if iterations >= 1 and not iterations & 1: + tmp_in, tmp_out = tmp_out, tmp_in + if masked: + tmp_out = erode_kernel(input, structure, mask, tmp_out) + else: + tmp_out = erode_kernel(input, structure, tmp_out) + # TODO: kernel doesn't return the changed status, so determine it here + changed = not (input == tmp_out).all() # synchronize! + ii = 1 + while ii < iterations or ((iterations < 1) and changed): + tmp_in, tmp_out = tmp_out, tmp_in + if masked: + tmp_out = erode_kernel(tmp_in, structure, mask, tmp_out) + else: + tmp_out = erode_kernel(tmp_in, structure, tmp_out) + changed = not (tmp_in == tmp_out).all() + ii += 1 + if not changed and (not ii & 1): # synchronize! 
+ # can exit early if nothing changed + # (only do this after even number of tmp_in/out swaps) + break + output = tmp_out + if temp_needed: + _core.elementwise_copy(output, temp) + output = temp + return output + + +def binary_erosion(input, structure=None, iterations=1, mask=None, output=None, + border_value=0, origin=0, brute_force=False): + """Multidimensional binary erosion with a given structuring element. + + Binary erosion is a mathematical morphology operation used for image + processing. + + Args: + input(cupy.ndarray): The input binary array_like to be eroded. + Non-zero (True) elements form the subset to be eroded. + structure(cupy.ndarray, optional): The structuring element used for the + erosion. Non-zero elements are considered True. If no structuring + element is provided an element is generated with a square + connectivity equal to one. (Default value = None). + iterations(int, optional): The erosion is repeated ``iterations`` times + (one, by default). If iterations is less than 1, the erosion is + repeated until the result does not change anymore. Only an integer + of iterations is accepted. + mask(cupy.ndarray or None, optional): If a mask is given, only those + elements with a True value at the corresponding mask element are + modified at each iteration. (Default value = None) + output(cupy.ndarray, optional): Array of the same shape as input, into + which the output is placed. By default, a new array is created. + border_value(int (cast to 0 or 1), optional): Value at the + border in the output array. (Default value = 0) + origin(int or tuple of ints, optional): Placement of the filter, by + default 0. + brute_force(boolean, optional): Memory condition: if False, only the + pixels whose value was changed in the last iteration are tracked as + candidates to be updated (eroded) in the current iteration; if + True all pixels are considered as candidates for erosion, + regardless of what happened in the previous iteration. 
+ + Returns: + cupy.ndarray: The result of binary erosion. + + .. warning:: + + This function may synchronize the device. + + .. seealso:: :func:`scipy.ndimage.binary_erosion` + """ + return _binary_erosion(input, structure, iterations, mask, output, + border_value, origin, 0, brute_force) + + +def binary_dilation(input, structure=None, iterations=1, mask=None, + output=None, border_value=0, origin=0, brute_force=False): + """Multidimensional binary dilation with the given structuring element. + + Args: + input(cupy.ndarray): The input binary array_like to be dilated. + Non-zero (True) elements form the subset to be dilated. + structure(cupy.ndarray, optional): The structuring element used for the + dilation. Non-zero elements are considered True. If no structuring + element is provided an element is generated with a square + connectivity equal to one. (Default value = None). + iterations(int, optional): The dilation is repeated ``iterations`` + times (one, by default). If iterations is less than 1, the dilation + is repeated until the result does not change anymore. Only an + integer of iterations is accepted. + mask(cupy.ndarray or None, optional): If a mask is given, only those + elements with a True value at the corresponding mask element are + modified at each iteration. (Default value = None) + output(cupy.ndarray, optional): Array of the same shape as input, into + which the output is placed. By default, a new array is created. + border_value(int (cast to 0 or 1), optional): Value at the + border in the output array. (Default value = 0) + origin(int or tuple of ints, optional): Placement of the filter, by + default 0. + brute_force(boolean, optional): Memory condition: if False, only the + pixels whose value was changed in the last iteration are tracked as + candidates to be updated (dilated) in the current iteration; if + True all pixels are considered as candidates for dilation, + regardless of what happened in the previous iteration. 
+ + Returns: + cupy.ndarray: The result of binary dilation. + + .. warning:: + + This function may synchronize the device. + + .. seealso:: :func:`scipy.ndimage.binary_dilation` + """ + if structure is None: + structure = generate_binary_structure(input.ndim, 1) + origin = _util._fix_sequence_arg(origin, input.ndim, 'origin', int) + structure = structure[tuple([slice(None, None, -1)] * structure.ndim)] + for ii in range(len(origin)): + origin[ii] = -origin[ii] + if not structure.shape[ii] & 1: + origin[ii] -= 1 + return _binary_erosion(input, structure, iterations, mask, output, + border_value, origin, 1, brute_force) + + +def binary_opening(input, structure=None, iterations=1, output=None, origin=0, + mask=None, border_value=0, brute_force=False): + """ + Multidimensional binary opening with the given structuring element. + + The *opening* of an input image by a structuring element is the + *dilation* of the *erosion* of the image by the structuring element. + + Args: + input(cupy.ndarray): The input binary array to be opened. + Non-zero (True) elements form the subset to be opened. + structure(cupy.ndarray, optional): The structuring element used for the + opening. Non-zero elements are considered True. If no structuring + element is provided an element is generated with a square + connectivity equal to one. (Default value = None). + iterations(int, optional): The opening is repeated ``iterations`` times + (one, by default). If iterations is less than 1, the opening is + repeated until the result does not change anymore. Only an integer + of iterations is accepted. + output(cupy.ndarray, optional): Array of the same shape as input, into + which the output is placed. By default, a new array is created. + origin(int or tuple of ints, optional): Placement of the filter, by + default 0. + mask(cupy.ndarray or None, optional): If a mask is given, only those + elements with a True value at the corresponding mask element are + modified at each iteration. 
(Default value = None) + border_value(int (cast to 0 or 1), optional): Value at the + border in the output array. (Default value = 0) + brute_force(boolean, optional): Memory condition: if False, only the + pixels whose value was changed in the last iteration are tracked as + candidates to be updated (dilated) in the current iteration; if + True all pixels are considered as candidates for opening, + regardless of what happened in the previous iteration. + + Returns: + cupy.ndarray: The result of binary opening. + + .. warning:: + + This function may synchronize the device. + + .. seealso:: :func:`scipy.ndimage.binary_opening` + """ + if structure is None: + rank = input.ndim + structure = generate_binary_structure(rank, 1) + tmp = binary_erosion(input, structure, iterations, mask, None, + border_value, origin, brute_force) + return binary_dilation(tmp, structure, iterations, mask, output, + border_value, origin, brute_force) + + +def binary_closing(input, structure=None, iterations=1, output=None, origin=0, + mask=None, border_value=0, brute_force=False): + """ + Multidimensional binary closing with the given structuring element. + + The *closing* of an input image by a structuring element is the + *erosion* of the *dilation* of the image by the structuring element. + + Args: + input(cupy.ndarray): The input binary array to be closed. + Non-zero (True) elements form the subset to be closed. + structure(cupy.ndarray, optional): The structuring element used for the + closing. Non-zero elements are considered True. If no structuring + element is provided an element is generated with a square + connectivity equal to one. (Default value = None). + iterations(int, optional): The closing is repeated ``iterations`` times + (one, by default). If iterations is less than 1, the closing is + repeated until the result does not change anymore. Only an integer + of iterations is accepted. 
+ output(cupy.ndarray, optional): Array of the same shape as input, into + which the output is placed. By default, a new array is created. + origin(int or tuple of ints, optional): Placement of the filter, by + default 0. + mask(cupy.ndarray or None, optional): If a mask is given, only those + elements with a True value at the corresponding mask element are + modified at each iteration. (Default value = None) + border_value(int (cast to 0 or 1), optional): Value at the + border in the output array. (Default value = 0) + brute_force(boolean, optional): Memory condition: if False, only the + pixels whose value was changed in the last iteration are tracked as + candidates to be updated (dilated) in the current iteration; if + True all pixels are considered as candidates for closing, + regardless of what happened in the previous iteration. + + Returns: + cupy.ndarray: The result of binary closing. + + .. warning:: + + This function may synchronize the device. + + .. seealso:: :func:`scipy.ndimage.binary_closing` + """ + if structure is None: + rank = input.ndim + structure = generate_binary_structure(rank, 1) + tmp = binary_dilation(input, structure, iterations, mask, None, + border_value, origin, brute_force) + return binary_erosion(tmp, structure, iterations, mask, output, + border_value, origin, brute_force) + + +def binary_hit_or_miss(input, structure1=None, structure2=None, output=None, + origin1=0, origin2=None): + """ + Multidimensional binary hit-or-miss transform. + + The hit-or-miss transform finds the locations of a given pattern + inside the input image. + + Args: + input (cupy.ndarray): Binary image where a pattern is to be detected. + structure1 (cupy.ndarray, optional): Part of the structuring element to + be fitted to the foreground (non-zero elements) of ``input``. If no + value is provided, a structure of square connectivity 1 is chosen. 
+ structure2 (cupy.ndarray, optional): Second part of the structuring + element that has to miss completely the foreground. If no value is + provided, the complementary of ``structure1`` is taken. + output (cupy.ndarray, dtype or None, optional): Array of the same shape + as input, into which the output is placed. By default, a new array + is created. + origin1 (int or tuple of ints, optional): Placement of the first part + of the structuring element ``structure1``, by default 0 for a + centered structure. + origin2 (int or tuple of ints or None, optional): Placement of the + second part of the structuring element ``structure2``, by default 0 + for a centered structure. If a value is provided for ``origin1`` + and not for ``origin2``, then ``origin2`` is set to ``origin1``. + + Returns: + cupy.ndarray: Hit-or-miss transform of ``input`` with the given + structuring element (``structure1``, ``structure2``). + + .. warning:: + + This function may synchronize the device. + + .. seealso:: :func:`scipy.ndimage.binary_hit_or_miss` + """ + if structure1 is None: + structure1 = generate_binary_structure(input.ndim, 1) + if structure2 is None: + structure2 = cupy.logical_not(structure1) + origin1 = _util._fix_sequence_arg(origin1, input.ndim, 'origin1', int) + if origin2 is None: + origin2 = origin1 + else: + origin2 = _util._fix_sequence_arg(origin2, input.ndim, 'origin2', int) + + tmp1 = _binary_erosion(input, structure1, 1, None, None, 0, origin1, 0, + False) + inplace = isinstance(output, cupy.ndarray) + result = _binary_erosion(input, structure2, 1, None, output, 0, origin2, 1, + False) + if inplace: + cupy.logical_not(output, output) + cupy.logical_and(tmp1, output, output) + else: + cupy.logical_not(result, result) + return cupy.logical_and(tmp1, result) + + +def binary_propagation(input, structure=None, mask=None, output=None, + border_value=0, origin=0): + """ + Multidimensional binary propagation with the given structuring element. 
+ + Args: + input (cupy.ndarray): Binary image to be propagated inside ``mask``. + structure (cupy.ndarray, optional): Structuring element used in the + successive dilations. The output may depend on the structuring + element, especially if ``mask`` has several connex components. If + no structuring element is provided, an element is generated with a + squared connectivity equal to one. + mask (cupy.ndarray, optional): Binary mask defining the region into + which ``input`` is allowed to propagate. + output (cupy.ndarray, optional): Array of the same shape as input, into + which the output is placed. By default, a new array is created. + border_value (int, optional): Value at the border in the output array. + The value is cast to 0 or 1. + origin (int or tuple of ints, optional): Placement of the filter. + + Returns: + cupy.ndarray : Binary propagation of ``input`` inside ``mask``. + + .. warning:: + + This function may synchronize the device. + + .. seealso:: :func:`scipy.ndimage.binary_propagation` + """ + return binary_dilation(input, structure, -1, mask, output, border_value, + origin, brute_force=True) + + +def binary_fill_holes(input, structure=None, output=None, origin=0): + """Fill the holes in binary objects. + + Args: + input (cupy.ndarray): N-D binary array with holes to be filled. + structure (cupy.ndarray, optional): Structuring element used in the + computation; large-size elements make computations faster but may + miss holes separated from the background by thin regions. The + default element (with a square connectivity equal to one) yields + the intuitive result where all holes in the input have been filled. + output (cupy.ndarray, dtype or None, optional): Array of the same shape + as input, into which the output is placed. By default, a new array + is created. + origin (int, tuple of ints, optional): Position of the structuring + element. + + Returns: + cupy.ndarray: Transformation of the initial image ``input`` where holes + have been filled. 
+ + .. warning:: + + This function may synchronize the device. + + .. seealso:: :func:`scipy.ndimage.binary_fill_holes` + """ + mask = cupy.logical_not(input) + tmp = cupy.zeros(mask.shape, bool) + inplace = isinstance(output, cupy.ndarray) + # TODO (grlee77): set brute_force=False below once implemented + if inplace: + binary_dilation(tmp, structure, -1, mask, output, 1, origin, + brute_force=True) + cupy.logical_not(output, output) + else: + output = binary_dilation(tmp, structure, -1, mask, None, 1, origin, + brute_force=True) + cupy.logical_not(output, output) + return output + + +def grey_erosion(input, size=None, footprint=None, structure=None, output=None, + mode='reflect', cval=0.0, origin=0): + """Calculates a greyscale erosion. + + Args: + input (cupy.ndarray): The input array. + size (tuple of ints): Shape of a flat and full structuring element used + for the greyscale erosion. Optional if ``footprint`` or + ``structure`` is provided. + footprint (array of ints): Positions of non-infinite elements of a flat + structuring element used for greyscale erosion. Non-zero values + give the set of neighbors of the center over which minimum is + chosen. + structure (array of ints): Structuring element used for the greyscale + erosion. ``structure`` may be a non-flat structuring element. + output (cupy.ndarray, dtype or None): The array in which to place the + output. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``constant``. Default is ``0.0``. + origin (scalar or tuple of scalar): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of 0 is equivalent to + ``(0,)*input.ndim``. + + Returns: + cupy.ndarray: The result of greyscale erosion. + + .. 
seealso:: :func:`scipy.ndimage.grey_erosion` + """ + + if size is None and footprint is None and structure is None: + raise ValueError('size, footprint or structure must be specified') + + return _filters._min_or_max_filter(input, size, footprint, structure, + output, mode, cval, origin, 'min') + + +def grey_dilation(input, size=None, footprint=None, structure=None, + output=None, mode='reflect', cval=0.0, origin=0): + """Calculates a greyscale dilation. + + Args: + input (cupy.ndarray): The input array. + size (tuple of ints): Shape of a flat and full structuring element used + for the greyscale dilation. Optional if ``footprint`` or + ``structure`` is provided. + footprint (array of ints): Positions of non-infinite elements of a flat + structuring element used for greyscale dilation. Non-zero values + give the set of neighbors of the center over which maximum is + chosen. + structure (array of ints): Structuring element used for the greyscale + dilation. ``structure`` may be a non-flat structuring element. + output (cupy.ndarray, dtype or None): The array in which to place the + output. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``constant``. Default is ``0.0``. + origin (scalar or tuple of scalar): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of 0 is equivalent to + ``(0,)*input.ndim``. + + Returns: + cupy.ndarray: The result of greyscale dilation. + + .. 
seealso:: :func:`scipy.ndimage.grey_dilation` + """ + + if size is None and footprint is None and structure is None: + raise ValueError('size, footprint or structure must be specified') + if structure is not None: + structure = cupy.array(structure) + structure = structure[tuple([slice(None, None, -1)] * structure.ndim)] + if footprint is not None: + footprint = cupy.array(footprint) + footprint = footprint[tuple([slice(None, None, -1)] * footprint.ndim)] + + origin = _util._fix_sequence_arg(origin, input.ndim, 'origin', int) + for i in range(len(origin)): + origin[i] = -origin[i] + if footprint is not None: + sz = footprint.shape[i] + elif structure is not None: + sz = structure.shape[i] + elif numpy.isscalar(size): + sz = size + else: + sz = size[i] + if sz % 2 == 0: + origin[i] -= 1 + + return _filters._min_or_max_filter(input, size, footprint, structure, + output, mode, cval, origin, 'max') + + +def grey_closing(input, size=None, footprint=None, structure=None, + output=None, mode='reflect', cval=0.0, origin=0): + """Calculates a multi-dimensional greyscale closing. + + Args: + input (cupy.ndarray): The input array. + size (tuple of ints): Shape of a flat and full structuring element used + for the greyscale closing. Optional if ``footprint`` or + ``structure`` is provided. + footprint (array of ints): Positions of non-infinite elements of a flat + structuring element used for greyscale closing. Non-zero values + give the set of neighbors of the center over which closing is + chosen. + structure (array of ints): Structuring element used for the greyscale + closing. ``structure`` may be a non-flat structuring element. + output (cupy.ndarray, dtype or None): The array in which to place the + output. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``constant``. 
Default is ``0.0``. + origin (scalar or tuple of scalar): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of 0 is equivalent to + ``(0,)*input.ndim``. + + Returns: + cupy.ndarray: The result of greyscale closing. + + .. seealso:: :func:`scipy.ndimage.grey_closing` + """ + if (size is not None) and (footprint is not None): + warnings.warn('ignoring size because footprint is set', UserWarning, + stacklevel=2) + tmp = grey_dilation(input, size, footprint, structure, None, mode, cval, + origin) + return grey_erosion(tmp, size, footprint, structure, output, mode, cval, + origin) + + +def grey_opening(input, size=None, footprint=None, structure=None, + output=None, mode='reflect', cval=0.0, origin=0): + """Calculates a multi-dimensional greyscale opening. + + Args: + input (cupy.ndarray): The input array. + size (tuple of ints): Shape of a flat and full structuring element used + for the greyscale opening. Optional if ``footprint`` or + ``structure`` is provided. + footprint (array of ints): Positions of non-infinite elements of a flat + structuring element used for greyscale opening. Non-zero values + give the set of neighbors of the center over which opening is + chosen. + structure (array of ints): Structuring element used for the greyscale + opening. ``structure`` may be a non-flat structuring element. + output (cupy.ndarray, dtype or None): The array in which to place the + output. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``constant``. Default is ``0.0``. + origin (scalar or tuple of scalar): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of 0 is equivalent to + ``(0,)*input.ndim``. 
+ + Returns: + cupy.ndarray: The result of greyscale opening. + + .. seealso:: :func:`scipy.ndimage.grey_opening` + """ + if (size is not None) and (footprint is not None): + warnings.warn('ignoring size because footprint is set', UserWarning, + stacklevel=2) + tmp = grey_erosion(input, size, footprint, structure, None, mode, cval, + origin) + return grey_dilation(tmp, size, footprint, structure, output, mode, cval, + origin) + + +def morphological_gradient( + input, + size=None, + footprint=None, + structure=None, + output=None, + mode='reflect', + cval=0.0, + origin=0, +): + """ + Multidimensional morphological gradient. + + The morphological gradient is calculated as the difference between a + dilation and an erosion of the input with a given structuring element. + + Args: + input (cupy.ndarray): The input array. + size (tuple of ints): Shape of a flat and full structuring element used + for the morphological gradient. Optional if ``footprint`` or + ``structure`` is provided. + footprint (array of ints): Positions of non-infinite elements of a flat + structuring element used for morphological gradient. Non-zero + values give the set of neighbors of the center over which opening + is chosen. + structure (array of ints): Structuring element used for the + morphological gradient. ``structure`` may be a non-flat + structuring element. + output (cupy.ndarray, dtype or None): The array in which to place the + output. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``constant``. Default is ``0.0``. + origin (scalar or tuple of scalar): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of 0 is equivalent to + ``(0,)*input.ndim``. + + Returns: + cupy.ndarray: The morphological gradient of the input. 
+ + .. seealso:: :func:`scipy.ndimage.morphological_gradient` + """ + tmp = grey_dilation( + input, size, footprint, structure, None, mode, cval, origin + ) + if isinstance(output, cupy.ndarray): + grey_erosion( + input, size, footprint, structure, output, mode, cval, origin + ) + return cupy.subtract(tmp, output, output) + else: + return tmp - grey_erosion( + input, size, footprint, structure, None, mode, cval, origin + ) + + +def morphological_laplace( + input, + size=None, + footprint=None, + structure=None, + output=None, + mode='reflect', + cval=0.0, + origin=0, +): + """ + Multidimensional morphological laplace. + + Args: + input (cupy.ndarray): The input array. + size (tuple of ints): Shape of a flat and full structuring element used + for the morphological laplace. Optional if ``footprint`` or + ``structure`` is provided. + footprint (array of ints): Positions of non-infinite elements of a flat + structuring element used for morphological laplace. Non-zero + values give the set of neighbors of the center over which opening + is chosen. + structure (array of ints): Structuring element used for the + morphological laplace. ``structure`` may be a non-flat + structuring element. + output (cupy.ndarray, dtype or None): The array in which to place the + output. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``constant``. Default is ``0.0``. + origin (scalar or tuple of scalar): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of 0 is equivalent to + ``(0,)*input.ndim``. + + Returns: + cupy.ndarray: The morphological laplace of the input. + + .. 
seealso:: :func:`scipy.ndimage.morphological_laplace` + """ + tmp1 = grey_dilation( + input, size, footprint, structure, None, mode, cval, origin + ) + if isinstance(output, cupy.ndarray): + grey_erosion( + input, size, footprint, structure, output, mode, cval, origin + ) + cupy.add(tmp1, output, output) + cupy.subtract(output, input, output) + return cupy.subtract(output, input, output) + else: + tmp2 = grey_erosion( + input, size, footprint, structure, None, mode, cval, origin + ) + cupy.add(tmp1, tmp2, tmp2) + cupy.subtract(tmp2, input, tmp2) + cupy.subtract(tmp2, input, tmp2) + return tmp2 + + +def white_tophat( + input, + size=None, + footprint=None, + structure=None, + output=None, + mode='reflect', + cval=0.0, + origin=0, +): + """ + Multidimensional white tophat filter. + + Args: + input (cupy.ndarray): The input array. + size (tuple of ints): Shape of a flat and full structuring element used + for the white tophat. Optional if ``footprint`` or ``structure`` is + provided. + footprint (array of ints): Positions of non-infinite elements of a flat + structuring element used for the white tophat. Non-zero values + give the set of neighbors of the center over which opening is + chosen. + structure (array of ints): Structuring element used for the white + tophat. ``structure`` may be a non-flat structuring element. + output (cupy.ndarray, dtype or None): The array in which to place the + output. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``constant``. Default is ``0.0``. + origin (scalar or tuple of scalar): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of 0 is equivalent to + ``(0,)*input.ndim``. + + Returns: + cupy.ndarray: Result of the filter of ``input`` with ``structure``. + + .. 
seealso:: :func:`scipy.ndimage.white_tophat` + """ + if (size is not None) and (footprint is not None): + warnings.warn( + 'ignoring size because footprint is set', UserWarning, stacklevel=2 + ) + tmp = grey_erosion( + input, size, footprint, structure, None, mode, cval, origin + ) + tmp = grey_dilation( + tmp, size, footprint, structure, output, mode, cval, origin + ) + if input.dtype == numpy.bool_ and tmp.dtype == numpy.bool_: + cupy.bitwise_xor(input, tmp, out=tmp) + else: + cupy.subtract(input, tmp, out=tmp) + return tmp + + +def black_tophat( + input, + size=None, + footprint=None, + structure=None, + output=None, + mode='reflect', + cval=0.0, + origin=0, +): + """ + Multidimensional black tophat filter. + + Args: + input (cupy.ndarray): The input array. + size (tuple of ints): Shape of a flat and full structuring element used + for the black tophat. Optional if ``footprint`` or ``structure`` is + provided. + footprint (array of ints): Positions of non-infinite elements of a flat + structuring element used for the black tophat. Non-zero values + give the set of neighbors of the center over which opening is + chosen. + structure (array of ints): Structuring element used for the black + tophat. ``structure`` may be a non-flat structuring element. + output (cupy.ndarray, dtype or None): The array in which to place the + output. + mode (str): The array borders are handled according to the given mode + (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``, + ``'wrap'``). Default is ``'reflect'``. + cval (scalar): Value to fill past edges of input if mode is + ``constant``. Default is ``0.0``. + origin (scalar or tuple of scalar): The origin parameter controls the + placement of the filter, relative to the center of the current + element of the input. Default of 0 is equivalent to + ``(0,)*input.ndim``. + + Returns: + cupy.ndarray: Result of the filter of ``input`` with ``structure``. + + .. 
seealso:: :func:`scipy.ndimage.black_tophat` + """ + if (size is not None) and (footprint is not None): + warnings.warn( + 'ignoring size because footprint is set', UserWarning, stacklevel=2 + ) + tmp = grey_dilation( + input, size, footprint, structure, None, mode, cval, origin + ) + tmp = grey_erosion( + tmp, size, footprint, structure, output, mode, cval, origin + ) + if input.dtype == numpy.bool_ and tmp.dtype == numpy.bool_: + cupy.bitwise_xor(tmp, input, out=tmp) + else: + cupy.subtract(tmp, input, out=tmp) + return tmp diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_kernel_weights.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_kernel_weights.py new file mode 100644 index 000000000..b2fc84449 --- /dev/null +++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_kernel_weights.py @@ -0,0 +1,73 @@ +"""Determination of spline kernel weights (adapted from SciPy) + +See more verbose comments for each case there: +https://github.com/scipy/scipy/blob/eba29d69846ab1299976ff4af71c106188397ccc/scipy/ndimage/src/ni_splines.c#L7 # NOQA + +``spline_weights_inline`` is a dict where the key is the spline order and the +value is the spline weight initialization code. +""" + +spline_weights_inline = {} + +# Note: This order = 1 case is currently unused (order = 1 has a different code +# path in _interp_kernels.py). I think that existing code is a bit more +# efficient. +spline_weights_inline[1] = ''' +wx = c_{j} - floor({order} & 1 ? c_{j} : c_{j} + 0.5); +weights_{j}[0] = 1.0 - wx; +weights_{j}[1] = wx; +''' + +spline_weights_inline[2] = ''' +wx = c_{j} - floor({order} & 1 ? c_{j} : c_{j} + 0.5); +weights_{j}[1] = 0.75 - wx * wx; +wy = 0.5 - wx; +weights_{j}[0] = 0.5 * wy * wy; +weights_{j}[2] = 1.0 - weights_{j}[0] - weights_{j}[1]; +''' + +spline_weights_inline[3] = ''' +wx = c_{j} - floor({order} & 1 ? 
c_{j} : c_{j} + 0.5); +wy = 1.0 - wx; +weights_{j}[1] = (wx * wx * (wx - 2.0) * 3.0 + 4.0) / 6.0; +weights_{j}[2] = (wy * wy * (wy - 2.0) * 3.0 + 4.0) / 6.0; +weights_{j}[0] = wy * wy * wy / 6.0; +weights_{j}[3] = 1.0 - weights_{j}[0] - weights_{j}[1] - weights_{j}[2]; +''' + +spline_weights_inline[4] = ''' +wx = c_{j} - floor({order} & 1 ? c_{j} : c_{j} + 0.5); +wy = wx * wx; +weights_{j}[2] = wy * (wy * 0.25 - 0.625) + 115.0 / 192.0; +wy = 1.0 + wx; +weights_{j}[1] = wy * (wy * (wy * (5.0 - wy) / 6.0 - 1.25) + 5.0 / 24.0) + + 55.0 / 96.0; +wy = 1.0 - wx; +weights_{j}[3] = wy * (wy * (wy * (5.0 - wy) / 6.0 - 1.25) + 5.0 / 24.0) + + 55.0 / 96.0; +wy = 0.5 - wx; +wy = wy * wy; +weights_{j}[0] = wy * wy / 24.0; +weights_{j}[4] = 1.0 - weights_{j}[0] - weights_{j}[1] + - weights_{j}[2] - weights_{j}[3]; +''' + +spline_weights_inline[5] = ''' +wx = c_{j} - floor({order} & 1 ? c_{j} : c_{j} + 0.5); +wy = wx * wx; +weights_{j}[2] = wy * (wy * (0.25 - wx / 12.0) - 0.5) + 0.55; +wy = 1.0 - wx; +wy = wy * wy; +weights_{j}[3] = wy * (wy * (0.25 - (1.0 - wx) / 12.0) - 0.5) + 0.55; +wy = wx + 1.0; +weights_{j}[1] = wy * (wy * (wy * (wy * (wy / 24.0 - 0.375) + 1.25) - 1.75) + + 0.625) + 0.425; +wy = 2.0 - wx; +weights_{j}[4] = wy * (wy * (wy * (wy * (wy / 24.0 - 0.375) + 1.25) - 1.75) + + 0.625) + 0.425; +wy = 1.0 - wx; +wy = wy * wy; +weights_{j}[0] = (1.0 - wx) * wy * wy / 120.0; +weights_{j}[5] = 1.0 - weights_{j}[0] - weights_{j}[1] - weights_{j}[2] + - weights_{j}[3] - weights_{j}[4]; +''' diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_prefilter_core.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_prefilter_core.py new file mode 100644 index 000000000..c44df836e --- /dev/null +++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_prefilter_core.py @@ -0,0 +1,256 @@ +""" +Spline poles and boundary handling implemented as in SciPy + 
+https://github.com/scipy/scipy/blob/ee6ae72f83a0995aeb34929aed881d3f36fccfda/scipy/ndimage/src/ni_splines.c # noqa +""" +import functools +import math +import operator +import textwrap + +import cupy + + +def get_poles(order): + if order == 2: + # sqrt(8.0) - 3.0 + return (-0.171572875253809902396622551580603843,) + elif order == 3: + # sqrt(3.0) - 2.0 + return (-0.267949192431122706472553658494127633,) + elif order == 4: + # sqrt(664.0 - sqrt(438976.0)) + sqrt(304.0) - 19.0 + # sqrt(664.0 + sqrt(438976.0)) - sqrt(304.0) - 19.0 + return (-0.361341225900220177092212841325675255, + -0.013725429297339121360331226939128204) + elif order == 5: + # sqrt(67.5 - sqrt(4436.25)) + sqrt(26.25) - 6.5 + # sqrt(67.5 + sqrt(4436.25)) - sqrt(26.25) - 6.5 + return (-0.430575347099973791851434783493520110, + -0.043096288203264653822712376822550182) + else: + raise ValueError('only order 2-5 supported') + + +def get_gain(poles): + return functools.reduce(operator.mul, + [(1.0 - z) * (1.0 - 1.0 / z) for z in poles]) + + +def _causal_init_code(mode): + """Code for causal initialization step of IIR filtering. 
+ + c is a 1d array of length n and z is a filter pole + """ + code = f''' + // causal init for mode={mode}''' + if mode == 'mirror': + code += ''' + z_i = z; + z_n_1 = pow(z, (P)(n - 1)); + + c[0] = c[0] + z_n_1 * c[(n - 1) * element_stride]; + for (i = 1; i < min(n - 1, static_cast({n_boundary})); ++i) {{ + c[0] += z_i * (c[i * element_stride] + + z_n_1 * c[(n - 1 - i) * element_stride]); + z_i *= z; + }} + c[0] /= 1 - z_n_1 * z_n_1;''' + elif mode == 'grid-wrap': + code += ''' + z_i = z; + + for (i = 1; i < min(n, static_cast({n_boundary})); ++i) {{ + c[0] += z_i * c[(n - i) * element_stride]; + z_i *= z; + }} + c[0] /= 1 - z_i; /* z_i = pow(z, n) */''' + elif mode == 'reflect': + code += ''' + z_i = z; + z_n = pow(z, (P)n); + c0 = c[0]; + + c[0] = c[0] + z_n * c[(n - 1) * element_stride]; + for (i = 1; i < min(n, static_cast({n_boundary})); ++i) {{ + c[0] += z_i * (c[i * element_stride] + + z_n * c[(n - 1 - i) * element_stride]); + z_i *= z; + }} + c[0] *= z / (1 - z_n * z_n); + c[0] += c0;''' + else: + raise ValueError('invalid mode: {}'.format(mode)) + return code + + +def _anticausal_init_code(mode): + """Code for the anti-causal initialization step of IIR filtering. 
+ + c is a 1d array of length n and z is a filter pole + """ + code = f''' + // anti-causal init for mode={mode}''' + if mode == 'mirror': + code += ''' + c[(n - 1) * element_stride] = ( + z * c[(n - 2) * element_stride] + + c[(n - 1) * element_stride]) * z / (z * z - 1);''' + elif mode == 'grid-wrap': + code += ''' + z_i = z; + + for (i = 0; i < min(n - 1, static_cast({n_boundary})); ++i) {{ + c[(n - 1) * element_stride] += z_i * c[i * element_stride]; + z_i *= z; + }} + c[(n - 1) * element_stride] *= z / (z_i - 1); /* z_i = pow(z, n) */''' + elif mode == 'reflect': + code += ''' + c[(n - 1) * element_stride] *= z / (z - 1);''' + else: + raise ValueError('invalid mode: {}'.format(mode)) + return code + + +def _get_spline_mode(mode): + """spline boundary mode for interpolation with order >= 2.""" + if mode in ['mirror', 'reflect', 'grid-wrap']: + # exact analytic boundary conditions exist for these modes. + return mode + elif mode == 'grid-mirror': + # grid-mirror is a synonym for 'reflect' + return 'reflect' + # No exact analytical spline boundary condition implemented. Reflect gives + # lower error than using mirror or wrap for mode 'nearest'. Otherwise, a + # mirror spline boundary condition is used. + return 'reflect' if mode == 'nearest' else 'mirror' + + +def _get_spline1d_code(mode, poles, n_boundary): + """Generates the code required for IIR filtering of a single 1d signal. + + Prefiltering is done by causal filtering followed by anti-causal filtering. + Multiple boundary conditions have been implemented. 
+ """ + code = [''' + __device__ void spline_prefilter1d( + T* __restrict__ c, idx_t signal_length, idx_t element_stride) + {{'''] + + # variables common to all boundary modes + code.append(''' + idx_t i, n = signal_length; + P z, z_i;''') + + # retrieve the spline boundary extension mode to use + mode = _get_spline_mode(mode) + + if mode == 'mirror': + # variables specific to mirror boundary mode + code.append(''' + P z_n_1;''') + elif mode == 'reflect': + # variables specific to reflect boundary mode + code.append(''' + P z_n; + T c0;''') + + for pole in poles: + + code.append(f''' + // select the current pole + z = {pole};''') + + # initialize and apply the causal filter + code.append(_causal_init_code(mode)) + code.append(''' + // apply the causal filter for the current pole + for (i = 1; i < n; ++i) {{ + c[i * element_stride] += z * c[(i - 1) * element_stride]; + }}''') + # initialize and apply the anti-causal filter + code.append(_anticausal_init_code(mode)) + code.append(''' + // apply the anti-causal filter for the current pole + for (i = n - 2; i >= 0; --i) {{ + c[i * element_stride] = z * (c[(i + 1) * element_stride] - + c[i * element_stride]); + }}''') + + code += [''' + }}'''] + return textwrap.dedent('\n'.join(code)).format(n_boundary=n_boundary) + + +_FILTER_GENERAL = ''' +#include "cupy/carray.cuh" +#include "cupy/complex.cuh" +typedef {data_type} T; +typedef {pole_type} P; +typedef {index_type} idx_t; +template +__device__ T* row( + T* ptr, idx_t i, idx_t axis, idx_t ndim, const idx_t* shape) {{ + idx_t index = 0, stride = 1; + for (idx_t a = ndim - 1; a > 0; --a) {{ + if (a != axis) {{ + index += (i % shape[a]) * stride; + i /= shape[a]; + }} + stride *= shape[a]; + }} + return ptr + index + stride * i; +}} +''' + + +_batch_spline1d_strided_template = """ +extern "C" __global__ +__launch_bounds__({block_size}) +void {kernel_name}(T* __restrict__ y, const idx_t* __restrict__ info) {{ + const idx_t n_signals = info[0], n_samples = info[1], + * 
__restrict__ shape = info+2; + idx_t y_elem_stride = 1; + for (int a = {ndim} - 1; a > {axis}; --a) {{ y_elem_stride *= shape[a]; }} + idx_t unraveled_idx = blockDim.x * blockIdx.x + threadIdx.x; + idx_t batch_idx = unraveled_idx; + if (batch_idx < n_signals) + {{ + T* __restrict__ y_i = row(y, batch_idx, {axis}, {ndim}, shape); + spline_prefilter1d(y_i, n_samples, y_elem_stride); + }} +}} +""" + + +@cupy.memoize(for_each_device=True) +def get_raw_spline1d_kernel(axis, ndim, mode, order, index_type='int', + data_type='double', pole_type='double', + block_size=128): + """Generate a kernel for applying a spline prefilter along a given axis.""" + poles = get_poles(order) + + # determine number of samples for the boundary approximation + # (SciPy uses n_boundary = n_samples but this is excessive) + largest_pole = max([abs(p) for p in poles]) + # tol < 1e-7 fails test cases comparing to SciPy at atol = rtol = 1e-5 + tol = 1e-10 if pole_type == 'float' else 1e-18 + n_boundary = math.ceil(math.log(tol, largest_pole)) + + # headers and general utility function for extracting rows of data + code = _FILTER_GENERAL.format(index_type=index_type, + data_type=data_type, + pole_type=pole_type) + + # generate source for a 1d function for a given boundary mode and poles + code += _get_spline1d_code(mode, poles, n_boundary) + + # generate code handling batch operation of the 1d filter + mode_str = mode.replace('-', '_') # cannot have '-' in kernel name + kernel_name = (f'cupyx_scipy_ndimage_spline_filter_{ndim}d_ord{order}_' + f'axis{axis}_{mode_str}') + code += _batch_spline1d_strided_template.format(ndim=ndim, axis=axis, + block_size=block_size, + kernel_name=kernel_name) + return cupy.RawKernel(code, kernel_name) diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_util.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_util.py index 1d87b3256..1a6a830a5 100644 --- a/python/cucim/src/cucim/skimage/_vendored/_ndimage_util.py +++ 
b/python/cucim/src/cucim/skimage/_vendored/_ndimage_util.py @@ -5,6 +5,33 @@ import numpy +def _is_integer_output(output, input): + if output is None: + return input.dtype.kind in 'iu' + elif isinstance(output, cupy.ndarray): + return output.dtype.kind in 'iu' + return cupy.dtype(output).kind in 'iu' + + +def _check_cval(mode, cval, integer_output): + if mode == 'constant' and integer_output and not cupy.isfinite(cval): + raise NotImplementedError("Non-finite cval is not supported for " + "outputs with integer dtype.") + + +def _get_weights_dtype(input, weights, use_cucim_casting=False): + if weights.dtype.kind == "c" or input.dtype.kind == "c": + return cupy.promote_types(input.real.dtype, cupy.complex64) + elif weights.dtype.kind in 'iub': + if use_cucim_casting: + from cucim.skimage._shared.utils import _supported_float_type + return _supported_float_type(weights.dtype) + else: + # convert integer dtype weights to double as in SciPy + return cupy.float64 + return cupy.promote_types(input.real.dtype, cupy.float32) + + def _get_output(output, input, shape=None, complex_output=False): shape = input.shape if shape is None else shape if output is None: @@ -12,17 +39,17 @@ def _get_output(output, input, shape=None, complex_output=False): _dtype = cupy.promote_types(input.dtype, cupy.complex64) else: _dtype = input.dtype - output = cupy.zeros(shape, dtype=_dtype) + output = cupy.empty(shape, dtype=_dtype) elif isinstance(output, (type, cupy.dtype)): if complex_output and cupy.dtype(output).kind != 'c': warnings.warn("promoting specified output dtype to complex") output = cupy.promote_types(output, cupy.complex64) - output = cupy.zeros(shape, dtype=output) + output = cupy.empty(shape, dtype=output) elif isinstance(output, str): output = numpy.sctypeDict[output] if complex_output and cupy.dtype(output).kind != 'c': raise RuntimeError("output must have complex dtype") - output = cupy.zeros(shape, dtype=output) + output = cupy.empty(shape, dtype=output) elif output.shape 
!= shape: raise RuntimeError("output shape not correct") elif complex_output and output.dtype.kind != 'c': @@ -44,6 +71,13 @@ def _fix_sequence_arg(arg, ndim, name, conv=lambda x: x): return lst +def _check_origin(origin, width): + origin = int(origin) + if (width // 2 + origin < 0) or (width // 2 + origin >= width): + raise ValueError('invalid origin') + return origin + + def _check_mode(mode): if mode not in ('reflect', 'constant', 'nearest', 'mirror', 'wrap', 'grid-mirror', 'grid-wrap', 'grid-reflect'): @@ -52,13 +86,6 @@ def _check_mode(mode): return mode -def _check_origin(origin, width): - origin = int(origin) - if (width // 2 + origin < 0) or (width // 2 + origin >= width): - raise ValueError('invalid origin') - return origin - - def _get_inttype(input): # The integer type to use for indices in the input array # The indices actually use byte positions and we can't just use @@ -70,53 +97,118 @@ def _get_inttype(input): def _generate_boundary_condition_ops(mode, ix, xsize, int_t="int", - float_ix=False): + float_ix=False, separate=False): + """Generate boundary conditions + + If separate = True, a pair of conditions for the (lower, upper) boundary + are provided instead of a single expression. 
+ """ min_func = "fmin" if float_ix else "min" max_func = "fmax" if float_ix else "max" if mode in ['reflect', 'grid-mirror']: - ops = ''' - if ({ix} < 0) {{ - {ix} = - 1 -{ix}; - }} - {ix} %= {xsize} * 2; - {ix} = {min}({ix}, 2 * {xsize} - 1 - {ix});'''.format( - ix=ix, xsize=xsize, min=min_func) - elif mode == 'mirror': - ops = ''' - if ({xsize} == 1) {{ - {ix} = 0; - }} else {{ + if separate: + ops_upper = f''' + {ix} %= {xsize} * 2; + {ix} = {min_func}({ix}, 2 * {xsize} - 1 - {ix}); + ''' + ops_lower = f''' if ({ix} < 0) {{ - {ix} = -{ix}; + {ix} = - 1 -{ix}; }} - {ix} = 1 + ({ix} - 1) % (({xsize} - 1) * 2); - {ix} = {min}({ix}, 2 * {xsize} - 2 - {ix}); - }}'''.format(ix=ix, xsize=xsize, min=min_func) + ''' + ops_upper + ops = (ops_lower, ops_upper) + else: + ops = f''' + if ({ix} < 0) {{ + {ix} = - 1 -{ix}; + }} + {ix} %= {xsize} * 2; + {ix} = {min_func}({ix}, 2 * {xsize} - 1 - {ix});''' + elif mode == 'mirror': + if separate: + temp1 = f''' + if ({xsize} == 1) {{ + {ix} = 0; + }} else {{ + ''' + temp2 = f''' + if ({ix} < 0) {{ + {ix} = -{ix}; + }} + ''' + temp3 = f''' + {ix} = 1 + ({ix} - 1) % (({xsize} - 1) * 2); + {ix} = {min_func}({ix}, 2 * {xsize} - 2 - {ix}); + }}''' + ops_lower = temp1 + temp2 + temp3 + ops_upper = temp1 + temp3 + ops = (ops_lower, ops_upper) + else: + ops = f''' + if ({xsize} == 1) {{ + {ix} = 0; + }} else {{ + if ({ix} < 0) {{ + {ix} = -{ix}; + }} + {ix} = 1 + ({ix} - 1) % (({xsize} - 1) * 2); + {ix} = {min_func}({ix}, 2 * {xsize} - 2 - {ix}); + }}''' elif mode == 'nearest': - ops = ''' - {ix} = {min}({max}(({T}){ix}, ({T})0), ({T})({xsize} - 1));'''.format( - ix=ix, xsize=xsize, min=min_func, max=max_func, - # force using 64-bit signed integer for ptrdiff_t, - # see cupy/cupy#6048 - T=('int' if int_t == 'int' else 'long long')) + T = 'int' if int_t == 'int' else 'long long' + if separate: + ops_lower = f'''{ix} = {max_func}(({T}){ix}, ({T})0);''' + ops_upper = f'''{ix} = {min_func}(({T}){ix}, ({T})({xsize} - 1));''' # noqa + ops = 
(ops_lower, ops_upper) + else: + ops = f'''{ix} = {min_func}({max_func}(({T}){ix}, ({T})0), ({T})({xsize} - 1));''' # noqa elif mode == 'grid-wrap': - ops = ''' - {ix} %= {xsize}; - if ({ix} < 0) {{ - {ix} += {xsize}; - }}'''.format(ix=ix, xsize=xsize) + if separate: + ops_upper = f''' + {ix} %= {xsize}; + ''' + ops_lower = ops_upper + f''' + if ({ix} < 0) {{ + {ix} += {xsize}; + }}''' + ops = (ops_lower, ops_upper) + else: + ops = f''' + {ix} %= {xsize}; + if ({ix} < 0) {{ + {ix} += {xsize}; + }}''' + elif mode == 'wrap': - ops = ''' - if ({ix} < 0) {{ - {ix} += ({sz} - 1) * (({int_t})(-{ix} / ({sz} - 1)) + 1); - }} else if ({ix} > ({sz} - 1)) {{ - {ix} -= ({sz} - 1) * ({int_t})({ix} / ({sz} - 1)); - }};'''.format(ix=ix, sz=xsize, int_t=int_t) + if separate: + ops_lower = f'''{ix} += ({xsize} - 1) * (({int_t})(-{ix} / ({xsize} - 1)) + 1);''' # noqa + ops_upper = f'''{ix} -= ({xsize} - 1) * ({int_t})({ix} / ({xsize} - 1));''' # noqa + ops = (ops_lower, ops_upper) + else: + ops = f''' + if ({ix} < 0) {{ + {ix} += ({xsize} - 1) * (({int_t})(-{ix} / ({xsize} - 1)) + 1); + }} else if ({ix} > ({xsize} - 1)) {{ + {ix} -= ({xsize} - 1) * ({int_t})({ix} / ({xsize} - 1)); + }};''' elif mode in ['constant', 'grid-constant']: - ops = ''' - if (({ix} < 0) || {ix} >= {xsize}) {{ - {ix} = -1; - }}'''.format(ix=ix, xsize=xsize) + if separate: + ops_lower = f''' + if ({ix} < 0) {{ + {ix} = -1; + }}''' + ops_upper = f''' + if ({ix} >= {xsize}) {{ + {ix} = -1; + }}''' + ops = (ops_lower, ops_upper) + else: + ops = f''' + if (({ix} < 0) || {ix} >= {xsize}) {{ + {ix} = -1; + }}''' + if separate: + ops = (ops, ops) return ops diff --git a/python/cucim/src/cucim/skimage/_vendored/_texture.py b/python/cucim/src/cucim/skimage/_vendored/_texture.py new file mode 100644 index 000000000..5c18bce71 --- /dev/null +++ b/python/cucim/src/cucim/skimage/_vendored/_texture.py @@ -0,0 +1,194 @@ +import cupy +from cupy import _core +from cupy.cuda import runtime, texture + 
+_affine_transform_2d_array_kernel = _core.ElementwiseKernel( + 'U texObj, raw float32 m, uint64 width', 'T transformed_image', + ''' + float3 pixel = make_float3( + (float)(i / width), + (float)(i % width), + 1.0f + ); + float x = dot(pixel, make_float3(m[0], m[1], m[2])) + .5f; + float y = dot(pixel, make_float3(m[3], m[4], m[5])) + .5f; + transformed_image = tex2D(texObj, y, x); + ''', + 'cupyx_texture_affine_transformation_2d_array', + preamble=''' + inline __host__ __device__ float dot(float3 a, float3 b) + { + return a.x * b.x + a.y * b.y + a.z * b.z; + } + ''') + + +_affine_transform_3d_array_kernel = _core.ElementwiseKernel( + 'U texObj, raw float32 m, uint64 height, uint64 width', + 'T transformed_volume', + ''' + float4 voxel = make_float4( + (float)(i / (width * height)), + (float)((i % (width * height)) / width), + (float)((i % (width * height)) % width), + 1.0f + ); + float x = dot(voxel, make_float4(m[0], m[1], m[2], m[3])) + .5f; + float y = dot(voxel, make_float4(m[4], m[5], m[6], m[7])) + .5f; + float z = dot(voxel, make_float4(m[8], m[9], m[10], m[11])) + .5f; + transformed_volume = tex3D(texObj, z, y, x); + ''', + 'cupyx_texture_affine_transformation_3d_array', + preamble=''' + inline __host__ __device__ float dot(float4 a, float4 b) + { + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; + } + ''') + + +def _create_texture_object(data, + address_mode: str, + filter_mode: str, + read_mode: str, + border_color=0): + + if cupy.issubdtype(data.dtype, cupy.unsignedinteger): + fmt_kind = runtime.cudaChannelFormatKindUnsigned + elif cupy.issubdtype(data.dtype, cupy.integer): + fmt_kind = runtime.cudaChannelFormatKindSigned + elif cupy.issubdtype(data.dtype, cupy.floating): + fmt_kind = runtime.cudaChannelFormatKindFloat + else: + raise ValueError(f'Unsupported data type {data.dtype}') + + if address_mode == 'nearest': + address_mode = runtime.cudaAddressModeClamp + elif address_mode == 'constant': + address_mode = runtime.cudaAddressModeBorder + 
else: + raise ValueError( + f'Unsupported address mode {address_mode} ' + '(supported: constant, nearest)') + + if filter_mode == 'nearest': + filter_mode = runtime.cudaFilterModePoint + elif filter_mode == 'linear': + filter_mode = runtime.cudaFilterModeLinear + else: + raise ValueError( + f'Unsupported filter mode {filter_mode} ' + f'(supported: nearest, linear)') + + if read_mode == 'element_type': + read_mode = runtime.cudaReadModeElementType + elif read_mode == 'normalized_float': + read_mode = runtime.cudaReadModeNormalizedFloat + else: + raise ValueError( + f'Unsupported read mode {read_mode} ' + '(supported: element_type, normalized_float)') + + texture_fmt = texture.ChannelFormatDescriptor( + data.itemsize * 8, 0, 0, 0, fmt_kind) + # CUDAArray: last dimension is the fastest changing dimension + array = texture.CUDAarray(texture_fmt, *data.shape[::-1]) + res_desc = texture.ResourceDescriptor( + runtime.cudaResourceTypeArray, cuArr=array) + # TODO(the-lay): each dimension can have a different addressing mode + # TODO(the-lay): border color/value can be defined for up to 4 channels + tex_desc = texture.TextureDescriptor( + (address_mode, ) * data.ndim, filter_mode, read_mode, + borderColors=(border_color, )) + tex_obj = texture.TextureObject(res_desc, tex_desc) + array.copy_from(data) + + return tex_obj + + +def affine_transformation(data, + transformation_matrix, + output_shape=None, + output=None, + interpolation: str = 'linear', + mode: str = 'constant', + border_value=0): + """ + Apply an affine transformation. + + The method uses texture memory and supports only 2D and 3D float32 arrays + without channel dimension. + + Args: + data (cupy.ndarray): The input array or texture object. + transformation_matrix (cupy.ndarray): Affine transformation matrix. + Must be homogeneous and have shape ``(ndim + 1, ndim + 1)``. + output_shape (tuple of ints): Shape of output. If not specified, + the input array shape is used. Default is None. 
+ output (cupy.ndarray or ~cupy.dtype): The array in which to place the + output, or the dtype of the returned array. If not specified, + creates the output array with shape of ``output_shape``. Default is + None. + interpolation (str): Specifies interpolation mode: ``'linear'`` or + ``'nearest'``. Default is ``'linear'``. + mode (str): Specifies addressing mode for points outside of the array: + (``'constant'``, ``'nearest'``). Default is ``'constant'``. + border_value: Specifies value to be used for coordinates outside + of the array for ``'constant'`` mode. Default is 0. + + Returns: + cupy.ndarray: + The transformed input. + + .. seealso:: :func:`cupyx.scipy.ndimage.affine_transform` + """ + + ndim = data.ndim + if (ndim < 2) or (ndim > 3): + raise ValueError( + 'Texture memory affine transformation is defined only for ' + '2D and 3D arrays without channel dimension.') + + dtype = data.dtype + if dtype != cupy.float32: + raise ValueError(f'Texture memory affine transformation is available ' + f'only for float32 data type (not {dtype})') + + if interpolation not in ['linear', 'nearest']: + raise ValueError( + f'Unsupported interpolation {interpolation} ' + f'(supported: linear, nearest)') + + if transformation_matrix.shape != (ndim + 1, ndim + 1): + raise ValueError('Matrix must be have shape (ndim + 1, ndim + 1)') + + texture_object = _create_texture_object(data, + address_mode=mode, + filter_mode=interpolation, + read_mode='element_type', + border_color=border_value) + + if ndim == 2: + kernel = _affine_transform_2d_array_kernel + else: + kernel = _affine_transform_3d_array_kernel + + if output_shape is None: + output_shape = data.shape + + if output is None: + output = cupy.zeros(output_shape, dtype=dtype) + elif isinstance(output, (type, cupy.dtype)): + if output != cupy.float32: + raise ValueError(f'Texture memory affine transformation is ' + f'available only for float32 data type (not ' + f'{output})') + output = cupy.zeros(output_shape, dtype=output) + 
elif isinstance(output, cupy.ndarray): + if output.shape != output_shape: + raise ValueError('Output shapes do not match') + else: + raise ValueError('Output must be None, cupy.ndarray or cupy.dtype') + + kernel(texture_object, transformation_matrix, *output_shape[1:], output) + return output diff --git a/python/cucim/src/cucim/skimage/_vendored/ndimage.py b/python/cucim/src/cucim/skimage/_vendored/ndimage.py new file mode 100644 index 000000000..cd97578a8 --- /dev/null +++ b/python/cucim/src/cucim/skimage/_vendored/ndimage.py @@ -0,0 +1,82 @@ +# locally defined filters that are more efficient than in CuPy +from cucim.skimage._vendored._ndimage_filters import correlate # NOQA +from cucim.skimage._vendored._ndimage_filters import convolve # NOQA +from cucim.skimage._vendored._ndimage_filters import correlate1d # NOQA +from cucim.skimage._vendored._ndimage_filters import convolve1d # NOQA +from cucim.skimage._vendored._ndimage_filters import uniform_filter1d # NOQA +from cucim.skimage._vendored._ndimage_filters import uniform_filter # NOQA +from cucim.skimage._vendored._ndimage_filters import gaussian_filter1d # NOQA +from cucim.skimage._vendored._ndimage_filters import gaussian_filter # NOQA +from cucim.skimage._vendored._ndimage_filters import prewitt # NOQA +from cucim.skimage._vendored._ndimage_filters import sobel # NOQA +from cucim.skimage._vendored._ndimage_filters import generic_laplace # NOQA +from cucim.skimage._vendored._ndimage_filters import laplace # NOQA +from cucim.skimage._vendored._ndimage_filters import gaussian_laplace # NOQA +from cucim.skimage._vendored._ndimage_filters import generic_gradient_magnitude # NOQA +from cucim.skimage._vendored._ndimage_filters import gaussian_gradient_magnitude # NOQA +from cucim.skimage._vendored._ndimage_filters import minimum_filter # NOQA +from cucim.skimage._vendored._ndimage_filters import maximum_filter # NOQA +from cucim.skimage._vendored._ndimage_filters import minimum_filter1d # NOQA +from 
cucim.skimage._vendored._ndimage_filters import maximum_filter1d # NOQA +from cucim.skimage._vendored._ndimage_filters import median_filter # NOQA +from cucim.skimage._vendored._ndimage_filters import rank_filter # NOQA +from cucim.skimage._vendored._ndimage_filters import percentile_filter # NOQA + +# interpolation +from cucim.skimage._vendored._ndimage_interpolation import affine_transform # NOQA +from cucim.skimage._vendored._ndimage_interpolation import map_coordinates # NOQA +from cucim.skimage._vendored._ndimage_interpolation import rotate # NOQA +from cucim.skimage._vendored._ndimage_interpolation import shift # NOQA +from cucim.skimage._vendored._ndimage_interpolation import spline_filter # NOQA +from cucim.skimage._vendored._ndimage_interpolation import spline_filter1d # NOQA +from cucim.skimage._vendored._ndimage_interpolation import zoom # NOQA + +# morphology +from cucim.skimage._vendored._ndimage_morphology import generate_binary_structure # NOQA +from cucim.skimage._vendored._ndimage_morphology import iterate_structure # NOQA +from cucim.skimage._vendored._ndimage_morphology import binary_erosion # NOQA +from cucim.skimage._vendored._ndimage_morphology import binary_dilation # NOQA +from cucim.skimage._vendored._ndimage_morphology import binary_opening # NOQA +from cucim.skimage._vendored._ndimage_morphology import binary_closing # NOQA +from cucim.skimage._vendored._ndimage_morphology import binary_hit_or_miss # NOQA +from cucim.skimage._vendored._ndimage_morphology import binary_fill_holes # NOQA +from cucim.skimage._vendored._ndimage_morphology import binary_propagation # NOQA +from cucim.skimage._vendored._ndimage_morphology import grey_erosion # NOQA +from cucim.skimage._vendored._ndimage_morphology import grey_dilation # NOQA +from cucim.skimage._vendored._ndimage_morphology import grey_closing # NOQA +from cucim.skimage._vendored._ndimage_morphology import grey_opening # NOQA +from cucim.skimage._vendored._ndimage_morphology import 
morphological_gradient # NOQA +from cucim.skimage._vendored._ndimage_morphology import morphological_laplace # NOQA +from cucim.skimage._vendored._ndimage_morphology import white_tophat # NOQA +from cucim.skimage._vendored._ndimage_morphology import black_tophat # NOQA + +# Import the rest of the cupyx.scipy.ndimage API here + +# additional filters +from cupyx.scipy.ndimage import generic_filter # NOQA +from cupyx.scipy.ndimage import generic_filter1d # NOQA + +# fourier filters +from cupyx.scipy.ndimage import fourier_ellipsoid # NOQA +from cupyx.scipy.ndimage import fourier_gaussian # NOQA +from cupyx.scipy.ndimage import fourier_shift # NOQA +from cupyx.scipy.ndimage import fourier_uniform # NOQA + +# measurements +from cupyx.scipy.ndimage import label # NOQA +try: + from cupyx.scipy.ndimage import sum_labels # NOQA +except ImportError: + from cupyx.scipy.ndimage import sum as sum_labels # NOQA +from cupyx.scipy.ndimage import mean # NOQA +from cupyx.scipy.ndimage import variance # NOQA +from cupyx.scipy.ndimage import standard_deviation # NOQA +from cupyx.scipy.ndimage import minimum # NOQA +from cupyx.scipy.ndimage import maximum # NOQA +from cupyx.scipy.ndimage import minimum_position # NOQA +from cupyx.scipy.ndimage import maximum_position # NOQA +from cupyx.scipy.ndimage import median # NOQA +from cupyx.scipy.ndimage import extrema # NOQA +from cupyx.scipy.ndimage import center_of_mass # NOQA +from cupyx.scipy.ndimage import histogram # NOQA +from cupyx.scipy.ndimage import labeled_comprehension # NOQA diff --git a/python/cucim/src/cucim/skimage/_vendored/signaltools.py b/python/cucim/src/cucim/skimage/_vendored/signaltools.py index c54415e99..3184055fa 100644 --- a/python/cucim/src/cucim/skimage/_vendored/signaltools.py +++ b/python/cucim/src/cucim/skimage/_vendored/signaltools.py @@ -424,7 +424,7 @@ def choose_conv_method(in1, in2, mode="full", measure=False): returns `direct` (e.g., to protect against floating point integer precision). - .. 
versionadded:: 0.19 + .. versionadded:: 22.02.00 Examples -------- diff --git a/python/cucim/src/cucim/skimage/exposure/exposure.py b/python/cucim/src/cucim/skimage/exposure/exposure.py index f7c0eeb4f..2b16508f2 100644 --- a/python/cucim/src/cucim/skimage/exposure/exposure.py +++ b/python/cucim/src/cucim/skimage/exposure/exposure.py @@ -29,9 +29,7 @@ def _offset_array(arr, low_boundary, high_boundary): # prevent overflow errors when offsetting arr = arr.astype(offset_dtype) arr = arr - offset - else: - offset = 0 - return arr, offset + return arr def _bincount_histogram_centers(image, source_range): @@ -75,8 +73,10 @@ def _bincount_histogram(image, source_range, bin_centers=None): if bin_centers is None: bin_centers = _bincount_histogram_centers(image, source_range) image_min, image_max = bin_centers[0], bin_centers[-1] - image, offset = _offset_array(image, image_min.item(), image_max.item()) # synchronize # noqa - hist = cp.bincount(image.ravel(), minlength=image_max - image_min + 1) + image = _offset_array(image, image_min.item(), image_max.item()) # synchronize # noqa + hist = cp.bincount( + image.ravel(), minlength=image_max - min(image_min, 0) + 1 + ) if source_range == 'image': idx = max(image_min, 0) hist = hist[idx:] diff --git a/python/cucim/src/cucim/skimage/exposure/tests/test_exposure.py b/python/cucim/src/cucim/skimage/exposure/tests/test_exposure.py index f27c8fff7..c0f6792ff 100644 --- a/python/cucim/src/cucim/skimage/exposure/tests/test_exposure.py +++ b/python/cucim/src/cucim/skimage/exposure/tests/test_exposure.py @@ -21,7 +21,9 @@ def test_wrong_source_range(): im = cp.array([-1, 100], dtype=cp.int8) with pytest.raises(ValueError): - frequencies, bin_centers = exposure.histogram(im, source_range="foobar") + frequencies, bin_centers = exposure.histogram( + im, source_range="foobar" + ) def test_negative_overflow(): @@ -50,6 +52,15 @@ def test_int_range_image(): assert bin_centers[-1] == 100 +def test_multichannel_int_range_image(): + im = 
cp.array([[10, 5], [100, 102]], dtype=np.int8) + frequencies, bin_centers = exposure.histogram(im, channel_axis=-1) + for ch in range(im.shape[-1]): + assert len(frequencies[ch]) == len(bin_centers) + assert bin_centers[0] == 5 + assert bin_centers[-1] == 102 + + def test_peak_uint_range_dtype(): im = cp.array([10, 100], dtype=cp.uint8) frequencies, bin_centers = exposure.histogram(im, source_range="dtype") @@ -286,11 +297,7 @@ def test_rescale_in_range_clip(): def test_rescale_out_range(dtype): """Check that output range is correct. - .. versionchanged:: 0.17 - This function used to return dtype matching the input dtype. It now - matches the output. - - .. versionchanged:: 0.19 + .. versionchanged:: 22.02.00 float16 and float32 inputs now result in float32 output. Formerly they would give float64 outputs. """ diff --git a/python/cucim/src/cucim/skimage/feature/_basic_features.py b/python/cucim/src/cucim/skimage/feature/_basic_features.py index 6953b846d..9807906d7 100644 --- a/python/cucim/src/cucim/skimage/feature/_basic_features.py +++ b/python/cucim/src/cucim/skimage/feature/_basic_features.py @@ -6,6 +6,7 @@ import cupy as cp import numpy as np +from .._shared._gradient import gradient from cucim.skimage import feature, filters from cucim.skimage._shared import utils from cucim.skimage.util import img_as_float32 @@ -14,7 +15,7 @@ def _texture_filter(gaussian_filtered): combos = combinations_with_replacement H_elems = [ - cp.gradient(cp.gradient(gaussian_filtered)[ax0], axis=ax1) + gradient(gradient(gaussian_filtered)[ax0], axis=ax1) for ax0, ax1 in combos(range(gaussian_filtered.ndim), 2) ] eigvals = feature.hessian_matrix_eigvals(H_elems) diff --git a/python/cucim/src/cucim/skimage/feature/_canny.py b/python/cucim/src/cucim/skimage/feature/_canny.py index b9d0e7c80..56d0f5cab 100644 --- a/python/cucim/src/cucim/skimage/feature/_canny.py +++ b/python/cucim/src/cucim/skimage/feature/_canny.py @@ -12,7 +12,7 @@ Original author: Lee Kamentsky """ import cupy as 
cp -import cupyx.scipy.ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from cucim.skimage.util import dtype_limits @@ -201,7 +201,7 @@ def _generate_nonmaximum_suppression_op(large_int=False): @cp.memoize(for_each_device=True) def _get_nonmax_kernel(large_int=False): in_params = ('raw T isobel, raw T jsobel, raw T magnitude, ' - 'raw uint8 eroded_mask, T low_threshold') + 'raw uint8 eroded_mask, float64 low_threshold') out_params = 'T out' name = 'cupyx_skimage_canny_nonmaximum_suppression' if large_int: diff --git a/python/cucim/src/cucim/skimage/feature/corner.py b/python/cucim/src/cucim/skimage/feature/corner.py index 65b90315f..f52f98ad7 100644 --- a/python/cucim/src/cucim/skimage/feature/corner.py +++ b/python/cucim/src/cucim/skimage/feature/corner.py @@ -3,12 +3,13 @@ import cupy as cp import numpy as np -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from scipy import spatial # TODO: use RAPIDS cuSpatial? from cucim.skimage.util import img_as_float # from ..transform import integral_image +from .._shared._gradient import gradient from .._shared.utils import _supported_float_type from .peak import peak_local_max from .util import _prepare_grayscale_input_nD @@ -217,14 +218,14 @@ def hessian_matrix(image, sigma=1, mode='constant', cval=0, order='rc'): gaussian_filtered = gaussian(image, sigma=sigma, mode=mode, cval=cval, channel_axis=channel_axis) - gradients = cp.gradient(gaussian_filtered) + gradients = gradient(gaussian_filtered) axes = range(image.ndim) if order == "rc": axes = reversed(axes) H_elems = [ - cp.gradient(gradients[ax0], axis=ax1) + gradient(gradients[ax0], axis=ax1) for ax0, ax1 in combinations_with_replacement(axes, 2) ] diff --git a/python/cucim/src/cucim/skimage/feature/peak.py b/python/cucim/src/cucim/skimage/feature/peak.py index eff5f6735..fbb2570f7 100644 --- a/python/cucim/src/cucim/skimage/feature/peak.py +++ b/python/cucim/src/cucim/skimage/feature/peak.py @@ -1,7 +1,7 @@ from 
warnings import warn import cupy as cp -import cupyx.scipy.ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi import numpy as np from scipy.ndimage import find_objects as cpu_find_objects diff --git a/python/cucim/src/cucim/skimage/feature/tests/test_canny.py b/python/cucim/src/cucim/skimage/feature/tests/test_canny.py index a10006b95..6e06114fc 100644 --- a/python/cucim/src/cucim/skimage/feature/tests/test_canny.py +++ b/python/cucim/src/cucim/skimage/feature/tests/test_canny.py @@ -1,6 +1,5 @@ -import unittest - import cupy as cp +import pytest from cupy.testing import assert_array_equal from cupyx.scipy.ndimage import binary_dilation, binary_erosion from skimage import data @@ -9,18 +8,18 @@ from cucim.skimage.util import img_as_float -class TestCanny(unittest.TestCase): +class TestCanny(): def test_00_00_zeros(self): """Test that the Canny filter finds no points for a blank field""" result = feature.canny(cp.zeros((20, 20)), 4, 0, 0, cp.ones((20, 20), bool)) - self.assertFalse(cp.any(result)) + assert not cp.any(result) def test_00_01_zeros_mask(self): """Test that the Canny filter finds no points in a masked image""" result = (feature.canny(cp.random.uniform(size=(20, 20)), 4, 0, 0, cp.zeros((20, 20), bool))) - self.assertFalse(cp.any(result)) + assert not cp.any(result) def test_01_01_circle(self): """Test that the Canny filter finds the outlines of a circle""" @@ -36,7 +35,7 @@ def test_01_01_circle(self): cd = binary_dilation(c, iterations=3, brute_force=True) ce = binary_erosion(c, iterations=3, brute_force=True) cde = cp.logical_and(cd, cp.logical_not(ce)) - self.assertTrue(cp.all(cde[result])) + assert cp.all(cde[result]) # # The circle has a radius of 100. There are two rings here, one # for the inside edge and one for the outside. So that's @@ -44,8 +43,8 @@ def test_01_01_circle(self): # The edge contains both pixels if there's a tie, so we # bump the count a little. 
point_count = cp.sum(result) - self.assertTrue(point_count > 1200) - self.assertTrue(point_count < 1600) + assert point_count > 1200 + assert point_count < 1600 def test_01_02_circle_with_noise(self): """Test that the Canny filter finds the circle outlines @@ -62,24 +61,30 @@ def test_01_02_circle_with_noise(self): cd = binary_dilation(c, iterations=4, brute_force=True) ce = binary_erosion(c, iterations=4, brute_force=True) cde = cp.logical_and(cd, cp.logical_not(ce)) - self.assertTrue(cp.all(cde[result])) + assert cp.all(cde[result]) point_count = cp.sum(result) - self.assertTrue(point_count > 1200) - self.assertTrue(point_count < 1600) + assert point_count > 1200 + assert point_count < 1600 def test_image_shape(self): - self.assertRaises(ValueError, feature.canny, cp.zeros((20, 20, 20)), 4, - 0, 0) + with pytest.raises(ValueError): + feature.canny(cp.zeros((20, 20, 20)), 4, 0, 0) def test_mask_none(self): result1 = feature.canny(cp.zeros((20, 20)), 4, 0, 0, cp.ones((20, 20), bool)) result2 = feature.canny(cp.zeros((20, 20)), 4, 0, 0) - self.assertTrue(cp.all(result1 == result2)) + assert cp.all(result1 == result2) @cp.testing.with_requires("scikit-image>=0.18") - def test_use_quantiles(self): - image = img_as_float(cp.asarray(data.camera()[::100, ::100])) + @pytest.mark.parametrize('image_dtype', [cp.uint8, cp.int64, cp.float32, + cp.float64]) + def test_use_quantiles(self, image_dtype): + dtype = cp.dtype(image_dtype) + image = cp.asarray(data.camera()[::100, ::100]) + if dtype.kind == 'f': + image = img_as_float(image) + image = image.astype(dtype) # Correct output produced manually with quantiles # of 0.8 and 0.6 for high and low respectively @@ -96,24 +101,33 @@ def test_use_quantiles(self): assert_array_equal(result, correct_output) + def test_img_all_ones(self): + image = cp.ones((10, 10)) + assert cp.all(feature.canny(image) == 0) + def test_invalid_use_quantiles(self): image = img_as_float(cp.array(data.camera()[::50, ::50])) - 
self.assertRaises(ValueError, feature.canny, image, use_quantiles=True, + with pytest.raises(ValueError): + feature.canny(image, use_quantiles=True, low_threshold=0.5, high_threshold=3.6) - self.assertRaises(ValueError, feature.canny, image, use_quantiles=True, + with pytest.raises(ValueError): + feature.canny(image, use_quantiles=True, low_threshold=-5, high_threshold=0.5) - self.assertRaises(ValueError, feature.canny, image, use_quantiles=True, + with pytest.raises(ValueError): + feature.canny(image, use_quantiles=True, low_threshold=99, high_threshold=0.9) - self.assertRaises(ValueError, feature.canny, image, use_quantiles=True, + with pytest.raises(ValueError): + feature.canny(image, use_quantiles=True, low_threshold=0.5, high_threshold=-100) # Example from issue #4282 image = data.camera() - self.assertRaises(ValueError, feature.canny, image, use_quantiles=True, + with pytest.raises(ValueError): + feature.canny(image, use_quantiles=True, low_threshold=50, high_threshold=150) def test_dtype(self): diff --git a/python/cucim/src/cucim/skimage/filters/_gabor.py b/python/cucim/src/cucim/skimage/filters/_gabor.py index e55720357..df4c041c4 100644 --- a/python/cucim/src/cucim/skimage/filters/_gabor.py +++ b/python/cucim/src/cucim/skimage/filters/_gabor.py @@ -1,7 +1,8 @@ import math import cupy as cp -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi + from .._shared.utils import _supported_float_type, check_nD, warn diff --git a/python/cucim/src/cucim/skimage/filters/_median.py b/python/cucim/src/cucim/skimage/filters/_median.py index d0da048c9..2041f4161 100644 --- a/python/cucim/src/cucim/skimage/filters/_median.py +++ b/python/cucim/src/cucim/skimage/filters/_median.py @@ -1,15 +1,27 @@ from warnings import warn +import cupy as cp import numpy as np -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from .._shared.utils import deprecate_kwarg +from ._median_hist import _can_use_histogram, 
_median_hist, KernelResourceError +try: + from math import prod +except ImportError: + from functools import reduce + from operator import mul -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") + def prod(x): + return reduce(mul, x) + + +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", + deprecated_version="22.02.00") def median(image, footprint=None, out=None, mode='nearest', cval=0.0, - behavior='ndimage'): + behavior='ndimage', *, algorithm='auto', algorithm_kwargs={}): """Return local median of an image. Parameters @@ -47,6 +59,20 @@ def median(image, footprint=None, out=None, mode='nearest', cval=0.0, .. versionchanged:: 0.16 Default ``behavior`` has been changed from 'rank' to 'ndimage' + Other Parameters + ---------------- + algorithm : {'auto', 'histogram', 'sorting'} + Determines which algorithm is used to compute the median. The default + of 'auto' will attempt to use a histogram-based algorithm for 2D + images with 8 or 16-bit integer data types. Otherwise a sorting-based + algorithm will be used. Note: this parameter is cuCIM-specific and does + not exist in upstream scikit-image. + algorithm_kwargs : dict + Any additional algorithm-specific keywords. Currently can only be used + to set the number of parallel partitions for the 'histogram' algorithm. + (e.g. ``algorithm_kwargs={'partitions': 256}``). Note: this parameter is + cuCIM-specific and does not exist in upstream scikit-image. + Returns ------- out : 2-D array (same dtype as input image) @@ -58,6 +84,22 @@ def median(image, footprint=None, out=None, mode='nearest', cval=0.0, filtering offering more flexibility with additional parameters but dedicated for unsigned integer images. + Notes + ----- + An efficient, histogram-based median filter as described in [1]_ is faster + than the sorting-based approach for larger kernel sizes (e.g. greater than + 13x13 or so in 2D).
It has near-constant run time regardless of the kernel + size. The algorithm presented in [1]_ has been adapted to additional bit + depths here. When algorithm='auto', the histogram-based algorithm will be + chosen for integer-valued images with sufficiently large footprint size. + Otherwise, the sorting-based approach is used. + + References + ---------- + .. [1] O. Green, "Efficient Scalable Median Filtering Using Histogram-Based + Operations," in IEEE Transactions on Image Processing, vol. 27, no. 5, + pp. 2217-2228, May 2018, https://doi.org/10.1109/TIP.2017.2781375. + Examples -------- >>> import cupy as cp @@ -79,5 +121,65 @@ def median(image, footprint=None, out=None, mode='nearest', cval=0.0, if footprint is None: footprint = ndi.generate_binary_structure(image.ndim, image.ndim) + + if algorithm == 'sorting': + can_use_histogram = False + elif algorithm in ['auto', 'histogram']: + can_use_histogram, reason = _can_use_histogram(image, footprint) + else: + raise ValueError(f"unknown algorithm: {algorithm}") + + if algorithm == 'histogram' and not can_use_histogram: + raise ValueError( + "The histogram-based algorithm was requested, but it cannot " + f"be used for this image and footprint (reason: {reason})." + ) + + # The sorting-based implementation in CuPy is faster for small footprints. + # Empirically, footprint shapes of (13, 13) and above on RTX A6000 run + # faster with the histogram-based approach.
+ use_histogram = can_use_histogram + if algorithm == 'auto': + # prefer sorting-based algorithm if footprint shape is small + use_histogram = use_histogram and prod(footprint.shape) > 150 + + if use_histogram: + try: + # as in SciPy, a user-provided `out` can be an array or a dtype + output_array_provided = False + out_dtype = None + if out is not None: + output_array_provided = isinstance(out, cp.ndarray) + if not output_array_provided: + try: + out_dtype = cp.dtype(out) + except TypeError: + raise TypeError( + "out must be either a cupy.array or a valid input " + "to cupy.dtype" + ) + + # TODO: Can't currently pass an output array into _median_hist as a + # new array currently needs to be created during padding. + temp = _median_hist(image, footprint, mode=mode, cval=cval, + **algorithm_kwargs) + if output_array_provided: + out[:] = temp + else: + if out_dtype is not None: + temp = temp.astype(out_dtype, copy=False) + out = temp + return out + except KernelResourceError as e: + # Fall back to sorting-based implementation if we encounter a + # resource limit (e.g. insufficient shared memory per block). 
+ warn("Kernel resource error encountered in histogram-based " + f"median kerne: {e}\n" + "Falling back to sorting-based median instead.") + + if algorithm_kwargs: + warn(f"algorithm_kwargs={algorithm_kwargs} ignored for sorting-based " + f"algorithm") + return ndi.median_filter(image, footprint=footprint, output=out, mode=mode, cval=cval) diff --git a/python/cucim/src/cucim/skimage/filters/_median_hist.py b/python/cucim/src/cucim/skimage/filters/_median_hist.py new file mode 100644 index 000000000..0700238c0 --- /dev/null +++ b/python/cucim/src/cucim/skimage/filters/_median_hist.py @@ -0,0 +1,515 @@ +import math +import os +from collections import namedtuple +from textwrap import dedent + +import cupy as cp +import numpy as np + +from .._shared.utils import _to_np_mode + +if hasattr(math, 'prod'): + prod = math.prod +else: + prod = np.prod + + +def _dtype_to_CUDA_int_type(dtype): + cpp_int_types = { + cp.uint8: 'unsigned char', + cp.uint16: 'unsigned short', + cp.uint32: 'unsigned int', + cp.uint64: 'unsigned long long', + cp.int8: 'signed char', + cp.int16: 'short', + cp.int32: 'int', + cp.int64: 'long long', + } + dtype = cp.dtype(dtype) + if dtype.type not in cpp_int_types: + raise ValueError(f"unrecognized dtype: {dtype.type}") + return cpp_int_types[dtype.type] + + +def _get_hist_dtype(footprint_shape): + """Determine C++ type and cupy.dtype to use for the histogram.""" + max_possible_count = prod(footprint_shape) + + if max_possible_count < 128: + dtype = cp.int8 + elif max_possible_count < 32768: + dtype = cp.int16 + else: + dtype = cp.int32 + return _dtype_to_CUDA_int_type(dtype), dtype + + +def _gen_global_definitions( + image_t='unsigned char', + hist_offset=0, + hist_int_t='int', + hist_size=256, + hist_size_coarse=8 +): + """Generate C++ #define statements needed for the CUDA kernels. + + The definitions used depend on the number of histogram bins and the + histogram data type. 
+ """ + + if hist_size % hist_size_coarse != 0: + raise ValueError( + "`hist_size` must be a multiple of `hist_size_coarse`" + ) + hist_size_fine = hist_size // hist_size_coarse + log2_coarse = math.log2(hist_size_coarse) + log2_fine = math.log2(hist_size_fine) + if abs(math.remainder(log2_coarse, 1)) > 1e-7: + raise ValueError("log2_coarse must be a power of two") + elif abs(math.remainder(log2_fine, 1)) > 1e-7: + raise ValueError("log2_fine must be a power of two") + else: + log2_coarse = round(log2_coarse) + log2_fine = round(log2_fine) + + global_defs = f""" +#define HIST_SIZE {hist_size} +#define HIST_SIZE_COARSE {hist_size_coarse} +#define HIST_SIZE_FINE {hist_size_fine} +#define HIST_INT_T {hist_int_t} +#define HIST_OFFSET {hist_offset} +#define IMAGE_T {image_t} +#define LOG2_COARSE {log2_coarse} +#define LOG2_FINE {log2_fine} + """ + return global_defs + + +# TODO: look into using CUB's Block-wise collectives (e.g. BlockScan) +def _gen_preamble_median(hist_size_coarse, hist_size_fine): + """Generate bin size-dependent reduction functions. + + This helper generates the C++ code for the following two functions. + + histogramMedianParCoarseLookupOnly + histogramMedianParFineLookupOnly + + For each of these the number of calls to scanNeighbor is equal to + log2 of the number of bins. 
+ """ + n_log2_coarse = math.log2(hist_size_coarse) + if hist_size_coarse < 2 or n_log2_coarse % 1.0 != 0: + raise ValueError("hist_size_coarse must be a positive power of 2") + + n_log2_fine = math.log2(hist_size_fine) + if hist_size_fine < 2 or n_log2_fine % 1.0 != 0: + raise ValueError("hist_size_fine must be a positive power of 2") + + ops = """ + + #define scanNeighbor(array, range, index, threadIndex) \\ + { \\ + HIST_INT_T v = 0; \\ + if (index <= threadIndex && threadIndex < range) \\ + v = array[threadIndex] + array[threadIndex-index]; \\ + __syncthreads(); \\ + if (index <= threadIndex && threadIndex < range) \\ + array[threadIndex] = v; \\ + } + + #define findMedian(array, range, threadIndex, result, count, position) \\ + if (threadIndex < range) \\ + { \\ + if (array[threadIndex+1] > position && array[threadIndex] <= position) \\ + { \\ + *result = threadIndex+1; \\ + *count = array[threadIndex]; \\ + } \\ + } + + __device__ void histogramMedianParCoarseLookupOnly(HIST_INT_T* H, HIST_INT_T* Hscan, const int medPos, int* retval, int* countAtMed){ + int tx=threadIdx.x; + *retval=*countAtMed=0; + if(tx= 3" + + # footprint radius can't be larger than the image + # TODO: need to check if we need this exact restriction + # (may be specific to OpenCV's boundary handling) + radii = tuple(s // 2 for s in footprint.shape) + if any(r > s for r, s in zip(radii, image.shape)): + return False, "footprint half-width cannot exceed the image extent" + + # only fully populated footprint is supported + if not np.all(footprint): # synchronizes! + return False, "footprint must be 1 everywhere" + + return True, None + + +class KernelResourceError(RuntimeError): + pass + + +def _get_kernel_params(image, footprint_shape, value_range='auto', + partitions=None, hist_size_coarse=None): + """Determine kernel launch parameters and #define values for its code. 
+ + Parameters + ---------- + image : cupy.ndarray + The histogram bin range will depend on the image dtype unless specified + explicitly via `value_range` + footprint_shape : tuple of int + The shape of the footprint. The dtype used for storing the histogram + will depend on the footprint size. For small footprints, histograms + will be stored using int8, otherwise int16 or int32 will be used. + value_range : {'auto', 'dtype', 'image'}, optional + When value_range='dtype', the range will be determined based on the + maximal range of the data type. When ``value_range='image'``, the + minimum and maximum intensities present in the image will be used. When + set to 'auto', 'dtype' is used for 8-bit images and otherwise + 'image' is used. + partitions : positive int, optional + The grid size used during kernel launch will be (partitions, 1, 1). + Increasing this will increase parallelism (and thus performance), but + at cost of additional GPU memory usage. An explicit value is truncated + to image.shape[0]; the default is at most image.shape[0] // 2. + hist_size_coarse : int or None, optional + Can be used to override the default choice of the number of coarse + histogram bins. It is not generally recommended to set this as + infeasible values can easily be chosen. Using None will give + automatically selected values that have been validated in previous + testing. + + Returns + ------- + CUDAParams : namedtuple + Various parameters used in kernel code generation and at launch time. + See comments next to the CUDAParams declaration below for details. + """ + + if value_range == 'auto': + if image.dtype.itemsize < 2: + value_range = 'dtype' + else: + # to save memory, try using actual value range for >8-bit images + # (e.g. DICOM images often have 12-bit range) + value_range = 'image' + + if value_range == 'dtype': + if image.dtype.itemsize > 2: + raise ValueError( + "dtype range only supported for 8 and 16-bit integer dtypes."
+ ) + iinfo = cp.iinfo(image.dtype) + minv, maxv = iinfo.min, iinfo.max + elif value_range == 'image': + minv = int(image.min()) + maxv = int(image.max()) + else: + if len(value_range) != 2: + raise ValueError( + "value_range must be either 'dtype', 'image' or a " + "(min, max) sequence." + ) + minv, maxv = value_range + + if image.dtype.kind == 'u': + # cannot subtract a positive offset in the unsigned case + minv = min(minv, 0) + hist_offset = 0 if minv == 0 else -minv + hist_size = maxv - minv + 1 + hist_size = max(hist_size, 256) # use at least 256 bins + # round hist_size up to the nearest power of 2 + hist_size = round(2**math.ceil(math.log2(hist_size))) + hist_size = max(hist_size, 32) + + if hist_size_coarse is None: + # Empirically, robust to choose 32-fold less bins for hist_size coarse + hist_size_coarse = hist_size // 32 + + # have to set block[0] large enough that histogramMedianParFineLookupOnly + # and histogramMedianParCoarseLookupOnly search sizes fit within the number + # of threads in the block. + # Use the maximum of the coarse and fine sizes, rounded up to the nearest + # multiple of 32. + hist_size_fine = hist_size // hist_size_coarse + hist_size_max = max(hist_size_fine, hist_size_coarse) + # block0 must be at least the warp size + block0 = 32 * math.ceil(hist_size_max / 32) + if block0 > 256: + d = cp.cuda.Device() + max_block_x = d.attributes["MaxBlockDimX"] + if block0 > max_block_x: + raise KernelResourceError( + f"The requested block size of {block0} for the first dimension" + f", exceeds MaxBlockDimX={max_block_x} for this device." + ) + + if partitions is None: + # Substantial overhead in computing the first line, so need at least + # two lines per partition for best performance. Limit to bins_max + # partitions by default to avoid overly excessive memory overhead. 
+ bins_max = max(16, 512 // (hist_size // 256)) + partitions = min(image.shape[0] // 2, bins_max) + else: + # cannot exceed the number of image rows + partitions = min(partitions, image.shape[0]) + + grid = (partitions, 1, 1) + block = (block0, 1, 1) + + hist_int_t, hist_dtype = _get_hist_dtype(footprint_shape) + + # All recent GPUs (CC>=3.5) allow at least 48k of shared memory per block, + # so don't bother checking the requirements unless thousands of histogram + # bins are requested. + # https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications # noqa + if hist_size >= 8192: + smem_size = _check_shared_memory_requirement_bytes( + hist_dtype, hist_size_coarse, hist_size_fine + ) + d = cp.cuda.Device() + smem_available = d.attributes['MaxSharedMemoryPerBlock'] + if smem_size > smem_available: + raise KernelResourceError( + f"Shared memory requirement of {smem_size} bytes per block" + f"exceeds the device limit of {smem_available}." + ) + CUDAParams = namedtuple( + 'HistogramMedianKernelParams', + [ + 'grid', + 'block', + 'hist_size', # total number of histogram bins + 'hist_size_coarse', # number of coarse-level histogram bins + 'hist_dtype', # cupy.dtype of the histogram + 'hist_int_t', # C++ type of the histogram + 'hist_offset', # offset from 0 for the first bin + 'partitions' # number of parallel bands to use + ] + ) + return CUDAParams( + grid, + block, + hist_size, + hist_size_coarse, + hist_dtype, + hist_int_t, + hist_offset, + partitions, + ) + + +def _median_hist(image, footprint, output=None, mode='mirror', cval=0, + value_range='auto', partitions=None): + + if output is not None: + raise NotImplementedError( + "Use of a user-defined output array has not been implemented" + ) + + compatible_image, reason = _can_use_histogram(image, footprint) + if not compatible_image: + raise ValueError(reason) + + # kernel pointer offset calculations assume C-contiguous image data + image = cp.ascontiguousarray(image) + 
n_rows, n_cols = image.shape[:2] + if image.dtype.kind == 'b': + image = image.view(cp.uint8) + if image.dtype.kind not in 'iu': + raise ValueError("only integer-type images are accepted") + + radii = tuple(s // 2 for s in footprint.shape) + # med_pos is the index corresponding to the median + # (calculation here assumes all elements of the footprint are True) + med_pos = footprint.size // 2 + + params = _get_kernel_params( + image, footprint.shape, value_range, partitions + ) + + # pad as necessary to avoid boundary artifacts + # Don't have to pad along axis 0 if mode is already 'nearest' because the + # kernel already assumes 'nearest' mode internally. + autopad = True + pad_both_axes = mode != 'nearest' + if autopad: + if pad_both_axes: + npad = tuple((r, r) for r in radii) + else: + npad = ((0, 0),) * (image.ndim - 1) + ((radii[-1], radii[-1]),) + mode = _to_np_mode(mode) + if mode == 'constant': + pad_kwargs = dict(mode=mode, constant_values=cval) + else: + pad_kwargs = dict(mode=mode) + image = cp.pad(image, npad, **pad_kwargs) + # must update n_rows, n_cols after padding! + n_rows, n_cols = image.shape[:2] + + # generate the kernel + kern = _get_median_rawkernel( + image_t=_dtype_to_CUDA_int_type(image.dtype), + hist_offset=params.hist_offset, + hist_int_t=params.hist_int_t, + hist_size=params.hist_size, + hist_size_coarse=params.hist_size_coarse, + ) + + # allocate output and scratch space, `hist` and `coarse_hist`. 
+ out = cp.empty_like(image) + hist = cp.zeros( + (n_cols * params.hist_size * params.partitions,), + params.hist_dtype, + ) + coarse_hist = cp.zeros( + (n_cols * params.hist_size_coarse * params.partitions,), + params.hist_dtype, + ) + + # call the kernel + r0, r1 = radii[:2] + s0, s1 = image.shape[:2] + kernel_args = (image, out, hist, coarse_hist, r0, r1, med_pos, s0, s1) + kern(params.grid, params.block, kernel_args) + + # remove any padding that was added + if autopad: + if pad_both_axes: + out_sl = tuple(slice(r, -r) for r in radii) + out = out[out_sl] + else: + out = out[..., radii[-1]:-radii[-1]] + return out diff --git a/python/cucim/src/cucim/skimage/filters/_separable_filtering.py b/python/cucim/src/cucim/skimage/filters/_separable_filtering.py new file mode 100644 index 000000000..eb8dad28a --- /dev/null +++ b/python/cucim/src/cucim/skimage/filters/_separable_filtering.py @@ -0,0 +1,990 @@ +import math + +import cupy as cp + +from cucim.skimage._vendored import _ndimage_util as util +from cucim.skimage._vendored._internal import _normalize_axis_index, prod +from cucim.skimage._vendored._ndimage_filters_core import ( + _ndimage_CAST_FUNCTION, _ndimage_includes) + + +def _get_constants(ndim, axis, kernel_size, anchor, patch_per_block=None): + if anchor is None: + anchor = kernel_size // 2 + halo_pixels_needed = max(kernel_size - anchor, anchor) + if patch_per_block is None: + patch_per_block = 4 + + if ndim == 2: + # note, in 2d axis 0 = "y" + # axis 1 = "x" + # for simplicity, keeping same halo size at both start and end + if axis == 1: + # as in OpenCV's column_filter.hpp + block_x = 16 + block_y = 16 + halo_size = math.ceil(halo_pixels_needed / block_x) + elif axis == 0: + # as in OpenCV's row_filter.hpp + block_x = 32 # 16 in CUDA example + block_y = 8 # 4 in CUDA example + halo_size = math.ceil(halo_pixels_needed / block_y) + # can have out of bounds access unless patch_per_block >= halo_size + patch_per_block = max(patch_per_block, halo_size) + 
block_z = 1 + elif ndim == 3: + # note, in 3d axis 0 = "z" + # axis 1 = "y" + # axis 2 = "x" + # for simplicity, keeping same halo size at both start and end + if axis == 2: + # as in OpenCV's column_filter.hpp + block_x = 16 + block_y = 4 + block_z = 4 + halo_size = math.ceil(halo_pixels_needed / block_x) + elif axis == 1: + # as in OpenCV's column_filter.hpp + block_x = 32 + block_y = 4 + block_z = 4 + halo_size = math.ceil(halo_pixels_needed / block_y) + elif axis == 0: + # as in OpenCV's row_filter.hpp + block_x = 32 + block_y = 4 + block_z = 4 + halo_size = math.ceil(halo_pixels_needed / block_z) + # can have out of bounds access unless patch_per_block >= halo_size + patch_per_block = max(patch_per_block, halo_size) + else: + raise NotImplementedError("Only 2D and 3D are currently supported") + block = (block_x, block_y, block_z) + return block, patch_per_block, halo_size + + +def _get_smem_shape(ndim, axis, block, patch_per_block, halo_size, anchor=None, + image_dtype=cp.float32): + bx, by, bz = block + if ndim == 2: + if axis == 0: + shape = ((patch_per_block + 2 * halo_size) * by, bx) + elif axis == 1: + shape = (by, (patch_per_block + 2 * halo_size) * bx) + elif ndim == 3: + if axis == 0: + shape = ((patch_per_block + 2 * halo_size) * bz, by, bx) + elif axis == 1: + shape = (bz, (patch_per_block + 2 * halo_size) * by, bx) + elif axis == 2: + shape = (bz, by, (patch_per_block + 2 * halo_size) * bx) + else: + raise NotImplementedError("TODO") + nbytes = cp.dtype(image_dtype).itemsize * prod(shape) + return shape, nbytes + + +def _get_warp_size(device_id=None): + if device_id is None: + device_id = cp.cuda.runtime.getDevice() + device_props = cp.cuda.runtime.getDeviceProperties(device_id) + return device_props['warpSize'] + + +def _get_shmem_limits(device_id=None): + if device_id is None: + device_id = cp.cuda.runtime.getDevice() + device_props = cp.cuda.runtime.getDeviceProperties(device_id) + shared_mp = device_props.get('sharedMemPerMultiprocessor', None) 
@cp.memoize(for_each_device=True)
def _check_smem_availability(ndim, axis, kernel_size, anchor=None,
                             patch_per_block=None, image_dtype=cp.float32,
                             device_id=None):
    """Raise if the shared-memory tile for this filter exceeds the block limit.

    Parameters
    ----------
    ndim, axis, kernel_size, anchor, patch_per_block
        Forwarded to ``_get_constants`` to determine the block shape, patch
        count and halo size.
    image_dtype : dtype, optional
        Element type stored in the shared-memory tile; its item size scales
        the byte requirement.
    device_id : int or None, optional
        Device whose limits are queried; forwarded to ``_get_shmem_limits``.

    Raises
    ------
    ResourceLimitError
        If the tile needs more bytes than the device's ``sharedMemPerBlock``.
    """
    block, patch_per_block, halo_size = _get_constants(
        ndim, axis, kernel_size, anchor=anchor, patch_per_block=patch_per_block
    )
    # `image_dtype` has to be passed by keyword: the sixth positional
    # parameter of `_get_smem_shape` is `anchor`, so a positional dtype was
    # silently ignored and the size was always computed for float32.
    _, nbytes = _get_smem_shape(
        ndim, axis, block, patch_per_block, halo_size,
        image_dtype=image_dtype,
    )
    props = _get_shmem_limits(device_id=device_id)
    smem_limit = props['shared_block']
    # `_get_shmem_limits` reports None when the property is unavailable; in
    # that case there is nothing to compare against.
    if smem_limit is not None and nbytes > smem_limit:
        raise ResourceLimitError("inadequate shared memory available")
All boundary + handling also occurs within this function. + """ + + if ndim == 2 and axis == 0: + if mode not in ['constant', 'grid-constant']: + boundary_code_lower, boundary_code_upper = util._generate_boundary_condition_ops(mode, 'row', 'n_rows', separate=True) # noqa + + # as in OpenCV's column_filter.hpp + code = """ + __shared__ T smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X]; + const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x; + if (x >= n_cols){ + return; + } + const T* src_col = &src[x]; + const int yStart = blockIdx.y * (BLOCK_DIM_Y * PATCH_PER_BLOCK) + threadIdx.y; + + // memory is contiguous along last (columns) axis + const int row_stride = n_cols; // stride (in elements) along axis 0 + int row; + + if (blockIdx.y > 0) + { + //Upper halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y) * row_stride]; + } + else + { + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) { + row = yStart - (HALO_SIZE - j) * BLOCK_DIM_Y; + """ # noqa + if mode == 'constant': + code += f""" + if (row < 0) + smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_lower + code += """ + smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = src_col[row * row_stride]; + } + } + + if (blockIdx.y + 2 < gridDim.y) // Note: +2 here assumes HALO_SIZE <= PATCH_PER_BLOCK so we ensure that elsewhere + { + //Main data + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) + smem[threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart + j * BLOCK_DIM_Y) * row_stride]; + + //Lower halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y) * row_stride]; + } + else + { + //Main data + #pragma unroll + for (int j = 0; j < 
PATCH_PER_BLOCK; ++j) { + row = yStart + j * BLOCK_DIM_Y; + """ # noqa + if mode == 'constant': + code += f""" + if (row >= n_rows) + smem[threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_upper + code += """ + smem[threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[row * row_stride]; + } + + //Lower halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + { + row = yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y; + """ # noqa + if mode == 'constant': + code += f""" + if (row >= n_rows) + smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_upper + code += """ + smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[row * row_stride]; + } + } + """ # noqa + elif ndim == 2 and axis == 1: + if mode not in ['constant', 'grid-constant']: + boundary_code_lower, boundary_code_upper = util._generate_boundary_condition_ops(mode, 'col', 'n_cols', separate=True) # noqa + + # as in OpenCV's row_filter.hpp + code = """ + __shared__ T smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X]; + const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y; + if (y >= n_rows) { + return; + } + const int row_stride = n_cols; // stride (in elements) along axis 0 + int col; + const T* src_row = &src[y * row_stride]; + const int xStart = blockIdx.x * (PATCH_PER_BLOCK * BLOCK_DIM_X) + threadIdx.x; + + if (blockIdx.x > 0) + { + //Load left halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X]; + } + else + { + //Load left halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j){ + col = xStart - (HALO_SIZE - j) * BLOCK_DIM_X; + """ # noqa + if mode == 'constant': + code += f""" + if (col < 0) + smem[threadIdx.y][threadIdx.x + j * 
BLOCK_DIM_X] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_lower + code += """ + smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = src_row[col]; + } + } + if (blockIdx.x + 2 < gridDim.x) // Note: +2 here assumes HALO_SIZE <= PATCH_PER_BLOCK so we ensure that elsewhere + { + //Load main data + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) + smem[threadIdx.y][threadIdx.x + (HALO_SIZE + j)* BLOCK_DIM_X] = src_row[xStart + j * BLOCK_DIM_X]; + + //Load right halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X]; + } + else + { + //Load main data + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) { + col = xStart + j * BLOCK_DIM_X; + """ # noqa + if mode == 'constant': + code += f""" + if (col >= n_cols) + smem[threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_upper + code += """ + smem[threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X] = src_row[col]; + } + + //Load right halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j){ + col = xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X; + """ # noqa + if mode == 'constant': + code += f""" + if (col >= n_cols) + smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_upper + code += """ + smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = src_row[col]; + } + } + """ # noqa + + code += """ + __syncthreads(); + """ + return code + + +def _get_code_stage1_shared_memory_load_3d(ndim, axis, mode, cval): + """Generates the first stage of the function body. + + This involves just copying from the `src` array into the `smem` shared + memory array followed by a call to __syncthreads(). 
All boundary + handling also occurs within this function. + """ + + if ndim == 3 and axis == 0: + if mode not in ['constant', 'grid-constant']: + boundary_code_lower, boundary_code_upper = util._generate_boundary_condition_ops(mode, 'row', 's_0', separate=True) # noqa + + # as in OpenCV's column_filter.hpp + code = """ + __shared__ T smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Z][BLOCK_DIM_Y][BLOCK_DIM_X]; + const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x; + const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y; + if ((x >= s_2) || (y >= s_1)) { + return; + } + // memory is contiguous along last (columns) axis + const int stride_0 = s_1 * s_2; // stride (in elements) along axis 0 + const int stride_1 = s_2; // stride (in elements) along axis 1 + + const T* src_col = &src[x + stride_1 * y]; + const int zStart = blockIdx.z * (BLOCK_DIM_Z * PATCH_PER_BLOCK) + threadIdx.z; + + int row; + + if (blockIdx.z > 0) + { + //Upper halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + smem[threadIdx.z + j * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[(zStart - (HALO_SIZE - j) * BLOCK_DIM_Z) * stride_0]; + } + else + { + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) { + row = zStart - (HALO_SIZE - j) * BLOCK_DIM_Z; + """ # noqa + if mode == 'constant': + code += f""" + if (row < 0) + smem[threadIdx.z + j * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_lower + code += """ + smem[threadIdx.z + j * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[row * stride_0]; + } + } + + if (blockIdx.z + 2 < gridDim.z) // Note: +2 here assumes HALO_SIZE <= PATCH_PER_BLOCK so we ensure that elsewhere + { + //Main data + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) + smem[threadIdx.z + (HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[(zStart + j * BLOCK_DIM_Z) * stride_0]; + + //Lower halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + smem[threadIdx.z + 
(PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[(zStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Z) * stride_0]; + } + else + { + //Main data + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) { + row = zStart + j * BLOCK_DIM_Z; + """ # noqa + if mode == 'constant': + code += f""" + if (row >= s_0) + smem[threadIdx.z + (HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_upper + code += """ + smem[threadIdx.z + (HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[row * stride_0]; + } + + //Lower halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + { + row = zStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Z; + """ # noqa + if mode == 'constant': + code += f""" + if (row >= s_0) + smem[threadIdx.z + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_upper + code += """ + smem[threadIdx.z + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[row * stride_0]; + } + } + """ # noqa + elif ndim == 3 and axis == 1: + if mode not in ['constant', 'grid-constant']: + boundary_code_lower, boundary_code_upper = util._generate_boundary_condition_ops(mode, 'row', 's_1', separate=True) # noqa + + # as in OpenCV's column_filter.hpp + code = """ + __shared__ T smem[BLOCK_DIM_Z][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X]; + const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x; + const int z = blockIdx.z * BLOCK_DIM_Z + threadIdx.z; + if ((x >= s_2) || (z >= s_0)) { + return; + } + // memory is contiguous along last (columns) axis + const int stride_0 = s_1 * s_2; // stride (in elements) along axis 0 + const int stride_1 = s_2; // stride (in elements) along axis 1 + + const T* src_col = &src[x + stride_0 * z]; + const int yStart = blockIdx.y * (BLOCK_DIM_Y * PATCH_PER_BLOCK) + threadIdx.y; + + int 
row; + + if (blockIdx.y > 0) + { + //Upper halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + smem[threadIdx.z][threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y) * stride_1]; + } + else + { + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) { + row = yStart - (HALO_SIZE - j) * BLOCK_DIM_Y; + """ # noqa + if mode == 'constant': + code += f""" + if (row < 0) + smem[threadIdx.z][threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_lower + code += """ + smem[threadIdx.z][threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = src_col[row * stride_1]; + } + } + + if (blockIdx.y + 2 < gridDim.y) // Note: +2 here assumes HALO_SIZE <= PATCH_PER_BLOCK so we ensure that elsewhere + { + //Main data + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) + smem[threadIdx.z][threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart + j * BLOCK_DIM_Y) * stride_1]; + + //Lower halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + smem[threadIdx.z][threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y) * stride_1]; + } + else + { + //Main data + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) { + row = yStart + j * BLOCK_DIM_Y; + """ # noqa + if mode == 'constant': + code += f""" + if (row >= s_1) + smem[threadIdx.z][threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_upper + code += """ + smem[threadIdx.z][threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[row * stride_1]; + } + + //Lower halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + { + row = yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y; + """ # noqa + if mode == 'constant': + code += f""" + if (row >= s_1) + smem[threadIdx.z][threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * 
BLOCK_DIM_Y][threadIdx.x] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_upper + code += """ + smem[threadIdx.z][threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[row * stride_1]; + } + } + """ # noqa + elif ndim == 3 and axis == 2: + if mode not in ['constant', 'grid-constant']: + boundary_code_lower, boundary_code_upper = util._generate_boundary_condition_ops(mode, 'col', 's_2', separate=True) # noqa + + # as in OpenCV's row_filter.hpp + code = """ + __shared__ T smem[BLOCK_DIM_Z][BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X]; + const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y; + const int z = blockIdx.z * BLOCK_DIM_Z + threadIdx.z; + if ((y >= s_1) || (z >= s_0)) { + return; + } + const int stride_0 = s_1 * s_2; // stride (in elements) along axis 0 + const int stride_1 = s_2; // stride (in elements) along axis 1 + int col; + const T* src_row = &src[z * stride_0 + y * stride_1]; + const int xStart = blockIdx.x * (PATCH_PER_BLOCK * BLOCK_DIM_X) + threadIdx.x; + + if (blockIdx.x > 0) + { + //Load left halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + smem[threadIdx.z][threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X]; + } + else + { + //Load left halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j){ + col = xStart - (HALO_SIZE - j) * BLOCK_DIM_X; + """ # noqa + if mode == 'constant': + code += f""" + if (col < 0) + smem[threadIdx.z][threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_lower + code += """ + smem[threadIdx.z][threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = src_row[col]; + } + } + if (blockIdx.x + 2 < gridDim.x) // Note: +2 here assumes HALO_SIZE <= PATCH_PER_BLOCK so we ensure that elsewhere + { + //Load main data + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) + smem[threadIdx.z][threadIdx.y][threadIdx.x + (HALO_SIZE + j)* 
BLOCK_DIM_X] = src_row[xStart + j * BLOCK_DIM_X]; + + //Load right halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j) + smem[threadIdx.z][threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X]; + } + else + { + //Load main data + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) { + col = xStart + j * BLOCK_DIM_X; + """ # noqa + if mode == 'constant': + code += f""" + if (col >= s_2) + smem[threadIdx.z][threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_upper + code += """ + smem[threadIdx.z][threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X] = src_row[col]; + } + + //Load right halo + #pragma unroll + for (int j = 0; j < HALO_SIZE; ++j){ + col = xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X; + """ # noqa + if mode == 'constant': + code += f""" + if (col >= s_2) + smem[threadIdx.z][threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = static_cast({cval}); + else + """ # noqa + else: + code += boundary_code_upper + code += """ + smem[threadIdx.z][threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = src_row[col]; + } + } + """ # noqa + + code += """ + __syncthreads(); + """ + return code + + +@cp.memoize(for_each_device=False) +def _get_code_stage1_shared_memory_load(ndim, axis, mode, cval): + if ndim == 2: + return _get_code_stage1_shared_memory_load_2d(ndim, axis, mode, cval) + elif ndim == 3: + return _get_code_stage1_shared_memory_load_3d(ndim, axis, mode, cval) + + +def _get_code_stage2_convolve_2d(ndim, axis, flip_kernel): + code = """ + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) + { + """ + if flip_kernel: + kernel_idx = "KSIZE - 1 - k" + else: + kernel_idx = "k" + + if ndim == 2 and axis == 0: + code += """ + const int y = yStart + j * BLOCK_DIM_Y; + + if (y < n_rows) + { + """ + inner = f""" + sum = sum + 
static_cast(smem[threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y - anchor + k][threadIdx.x]) * kernel[{kernel_idx}]; + """ # noqa + elif ndim == 2 and axis == 1: + code += """ + const int x = xStart + j * BLOCK_DIM_X; + + if (x < n_cols) + { + """ + inner = f""" + sum = sum + static_cast(smem[threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X - anchor + k]) * kernel[{kernel_idx}]; + """ # noqa + code += f""" + W sum = static_cast(0); + + #pragma unroll + for (int k = 0; k < KSIZE; ++k) {{ + {inner} + }} + dst[y * row_stride + x] = cast(sum); + }} + }} + """ + return code + + +def _get_code_stage2_convolve_3d(ndim, axis, flip_kernel): + code = """ + #pragma unroll + for (int j = 0; j < PATCH_PER_BLOCK; ++j) + { + """ + if flip_kernel: + kernel_idx = "KSIZE - 1 - k" + else: + kernel_idx = "k" + + if ndim == 3 and axis == 0: + code += """ + const int z = zStart + j * BLOCK_DIM_Z; + + if (z < s_0) + { + """ + inner = f""" + sum = sum + static_cast(smem[threadIdx.z + (HALO_SIZE + j) * BLOCK_DIM_Z - anchor + k][threadIdx.y][threadIdx.x]) * kernel[{kernel_idx}]; + """ # noqa + elif ndim == 3 and axis == 1: + code += """ + const int y = yStart + j * BLOCK_DIM_Y; + + if (y < s_1) + { + """ + inner = f""" + sum = sum + static_cast(smem[threadIdx.z][threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y - anchor + k][threadIdx.x]) * kernel[{kernel_idx}]; + """ # noqa + elif ndim == 3 and axis == 2: + code += """ + const int x = xStart + j * BLOCK_DIM_X; + + if (x < s_2) + { + """ + inner = f""" + sum = sum + static_cast(smem[threadIdx.z][threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X - anchor + k]) * kernel[{kernel_idx}]; + """ # noqa + code += f""" + W sum = static_cast(0); + + #pragma unroll + for (int k = 0; k < KSIZE; ++k) {{ + {inner} + }} + dst[z * stride_0 + y * stride_1 + x] = cast(sum); + }} + }} + """ + return code + + +@cp.memoize(for_each_device=False) +def _get_code_stage2_convolve(ndim, axis, flip_kernel): + if ndim == 2: + return 
_get_code_stage2_convolve_2d(ndim, axis, flip_kernel) + elif ndim == 3: + return _get_code_stage2_convolve_3d(ndim, axis, flip_kernel) + + +@cp.memoize(for_each_device=True) +def _get_separable_conv_kernel_src( + kernel_size, axis, ndim, anchor, image_c_type, kernel_c_type, + output_c_type, mode, cval, patch_per_block=None, flip_kernel=False +): + blocks, patch_per_block, halo_size = _get_constants( + ndim, axis, kernel_size, anchor, patch_per_block + ) + block_x, block_y, block_z = blocks + + mode_str = mode + if 'constant' in mode_str: + mode_str += f'_{cval:0.2f}'.replace('.', '_') + mode_str = mode_str.replace('-', '_') + if flip_kernel: + func_name = f'convolve_s{kernel_size}_{ndim}d_ax{axis}_{mode_str}' + else: + func_name = f'correlate_s{kernel_size}_{ndim}d_ax{axis}_{mode_str}' + func_name += f"_T{image_c_type}_W{kernel_c_type}_D{output_c_type}".replace('complex<', 'c').replace('>', '').replace('long ', 'l').replace('unsigned ', 'u') # noqa + func_name += f"_patch{patch_per_block}_halo{halo_size}" + # func_name += f"_bx{block_x}_by{block_y}" // these are fixed per axis + + code = """ + #include "cupy/carray.cuh" // for float16 + #include "cupy/complex.cuh" // for complex + """ + + # SciPy-style float -> unsigned integer casting for the output + # (use cast(sum) instead of static_cast(sum) for the output) + code += _ndimage_includes + _ndimage_CAST_FUNCTION + + code += f""" + const int KSIZE = {kernel_size}; + const int BLOCK_DIM_X = {block_x}; + const int BLOCK_DIM_Y = {block_y}; + const int BLOCK_DIM_Z = {block_z}; + const int PATCH_PER_BLOCK = {patch_per_block}; + const int HALO_SIZE = {halo_size}; + typedef {image_c_type} T; + typedef {output_c_type} D; + typedef {kernel_c_type} W; + """ + + if ndim == 2: + code += f""" + extern "C"{{ + __global__ void {func_name}(const T *src, D *dst, const W* kernel, const int anchor, int n_rows, int n_cols) + {{ + """ # noqa + elif ndim == 3: + code += f""" + extern "C"{{ + __global__ void {func_name}(const T *src, D 
*dst, const W* kernel, const int anchor, int s_0, int s_1, int s_2) + {{ + """ # noqa + code += _get_code_stage1_shared_memory_load(ndim, axis, mode, cval) + code += _get_code_stage2_convolve(ndim, axis, flip_kernel) + code += """ + } // end of function + } // extern "C" + """ + return func_name, blocks, patch_per_block, code + + +@cp.memoize(for_each_device=True) +def _get_separable_conv_kernel(kernel_size, axis, ndim, image_c_type, + kernel_c_type, output_c_type, anchor=None, + mode='nearest', cval=0, + patch_per_block=None, flip_kernel=False): + func_name, block, patch_per_block, code = _get_separable_conv_kernel_src( + kernel_size=kernel_size, + axis=axis, + ndim=ndim, + image_c_type=image_c_type, + kernel_c_type=kernel_c_type, + output_c_type=output_c_type, + anchor=anchor, + mode=mode, + cval=cval, + patch_per_block=patch_per_block, + flip_kernel=flip_kernel, + ) + options = ('--std=c++11', '-DCUPY_USE_JITIFY') + m = cp.RawModule(code=code, options=options) + return m.get_function(func_name), block, patch_per_block + + +def _get_grid(shape, block, axis, patch_per_block): + """Determine grid size from image shape and block parameters""" + ndim = len(shape) + if ndim == 2: + if axis == 0: + # column filter + grid = ( + math.ceil(shape[1] / block[0]), + math.ceil(shape[0] / (block[1] * patch_per_block)), + 1, + ) + elif axis == 1: + # row filter + grid = ( + math.ceil(shape[1] / (block[0] * patch_per_block)), + math.ceil(shape[0] / block[1]), + 1, + ) + else: + raise ValueError(f"invalid axis: {axis}") + elif ndim == 3: + if axis == 0: + # column filter + grid = ( + math.ceil(shape[2] / block[0]), + math.ceil(shape[1] / block[1]), + math.ceil(shape[0] / (block[2] * patch_per_block)), + ) + elif axis == 1: + # row filter + grid = ( + math.ceil(shape[2] / block[0]), + math.ceil(shape[1] / (block[1] * patch_per_block)), + math.ceil(shape[0] / block[2]), + ) + elif axis == 2: + # row filter + grid = ( + math.ceil(shape[2] / (block[0] * patch_per_block)), + 
def _shmem_convolve1d(image, weights, axis=-1, output=None, mode="reflect",
                      cval=0.0, origin=0, convolution=False):
    """Correlate or convolve ``image`` with a 1D kernel along one axis.

    The work is done by a shared-memory CUDA kernel obtained from
    ``_get_separable_conv_kernel``.

    Parameters
    ----------
    image : cupy.ndarray
        Input array. It is made C-contiguous if it is not already.
    weights : cupy.ndarray
        1D kernel. It is cast to the dtype chosen by
        ``util._get_weights_dtype`` and made C-contiguous if necessary.
    axis : int, optional
        Axis along which to filter; normalized against ``image.ndim``.
    output : optional
        Output specification forwarded to ``util._get_output``.
    mode : str, optional
        Boundary mode; validated by ``util._check_mode``.
    cval : scalar, optional
        Fill value forwarded to the kernel generator.
    origin : int, optional
        Shift of the kernel placement; validated by ``util._check_origin``.
    convolution : bool, optional
        If True, perform a convolution (the kernel is indexed in reverse on
        the device). Otherwise perform a correlation, in which case complex
        weights are conjugated.

    Returns
    -------
    output : cupy.ndarray
        The filtered array. When the requested output overlaps ``image`` the
        result is computed into a temporary and copied back, and the
        requested output array is returned.

    Raises
    ------
    ValueError
        If ``weights`` is not one-dimensional.
    """
    ndim = image.ndim
    if weights.ndim != 1:
        raise ValueError("expected 1d weight array")
    axis = _normalize_axis_index(axis, ndim)
    origin = util._check_origin(origin, weights.size)
    if weights.size == 0:
        # was `cp.zeros_like(input)`, which referenced the builtin `input`
        return cp.zeros_like(image)
    util._check_mode(mode)

    if convolution:
        # use flip_kernel to avoid cp.ascontiguousarray(weights[::-1]))
        origin = -origin
        if weights.size % 2 == 0:
            origin -= 1
    elif weights.dtype.kind == "c":
        # numpy.correlate conjugates weights rather than input.
        weights = weights.conj()

    anchor = weights.size // 2 + origin

    if weights.size > 32:
        # For large kernels, make sure we have adequate shared memory
        _check_smem_availability(ndim, axis, weights.size, anchor=anchor,
                                 patch_per_block=None, image_dtype=image.dtype,
                                 device_id=None)

    # CUDA kernels assume C-contiguous memory layout
    if not image.flags.c_contiguous:
        image = cp.ascontiguousarray(image)

    complex_output = image.dtype.kind == 'c'
    # Note: important to set use_cucim_casting=True for performance with
    #       8 and 16-bit integer types. This causes the weights to get cast to
    #       float32 rather than float64.
    weights_dtype = util._get_weights_dtype(
        image, weights, use_cucim_casting=True
    )
    if not weights.flags.c_contiguous or weights.dtype != weights_dtype:
        weights = cp.ascontiguousarray(weights, weights_dtype)

    # promote output to nearest complex dtype if necessary
    complex_output = complex_output or weights.dtype.kind == 'c'
    output = util._get_output(output, image, None, complex_output)

    # Handle potential overlap between input and output arrays: compute into
    # a fresh array (`output`) and remember the caller's array (`requested`).
    needs_temp = cp.shares_memory(output, image, 'MAY_SHARE_BOUNDS')
    if needs_temp:
        requested = output
        # was `util._get_output(output.dtype, input)` (builtin `input`)
        output = util._get_output(requested.dtype, image)

    image_c_type = _dtype_char_to_c_types[image.dtype.char]
    weights_c_type = _dtype_char_to_c_types[weights.dtype.char]
    output_c_type = _dtype_char_to_c_types[output.dtype.char]

    conv_axis_kernel, block, patch_per_block = _get_separable_conv_kernel(
        weights.size,
        axis=axis,
        ndim=ndim,
        anchor=anchor,
        image_c_type=image_c_type,
        kernel_c_type=weights_c_type,
        output_c_type=output_c_type,
        mode=mode,
        cval=cval,
        patch_per_block=None,
        flip_kernel=convolution,
    )
    grid = _get_grid(image.shape, block, axis, patch_per_block)
    args = (image, output, weights, anchor) + image.shape[:ndim]
    conv_axis_kernel(
        grid,
        block,
        args,
    )
    if needs_temp:
        # Copy the computed result back into the caller's array. The previous
        # code copied in the opposite direction, discarding the result and
        # returning the caller's array unchanged.
        requested[...] = output
        output = requested
    return output
See the Python function + * `_get_median_rawkernel` defined in `_median_hist.py`. This function will + * generate the full kernel code given a set of parameters. + */ + +__device__ void histogramAddAndSubCoarse(HIST_INT_T* H, + const HIST_INT_T* hist_colAdd, + const HIST_INT_T* hist_colSub) { + int tx = threadIdx.x; + if (tx < HIST_SIZE_COARSE) { + H[tx] += hist_colAdd[tx] - hist_colSub[tx]; + } +} + +__device__ void histogramMultipleAddCoarse(HIST_INT_T* H, + const HIST_INT_T* hist_col, + int histCount) { + int tx = threadIdx.x; + if (tx < HIST_SIZE_COARSE) { + HIST_INT_T temp = H[tx]; + for (int i = 0; i < histCount; i++) + temp += hist_col[(i << LOG2_COARSE) + tx]; + H[tx] = temp; + } +} + +__device__ void histogramClearCoarse(HIST_INT_T* H) { + int tx = threadIdx.x; + if (tx < HIST_SIZE_COARSE) { + H[tx] = 0; + } +} + +__device__ void histogramAddCoarse(HIST_INT_T* H, const HIST_INT_T* hist_col) { + int tx = threadIdx.x; + if (tx < HIST_SIZE_COARSE) { + H[tx] += hist_col[tx]; + } +} + +__device__ void histogramSubCoarse(HIST_INT_T* H, const HIST_INT_T* hist_col) { + int tx = threadIdx.x; + if (tx < HIST_SIZE_COARSE) { + H[tx] -= hist_col[tx]; + } +} + +__device__ void histogramAddFine(HIST_INT_T* H, const HIST_INT_T* hist_col) { + int tx = threadIdx.x; + if (tx < HIST_SIZE_FINE) { + H[tx] += hist_col[tx]; + } +} + +__device__ void histogramAddAndSubFine(HIST_INT_T* H, + const HIST_INT_T* hist_colAdd, + const HIST_INT_T* hist_colSub) { + int tx = threadIdx.x; + if (tx < HIST_SIZE_FINE) { + H[tx] += hist_colAdd[tx] - hist_colSub[tx]; + } +} + +__device__ void histogramClearFine(HIST_INT_T* H) { + int tx = threadIdx.x; + if (tx < HIST_SIZE_FINE) { + H[tx] = 0; + } +} + +__device__ void lucClearCoarse(int* luc) { + int tx = threadIdx.x; + if (tx < HIST_SIZE_COARSE) luc[tx] = 0; +} + +extern "C" __global__ void cuRankFilterMultiBlock(IMAGE_T* src, IMAGE_T* dest, + HIST_INT_T* histPar, + HIST_INT_T* coarseHistGrid, + int r0, int r1, int medPos_, + int rows, int cols) 
{ + __shared__ HIST_INT_T HCoarse[HIST_SIZE_COARSE]; + __shared__ HIST_INT_T HCoarseScan[HIST_SIZE_FINE]; + __shared__ HIST_INT_T HFine[HIST_SIZE_COARSE][HIST_SIZE_FINE]; + + __shared__ int luc[HIST_SIZE_COARSE]; + + __shared__ int firstBin, countAtMed, retval; + + // extract values from params array + const int row_stride = cols; // stride (in elements) along axis 0 + + int extraRowThread = rows % gridDim.x; + int doExtraRow = blockIdx.x < extraRowThread; + int startRow = 0, stopRow = 0; + int rowsPerBlock = rows / gridDim.x + doExtraRow; + + // The following code partitions the work to the blocks. Some blocks will do + // one row more than other blocks. This code is responsible for doing that + // balancing + if (doExtraRow) { + startRow = rowsPerBlock * blockIdx.x; + stopRow = min(rows, startRow + rowsPerBlock); + } else { + startRow = (rowsPerBlock + 1) * extraRowThread + + (rowsPerBlock) * (blockIdx.x - extraRowThread); + stopRow = min(rows, startRow + rowsPerBlock); + } + + HIST_INT_T* hist = histPar + cols * HIST_SIZE * blockIdx.x; + HIST_INT_T* histCoarse = + coarseHistGrid + cols * HIST_SIZE_COARSE * blockIdx.x; + + if (blockIdx.x == (gridDim.x - 1)) stopRow = rows; + __syncthreads(); + int initNeeded = 0, initStartRow, initStopRow; + HIST_INT_T initVal; + + if (blockIdx.x == 0) { + // Note: skips one iteration in the initialization loop by starting at + // row 1 instead of 0 and using initVal r0+2 instead of r0+1. + initNeeded = 1; + initVal = r0 + 2; + initStartRow = 1; + initStopRow = r0; + } else if (startRow < (r0 + 2)) { + initNeeded = 1; + initVal = r0 + 2 - startRow; + initStartRow = 1; + initStopRow = r0 + startRow; + } else { + initNeeded = 0; + initVal = 0; + initStartRow = startRow - (r0 + 1); + initStopRow = r0 + startRow; + } + __syncthreads(); + + // In the original algorithm an initialization phase was required as part of + // the window was outside the image. 
In this parallel version, the + // initialization is required for all thread blocks for which part of the + // median filter lies outside the image. For all threads in the block the + // same code will be executed. + if (initNeeded) { + for (int j = threadIdx.x; j < (cols); j += blockDim.x) { + hist[j * HIST_SIZE + src[j] + HIST_OFFSET] = initVal; + histCoarse[j * HIST_SIZE_COARSE + ((src[j] + HIST_OFFSET) >> LOG2_FINE)] = + initVal; + } + } + __syncthreads(); + + // For all remaining rows in the median filter, add the values to the + // histogram + for (int j = threadIdx.x; j < cols; j += blockDim.x) { + for (int i = initStartRow; i < initStopRow; i++) { + int pos = min(i, rows - 1); + hist[j * HIST_SIZE + src[pos * row_stride + j] + HIST_OFFSET]++; + histCoarse[j * HIST_SIZE_COARSE + + ((src[pos * row_stride + j] + HIST_OFFSET) >> LOG2_FINE)]++; + } + } + __syncthreads(); + // Going through all the rows that the block is responsible for. + int inc = blockDim.x * HIST_SIZE; + int incCoarse = blockDim.x * HIST_SIZE_COARSE; + for (int i = startRow; i < stopRow; i++) { + // For every new row that is started the global histogram for the entire + // window is restarted. + + histogramClearCoarse(HCoarse); + lucClearCoarse(luc); + // Computing some necessary indices + int possub = max(0, i - r0 - 1), posadd = min(rows - 1, i + r0); + int histPos = threadIdx.x * HIST_SIZE; + int histCoarsePos = threadIdx.x * HIST_SIZE_COARSE; + // Going through all the elements of a specific row. For each histogram, a + // value is taken out and one value is added.
+ for (int j = threadIdx.x; j < cols; j += blockDim.x) { + hist[histPos + src[possub * row_stride + j] + HIST_OFFSET]--; + hist[histPos + src[posadd * row_stride + j] + HIST_OFFSET]++; + histCoarse[histCoarsePos + + ((src[possub * row_stride + j] + HIST_OFFSET) >> LOG2_FINE)]--; + histCoarse[histCoarsePos + + ((src[posadd * row_stride + j] + HIST_OFFSET) >> LOG2_FINE)]++; + + histPos += inc; + histCoarsePos += incCoarse; + } + __syncthreads(); + + histogramMultipleAddCoarse(HCoarse, histCoarse, 2 * r1 + 1); + int cols_m_1 = cols - 1; + + for (int j = r1; j < cols - r1; j++) { + int possub = max(j - r1, 0); + int posadd = min(j + 1 + r1, cols_m_1); + int medPos = medPos_; + __syncthreads(); + + histogramMedianParCoarseLookupOnly(HCoarse, HCoarseScan, medPos, + &firstBin, &countAtMed); + __syncthreads(); + + int loopIndex = luc[firstBin]; + if (loopIndex <= (j - r1)) { + histogramClearFine(HFine[firstBin]); + for (loopIndex = j - r1; loopIndex < min(j + r1 + 1, cols); + loopIndex++) { + histogramAddFine(HFine[firstBin], hist + (loopIndex * HIST_SIZE + + (firstBin << LOG2_FINE))); + } + } else { + for (; loopIndex < (j + r1 + 1); loopIndex++) { + histogramAddAndSubFine( + HFine[firstBin], + hist + (min(loopIndex, cols_m_1) * HIST_SIZE + + (firstBin << LOG2_FINE)), + hist + (max(loopIndex - 2 * r1 - 1, 0) * HIST_SIZE + + (firstBin << LOG2_FINE))); + __syncthreads(); + } + } + __syncthreads(); + luc[firstBin] = loopIndex; + + int leftOver = medPos - countAtMed; + if (leftOver >= 0) { + histogramMedianParFineLookupOnly(HFine[firstBin], HCoarseScan, leftOver, + &retval, &countAtMed); + } else + retval = 0; + __syncthreads(); + + if (threadIdx.x == 0) { + dest[i * row_stride + j] = + (firstBin << LOG2_FINE) + retval - HIST_OFFSET; + } + histogramAddAndSubCoarse(HCoarse, + histCoarse + (int)(posadd << LOG2_COARSE), + histCoarse + (int)(possub << LOG2_COARSE)); + + __syncthreads(); + } + __syncthreads(); + } +} diff --git a/python/cucim/src/cucim/skimage/filters/edges.py 
b/python/cucim/src/cucim/skimage/filters/edges.py index 6b4dcff37..193da4f0d 100644 --- a/python/cucim/src/cucim/skimage/filters/edges.py +++ b/python/cucim/src/cucim/skimage/filters/edges.py @@ -13,7 +13,7 @@ import cupy as cp import numpy as np -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from .._shared.utils import _supported_float_type, check_nD from ..restoration.uft import laplacian diff --git a/python/cucim/src/cucim/skimage/filters/tests/test_median.py b/python/cucim/src/cucim/skimage/filters/tests/test_median.py index 3507032bf..6b0d8e002 100644 --- a/python/cucim/src/cucim/skimage/filters/tests/test_median.py +++ b/python/cucim/src/cucim/skimage/filters/tests/test_median.py @@ -2,10 +2,20 @@ import pytest from cupy.testing import assert_allclose from cupyx.scipy import ndimage +from skimage import data from cucim.skimage._shared.testing import expected_warnings from cucim.skimage.filters import median +try: + from math import prod +except ImportError: + from functools import reduce + from operator import mul + + def prod(x): + return reduce(mul, x) + @pytest.fixture def image(): @@ -17,6 +27,11 @@ def image(): dtype=cp.uint8) +@pytest.fixture +def camera(): + return cp.array(data.camera()) + + # TODO: mode='rank' disabled until it has been implmented @pytest.mark.parametrize( "mode, cval, behavior, warning_type", @@ -41,12 +56,167 @@ def test_selem_kwarg_deprecation(image): # TODO: update if rank.median implemented @pytest.mark.parametrize( - "behavior, func, params", - [('ndimage', ndimage.median_filter, {'size': (3, 3)})] + 'behavior, func', [('ndimage', ndimage.median_filter)], # ('rank', rank.median, {'footprint': cp.ones((3, 3), dtype=cp.uint8)})] ) -def test_median_behavior(image, behavior, func, params): - assert_allclose(median(image, behavior=behavior), func(image, **params)) +@pytest.mark.parametrize( + 'mode', ['reflect', 'mirror', 'nearest', 'constant', 'wrap'] +) +# include even shapes and singleton 
shape that force non-histogram code path. +# include some large shapes that always take the histogram-based code path. +@pytest.mark.parametrize( + 'footprint_shape', [ + (3, 3), (5, 5), (9, 15), (2, 2), (1, 1), (2, 7), (23, 23), (15, 35), + ] +) +@pytest.mark.parametrize('out', [None, cp.uint8, cp.float32, 'array']) +def test_median_behavior(camera, behavior, func, mode, footprint_shape, out): + footprint = cp.ones(footprint_shape, dtype=bool) + cam2 = camera[:, :177] # use anisotropic size + assert cam2.dtype == cp.uint8 + if out == 'array': + out = cp.zeros_like(cam2) + assert_allclose( + median(cam2, footprint, mode=mode, behavior=behavior, out=out), + func(cam2, size=footprint.shape, mode=mode, output=out), + ) + + +@pytest.mark.parametrize( + 'mode', ['reflect', 'mirror', 'nearest', 'constant', 'wrap'] +) +# use an anisotropic footprint large enough to trigger the histogram-based path +@pytest.mark.parametrize('footprint_shape', [(3, 3), (3, 5), (15, 23)]) +@pytest.mark.parametrize( + 'int_dtype', [cp.uint8, cp.int8, cp.uint16, cp.int16] +) +@pytest.mark.parametrize( + 'algorithm', ['auto', 'histogram', 'sorting'] +) +@pytest.mark.parametrize( + 'algorithm_kwargs', [{}, {'partitions': 32}] +) +def test_median_hist_dtypes( + mode, footprint_shape, int_dtype, algorithm, algorithm_kwargs, +): + footprint = cp.ones(footprint_shape, dtype=bool) + rng = cp.random.default_rng(123) + shape = (350, 407) + if int_dtype == cp.uint8: + img = rng.integers(0, 256, shape, dtype=cp.uint8) + elif int_dtype == cp.int8: + img = rng.integers(-128, 128, shape, dtype=int).astype(cp.int8) + elif int_dtype == cp.uint16: + if False: + # test with 12-bit range stored in 16-bit integers (e.g. 
DICOM) + img = rng.integers(0, 4096, shape, dtype=cp.uint16) + else: + # smaller dynamic range + # (range 4096 fails only on CI, but couldn't reproduce locally) + img = rng.integers(0, 1024, shape, dtype=cp.uint16) + elif int_dtype == cp.int16: + # chose a limited range of values to test 512 hist_size case + img = rng.integers(-128, 384, shape, dtype=int).astype(cp.int16) + + # 150 is the value used to auto-select between sorting vs. histogram + small_kernel = prod(footprint_shape) < 150 + if algorithm_kwargs and ( + algorithm == 'sorting' + or (algorithm == 'auto' and small_kernel) + ): + msg = ["algorithm_kwargs={'partitions': 32} ignored"] + else: + msg = [] + with expected_warnings(msg): + out = median(img, footprint, mode=mode, behavior='ndimage', + algorithm=algorithm, algorithm_kwargs=algorithm_kwargs) + expected = ndimage.median_filter(img, size=footprint.shape, mode=mode) + assert_allclose(expected, out) + + +# TODO: Determine source of isolated remote test failures when 16-bit range +# is > 1024. Could not reproduce locally. 
+@pytest.mark.parametrize('mode', ['reflect', ]) +# use an anisotropic footprint large enough to trigger the histogram-based path +@pytest.mark.parametrize('footprint_shape', [(7, 11)]) +@pytest.mark.parametrize( + 'int_dtype, irange', + [ + (cp.uint16, (0, 256)), + (cp.uint16, (0, 15)), + (cp.uint16, (128, 384)), + (cp.uint16, (0, 200)), + (cp.uint16, (0, 510)), + (cp.uint16, (500, 550)), + (cp.uint16, (0, 1024)), + pytest.param(cp.uint16, (0, 2048), marks=pytest.mark.skip(reason="isolated failure on CI only")), # noqa + pytest.param(cp.uint16, (1024, 3185), marks=pytest.mark.skip(reason="isolated failure on CI only")), # noqa + (cp.int16, (0, 256)), + (cp.int16, (-15, 15)), + (cp.int16, (128, 384)), + (cp.int16, (-128, 384)), + (cp.int16, (-400, 400)), + pytest.param(cp.int16, (-1024, 2048), marks=pytest.mark.skip(reason="isolated failure on CI only")), # noqa + pytest.param(cp.int16, (150, 2048), marks=pytest.mark.skip(reason="isolated failure on CI only")), # noqa + ] +) +def test_median_hist_16bit_offsets(mode, footprint_shape, int_dtype, irange): + """Make sure 16-bit cases are robust to various value ranges""" + footprint = cp.ones(footprint_shape, dtype=bool) + rng = cp.random.default_rng(123) + shape = (350, 407) + if int_dtype == cp.uint16: + # test with 12-bit range stored in 16-bit integers (e.g. 
DICOM) + img = rng.integers(irange[0], irange[1], shape, dtype=cp.uint16) + elif int_dtype == cp.int16: + # chose a limited range of values to test 512 hist_size case + img = rng.integers(irange[0], irange[1], shape, dtype=int) + img = img.astype(cp.int16) + out = median(img, footprint, mode=mode, behavior='ndimage', + algorithm='histogram') + expected = ndimage.median_filter(img, size=footprint.shape, mode=mode) + assert_allclose(expected, out) + + +@pytest.mark.parametrize('int_dtype', [cp.uint16, cp.int16]) +def test_median_hist_kernel_resource_limit_try_except(int_dtype): + # use an anisotropic footprint large enough to trigger + # the histogram-based path + footprint = cp.ones((15, 23), dtype=bool) + mode = 'nearest' + rng = cp.random.default_rng(123) + shape = (350, 407) # use anisotropic size + if int_dtype == cp.uint16: + # test with range likely to exceed the shared memory limit + img = rng.integers(0, 65536, shape, dtype=cp.uint16) + elif int_dtype == cp.int16: + # test with range likely to exceed the shared memory limit + img = rng.integers(-32768, 32767, shape, dtype=int).astype(cp.int16) + out = median(img, footprint, mode=mode) + expected = ndimage.median_filter(img, size=footprint.shape, mode=mode) + assert_allclose(expected, out) + + +@pytest.mark.parametrize( + 'algorithm', ['auto', 'histogram', 'sorting', 'invalid'] +) +def test_median_algorithm_parameter(algorithm): + """Call all algorithms for float32 input. 
+ """ + footprint = cp.ones((15, 23), dtype=bool) + mode = 'nearest' + rng = cp.random.default_rng(123) + shape = (350, 407) # use anisotropic size + img = rng.standard_normal(shape, dtype=cp.float32) + if algorithm in ['invalid', 'histogram']: + # histogram supports only integer-valued dtypes + # 'invalid' is an unrecognized algorithm + with pytest.raises(ValueError): + median(img, footprint, mode=mode, algorithm=algorithm) + else: + out = median(img, footprint, mode=mode, algorithm=algorithm) + expected = ndimage.median_filter(img, size=footprint.shape, mode=mode) + assert_allclose(expected, out) @pytest.mark.parametrize( diff --git a/python/cucim/src/cucim/skimage/filters/tests/test_separable_filtering.py b/python/cucim/src/cucim/skimage/filters/tests/test_separable_filtering.py new file mode 100644 index 000000000..0f2c5897f --- /dev/null +++ b/python/cucim/src/cucim/skimage/filters/tests/test_separable_filtering.py @@ -0,0 +1,333 @@ +import cupy as cp +import pytest + +from cucim.skimage._vendored.ndimage import (convolve1d, correlate1d, + gaussian_filter, gaussian_filter1d, + gaussian_gradient_magnitude, + gaussian_laplace, laplace, prewitt, + sobel, uniform_filter, + uniform_filter1d) + + +def _get_image(shape, dtype, seed=123): + rng = cp.random.default_rng(seed) + dtype = cp.dtype(dtype) + if dtype.kind == 'b': + image = rng.integers(0, 1, shape, dtype=cp.uint8).astype(bool) + elif dtype.kind in 'iu': + image = rng.integers(0, 128, shape, dtype=dtype) + elif dtype.kind in 'c': + real_dtype = cp.asarray([], dtype=dtype).real.dtype + image = rng.standard_normal(shape, dtype=real_dtype) + image = image + 1j * rng.standard_normal(shape, dtype=real_dtype) + else: + if dtype == cp.float16: + image = rng.standard_normal(shape).astype(dtype) + else: + image = rng.standard_normal(shape, dtype=dtype) + return image + + +def _get_rtol_atol(dtype): + real_dtype = cp.array([], dtype=dtype).real.dtype + rtol = atol =
1e-12 + elif real_dtype == cp.float16: + rtol = atol = 1e-3 + return rtol, atol + + +def _compare_implementations( + shape, kernel_size, axis, dtype, mode, cval=0.0, origin=0, + output_dtype=None, kernel_dtype=None, output_preallocated=False, + function=convolve1d, +): + dtype = cp.dtype(dtype) + if kernel_dtype is None: + kernel_dtype = dtype + image = _get_image(shape, dtype) + kernel = _get_image((kernel_size,), kernel_dtype) + rtol, atol = _get_rtol_atol(kernel.dtype) + kwargs = dict(axis=axis, mode=mode, cval=cval, origin=origin) + if output_dtype is not None: + output_dtype = cp.dtype(output_dtype) + if output_preallocated: + if output_dtype is None: + output_dtype = image.dtype + output1 = cp.empty(image.shape, dtype=output_dtype) + output2 = cp.empty(image.shape, dtype=output_dtype) + function( + image, kernel, output=output1, algorithm='elementwise', **kwargs + ) + function( + image, kernel, output=output2, algorithm='shared_memory', **kwargs + ) + cp.testing.assert_allclose(output1, output2, rtol=rtol, atol=atol) + return + output1 = function( + image, kernel, output=output_dtype, algorithm='elementwise', **kwargs + ) + output2 = function( + image, kernel, output=output_dtype, algorithm='shared_memory', **kwargs + ) + cp.testing.assert_allclose(output1, output2, rtol=rtol, atol=atol) + return + + +def _compare_implementations_other( + shape, dtype, mode, cval=0.0, + output_dtype=None, kernel_dtype=None, output_preallocated=False, + function=convolve1d, func_kwargs={}, +): + dtype = cp.dtype(dtype) + image = _get_image(shape, dtype) + rtol, atol = _get_rtol_atol(image.dtype) + kwargs = dict(mode=mode, cval=cval) + if func_kwargs: + kwargs.update(func_kwargs) + if output_dtype is not None: + output_dtype = cp.dtype(output_dtype) + if output_preallocated: + if output_dtype is None: + output_dtype = image.dtype + output1 = cp.empty(image.shape, dtype=output_dtype) + output2 = cp.empty(image.shape, dtype=output_dtype) + function(image, output=output1, 
algorithm='elementwise', **kwargs) + function(image, output=output2, algorithm='shared_memory', **kwargs) + cp.testing.assert_allclose(output1, output2, rtol=rtol, atol=atol) + return + output1 = function( + image, output=output_dtype, algorithm='elementwise', **kwargs + ) + output2 = function( + image, output=output_dtype, algorithm='shared_memory', **kwargs + ) + cp.testing.assert_allclose(output1, output2, rtol=rtol, atol=atol) + return + + +@pytest.mark.parametrize('shape', ((64, 57), (1000, 500))) +@pytest.mark.parametrize('axis', (0, 1)) +@pytest.mark.parametrize('origin', ('min', 0, 'max')) +@pytest.mark.parametrize('kernel_size', tuple(range(1, 17))) +@pytest.mark.parametrize('function', [convolve1d, correlate1d]) +def test_separable_kernel_sizes_and_origins( + shape, axis, origin, kernel_size, function +): + if kernel_size == 1: + origin = 0 + elif origin == 'min': + origin = -(kernel_size // 2) + elif origin == 'max': + origin = kernel_size // 2 + if kernel_size % 2 == 0: + origin -= 1 + _compare_implementations( + shape, + kernel_size=kernel_size, + axis=axis, + dtype=cp.float32, + mode='nearest', + origin=origin, + function=function, + ) + + +@pytest.mark.parametrize('shape', ((64, 57), (1000, 500))) +@pytest.mark.parametrize('axis', (0, 1)) +@pytest.mark.parametrize( + 'kernel_size', + tuple(range(17, 129, 11)) + tuple(range(145, 275, 41)) +) +def test_separable_kernel_larger_sizes(shape, axis, kernel_size): + _compare_implementations( + shape, + kernel_size=kernel_size, + axis=axis, + dtype=cp.float32, + mode='reflect', + origin=0, + ) + + +@pytest.mark.parametrize('shape', ((1000, 500),)) +@pytest.mark.parametrize('axis', (0, 1)) +def test_separable_elementwise_very_large_size_fallback(shape, axis): + """Very large kernel to make it likely shared memory will be exceeded.""" + _compare_implementations( + shape, + kernel_size=901, + axis=axis, + dtype=cp.float64, + mode='nearest', + origin=0, + ) + + +@pytest.mark.parametrize('shape', ((4000, 2000), 
(1, 1), (5, 500), (1500, 5))) +@pytest.mark.parametrize('axis', (-1, -2)) +@pytest.mark.parametrize('kernel_size', (1, 38, 129)) +@pytest.mark.parametrize( + 'mode', + ('nearest', 'reflect', 'wrap', 'mirror', 'constant', ('constant', 1)), +) +def test_separable_image_shapes_and_modes(shape, axis, kernel_size, mode): + + if isinstance(mode, tuple): + mode, cval = mode + else: + cval = 0 + + _compare_implementations( + shape, + kernel_size=kernel_size, + axis=axis, + dtype=cp.float32, + mode=mode, + cval=cval, + origin=0, + ) + + +image_dtypes_tested = ( + cp.float16, cp.float32, cp.float64, cp.complex64, cp.complex128, bool, + cp.int8, cp.uint8, cp.int16, cp.uint16, cp.int32, cp.uint32, cp.int64, + cp.uint64, +) + + +@pytest.mark.parametrize('axis', (0, 1)) +@pytest.mark.parametrize('image_dtype', image_dtypes_tested) +@pytest.mark.parametrize( + 'kernel_dtype', (None, cp.float32, cp.uint8, cp.complex64) +) +def test_separable_image_and_kernel_dtypes(axis, image_dtype, kernel_dtype): + """Test many kernel and image dtype combinations""" + + _compare_implementations( + (64, 32), + kernel_size=3, + axis=axis, + dtype=image_dtype, + mode='nearest', + origin=0, + kernel_dtype=kernel_dtype, + ) + + +@pytest.mark.parametrize('axis', (0, 1)) +@pytest.mark.parametrize('image_dtype', image_dtypes_tested) +@pytest.mark.parametrize( + 'output_dtype', (None, cp.float32, cp.int32, cp.complex64) +) +@pytest.mark.parametrize('output_preallocated', (False, True)) +def test_separable_input_and_output_dtypes( + axis, image_dtype, output_dtype, output_preallocated +): + """Test many kernel and image dtype combinations""" + if cp.dtype(image_dtype).kind == 'c' and output_dtype is not None: + if not cp.dtype(output_dtype).kind == 'c': + pytest.skip('cannot cast complex values to real') + _compare_implementations( + (64, 32), + kernel_size=3, + axis=axis, + dtype=image_dtype, + mode='nearest', + origin=0, + kernel_dtype=None, + output_dtype=output_dtype, + 
output_preallocated=output_preallocated, + ) + + +@pytest.mark.parametrize('shape', ((64, 57),)) +@pytest.mark.parametrize('axis', (0, 1)) +@pytest.mark.parametrize('origin', ('min', 0, 'max')) +@pytest.mark.parametrize( + 'function, func_kwargs', + [ + (gaussian_filter, dict(sigma=1.5)), + (gaussian_filter1d, dict(sigma=1.5, axis=0)), + (gaussian_filter1d, dict(sigma=1.5, axis=1)), + (gaussian_gradient_magnitude, dict(sigma=3.5)), + (gaussian_laplace, dict(sigma=2.5)), + (laplace, {}), + (prewitt, {}), + (sobel, {}), + (uniform_filter, dict(size=7)), + (uniform_filter1d, dict(size=7, axis=0)), + (uniform_filter1d, dict(size=7, axis=1)), + ] +) +def test_separable_internal_kernel( + shape, axis, origin, function, func_kwargs +): + """ + Test case to make sure the 'algorithm' kwarg works for all other separable + ndimage filters as well. + """ + _compare_implementations_other( + shape, + dtype=cp.float32, + mode='nearest', + function=function, + func_kwargs=func_kwargs, + ) + + +@pytest.mark.parametrize('shape', ((16, 24, 32), (192, 128, 160))) +@pytest.mark.parametrize('axis', (0, 1, 2)) +@pytest.mark.parametrize('kernel_size', tuple(range(1, 17, 3))) +@pytest.mark.parametrize('function', [convolve1d, correlate1d]) +def test_separable_kernel_sizes_3d( + shape, axis, kernel_size, function +): + _compare_implementations( + shape, + kernel_size=kernel_size, + axis=axis, + dtype=cp.float32, + mode='nearest', + origin=0, + function=function, + ) + + +@pytest.mark.parametrize('axis', (0, 1, 2)) +@pytest.mark.parametrize('kernel_size', (65, 129, 198)) +def test_separable_large_kernel_3d(axis, kernel_size): + _compare_implementations( + shape=(256, 128, 96), + kernel_size=kernel_size, + axis=axis, + dtype=cp.float32, + mode='reflect', + origin=0, + ) + + +@pytest.mark.parametrize( + 'shape', ((64, 5, 64), (5, 64, 64), (64, 64, 5), (32, 32, 32)) +) +@pytest.mark.parametrize('axis', (-1, -2, -3)) +@pytest.mark.parametrize('kernel_size', (9,)) +@pytest.mark.parametrize( + 
'mode', + ('nearest', 'reflect', 'wrap', 'mirror', 'constant', ('constant', 1)), +) +def test_separable_image_shapes_and_modes_3d(shape, axis, kernel_size, mode): + if isinstance(mode, tuple): + mode, cval = mode + else: + cval = 0 + _compare_implementations( + shape, + kernel_size=kernel_size, + axis=axis, + dtype=cp.float32, + mode=mode, + cval=cval, + origin=0, + ) diff --git a/python/cucim/src/cucim/skimage/filters/thresholding.py b/python/cucim/src/cucim/skimage/filters/thresholding.py index 8ca274209..14acc342e 100644 --- a/python/cucim/src/cucim/skimage/filters/thresholding.py +++ b/python/cucim/src/cucim/skimage/filters/thresholding.py @@ -6,7 +6,7 @@ import cupy as cp import numpy as np -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from skimage.filters import threshold_isodata as _threshold_isodata_cpu from skimage.filters import threshold_minimum as _threshold_minimum_cpu from skimage.filters import threshold_multiotsu as _threshold_multiotsu_cpu @@ -735,8 +735,8 @@ def threshold_li(image, *, tolerance=None, initial_guess=None, return threshold -@deprecate_kwarg({'max_iter': 'max_num_iter'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg({'max_iter': 'max_num_iter'}, removed_version="23.02.00", + deprecated_version="22.02.00") def threshold_minimum(image=None, nbins=256, max_num_iter=10000, *, hist=None): """Return threshold value based on minimum method. 
diff --git a/python/cucim/src/cucim/skimage/measure/_blur_effect.py b/python/cucim/src/cucim/skimage/measure/_blur_effect.py index 4e0fe6fe9..69421512e 100644 --- a/python/cucim/src/cucim/skimage/measure/_blur_effect.py +++ b/python/cucim/src/cucim/skimage/measure/_blur_effect.py @@ -1,5 +1,5 @@ import cupy as cp -import cupyx.scipy.ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from ..color import rgb2gray from ..util import img_as_float diff --git a/python/cucim/src/cucim/skimage/measure/_label.py b/python/cucim/src/cucim/skimage/measure/_label.py index 9dc2fc2bd..c9969cdff 100644 --- a/python/cucim/src/cucim/skimage/measure/_label.py +++ b/python/cucim/src/cucim/skimage/measure/_label.py @@ -16,8 +16,8 @@ def _get_structure(ndim, connectivity): # TODO: currently uses int32 for the labels. should add int64 option as well @deprecate_kwarg({'input': 'label_image'}, - deprecated_version='0.19', - removed_version='1.0') + deprecated_version='22.02.00', + removed_version='23.02.00') def label(label_image, background=None, return_num=False, connectivity=None): r"""Label connected regions of an integer array. 
diff --git a/python/cucim/src/cucim/skimage/measure/profile.py b/python/cucim/src/cucim/skimage/measure/profile.py index 6643dc034..fc0b0cf1f 100644 --- a/python/cucim/src/cucim/skimage/measure/profile.py +++ b/python/cucim/src/cucim/skimage/measure/profile.py @@ -2,7 +2,7 @@ import cupy as cp import numpy as np -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from .._shared.utils import _fix_ndimage_mode, _validate_interpolation_order diff --git a/python/cucim/src/cucim/skimage/measure/tests/test_blur_effect.py b/python/cucim/src/cucim/skimage/measure/tests/test_blur_effect.py index ff5b2ac5e..3b0013005 100644 --- a/python/cucim/src/cucim/skimage/measure/tests/test_blur_effect.py +++ b/python/cucim/src/cucim/skimage/measure/tests/test_blur_effect.py @@ -1,6 +1,7 @@ import cupy as cp +import pytest from cupy.testing import assert_array_equal -from skimage.data import astronaut, cells3d +from skimage.data import astronaut from cucim.skimage.color import rgb2gray from cucim.skimage.filters import gaussian @@ -47,6 +48,7 @@ def test_blur_effect_channel_axis(): def test_blur_effect_3d(): """Test that the blur metric works on a 3D image.""" + cells3d = pytest.importorskip('skimage.data.cells3d') image_3d = cp.array(cells3d()[:, 1, :, :]) # grab just the nuclei B0 = blur_effect(image_3d) B1 = blur_effect(gaussian(image_3d, sigma=1)) diff --git a/python/cucim/src/cucim/skimage/metrics/_structural_similarity.py b/python/cucim/src/cucim/skimage/metrics/_structural_similarity.py index 6bfa07d31..44d0e28e5 100644 --- a/python/cucim/src/cucim/skimage/metrics/_structural_similarity.py +++ b/python/cucim/src/cucim/skimage/metrics/_structural_similarity.py @@ -1,7 +1,7 @@ import functools import cupy as cp -from cupyx.scipy.ndimage import uniform_filter +import cucim.skimage._vendored.ndimage as ndi from .._shared import utils from .._shared.filters import gaussian @@ -187,7 +187,7 @@ def structural_similarity(im1, im2, filter_func = gaussian 
filter_args = {'sigma': sigma, 'truncate': truncate, 'mode': 'reflect'} else: - filter_func = uniform_filter + filter_func = ndi.uniform_filter filter_args = {'size': win_size} # ndimage filters need floating point data diff --git a/python/cucim/src/cucim/skimage/morphology/__init__.py b/python/cucim/src/cucim/skimage/morphology/__init__.py index e3a5423b4..cf1fbb774 100644 --- a/python/cucim/src/cucim/skimage/morphology/__init__.py +++ b/python/cucim/src/cucim/skimage/morphology/__init__.py @@ -1,4 +1,4 @@ -from ._skeletonize import thin +from ._skeletonize import medial_axis, thin from .binary import (binary_closing, binary_dilation, binary_erosion, binary_opening) from .footprints import (ball, cube, diamond, disk, octagon, octahedron, @@ -32,4 +32,5 @@ "remove_small_objects", "remove_small_holes", "thin", + "medial_axis", ] diff --git a/python/cucim/src/cucim/skimage/morphology/_medial_axis_lookup.py b/python/cucim/src/cucim/skimage/morphology/_medial_axis_lookup.py new file mode 100644 index 000000000..37f40b13e --- /dev/null +++ b/python/cucim/src/cucim/skimage/morphology/_medial_axis_lookup.py @@ -0,0 +1,67 @@ +import numpy as np + +# medial axis lookup tables (independent of image content) +# +# Note: lookup table generated using scikit-image code from +# https://github.com/scikit-image/scikit-image/blob/38b595d60befe3a0b4c0742995b9737200a079c6/skimage/morphology/_skeletonize.py#L449-L458 # noqa + +lookup_table = np.array( + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, + 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + ], + dtype=bool, +) + + +cornerness_table = np.array( + [ + 9, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 7, 7, 6, 7, 6, + 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, + 6, 5, 5, 4, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 7, + 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 7, 6, 6, 5, 6, 5, 5, 4, + 6, 5, 5, 4, 5, 4, 4, 3, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, + 4, 3, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 8, 7, 7, 6, + 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, + 5, 4, 5, 4, 4, 3, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, + 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 7, 6, 6, 5, 6, 5, + 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, + 4, 3, 3, 2, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 5, 4, + 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, 8, 7, 7, 6, 7, 6, 6, 5, + 7, 6, 6, 5, 6, 5, 5, 4, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, + 4, 3, 7, 6, 
6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 6, 5, 5, 4, + 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, + 5, 4, 5, 4, 4, 3, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, + 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 5, 4, 4, 3, 4, 3, + 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, + 5, 4, 4, 3, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 6, 5, + 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 5, 4, 4, 3, 4, 3, 3, 2, + 4, 3, 3, 2, 3, 2, 2, 1, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, + 3, 2, 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, 5, 4, 4, 3, + 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, + 2, 1, 2, 1, 1, 0 + ], + dtype=np.uint8, +) diff --git a/python/cucim/src/cucim/skimage/morphology/_skeletonize.py b/python/cucim/src/cucim/skimage/morphology/_skeletonize.py index 4a93622de..2bc06dec1 100644 --- a/python/cucim/src/cucim/skimage/morphology/_skeletonize.py +++ b/python/cucim/src/cucim/skimage/morphology/_skeletonize.py @@ -1,8 +1,15 @@ +import warnings + import cupy as cp -import cupyx.scipy.ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi import numpy as np +from cucim.core.operations.morphology import distance_transform_edt + from .._shared.utils import check_nD, deprecate_kwarg +from ._medial_axis_lookup import \ + cornerness_table as _medial_axis_cornerness_table +from ._medial_axis_lookup import lookup_table as _medial_axis_lookup_table # --------- Skeletonization and thinning based on Guo and Hall 1989 --------- @@ -39,8 +46,8 @@ 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=bool) -@deprecate_kwarg({'max_iter': 'max_num_iter'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg({"max_iter": "max_num_iter"}, removed_version="23.02.00", + deprecated_version="22.02.00") def thin(image, max_num_iter=None): """ Perform morphological thinning of a binary image. 
@@ -62,7 +69,7 @@ def thin(image, max_num_iter=None): See Also -------- - skeletonize, medial_axis + medial_axis Notes ----- @@ -131,7 +138,7 @@ def thin(image, max_num_iter=None): # perform the two "subiterations" described in the paper for lut in [G123_LUT, G123P_LUT]: # correlate image with neighborhood mask - N = ndi.correlate(skel, mask, mode='constant') + N = ndi.correlate(skel, mask, mode="constant") # take deletion decision from this subiteration's LUT D = cp.take(lut, N) # perform deletion @@ -141,3 +148,218 @@ def thin(image, max_num_iter=None): num_iter += 1 return skel.astype(bool) + + +# --------- Skeletonization by medial axis transform -------- + + +def _get_tiebreaker(n, random_seed): + # CuPy generator doesn't currently have the permutation method, so + # fall back to cp.random.permutation instead. + cp.random.seed(random_seed) + if n < 2 << 31: + dtype = np.int32 + else: + dtype = np.intp + tiebreaker = cp.random.permutation(cp.arange(n, dtype=dtype)) + return tiebreaker + + +def medial_axis(image, mask=None, return_distance=False, *, random_state=None): + """Compute the medial axis transform of a binary image. + + Parameters + ---------- + image : binary ndarray, shape (M, N) + The image of the shape to be skeletonized. + mask : binary ndarray, shape (M, N), optional + If a mask is given, only those elements in `image` with a true + value in `mask` are used for computing the medial axis. + return_distance : bool, optional + If true, the distance transform is returned as well as the skeleton. + random_state : {None, int, `numpy.random.Generator`}, optional + If `random_state` is None the `numpy.random.Generator` singleton is + used. + If `random_state` is an int, a new ``Generator`` instance is used, + seeded with `random_state`. + If `random_state` is already a ``Generator`` instance then that + instance is used. + + .. 
versionadded:: 0.19 + + Returns + ------- + out : ndarray of bools + Medial axis transform of the image + dist : ndarray of ints, optional + Distance transform of the image (only returned if `return_distance` + is True) + + See Also + -------- + skeletonize + + Notes + ----- + This algorithm computes the medial axis transform of an image + as the ridges of its distance transform. + + The different steps of the algorithm are as follows + * A lookup table is used, that assigns 0 or 1 to each configuration of + the 3x3 binary square, whether the central pixel should be removed + or kept. We want a point to be removed if it has more than one neighbor + and if removing it does not change the number of connected components. + + * The distance transform to the background is computed, as well as + the cornerness of the pixel. + + * The foreground (value of 1) points are ordered by + the distance transform, then the cornerness. + + * A cython function is called to reduce the image to its skeleton. It + processes pixels in the order determined at the previous step, and + removes or maintains a pixel according to the lookup table. Because + of the ordering, it is possible to process all pixels in only one + pass. 
+ + Examples + -------- + >>> square = np.zeros((7, 7), dtype=np.uint8) + >>> square[1:-1, 2:-2] = 1 + >>> square + array([[0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0]], dtype=uint8) + >>> medial_axis(square).astype(np.uint8) + array([[0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0]], dtype=uint8) + + """ + try: + from skimage.morphology._skeletonize_cy import _skeletonize_loop + except ImportError as e: + warnings.warn( + "Could not find required private skimage Cython function:\n" + "\tskimage.morphology._skeletonize_cy._skeletonize_loop\n" + ) + raise e + + if mask is None: + # masked_image is modified in-place later so make a copy of the input + masked_image = image.astype(bool, copy=True) + else: + masked_image = image.astype(bool, copy=True) + masked_image[~mask] = False + + # Load precomputed lookup table based on three conditions: + # 1. Keep only positive pixels + # AND + # 2. Keep if removing the pixel results in a different connectivity + # (if the number of connected components is different with and + # without the central pixel) + # OR + # 3. Keep if # pixels in neighborhood is 2 or less + # Note that this table is independent of the image + table = _medial_axis_lookup_table + + # Build distance transform + distance = distance_transform_edt(masked_image) + if return_distance: + store_distance = distance.copy() + + # Corners + # The processing order along the edge is critical to the shape of the + # resulting skeleton: if you process a corner first, that corner will + # be eroded and the skeleton will miss the arm from that corner. Pixels + # with fewer neighbors are more "cornery" and should be processed last. 
+ # We use a cornerness_table lookup table where the score of a + # configuration is the number of background (0-value) pixels in the + # 3x3 neighborhood + cornerness_table = cp.asarray(_medial_axis_cornerness_table) + corner_score = _table_lookup(masked_image, cornerness_table) + + # Define arrays for inner loop + distance = distance[masked_image] + i, j = cp.where(masked_image) + + # Determine the order in which pixels are processed. + # We use a random # for tiebreaking. Assign each pixel in the image a + # predictable, random # so that masking doesn't affect arbitrary choices + # of skeletons + tiebreaker = _get_tiebreaker(n=distance.size, random_seed=random_state) + order = cp.lexsort( + cp.stack( + (tiebreaker, corner_score[masked_image], distance), + axis=0 + ) + ) + + # Call _skeletonize_loop on the CPU. It requires a single pass over the + # full array using a specific pixel order, so cannot be run multithreaded! + order = cp.asnumpy(order.astype(cp.int32, copy=False)) + table = cp.asnumpy(table.astype(cp.uint8, copy=False)) + i = cp.asnumpy(i).astype(dtype=np.intp, copy=False) + j = cp.asnumpy(j).astype(dtype=np.intp, copy=False) + result = cp.asnumpy(masked_image) + # Remove pixels not belonging to the medial axis + _skeletonize_loop(result.view(np.uint8), i, j, order, table) + result = cp.asarray(result.view(bool), dtype=bool) + + if mask is not None: + result[~mask] = image[~mask] + if return_distance: + return result, store_distance + else: + return result + + +def _table_lookup(image, table): + """ + Perform a morphological transform on an image, directed by its + neighbors + + Parameters + ---------- + image : ndarray + A binary image + table : ndarray + A 512-element table giving the transform of each pixel given + the values of that pixel and its 8-connected neighbors.
+ + Returns + ------- + result : ndarray of same shape as `image` + Transformed image + + Notes + ----- + The pixels are numbered like this:: + + 0 1 2 + 3 4 5 + 6 7 8 + + The index at a pixel is the sum of 2**(pixel number) for pixels + that evaluate to true. + """ + # + # We accumulate into the indexer to get the index into the table + # at each point in the image + # + # max possible value of indexer is 511, so just use int16 dtype + kernel = cp.array( + [[256, 128, 64], [32, 16, 8], [4, 2, 1]], + dtype=cp.int16 + ) + indexer = ndi.convolve(image, kernel, output=np.int16, mode="constant") + image = table[indexer] + return image diff --git a/python/cucim/src/cucim/skimage/morphology/binary.py b/python/cucim/src/cucim/skimage/morphology/binary.py index 56c5b6654..1df98fe00 100644 --- a/python/cucim/src/cucim/skimage/morphology/binary.py +++ b/python/cucim/src/cucim/skimage/morphology/binary.py @@ -4,7 +4,7 @@ import functools import cupy as cp -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from .._shared.utils import deprecate_kwarg from .footprints import _footprint_is_sequence @@ -37,8 +37,9 @@ def _iterate_binary_func(binary_func, image, footprint, out): # default with the same dimension as the input image and size 3 along each # axis. @default_footprint -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", + deprecated_version="22.02.00") def binary_erosion(image, footprint=None, out=None): """Return fast binary morphological erosion of an image. 
@@ -94,8 +95,9 @@ def binary_erosion(image, footprint=None, out=None): @default_footprint -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", + deprecated_version="22.02.00") def binary_dilation(image, footprint=None, out=None): """Return fast binary morphological dilation of an image. @@ -149,8 +151,9 @@ def binary_dilation(image, footprint=None, out=None): @default_footprint -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", + deprecated_version="22.02.00") def binary_opening(image, footprint=None, out=None): """Return fast binary morphological opening of an image. @@ -199,8 +202,9 @@ def binary_opening(image, footprint=None, out=None): @default_footprint -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", + deprecated_version="22.02.00") def binary_closing(image, footprint=None, out=None): """Return fast binary morphological closing of an image. diff --git a/python/cucim/src/cucim/skimage/morphology/footprints.py b/python/cucim/src/cucim/skimage/morphology/footprints.py index 744f11535..70e8fda7c 100644 --- a/python/cucim/src/cucim/skimage/morphology/footprints.py +++ b/python/cucim/src/cucim/skimage/morphology/footprints.py @@ -175,8 +175,8 @@ def _decompose_size(size, kernel_size=3): @deprecate_kwarg({'height': 'ncols', 'width': 'nrows'}, - deprecated_version='0.18.0', - removed_version='0.20.0') + deprecated_version='21.06.00', + removed_version='22.02.00') def rectangle(nrows, ncols, dtype=cp.uint8, *, decomposition=None): """Generates a flat, rectangular-shaped footprint. 
diff --git a/python/cucim/src/cucim/skimage/morphology/gray.py b/python/cucim/src/cucim/skimage/morphology/gray.py index ca6fa2845..ce5daf759 100644 --- a/python/cucim/src/cucim/skimage/morphology/gray.py +++ b/python/cucim/src/cucim/skimage/morphology/gray.py @@ -4,7 +4,7 @@ import functools import cupy as cp -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from .._shared.utils import deprecate_kwarg from ..util import crop @@ -157,8 +157,8 @@ def func_out(image, footprint, out=None, *args, **kwargs): @default_footprint -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", deprecated_version="22.02.00") def erosion(image, footprint=None, out=None, shift_x=False, shift_y=False): """Return grayscale morphological erosion of an image. @@ -236,8 +236,8 @@ def erosion(image, footprint=None, out=None, shift_x=False, shift_y=False): @default_footprint -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", deprecated_version="22.02.00") def dilation(image, footprint=None, out=None, shift_x=False, shift_y=False): """Return grayscale morphological dilation of an image. 
@@ -324,8 +324,8 @@ def dilation(image, footprint=None, out=None, shift_x=False, shift_y=False): return out -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", deprecated_version="22.02.00") @default_footprint @pad_for_eccentric_footprints def opening(image, footprint=None, out=None): @@ -390,8 +390,8 @@ def opening(image, footprint=None, out=None): return out -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", deprecated_version="22.02.00") @default_footprint @pad_for_eccentric_footprints def closing(image, footprint=None, out=None): @@ -474,8 +474,8 @@ def _white_tophat_seqence(image, footprints, out): @default_footprint -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", deprecated_version="22.02.00") def white_tophat(image, footprint=None, out=None): """Return white top hat of an image. @@ -565,8 +565,8 @@ def white_tophat(image, footprint=None, out=None): @default_footprint -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", deprecated_version="22.02.00") def black_tophat(image, footprint=None, out=None): """Return black top hat of an image. 
diff --git a/python/cucim/src/cucim/skimage/morphology/grayreconstruct.py b/python/cucim/src/cucim/skimage/morphology/grayreconstruct.py index 8847aaaf0..646503f13 100644 --- a/python/cucim/src/cucim/skimage/morphology/grayreconstruct.py +++ b/python/cucim/src/cucim/skimage/morphology/grayreconstruct.py @@ -15,8 +15,8 @@ from .._shared.utils import deprecate_kwarg -@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, + removed_version="23.02.00", deprecated_version="22.02.00") def reconstruction(seed, mask, method='dilation', footprint=None, offset=None): """Perform a morphological reconstruction of an image. diff --git a/python/cucim/src/cucim/skimage/morphology/misc.py b/python/cucim/src/cucim/skimage/morphology/misc.py index 62ff9bfd0..bf3215a3f 100644 --- a/python/cucim/src/cucim/skimage/morphology/misc.py +++ b/python/cucim/src/cucim/skimage/morphology/misc.py @@ -2,7 +2,7 @@ import functools import cupy as cp -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from .._shared.utils import remove_arg, warn @@ -73,7 +73,7 @@ def remove_small_objects(ar, min_size=64, connectivity=1, in_place=False, labelling if `ar` is bool. in_place : bool, optional (default: False) If ``True``, remove the objects in the input array itself. - Otherwise, make a copy. Deprecated since version 0.19. Please + Otherwise, make a copy. Deprecated since version 22.02.00. Please use `out` instead. out : ndarray Array of the same shape as `ar`, into which the output is @@ -172,7 +172,7 @@ def remove_small_holes(ar, area_threshold=64, connectivity=1, in_place=False, The connectivity defining the neighborhood of a pixel. in_place : bool, optional (default: False) If `True`, remove the connected components in the input array - itself. Otherwise, make a copy. Deprecated since version 0.19. + itself. Otherwise, make a copy. 
Deprecated since version 22.02.00. Please use `out` instead. out : ndarray Array of the same shape as `ar` and bool dtype, into which the diff --git a/python/cucim/src/cucim/skimage/morphology/tests/test_skeletonize.py b/python/cucim/src/cucim/skimage/morphology/tests/test_skeletonize.py index 532271c84..2fbafafc1 100644 --- a/python/cucim/src/cucim/skimage/morphology/tests/test_skeletonize.py +++ b/python/cucim/src/cucim/skimage/morphology/tests/test_skeletonize.py @@ -5,20 +5,25 @@ from skimage.morphology import thin as thin_cpu from cucim.skimage._shared._warnings import expected_warnings -from cucim.skimage.morphology import thin +from cucim.skimage.morphology import medial_axis, thin -class TestThin(): +class TestThin: @property def input_image(self): """image to test thinning with""" - ii = cp.array([[0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 0], - [0, 1, 0, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 0], - [0, 1, 1, 1, 1, 1, 0], - [0, 0, 0, 0, 0, 0, 0]], dtype=cp.uint8) + ii = cp.array( + [ + [0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 0], + [0, 1, 0, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0], + ], + dtype=cp.uint8, + ) return ii def test_zeros(self): @@ -26,13 +31,18 @@ def test_zeros(self): def test_iter_1(self): result = thin(self.input_image, 1).astype(cp.uint8) - expected = cp.array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0], - [0, 1, 0, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]], dtype=cp.uint8) + expected = cp.array( + [ + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0], + [0, 1, 0, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + ], + dtype=cp.uint8, + ) assert_array_equal(result, expected) def test_max_iter_kwarg_deprecation(self): @@ -43,13 +53,18 @@ def test_max_iter_kwarg_deprecation(self): def test_noiter(self): result = 
thin(self.input_image).astype(cp.uint8) - expected = cp.array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0], - [0, 1, 0, 1, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]], dtype=cp.uint8) + expected = cp.array( + [ + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0], + [0, 1, 0, 1, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + ], + dtype=cp.uint8, + ) assert_array_equal(result, expected) def test_baddim(self): @@ -57,7 +72,7 @@ def test_baddim(self): with pytest.raises(ValueError): thin(ii) - @pytest.mark.parametrize('invert', [False, True]) + @pytest.mark.parametrize("invert", [False, True]) def test_compare_skimage(self, invert): h = data.horse() if invert: @@ -65,3 +80,84 @@ def test_compare_skimage(self, invert): result = thin(cp.asarray(h)) expected = thin_cpu(h) assert_array_equal(result, expected) + + +class TestMedialAxis: + def test_00_00_zeros(self): + """Test skeletonize on an array of all zeros""" + result = medial_axis(cp.zeros((10, 10), bool)) + assert not cp.any(result) + + def test_00_01_zeros_masked(self): + """Test skeletonize on an array that is completely masked""" + result = medial_axis(cp.zeros((10, 10), bool), cp.zeros((10, 10), bool)) + assert not cp.any(result) + + def test_vertical_line(self): + """Test a thick vertical line, issue #3861""" + img = cp.zeros((9, 9)) + img[:, 2] = 1 + img[:, 3] = 1 + img[:, 4] = 1 + + expected = cp.full(img.shape, False) + expected[:, 3] = True + + result = medial_axis(img) + assert_array_equal(result, expected) + + def test_01_01_rectangle(self): + """Test skeletonize on a rectangle""" + image = cp.zeros((9, 15), bool) + image[1:-1, 1:-1] = True + # + # The result should be four diagonals from the + # corners, meeting in a horizontal line + # + expected = cp.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 1, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ) + result = medial_axis(image) + assert cp.all(result == expected) + result, distance = medial_axis(image, return_distance=True) + assert distance.max() == 4 + + def test_01_02_hole(self): + """Test skeletonize on a rectangle with a hole in the middle""" + image = cp.zeros((9, 15), bool) + image[1:-1, 1:-1] = True + image[4, 4:-4] = False + expected = cp.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ) + result = medial_axis(image) + assert cp.all(result == expected) + + def test_narrow_image(self): + """Test skeletonize on a 1-pixel thin strip""" + image = cp.zeros((1, 5), bool) + image[:, 1:-1] = True + result = medial_axis(image) + assert cp.all(result == image) diff --git a/python/cucim/src/cucim/skimage/registration/_optical_flow.py b/python/cucim/src/cucim/skimage/registration/_optical_flow.py index 93ce39a5e..efbf3d7c3 100644 --- a/python/cucim/src/cucim/skimage/registration/_optical_flow.py +++ b/python/cucim/src/cucim/skimage/registration/_optical_flow.py @@ -9,6 +9,7 @@ import cupy as cp from cupyx.scipy import ndimage as ndi +from .._shared._gradient import gradient from .._shared.utils import _supported_float_type from ..transform import warp from ._optical_flow_utils import coarse_to_fine, 
get_warp_points @@ -79,7 +80,8 @@ def _tvl1(reference_image, moving_image, flow0, attachment, tightness, image1_warp = warp(moving_image, get_warp_points(grid, flow_current), mode='edge') - grad = cp.stack(cp.gradient(image1_warp)) + # output_as_array=True stacks the gradients along the first axis + grad = gradient(image1_warp, output_as_array=True) NI = (grad * grad).sum(0) NI[NI == 0] = 1 @@ -288,7 +290,8 @@ def _ilk(reference_image, moving_image, flow0, radius, num_warp, gaussian, moving_image_warp = warp(moving_image, get_warp_points(grid, flow), mode='edge') - grad = cp.stack(cp.gradient(moving_image_warp), axis=0) + # output_as_array=True stacks the gradients along the first axis + grad = gradient(moving_image_warp, output_as_array=True) error_image = ((grad * flow).sum(axis=0) + reference_image - moving_image_warp) diff --git a/python/cucim/src/cucim/skimage/registration/tests/test_masked_phase_cross_correlation.py b/python/cucim/src/cucim/skimage/registration/tests/test_masked_phase_cross_correlation.py index 0bafa2c2f..404f64132 100644 --- a/python/cucim/src/cucim/skimage/registration/tests/test_masked_phase_cross_correlation.py +++ b/python/cucim/src/cucim/skimage/registration/tests/test_masked_phase_cross_correlation.py @@ -4,7 +4,7 @@ from cupyx.scipy.ndimage import fourier_shift from cupyx.scipy.ndimage import shift as real_shift from numpy.testing import assert_almost_equal -from skimage.data import brain, camera +from skimage.data import camera from skimage.io import imread from cucim.skimage._shared.fft import fftmodule as fft @@ -67,6 +67,7 @@ def test_masked_registration_random_masks(): def test_masked_registration_3d_contiguous_mask(): """masked_register_translation should be able to register translations between volumes with contiguous masks.""" + brain = pytest.importorskip('skimage.data.brain') ref_vol = cp.array(brain()[:, ::2, ::2]) offset = (1, -5, 10) diff --git a/python/cucim/src/cucim/skimage/restoration/_denoise.py 
b/python/cucim/src/cucim/skimage/restoration/_denoise.py index 614c5e7b3..392fc4ad2 100644 --- a/python/cucim/src/cucim/skimage/restoration/_denoise.py +++ b/python/cucim/src/cucim/skimage/restoration/_denoise.py @@ -91,8 +91,9 @@ def _denoise_tv_chambolle_nd(image, weight=0.1, eps=2.0e-4, max_num_iter=200): return out -@utils.deprecate_kwarg({'n_iter_max': 'max_num_iter'}, removed_version="1.0", - deprecated_version="0.19.2") +@utils.deprecate_kwarg({'n_iter_max': 'max_num_iter'}, + removed_version="23.02.00", + deprecated_version="22.06.00") @utils.deprecate_multichannel_kwarg(multichannel_position=4) def denoise_tv_chambolle(image, weight=0.1, eps=2.0e-4, max_num_iter=200, multichannel=False, *, channel_axis=None): diff --git a/python/cucim/src/cucim/skimage/restoration/deconvolution.py b/python/cucim/src/cucim/skimage/restoration/deconvolution.py index 9809637eb..2a1cfa67c 100644 --- a/python/cucim/src/cucim/skimage/restoration/deconvolution.py +++ b/python/cucim/src/cucim/skimage/restoration/deconvolution.py @@ -383,8 +383,8 @@ def unsupervised_wiener(image, psf, reg=None, user_params=None, is_real=True, return (x_postmean, {'noise': gn_chain, 'prior': gx_chain}) -@deprecate_kwarg({'iterations': 'num_iter'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg({'iterations': 'num_iter'}, removed_version="23.02.00", + deprecated_version="22.02.00") def richardson_lucy(image, psf, num_iter=50, clip=True, filter_epsilon=None): """Richardson-Lucy deconvolution. 
diff --git a/python/cucim/src/cucim/skimage/restoration/j_invariant.py b/python/cucim/src/cucim/skimage/restoration/j_invariant.py index 19f86a444..2add11e72 100644 --- a/python/cucim/src/cucim/skimage/restoration/j_invariant.py +++ b/python/cucim/src/cucim/skimage/restoration/j_invariant.py @@ -3,7 +3,7 @@ import cupy as cp import numpy as np -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from .._shared.utils import _supported_float_type from ..metrics import mean_squared_error diff --git a/python/cucim/src/cucim/skimage/restoration/tests/test_j_invariant.py b/python/cucim/src/cucim/skimage/restoration/tests/test_j_invariant.py index b6a697789..58f3bc0a1 100644 --- a/python/cucim/src/cucim/skimage/restoration/tests/test_j_invariant.py +++ b/python/cucim/src/cucim/skimage/restoration/tests/test_j_invariant.py @@ -25,7 +25,7 @@ # TODO: replace with CuPy version once completed def _denoise_wavelet(image, rescale_sigma=True, **kwargs): - if 'channel_axis' in kwargs and skimage.__version__ < '0.19': + if 'channel_axis' in kwargs and skimage.__version__ < '22.02.00': # convert channel_axis to older multichannel kwarg for skimage 0.18.x channel_axis = kwargs.pop('channel_axis') kwargs['multichannel'] = False if channel_axis is None else True @@ -62,7 +62,7 @@ def test_invariant_denoise_color(dtype): def test_invariant_denoise_color_deprecated(): - # can remove \A\Z here if only testing with scikit-image 0.19 + # can remove \A\Z here if only testing with cuCIM 22.02.00 with expected_warnings([r"`multichannel` is a deprecated argument|\A\Z"]): denoised_img_color = _invariant_denoise( noisy_img_color, _denoise_wavelet, diff --git a/python/cucim/src/cucim/skimage/segmentation/__init__.py b/python/cucim/src/cucim/skimage/segmentation/__init__.py index b5527b8a1..55b65058f 100644 --- a/python/cucim/src/cucim/skimage/segmentation/__init__.py +++ b/python/cucim/src/cucim/skimage/segmentation/__init__.py @@ -1,4 +1,6 @@ +from ._chan_vese 
import chan_vese from ._clear_border import clear_border +from ._expand_labels import expand_labels from ._join import join_segmentations, relabel_sequential from .boundaries import find_boundaries, mark_boundaries from .morphsnakes import (checkerboard_level_set, disk_level_set, @@ -7,12 +9,14 @@ from .random_walker_segmentation import random_walker __all__ = [ + "expand_labels", "random_walker", "find_boundaries", "mark_boundaries", "clear_border", "join_segmentations", "relabel_sequential", + "chan_vese", "morphological_geodesic_active_contour", "morphological_chan_vese", "inverse_gaussian_gradient", diff --git a/python/cucim/src/cucim/skimage/segmentation/_chan_vese.py b/python/cucim/src/cucim/skimage/segmentation/_chan_vese.py new file mode 100644 index 000000000..0401c1f07 --- /dev/null +++ b/python/cucim/src/cucim/skimage/segmentation/_chan_vese.py @@ -0,0 +1,442 @@ +import cupy as cp +import numpy as np +from cupyx import rsqrt # reciprocal sqrt + +from cucim.core.operations.morphology import distance_transform_edt + +from .._shared.utils import _supported_float_type, deprecate_kwarg + + +@cp.fuse() +def _fused_curvature(phi, x_start, x_end, y_start, y_end, ul, ur, ll, lr): + fy = (y_end - y_start) / 2.0 + fx = (x_end - x_start) / 2.0 + fyy = y_end + y_start - 2 * phi + fxx = x_end + x_start - 2 * phi + fxy = .25 * (lr + ul - ur - ll) + grad2 = fx**2 + fy**2 + K = (fxx * fy**2 - 2 * fxy * fx * fy + fyy * fx**2) + K /= (grad2 * cp.sqrt(grad2) + 1e-8) + return K + + +def _cv_curvature(phi): + """Returns the 'curvature' of a level set 'phi'. 
+ """ + P = cp.pad(phi, 1, mode='edge') + y_start = P[:-2, 1:-1] + y_end = P[2:, 1:-1] + x_start = P[1:-1, :-2] + x_end = P[1:-1, 2:] + + lower_right = P[2:, 2:] + lower_left = P[2:, :-2] + upper_right = P[:-2, 2:] + upper_left = P[:-2, :-2] + K = _fused_curvature(phi, x_start, x_end, y_start, y_end, upper_left, + upper_right, lower_left, lower_right) + return K + + +@cp.fuse() +def _fused_variance_kernel1(eta, x_start, x_mid, x_end, y_start, y_mid, y_end): + phixp = x_end - x_mid + phixn = x_mid - x_start + phix0 = x_end - x_start + phix0 /= 2.0 + phixp *= phixp + phixn *= phixn + phix0 *= phix0 + + phiyp = y_end - y_mid + phiyn = y_mid - y_start + phiy0 = y_end - y_start + phiy0 /= 2.0 + phiyp *= phiyp + phiyn *= phiyn + phiy0 *= phiy0 + + C1 = rsqrt(eta + phixp + phiy0) + C2 = rsqrt(eta + phixn + phiy0) + C3 = rsqrt(eta + phix0 + phiyp) + C4 = rsqrt(eta + phix0 + phiyn) + + K = x_end * C1 + K += x_start * C2 + K += y_end * C3 + K += y_start * C4 + + Csum = C1 + Csum += C2 + Csum += C3 + Csum += C4 + + return K, Csum + + +@cp.fuse() +def _fused_hphi_hinv(phi): + Hphi = (phi > 0).astype(phi.dtype) + Hinv = 1.0 - Hphi + return Hphi, Hinv + + +@cp.fuse() +def _fused_variance_kernel2( + image, c1, c2, lam1, lam2, phi, K, dt, mu, delta_phi, Csum +): + difference_term = image - c1 + difference_term *= difference_term + difference_term *= -lam1 + + term2 = image - c2 + term2 *= term2 + term2 *= lam2 + difference_term += term2 + + new_phi = phi + (dt * delta_phi) * (mu * K + difference_term) + out = new_phi / (1 + mu * dt * delta_phi * Csum) + return out + + +def _cv_calculate_variation(image, phi, mu, lambda1, lambda2, dt): + """Returns the variation of level set 'phi' based on algorithm parameters. 
+ """ + eta = 1e-16 + P = cp.pad(phi, 1, mode='edge') + + x_end = P[1:-1, 2:] + x_mid = P[1:-1, 1:-1] + x_start = P[1:-1, :-2] + + y_end = P[2:, 1:-1] + y_mid = P[1:-1, 1:-1] + y_start = P[:-2, 1:-1] + + K, Csum = _fused_variance_kernel1( + eta, x_start, x_mid, x_end, y_start, y_mid, y_end + ) + Hphi, Hinv = _fused_hphi_hinv(phi) + c1, c2 = _cv_calculate_averages(image, Hphi, Hinv) + delta_phi = _cv_delta(phi) + out = _fused_variance_kernel2( + image, c1, c2, lambda1, lambda2, phi, K, dt, mu, delta_phi, Csum + ) + return out + + +@cp.fuse() +def _cv_heavyside(x, eps=1.): + """Returns the result of a regularised heavyside function of the + input value(s). + """ + return 0.5 * (1. + (2. / cp.pi) * cp.arctan(x / eps)) + + +@cp.fuse() +def _cv_delta(x, eps=1.): + """Returns the result of a regularised dirac function of the + input value(s). + """ + return eps / (eps * eps + x * x) + + +@cp.fuse() +def _fused_inplace_eps_div(num, denom, eps): + denom += eps + num /= denom + return + + +def _cv_calculate_averages(image, H, Hinv): + """Returns the average values 'inside' and 'outside'. + """ + Hsum = cp.sum(H) + Hinvsum = cp.sum(Hinv) + avg_inside = cp.sum(image * H) + avg_oustide = cp.sum(image * Hinv) + + eps = 10 * cp.finfo(image.dtype).eps + _fused_inplace_eps_div(avg_inside, Hsum, eps) + _fused_inplace_eps_div(avg_oustide, Hinvsum, eps) + return (avg_inside, avg_oustide) + + +@cp.fuse() +def _fused_difference_op1(image, c, h, lam): + out = image - c + out *= out + out *= h + out *= lam + return out + + +def _cv_difference_from_average_term(image, Hphi, lambda_pos, lambda_neg): + """Returns the 'energy' contribution due to the difference from + the average value within a region at each point. + """ + Hinv = 1. 
- Hphi + (c1, c2) = _cv_calculate_averages(image, Hphi, Hinv) + out = _fused_difference_op1(image, c1, Hphi, lambda_pos) + out += _fused_difference_op1(image, c2, Hinv, lambda_neg) + return out + + +def _cv_edge_length_term(phi, mu): + """Returns the 'energy' contribution due to the length of the + edge between regions at each point, multiplied by a factor 'mu'. + """ + e = _cv_curvature(phi) + e *= mu + return e + + +def _cv_energy(image, phi, mu, lambda1, lambda2): + """Returns the total 'energy' of the current level set function. + """ + H = _cv_heavyside(phi) + avgenergy = _cv_difference_from_average_term(image, H, lambda1, lambda2) + lenenergy = _cv_edge_length_term(phi, mu) + return cp.sum(avgenergy) + cp.sum(lenenergy) + + +def _cv_checkerboard(image_size, square_size, dtype=cp.float64): + """Generates a checkerboard level set function. + + According to Pascal Getreuer, such a level set function has fast + convergence. + """ + yv = cp.arange(image_size[0], dtype=dtype)[:, np.newaxis] + xv = cp.arange(image_size[1], dtype=dtype)[np.newaxis, :] + sf = cp.pi / square_size + xv *= sf + yv *= sf + cp.sin(xv, out=xv) + cp.sin(yv, out=yv) + return xv * yv + + +def _cv_large_disk(image_size): + """Generates a disk level set function. + + The disk covers the whole image along its smallest dimension. + """ + res = cp.ones(image_size, dtype=bool) + centerY = int((image_size[0] - 1) / 2) + centerX = int((image_size[1] - 1) / 2) + res[centerY, centerX] = 0. + radius = float(min(centerX, centerY)) + out = radius - distance_transform_edt(res) + out /= radius + return out + + +def _cv_small_disk(image_size): + """Generates a disk level set function. + + The disk covers half of the image along its smallest dimension. + """ + res = cp.ones(image_size, dtype=bool) + centerY = int((image_size[0] - 1) / 2) + centerX = int((image_size[1] - 1) / 2) + res[centerY, centerX] = 0. 
+ radius = float(min(centerX, centerY)) / 2.0 + out = radius - distance_transform_edt(res) + out /= radius * 3 + return out + + +def _cv_init_level_set(init_level_set, image_shape, dtype=cp.float64): + """Generates an initial level set function conditional on input arguments. + """ + if type(init_level_set) == str: + if init_level_set == 'checkerboard': + res = _cv_checkerboard(image_shape, 5, dtype) + elif init_level_set == 'disk': + res = _cv_large_disk(image_shape) + elif init_level_set == 'small disk': + res = _cv_small_disk(image_shape) + else: + raise ValueError("Incorrect name for starting level set preset.") + else: + res = init_level_set + return res.astype(dtype, copy=False) + + +@deprecate_kwarg({'max_iter': 'max_num_iter'}, removed_version="1.0", + deprecated_version="0.19") +def chan_vese(image, mu=0.25, lambda1=1.0, lambda2=1.0, tol=1e-3, + max_num_iter=500, dt=0.5, init_level_set='checkerboard', + extended_output=False): + """Chan-Vese segmentation algorithm. + + Active contour model by evolving a level set. Can be used to + segment objects without clearly defined boundaries. + + Parameters + ---------- + image : (M, N) ndarray + Grayscale image to be segmented. + mu : float, optional + 'edge length' weight parameter. Higher `mu` values will + produce a 'round' edge, while values closer to zero will + detect smaller objects. + lambda1 : float, optional + 'difference from average' weight parameter for the output + region with value 'True'. If it is lower than `lambda2`, this + region will have a larger range of values than the other. + lambda2 : float, optional + 'difference from average' weight parameter for the output + region with value 'False'. If it is lower than `lambda1`, this + region will have a larger range of values than the other. + tol : float, positive, optional + Level set variation tolerance between iterations. 
If the + L2 norm difference between the level sets of successive + iterations normalized by the area of the image is below this + value, the algorithm will assume that the solution was + reached. + max_num_iter : uint, optional + Maximum number of iterations allowed before the algorithm + interrupts itself. + dt : float, optional + A multiplication factor applied at calculations for each step, + serves to accelerate the algorithm. While higher values may + speed up the algorithm, they may also lead to convergence + problems. + init_level_set : str or (M, N) ndarray, optional + Defines the starting level set used by the algorithm. + If a string is inputted, a level set that matches the image + size will automatically be generated. Alternatively, it is + possible to define a custom level set, which should be an + array of float values, with the same shape as 'image'. + Accepted string values are as follows. + + 'checkerboard' + the starting level set is defined as + sin(x/5*pi)*sin(y/5*pi), where x and y are pixel + coordinates. This level set has fast convergence, but may + fail to detect implicit edges. + 'disk' + the starting level set is defined as the opposite + of the distance from the center of the image minus half of + the minimum value between image width and image height. + This is somewhat slower, but is more likely to properly + detect implicit edges. + 'small disk' + the starting level set is defined as the + opposite of the distance from the center of the image + minus a quarter of the minimum value between image width + and image height. + extended_output : bool, optional + If set to True, the return value will be a tuple containing + the three return values (see below). If set to False which + is the default value, only the 'segmentation' array will be + returned. + + Returns + ------- + segmentation : (M, N) ndarray, bool + Segmentation produced by the algorithm. + phi : (M, N) ndarray of floats + Final level set computed by the algorithm. 
+ energies : list of floats + Shows the evolution of the 'energy' for each step of the + algorithm. This makes it possible to check whether the algorithm + converged. + + Notes + ----- + The Chan-Vese Algorithm is designed to segment objects without + clearly defined boundaries. This algorithm is based on level sets + that are evolved iteratively to minimize an energy, which is + defined by weighted values corresponding to the sum of intensity + differences from the average value outside the segmented region, the + sum of differences from the average value inside the segmented + region, and a term which is dependent on the length of the + boundary of the segmented region. + + This algorithm was first proposed by Tony Chan and Luminita Vese, + in a publication entitled "An Active Contour Model Without Edges" + [1]_. + + This implementation of the algorithm is somewhat simplified in the + sense that the area factor 'nu' described in the original paper is + not implemented, and is only suitable for grayscale images. + + Typical values for `lambda1` and `lambda2` are 1. If the + 'background' is very different from the segmented object in terms + of distribution (for example, a uniform black image with figures + of varying intensity), then these values should be different from + each other. + + Typical values for mu are between 0 and 1, though higher values + can be used when dealing with shapes with very ill-defined + contours. + + The 'energy' which this algorithm tries to minimize is defined + as the sum of the differences from the average within the region + squared and weighted by the 'lambda' factors to which is added the + length of the contour multiplied by the 'mu' factor. + + Supports 2D grayscale images only, and does not implement the area + term described in the original article. + + References + ---------- + .. 
[1] An Active Contour Model without Edges, Tony Chan and + Luminita Vese, Scale-Space Theories in Computer Vision, + 1999, :DOI:`10.1007/3-540-48236-9_13` + .. [2] Chan-Vese Segmentation, Pascal Getreuer Image Processing On + Line, 2 (2012), pp. 214-224, + :DOI:`10.5201/ipol.2012.g-cv` + .. [3] The Chan-Vese Algorithm - Project Report, Rami Cohen, 2011 + :arXiv:`1107.2782` + """ + if len(image.shape) != 2: + raise ValueError("Input image should be a 2D array.") + + float_dtype = _supported_float_type(image.dtype) + phi = _cv_init_level_set(init_level_set, image.shape, dtype=float_dtype) + if type(phi) != cp.ndarray or phi.shape != image.shape: + raise ValueError("The dimensions of initial level set do not " + "match the dimensions of image.") + + image = image.astype(float_dtype, copy=False) + image = image - cp.min(image) + if cp.max(image) != 0: + image = image / cp.max(image) + + i = 0 + if extended_output: + old_energy = _cv_energy(image, phi, mu, lambda1, lambda2) + energies = [] + phivar = tol + 1 + + while phivar > tol and i < max_num_iter: + # Save old level set values + oldphi = phi + + # Calculate new level set + phi = _cv_calculate_variation(image, phi, mu, lambda1, lambda2, dt) + phivar = phi - oldphi + phivar *= phivar + phivar = cp.sqrt(phivar.mean()) + + if extended_output: + # Extract energy + new_energy = _cv_energy(image, phi, mu, lambda1, lambda2) + + # Could compare energy to the previous level set to see if + # continuing is necessary + + # Save old energy values + energies.append(old_energy) + old_energy = new_energy + i += 1 + + segmentation = phi > 0 + + if extended_output: + return (segmentation, phi, energies) + else: + return segmentation diff --git a/python/cucim/src/cucim/skimage/segmentation/_expand_labels.py b/python/cucim/src/cucim/skimage/segmentation/_expand_labels.py new file mode 100644 index 000000000..8ec47a7be --- /dev/null +++ b/python/cucim/src/cucim/skimage/segmentation/_expand_labels.py @@ -0,0 +1,96 @@ +import cupy as cp 
+ +from cucim.core.operations.morphology import distance_transform_edt + + +def expand_labels(label_image, distance=1): + """Expand labels in label image by ``distance`` pixels without overlapping. + + Given a label image, ``expand_labels`` grows label regions (connected components) + outwards by up to ``distance`` pixels without overflowing into neighboring regions. + More specifically, each background pixel that is within Euclidean distance + of <= ``distance`` pixels of a connected component is assigned the label of that + connected component. + Where multiple connected components are within ``distance`` pixels of a background + pixel, the label value of the closest connected component will be assigned (see + Notes for the case of multiple labels at equal distance). + + Parameters + ---------- + label_image : ndarray of dtype int + label image + distance : float + Euclidean distance in pixels by which to grow the labels. Default is one. + + Returns + ------- + enlarged_labels : ndarray of dtype int + Labeled array, where all connected regions have been enlarged + + Notes + ----- + Where labels are spaced more than ``distance`` pixels apart, this is + equivalent to a morphological dilation with a disc or hyperball of radius ``distance``. + However, in contrast to a morphological dilation, ``expand_labels`` will + not expand a label region into a neighboring region. + + This implementation of ``expand_labels`` is derived from CellProfiler [1]_, where + it is known as module "IdentifySecondaryObjects (Distance-N)" [2]_. + + There is an important edge case when a pixel has the same distance to + multiple regions, as it is not defined which region expands into that + space. Here, the exact behavior depends on the implementation + of ``cucim.core.operations.morphology.distance_transform_edt``. + + See Also + -------- + :func:`cucim.skimage.measure.label`, :func:`cucim.skimage.morphology.dilation` # noqa + + References + ---------- + .. [1] https://cellprofiler.org + .. 
[2] https://github.com/CellProfiler/CellProfiler/blob/082930ea95add7b72243a4fa3d39ae5145995e9c/cellprofiler/modules/identifysecondaryobjects.py#L559 # noqa + + Examples + -------- + >>> labels = np.array([0, 1, 0, 0, 0, 0, 2]) + >>> expand_labels(labels, distance=1) + array([1, 1, 1, 0, 0, 2, 2]) + + Labels will not overwrite each other: + + >>> expand_labels(labels, distance=3) + array([1, 1, 1, 1, 2, 2, 2]) + + In case of ties, behavior is undefined, but currently resolves to the + label closest to ``(0,) * ndim`` in lexicographical order. + + >>> labels_tied = np.array([0, 1, 0, 2, 0]) + >>> expand_labels(labels_tied, 1) + array([1, 1, 1, 2, 2]) + >>> labels2d = np.array( + ... [[0, 1, 0, 0], + ... [2, 0, 0, 0], + ... [0, 3, 0, 0]] + ... ) + >>> expand_labels(labels2d, 1) + array([[2, 1, 1, 0], + [2, 2, 0, 0], + [2, 3, 3, 0]]) + """ + + distances, nearest_label_coords = distance_transform_edt( + label_image == 0, return_indices=True + ) + labels_out = cp.zeros_like(label_image) + dilate_mask = distances <= distance + # build the coordinates to find nearest labels, + # in contrast to [1] this implementation supports label arrays + # of any dimension + masked_nearest_label_coords = [ + dimension_indices[dilate_mask] + for dimension_indices in nearest_label_coords + ] + nearest_labels = label_image[tuple(masked_nearest_label_coords)] + labels_out[dilate_mask] = nearest_labels + return labels_out diff --git a/python/cucim/src/cucim/skimage/segmentation/boundaries.py b/python/cucim/src/cucim/skimage/segmentation/boundaries.py index 62724d9fc..d631c28e9 100644 --- a/python/cucim/src/cucim/skimage/segmentation/boundaries.py +++ b/python/cucim/src/cucim/skimage/segmentation/boundaries.py @@ -1,5 +1,5 @@ import cupy as cp -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from .._shared.utils import _supported_float_type from ..color import gray2rgb diff --git a/python/cucim/src/cucim/skimage/segmentation/morphsnakes.py 
b/python/cucim/src/cucim/skimage/segmentation/morphsnakes.py index 9938cb9d1..dd3f60a82 100644 --- a/python/cucim/src/cucim/skimage/segmentation/morphsnakes.py +++ b/python/cucim/src/cucim/skimage/segmentation/morphsnakes.py @@ -3,10 +3,12 @@ import cupy as cp import numpy as np -from cupyx.scipy import ndimage as ndi +from cupyx import rsqrt +import cucim.skimage._vendored.ndimage as ndi from cucim import _misc +from .._shared._gradient import gradient from .._shared.utils import check_nD, deprecate_kwarg __all__ = ['morphological_chan_vese', @@ -29,64 +31,53 @@ def __call__(self, *args, **kwargs): # SI and IS operators for 2D and 3D. -_P2 = [np.eye(3), - np.array([[0, 1, 0]] * 3), - np.flipud(np.eye(3)), - np.rot90([[0, 1, 0]] * 3)] -_P3 = [np.zeros((3, 3, 3)) for i in range(9)] - -_P3[0][:, :, 1] = 1 -_P3[1][:, 1, :] = 1 -_P3[2][1, :, :] = 1 -_P3[3][:, [0, 1, 2], [0, 1, 2]] = 1 -_P3[4][:, [0, 1, 2], [2, 1, 0]] = 1 -_P3[5][[0, 1, 2], :, [0, 1, 2]] = 1 -_P3[6][[0, 1, 2], :, [2, 1, 0]] = 1 -_P3[7][[0, 1, 2], [0, 1, 2], :] = 1 -_P3[8][[0, 1, 2], [2, 1, 0], :] = 1 - - -def sup_inf(u): +def _get_P2(): + _P2 = [cp.eye(3), + cp.array([[0, 1, 0]] * 3), + cp.array(np.flipud(np.eye(3))), + cp.array(np.rot90([[0, 1, 0]] * 3))] + return _P2 + + +def _get_P3(): + _P3 = [np.zeros((3, 3, 3)) for i in range(9)] + + _P3[0][:, :, 1] = 1 + _P3[1][:, 1, :] = 1 + _P3[2][1, :, :] = 1 + _P3[3][:, [0, 1, 2], [0, 1, 2]] = 1 + _P3[4][:, [0, 1, 2], [2, 1, 0]] = 1 + _P3[5][[0, 1, 2], :, [0, 1, 2]] = 1 + _P3[6][[0, 1, 2], :, [2, 1, 0]] = 1 + _P3[7][[0, 1, 2], [0, 1, 2], :] = 1 + _P3[8][[0, 1, 2], [2, 1, 0], :] = 1 + return [cp.array(p) for p in _P3] + + +def sup_inf(u, footprints, workspace=None): """SI operator.""" - - if _misc.ndim(u) == 2: - P = _P2 - elif _misc.ndim(u) == 3: - P = _P3 + if workspace is None: + erosions = cp.empty(((len(footprints),) + u.shape), dtype=u.dtype) else: - raise ValueError("u has an invalid number of dimensions " - "(should be 2 or 3)") + erosions = workspace 
+ for i, footprint in enumerate(footprints): + erosions[i, ...] = ndi.binary_erosion(u, footprint) + return erosions.max(0) - erosions = [] - for P_i in P: - e = ndi.binary_erosion(u, cp.asarray(P_i)).astype(np.int8, copy=False) - erosions.append(e) - return cp.stack(erosions, axis=0).max(0) - - -def inf_sup(u): +def inf_sup(u, footprints, workspace=None): """IS operator.""" - - if _misc.ndim(u) == 2: - P = _P2 - elif _misc.ndim(u) == 3: - P = _P3 + if workspace is None: + dilations = cp.empty(((len(footprints),) + u.shape), dtype=u.dtype) else: - raise ValueError("u has an invalid number of dimensions " - "(should be 2 or 3)") - - dilations = [] - for P_i in P: - d = ndi.binary_dilation(u, cp.asarray(P_i)).astype(np.int8, - copy=False) - dilations.append(d) - - return cp.stack(dilations, axis=0).min(0) + dilations = workspace + for i, footprint in enumerate(footprints): + dilations[i, ...] = ndi.binary_dilation(u, footprint) + return dilations.min(0) -_curvop = _fcycle([lambda u: sup_inf(inf_sup(u)), # SIoIS - lambda u: inf_sup(sup_inf(u))]) # ISoSI +_curvop = _fcycle([lambda u, f, w: sup_inf(inf_sup(u, f, w), f, w), # SIoIS + lambda u, f, w: inf_sup(sup_inf(u, f, w), f, w)]) # ISoSI def _check_input(image, init_level_set): @@ -187,6 +178,11 @@ def checkerboard_level_set(image_shape, square_size=5): return res +@cp.fuse() +def _fused_inverse_kernel(gradnorm, alpha): + return rsqrt(1.0 + alpha * gradnorm) + + def inverse_gaussian_gradient(image, alpha=100.0, sigma=5.0): """Inverse of gradient magnitude. @@ -216,11 +212,35 @@ def inverse_gaussian_gradient(image, alpha=100.0, sigma=5.0): `morphological_geodesic_active_contour`. 
""" gradnorm = ndi.gaussian_gradient_magnitude(image, sigma, mode='nearest') - return 1.0 / cp.sqrt(1.0 + alpha * gradnorm) + return _fused_inverse_kernel(gradnorm, alpha) + + +@cp.fuse() +def _abs_grad_kernel(gx, gy): + return cp.abs(gx) + cp.abs(gy) -@deprecate_kwarg({'iterations': 'num_iter'}, removed_version="1.0", - deprecated_version="0.19") +@cp.fuse() +def _fused_variance_kernel( + image, c1, c2, lam1, lam2, abs_du, +): + difference_term = image - c1 + difference_term *= difference_term + difference_term *= lam1 + term2 = image - c2 + term2 *= term2 + term2 *= lam2 + difference_term -= term2 + + aux = abs_du * difference_term + aux_lt0 = aux < 0 + aux_gt0 = aux > 0 + return aux_lt0, aux_gt0 + + +@deprecate_kwarg({'iterations': 'num_iter'}, + removed_version="23.02.00", + deprecated_version="22.02.00") def morphological_chan_vese(image, num_iter, init_level_set='checkerboard', smoothing=1, lambda1=1, lambda2=1, iter_callback=lambda x: None): @@ -298,36 +318,46 @@ def morphological_chan_vese(image, num_iter, init_level_set='checkerboard', u = (init_level_set > 0).astype(cp.int8) - iter_callback(u) + if _misc.ndim(u) == 2: + footprints = _get_P2() + elif _misc.ndim(u) == 3: + footprints = _get_P3() + else: + raise ValueError("u has an invalid number of dimensions " + "(should be 2 or 3)") + workspace = cp.empty(((len(footprints),) + u.shape), dtype=u.dtype) - for _ in range(num_iter): + iter_callback(u) + for i in range(num_iter): # inside = u > 0 # outside = u <= 0 - c0 = (image * (1 - u)).sum() / float((1 - u).sum() + 1e-8) - c1 = (image * u).sum() / float(u.sum() + 1e-8) + c0 = (image * (1 - u)).sum() + c0 /= float((1 - u).sum() + 1e-8) + c1 = (image * u).sum() + c1 /= float(u.sum() + 1e-8) # Image attachment - du = cp.gradient(u) - abs_du = cp.abs(cp.stack(du, axis=0)).sum(0) - aux = abs_du * ( - lambda1 * (image - c1) ** 2 - lambda2 * (image - c0) ** 2 + du = gradient(u) + abs_du = _abs_grad_kernel(du[0], du[1]) + aux_lt0, aux_gt0 = 
_fused_variance_kernel( + image, c1, c0, lambda1, lambda2, abs_du ) - - u[aux < 0] = 1 - u[aux > 0] = 0 + u[aux_lt0] = 1 + u[aux_gt0] = 0 # Smoothing for _ in range(smoothing): - u = _curvop(u) + u = _curvop(u, footprints, workspace) iter_callback(u) return u -@deprecate_kwarg({'iterations': 'num_iter'}, removed_version="1.0", - deprecated_version="0.19") +@deprecate_kwarg({'iterations': 'num_iter'}, + removed_version="23.02.00", + deprecated_version="22.02.00") def morphological_geodesic_active_contour(gimage, num_iter, init_level_set='disk', smoothing=1, threshold='auto', balloon=0, @@ -418,13 +448,22 @@ def morphological_geodesic_active_contour(gimage, num_iter, threshold = cp.percentile(image, 40) structure = cp.ones((3,) * len(image.shape), dtype=cp.int8) - dimage = cp.gradient(image) + dimage = gradient(image) # threshold_mask = image > threshold if balloon != 0: threshold_mask_balloon = image > threshold / cp.abs(balloon) u = (init_level_set > 0).astype(cp.int8) + if _misc.ndim(u) == 2: + footprints = _get_P2() + elif _misc.ndim(u) == 3: + footprints = _get_P3() + else: + raise ValueError("u has an invalid number of dimensions " + "(should be 2 or 3)") + workspace = cp.empty(((len(footprints),) + u.shape), dtype=u.dtype) + iter_callback(u) for _ in range(num_iter): @@ -439,7 +478,7 @@ def morphological_geodesic_active_contour(gimage, num_iter, # Image attachment aux = cp.zeros_like(image) - du = cp.gradient(u) + du = gradient(u) for el1, el2 in zip(dimage, du): aux += el1 * el2 u[aux > 0] = 1 @@ -447,7 +486,7 @@ def morphological_geodesic_active_contour(gimage, num_iter, # Smoothing for _ in range(smoothing): - u = _curvop(u) + u = _curvop(u, footprints, workspace) iter_callback(u) diff --git a/python/cucim/src/cucim/skimage/segmentation/random_walker_segmentation.py b/python/cucim/src/cucim/skimage/segmentation/random_walker_segmentation.py index 2acaaf8ba..0ec9ed408 100644 --- a/python/cucim/src/cucim/skimage/segmentation/random_walker_segmentation.py +++ 
b/python/cucim/src/cucim/skimage/segmentation/random_walker_segmentation.py @@ -9,7 +9,7 @@ import cupy as cp import numpy as np -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from cupyx.scipy import sparse from cupyx.scipy.sparse.linalg import cg, spsolve @@ -343,12 +343,6 @@ def random_walker(data, labels, beta=130, mode='cg_j', tol=1.e-3, copy=True, probability that label `label_nb` reaches the pixel `(i, j)` first. - See Also - -------- - skimage.morphology.watershed : watershed segmentation - A segmentation algorithm based on mathematical morphology - and "flooding" of regions from markers. - Notes ----- Multichannel inputs are scaled with all channel data combined. Ensure all diff --git a/python/cucim/src/cucim/skimage/segmentation/tests/test_chan_vese.py b/python/cucim/src/cucim/skimage/segmentation/tests/test_chan_vese.py new file mode 100644 index 000000000..15b991883 --- /dev/null +++ b/python/cucim/src/cucim/skimage/segmentation/tests/test_chan_vese.py @@ -0,0 +1,105 @@ +import cupy as cp +import pytest +from cupy.testing import assert_array_equal + +from cucim.skimage._shared._warnings import expected_warnings +from cucim.skimage._shared.utils import _supported_float_type +from cucim.skimage.segmentation import chan_vese + + +@pytest.mark.parametrize('dtype', [cp.float32, cp.float64]) +def test_chan_vese_flat_level_set(dtype): + # because the algorithm evolves the level set around the + # zero-level, if the level-set has no zero level, the algorithm + # will not produce results in theory. However, since a continuous + # approximation of the delta function is used, the algorithm + # still affects the entirety of the level-set. Therefore with + # infinite time, the segmentation will still converge. 
+ img = cp.zeros((10, 10), dtype=dtype) + img[3:6, 3:6] = 1 + ls = cp.full((10, 10), 1000, dtype=dtype) + result = chan_vese(img, mu=0.0, tol=1e-3, init_level_set=ls) + assert_array_equal(result.astype(float), cp.ones((10, 10))) + result = chan_vese(img, mu=0.0, tol=1e-3, init_level_set=-ls) + assert_array_equal(result.astype(float), cp.zeros((10, 10))) + + +def test_chan_vese_small_disk_level_set(): + img = cp.zeros((10, 10)) + img[3:6, 3:6] = 1 + result = chan_vese(img, mu=0.0, tol=1e-3, init_level_set="small disk") + assert_array_equal(result.astype(float), img) + + +def test_chan_vese_simple_shape(): + img = cp.zeros((10, 10)) + img[3:6, 3:6] = 1 + result = chan_vese(img, mu=0.0, tol=1e-8).astype(float) + assert_array_equal(result, img) + + +@pytest.mark.parametrize( + 'dtype', [cp.uint8, cp.float16, cp.float32, cp.float64] +) +def test_chan_vese_extended_output(dtype): + img = cp.zeros((10, 10), dtype=dtype) + img[3:6, 3:6] = 1 + result = chan_vese(img, mu=0.0, tol=1e-8, extended_output=True) + float_dtype = _supported_float_type(dtype) + assert result[1].dtype == float_dtype + assert all(arr.dtype == float_dtype for arr in result[2]) + assert_array_equal(len(result), 3) + + +def test_chan_vese_remove_noise(): + ref = cp.zeros((10, 10)) + ref[1:6, 1:6] = cp.array([[0, 1, 1, 1, 0], + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 1], + [0, 1, 1, 1, 0]]) + img = ref.copy() + img[8, 3] = 1 + result = chan_vese(img, mu=0.3, tol=1e-3, max_num_iter=100, dt=10, + init_level_set="disk").astype(float) + assert_array_equal(result, ref) + + +def test_chan_vese_incorrect_image_type(): + img = cp.zeros((10, 10, 3)) + ls = cp.zeros((10, 9)) + with pytest.raises(ValueError): + chan_vese(img, mu=0.0, init_level_set=ls) + + +def test_chan_vese_gap_closing(): + ref = cp.zeros((20, 20)) + ref[8:15, :] = cp.ones((7, 20)) + img = ref.copy() + img[:, 6] = cp.zeros((20)) + result = chan_vese(img, mu=0.7, tol=1e-3, max_num_iter=1000, dt=1000, + 
init_level_set="disk").astype(float) + assert_array_equal(result, ref) + + +def test_chan_vese_max_iter_deprecation(): + img = cp.zeros((20, 20)) + with expected_warnings(["`max_iter` is a deprecated argument"]): + chan_vese(img, max_iter=10) + + +def test_chan_vese_incorrect_level_set(): + img = cp.zeros((10, 10)) + ls = cp.zeros((10, 9)) + with pytest.raises(ValueError): + chan_vese(img, mu=0.0, init_level_set=ls) + with pytest.raises(ValueError): + chan_vese(img, mu=0.0, init_level_set="a") + + +def test_chan_vese_blank_image(): + img = cp.zeros((10, 10)) + level_set = cp.random.rand(10, 10) + ref = level_set > 0 + result = chan_vese(img, mu=0.0, tol=0.0, init_level_set=level_set) + assert_array_equal(result, ref) diff --git a/python/cucim/src/cucim/skimage/segmentation/tests/test_expand_labels.py b/python/cucim/src/cucim/skimage/segmentation/tests/test_expand_labels.py new file mode 100644 index 000000000..2fe1df30c --- /dev/null +++ b/python/cucim/src/cucim/skimage/segmentation/tests/test_expand_labels.py @@ -0,0 +1,226 @@ +import cupy as cp +import pytest +from cupy.testing import assert_array_equal + +from cucim.core.operations.morphology import distance_transform_edt +from cucim.skimage import data, measure +from cucim.skimage.segmentation import expand_labels + +SAMPLE1D = cp.array([0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0]) +SAMPLE1D_EXPANDED_3 = cp.array( + [4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0] +) + +# Some pixels are important edge cases with undefined behaviour: +# these are the pixels that are at the same distance from +# multiple labels. Ideally the label would be chosen at random +# to avoid bias, but as we are relying on the index map returned +# by the scipy.ndimage distance transform, what actually happens +# is determined by the upstream implementation of the distance +# transform, thus we don't give any guarantees for the edge case pixels. 
+# +# Regardless, it seems prudent to have a test including an edge case +# so we can detect whether future upstream changes in scipy.ndimage +# modify the behaviour. + +EDGECASE1D = cp.array([0, 0, 4, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0]) +EDGECASE1D_EXPANDED_3 = cp.array( + [4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0] +) + +SAMPLE2D = cp.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ] +) + +SAMPLE2D_EXPANDED_3 = cp.array( + [ + [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 0], + [1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2], + [1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2], + [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2], + [1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2], + [1, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2], + [0, 0, 1, 0, 0, 0, 0, 2, 2, 2, 2], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0], + ] +) + +# non-integer expansion +SAMPLE2D_EXPANDED_1_5 = cp.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 0, 0, 0, 2, 2, 2], + [1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2], + [0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2], + [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ] +) + + +EDGECASE2D = cp.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0], + [0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 2, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], + ] +) + +EDGECASE2D_EXPANDED_4 = cp.array( + [ + [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0], + [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2], + [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2], + [1, 1, 1, 1, 1, 2, 2, 2, 
2, 2, 0], + [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0], + ] +) + +SAMPLE3D = cp.array( + [ + [ + [0, 0, 0, 0], + [0, 3, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + ], + + [ + [0, 0, 0, 0], + [0, 3, 3, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + ], + + [ + [0, 0, 0, 0], + [0, 3, 0, 0], + [0, 0, 0, 0], + [0, 0, 5, 0], + ], + + [ + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 5, 0], + ] + ] +) + +SAMPLE3D_EXPANDED_2 = cp.array( + [ + [ + [3, 3, 3, 3], + [3, 3, 3, 3], + [3, 3, 3, 3], + [0, 3, 5, 0], + ], + + [ + [3, 3, 3, 3], + [3, 3, 3, 3], + [3, 3, 3, 3], + [0, 5, 5, 5], + ], + + [ + [3, 3, 3, 3], + [3, 3, 3, 3], + [3, 3, 5, 5], + [5, 5, 5, 5], + ], + + [ + [3, 3, 3, 0], + [3, 3, 3, 0], + [3, 3, 5, 5], + [5, 5, 5, 5], + ] + ] +) + +SAMPLE_EDGECASE_BEHAVIOUR = cp.array( + [[0, 1, 0, 0], [2, 0, 0, 0], [0, 3, 0, 0]] +) + + +@pytest.mark.parametrize( + "input_array, expected_output, expand_distance", + [ + (SAMPLE1D, SAMPLE1D_EXPANDED_3, 3), + (SAMPLE2D, SAMPLE2D_EXPANDED_3, 3), + (SAMPLE2D, SAMPLE2D_EXPANDED_1_5, 1.5), + (EDGECASE1D, EDGECASE1D_EXPANDED_3, 3), + (EDGECASE2D, EDGECASE2D_EXPANDED_4, 4), + (SAMPLE3D, SAMPLE3D_EXPANDED_2, 2) + ] +) +def test_expand_labels(input_array, expected_output, expand_distance): + if input_array.ndim == 1: + with pytest.raises(NotImplementedError): + expand_labels(input_array, expand_distance) + else: + expanded = expand_labels(input_array, expand_distance) + assert_array_equal(expanded, expected_output) + + +@pytest.mark.parametrize('ndim', [2, 3]) +@pytest.mark.parametrize('distance', range(6)) +def test_binary_blobs(ndim, distance): + """Check some invariants with label expansion. + + - New labels array should exactly contain the original labels array. + - Distance to old labels array within new labels should never exceed input + distance. + - Distance beyond the expanded labels should always exceed the input + distance. 
+ """ + img = data.binary_blobs(length=64, blob_size_fraction=0.05, n_dim=ndim) + labels = measure.label(img) + expanded = expand_labels(labels, distance=distance) + original_mask = labels != 0 + assert_array_equal(labels[original_mask], expanded[original_mask]) + expanded_only_mask = (expanded - labels).astype(bool) + distance_map = distance_transform_edt(~original_mask) + expanded_distances = distance_map[expanded_only_mask] + if expanded_distances.size > 0: + assert cp.all(expanded_distances <= distance) + beyond_expanded_distances = distance_map[~expanded.astype(bool)] + if beyond_expanded_distances.size > 0: + assert cp.all(beyond_expanded_distances > distance) + + +def test_edge_case_behaviour(): + """ Check edge case behavior to detect upstream changes + + For edge cases where a pixel has the same distance to several regions, + lexicographical order seems to determine which region gets to expand + into this pixel given the current upstream behaviour in + scipy.ndimage.distance_transform_edt. + + As a result, we expect different results when transposing the array. + If this test fails, something has changed upstream. 
+ """ + expanded = expand_labels(SAMPLE_EDGECASE_BEHAVIOUR, 1) + expanded_transpose = expand_labels(SAMPLE_EDGECASE_BEHAVIOUR.T, 1) + assert not cp.all(expanded == expanded_transpose.T) diff --git a/python/cucim/src/cucim/skimage/transform/_geometric.py b/python/cucim/src/cucim/skimage/transform/_geometric.py index 372cf3b84..91211834b 100644 --- a/python/cucim/src/cucim/skimage/transform/_geometric.py +++ b/python/cucim/src/cucim/skimage/transform/_geometric.py @@ -1421,10 +1421,8 @@ def __init__(self, matrix=None, scale=None, rotation=None, if translation is None: translation = (0,) * dimensionality if dimensionality == 2: - ax = (0, 1) c, s = _cos(rotation), _sin(rotation) - matrix[ax, ax] = c - matrix[ax, ax[::-1]] = -s, s + matrix[:2, :2] = xp.array([[c, -s], [s, c]], dtype=float) else: # 3D rotation matrix[:3, :3] = xp.asarray(_euler_rotation_matrix(rotation)) diff --git a/python/cucim/src/cucim/skimage/transform/_warps.py b/python/cucim/src/cucim/skimage/transform/_warps.py index 05aea7f24..7f2f69b2b 100644 --- a/python/cucim/src/cucim/skimage/transform/_warps.py +++ b/python/cucim/src/cucim/skimage/transform/_warps.py @@ -2,7 +2,7 @@ import cupy as cp import numpy as np -from cupyx.scipy import ndimage as ndi +import cucim.skimage._vendored.ndimage as ndi from .._shared.utils import (_to_ndimage_mode, _validate_interpolation_order, channel_as_last_axis, convert_to_float, @@ -260,8 +260,8 @@ def rescale(image, scale, order=None, mode='reflect', cval=0, clip=True, Otherwise, this parameter indicates which axis of the array corresponds to channels. - .. versionadded:: 0.19 - ``channel_axis`` was added in 0.19. + .. versionadded:: 22.02.00 + ``channel_axis`` was added in 22.02.00. 
Notes ----- @@ -766,19 +766,33 @@ def _clip_warp_output(input_image, output_image, mode, cval, clip): """ if clip: min_val = input_image.min().item() - max_val = input_image.max().item() - - preserve_cval = (mode == 'constant' and not - (min_val <= cval <= max_val)) + if np.isnan(min_val): + # NaNs detected, use NaN-safe min/max + min_func = cp.nanmin + max_func = cp.nanmax + min_val = min_func(input_image).item() + else: + min_func = cp.min + max_func = cp.max + max_val = max_func(input_image).item() + + # Check if cval has been used such that it expands the effective input + # range + preserve_cval = ( + mode == 'constant' + and not min_val <= cval <= max_val + and min_func(output_image) <= cval <= max_func(output_image) + ) + # expand min/max range to account for cval if preserve_cval: - cval_mask = output_image == cval + # cast cval to the same dtype as the input image + cval = input_image.dtype.type(cval) + min_val = min(min_val, cval) + max_val = max(max_val, cval) cp.clip(output_image, min_val, max_val, out=output_image) - if preserve_cval: - output_image[cval_mask] = cval - def warp(image, inverse_map, map_args={}, output_shape=None, order=None, mode='constant', cval=0., clip=True, preserve_range=False): @@ -1080,8 +1094,8 @@ def warp_polar(image, center=None, *, radius=None, output_shape=None, Otherwise, this parameter indicates which axis of the array corresponds to channels. - .. versionadded:: 0.19 - ``channel_axis`` was added in 0.19. + .. versionadded:: 22.02.00 + ``channel_axis`` was added in 22.02.00. **kwargs : keyword arguments Passed to `transform.warp`. 
diff --git a/python/cucim/src/cucim/skimage/transform/tests/test_warps.py b/python/cucim/src/cucim/skimage/transform/tests/test_warps.py index 6504c082d..8c8717f51 100644 --- a/python/cucim/src/cucim/skimage/transform/tests/test_warps.py +++ b/python/cucim/src/cucim/skimage/transform/tests/test_warps.py @@ -118,6 +118,78 @@ def test_warp_clip(): assert_almost_equal(float(outx.max()), 1) +@pytest.mark.parametrize('order', [0, 1]) +def test_warp_clip_image_containing_nans(order): + # Test that clipping works as intended on an image with NaNs + # Orders >1 do not produce good output when the input image has + # NaNs, so those orders are not tested. + + x = cp.ones((15, 15), dtype=cp.float64) + x[7, 7] = cp.nan + + outx = rotate(x, 45, order=order, cval=2, resize=True, clip=True) + + assert_almost_equal(cp.nanmin(outx).item(), 1) + assert_almost_equal(cp.nanmax(outx).item(), 2) + + +@pytest.mark.parametrize('order', [0, 1]) +def test_warp_clip_cval_is_nan(order): + # Test that clipping works as intended when cval is NaN + # Orders > 1 do not produce good output when cval is NaN, so those + # orders are not tested. 
+ + x = cp.ones((15, 15), dtype=cp.float64) + x[5:-5, 5:-5] = 2 + + outx = rotate(x, 45, order=order, cval=cp.nan, resize=True, clip=True) + + assert_almost_equal(cp.nanmin(outx).item(), 1) + assert_almost_equal(cp.nanmax(outx).item(), 2) + + +@pytest.mark.parametrize('order', range(6)) +def test_warp_clip_cval_outside_input_range(order): + # Test that clipping behavior considers cval part of the input range + + x = cp.ones((15, 15), dtype=cp.float64) + + # Specify a cval that is outside the input range to check clipping + outx = rotate(x, 45, order=order, cval=2, resize=True, clip=True) + + # The corners should be cval for all interpolation orders + outx = cp.asnumpy(outx) + assert_array_almost_equal([outx[0, 0], outx[0, -1], + outx[-1, 0], outx[-1, -1]], 2) + + # For all interpolation orders other than nearest-neighbor, the clipped + # output should have some pixels with values between the input (1) and + # cval (2) (i.e., clipping should not set them to 1) + if order > 0: + assert np.sum(np.less(1, outx) * np.less(outx, 2)) > 0 + + +@pytest.mark.parametrize('order', range(6)) +def test_warp_clip_cval_not_used(order): + # Test that clipping does not consider cval part of the input range if it + # is not used in the output image + + x = cp.ones((15, 15), dtype=cp.float64) + x[5:-5, 5:-5] = 2 + + # Transform the image by stretching it out by one pixel on each side so + # that cval will not actually be used + scale = 15 / (15 + 2) + transform = AffineTransform(scale=scale, translation=(1, 1)) + outx = warp(x, transform, mode='constant', order=order, cval=0, clip=True) + + # At higher orders of interpolation, the transformed image has overshoots + # beyond the input range that should be clipped to the range 1 to 2. Even + # though cval=0, the minimum value of the clipped output image should be + # 1 and not affected by the unused cval. 
+ assert_array_almost_equal(outx.min(), 1) + + def test_homography(): x = cp.zeros((5, 5), dtype=cp.double) x[1, 1] = 1 diff --git a/python/cucim/tests/fixtures/testimage.py b/python/cucim/tests/fixtures/testimage.py index c50b8eb6f..343698676 100644 --- a/python/cucim/tests/fixtures/testimage.py +++ b/python/cucim/tests/fixtures/testimage.py @@ -21,9 +21,9 @@ from ..util.gen_image import ImageGenerator -def gen_image(tmpdir_factory, recipe): +def gen_image(tmpdir_factory, recipe, resolution=None): dataset_path = tmpdir_factory.mktemp('datasets').strpath - dataset_gen = ImageGenerator(dataset_path, [recipe]) + dataset_gen = ImageGenerator(dataset_path, [recipe], [resolution]) image_path = dataset_gen.gen() return (dataset_path, image_path[0]) @@ -63,9 +63,8 @@ def testimg_tiff_stripe_32x24_16_raw(tmpdir_factory): def testimg_tiff_stripe_32x24_16(request): return request.param -# tiff_stripe_4096x4096_256 - +# tiff_stripe_4096x4096_256 @pytest.fixture(scope='session') def testimg_tiff_stripe_4096x4096_256_jpeg(tmpdir_factory): dataset_path, image_path = gen_image( @@ -137,3 +136,45 @@ def testimg_tiff_stripe_100000x100000_256_raw(tmpdir_factory): ]) def testimg_tiff_stripe_100000x100000_256(request): return request.param + + +# testimg_tiff_stripe_4096_4096_256_jpeg_resolution +@pytest.fixture(scope='session') +def testimg_tiff_stripe_4096_4096_256_jpeg_resolution_3_5_centimeter( + tmpdir_factory): + resolution = (0.3, 0.5, "CENTIMETER") + dataset_path, image_path = gen_image( + tmpdir_factory, 'tiff::stripe:4096x4096:256:jpeg', resolution) + yield image_path, resolution + # Clean up fake dataset folder + shutil.rmtree(dataset_path) + + +@pytest.fixture(scope='session') +def testimg_tiff_stripe_4096_4096_256_jpeg_resolution_4_7_inch(tmpdir_factory): + resolution = (0.4, 0.7, "INCH") + dataset_path, image_path = gen_image( + tmpdir_factory, 'tiff::stripe:4096x4096:256:jpeg', resolution) + yield image_path, resolution + # Clean up fake dataset folder + 
shutil.rmtree(dataset_path) + + +@pytest.fixture(scope='session') +def testimg_tiff_stripe_4096_4096_256_jpeg_resolution_9_1_none(tmpdir_factory): + resolution = (9, 1, "NONE") + dataset_path, image_path = gen_image( + tmpdir_factory, 'tiff::stripe:4096x4096:256:jpeg', resolution) + yield image_path, resolution + # Clean up fake dataset folder + shutil.rmtree(dataset_path) + + +@pytest.fixture(scope='session', params=[ + lazy_fixture( + 'testimg_tiff_stripe_4096_4096_256_jpeg_resolution_3_5_centimeter'), + lazy_fixture('testimg_tiff_stripe_4096_4096_256_jpeg_resolution_4_7_inch'), + lazy_fixture('testimg_tiff_stripe_4096_4096_256_jpeg_resolution_9_1_none'), +]) +def testimg_tiff_stripe_4096_4096_256_jpeg_resolution(request): + return request.param diff --git a/python/cucim/tests/unit/clara/test_load_image_metadata.py b/python/cucim/tests/unit/clara/test_load_image_metadata.py index 65e7d926b..2452d4324 100644 --- a/python/cucim/tests/unit/clara/test_load_image_metadata.py +++ b/python/cucim/tests/unit/clara/test_load_image_metadata.py @@ -14,6 +14,7 @@ # from ...util.io import open_image_cucim +import math def test_load_image_metadata(testimg_tiff_stripe_32x24_16): @@ -45,7 +46,7 @@ def test_load_image_metadata(testimg_tiff_stripe_32x24_16): # Returns physical size in tuple. assert img.spacing() == [1.0, 1.0, 1.0] # Units for each spacing element (size is same with `ndim`). - assert img.spacing_units() == ['micrometer', 'micrometer', 'color'] + assert img.spacing_units() == ['', '', 'color'] # Physical location of (0, 0, 0) (size is always 3). assert img.origin == [0.0, 0.0, 0.0] # Direction cosines (size is always 3x3). 
@@ -71,6 +72,54 @@ def test_load_image_metadata(testimg_tiff_stripe_32x24_16): assert img.raw_metadata == '{"axes": "YXC", "shape": [24, 32, 3]}' +def test_load_image_resolution_metadata(testimg_tiff_stripe_4096_4096_256_jpeg_resolution): # noqa: E501 + image, resolution = testimg_tiff_stripe_4096_4096_256_jpeg_resolution + img = open_image_cucim(image) + + x_resolution, y_resolution, resolution_unit = resolution + + if resolution_unit == "CENTIMETER": + x_spacing = 10000.0 / x_resolution + y_spacing = 10000.0 / y_resolution + spacing_unit = "micrometer" + elif resolution_unit == "INCH": + x_spacing = 25400.0 / x_resolution + y_spacing = 25400.0 / y_resolution + spacing_unit = "micrometer" + else: + x_spacing = x_resolution + y_spacing = y_resolution + spacing_unit = "" + + # Returns physical size in tuple. + assert all(map(lambda a, b: math.isclose(a, b, rel_tol=0.1), + img.spacing(), (y_spacing, x_spacing, 1.0))) + # Units for each spacing element (size is same with `ndim`). + assert img.spacing_units() == [spacing_unit, spacing_unit, 'color'] + + # A metadata object as `dict` + metadata = img.metadata + print(metadata) + assert isinstance(metadata, dict) + assert len(metadata) == 2 # 'cucim' and 'tiff' + assert math.isclose(metadata['tiff']['x_resolution'], + x_resolution, rel_tol=0.00001) + assert math.isclose(metadata['tiff']['y_resolution'], + y_resolution, rel_tol=0.00001) + unit_value = resolution_unit.lower() if resolution_unit != "NONE" else "" + assert metadata['tiff']['resolution_unit'] == unit_value + + # Check if lower resolution image's metadata has lower physical spacing. 
+ num_levels = img.resolutions['level_count'] + for level in range(num_levels): + lowres_img = img.read_region((0, 0), (100, 100), level=level) + lowres_downsample = img.resolutions["level_downsamples"][level] + assert all(map(lambda a, b: math.isclose(a, b, rel_tol=0.1), + lowres_img.spacing(), + (y_spacing / lowres_downsample, + x_spacing / lowres_downsample, 1.0))) + + def test_load_rgba_image_metadata(tmpdir): """Test accessing RGBA image's metadata. diff --git a/python/cucim/tests/unit/core/test_stain_normalizer.py b/python/cucim/tests/unit/core/test_stain_normalizer.py index 4b50a4121..ee66ecf67 100644 --- a/python/cucim/tests/unit/core/test_stain_normalizer.py +++ b/python/cucim/tests/unit/core/test_stain_normalizer.py @@ -22,15 +22,15 @@ class TestStainExtractorMacenko(): @pytest.mark.parametrize( - 'image', + 'image, ErrorClass', [ - cp.full((3, 2, 4), -1), # negative value - cp.full((3, 2, 4), 256), # out of range value - None, - cp.full((3, 2, 4), 240), # uniformly below the beta threshold - ] + (cp.full((3, 2, 4), -1), ValueError), # negative value + (cp.full((3, 2, 4), 256), ValueError), # out of range value + (None, TypeError), + (cp.full((3, 2, 4), 240), ValueError), # uniformly below the beta threshold # noqa + ], ) - def test_transparent_image(self, image): + def test_transparent_image(self, image, ErrorClass): """ Test HE stain extraction on an image that comprises only transparent pixels - pixels with absorbance below the @@ -38,12 +38,8 @@ def test_transparent_image(self, image): since once the transparent pixels are removed, there are no remaining pixels to compute eigenvectors. 
""" - if image is None: - with pytest.raises(TypeError): - stain_extraction_pca(image) - else: - with pytest.raises(ValueError): - stain_extraction_pca(image) + with pytest.raises(ErrorClass): + stain_extraction_pca(image) @pytest.mark.parametrize( 'image', diff --git a/python/cucim/tests/util/gen_image.py b/python/cucim/tests/util/gen_image.py index 8eea5dc81..645db1f79 100644 --- a/python/cucim/tests/util/gen_image.py +++ b/python/cucim/tests/util/gen_image.py @@ -16,6 +16,7 @@ import argparse import logging import os +import tifffile try: from .gen_tiff import TiffGenerator @@ -28,16 +29,23 @@ class ImageGenerator: - def __init__(self, dest, recipes, logger=None): + def __init__(self, dest, recipes, resolutions=None, logger=None): self.logger = logger or logging.getLogger(__name__) self.dest = dest self.recipes = recipes + if resolutions is None: + resolutions = [(1, 1, "CENTIMETER")] * len(recipes) + if len(resolutions) != len(recipes): + raise RuntimeError( + 'Number of resolutions must be equal to number of recipes') + self.resolutions = resolutions + def gen(self): results = [] - for recipe in self.recipes: + for recipe, resolution in zip(self.recipes, self.resolutions): items = recipe.split(':') item_len = len(items) if not (1 <= item_len <= 6): @@ -69,10 +77,16 @@ def gen(self): raise RuntimeError( f'No data generated from [pattern={pattern},' + f' image_size={image_size}, tile_size={tile_size},' - + f' compression={compression}].') + + f' compression={compression}, resolution={resolution}].') file_name = f'{kind}_{pattern}_{image_size_str}_{tile_size}' - + if resolution is None or len(resolution) == 2: + unit = None + elif len(resolution) == 3: + unit = resolution[2] + resolution = resolution[:2] + if unit is None: + unit = tifffile.RESUNIT.NONE image_path = generator_obj.save_image(image_data, dest_folder, file_name=file_name, @@ -81,7 +95,9 @@ def gen(self): pattern=pattern, image_size=image_size, tile_size=tile_size, - compression=compression) + 
compression=compression, + resolution=resolution, + resolutionunit=unit) self.logger.info(' Generated %s...', image_path) results.append(image_path) diff --git a/python/cucim/tests/util/gen_tiff.py b/python/cucim/tests/util/gen_tiff.py index 713679657..324f8db14 100644 --- a/python/cucim/tests/util/gen_tiff.py +++ b/python/cucim/tests/util/gen_tiff.py @@ -38,7 +38,8 @@ def get_image(self, pattern, image_size): return None def save_image(self, image_data, dest_folder, file_name, kind, subpath, - pattern, image_size, tile_size, compression): + pattern, image_size, tile_size, compression, resolution, + resolutionunit): # You can add pyramid images (0: largest resolution) if isinstance(image_data, list): arr_stack = image_data @@ -55,10 +56,15 @@ def save_image(self, image_data, dest_folder, file_name, kind, subpath, tiff_file_name = str( (Path(dest_folder) / f'{file_name}.tif').absolute()) + level_resolution = None with TiffWriter(tiff_file_name, bigtiff=True) as tif: for level in range(len(arr_stack)): # save from the largest image src_arr = arr_stack[level] + if resolution: + level_resolution = (resolution[0] / (level + 1), + resolution[1] / (level + 1)) + tif.write( src_arr, software="tifffile", @@ -68,6 +74,8 @@ def save_image(self, image_data, dest_folder, file_name, kind, subpath, planarconfig="CONTIG", compression=compression, # requires imagecodecs subfiletype=1 if level else 0, + resolution=level_resolution, + resolutionunit=resolutionunit, ) return tiff_file_name diff --git a/run b/run index 8033eaf18..6fe8ce919 100755 --- a/run +++ b/run @@ -806,7 +806,9 @@ test() { install_python_test_deps_() { if [ -n "${CONDA_PREFIX}" ]; then - run_command conda install -c conda-forge -y \ + # https://github.com/rapidsai/cucim/pull/349#issuecomment-1203335731 + # Do not update or change already-installed dependencies. + run_command conda install -c conda-forge -y --freeze-installed \ --file ${TOP}/python/cucim/requirements-test.txt else if [ -n "${VIRTUAL_ENV}" ]; then