diff --git a/3rdparty/LICENSE.pba+ b/3rdparty/LICENSE.pba+
new file mode 100644
index 000000000..9d0b4030a
--- /dev/null
+++ b/3rdparty/LICENSE.pba+
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 School of Computing, National University of Singapore
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 387d53820..18d0f992e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,51 @@
+# cuCIM 22.08.00 (17 Aug 2022)
+
+## 🚨 Breaking Changes
+
+- Stain extraction: use a less strict condition across channels when thresholding ([#316](https://github.com/rapidsai/cucim/pull/316)) [@grlee77](https://github.com/grlee77)
+
+## 🐛 Bug Fixes
+
+- create SimilarityTransform using CuPy 9.x-compatible indexing ([#365](https://github.com/rapidsai/cucim/pull/365)) [@grlee77](https://github.com/grlee77)
+- Add `__init__.py` in `cucim.core` ([#359](https://github.com/rapidsai/cucim/pull/359)) [@jakirkham](https://github.com/jakirkham)
+- Stain extraction: use a less strict condition across channels when thresholding ([#316](https://github.com/rapidsai/cucim/pull/316)) [@grlee77](https://github.com/grlee77)
+- Incorporate bug fixes from skimage 0.19.3 ([#312](https://github.com/rapidsai/cucim/pull/312)) [@grlee77](https://github.com/grlee77)
+- fix RawKernel bug for canny filter when quantiles are used ([#310](https://github.com/rapidsai/cucim/pull/310)) [@grlee77](https://github.com/grlee77)
+
+## 📖 Documentation
+
+- Defer loading of `custom.js` ([#383](https://github.com/rapidsai/cucim/pull/383)) [@galipremsagar](https://github.com/galipremsagar)
+- add cucim.core.morphology to API docs + other docstring fixes ([#367](https://github.com/rapidsai/cucim/pull/367)) [@grlee77](https://github.com/grlee77)
+- Update README.md ([#361](https://github.com/rapidsai/cucim/pull/361)) [@HesAnEasyCoder](https://github.com/HesAnEasyCoder)
+- remove unimplemented functions from See Also and fix version numbers in deprecation warnings ([#356](https://github.com/rapidsai/cucim/pull/356)) [@grlee77](https://github.com/grlee77)
+- Forward-merge branch-22.06 to branch-22.08 ([#344](https://github.com/rapidsai/cucim/pull/344)) [@grlee77](https://github.com/grlee77)
+- Update README.md ([#315](https://github.com/rapidsai/cucim/pull/315)) [@HesAnEasyCoder](https://github.com/HesAnEasyCoder)
+- Update index.rst ([#314](https://github.com/rapidsai/cucim/pull/314)) [@HesAnEasyCoder](https://github.com/HesAnEasyCoder)
+- Update PyPI package documentation for v22.06.00 ([#311](https://github.com/rapidsai/cucim/pull/311)) [@gigony](https://github.com/gigony)
+
+## 🚀 New Features
+
+- Add segmentation with the Chan-Vese active contours method ([#343](https://github.com/rapidsai/cucim/pull/343)) [@grlee77](https://github.com/grlee77)
+- Add cucim.skimage.morphology.medial_axis ([#342](https://github.com/rapidsai/cucim/pull/342)) [@grlee77](https://github.com/grlee77)
+- Add cucim.skimage.segmentation.expand_labels ([#341](https://github.com/rapidsai/cucim/pull/341)) [@grlee77](https://github.com/grlee77)
+- Add Euclidean distance transform for images/volumes ([#318](https://github.com/rapidsai/cucim/pull/318)) [@grlee77](https://github.com/grlee77)
+
+## 🛠️ Improvements
+
+- Revert "Allow CuPy 11" ([#362](https://github.com/rapidsai/cucim/pull/362)) [@galipremsagar](https://github.com/galipremsagar)
+- Fix issues with day & night modes in python docs ([#360](https://github.com/rapidsai/cucim/pull/360)) [@galipremsagar](https://github.com/galipremsagar)
+- Allow CuPy 11 ([#357](https://github.com/rapidsai/cucim/pull/357)) [@jakirkham](https://github.com/jakirkham)
+- more efficient separable convolution ([#355](https://github.com/rapidsai/cucim/pull/355)) [@grlee77](https://github.com/grlee77)
+- Support resolution and spacing metadata ([#349](https://github.com/rapidsai/cucim/pull/349)) [@gigony](https://github.com/gigony)
+- Performance optimizations to morphological segmentation functions ([#340](https://github.com/rapidsai/cucim/pull/340)) [@grlee77](https://github.com/grlee77)
+- benchmarks: avoid use of deprecated pandas API ([#339](https://github.com/rapidsai/cucim/pull/339)) [@grlee77](https://github.com/grlee77)
+- Reduce memory overhead and improve performance of normalize_colors_pca ([#328](https://github.com/rapidsai/cucim/pull/328)) [@grlee77](https://github.com/grlee77)
+- Protect against obscure divide by zero error in edge case of `normalize_colors_pca` ([#327](https://github.com/rapidsai/cucim/pull/327)) [@grlee77](https://github.com/grlee77)
+- complete parametrization of cucim.skimage benchmarks ([#324](https://github.com/rapidsai/cucim/pull/324)) [@grlee77](https://github.com/grlee77)
+- parameterization of `filters` and `features` benchmarks (v2) ([#322](https://github.com/rapidsai/cucim/pull/322)) [@grlee77](https://github.com/grlee77)
+- Add a fast histogram-based median filter ([#317](https://github.com/rapidsai/cucim/pull/317)) [@grlee77](https://github.com/grlee77)
+- Remove custom compiler environment variables ([#307](https://github.com/rapidsai/cucim/pull/307)) [@ajschmidt8](https://github.com/ajschmidt8)
+
# cuCIM 22.06.00 (7 Jun 2022)
## 🚨 Breaking Changes
@@ -17,6 +65,7 @@
## 🚀 New Features
- add missing `cucim.skimage.segmentation.clear_border` function ([#267](https://github.com/rapidsai/cucim/pull/267)) [@grlee77](https://github.com/grlee77)
+- add `cucim.core.operations.color.stain_extraction_pca` and `cucim.core.operations.color.normalize_colors_pca` for digital pathology H&E stain extraction and normalization ([#273](https://github.com/rapidsai/cucim/pull/273)) [@grlee77](https://github.com/grlee77), [@drbeh](https://github.com/drbeh)
## 🛠️ Improvements
@@ -27,6 +76,7 @@
- Promote small integer types to single rather than double precision ([#278](https://github.com/rapidsai/cucim/pull/278)) [@grlee77](https://github.com/grlee77)
- improve efficiency of histogram-based thresholding functions ([#276](https://github.com/rapidsai/cucim/pull/276)) [@grlee77](https://github.com/grlee77)
- Remove unused dependencies in GPU tests job ([#268](https://github.com/rapidsai/cucim/pull/268)) [@Ethyling](https://github.com/Ethyling)
+- Enable footprint decomposition for morphology ([#274](https://github.com/rapidsai/cucim/pull/274)) [@grlee77](https://github.com/grlee77)
- Use conda compilers ([#232](https://github.com/rapidsai/cucim/pull/232)) [@Ethyling](https://github.com/Ethyling)
- Build packages using mambabuild ([#216](https://github.com/rapidsai/cucim/pull/216)) [@Ethyling](https://github.com/Ethyling)
diff --git a/LICENSE-3rdparty.md b/LICENSE-3rdparty.md
index 0a3789af7..984d0e952 100644
--- a/LICENSE-3rdparty.md
+++ b/LICENSE-3rdparty.md
@@ -281,3 +281,9 @@ StainTools
- https://github.com/Peter554/StainTools/blob/master/LICENSE.txt
- Copyright: Peter Byfield
- Usage: reference for stain color normalization algorithm
+
+PBA+
+- License: MIT License
+ - https://github.com/orzzzjq/Parallel-Banding-Algorithm-plus/blob/master/LICENSE
+- Copyright: School of Computing, National University of Singapore
+- Usage: PBA+ is used to implement the Euclidean distance transform.
diff --git a/README.md b/README.md
index 9f31f70ce..612fda219 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,43 @@
#

cuCIM
+[RAPIDS](https://rapids.ai) cuCIM is an open-source, accelerated computer vision and image processing software library for multidimensional images used in biomedical, geospatial, material and life science, and remote sensing use cases.
-[RAPIDS](https://rapids.ai) cuCIM is an extensible toolkit designed to provide GPU accelerated I/O, computer vision & image processing primitives for N-Dimensional images with a focus on biomedical imaging.
+cuCIM offers:
+
+- Enhanced Image Processing Capabilities for large and n-dimensional tag image file format (TIFF) files
+- Accelerated performance through Graphics Processing Unit (GPU)-based image processing and computer vision primitives
+- A Straightforward Pythonic Interface with Matching Application Programming Interface (API) for Openslide
+
+cuCIM supports the following formats:
+
+- Aperio ScanScope Virtual Slide (SVS)
+- Philips TIFF
+- Generic Tiled, Multi-resolution RGB TIFF files with the following compression schemes:
+ - No Compression
+ - JPEG
+ - JPEG2000
+ - Lempel-Ziv-Welch (LZW)
+ - Deflate
**NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cucim/blob/main/README.md) ensure you are on the `main` branch.
-- [GTC 2021 cuCIM: A GPU Image I/O and Processing Toolkit [S32194]](https://www.nvidia.com/en-us/on-demand/search/?facet.mimetype[]=event%20session&layout=list&page=1&q=cucim&sort=date)
- - [video](https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s32194/)
+- [GTC 2022 Accelerating Storage IO to GPUs with Magnum IO [S41347]](https://events.rainfocus.com/widget/nvidia/gtcspring2022/sessioncatalog/session/1634960000577001Etxp)
+ - cuCIM's GDS API examples:
- [SciPy 2021 cuCIM - A GPU image I/O and processing library](https://www.scipy2021.scipy.org/)
- [video](https://youtu.be/G46kOOM9xbQ)
+- [GTC 2021 cuCIM: A GPU Image I/O and Processing Toolkit [S32194]](https://www.nvidia.com/en-us/on-demand/search/?facet.mimetype[]=event%20session&layout=list&page=1&q=cucim&sort=date)
+ - [video](https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s32194/)
+
+**Blogs**
+- [Enhanced Image Analysis with Multidimensional Image Processing](https://developer.nvidia.com/blog/enhanced-image-analysis-with-multidimensional-image-processing/)
+- [Accelerating Scikit-Image API with cuCIM: n-Dimensional Image Processing and IO on GPUs](https://developer.nvidia.com/blog/cucim-rapid-n-dimensional-image-processing-and-i-o-on-gpus/)
+- [Accelerating Digital Pathology Pipelines with NVIDIA Clara™ Deploy](https://developer.nvidia.com/blog/accelerating-digital-pathology-pipelines-with-nvidia-clara-deploy-2/)
+
+**Webinars**
+
+- [cuCIM: a GPU Image IO and Processing Library](https://www.youtube.com/watch?v=G46kOOM9xbQ)
+
+**[Documentation](https://docs.rapids.ai/api/cucim/stable)**
**Release notes** are available on our [wiki page](https://github.com/rapidsai/cucim/wiki/Release-Notes).
@@ -39,7 +68,7 @@ pip install scipy scikit-image cupy-cuda110
### Notebooks
-Please check out our [Welcome](notebooks/Welcome.ipynb) notebook ([NBViewer](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.06/notebooks/Welcome.ipynb))
+Please check out our [Welcome](notebooks/Welcome.ipynb) notebook ([NBViewer](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.08/notebooks/Welcome.ipynb))
#### Downloading sample images
diff --git a/VERSION b/VERSION
index ddc883c42..231fac64b 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-22.06.00
+22.08.00
diff --git a/benchmarks/skimage/_image_bench.py b/benchmarks/skimage/_image_bench.py
index c7d5abfd8..854fae290 100644
--- a/benchmarks/skimage/_image_bench.py
+++ b/benchmarks/skimage/_image_bench.py
@@ -36,6 +36,7 @@ def __init__(
module_cpu=scipy.ndimage,
module_gpu=cupyx.scipy.ndimage,
function_is_generator=False,
+ run_cpu=True
):
self.shape = shape
@@ -67,9 +68,12 @@ def gen_gpu(*args, **kwargs):
self.module_name_cpu = module_cpu.__name__
self.module_name_gpu = module_gpu.__name__
+ self.run_cpu = run_cpu
+
def set_args(self, dtype):
if np.dtype(dtype).kind in "iu":
im1 = skimage.data.camera()
+ im1 = im1.astype(dtype)
else:
im1 = skimage.data.camera() / 255.0
im1 = im1.astype(dtype)
@@ -166,17 +170,20 @@ def run_benchmark(self, duration=3, verbose=True):
rep_kwargs_gpu = self.get_reps(
self.func_gpu, self.args_gpu, kw_gpu, duration, cpu=False
)
- perf = repeat(self.func_cpu, self.args_cpu, kw_cpu, **rep_kwargs_cpu)
+ print("Number of Repetitions : ", rep_kwargs_gpu)
perf_gpu = repeat(self.func_gpu, self.args_gpu, kw_gpu, **rep_kwargs_gpu)
- df.at[index, "GPU accel"] = perf.cpu_times.mean() / perf_gpu.gpu_times.mean()
+
df.at[index, "shape"] = f"{self.shape}"
# df.at[index, "description"] = index
df.at[index, "function_name"] = self.function_name
df.at[index, "dtype"] = np.dtype(dtype).name
df.at[index, "ndim"] = len(self.shape)
- df.at[index, "CPU: host (mean)"] = perf.cpu_times.mean()
- df.at[index, "CPU: host (std)"] = perf.cpu_times.std()
+ if self.run_cpu == True:
+ perf = repeat(self.func_cpu, self.args_cpu, kw_cpu, **rep_kwargs_cpu)
+ df.at[index, "GPU accel"] = perf.cpu_times.mean() / perf_gpu.gpu_times.mean()
+ df.at[index, "CPU: host (mean)"] = perf.cpu_times.mean()
+ df.at[index, "CPU: host (std)"] = perf.cpu_times.std()
df.at[index, "GPU: host (mean)"] = perf_gpu.cpu_times.mean()
df.at[index, "GPU: host (std)"] = perf_gpu.cpu_times.std()
diff --git a/benchmarks/skimage/bench_convolve.py b/benchmarks/skimage/bench_convolve.py
new file mode 100644
index 000000000..60007b8a3
--- /dev/null
+++ b/benchmarks/skimage/bench_convolve.py
@@ -0,0 +1,139 @@
+"""
+Benchmark locally modified ndimage functions vs. their CuPy counterparts
+"""
+import cupy as cp
+import cupyx.scipy.ndimage as ndi
+import pytest
+from cupyx.profiler import benchmark
+
+from cucim.skimage._vendored.ndimage import (
+ convolve1d, correlate1d, gaussian_filter, gaussian_filter1d,
+ gaussian_gradient_magnitude, gaussian_laplace, laplace, prewitt, sobel,
+ uniform_filter, uniform_filter1d,
+)
+
+d = cp.cuda.Device()
+
+
+def _get_image(shape, dtype, seed=123):
+    """Return a seeded random CuPy array with the given shape and dtype."""
+    rng = cp.random.default_rng(seed)
+    dtype = cp.dtype(dtype)
+    if dtype.kind == 'b':
+        # The upper bound is exclusive: (0, 1) could only ever yield False.
+        image = rng.integers(0, 2, shape, dtype=cp.uint8).astype(bool)
+    elif dtype.kind in 'iu':
+        image = rng.integers(0, 128, shape, dtype=dtype)
+    elif dtype.kind == 'c':
+        real_dtype = cp.asarray([], dtype=dtype).real.dtype
+        image = rng.standard_normal(shape, dtype=real_dtype)
+        image = image + 1j * rng.standard_normal(shape, dtype=real_dtype)
+    elif dtype == cp.float16:
+        image = rng.standard_normal(shape).astype(dtype)
+    else:
+        image = rng.standard_normal(shape, dtype=dtype)
+    return image
+
+
+def _compare_implementations(
+    shape, kernel_size, axis, dtype, mode, cval=0.0, origin=0,
+    output_dtype=None, kernel_dtype=None, output_preallocated=False,
+    function=convolve1d, max_duration=1
+):
+    """Time ``function`` against the same-named cupyx.scipy.ndimage function.
+
+    Returns the ``(reference, cuCIM)`` results of ``cupyx.profiler.benchmark``.
+    """
+    dtype = cp.dtype(dtype)
+    image = _get_image(shape, dtype)
+    kernel = _get_image((kernel_size,), dtype if kernel_dtype is None else kernel_dtype)
+    if output_dtype is not None:
+        output_dtype = cp.dtype(output_dtype)
+    if output_preallocated:
+        out_dt = image.dtype if output_dtype is None else output_dtype
+        out_ref, out_new = (cp.empty(image.shape, dtype=out_dt) for _ in range(2))
+    else:
+        out_ref = out_new = output_dtype
+    common = dict(axis=axis, mode=mode, cval=cval, origin=origin)
+    bench_kw = dict(n_warmup=10, n_repeat=10000, max_duration=max_duration)
+    perf1 = benchmark(
+        getattr(ndi, function.__name__), (image, kernel),
+        kwargs=dict(common, output=out_ref), **bench_kw)
+    perf2 = benchmark(
+        function, (image, kernel),
+        kwargs=dict(common, output=out_new, algorithm='shared_memory'),
+        **bench_kw)
+    return perf1, perf2
+
+
+def _compare_implementations_other(
+    shape, dtype, mode, cval=0.0,
+    output_dtype=None, kernel_dtype=None, output_preallocated=False,
+    function=convolve1d, func_kwargs=None, max_duration=1,
+):
+    """Time a filter called without a kernel against its cupyx namesake.
+
+    ``func_kwargs`` holds extra filter arguments (e.g. ``sigma``, ``size``);
+    ``kernel_dtype`` is accepted but not used by this function.
+    Returns the ``(reference, cuCIM)`` ``cupyx.profiler.benchmark`` results.
+    """
+    image = _get_image(shape, cp.dtype(dtype))
+    common = dict(mode=mode, cval=cval)
+    common.update(func_kwargs or {})  # None default: no shared mutable dict
+    if output_dtype is not None:
+        output_dtype = cp.dtype(output_dtype)
+    if output_preallocated:
+        out_dt = image.dtype if output_dtype is None else output_dtype
+        # Separate buffers so each implementation writes to its own output.
+        out_ref, out_new = (cp.empty(image.shape, dtype=out_dt) for _ in range(2))
+    else:
+        out_ref = out_new = output_dtype
+    bench_kw = dict(n_warmup=10, n_repeat=10000, max_duration=max_duration)
+    perf1 = benchmark(getattr(ndi, function.__name__), (image,),
+                      kwargs=dict(common, output=out_ref), **bench_kw)
+    new_kw = dict(common, output=out_new, algorithm='shared_memory')
+    perf2 = benchmark(function, (image,), kwargs=new_kw, **bench_kw)
+    return perf1, perf2
+
+
+print("\n\n")  # spacer before the pipe-delimited result tables printed below
+print("function | shape | dtype | mode | kernel size | preallocated | axis | dur (ms), CuPy | dur (ms), cuCIM | acceleration ")
+print("---------|-------|-------|------|-------------|--------------|------|----------------|-----------------|--------------")
+for function in [convolve1d]:  # table 1: explicit 1-D kernel, one row per axis
+ for shape in [(512, 512), (3840, 2160), (64, 64, 64), (256, 256, 256)]:
+ for dtype in [cp.float32, cp.uint8]:
+ for mode in ['nearest']:
+ for kernel_size in [3, 7, 11, 41]:
+ for output_preallocated in [False]: # , True]:
+ for axis in range(len(shape)):
+ output_dtype = dtype
+ perf1, perf2 = _compare_implementations(shape=shape, kernel_size=kernel_size, mode=mode, axis=axis, dtype=dtype, output_dtype=output_dtype, output_preallocated=output_preallocated, function=function)
+ t_elem = perf1.gpu_times * 1000.  # perf1 is the cupyx reference; x1000 to match the "dur (ms)" header
+ t_shared = perf2.gpu_times * 1000.  # perf2 is the cuCIM run with algorithm='shared_memory'
+ print(f"{function.__name__} | {shape} | {cp.dtype(dtype).name} | {mode} | {kernel_size=} | prealloc={output_preallocated} | {axis=} | {t_elem.mean():0.3f} +/- {t_elem.std():0.3f} | {t_shared.mean():0.3f} +/- {t_shared.std():0.3f} | {t_elem.mean() / t_shared.mean():0.3f}")
+
+
+print("function | kwargs | shape | dtype | mode | preallocated | dur (ms), CuPy | dur (ms), cuCIM | acceleration ")
+print("---------|--------|-------|-------|------|--------------|----------------|-----------------|--------------")
+for function, func_kwargs in [  # table 2: filters called with the image only (no kernel argument)
+ # (gaussian_filter1d, dict(sigma=1.0, axis=0)),
+ # (gaussian_filter1d, dict(sigma=1.0, axis=-1)),
+ # (gaussian_filter1d, dict(sigma=4.0, axis=0)),
+ # (gaussian_filter1d, dict(sigma=4.0, axis=-1)),
+ (gaussian_filter, dict(sigma=1.0)),
+ (gaussian_filter, dict(sigma=4.0)),
+ (uniform_filter, dict(size=11)),
+ (prewitt, dict(axis=0)),
+ (sobel, dict(axis=0)),
+ (prewitt, dict(axis=-1)),
+ (sobel, dict(axis=-1)),
+]:
+ for shape in [(512, 512), (3840, 2160), (64, 64, 64), (256, 256, 256)]:
+ for (dtype, output_dtype) in [(cp.float32, cp.float32), (cp.uint8, cp.float32)]:
+ for mode in ['nearest']:
+ for output_preallocated in [False, True]:
+ perf1, perf2 = _compare_implementations_other(shape=shape, mode=mode, dtype=dtype, output_dtype=output_dtype, output_preallocated=output_preallocated, function=function, func_kwargs=func_kwargs)
+ t_elem = perf1.gpu_times * 1000.  # cupyx reference timings, scaled to match the "dur (ms)" header
+ t_shared = perf2.gpu_times * 1000.  # cuCIM timings, same scaling
+ print(f"{function.__name__} | {func_kwargs} | {shape} | {cp.dtype(dtype).name} | {mode} | {output_preallocated} | {t_elem.mean():0.3f} +/- {t_elem.std():0.3f} | {t_shared.mean():0.3f} +/- {t_shared.std():0.3f} | {t_elem.mean() / t_shared.mean():0.3f}")
+
diff --git a/benchmarks/skimage/cucim_color_bench.py b/benchmarks/skimage/cucim_color_bench.py
index 66ce6b74c..87955dd0a 100644
--- a/benchmarks/skimage/cucim_color_bench.py
+++ b/benchmarks/skimage/cucim_color_bench.py
@@ -1,3 +1,4 @@
+import argparse
import os
import pickle
@@ -14,6 +15,7 @@
from _image_bench import ImageBench
+func_name_choices = ['convert_colorspace', 'rgb2hed', 'hed2rgb', 'lab2lch', 'lch2lab', 'xyz2lab', 'lab2xyz', 'rgba2rgb', 'label2rgb']
class ColorBench(ImageBench):
def set_args(self, dtype):
@@ -47,6 +49,7 @@ def __init__(
index_str=None,
module_cpu=scipy.ndimage,
module_gpu=cupyx.scipy.ndimage,
+ run_cpu=True,
):
self.contiguous_labels = contiguous_labels
super().__init__(
@@ -58,6 +61,7 @@ def __init__(
index_str=index_str,
module_cpu=module_cpu,
module_gpu=module_gpu,
+ run_cpu=run_cpu,
)
def set_args(self, dtype):
@@ -70,14 +74,12 @@ def set_args(self, dtype):
],
dtype=int,
)
- tiling = tuple(s // a_s for s, a_s in zip(shape, a.shape))
+ tiling = tuple(s // a_s for s, a_s in zip(self.shape, a.shape))
if self.contiguous_labels:
label = np.kron(a, np.ones(tiling, dtype=a.dtype))
else:
label = np.tile(a, tiling)
labeld = cp.asarray(label)
- if self.shape[-1] != 3:
- raise ValueError("shape must be 3 on the last axis")
imaged = cupy.testing.shaped_random(labeld.shape, xp=cp, dtype=dtype, scale=1.0)
image = cp.asnumpy(imaged)
self.args_cpu = (
@@ -90,96 +92,125 @@ def set_args(self, dtype):
)
-pfile = "cucim_color_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes = [np.float32]
-all_colorspaces = True
-
-for shape in [(256, 256, 3), (3840, 2160, 3), (192, 192, 192, 3)]:
- ndim = len(shape)
+def main(args):
- if all_colorspaces:
- color_spaces = ["RGB", "HSV", "RGB CIE", "XYZ", "YUV", "YIQ", "YPbPr", "YCbCr", "YDbDr"]
+ pfile = "cucim_color_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
else:
- color_spaces = ["RGB", "HSV", "YUV", "XYZ"]
- for fromspace in color_spaces:
- for tospace in color_spaces:
- if fromspace == tospace:
- continue
+ all_results = pd.DataFrame()
+
+ dtypes = [np.dtype(args.dtype)]
+ # image sizes/shapes
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+ run_cpu = not args.no_cpu
+
+ all_colorspaces = False
+ ndim = len(shape)
+
+ for function_name in func_name_choices:
+
+ if function_name != args.func_name:
+ continue
+
+ if function_name == 'convert_colorspace':
+ if all_colorspaces:
+ color_spaces = ["RGB", "HSV", "RGB CIE", "XYZ", "YUV", "YIQ", "YPbPr", "YCbCr", "YDbDr"]
+ else:
+ color_spaces = ["RGB", "HSV", "YUV", "XYZ"]
+ for fromspace in color_spaces:
+ for tospace in color_spaces:
+ if fromspace == tospace:
+ continue
+
+ B = ColorBench(
+ function_name="convert_colorspace",
+ shape=shape + (3,),
+ dtypes=dtypes,
+ fixed_kwargs=dict(fromspace=fromspace, tospace=tospace),
+ var_kwargs={},
+ index_str=f"{fromspace.lower()}2{tospace.lower()}",
+ module_cpu=skimage.color,
+ module_gpu=cucim.skimage.color,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+        elif function_name == 'rgba2rgb':
+            B = RGBABench(
+                function_name="rgba2rgb",
+                shape=shape + (4,),  # --img_size omits the channel axis; append RGBA
+                dtypes=dtypes,
+                fixed_kwargs={},
+                var_kwargs={},
+                module_cpu=skimage.color,
+                module_gpu=cucim.skimage.color,
+                run_cpu=run_cpu,
+            )
+            results = B.run_benchmark(duration=args.duration)
+            all_results = pd.concat([all_results, results["full"]])
+
+ elif function_name == 'label2rgb':
+
+ for contiguous_labels in [True, False]:
+ if contiguous_labels:
+ index_str = "contiguous"
+ else:
+ index_str = None
+ B = LabelBench(
+ function_name="label2rgb",
+ shape=shape,
+ dtypes=dtypes,
+ contiguous_labels=contiguous_labels,
+ index_str=index_str,
+ fixed_kwargs=dict(bg_label=0),
+ var_kwargs=dict(kind=["avg", "overlay"]),
+ module_cpu=skimage.color,
+ module_gpu=cucim.skimage.color,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ elif function_name in [
+ 'rgb2hed', 'hed2rgb', 'lab2lch', 'lch2lab', 'xyz2lab', 'lab2xyz'
+ ]:
B = ColorBench(
- function_name="convert_colorspace",
- shape=shape,
+ function_name=function_name,
+ shape=shape + (3,),
dtypes=dtypes,
- fixed_kwargs=dict(fromspace=fromspace, tospace=tospace),
+ fixed_kwargs={},
var_kwargs={},
- index_str=f"{fromspace.lower()}2{tospace.lower()}",
module_cpu=skimage.color,
module_gpu=cucim.skimage.color,
+ run_cpu=run_cpu,
)
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
- # rgb2hed and hed2rgb test combine_stains and separate_stains and all other
- # stains should have equivalent performance.
- #
- # Probably not necessary to benchmark combine_stains and separate_stains
- # e.g.
- # ihc_hdx = separate_stains(ihc, hdx_from_rgb)
- # ihc = combine_stains(ihc_hdx, rgb_from_hdx)
- #
-
- for fname in ["rgb2hed", "hed2rgb", "lab2lch", "lch2lab", "xyz2lab",
- "lab2xyz"]:
- B = ColorBench(
- function_name=fname,
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs={},
- var_kwargs={},
- module_cpu=skimage.color,
- module_gpu=cucim.skimage.color,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
- B = RGBABench(
- function_name="rgba2rgb",
- shape=shape[:-1] + (4,),
- dtypes=dtypes,
- fixed_kwargs={},
- var_kwargs={},
- module_cpu=skimage.color,
- module_gpu=cucim.skimage.color,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
- for contiguous_labels in [True, False]:
- if contiguous_labels:
- index_str = "contiguous"
- else:
- index_str = None
- B = LabelBench(
- function_name="label2rgb",
- shape=shape,
- dtypes=dtypes,
- contiguous_labels=contiguous_labels,
- index_str=index_str,
- fixed_kwargs=dict(bg_label=0),
- var_kwargs=dict(kind=["avg", "overlay"]),
- module_cpu=skimage.color,
- module_gpu=cucim.skimage.color,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+    fbase = os.path.splitext(pfile)[0]
+    all_results.to_csv(fbase + ".csv")
+    all_results.to_pickle(pfile)
+    try:
+        import tabulate  # noqa: F401 -- the package DataFrame.to_markdown needs
+
+        with open(fbase + ".md", "wt") as f:
+            f.write(all_results.to_markdown())
+    except ImportError:
+        pass  # Markdown export is optional; the CSV and pickle are already written
+
+
+if __name__ == '__main__':
+    # Command-line entry point: every option except --no_cpu is mandatory.
+    dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+    cli = argparse.ArgumentParser(description='Benchmarking cuCIM color conversion functions')
+    cli.add_argument('-i', '--img_size', required=True, type=str, help='Size of input image (omit color channel, it will be appended as needed)')
+    cli.add_argument('-d', '--dtype', required=True, type=str, choices=dtype_choices, help='Dtype of input image')
+    cli.add_argument('-f', '--func_name', required=True, type=str, choices=func_name_choices, help='function to benchmark')
+    cli.add_argument('-t', '--duration', required=True, type=int, help='time to run benchmark')
+    cli.add_argument('--no_cpu', default=False, action='store_true', help='disable cpu measurements')
+
+    main(cli.parse_args())
diff --git a/benchmarks/skimage/cucim_exposure_bench.py b/benchmarks/skimage/cucim_exposure_bench.py
index 9f7ffbec2..d0fb4dbda 100644
--- a/benchmarks/skimage/cucim_exposure_bench.py
+++ b/benchmarks/skimage/cucim_exposure_bench.py
@@ -1,3 +1,4 @@
+import argparse
import os
import pickle
@@ -27,6 +28,7 @@ def set_args(self, dtype):
class MatchHistogramBench(ImageBench):
def set_args(self, dtype):
+
if np.dtype(dtype).kind in "iu":
scale = 256
else:
@@ -39,95 +41,101 @@ def set_args(self, dtype):
self.args_gpu = (imaged, imaged2)
-pfile = "cucim_exposure_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes = [np.uint8, np.float32]
-
-exposure_config = {
- "equalize_adapthist": dict(
- fixed_kwargs=dict(clip_limit=0.01, nbins=256),
- variable_kwargs=dict(),
- color_required=False,
- grayscale_only=False,
- dtypes=None,
- shapes=None,
- ),
- "histogram": dict(
- fixed_kwargs=dict(source_range="image"),
- variable_kwargs=dict(nbins=[16, 256], normalize=[True, False]),
- color_required=False,
- grayscale_only=True,
- dtypes=None,
- shapes=None,
- ),
-}
-
-for function_name, fixed_kwargs, var_kwargs, allow_color in [
- ("equalize_adapthist", dict(clip_limit=0.01, nbins=256), dict(), True),
- (
- "histogram",
- dict(source_range="image"),
- dict(nbins=[16, 256], normalize=[True, False]),
- False,
- ),
- ("cumulative_distribution", dict(), dict(nbins=[16, 256]), False),
- ("equalize_hist", dict(mask=None), dict(nbins=[16, 256]), False),
- ("rescale_intensity", dict(in_range="image", out_range="dtype"), dict(), False),
- ("adjust_gamma", dict(), dict(), False),
- ("adjust_log", dict(), dict(), False),
- ("adjust_sigmoid", dict(), dict(inv=[False, True]), False),
- ("is_low_contrast", dict(), dict(), False),
-]:
-
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
- ndim = len(shape)
- if shape[-1] == 3 and not allow_color:
+def main(args):
+
+ pfile = "cucim_exposure_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
+ else:
+ all_results = pd.DataFrame()
+
+ dtypes = [np.dtype(args.dtype)]
+ # image sizes/shapes
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+ run_cpu = not args.no_cpu
+
+    for function_name, fixed_kwargs, var_kwargs, allow_color in [
+        ("equalize_adapthist", dict(clip_limit=0.01, nbins=256), dict(), True),
+        (
+            "histogram", dict(source_range="image"),
+            dict(nbins=[16, 256], normalize=[True, False]), False,
+        ),
+        ("cumulative_distribution", dict(), dict(nbins=[16, 256]), False),
+        ("equalize_hist", dict(mask=None), dict(nbins=[16, 256]), False),
+        ("rescale_intensity", dict(in_range="image", out_range="dtype"), dict(), False),
+        ("adjust_gamma", dict(), dict(), False),
+        ("adjust_log", dict(), dict(), False),
+        ("adjust_sigmoid", dict(), dict(inv=[False, True]), False),
+        ("is_low_contrast", dict(), dict(), False),
+        # Needed so the MatchHistogramBench branch in the loop body is reachable.
+        ("match_histograms", dict(), dict(), True),
+    ]:
+
+ if function_name != args.func_name:
continue
- if function_name == "equalize_adapthist":
- # TODO: fix equalize_adapthist for size (3840, 2160) and kernel_size = [16, 16]
- size_factors = [4, 8, 16]
- kernel_sizes = []
- for size_factor in size_factors:
- kernel_sizes.append([max(s // size_factor, 1) for s in shape if s != 3])
- var_kwargs.update(dict(kernel_size=kernel_sizes))
-
- B = ExposureBench(
- function_name=function_name,
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.exposure,
- module_gpu=cucim.skimage.exposure,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
- ndim = len(shape)
-
- channel_axis = -1 if shape[-1] in [3, 4] else None
-
- B = MatchHistogramBench(
- function_name="match_histograms",
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs=dict(channel_axis=channel_axis),
- var_kwargs=dict(),
- module_cpu=skimage.exposure,
- module_gpu=cucim.skimage.exposure,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ ndim = len(shape)
+
+ if function_name == 'match_histograms':
+
+ channel_axis = -1 if shape[-1] in [3, 4] else None
+
+ B = MatchHistogramBench(
+ function_name="match_histograms",
+ shape=shape,
+ dtypes=dtypes,
+ fixed_kwargs=dict(channel_axis=channel_axis),
+ var_kwargs=dict(),
+ module_cpu=skimage.exposure,
+ module_gpu=cucim.skimage.exposure,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ else:
+
+ if shape[-1] == 3 and not allow_color:
+ continue
+
+ if function_name == "equalize_adapthist":
+ # TODO: fix equalize_adapthist for size (3840, 2160) and kernel_size = [16, 16]
+ size_factors = [4, 8, 16]
+ kernel_sizes = []
+ for size_factor in size_factors:
+ kernel_sizes.append([max(s // size_factor, 1) for s in shape if s != 3])
+ var_kwargs.update(dict(kernel_size=kernel_sizes))
+
+ B = ExposureBench(
+ function_name=function_name,
+ shape=shape,
+ dtypes=dtypes,
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ module_cpu=skimage.exposure,
+ module_gpu=cucim.skimage.exposure,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ fbase = os.path.splitext(pfile)[0]
+ all_results.to_csv(fbase + ".csv")
+ all_results.to_pickle(pfile)
+ with open(fbase + ".md", "wt") as f:
+ f.write(all_results.to_markdown())
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Benchmarking cuCIM exposure functions')
+    # Must list every function name main() dispatches on, including 'histogram'.
+    func_name_choices = ['equalize_adapthist', 'histogram', 'cumulative_distribution', 'equalize_hist', 'rescale_intensity', 'adjust_gamma', 'adjust_log', 'adjust_sigmoid', 'is_low_contrast', 'match_histograms']
+    dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+    parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True)
+    parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True)
+    parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices=func_name_choices, required=True)
+    parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+    parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
+    args = parser.parse_args()
+    main(args)
diff --git a/benchmarks/skimage/cucim_feature_bench.py b/benchmarks/skimage/cucim_feature_bench.py
index 800086f7a..05f61493f 100644
--- a/benchmarks/skimage/cucim_feature_bench.py
+++ b/benchmarks/skimage/cucim_feature_bench.py
@@ -1,5 +1,6 @@
import os
import pickle
+import argparse
import cucim.skimage
import cucim.skimage.feature
@@ -26,102 +27,119 @@ def set_args(self, dtype):
self.args_cpu = (image, template)
self.args_gpu = (imaged, templated)
-
-pfile = "cucim_feature_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes = [np.float32]
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- ("multiscale_basic_features", dict(edges=True), dict(texture=[True, False]), True, True),
- ("canny", dict(sigma=1.8), dict(), False, False),
- # reduced default rings, histograms, orientations to fit daisy at (3840, 2160) into GPU memory
- (
- "daisy",
- dict(step=4, radius=15, rings=2, histograms=5, orientations=4),
- dict(normalization=["l1", "l2", "daisy"]),
- False,
- False,
- ),
- ("structure_tensor", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
- ("hessian_matrix", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
- ("hessian_matrix_det", dict(sigma=1, approximate=False), dict(), False, True),
- ("shape_index", dict(sigma=1, mode="reflect"), dict(), False, False),
- ("corner_kitchen_rosenfeld", dict(mode="reflect"), dict(), False, False),
- ("corner_harris", dict(k=0.05, eps=1e-6, sigma=1), dict(method=["k", "eps"]), False, False),
- ("corner_shi_tomasi", dict(sigma=1), dict(), False, False),
- ("corner_foerstner", dict(sigma=1), dict(), False, False),
- ("corner_peaks", dict(), dict(min_distance=(2, 3, 5)), False, True),
-]:
-
- for shape in [(128, 128, 128), (512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
-
- if function_name in ["corner_peaks", "peak_local_max"] and np.prod(shape) > 1000000:
- # skip any large sizes that take too long
- continue
- ndim = len(shape)
- if not allow_nd:
- if not allow_color:
- if ndim > 2:
- continue
- else:
- if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
- continue
- if shape[-1] == 3 and not allow_color:
+def main(args):
+
+ pfile = "cucim_feature_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
+ else:
+ all_results = pd.DataFrame()
+
+ dtypes = [np.dtype(args.dtype)]
+
+ for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
+ ("multiscale_basic_features", dict(edges=True), dict(texture=[True, False]), True, True),
+ ("canny", dict(sigma=1.8), dict(), False, False),
+ # reduced default rings, histograms, orientations to fit daisy at (3840, 2160) into GPU memory
+ (
+ "daisy",
+ dict(step=4, radius=15, rings=2, histograms=5, orientations=4),
+ dict(normalization=["l1", "l2", "daisy"]),
+ False,
+ False,
+ ),
+ ("structure_tensor", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
+ ("hessian_matrix", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
+ ("hessian_matrix_det", dict(sigma=1, approximate=False), dict(), False, True),
+ ("shape_index", dict(sigma=1, mode="reflect"), dict(), False, False),
+ ("corner_kitchen_rosenfeld", dict(mode="reflect"), dict(), False, False),
+ ("corner_harris", dict(k=0.05, eps=1e-6, sigma=1), dict(method=["k", "eps"]), False, False),
+ ("corner_shi_tomasi", dict(sigma=1), dict(), False, False),
+ ("corner_foerstner", dict(sigma=1), dict(), False, False),
+ ("corner_peaks", dict(), dict(min_distance=(2, 3, 5)), False, True),
+ ("match_template", dict(), dict(pad_input=[False], mode=["reflect"]), False, True)
+ ]:
+
+ if function_name == args.func_name:
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+ else:
continue
- if function_name == "multiscale_basic_features":
- fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
- if ndim == 3 and shape[-1] != 3:
- # Omit texture=True case to avoid excessive GPU memory usage
- var_kwargs["texture"] = [False]
-
- B = ImageBench(
- function_name=function_name,
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.feature,
- module_gpu=cucim.skimage.feature,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- ("match_template", dict(), dict(pad_input=[False], mode=["reflect"]), False, True),
-]:
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
-
+ # NOTE: the size guard below is currently disabled:
+ #   if function_name in ["corner_peaks", "peak_local_max"] and np.prod(shape) > 1000000: skip (large sizes take too long)
ndim = len(shape)
- if not allow_nd:
- if allow_color:
- if ndim > 2:
- continue
- else:
- if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
- continue
- if shape[-1] == 3 and not allow_color:
- continue
-
- B = MatchTemplateBench(
- function_name=function_name,
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.feature,
- module_gpu=cucim.skimage.feature,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ run_cpu = not args.no_cpu
+
+ if function_name != "match_template":
+ if not allow_nd:
+ if not allow_color:
+ if ndim > 2:
+ continue
+ else:
+ if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
+ continue
+
+ if shape[-1] == 3 and not allow_color:
+ continue
+
+ if function_name == "multiscale_basic_features":
+ fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
+ if ndim == 3 and shape[-1] != 3:
+ # Omit texture=True case to avoid excessive GPU memory usage
+ var_kwargs["texture"] = [False]
+
+ B = ImageBench(
+ function_name=function_name,
+ shape=shape,
+ dtypes=dtypes,
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ module_cpu=skimage.feature,
+ module_gpu=cucim.skimage.feature,
+ run_cpu=run_cpu,
+ )
+ else:
+ if not allow_nd:
+ if not allow_color:  # grayscale-only: reject ndim > 2 (matches the ImageBench branch above)
+ if ndim > 2:
+ continue
+ else:
+ if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
+ continue
+ if shape[-1] == 3 and not allow_color:
+ continue
+
+ B = MatchTemplateBench(
+ function_name=function_name,
+ shape=shape,
+ dtypes=dtypes,
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ module_cpu=skimage.feature,
+ module_gpu=cucim.skimage.feature,
+ run_cpu=run_cpu,
+ )
+
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ fbase = os.path.splitext(pfile)[0]
+ all_results.to_csv(fbase + ".csv")
+ all_results.to_pickle(pfile)
+ with open(fbase + ".md", "wt") as f:
+ f.write(all_results.to_markdown())
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Benchmarking cuCIM Feature')
+ func_name_choices = ["multiscale_basic_features","canny","daisy","structure_tensor","hessian_matrix","hessian_matrix_det","shape_index","corner_kitchen_rosenfeld","corner_harris","corner_shi_tomasi","corner_foerstner","corner_peaks","match_template"]
+ dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+ parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True)
+ parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True)
+ parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices=func_name_choices, required=True)
+ parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+ parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
+
+ args = parser.parse_args()
+ main(args)
diff --git a/benchmarks/skimage/cucim_filters_bench.py b/benchmarks/skimage/cucim_filters_bench.py
index c60c27816..e12a9a9e8 100644
--- a/benchmarks/skimage/cucim_filters_bench.py
+++ b/benchmarks/skimage/cucim_filters_bench.py
@@ -1,5 +1,6 @@
import os
import pickle
+import argparse
import cucim.skimage
import cucim.skimage.filters
@@ -10,107 +11,117 @@
from _image_bench import ImageBench
-pfile = "cucim_filters_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes = [np.float32]
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # _gabor.py
- (
- "gabor",
- dict(n_stds=3),
- dict(frequency=[0.075, 0.1, 0.2, 0.3]),
- False,
- False,
- ),
- # _gaussian.py
- (
- "gaussian",
- dict(truncate=4.0, preserve_range=True),
- dict(sigma=[0.25, 1, 4]),
- True,
- True,
- ),
- # _median.py
- ("median", dict(mode="nearest"), dict(), False, True),
- # _rank_order.py
- ("rank_order", dict(), dict(), False, True),
- # _unsharp_mask.py
- (
- "unsharp_mask",
- dict(),
- dict(radius=[0.5, 1.0, 2.0, 3.0]),
- True,
- True,
- ),
- # edges.py
- ("sobel", dict(), dict(axis=[None, 0, -1]), False, True),
- ("prewitt", dict(), dict(axis=[None, 0, -1]), False, True),
- ("scharr", dict(), dict(axis=[None, 0, -1]), False, True),
- ("roberts", dict(), dict(), False, False),
- ("roberts_pos_diag", dict(), dict(), False, False),
- ("roberts_neg_diag", dict(), dict(), False, False),
- ("farid", dict(), dict(), False, False),
- ("laplace", dict(ksize=3), dict(), False, True),
- # lpi_filter.py
- # TODO: benchmark wiener
- # ridges.py
- # TODO: had to set meijering, etc allow_nd to False just due to insufficient GPU memory
- (
- "meijering",
- dict(sigmas=range(1, 10, 2), alpha=None),
- dict(black_ridges=[True, False], mode=["reflect"]),
- False,
- False,
- ),
- (
- "sato",
- dict(sigmas=range(1, 10, 2)),
- dict(black_ridges=[True, False], mode=["reflect"]),
- False,
- False,
- ),
- (
- "frangi",
- dict(sigmas=range(1, 10, 2)),
- dict(black_ridges=[True, False], mode=["reflect"]),
- False,
- False,
- ),
- (
- "hessian",
- dict(sigmas=range(1, 10, 2)),
- dict(black_ridges=[True, False], mode=["reflect"]),
- False,
- False,
- ),
- # thresholding.py
- ("threshold_isodata", dict(), dict(nbins=[64, 256]), False, True),
- ("threshold_otsu", dict(), dict(nbins=[64, 256]), False, True),
- ("threshold_yen", dict(), dict(nbins=[64, 256]), False, True),
- # TODO: threshold_local should support n-dimensional data
- (
- "threshold_local",
- dict(),
- dict(block_size=[5, 15], method=["gaussian", "mean", "median"]),
- False,
- False,
- ),
- ("threshold_li", dict(), dict(), False, True),
- ("threshold_minimum", dict(), dict(nbins=[64, 256]), False, True),
- ("threshold_mean", dict(), dict(), False, True),
- ("threshold_triangle", dict(), dict(nbins=[64, 256]), False, True),
- ("threshold_niblack", dict(), dict(window_size=[7, 15, 65]), False, True),
- ("threshold_sauvola", dict(), dict(window_size=[7, 15, 65]), False, True),
- ("apply_hysteresis_threshold", dict(low=0.15, high=0.6), dict(), False, True),
- ("threshold_multiotsu", dict(), dict(nbins=[64, 256], classes=[3]), False, True),
-]:
-
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
+def main(args):
+
+ pfile = "cucim_filters_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
+ else:
+ all_results = pd.DataFrame()
+ dtypes = [np.dtype(args.dtype)]
+
+ for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
+ # _gabor.py
+ (
+ "gabor",
+ dict(n_stds=3),
+ dict(frequency=[0.075, 0.1, 0.2, 0.3]),
+ False,
+ False,
+ ),
+ # _gaussian.py
+ (
+ "gaussian",
+ dict(truncate=4.0, preserve_range=True),
+ dict(sigma=[0.25, 1, 4]),
+ True,
+ True,
+ ),
+ # _median.py
+ ("median", dict(mode="nearest"), dict(), False, True),
+ # _rank_order.py
+ ("rank_order", dict(), dict(), False, True),
+ # _unsharp_mask.py
+ (
+ "unsharp_mask",
+ dict(),
+ dict(radius=[0.5, 1.0, 2.0, 3.0]),
+ True,
+ True,
+ ),
+ # edges.py
+ ("sobel", dict(), dict(axis=[None, 0, -1]), False, True),
+ ("prewitt", dict(), dict(axis=[None, 0, -1]), False, True),
+ ("scharr", dict(), dict(axis=[None, 0, -1]), False, True),
+ ("roberts", dict(), dict(), False, False),
+ ("roberts_pos_diag", dict(), dict(), False, False),
+ ("roberts_neg_diag", dict(), dict(), False, False),
+ ("farid", dict(), dict(), False, False),
+ ("laplace", dict(ksize=3), dict(), False, True),
+ # lpi_filter.py
+ # TODO: benchmark wiener
+ # ridges.py
+ # TODO: allow_nd is set to False for meijering, sato, frangi and hessian only because of insufficient GPU memory
+ (
+ "meijering",
+ dict(sigmas=range(1, 10, 2), alpha=None),
+ dict(black_ridges=[True, False], mode=["reflect"]),
+ False,
+ False,
+ ),
+ (
+ "sato",
+ dict(sigmas=range(1, 10, 2)),
+ dict(black_ridges=[True, False], mode=["reflect"]),
+ False,
+ False,
+ ),
+ (
+ "frangi",
+ dict(sigmas=range(1, 10, 2)),
+ dict(black_ridges=[True, False], mode=["reflect"]),
+ False,
+ False,
+ ),
+ (
+ "hessian",
+ dict(sigmas=range(1, 10, 2)),
+ dict(black_ridges=[True, False], mode=["reflect"]),
+ False,
+ False,
+ ),
+ # thresholding.py
+ ("threshold_isodata", dict(), dict(nbins=[64, 256]), False, True),
+ ("threshold_otsu", dict(), dict(nbins=[64, 256]), False, True),
+ ("threshold_yen", dict(), dict(nbins=[64, 256]), False, True),
+ # TODO: threshold_local should support n-dimensional data
+ (
+ "threshold_local",
+ dict(),
+ dict(block_size=[5, 15], method=["gaussian", "mean", "median"]),
+ False,
+ False,
+ ),
+ ("threshold_li", dict(), dict(), False, True),
+ ("threshold_minimum", dict(), dict(nbins=[64, 256]), False, True),
+ ("threshold_mean", dict(), dict(), False, True),
+ ("threshold_triangle", dict(), dict(nbins=[64, 256]), False, True),
+ ("threshold_niblack", dict(), dict(window_size=[7, 15, 65]), False, True),
+ ("threshold_sauvola", dict(), dict(window_size=[7, 15, 65]), False, True),
+ ("apply_hysteresis_threshold", dict(low=0.15, high=0.6), dict(), False, True),
+ ("threshold_multiotsu", dict(), dict(nbins=[64, 256], classes=[3]), False, True),
+ ]:
+ if function_name != args.func_name:
+ continue
+ else:
+ # image sizes/shapes
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+
+ # previously iterated over: for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]
+
+ if function_name in ["gaussian", "unsharp_mask"]:
+ fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
ndim = len(shape)
if not allow_nd:
@@ -131,7 +142,7 @@
footprints = []
ndim = len(shape)
footprint_sizes = [3, 5, 7, 9] if ndim == 2 else [3, 5, 7]
- for footprint_size in [3, 5, 7, 9]:
+ for footprint_size in footprint_sizes:
footprints.append(
np.ones((footprint_size,) * ndim, dtype=bool)
)
@@ -148,12 +159,27 @@
var_kwargs=var_kwargs,
module_cpu=skimage.filters,
module_gpu=cucim.skimage.filters,
+ run_cpu=not args.no_cpu,
)
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ fbase = os.path.splitext(pfile)[0]
+ all_results.to_csv(fbase + ".csv")
+ all_results.to_pickle(pfile)
+ with open(fbase + ".md", "wt") as f:
+ f.write(all_results.to_markdown())
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Benchmarking cuCIM Filters')
+ func_name_choices = ['gabor', 'gaussian', 'median', 'rank_order', 'unsharp_mask', 'sobel', 'prewitt', 'scharr', 'roberts', 'roberts_pos_diag', 'roberts_neg_diag', 'farid', 'laplace', 'meijering', 'sato', 'frangi', 'hessian', 'threshold_isodata', 'threshold_otsu', 'threshold_yen', 'threshold_local', 'threshold_li', 'threshold_minimum', 'threshold_mean', 'threshold_triangle', 'threshold_niblack', 'threshold_sauvola', 'apply_hysteresis_threshold', 'threshold_multiotsu']
+ dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+ parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True)
+ parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True)
+ parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices=func_name_choices, required=True)
+ parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+ parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
+
+ args = parser.parse_args()
+ main(args)
diff --git a/benchmarks/skimage/cucim_measure_bench.py b/benchmarks/skimage/cucim_measure_bench.py
index badb53e73..c5f207b64 100644
--- a/benchmarks/skimage/cucim_measure_bench.py
+++ b/benchmarks/skimage/cucim_measure_bench.py
@@ -1,3 +1,4 @@
+import argparse
import math
import os
import pickle
@@ -25,6 +26,7 @@ def __init__(
index_str=None,
module_cpu=skimage.measure,
module_gpu=cucim.skimage.measure,
+ run_cpu=True,
):
self.contiguous_labels = contiguous_labels
@@ -38,6 +40,7 @@ def __init__(
index_str=index_str,
module_cpu=module_cpu,
module_gpu=module_gpu,
+ run_cpu=run_cpu,
)
def set_args(self, dtype):
@@ -49,7 +52,7 @@ def set_args(self, dtype):
[0, 0, 0, 0, 0, 5, 0, 0],
]
)
- tiling = tuple(s // a_s for s, a_s in zip(shape, a.shape))
+ tiling = tuple(s // a_s for s, a_s in zip(self.shape, a.shape))
if self.contiguous_labels:
image = np.kron(a, np.ones(tiling, dtype=a.dtype))
else:
@@ -71,6 +74,7 @@ def __init__(
index_str=None,
module_cpu=skimage.measure,
module_gpu=cucim.skimage.measure,
+ run_cpu=True,
):
self.contiguous_labels = contiguous_labels
@@ -84,6 +88,7 @@ def __init__(
index_str=index_str,
module_cpu=module_cpu,
module_gpu=module_gpu,
+ run_cpu=run_cpu,
)
def set_args(self, dtype):
@@ -95,7 +100,7 @@ def set_args(self, dtype):
[0, 0, 0, 0, 0, 5, 0, 0],
]
)
- tiling = tuple(s // a_s for s, a_s in zip(shape, a.shape))
+ tiling = tuple(s // a_s for s, a_s in zip(self.shape, a.shape))
if self.contiguous_labels:
image = np.kron(a, np.ones(tiling, dtype=a.dtype))
else:
@@ -127,32 +132,70 @@ def set_args(self, dtype):
self.args_gpu = (imaged,)
-pfile = "cucim_measure_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes = [np.float32]
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # _gaussian.py
- (
- "label",
- dict(return_num=False, background=0),
- dict(connectivity=[1, 2]),
- False,
- True,
- ),
- # regionprops.py
- ("regionprops", dict(), dict(), False, True),
-]:
-
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
+def main(args):
+
+ pfile = "cucim_measure_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
+ else:
+ all_results = pd.DataFrame()
+
+ dtypes = [np.dtype(args.dtype)]
+ # image sizes/shapes
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+ run_cpu = not args.no_cpu
+
+ for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
+ # _label.py
+ (
+ "label",
+ dict(return_num=False, background=0),
+ dict(connectivity=[1, 2]),
+ False,
+ True,
+ ),
+ # regionprops.py
+ ("regionprops", dict(), dict(), False, True),
+ # _moments.py
+ ("moments", dict(), dict(order=[1, 2, 3, 4]), False, False),
+ ("moments_central", dict(), dict(order=[1, 2, 3]), False, True),
+ # omitted from benchmarks (only tiny arrays): moments_normalized, moments_hu
+ ("centroid", dict(), dict(), False, True),
+ ("inertia_tensor", dict(), dict(), False, True),
+ ("inertia_tensor_eigvals", dict(), dict(), False, True),
+ # _polygon.py
+ # TODO: approximate_polygon, subdivide_polygon
+ # block.py
+ (
+ "block_reduce",
+ dict(),
+ dict(
+ func=[
+ cp.sum,
+ ]
+ ),
+ True,
+ True,
+ ), # variable block_size configured below
+ # entropy.py
+ ("shannon_entropy", dict(base=2), dict(), True, True),
+ # profile.py
+ (
+ "profile_line",
+ dict(src=(5, 7)),
+ dict(reduce_func=[cp.mean], linewidth=[1, 2, 4], order=[1, 3]),
+ True,
+ False,
+ ), # dst endpoint configured below
+ ]:
+
+ if function_name != args.func_name:
+ continue
ndim = len(shape)
if not allow_nd:
- if allow_color:
+ if not allow_color:
if ndim > 2:
continue
else:
@@ -161,118 +204,89 @@ def set_args(self, dtype):
if shape[-1] == 3 and not allow_color:
continue
- Tester = LabelBench if function_name == "label" else RegionpropsBench
+ if function_name in ['label', 'regionprops']:
+
+ Tester = LabelBench if function_name == "label" else RegionpropsBench
+
+ for contiguous_labels in [True, False]:
+ if contiguous_labels:
+ index_str = f"contiguous"
+ else:
+ index_str = None
+ B = Tester(
+ function_name=function_name,
+ shape=shape,
+ dtypes=dtypes,
+ contiguous_labels=contiguous_labels,
+ index_str=index_str,
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ module_cpu=skimage.measure,
+ module_gpu=cucim.skimage.measure,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+ else:
+
- for contiguous_labels in [True, False]:
- if contiguous_labels:
- index_str = f"contiguous"
- else:
- index_str = None
- B = Tester(
+ if function_name == "gabor" and np.prod(shape) > 1000000:
+ # avoid cases that are too slow on the CPU
+ var_kwargs["frequency"] = [f for f in var_kwargs["frequency"] if f >= 0.1]
+
+ if function_name == "block_reduce":
+ ndim = len(shape)
+ if shape[-1] == 3:
+ block_sizes = [(b,) * (ndim - 1) + (3,) for b in (16, 32, 64)]
+ else:
+ block_sizes = [(b,) * ndim for b in (16, 32, 64)]
+ var_kwargs["block_size"] = block_sizes
+
+ if function_name == "profile_line":
+ fixed_kwargs["dst"] = (shape[0] - 32, shape[1] + 9)
+
+ if function_name == "median":
+ footprints = []
+ ndim = len(shape)
+ footprint_sizes = [3, 5, 7, 9] if ndim == 2 else [3, 5, 7]
+ for footprint_size in footprint_sizes:  # use the ndim-dependent size list computed above
+ footprints.append(
+ np.ones((footprint_size,) * ndim, dtype=bool)  # scalar size per axis, not the whole list
+ )
+ var_kwargs["footprint"] = footprints
+
+ if function_name in ["gaussian", "unsharp_mask"]:
+ fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
+
+ B = FiltersBench(
function_name=function_name,
shape=shape,
dtypes=dtypes,
- contiguous_labels=contiguous_labels,
- index_str=index_str,
fixed_kwargs=fixed_kwargs,
var_kwargs=var_kwargs,
module_cpu=skimage.measure,
module_gpu=cucim.skimage.measure,
+ run_cpu=run_cpu,
)
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # _moments.py
- ("moments", dict(), dict(order=[1, 2, 3, 4]), False, False),
- ("moments_central", dict(), dict(order=[1, 2, 3]), False, True),
- # omited from benchmarks (only tiny arrays): moments_normalized, moments_hu
- ("centroid", dict(), dict(), False, True),
- ("inertia_tensor", dict(), dict(), False, True),
- ("inertia_tensor_eigvals", dict(), dict(), False, True),
- # _polygon.py
- # TODO: approximate_polygon, subdivide_polygon
- # block.py
- (
- "block_reduce",
- dict(),
- dict(
- func=[
- cp.sum,
- ]
- ),
- True,
- True,
- ), # variable block_size configured below
- # entropy.py
- ("shannon_entropy", dict(base=2), dict(), True, True),
- # profile.py
- (
- "profile_line",
- dict(src=(5, 7)),
- dict(reduce_func=[cp.mean], linewidth=[1, 2, 4], order=[1, 3]),
- True,
- False,
- ), # variable block_size configured below
-]:
-
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
-
- ndim = len(shape)
- if not allow_nd:
- if not allow_color:
- if ndim > 2:
- continue
- else:
- if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
- continue
- if shape[-1] == 3 and not allow_color:
- continue
-
- if function_name == "gabor" and np.prod(shape) > 1000000:
- # avoid cases that are too slow on the CPU
- var_kwargs["frequency"] = [f for f in var_kwargs["frequency"] if f >= 0.1]
-
- if function_name == "block_reduce":
- ndim = len(shape)
- if shape[-1] == 3:
- block_sizes = [(b,) * (ndim - 1) + (3,) for b in (16, 32, 64)]
- else:
- block_sizes = [(b,) * ndim for b in (16, 32, 64)]
- var_kwargs["block_size"] = block_sizes
-
- if function_name == "profile_line":
- fixed_kwargs["dst"] = (shape[0] - 32, shape[1] + 9)
-
- if function_name == "median":
- footprints = []
- ndim = len(shape)
- footprint_sizes = [3, 5, 7, 9] if ndim == 2 else [3, 5, 7]
- for footprint_size in [3, 5, 7, 9]:
- footprints.append(
- np.ones((footprint_sizes,) * ndim, dtype=bool)
- )
- var_kwargs["footprint"] = footprints
-
- if function_name in ["gaussian", "unsharp_mask"]:
- fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
-
- B = FiltersBench(
- function_name=function_name,
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.measure,
- module_gpu=cucim.skimage.measure,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ fbase = os.path.splitext(pfile)[0]
+ all_results.to_csv(fbase + ".csv")
+ all_results.to_pickle(pfile)
+ with open(fbase + ".md", "wt") as f:
+ f.write(all_results.to_markdown())
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Benchmarking cuCIM measure functions')
+ func_name_choices = ['label', 'regionprops', 'moments', 'moments_central', 'centroid', 'inertia_tensor', 'inertia_tensor_eigvals', 'block_reduce', 'shannon_entropy', 'profile_line']
+ dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+ parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True)
+ parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True)
+ parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices=func_name_choices, required=True)
+ parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+ parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
+
+ args = parser.parse_args()
+ main(args)
diff --git a/benchmarks/skimage/cucim_metrics_bench.py b/benchmarks/skimage/cucim_metrics_bench.py
index 713c78fbd..af6b94814 100644
--- a/benchmarks/skimage/cucim_metrics_bench.py
+++ b/benchmarks/skimage/cucim_metrics_bench.py
@@ -1,3 +1,4 @@
+import argparse
import os
import pickle
@@ -17,44 +18,51 @@ def set_args(self, dtype):
imaged = cp.testing.shaped_arange(self.shape, dtype=dtype)
imaged2 = cp.testing.shaped_arange(self.shape, dtype=dtype)
imaged2 = imaged2 + 0.05 * cp.random.standard_normal(self.shape)
- imaged /= imaged.max()
- imaged2 /= imaged2.max()
+ imaged = imaged / imaged.max()
+ imaged2 = imaged2 / imaged2.max()
imaged2 = imaged2.clip(0, 1.0)
self.args_cpu = (cp.asnumpy(imaged), cp.asnumpy(imaged2))
self.args_gpu = (imaged, imaged2)
-pfile = "cucim_metrics_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes = [np.float32]
-
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # _structural_similarity.py
- (
- "structural_similarity",
- dict(data_range=1.0),
- dict(gradient=[False, True], gaussian_weights=[False, True]),
- True,
- True,
- ),
- # simple_metrics.py
- ("mean_squared_error", dict(), dict(), True, True),
- (
- "normalized_root_mse",
- dict(),
- dict(normalization=["euclidean", "min-max", "mean"]),
- True,
- True,
- ),
- ("peak_signal_noise_ratio", dict(data_range=1.0), dict(), True, True),
-]:
-
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
+def main(args):
+
+ pfile = "cucim_metrics_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
+ else:
+ all_results = pd.DataFrame()
+
+ dtypes = [np.dtype(args.dtype)]
+ # image sizes/shapes
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+ run_cpu = not args.no_cpu
+
+ for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
+ # _structural_similarity.py
+ (
+ "structural_similarity",
+ dict(data_range=1.0),
+ dict(gradient=[False, True], gaussian_weights=[False, True]),
+ True,
+ True,
+ ),
+ # simple_metrics.py
+ ("mean_squared_error", dict(), dict(), True, True),
+ (
+ "normalized_root_mse",
+ dict(),
+ dict(normalization=["euclidean", "min-max", "mean"]),
+ True,
+ True,
+ ),
+ ("peak_signal_noise_ratio", dict(data_range=1.0), dict(), True, True),
+ ("normalized_mutual_information", dict(bins=100), dict(), True, True),
+
+ ]:
+ if function_name != args.func_name:
+ continue
ndim = len(shape)
if not allow_nd:
@@ -78,13 +86,28 @@ def set_args(self, dtype):
var_kwargs=var_kwargs,
module_cpu=skimage.metrics,
module_gpu=cucim.skimage.metrics,
+ run_cpu=run_cpu,
)
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+
+ fbase = os.path.splitext(pfile)[0]
+ all_results.to_csv(fbase + ".csv")
+ all_results.to_pickle(pfile)
+ with open(fbase + ".md", "wt") as f:
+ f.write(all_results.to_markdown())
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Benchmarking cuCIM metrics functions')
+ func_name_choices = ['structural_similarity', 'mean_squared_error', 'normalized_root_mse', 'peak_signal_noise_ratio', 'normalized_mutual_information']
+ dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+ parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True)
+ parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True)
+ parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices=func_name_choices, required=True)
+ parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+ parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ args = parser.parse_args()
+ main(args)
diff --git a/benchmarks/skimage/cucim_morphology_bench.py b/benchmarks/skimage/cucim_morphology_bench.py
index dd8fbed4c..8fe1d6a95 100644
--- a/benchmarks/skimage/cucim_morphology_bench.py
+++ b/benchmarks/skimage/cucim_morphology_bench.py
@@ -1,3 +1,4 @@
+import argparse
import copy
import functools
import math
@@ -30,6 +31,7 @@ def __init__(
var_kwargs={},
module_cpu=skimage.morphology,
module_gpu=cucim.skimage.morphology,
+ run_cpu=True,
):
array_kwargs = dict(footprint=footprint)
@@ -47,6 +49,7 @@ def __init__(
index_str=index_str,
module_cpu=module_cpu,
module_gpu=module_gpu,
+ run_cpu=run_cpu,
)
def set_args(self, dtype):
@@ -104,169 +107,169 @@ def set_args(self, dtype):
self.args_gpu = (a, 5)
-pfile = "cucim_morphology_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes_gray = [np.float32]
-
-
-for function_name, fixed_kwargs, var_kwargs, allow_nd in [
- ("thin", dict(), dict(), True),
-]:
-
- for shape in [(512, 512), (3840, 2160)]:
-
- ndim = len(shape)
- if ndim != 2:
- raise ValueError("only 2d benchmark data has been implemented")
-
- if not allow_nd and ndim > 2:
+def main(args):
+
+ pfile = "cucim_morphology_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
+ else:
+ all_results = pd.DataFrame()
+
+ dtypes = [np.dtype(args.dtype)]
+ # image sizes/shapes
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+ run_cpu = not args.no_cpu
+
+ for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
+ # binary.py
+ ("binary_erosion", dict(), dict(), False, True),
+ ("binary_dilation", dict(), dict(), False, True),
+ ("binary_opening", dict(), dict(), False, True),
+ ("binary_closing", dict(), dict(), False, True),
+ # misc.py
+ ("remove_small_objects", dict(), dict(), False, True),
+ ("remove_small_holes", dict(), dict(), False, True),
+ # gray.py
+ ("erosion", dict(), dict(), False, True),
+ ("dilation", dict(), dict(), False, True),
+ ("opening", dict(), dict(), False, True),
+ ("closing", dict(), dict(), False, True),
+ ("white_tophat", dict(), dict(), False, True),
+ ("black_tophat", dict(), dict(), False, True),
+ # _skeletonize.py
+ ("thin", dict(), dict(), False, True),
+ # grayreconstruct.py
+ ("reconstruction", dict(), dict(), False, True),
+ # footprints.py
+ # OMIT the functions from this file (each creates a structuring element)
+ ]:
+
+ if function_name != args.func_name:
continue
-
- B = SkeletonizeBench(
- function_name=function_name,
- shape=shape,
- dtypes=[bool],
- fixed_kwargs={},
- var_kwargs=var_kwargs,
- module_cpu=skimage.morphology,
- module_gpu=cucim.skimage.morphology,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-for function_name, fixed_kwargs, var_kwargs, allow_nd in [
- ("binary_erosion", dict(), dict(), True),
- ("binary_dilation", dict(), dict(), True),
- ("binary_opening", dict(), dict(), True),
- ("binary_closing", dict(), dict(), True),
-]:
-
- for shape in [(512, 512), (3840, 2160), (192, 192, 192)]:
-
ndim = len(shape)
- if not allow_nd and ndim > 2:
- continue
+ if function_name == 'thin':
+ if ndim != 2:
+ raise ValueError("only 2d benchmark data has been implemented")
- for connectivity in range(1, ndim + 1):
- index_str = f"conn={connectivity}"
- footprint = ndi.generate_binary_structure(ndim, connectivity)
+ if not allow_nd and ndim > 2:
+ continue
- B = BinaryMorphologyBench(
+ B = SkeletonizeBench(
function_name=function_name,
shape=shape,
dtypes=[bool],
- footprint=footprint,
fixed_kwargs={},
var_kwargs=var_kwargs,
- index_str=index_str,
module_cpu=skimage.morphology,
module_gpu=cucim.skimage.morphology,
+ run_cpu=run_cpu,
)
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-for function_name, fixed_kwargs, var_kwargs, allow_nd in [
- # misc.py
- ("remove_small_objects", dict(), dict(), True),
- ("remove_small_holes", dict(), dict(), True),
-]:
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+ elif function_name.startswith('binary_'):
+
+ if not allow_nd and ndim > 2:
+ continue
+
+ for connectivity in range(1, ndim + 1):
+ index_str = f"conn={connectivity}"
+ footprint = ndi.generate_binary_structure(ndim, connectivity)
+
+ B = BinaryMorphologyBench(
+ function_name=function_name,
+ shape=shape,
+ dtypes=[bool],
+ footprint=footprint,
+ fixed_kwargs={},
+ var_kwargs=var_kwargs,
+ index_str=index_str,
+ module_cpu=skimage.morphology,
+ module_gpu=cucim.skimage.morphology,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
- for shape in [(512, 512), (3840, 2160), (192, 192, 192)]:
- ndim = len(shape)
- if not allow_nd and ndim > 2:
- continue
+ elif function_name in ['remove_small_holes', 'remove_small_objects']:
+ if not allow_nd and ndim > 2:
+ continue
- if function_name == "remove_small_objects":
- TestClass = RemoveSmallObjectsBench
- elif function_name == "remove_small_holes":
- TestClass = RemoveSmallHolesBench
- else:
- raise ValueError(f"unknown function: {function_name}")
- B = TestClass(
- function_name=function_name,
- shape=shape,
- dtypes=[bool],
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.morphology,
- module_gpu=cucim.skimage.morphology,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # grey.py
- ("erosion", dict(), dict(), False, True),
- ("dilation", dict(), dict(), False, True),
- ("opening", dict(), dict(), False, True),
- ("closing", dict(), dict(), False, True),
- ("white_tophat", dict(), dict(), False, True),
- ("black_tophat", dict(), dict(), False, True),
- # greyreconstruct.py
- ("reconstruction", dict(), dict(), False, True),
- # footprints.py
- # OMIT the functions from this file (each creates a structuring element)
-]:
-
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
+ if function_name == "remove_small_objects":
+ TestClass = RemoveSmallObjectsBench
+ elif function_name == "remove_small_holes":
+ TestClass = RemoveSmallHolesBench
- ndim = len(shape)
- if not allow_nd:
- if not allow_color:
- if ndim > 2:
- continue
else:
- if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
- continue
- if shape[-1] == 3 and not allow_color:
- continue
-
- if function_name == "gabor" and np.prod(shape) > 1000000:
- # avoid cases that are too slow on the CPU
- var_kwargs["frequency"] = [f for f in var_kwargs["frequency"] if f >= 0.1]
-
- if function_name == "median":
- footprints = []
- ndim = len(shape)
- footprint_sizes = [3, 5, 7, 9] if ndim == 2 else [3, 5, 7]
- for footprint_size in [3, 5, 7, 9]:
- footprints.append(
- np.ones((footprint_sizes,) * ndim, dtype=bool)
- )
- var_kwargs["footprint"] = footprints
-
- if function_name in ["gaussian", "unsharp_mask"]:
- fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
+ raise ValueError(f"unknown function: {function_name}")
+ B = TestClass(
+ function_name=function_name,
+ shape=shape,
+ dtypes=[bool],
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ module_cpu=skimage.morphology,
+ module_gpu=cucim.skimage.morphology,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
- if function_name == "reconstruction":
- TestClass = ReconstructionBench
else:
- TestClass = ImageBench
- B = TestClass(
- function_name=function_name,
- shape=shape,
- dtypes=dtypes_gray,
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.morphology,
- module_gpu=cucim.skimage.morphology,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ if not allow_nd:
+ if not allow_color:
+ if ndim > 2:
+ continue
+ else:
+ if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
+ continue
+
+ if shape[-1] == 3 and not allow_color:
+ continue
+ if function_name == "reconstruction":
+ TestClass = ReconstructionBench
+ else:
+ TestClass = ImageBench
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ B = TestClass(
+ function_name=function_name,
+ shape=shape,
+ dtypes=dtypes,
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ module_cpu=skimage.morphology,
+ module_gpu=cucim.skimage.morphology,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ fbase = os.path.splitext(pfile)[0]
+ all_results.to_csv(fbase + ".csv")
+ all_results.to_pickle(pfile)
+ try:
+ import tabulate # noqa: F401 - optional dependency required by DataFrame.to_markdown
+
+ with open(fbase + ".md", "wt") as f:
+ f.write(all_results.to_markdown())
+ except ImportError:
+ pass # tabulate is not installed: skip the optional Markdown report
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Benchmarking cuCIM morphology functions')
+ func_name_choices = ['binary_erosion', 'binary_dilation', 'binary_opening', 'binary_closing', 'remove_small_objects', 'remove_small_holes', 'erosion', 'dilation', 'opening', 'closing', 'white_tophat', 'black_tophat', 'thin', 'reconstruction']
+ dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+ parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True)
+ parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True)
+ parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True)
+ parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+ parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
+
+ args = parser.parse_args()
+ main(args)
diff --git a/benchmarks/skimage/cucim_registration_bench.py b/benchmarks/skimage/cucim_registration_bench.py
index d90bd16a4..6d275dcec 100644
--- a/benchmarks/skimage/cucim_registration_bench.py
+++ b/benchmarks/skimage/cucim_registration_bench.py
@@ -1,3 +1,4 @@
+import argparse
import math
import os
import pickle
@@ -33,48 +34,93 @@ def set_args(self, dtype):
self.args_gpu = (imaged, imaged2)
-pfile = "cucim_registration_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes = [np.float32]
-
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # _phase_cross_correlation.py
- ("phase_cross_correlation", dict(), dict(), False, True),
-]:
+def main(args):
+
+ pfile = "cucim_registration_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
+ else:
+ all_results = pd.DataFrame()
+
+ dtypes = [np.dtype(args.dtype)]
+ # image sizes/shapes
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+ run_cpu = not args.no_cpu
+
+ for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
+ # _phase_cross_correlation.py
+ ("phase_cross_correlation", dict(), dict(), False, True),
+ # optical flow functions
+ ("optical_flow_tvl1", dict(), dict(num_iter=[10], num_warp=[5]), False, True),
+ (
+ "optical_flow_ilk",
+ dict(),
+ dict(radius=[3, 7], num_warp=[10], gaussian=[False, True], prefilter=[False, True]),
+ False,
+ True,
+ ),
+
+ ]:
+
+ if function_name != args.func_name:
+ continue
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
+ if function_name == 'phase_cross_correlation':
+
+ ndim = len(shape)
+ if not allow_nd:
+ if not allow_color:
+ if ndim > 2:
+ continue
+ else:
+ if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
+ continue
+ if shape[-1] == 3 and not allow_color:
+ continue
+
+ for masked in [True, False]:
+
+ index_str = f"masked={masked}"
+ if masked:
+ moving_mask = cp.ones(shape, dtype=bool)
+ moving_mask[20:-20, :] = 0
+ moving_mask[:, 20:-20] = 0
+ reference_mask = cp.ones(shape, dtype=bool)
+ reference_mask[80:-80, :] = 0
+ reference_mask[:, 80:-80] = 0
+ fixed_kwargs["moving_mask"] = moving_mask
+ fixed_kwargs["reference_mask"] = reference_mask
+ else:
+ fixed_kwargs["moving_mask"] = None
+ fixed_kwargs["reference_mask"] = None
+
+ B = RegistrationBench(
+ function_name=function_name,
+ shape=shape,
+ dtypes=dtypes,
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ index_str=index_str,
+ module_cpu=skimage.registration,
+ module_gpu=cucim.skimage.registration,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
- ndim = len(shape)
- if not allow_nd:
- if not allow_color:
- if ndim > 2:
- continue
- else:
- if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
- continue
- if shape[-1] == 3 and not allow_color:
- continue
+ else:
- for masked in [True, False]:
-
- index_str = f"masked={masked}"
- if masked:
- moving_mask = cp.ones(shape, dtype=bool)
- moving_mask[20:-20, :] = 0
- moving_mask[:, 20:-20] = 0
- reference_mask = cp.ones(shape, dtype=bool)
- reference_mask[80:-80, :] = 0
- reference_mask[:, 80:-80] = 0
- fixed_kwargs["moving_mask"] = moving_mask
- fixed_kwargs["reference_mask"] = reference_mask
- else:
- fixed_kwargs["moving_mask"] = None
- fixed_kwargs["reference_mask"] = None
+ ndim = len(shape)
+ if not allow_nd:
+ if not allow_color:
+ if ndim > 2:
+ continue
+ else:
+ if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
+ continue
+ if shape[-1] == 3 and not allow_color:
+ continue
B = RegistrationBench(
function_name=function_name,
@@ -82,54 +128,35 @@ def set_args(self, dtype):
dtypes=dtypes,
fixed_kwargs=fixed_kwargs,
var_kwargs=var_kwargs,
- index_str=index_str,
module_cpu=skimage.registration,
module_gpu=cucim.skimage.registration,
+ run_cpu=run_cpu,
)
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # _phase_cross_correlation.py
- ("optical_flow_tvl1", dict(), dict(num_iter=[10], num_warp=[5]), False, True),
- (
- "optical_flow_ilk",
- dict(),
- dict(radius=[3, 7], num_warp=[10], gaussian=[False, True], prefilter=[False, True]),
- False,
- True,
- ),
-]:
-
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
-
- ndim = len(shape)
- if not allow_nd:
- if not allow_color:
- if ndim > 2:
- continue
- else:
- if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
- continue
- if shape[-1] == 3 and not allow_color:
- continue
-
- B = RegistrationBench(
- function_name=function_name,
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.registration,
- module_gpu=cucim.skimage.registration,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+
+ fbase = os.path.splitext(pfile)[0]
+ all_results.to_csv(fbase + ".csv")
+ all_results.to_pickle(pfile)
+ try:
+ import tabulate # noqa: F401 - optional dependency required by DataFrame.to_markdown
+
+ with open(fbase + ".md", "wt") as f:
+ f.write(all_results.to_markdown())
+ except ImportError:
+ pass # tabulate is not installed: skip the optional Markdown report
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Benchmarking cuCIM registration functions')
+ func_name_choices = ['phase_cross_correlation', 'optical_flow_tvl1', 'optical_flow_ilk']
+ dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+ parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True)
+ parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True)
+ parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True)
+ parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+ parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
+
+ args = parser.parse_args()
+ main(args)
diff --git a/benchmarks/skimage/cucim_restoration_bench.py b/benchmarks/skimage/cucim_restoration_bench.py
index e20be1b9f..a1523dbae 100644
--- a/benchmarks/skimage/cucim_restoration_bench.py
+++ b/benchmarks/skimage/cucim_restoration_bench.py
@@ -1,3 +1,4 @@
+import argparse
import math
import os
import pickle
@@ -97,65 +98,35 @@ def set_args(self, dtype):
self.args_gpu = (imaged, psfd)
-pfile = "cucim_restoration_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes = [np.float32]
+def main(args):
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # _denoise.py
- ("denoise_tv_chambolle", dict(), dict(weight=[0.02]), True, True),
- # j_invariant.py
- ("calibrate_denoiser", dict(), dict(), False, True),
-]:
+ pfile = "cucim_restoration_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
+ else:
+ all_results = pd.DataFrame()
- for shape in [(512, 512), (1980, 1080), (1980, 1080, 3), (128, 128, 128)]:
+ dtypes = [np.dtype(args.dtype)]
+ # image sizes/shapes
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+ run_cpu = not args.no_cpu
- ndim = len(shape)
- if not allow_nd:
- if not allow_color:
- if ndim > 2:
- continue
- else:
- if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
- continue
- if shape[-1] == 3 and not allow_color:
- continue
- if function_name == "denoise_tv_chambolle":
- fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
+ for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
+ # _denoise.py
+ ("denoise_tv_chambolle", dict(), dict(weight=[0.02]), True, True),
+ # j_invariant.py
+ ("calibrate_denoiser", dict(), dict(), False, True),
+ # deconvolution.py
+ ("wiener", dict(balance=100.0), dict(), False, False),
+ ("unsupervised_wiener", dict(), dict(), False, False),
+ ("richardson_lucy", dict(), dict(num_iter=[5]), False, True),
+ ]:
- if function_name == "calibrate_denoiser":
- denoise_class = CalibratedDenoiseBench
- else:
- denoise_class = DenoiseBench
-
- B = denoise_class(
- function_name=function_name,
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.restoration,
- module_gpu=cucim.skimage.restoration,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-# function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd = ('unsupervised_wiener', dict(), dict(), False, True)
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # deconvolution.py
- ("wiener", dict(balance=100.0), dict(), False, False),
- ("unsupervised_wiener", dict(), dict(), False, False),
- ("richardson_lucy", dict(), dict(num_iter=[5]), False, True),
-]:
-
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
+ if function_name != args.func_name:
+ continue
ndim = len(shape)
if not allow_nd:
@@ -168,21 +139,65 @@ def set_args(self, dtype):
if shape[-1] == 3 and not allow_color:
continue
- B = DeconvolutionBench(
- function_name=function_name,
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.restoration,
- module_gpu=cucim.skimage.restoration,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ if function_name in ['denoise_tv_chambolle', 'calibrate_denoiser']:
+
+ if function_name == "denoise_tv_chambolle":
+ fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
+
+ if function_name == "calibrate_denoiser":
+ denoise_class = CalibratedDenoiseBench
+ else:
+ denoise_class = DenoiseBench
+
+ B = denoise_class(
+ function_name=function_name,
+ shape=shape,
+ dtypes=dtypes,
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ module_cpu=skimage.restoration,
+ module_gpu=cucim.skimage.restoration,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ elif function_name in ['wiener', 'unsupervised_wiener', 'richardson_lucy']:
+
+ B = DeconvolutionBench(
+ function_name=function_name,
+ shape=shape,
+ dtypes=dtypes,
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ module_cpu=skimage.restoration,
+ module_gpu=cucim.skimage.restoration,
+ run_cpu=run_cpu,
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ fbase = os.path.splitext(pfile)[0]
+ all_results.to_csv(fbase + ".csv")
+ all_results.to_pickle(pfile)
+ try:
+ import tabulate # noqa: F401 - optional dependency required by DataFrame.to_markdown
+
+ with open(fbase + ".md", "wt") as f:
+ f.write(all_results.to_markdown())
+ except ImportError:
+ pass # tabulate is not installed: skip the optional Markdown report
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Benchmarking cuCIM restoration functions')
+ func_name_choices = ['denoise_tv_chambolle', 'calibrate_denoiser', 'wiener', 'unsupervised_wiener', 'richardson_lucy']
+ dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+ parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True)
+ parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True)
+ parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True)
+ parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+ parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
+
+ args = parser.parse_args()
+ main(args)
diff --git a/benchmarks/skimage/cucim_segmentation_bench.py b/benchmarks/skimage/cucim_segmentation_bench.py
index b770cdb36..382349a9b 100644
--- a/benchmarks/skimage/cucim_segmentation_bench.py
+++ b/benchmarks/skimage/cucim_segmentation_bench.py
@@ -1,8 +1,11 @@
+import argparse
import math
import os
import pickle
import cucim.skimage
+import cucim.skimage.data
+import cucim.skimage.exposure
import cucim.skimage.segmentation
import cupy as cp
import numpy as np
@@ -25,6 +28,7 @@ def __init__(
index_str=None,
module_cpu=skimage.measure,
module_gpu=cucim.skimage.measure,
+ run_cpu=True,
):
self.contiguous_labels = contiguous_labels
@@ -38,6 +42,7 @@ def __init__(
index_str=index_str,
module_cpu=module_cpu,
module_gpu=module_gpu,
+ run_cpu=run_cpu,
)
def set_args(self, dtype):
@@ -50,14 +55,38 @@ def set_args(self, dtype):
],
dtype=dtype,
)
- tiling = tuple(s // a_s for s, a_s in zip(shape, a.shape))
+ tiling = tuple(s // a_s for s, a_s in zip(self.shape, a.shape))
if self.contiguous_labels:
- image = np.kron(a, np.ones(tiling, dtype=a.dtype))
+ labels = np.kron(a, np.ones(tiling, dtype=a.dtype))
else:
- image = np.tile(a, tiling)
- imaged = cp.asarray(image)
- self.args_cpu = (image,)
- self.args_gpu = (imaged,)
+ labels = np.tile(a, tiling)
+ labels_d = cp.asarray(labels)
+ self.args_cpu = (labels,)
+ self.args_gpu = (labels_d,)
+
+
+class LabelAndImageBench(LabelBench):
+
+ def set_args(self, dtype):
+ a = np.array(
+ [
+ [0, 0, 1, 1, 0, 0, 0, 0],
+ [0, 0, 0, 1, 0, 0, 4, 0],
+ [2, 2, 0, 0, 3, 0, 4, 4],
+ [0, 0, 0, 0, 0, 5, 0, 0],
+ ],
+ dtype=dtype,
+ )
+ tiling = tuple(s // a_s for s, a_s in zip(self.shape, a.shape))
+ if self.contiguous_labels:
+ labels = np.kron(a, np.ones(tiling, dtype=a.dtype))
+ else:
+ labels = np.tile(a, tiling)
+ labels_d = cp.asarray(labels)
+ image_d = cp.random.standard_normal(labels.shape).astype(np.float32)
+ image = cp.asnumpy(image_d)
+ self.args_cpu = (image, labels)
+ self.args_gpu = (image_d, labels_d)
class MorphGeodesicBench(ImageBench):
@@ -75,7 +104,9 @@ def set_args(self, dtype):
imaged = cp.tile(im1, n_tile)[slices]
# need this preprocessing for morphological_geodesic_active_contour
- imaged = skimage.segmentation.inverse_gaussian_gradient(imaged)
+ imaged = cp.array(
+ skimage.segmentation.inverse_gaussian_gradient(cp.asnumpy(imaged))
+ )
image = cp.asnumpy(imaged)
assert imaged.dtype == dtype
@@ -84,29 +115,118 @@ def set_args(self, dtype):
self.args_gpu = (imaged,)
-pfile = "cucim_segmentation_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes = [np.int32]
+class RandomWalkerBench(ImageBench):
+
+
+ def set_args(self, dtype):
+ # Note: dtype is only used for the markers array; the image data is hard-coded as float32
+
+ if np.dtype(dtype).kind not in 'iu':
+ raise ValueError("random_walker markers require integer dtype")
+
+ n_dim = len(self.shape)
+ data = cucim.skimage.img_as_float(
+ cucim.skimage.data.binary_blobs(
+ length=max(self.shape), n_dim=n_dim, seed=1
+ )
+ )
+ data = data[tuple(slice(s) for s in self.shape)]
+ sigma = 0.35
+ rng = np.random.default_rng(5)
+ data += cp.array(rng.normal(loc=0, scale=sigma, size=data.shape))
+ data = cucim.skimage.exposure.rescale_intensity(
+ data, in_range=(-sigma, 1 + sigma), out_range=(-1, 1)
+ )
+ data = data.astype(cp.float32)
+ data_cpu = cp.asnumpy(data)
+
+ # The range of the binary image spans over (-1, 1).
+ # We choose the hottest and the coldest pixels as markers.
+ markers = cp.zeros(data.shape, dtype=dtype)
+ markers[data < -0.95] = 1
+ markers[data > 0.95] = 2
+ markers_cpu = cp.asnumpy(markers)
+ self.args_cpu = (data_cpu, markers_cpu)
+ self.args_gpu = (data, markers)
+
+
+
+def main(args):
+
+ pfile = "cucim_segmentation_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
+ else:
+ all_results = pd.DataFrame()
+ dtypes = [np.dtype(args.dtype)]
+ dtypes_label = [np.dtype(args.dtype_label)]
+ # image sizes/shapes
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+ run_cpu = not args.no_cpu
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # _denoise.py
- (
- "find_boundaries",
- dict(),
- dict(connectivity=[1], mode=["thick", "inner", "outer", "subpixel"]),
- False,
- True,
- ),
-]:
+ for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
+ # _clear_border.py
+ (
+ "clear_border",
+ dict(),
+ dict(),
+ False,
+ True,
+ ),
+ # _join.py
+ (
+ "relabel_sequential",
+ dict(offset=5),
+ dict(),
+ False,
+ True,
+ ),
+ # boundaries.py
+ (
+ "find_boundaries",
+ dict(),
+ dict(connectivity=[1], mode=["thick", "inner", "outer", "subpixel"]),
+ False,
+ True,
+ ),
+ (
+ "mark_boundaries",
+ dict(),
+ dict(),
+ False,
+ True,
+ ),
+ (
+ "random_walker",
+ dict(beta=4, tol=1.e-4, prob_tol=1.e-2),
+ dict(mode=['cg', 'cg_j']),
+ False,
+ True,
+ ),
+ # morphsnakes.py
+ ("inverse_gaussian_gradient", dict(), dict(), False, True),
+ (
+ "morphological_geodesic_active_contour",
+ dict(),
+ dict(num_iter=[16], init_level_set=["checkerboard", "disk"]),
+ False,
+ False,
+ ),
+ (
+ "morphological_chan_vese",
+ dict(),
+ dict(num_iter=[16], init_level_set=["checkerboard", "disk"]),
+ False,
+ False,
+ ),
+ # omit: disk_level_set (simple array generation function)
+ # omit: checkerboard_level_set (simple array generation function)
+ ]:
- for shape in [
- (64, 64),
- ]: # (512, 512), (1980, 1080), (1980, 1080, 3), (128, 128, 128)]:
+ if function_name != args.func_name:
+ continue
ndim = len(shape)
if not allow_nd:
@@ -119,73 +239,72 @@ def set_args(self, dtype):
if shape[-1] == 3 and not allow_color:
continue
- B = LabelBench(
- function_name=function_name,
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.segmentation,
- module_gpu=cucim.skimage.segmentation,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
-
-
-dtypes = [np.float32]
-# function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd = ('unsupervised_wiener', dict(), dict(), False, True)
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # morphsnakes.py
- ("inverse_gaussian_gradient", dict(), dict(), False, True),
- (
- "morphological_geodesic_active_contour",
- dict(),
- dict(num_iter=[16], init_level_set=["checkerboard", "disk"]),
- False,
- False,
- ),
- (
- "morphological_chan_vese",
- dict(),
- dict(num_iter=[16], init_level_set=["checkerboard", "disk"]),
- False,
- False,
- ),
-]:
-
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
+ if function_name in ["clear_border", "relabel_sequential", "find_boundaries", "mark_boundaries", "random_walker"]:
+ if function_name == 'random_walker':
+ fixed_kwargs['channel_axis'] = -1 if shape[-1] == 3 else None
- ndim = len(shape)
- if not allow_nd:
- if not allow_color:
- if ndim > 2:
- continue
+ if function_name == 'mark_boundaries':
+ bench_func = LabelAndImageBench
+ elif function_name == 'random_walker':
+ bench_func = RandomWalkerBench
else:
- if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
- continue
- if shape[-1] == 3 and not allow_color:
- continue
+ bench_func = LabelBench
- if function_name == "morphological_geodesic_active_contour":
- bench_class = MorphGeodesicBench
- else:
- bench_class = ImageBench
+ B = bench_func(
+ function_name=function_name,
+ shape=shape,
+ dtypes=dtypes_label,
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ module_cpu=skimage.segmentation,
+ module_gpu=cucim.skimage.segmentation,
+ run_cpu=run_cpu, # honor --no_cpu, as the other benchmark scripts do
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+
+ elif function_name in ["inverse_gaussian_gradient", "morphological_geodesic_active_contour", "morphological_chan_vese"]:
+
+ if function_name == "morphological_geodesic_active_contour":
+ bench_class = MorphGeodesicBench
+ else:
+ bench_class = ImageBench
+
+ B = bench_class(
+ function_name=function_name,
+ shape=shape,
+ dtypes=dtypes,
+ fixed_kwargs=fixed_kwargs,
+ var_kwargs=var_kwargs,
+ module_cpu=skimage.segmentation,
+ module_gpu=cucim.skimage.segmentation,
+ run_cpu=run_cpu, # honor --no_cpu, as the other benchmark scripts do
+ )
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ fbase = os.path.splitext(pfile)[0]
+ all_results.to_csv(fbase + ".csv")
+ all_results.to_pickle(pfile)
+ try:
+ import tabulate # noqa: F401 - optional dependency required by DataFrame.to_markdown
+
+ with open(fbase + ".md", "wt") as f:
+ f.write(all_results.to_markdown())
+ except ImportError:
+ pass # tabulate is not installed: skip the optional Markdown report
- B = ImageBench(
- function_name=function_name,
- shape=shape,
- dtypes=dtypes,
- fixed_kwargs=fixed_kwargs,
- var_kwargs=var_kwargs,
- module_cpu=skimage.segmentation,
- module_gpu=cucim.skimage.segmentation,
- )
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Benchmarking cuCIM segmentation functions')
+ func_name_choices = ["clear_border", "relabel_sequential", "find_boundaries", "mark_boundaries", "random_walker", "inverse_gaussian_gradient", "morphological_geodesic_active_contour", "morphological_chan_vese"]
+ label_dtype_choices = ['int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+ dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+ parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True)
+ parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True)
+ parser.add_argument('--dtype_label', type=str, help='Dtype of the label/marker array', choices = label_dtype_choices, required=False, default='uint8')
+ parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True)
+ parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+ parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ args = parser.parse_args()
+ main(args)
diff --git a/benchmarks/skimage/cucim_transform_bench.py b/benchmarks/skimage/cucim_transform_bench.py
index 534a6a225..b6aa315f9 100644
--- a/benchmarks/skimage/cucim_transform_bench.py
+++ b/benchmarks/skimage/cucim_transform_bench.py
@@ -1,3 +1,4 @@
+import argparse
import os
import pickle
@@ -10,75 +11,83 @@
from _image_bench import ImageBench
-pfile = "cucim_transform_results.pickle"
-if os.path.exists(pfile):
- with open(pfile, "rb") as f:
- all_results = pickle.load(f)
-else:
- all_results = pd.DataFrame()
-dtypes = [np.float32]
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
- # _warps.py
- (
- "resize",
- dict(preserve_range=True),
- dict(order=[0, 1, 3], mode=["reflect"], anti_aliasing=[True]),
- True,
- True,
- ), # scale handled in loop below
- (
- "resize_local_mean",
- dict(preserve_range=True),
- {},
- True,
- True,
- ), # scale handled in loop below
- (
- "rescale",
- dict(preserve_range=True),
- dict(order=[0, 1, 3], mode=["reflect"], anti_aliasing=[True]),
- True,
- True,
- ), # output_shape handled in loop below
- (
- "rotate",
- dict(angle=15, preserve_range=True),
- dict(order=[0, 1, 3], mode=["reflect"], resize=[False, True]),
- False,
- False,
- ),
- ("downscale_local_mean", dict(), dict(), True, True), # factors handled in loop below
- (
- "swirl",
- dict(strength=1, preserve_range=True),
- dict(order=[0, 1, 3], mode=["reflect"]),
- False,
- False,
- ),
- # TODO : warp? already indirectly benchmarked via swirl, etc
- ("warp_polar", dict(), dict(scaling=["linear", "log"]), True, False),
- # integral.py
- ("integral_image", dict(), dict(), False, True),
- # TODO: integrate
- # pyramids.py
- (
- "pyramid_gaussian",
- dict(max_layer=6, downscale=2, preserve_range=True),
- dict(order=[0, 1, 3]),
- True,
- True,
- ),
- (
- "pyramid_laplacian",
- dict(max_layer=6, downscale=2, preserve_range=True),
- dict(order=[0, 1, 3]),
- True,
- True,
- ),
-]:
-
- for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
+
+def main(args):
+
+ pfile = "cucim_transform_results.pickle"
+ if os.path.exists(pfile):
+ with open(pfile, "rb") as f:
+ all_results = pickle.load(f)
+ else:
+ all_results = pd.DataFrame()
+
+ dtypes = [np.dtype(args.dtype)]
+ # image sizes/shapes
+ shape = tuple(list(map(int,(args.img_size.split(',')))))
+ run_cpu = not args.no_cpu
+
+ for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
+ # _warps.py
+ (
+ "resize",
+ dict(preserve_range=True),
+ dict(order=[0, 1, 3], mode=["reflect"], anti_aliasing=[True]),
+ True,
+ True,
+ ), # scale handled in loop below
+ (
+ "resize_local_mean",
+ dict(preserve_range=True),
+ {},
+ True,
+ True,
+ ), # scale handled in loop below
+ (
+ "rescale",
+ dict(preserve_range=True),
+ dict(order=[0, 1, 3], mode=["reflect"], anti_aliasing=[True]),
+ True,
+ True,
+ ), # output_shape handled in loop below
+ (
+ "rotate",
+ dict(angle=15, preserve_range=True),
+ dict(order=[0, 1, 3], mode=["reflect"], resize=[False, True]),
+ False,
+ False,
+ ),
+ ("downscale_local_mean", dict(), dict(), True, True), # factors handled in loop below
+ (
+ "swirl",
+ dict(strength=1, preserve_range=True),
+ dict(order=[0, 1, 3], mode=["reflect"]),
+ False,
+ False,
+ ),
+ # TODO : warp? already indirectly benchmarked via swirl, etc
+ ("warp_polar", dict(), dict(scaling=["linear", "log"]), True, False),
+ # integral.py
+ ("integral_image", dict(), dict(), False, True),
+ # TODO: integrate
+ # pyramids.py
+ (
+ "pyramid_gaussian",
+ dict(max_layer=6, downscale=2, preserve_range=True),
+ dict(order=[0, 1, 3]),
+ True,
+ True,
+ ),
+ (
+ "pyramid_laplacian",
+ dict(max_layer=6, downscale=2, preserve_range=True),
+ dict(order=[0, 1, 3]),
+ True,
+ True,
+ ),
+ ]:
+
+ if function_name != args.func_name:
+ continue
ndim = len(shape)
if not allow_nd:
@@ -131,12 +140,30 @@
module_gpu=cucim.skimage.transform,
function_is_generator=function_is_generator,
)
- results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ results = B.run_benchmark(duration=args.duration)
+ all_results = pd.concat([all_results, results["full"]])
+
+ fbase = os.path.splitext(pfile)[0]
+ all_results.to_csv(fbase + ".csv")
+ all_results.to_pickle(pfile)
+ try:
+ import tabulate  # noqa: F401 -- optional dependency required by DataFrame.to_markdown
+
+ with open(fbase + ".md", "wt") as f:
+ f.write(all_results.to_markdown())
+ except ImportError:
+ pass
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Benchmarking cuCIM transform functions')
+ func_name_choices = ['resize', 'resize_local_mean', 'rescale', 'rotate', 'downscale_local_mean', 'swirl', 'warp_polar', 'integral_image', 'pyramid_gaussian', 'pyramid_laplacian']
+ dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+ parser.add_argument('-i','--img_size', type=str, help='Size of input image (omit color channel, it will be appended as needed)', required=True)
+ parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices = dtype_choices, required=True)
+ parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices = func_name_choices, required=True)
+ parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+ parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
- f.write(all_results.to_markdown())
+ args = parser.parse_args()
+ main(args)
diff --git a/benchmarks/skimage/cupyx_scipy_ndimage_filter_bench.py b/benchmarks/skimage/cupyx_scipy_ndimage_filter_bench.py
index 4c5143246..e09f3fe1d 100644
--- a/benchmarks/skimage/cupyx_scipy_ndimage_filter_bench.py
+++ b/benchmarks/skimage/cupyx_scipy_ndimage_filter_bench.py
@@ -102,7 +102,7 @@ def set_args(self, dtype):
var_kwargs=var_kwargs,
)
results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ all_results = pd.concat([all_results, results["full"]])
for fname, wshape, var_kwargs in [
("convolve", weights_shape, dict(mode=modes)),
@@ -119,7 +119,7 @@ def set_args(self, dtype):
var_kwargs=var_kwargs,
)
results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ all_results = pd.concat([all_results, results["full"]])
fbase = os.path.splitext(pfile)[0]
all_results.to_csv(fbase + ".csv")
diff --git a/benchmarks/skimage/cupyx_scipy_ndimage_fourier_bench.py b/benchmarks/skimage/cupyx_scipy_ndimage_fourier_bench.py
index bfc6b448d..06eb19fc2 100644
--- a/benchmarks/skimage/cupyx_scipy_ndimage_fourier_bench.py
+++ b/benchmarks/skimage/cupyx_scipy_ndimage_fourier_bench.py
@@ -43,7 +43,7 @@ def set_args(self, dtype):
var_kwargs=var_kwargs,
)
results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ all_results = pd.concat([all_results, results["full"]])
fbase = os.path.splitext(pfile)[0]
diff --git a/benchmarks/skimage/cupyx_scipy_ndimage_interp_bench.py b/benchmarks/skimage/cupyx_scipy_ndimage_interp_bench.py
index 294c33d5e..de518ab82 100644
--- a/benchmarks/skimage/cupyx_scipy_ndimage_interp_bench.py
+++ b/benchmarks/skimage/cupyx_scipy_ndimage_interp_bench.py
@@ -56,7 +56,7 @@ def set_args(self, dtype):
var_kwargs=dict(mode=modes, order=orders),
)
results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ all_results = pd.concat([all_results, results["full"]])
for fname, fixed_kwargs, var_kwargs in [
(
@@ -139,7 +139,7 @@ def set_args(self, dtype):
var_kwargs=var_kwargs,
)
results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ all_results = pd.concat([all_results, results["full"]])
fbase = os.path.splitext(pfile)[0]
all_results.to_csv(fbase + ".csv")
diff --git a/benchmarks/skimage/cupyx_scipy_ndimage_measurements_bench.py b/benchmarks/skimage/cupyx_scipy_ndimage_measurements_bench.py
index 9f6e531af..bb1d479b1 100644
--- a/benchmarks/skimage/cupyx_scipy_ndimage_measurements_bench.py
+++ b/benchmarks/skimage/cupyx_scipy_ndimage_measurements_bench.py
@@ -145,7 +145,7 @@ def set_args(self, dtype):
var_kwargs=var_kwargs,
)
results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ all_results = pd.concat([all_results, results["full"]])
for fname in [
"sum",
@@ -180,7 +180,7 @@ def set_args(self, dtype):
var_kwargs=var_kwargs,
)
results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ all_results = pd.concat([all_results, results["full"]])
fbase = os.path.splitext(pfile)[0]
all_results.to_csv(fbase + ".csv")
diff --git a/benchmarks/skimage/cupyx_scipy_ndimage_morphology_bench.py b/benchmarks/skimage/cupyx_scipy_ndimage_morphology_bench.py
index 54d58d667..7fbe9e7c6 100644
--- a/benchmarks/skimage/cupyx_scipy_ndimage_morphology_bench.py
+++ b/benchmarks/skimage/cupyx_scipy_ndimage_morphology_bench.py
@@ -126,7 +126,7 @@ def set_args(self, dtype):
var_kwargs=var_kwargs,
)
results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ all_results = pd.concat([all_results, results["full"]])
iterations = [1, 10, 30]
for fname, var_kwargs in [
@@ -152,7 +152,7 @@ def set_args(self, dtype):
var_kwargs=var_kwargs,
)
results = B.run_benchmark(duration=1)
- all_results = all_results.append(results["full"])
+ all_results = pd.concat([all_results, results["full"]])
fbase = os.path.splitext(pfile)[0]
all_results.to_csv(fbase + ".csv")
diff --git a/benchmarks/skimage/requirements-bench.txt b/benchmarks/skimage/requirements-bench.txt
new file mode 100644
index 000000000..2cc6ac30e
--- /dev/null
+++ b/benchmarks/skimage/requirements-bench.txt
@@ -0,0 +1,2 @@
+pandas>=1.0
+tabulate>=0.8.7
diff --git a/benchmarks/skimage/run-nv-bench-color.sh b/benchmarks/skimage/run-nv-bench-color.sh
new file mode 100755
index 000000000..14a0809ec
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-color.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 192,192,192)
+param_filt=(convert_colorspace rgb2hed hed2rgb lab2lch lch2lab xyz2lab lab2xyz rgba2rgb label2rgb)
+param_dt=(float32 uint8)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_color_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/benchmarks/skimage/run-nv-bench-exposure.sh b/benchmarks/skimage/run-nv-bench-exposure.sh
new file mode 100755
index 000000000..4ed9e0748
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-exposure.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 3840,2160,3 192,192,192)
+param_filt=(equalize_adapthist cumulative_distribution equalize_hist rescale_intensity adjust_gamma adjust_log adjust_sigmoid is_low_contrast match_histograms)
+param_dt=(float32 uint8)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_exposure_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/benchmarks/skimage/run-nv-bench-feature.sh b/benchmarks/skimage/run-nv-bench-feature.sh
new file mode 100755
index 000000000..fbab913c6
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-feature.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 3840,2160,3 192,192,192)
+param_filt=(multiscale_basic_features canny daisy structure_tensor hessian_matrix hessian_matrix_det shape_index corner_kitchen_rosenfeld corner_harris corner_shi_tomasi corner_foerstner corner_peaks match_template)
+param_dt=(float64 float32 float16)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_feature_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/benchmarks/skimage/run-nv-bench-filters.sh b/benchmarks/skimage/run-nv-bench-filters.sh
new file mode 100755
index 000000000..37388e9e9
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-filters.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 3840,2160,3 192,192,192)
+param_filt=(gabor gaussian median rank_order unsharp_mask sobel prewitt scharr roberts roberts_pos_diag roberts_neg_diag farid laplace meijering sato frangi hessian threshold_isodata threshold_otsu threshold_yen threshold_local threshold_li threshold_minimum threshold_mean threshold_triangle threshold_niblack threshold_sauvola apply_hysteresis_threshold threshold_multiotsu)
+# param_filt=(rank_order )
+param_dt=(float64 float32 float16)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_filters_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/benchmarks/skimage/run-nv-bench-measure.sh b/benchmarks/skimage/run-nv-bench-measure.sh
new file mode 100755
index 000000000..5c2b4f413
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-measure.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 3840,2160,3 192,192,192)
+param_filt=(label regionprops moments moments_central centroid inertia_tensor inertia_tensor_eigvals block_reduce shannon_entropy profile_line)
+param_dt=(float32 uint8)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_measure_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/benchmarks/skimage/run-nv-bench-metrics.sh b/benchmarks/skimage/run-nv-bench-metrics.sh
new file mode 100755
index 000000000..b24ba775b
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-metrics.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 3840,2160,3 192,192,192)
+param_filt=(structural_similarity mean_squared_error normalized_root_mse peak_signal_noise_ratio normalized_mutual_information)
+param_dt=(float32 uint8)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_metrics_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/benchmarks/skimage/run-nv-bench-morphology.sh b/benchmarks/skimage/run-nv-bench-morphology.sh
new file mode 100755
index 000000000..4981a58ad
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-morphology.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 3840,2160,3 192,192,192)
+param_filt=(binary_erosion binary_dilation binary_opening binary_closing remove_small_objects remove_small_holes erosion dilation opening closing white_tophat black_tophat thin reconstruction)
+# Note: user-specified dtype ignored for binary_* functions and thin (these only accept binary input)
+param_dt=(float32 uint8)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_morphology_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/benchmarks/skimage/run-nv-bench-registration.sh b/benchmarks/skimage/run-nv-bench-registration.sh
new file mode 100755
index 000000000..18f572b53
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-registration.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 3840,2160,3 192,192,192)
+param_filt=(phase_cross_correlation optical_flow_tvl1 optical_flow_ilk)
+param_dt=(float32 uint8)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_registration_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/benchmarks/skimage/run-nv-bench-restoration.sh b/benchmarks/skimage/run-nv-bench-restoration.sh
new file mode 100755
index 000000000..df7c9141f
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-restoration.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 3840,2160,3 192,192,192)
+param_filt=(denoise_tv_chambolle calibrate_denoiser wiener unsupervised_wiener richardson_lucy)
+param_dt=(float32 uint8)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_restoration_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/benchmarks/skimage/run-nv-bench-segmentation.sh b/benchmarks/skimage/run-nv-bench-segmentation.sh
new file mode 100755
index 000000000..4723f0c60
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-segmentation.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 3840,2160,3 192,192,192)
+
+# these require an integer-valued label image
+param_filt=(clear_border relabel_sequential find_boundaries mark_boundaries random_walker)
+param_dt=(float32)
+param_dt_label=(uint8 uint32)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ for dt_label in "${param_dt_label[@]}"; do
+ python cucim_segmentation_bench.py -f $filt -i $shape -d $dt --dtype_label $dt_label -t 10
+ done
+ done
+ done
+done
+
+# these do not require an integer-valued input image
+param_filt=(inverse_gaussian_gradient morphological_geodesic_active_contour morphological_chan_vese)
+param_dt=(float32)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_segmentation_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/benchmarks/skimage/run-nv-bench-transform.sh b/benchmarks/skimage/run-nv-bench-transform.sh
new file mode 100755
index 000000000..c880cb699
--- /dev/null
+++ b/benchmarks/skimage/run-nv-bench-transform.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+param_shape=(512,512 3840,2160 3840,2160,3 192,192,192)
+param_filt=(resize resize_local_mean rescale rotate downscale_local_mean warp_polar integral_image pyramid_gaussian pyramid_laplacian)
+param_dt=(float32 uint8)
+for shape in "${param_shape[@]}"; do
+ for filt in "${param_filt[@]}"; do
+ for dt in "${param_dt[@]}"; do
+ python cucim_transform_bench.py -f $filt -i $shape -d $dt -t 10
+ done
+ done
+done
diff --git a/conda/environments/env.yml b/conda/environments/env.yml
index 895329fa6..0cee99491 100644
--- a/conda/environments/env.yml
+++ b/conda/environments/env.yml
@@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- cupy>=9
- - scikit-image>=0.18.1
+ - scikit-image>=0.19.0
- openslide
- zlib
- jpeg
diff --git a/conda/recipes/cucim/build.sh b/conda/recipes/cucim/build.sh
index 77e9b7aef..28b1112c2 100644
--- a/conda/recipes/cucim/build.sh
+++ b/conda/recipes/cucim/build.sh
@@ -4,14 +4,6 @@ CUCIM_BUILD_TYPE=${CUCIM_BUILD_TYPE:-release}
echo "CC : ${CC}"
echo "CXX : ${CXX}"
-echo "CUDAHOSTCXX : ${CUDAHOSTCXX}"
-
-# For now CUDAHOSTCXX is set to `/usr/bin/g++` by
-# https://github.com/rapidsai/docker/blob/161b200157206660d88fb02cf69fe58d363ac95e/generated-dockerfiles/rapidsai-core_ubuntu18.04-devel.Dockerfile
-# To use GCC-9 in conda build environment, need to set it to $CXX (=$BUILD_PREFIX/bin/x86_64-conda-linux-gnu-c++)
-# This can be removed once we switch to use gcc-9
-# : https://docs.rapids.ai/notices/rdn0002/
-export CUDAHOSTCXX=${CXX}
# CUDA needs to include $PREFIX/include as system include path
export CUDAFLAGS="-isystem $BUILD_PREFIX/include -isystem $PREFIX/include "
diff --git a/conda/recipes/cucim/meta.yaml b/conda/recipes/cucim/meta.yaml
index fa9377b93..479d17e83 100644
--- a/conda/recipes/cucim/meta.yaml
+++ b/conda/recipes/cucim/meta.yaml
@@ -32,7 +32,7 @@ requirements:
- cupy >=9,<11.0.0a0
- numpy 1.19
- scipy
- - scikit-image >=0.18.1,<0.20.0a0
+ - scikit-image >=0.19.0,<0.20.0a0
run:
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
- python {{ python_version }}.*
@@ -41,7 +41,7 @@ requirements:
- cupy >=9,<11.0.0a0
- {{ pin_compatible('numpy') }}
- scipy
- - scikit-image >=0.18.1,<0.20.0a0
+ - scikit-image >=0.19.0,<0.20.0a0
# - openslide # skipping here but benchmark binary would needs openslide library
tests: # [linux64]
diff --git a/conda/recipes/libcucim/build.sh b/conda/recipes/libcucim/build.sh
index 9d2217bbc..7d21aebe3 100644
--- a/conda/recipes/libcucim/build.sh
+++ b/conda/recipes/libcucim/build.sh
@@ -4,16 +4,8 @@ CUCIM_BUILD_TYPE=${CUCIM_BUILD_TYPE:-release}
echo "CC : ${CC}"
echo "CXX : ${CXX}"
-echo "CUDAHOSTCXX : ${CUDAHOSTCXX}"
echo "CUDA : ${CUDA}"
-# For now CUDAHOSTCXX is set to `/usr/bin/g++` by
-# https://github.com/rapidsai/docker/blob/161b200157206660d88fb02cf69fe58d363ac95e/generated-dockerfiles/rapidsai-core_ubuntu18.04-devel.Dockerfile
-# To use GCC-9 in conda build environment, need to set it to $CXX (=$BUILD_PREFIX/bin/x86_64-conda-linux-gnu-c++)
-# This can be removed once we switch to use gcc-9
-# : https://docs.rapids.ai/notices/rdn0002/
-export CUDAHOSTCXX=${CXX}
-
# CUDA needs to include $PREFIX/include as system include path
export CUDAFLAGS="-isystem $BUILD_PREFIX/include -isystem $PREFIX/include "
export LD_LIBRARY_PATH="$BUILD_PREFIX/lib:$PREFIX/lib:$LD_LIBRARY_PATH"
diff --git a/cpp/plugins/cucim.kit.cumed/VERSION b/cpp/plugins/cucim.kit.cumed/VERSION
index ddc883c42..231fac64b 100644
--- a/cpp/plugins/cucim.kit.cumed/VERSION
+++ b/cpp/plugins/cucim.kit.cumed/VERSION
@@ -1 +1 @@
-22.06.00
+22.08.00
diff --git a/cpp/plugins/cucim.kit.cuslide/VERSION b/cpp/plugins/cucim.kit.cuslide/VERSION
index ddc883c42..231fac64b 100644
--- a/cpp/plugins/cucim.kit.cuslide/VERSION
+++ b/cpp/plugins/cucim.kit.cuslide/VERSION
@@ -1 +1 @@
-22.06.00
+22.08.00
diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/cuslide.cpp b/cpp/plugins/cucim.kit.cuslide/src/cuslide/cuslide.cpp
index b284e0f71..0b7cbeb21 100644
--- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/cuslide.cpp
+++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/cuslide.cpp
@@ -171,16 +171,46 @@ static bool CUCIM_ABI parser_parse(CuCIMFileHandle_ptr handle_ptr, cucim::io::fo
channel_names.emplace_back(std::string_view{ "A" });
}
- // TODO: Set correct spacing value
+ // Spacing units
+ std::pmr::vector spacing_units(&resource);
+ spacing_units.reserve(ndim);
+
std::pmr::vector spacing(&resource);
spacing.reserve(ndim);
- spacing.insert(spacing.end(), ndim, 1.0);
+ const auto resolution_unit = level0_ifd->resolution_unit();
+ const auto x_resolution = level0_ifd->x_resolution();
+ const auto y_resolution = level0_ifd->y_resolution();
+
+ switch (resolution_unit)
+ {
+ case 1: // no absolute unit of measurement
+ spacing.emplace_back(y_resolution);
+ spacing.emplace_back(x_resolution);
+ spacing.emplace_back(1.0f);
+
+ spacing_units.emplace_back(std::string_view{ "" });
+ spacing_units.emplace_back(std::string_view{ "" });
+ break;
+ case 2: // inch
+ spacing.emplace_back(y_resolution != 0 ? 25400 / y_resolution : 1.0f);
+ spacing.emplace_back(x_resolution != 0 ? 25400 / x_resolution : 1.0f);
+ spacing.emplace_back(1.0f);
+
+ spacing_units.emplace_back(std::string_view{ "micrometer" });
+ spacing_units.emplace_back(std::string_view{ "micrometer" });
+ break;
+ case 3: // centimeter
+ spacing.emplace_back(y_resolution != 0 ? 10000 / y_resolution : 1.0f);
+ spacing.emplace_back(x_resolution != 0 ? 10000 / x_resolution : 1.0f);
+ spacing.emplace_back(1.0f);
+
+ spacing_units.emplace_back(std::string_view{ "micrometer" });
+ spacing_units.emplace_back(std::string_view{ "micrometer" });
+ break;
+ default:
+ spacing.insert(spacing.end(), ndim, 1.0f);
+ }
- // TODO: Set correct spacing units
- std::pmr::vector spacing_units(&resource);
- spacing_units.reserve(ndim);
- spacing_units.emplace_back(std::string_view{ "micrometer" });
- spacing_units.emplace_back(std::string_view{ "micrometer" });
spacing_units.emplace_back(std::string_view{ "color" });
std::pmr::vector origin({ 0.0, 0.0, 0.0 }, &resource);
diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp
index 52a66cd80..9bcacb25d 100644
--- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp
+++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp
@@ -63,6 +63,9 @@ IFD::IFD(TIFF* tiff, uint16_t index, ifd_offset_t offset) : tiff_(tiff), ifd_ind
model_ = std::string(model_char_ptr ? model_char_ptr : "");
TIFFGetField(tif, TIFFTAG_IMAGEDESCRIPTION, &model_char_ptr);
image_description_ = std::string(model_char_ptr ? model_char_ptr : "");
+ TIFFGetField(tif, TIFFTAG_RESOLUTIONUNIT, &resolution_unit_);
+ TIFFGetField(tif, TIFFTAG_XRESOLUTION, &x_resolution_);
+ TIFFGetField(tif, TIFFTAG_YRESOLUTION, &y_resolution_);
TIFFDirectory& tif_dir = tif->tif_dir;
flags_ = tif->tif_flags;
@@ -451,6 +454,18 @@ std::string& IFD::image_description()
{
return image_description_;
}
+uint16_t IFD::resolution_unit() const
+{
+ return resolution_unit_;
+}
+float IFD::x_resolution() const
+{
+ return x_resolution_;
+}
+float IFD::y_resolution() const
+{
+ return y_resolution_;
+}
uint32_t IFD::width() const
{
return width_;
diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.h b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.h
index e15724782..5737d82d5 100644
--- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.h
+++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.h
@@ -72,6 +72,9 @@ class EXPORT_VISIBLE IFD : public std::enable_shared_from_this
std::string& software();
std::string& model();
std::string& image_description();
+ uint16_t resolution_unit() const;
+ float x_resolution() const;
+ float y_resolution() const;
uint32_t width() const;
uint32_t height() const;
uint32_t tile_width() const;
@@ -109,6 +112,10 @@ class EXPORT_VISIBLE IFD : public std::enable_shared_from_this
std::string software_;
std::string model_;
std::string image_description_;
+ uint16_t resolution_unit_ = 1; // 1 = No absolute unit of measurement, 2 = Inch, 3 = Centimeter
+ float x_resolution_ = 1.0f;
+ float y_resolution_ = 1.0f;
+
uint32_t flags_ = 0;
uint32_t width_ = 0;
uint32_t height_ = 0;
diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/tiff.cpp b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/tiff.cpp
index cf9d38e3e..1f1ac6ab6 100644
--- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/tiff.cpp
+++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/tiff.cpp
@@ -387,6 +387,9 @@ void TIFF::resolve_vendor_format()
auto& first_ifd = ifds_[0];
std::string& model = first_ifd->model();
std::string& software = first_ifd->software();
+ const uint16_t resolution_unit = first_ifd->resolution_unit();
+ const float x_resolution = first_ifd->x_resolution();
+ const float y_resolution = first_ifd->y_resolution();
// Detect Aperio SVS format
{
@@ -416,6 +419,20 @@ void TIFF::resolve_vendor_format()
tiff_metadata.emplace("model", model);
tiff_metadata.emplace("software", software);
+ switch (resolution_unit)
+ {
+ case 2:
+ tiff_metadata.emplace("resolution_unit", "inch");
+ break;
+ case 3:
+ tiff_metadata.emplace("resolution_unit", "centimeter");
+ break;
+ default:
+ tiff_metadata.emplace("resolution_unit", "");
+ break;
+ }
+ tiff_metadata.emplace("x_resolution", x_resolution);
+ tiff_metadata.emplace("y_resolution", y_resolution);
(*json_metadata).emplace("tiff", std::move(tiff_metadata));
}
diff --git a/cpp/src/cuimage.cpp b/cpp/src/cuimage.cpp
index 3ba8fc18b..d9c0ee4dc 100644
--- a/cpp/src/cuimage.cpp
+++ b/cpp/src/cuimage.cpp
@@ -680,10 +680,11 @@ CuImage CuImage::read_region(std::vector&& location,
location.emplace_back(0);
location.emplace_back(0);
}
+
+ const ResolutionInfo& res_info = resolutions();
// If `size` is not specified, size would be (width, height) of the image at the specified `level`.
if (size.empty())
{
- const ResolutionInfo& res_info = resolutions();
const auto level_count = res_info.level_count();
if (level_count == 0)
{
@@ -853,19 +854,36 @@ CuImage CuImage::read_region(std::vector&& location,
// The first dimension is for 'batch' ('N')
spacing_units.emplace_back(std::string_view{ "batch" });
}
+ const auto& level_downsample = res_info.level_downsample(level);
for (; index < ndim; ++index)
{
- int64_t dim_char = dim_indices_.index(dims[index]);
+ int64_t dim_index = dim_indices_.index(dims[index]);
+ if (dim_index < 0)
+ {
+ throw std::runtime_error(fmt::format("[Error] Invalid dimension name: {}", dims[index]));
+ }
- const char* str_ptr = image_metadata_->spacing_units[dim_char];
- size_t str_len = strlen(image_metadata_->spacing_units[dim_char]);
+ const char* str_ptr = image_metadata_->spacing_units[dim_index];
+ size_t str_len = strlen(image_metadata_->spacing_units[dim_index]);
char* spacing_unit = static_cast(resource.allocate(str_len + 1));
memcpy(spacing_unit, str_ptr, str_len);
spacing_unit[str_len] = '\0';
- // std::pmr::string spacing_unit{ image_metadata_->spacing_units[dim_char], &resource };
+ // std::pmr::string spacing_unit{ image_metadata_->spacing_units[dim_index], &resource };
spacing_units.emplace_back(std::string_view{ spacing_unit });
+
+ // Update spacing based on level_downsample
+ char dim_char = image_metadata_->dims[dim_index];
+ switch (dim_char)
+ {
+ case 'X':
+ case 'Y':
+ spacing[index] /= level_downsample;
+ break;
+ default:
+ break;
+ }
}
std::pmr::vector origin(&resource);
diff --git a/cucim.code-workspace b/cucim.code-workspace
index ac73eea68..a73035b60 100644
--- a/cucim.code-workspace
+++ b/cucim.code-workspace
@@ -33,7 +33,7 @@
"CUCIM_TESTDATA_FOLDER": "${workspaceDirectory}/test_data",
// Add cuslide plugin's library path to LD_LIBRARY_PATH
"LD_LIBRARY_PATH": "${workspaceDirectory}/build-debug/lib:${workspaceDirectory}/cpp/plugins/cucim.kit.cuslide/build-debug/lib:${workspaceDirectory}/temp/cuda/lib64:${os_env:LD_LIBRARY_PATH}",
- "CUCIM_TEST_PLUGIN_PATH": "cucim.kit.cuslide@22.06.00.so"
+ "CUCIM_TEST_PLUGIN_PATH": "cucim.kit.cuslide@22.08.00.so"
},
"cwd": "${workspaceDirectory}",
"catch2": {
@@ -226,7 +226,7 @@
},
{
"name": "CUCIM_TEST_PLUGIN_PATH",
- "value": "cucim.kit.cuslide@22.06.00.so"
+ "value": "cucim.kit.cuslide@22.08.00.so"
}
],
"console": "externalTerminal",
@@ -254,7 +254,7 @@
},
{
"name": "CUCIM_TEST_PLUGIN_PATH",
- "value": "cucim.kit.cuslide@22.06.00.so"
+ "value": "cucim.kit.cuslide@22.08.00.so"
}
],
"console": "externalTerminal",
@@ -286,7 +286,7 @@
},
{
"name": "CUCIM_TEST_PLUGIN_PATH",
- "value": "cucim.kit.cuslide@22.06.00.so"
+ "value": "cucim.kit.cuslide@22.08.00.so"
}
],
"console": "externalTerminal",
diff --git a/docs/Makefile b/docs/Makefile
index aeb3540aa..2f6cf8a2b 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -4,7 +4,7 @@
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
-SPHINXPROJ = cuImage
+SPHINXPROJ = cuCIM
SOURCEDIR = source
BUILDDIR = build
diff --git a/docs/source/_static/params.css b/docs/source/_static/params.css
deleted file mode 100644
index c080d3669..000000000
--- a/docs/source/_static/params.css
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Mirrors the change in:
- * https://github.com/sphinx-doc/sphinx/pull/5976
- * which is not showing up in our theme.
- */
-.classifier:before {
- font-style: normal;
- margin: 0.5em;
- content: ":";
-}
-
-:root {
-
- --pst-color-active-navigation: 114, 83, 237;
- --pst-color-navbar-link: 77, 77, 77;
- --pst-color-navbar-link-hover: var(--pst-color-active-navigation);
- --pst-color-navbar-link-active: var(--pst-color-active-navigation);
- --pst-color-sidebar-link: 77, 77, 77;
- --pst-color-sidebar-link-hover: var(--pst-color-active-navigation);
- --pst-color-sidebar-link-active: var(--pst-color-active-navigation);
- --pst-color-sidebar-expander-background-hover: 244, 244, 244;
- --pst-color-sidebar-caption: 77, 77, 77;
- --pst-color-toc-link: 119, 117, 122;
- --pst-color-toc-link-hover: var(--pst-color-active-navigation);
- --pst-color-toc-link-active: var(--pst-color-active-navigation);
-
-}
-
-.special-table td, .special-table th {
- border: 1px solid #dee2e6;
-}
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 04cfda78c..059886c7e 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -55,6 +55,13 @@ intensity
:members:
:undoc-members:
+morphology
+----------
+
+.. automodule:: cucim.core.operations.morphology
+ :members:
+ :undoc-members:
+
spatial
-------
diff --git a/docs/source/conf.py b/docs/source/conf.py
index dfad46b01..e3957382d 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -39,6 +39,7 @@
'sphinx.ext.intersphinx',
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
+ 'sphinx.ext.mathjax',
'numpydoc',
'doi_role',
'IPython.sphinxext.ipython_console_highlighting',
@@ -71,9 +72,9 @@
# built documents.
#
# The short X.Y version.
-version = '22.06'
+version = '22.08'
# The full version, including alpha/beta/rc tags.
-release = '22.06.00'
+release = '22.08.00'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@@ -172,8 +173,14 @@
]
-# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {'https://docs.python.org/': None}
+# Configuration for intersphinx: refer to other projects' documentation.
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/', None),
+ 'cupy': ('https://docs.cupy.dev/en/stable/', None),
+ 'numpy': ('https://numpy.org/doc/stable', None),
+ 'scipy': ('https://docs.scipy.org/doc/scipy/', None),
+ 'skimage': ('https://scikit-image.org/docs/stable/', None),
+}
# Config numpydoc
@@ -182,5 +189,5 @@
def setup(app):
- app.add_css_file('params.css')
app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
+ app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer")
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 5c8dd9fcc..1fd04ad19 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,18 +1,81 @@
Welcome to cuCIM's documentation!
====================================
-cuCIM is a an extensible toolkit designed to provide GPU-accelearted I/O,
-computer vision and image processing primitives for N-Dimensional images
-with a focus on biomedical imaging. Our API mirrors `scikit-image
+cuCIM (Compute Unified Device Architecture Clara IMage) is an open-source, accelerated computer vision and image processing software library for multidimensional images used in biomedical, geospatial, material and life science, and remote sensing use cases.
+
+cuCIM provides GPU-accelerated I/O,
+computer vision and image processing primitives for N-Dimensional images including:
+
+- color conversion
+- exposure
+- feature extraction
+- filters
+- measure
+- metrics
+- morphology
+- registration
+- restoration
+- segmentation
+- transforms
+
+cuCIM supports the following formats:
+
+- Aperio ScanScope Virtual Slide (SVS)
+- Philips TIFF
+- Generic Tiled, Multi-resolution RGB TIFF files with the following
+ compression schemes:
+
+ - No Compression
+ - JPEG
+ - JPEG2000
+ - Lempel-Ziv-Welch (LZW)
+ - Deflate
+
+Our API mirrors `scikit-image
<https://scikit-image.org/>`_ for image manipulation and `OpenSlide
<https://openslide.org/>`_ for image loading.
-cuCIM is fully open sourced under the Apache-2.0 license, and the Clara and RAPIDS teams welcomes new and seasoned
-contributors, users and hobbyists! Thank you for your wonderful support!
+cuCIM is interoperable with the following workflows:
+
+- Albumentations
+- CuPy
+- Data Loading Library (DALI)
+- JFX
+- MONAI
+- Numba
+- NumPy
+- PyTorch
+- TensorFlow
+- Triton
+
+cuCIM is fully open sourced under the Apache-2.0 license, and the Clara
+and RAPIDS teams welcome new and seasoned contributors, users and
+hobbyists! You may download cuCIM via Anaconda `Conda`_ or `PyPI`_. Thank
+you for your wonderful support! Below, we provide some resources to help
+get you started.
+
+**Blogs**
+
+- `Accelerating Scikit-Image API with cuCIM: n-Dimensional Image
+ Processing and IO on GPUs`_
+- `Accelerating Digital Pathology Pipelines with NVIDIA Clara™ Deploy`_
+
+**Webinars**
+
+- `cuCIM: a GPU Image IO and Processing Library`_
+
+.. _Conda: https://anaconda.org/rapidsai-nightly/cucim
+.. _PyPI: https://pypi.org/project/cucim/
+.. _`Accelerating Scikit-Image API with cuCIM: n-Dimensional Image Processing and IO on GPUs`: https://developer.nvidia.com/blog/cucim-rapid-n-dimensional-image-processing-and-i-o-on-gpus/
+.. _Accelerating Digital Pathology Pipelines with NVIDIA Clara™ Deploy: https://developer.nvidia.com/blog/accelerating-digital-pathology-pipelines-with-nvidia-clara-deploy-2/
+.. _`cuCIM: a GPU Image IO and Processing Library`: https://www.youtube.com/watch?v=G46kOOM9xbQ
+
+
+Contents
+==================
.. toctree::
:maxdepth: 4
- :caption: Contents:
api.rst
diff --git a/python/cucim/CHANGELOG.md b/python/cucim/CHANGELOG.md
index d29d31b41..81d223e82 100644
--- a/python/cucim/CHANGELOG.md
+++ b/python/cucim/CHANGELOG.md
@@ -1,6 +1,30 @@
# Changelog (See [Release Notes](https://github.com/rapidsai/cucim/wiki/Release-Notes))
+## [22.06.00](https://github.com/rapidsai/cucim/wiki/release_notes_v22.06.00)
+
+- [Update/Breaking] Promote small integer types to single rather than double precision ([#278](https://github.com/rapidsai/cucim/pull/278)) [@grlee77](https://github.com/grlee77)
+- [Bug] Populate correct channel names for RGBA image ([#294](https://github.com/rapidsai/cucim/pull/294)) [@gigony](https://github.com/gigony)
+- [Bug] Merge branch-22.04 into branch-22.06 ([#258](https://github.com/rapidsai/cucim/pull/258)) [@jakirkham](https://github.com/jakirkham)
+- [New] add missing `cucim.skimage.segmentation.clear_border` function ([#267](https://github.com/rapidsai/cucim/pull/267)) [@grlee77](https://github.com/grlee77)
+- [New] add `cucim.core.operations.color.stain_extraction_pca` and `cucim.core.operations.color.normalize_colors_pca` for digital pathology H&E stain extraction and normalization ([#273](https://github.com/rapidsai/cucim/pull/273)) [@grlee77](https://github.com/grlee77), [@drbeh](https://github.com/drbeh)
+- [Update] Update to use DLPack v0.6 ([#295](https://github.com/rapidsai/cucim/pull/295)) [@gigony](https://github.com/gigony)
+- [Update] Remove plugin-related messages temporarily ([#291](https://github.com/rapidsai/cucim/pull/291)) [@gigony](https://github.com/gigony)
+- [Update] Simplify recipes ([#286](https://github.com/rapidsai/cucim/pull/286)) [@Ethyling](https://github.com/Ethyling)
+- [Update] Use cupy.fuse to improve efficiency hessian_matrix_eigvals ([#280](https://github.com/rapidsai/cucim/pull/280)) [@grlee77](https://github.com/grlee77)
+- [Update] improve efficiency of histogram-based thresholding functions ([#276](https://github.com/rapidsai/cucim/pull/276)) [@grlee77](https://github.com/grlee77)
+- [Update] Remove unused dependencies in GPU tests job ([#268](https://github.com/rapidsai/cucim/pull/268)) [@Ethyling](https://github.com/Ethyling)
+- [Update] Enable footprint decomposition for morphology ([#274](https://github.com/rapidsai/cucim/pull/274)) [@grlee77](https://github.com/grlee77)
+- [Update] Use conda compilers ([#232](https://github.com/rapidsai/cucim/pull/232)) [@Ethyling](https://github.com/Ethyling)
+- [Update] Build packages using mambabuild ([#216](https://github.com/rapidsai/cucim/pull/216)) [@Ethyling](https://github.com/Ethyling)
+- [Doc] update outdated links to example data ([#289](https://github.com/rapidsai/cucim/pull/289)) [@grlee77](https://github.com/grlee77)
+- [Doc] Add missing API docs ([#275](https://github.com/rapidsai/cucim/pull/275)) [@grlee77](https://github.com/grlee77)
+
+## [22.04.00](https://github.com/rapidsai/cucim/wiki/release_notes_v22.04.00)
+
+- [Bug] Fix ImportError from vendored code ([#252](https://github.com/rapidsai/cucim/pull/252)) [@grlee77](https://github.com/grlee77)
+- [Bug] Fix wrong dimension in metadata ([#248](https://github.com/rapidsai/cucim/pull/248)) [@gigony](https://github.com/gigony)
+
## [22.02.01](https://github.com/rapidsai/cucim/wiki/release_notes_v22.02.01)
- [Bug] Check nullptr of handler in CuFileDriver::close() ([#229](https://github.com/rapidsai/cucim/pull/229)) [@gigony](https://github.com/gigony)
diff --git a/python/cucim/README.md b/python/cucim/README.md
index 94556f23c..8f1413137 100644
--- a/python/cucim/README.md
+++ b/python/cucim/README.md
@@ -6,10 +6,13 @@
**NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cucim/blob/main/README.md) ensure you are on the `main` branch.
-- [GTC 2021 cuCIM: A GPU Image I/O and Processing Toolkit [S32194]](https://www.nvidia.com/en-us/on-demand/search/?facet.mimetype[]=event%20session&layout=list&page=1&q=cucim&sort=date)
- - [video](https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s32194/)
+- [GTC 2022 Accelerating Storage IO to GPUs with Magnum IO [S41347]](https://events.rainfocus.com/widget/nvidia/gtcspring2022/sessioncatalog/session/1634960000577001Etxp)
+ - cuCIM's GDS API examples:
- [SciPy 2021 cuCIM - A GPU image I/O and processing library](https://www.scipy2021.scipy.org/)
- [video](https://youtu.be/G46kOOM9xbQ)
+- [GTC 2021 cuCIM: A GPU Image I/O and Processing Toolkit [S32194]](https://www.nvidia.com/en-us/on-demand/search/?facet.mimetype[]=event%20session&layout=list&page=1&q=cucim&sort=date)
+ - [video](https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s32194/)
+
## Quick Start
@@ -24,7 +27,7 @@ pip install scipy scikit-image cupy-cuda110
### Jupyter Notebooks
-Please check out our [Welcome](https://github.com/rapidsai/cucim/blob/branch-22.06/notebooks/Welcome.ipynb) notebook.
+Please check out our [Welcome](https://github.com/rapidsai/cucim/blob/branch-22.08/notebooks/Welcome.ipynb) notebook.
### Open Image
@@ -97,15 +100,15 @@ visualize(region)
Aperio SVS (.svs) image format is supported since cuCIM [v21.10.01](https://github.com/rapidsai/cucim/wiki/release_notes_v21.10.01#support-aperio-svs-svs).
-Please check [this notebook](https://nbviewer.org/github/rapidsai/cucim/blob/branch-22.06/notebooks/Supporting_Aperio_SVS_Format.ipynb) to see how to use the feature.
+Please check [this notebook](https://nbviewer.org/github/rapidsai/cucim/blob/branch-22.08/notebooks/Supporting_Aperio_SVS_Format.ipynb) to see how to use the feature.
### Using Cache
-Please look at this [notebook](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.06/notebooks/Using_Cache.ipynb).
+Please look at this [notebook](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.08/notebooks/Using_Cache.ipynb).
### Accessing File with GDS
-Please look at this [notebook](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.06/notebooks/Accessing_File_with_GDS.ipynb).
+Please look at this [notebook](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/branch-22.08/notebooks/Accessing_File_with_GDS.ipynb).
### NVTX Support for Performance Analysis
@@ -178,4 +181,4 @@ is used in this project.
Apache-2.0 License (see `LICENSE` file).
-Copyright (c) 2020-2021, NVIDIA CORPORATION.
+Copyright (c) 2020-2022, NVIDIA CORPORATION.
diff --git a/python/cucim/VERSION b/python/cucim/VERSION
index ddc883c42..231fac64b 100644
--- a/python/cucim/VERSION
+++ b/python/cucim/VERSION
@@ -1 +1 @@
-22.06.00
+22.08.00
diff --git a/python/cucim/docs/getting_started/index.md b/python/cucim/docs/getting_started/index.md
index 200c9fc3b..cede687cf 100644
--- a/python/cucim/docs/getting_started/index.md
+++ b/python/cucim/docs/getting_started/index.md
@@ -14,15 +14,15 @@
## Installation
-Please download the latest SDK package (`cuCIM-v22.06.00-linux.tar.gz`).
+Please download the latest SDK package (`cuCIM-v22.08.00-linux.tar.gz`).
Untar the downloaded file.
```bash
-mkdir -p cuCIM-v22.06.00
-tar -xzvf cuCIM-v22.06.00-linux.tar.gz -C cuCIM-v22.06.00
+mkdir -p cuCIM-v22.08.00
+tar -xzvf cuCIM-v22.08.00-linux.tar.gz -C cuCIM-v22.08.00
-cd cuCIM-v22.06.00
+cd cuCIM-v22.08.00
```
## Run command
@@ -147,7 +147,7 @@ Its execution would show some metadata information and create two files -- `outp
```
$ ./bin/tiff_image notebooks/input/image.tif .
[Plugin: cucim.kit.cuslide] Loading...
-[Plugin: cucim.kit.cuslide] Loading the dynamic library from: cucim.kit.cuslide@22.06.00.so
+[Plugin: cucim.kit.cuslide] Loading the dynamic library from: cucim.kit.cuslide@22.08.00.so
[Plugin: cucim.kit.cuslide] loaded successfully. Version: 0
Initializing plugin: cucim.kit.cuslide (interfaces: [cucim::io::IImageFormat v0.1]) (impl: cucim.kit.cuslide)
is_loaded: true
diff --git a/python/cucim/docs/index.md b/python/cucim/docs/index.md
index 4f03922c3..e9c4df2a4 100644
--- a/python/cucim/docs/index.md
+++ b/python/cucim/docs/index.md
@@ -18,7 +18,7 @@ development/index -->
# cuCIM Documentation
-Current latest version is [Version 22.06.00](release_notes/v22.06.00.md).
+Current latest version is [Version 22.08.00](release_notes/v22.08.00.md).
**cuCIM** a toolkit to provide GPU accelerated I/O, image processing & computer vision primitives for N-Dimensional images with a focus on biomedical imaging.
diff --git a/python/cucim/requirements-test.txt b/python/cucim/requirements-test.txt
index ffdcf7092..6d62d5a29 100644
--- a/python/cucim/requirements-test.txt
+++ b/python/cucim/requirements-test.txt
@@ -1,8 +1,8 @@
-GPUtil==1.4.0
-imagecodecs==2021.6.8
-openslide-python==1.1.2
-psutil==5.8.0
-pytest==6.2.4
-pytest-cov==2.12.1
-pytest-lazy-fixture==0.6.3
-tifffile==2021.7.2
+GPUtil>=1.4.0
+imagecodecs>=2021.6.8
+openslide-python>=1.1.2
+psutil>=5.8.0
+pytest>=6.2.4
+pytest-cov>=2.12.1
+pytest-lazy-fixture>=0.6.3
+tifffile>=2022.7.28
diff --git a/python/cucim/setup.cfg b/python/cucim/setup.cfg
index 517f33fff..7743540c2 100644
--- a/python/cucim/setup.cfg
+++ b/python/cucim/setup.cfg
@@ -65,6 +65,6 @@ line_length = 80
known_first_party = cucim
default_section = THIRDPARTY
forced_separate = test_cucim
-skip = .tox,.eggs,ci/templates,build,dist,versioneer.py
+skip = .tox,.eggs,ci/templates,build,dist,versioneer.py,ndimage.py
sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER
multi_line_output = GRID
diff --git a/python/cucim/src/cucim/core/__init__.py b/python/cucim/src/cucim/core/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/cucim/src/cucim/core/operations/color/stain_normalizer.py b/python/cucim/src/cucim/core/operations/color/stain_normalizer.py
index f5141ede2..3e82b26b8 100644
--- a/python/cucim/src/cucim/core/operations/color/stain_normalizer.py
+++ b/python/cucim/src/cucim/core/operations/color/stain_normalizer.py
@@ -13,7 +13,6 @@
# limitations under the License.
import math
-import warnings
from typing import Union
import cupy as cp
@@ -56,12 +55,9 @@ def image_to_absorbance(image, source_intensity=255.0, dtype=cp.float32):
-----
If `image` has an integer dtype it will be clipped to range
``[1, source_intensity]``, while float image inputs are clipped to range
- ``[source_intensity/255, source_intensity]. The minimum is to avoid log(0).
- Absorbance is then given by
-
- .. math::
-
- absorbance = \\log{\\frac{image}{source_intensity}}.
+ ``[source_intensity/255, source_intensity]``.
+ The minimum is to avoid log(0). Absorbance is then given by
+ ``absorbance = log(image / source_intensity)``.
"""
dtype = cp.dtype(dtype)
if dtype.kind != "f":
@@ -209,9 +205,7 @@ def _covariance(a):
ddof = 1
fact = X.shape[1] - ddof
if fact <= 0:
- warnings.warn("Degrees of freedom <= 0 for slice",
- RuntimeWarning, stacklevel=2)
- fact = 0.0
+ raise RuntimeError("Degrees of freedom <= 0")
X -= X.mean(axis=1, keepdims=True)
if not X.flags.f_contiguous:
@@ -314,10 +308,10 @@ def stain_extraction_pca(image, source_intensity=240, alpha=1, beta=0.345,
)
# remove transparent pixels
- absorbance = absorbance[:, cp.all(absorbance > beta, axis=0)]
- if absorbance.size == 0:
+ absorbance = absorbance[:, cp.any(absorbance > beta, axis=0)]
+ if absorbance.size == 0 or absorbance.shape[1] <= 1:
raise ValueError(
- "All pixels of the input image are below the threshold."
+ "Multiple pixels of the input must be above the `beta` threshold."
)
# compute eigenvectors (do small 3x3 matrix calculations on the host)
@@ -383,7 +377,10 @@ def _get_raw_concentrations(src_stain_coeff, absorbance):
def _normalized_from_concentrations(conc_raw, max_percentile, ref_stain_coeff,
ref_max_conc, source_intensity,
original_shape, channel_axis):
- """Determine normalized image from concentrations."""
+ """Determine normalized image from concentrations.
+
+ Note: This function will also modify conc_raw in-place.
+ """
# verify conc_raw is shape (2, n_pixels)
if conc_raw.ndim != 2 or conc_raw.shape[0] != 2:
@@ -410,10 +407,10 @@ def _normalized_from_concentrations(conc_raw, max_percentile, ref_stain_coeff,
for ch_raw in conc_raw]
)
normalization_factors = ref_max_conc / max_conc
- conc_norm = conc_raw * normalization_factors[:, cp.newaxis]
+ conc_raw *= normalization_factors[:, cp.newaxis]
# reconstruct the image based on the reference stain matrix
- absorbance_norm = ref_stain_coeff.dot(conc_norm)
+ absorbance_norm = ref_stain_coeff.dot(conc_raw)
image_norm = absorbance_to_image(
absorbance_norm, source_intensity=source_intensity, dtype=np.uint8
)
@@ -527,8 +524,8 @@ def normalize_colors_pca(
image_norm = _normalized_from_concentrations(
conc_raw=conc_raw,
max_percentile=100 - alpha,
- ref_max_conc=cp.asarray(ref_max_conc),
- ref_stain_coeff=cp.asarray(ref_stain_coeff),
+ ref_max_conc=cp.asarray(ref_max_conc, dtype=conc_raw.dtype),
+ ref_stain_coeff=cp.asarray(ref_stain_coeff, dtype=conc_raw.dtype),
source_intensity=source_intensity,
channel_axis=channel_axis,
original_shape=image.shape,
diff --git a/python/cucim/src/cucim/core/operations/morphology/__init__.py b/python/cucim/src/cucim/core/operations/morphology/__init__.py
new file mode 100644
index 000000000..a9c676edb
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/morphology/__init__.py
@@ -0,0 +1,3 @@
+from ._distance_transform import distance_transform_edt
+
+__all__ = ["distance_transform_edt"]
diff --git a/python/cucim/src/cucim/core/operations/morphology/_distance_transform.py b/python/cucim/src/cucim/core/operations/morphology/_distance_transform.py
new file mode 100644
index 000000000..70523c5bf
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/morphology/_distance_transform.py
@@ -0,0 +1,183 @@
+import numpy as np
+
+from ._pba_2d import _pba_2d
+from ._pba_3d import _pba_3d
+
+# TODO: support sampling distances
+# support the distances and indices output arguments
+# support chamfer, chessboard and l1/manhattan distances too?
+
+
+def distance_transform_edt(image, sampling=None, return_distances=True,
+ return_indices=False, distances=None, indices=None,
+ *, block_params=None, float64_distances=False):
+ r"""Exact Euclidean distance transform.
+
+ This function calculates the distance transform of the `image`, by
+ replacing each foreground (non-zero) element, with its shortest distance to
+ the background (any zero-valued element).
+
+ In addition to the distance transform, the feature transform can be
+ calculated. In this case the index of the closest background element to
+ each foreground element is returned in a separate array.
+
+ Parameters
+ ----------
+ image : array_like
+ Input data to transform. Can be any type but will be converted into
+ binary: 1 wherever image equates to True, 0 elsewhere.
+ sampling : float, or sequence of float, optional
+ Spacing of elements along each dimension. If a sequence, must be of
+ length equal to the image rank; if a single number, this is used for
+ all axes. If not specified, a grid spacing of unity is implied.
+ return_distances : bool, optional
+ Whether to calculate the distance transform.
+ return_indices : bool, optional
+ Whether to calculate the feature transform.
+ distances : float32 cupy.ndarray, optional
+ An output array to store the calculated distance transform, instead of
+ returning it. `return_distances` must be True. It must be the same
+ shape as `image`.
+ indices : int32 cupy.ndarray, optional
+ An output array to store the calculated feature transform, instead of
+ returning it. `return_indices` must be True. Its shape must be
+ `(image.ndim,) + image.shape`.
+
+ Other Parameters
+ ----------------
+ block_params : 3-tuple of int
+ The m1, m2, m3 algorithm parameters as described in [2]_. If None,
+ suitable defaults will be chosen. Note: This parameter is specific to
+ cuCIM and does not exist in SciPy.
+ float64_distances : bool, optional
+ If True, use double precision in the distance computation (to match
+ SciPy behavior). Otherwise, single precision will be used for
+ efficiency. Note: This parameter is specific to cuCIM and does not
+ exist in SciPy.
+
+ Returns
+ -------
+ distances : float64 ndarray, optional
+ The calculated distance transform. Returned only when
+ `return_distances` is True and `distances` is not supplied. It will
+ have the same shape as `image`.
+ indices : int32 ndarray, optional
+ The calculated feature transform. It has an image-shaped array for each
+ dimension of the image. See example below. Returned only when
+ `return_indices` is True and `indices` is not supplied.
+
+ Notes
+ -----
+ The Euclidean distance transform gives values of the Euclidean distance.
+
+ .. math::
+
+ y_i = \sqrt{\sum_{i}^{n} (x[i] - b[i])^2}
+
+ where :math:`b[i]` is the background point (value 0) with the smallest
+ Euclidean distance to input points :math:`x[i]`, and :math:`n` is the
+ number of dimensions.
+
+ Note that the `indices` output may differ from the one given by
+ :func:`scipy.ndimage.distance_transform_edt` in the case of input pixels
+ that are equidistant from multiple background points.
+
+ The parallel banding algorithm implemented here was originally described in
+ [1]_. The kernels used here correspond to the revised PBA+ implementation
+ that is described on the author's website [2]_. The source code of the
+ author's PBA+ implementation is available at [3]_.
+
+ References
+ ----------
+ .. [1] Thanh-Tung Cao, Ke Tang, Anis Mohamed, and Tiow-Seng Tan. 2010.
+ Parallel Banding Algorithm to compute exact distance transform with the
+ GPU. In Proceedings of the 2010 ACM SIGGRAPH symposium on Interactive
+ 3D Graphics and Games (I3D ’10). Association for Computing Machinery,
+ New York, NY, USA, 83–90.
+ DOI:https://doi.org/10.1145/1730804.1730818
+ .. [2] https://www.comp.nus.edu.sg/~tants/pba.html
+ .. [3] https://github.com/orzzzjq/Parallel-Banding-Algorithm-plus
+
+ Examples
+ --------
+ >>> import cupy as cp
+ >>> from cucim.core.operations import morphology
+ >>> a = cp.array(([0,1,1,1,1],
+ ... [0,0,1,1,1],
+ ... [0,1,1,1,1],
+ ... [0,1,1,1,0],
+ ... [0,1,1,0,0]))
+ >>> morphology.distance_transform_edt(a)
+ array([[ 0. , 1. , 1.4142, 2.2361, 3. ],
+ [ 0. , 0. , 1. , 2. , 2. ],
+ [ 0. , 1. , 1.4142, 1.4142, 1. ],
+ [ 0. , 1. , 1.4142, 1. , 0. ],
+ [ 0. , 1. , 1. , 0. , 0. ]])
+
+ With a sampling of 2 units along x, 1 along y (not yet supported here):
+
+ >>> morphology.distance_transform_edt(a, sampling=[2,1])
+ array([[ 0. , 1. , 2. , 2.8284, 3.6056],
+ [ 0. , 0. , 1. , 2. , 3. ],
+ [ 0. , 1. , 2. , 2.2361, 2. ],
+ [ 0. , 1. , 2. , 1. , 0. ],
+ [ 0. , 1. , 1. , 0. , 0. ]])
+
+ Asking for indices as well:
+
+ >>> edt, inds = morphology.distance_transform_edt(a, return_indices=True)
+ >>> inds
+ array([[[0, 0, 1, 1, 3],
+ [1, 1, 1, 1, 3],
+ [2, 2, 1, 3, 3],
+ [3, 3, 4, 4, 3],
+ [4, 4, 4, 4, 4]],
+ [[0, 0, 1, 1, 4],
+ [0, 1, 1, 1, 4],
+ [0, 0, 1, 4, 4],
+ [0, 0, 3, 3, 4],
+ [0, 0, 3, 3, 4]]])
+
+ """
+ if distances is not None:
+ raise NotImplementedError(
+ "preallocated distances image is not supported"
+ )
+ if indices is not None:
+ raise NotImplementedError(
+ "preallocated indices image is not supported"
+ )
+ scalar_sampling = None
+ if sampling is not None:
+ sampling = np.unique(np.atleast_1d(sampling))
+ if len(sampling) == 1:
+ scalar_sampling = float(sampling)
+ sampling = None
+ else:
+ raise NotImplementedError(
+ "non-uniform values in sampling is not currently supported"
+ )
+
+ if image.ndim == 3:
+ pba_func = _pba_3d
+ elif image.ndim == 2:
+ pba_func = _pba_2d
+ else:
+ raise NotImplementedError(
+ "Only 2D and 3D distance transforms are supported.")
+
+ vals = pba_func(
+ image,
+ sampling=sampling,
+ return_distances=return_distances,
+ return_indices=return_indices,
+ block_params=block_params
+ )
+
+ if return_distances and scalar_sampling is not None:
+ vals = (vals[0] * scalar_sampling,) + vals[1:]
+
+ if len(vals) == 1:
+ vals = vals[0]
+
+ return vals
diff --git a/python/cucim/src/cucim/core/operations/morphology/_pba_2d.py b/python/cucim/src/cucim/core/operations/morphology/_pba_2d.py
new file mode 100644
index 000000000..edb10a983
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/morphology/_pba_2d.py
@@ -0,0 +1,300 @@
+import math
+import os
+
+import cupy
+
+pba2d_defines_template = """
+
+// MARKER is used to mark blank pixels in the texture.
+// Any uncolored pixels will have x = MARKER.
+// Input texture should have x = MARKER for all pixels other than sites
+#define MARKER {marker}
+#define BLOCKSIZE {block_size_2d}
+#define pixel_int2_t {pixel_int2_t} // typically short2 (int2 for images with > 32k pixels per side)
+#define make_pixel(x, y) {make_pixel_func}(x, y) // typically make_short2 (make_int2 images with > 32k pixels per side
+
+""" # noqa
+
+
+def _init_marker(int_dtype):
+ """use a minimum value that is appropriate to the integer dtype"""
+ if int_dtype == cupy.int16:
+ # marker = cupy.iinfo(int_dtype).min
+ marker = -32768
+ elif int_dtype == cupy.int32:
+ # divide by two so we don't have to promote other intermediate int
+ # variables to 64-bit int
+ marker = -2147483648 // 2
+ else:
+ raise ValueError(
+ "expected int_dtype to be either cupy.int16 or cupy.int32"
+ )
+ return marker
+
+
+@cupy.memoize(True)
+def get_pba2d_src(block_size_2d=64, marker=-32768, pixel_int2_t='short2'):
+ make_pixel_func = 'make_' + pixel_int2_t
+
+ pba2d_code = pba2d_defines_template.format(
+ block_size_2d=block_size_2d,
+ marker=marker,
+ pixel_int2_t=pixel_int2_t,
+ make_pixel_func=make_pixel_func
+ )
+ kernel_directory = os.path.join(os.path.dirname(__file__), 'cuda')
+ with open(os.path.join(kernel_directory, 'pba_kernels_2d.h'), 'rt') as f:
+ pba2d_kernels = '\n'.join(f.readlines())
+
+ pba2d_code += pba2d_kernels
+ return pba2d_code
+
+
+def _get_block_size(check_warp_size=False):
+ if check_warp_size:
+ dev = cupy.cuda.runtime.getDevice()
+ device_properties = cupy.cuda.runtime.getDeviceProperties(dev)
+ return int(device_properties['warpSize'])
+ else:
+ return 32
+
+
+def _pack_int2(arr, marker=-32768, int_dtype=cupy.int16):
+ if arr.ndim != 2:
+ raise ValueError("only 2d arr suppported")
+ input_x = cupy.zeros(arr.shape, dtype=int_dtype)
+ input_y = cupy.zeros(arr.shape, dtype=int_dtype)
+ # TODO: create custom kernel for setting values in input_x, input_y
+ cond = arr == 0
+ y, x = cupy.where(cond)
+ input_x[cond] = x
+ mask = arr != 0
+ input_x[mask] = marker # 1 << 32
+ input_y[cond] = y
+ input_y[mask] = marker # 1 << 32
+ int2_dtype = cupy.dtype({'names': ['x', 'y'], 'formats': [int_dtype] * 2})
+ # in C++ code x is the contiguous axis and corresponds to width
+ # y is the non-contiguous axis and corresponds to height
+ # given that, store input_x as the last axis here
+ return cupy.squeeze(
+ cupy.stack((input_x, input_y), axis=-1).view(int2_dtype)
+ )
+
+
+def _unpack_int2(img, make_copy=False, int_dtype=cupy.int16):
+ temp = img.view(int_dtype).reshape(img.shape + (2,))
+ if make_copy:
+ temp = temp.copy()
+ return temp
+
+
+def _determine_padding(shape, padded_size, block_size):
+ # all kernels assume equal size along both axes, so pad up to equal size if
+ # shape is not isotropic
+ orig_sy, orig_sx = shape
+ if orig_sx != padded_size or orig_sy != padded_size:
+ padding_width = (
+ (0, padded_size - orig_sy), (0, padded_size - orig_sx)
+ )
+ else:
+ padding_width = None
+ return padding_width
+
+
+def _pba_2d(arr, sampling=None, return_distances=True, return_indices=False,
+ block_params=None, check_warp_size=False, *,
+ float64_distances=False):
+
+ # input_arr: a 2D image
+ # For each site at (x, y), the pixel at coordinate (x, y) should contain
+ # the pair (x, y). Pixels that are not sites should contain the pair
+ # (MARKER, MARKER)
+
+ # Note: could query warp size here, but for now just assume 32 to avoid
+ # overhead of querying properties
+ block_size = _get_block_size(check_warp_size)
+
+ if sampling is not None:
+ raise NotImplementedError("sampling not yet supported")
+ # if len(sampling) != 2:
+ # raise ValueError("sampling must be a sequence of two values.")
+
+ padded_size = math.ceil(max(arr.shape) / block_size) * block_size
+ if block_params is None:
+ # should be <= size / 64. sy must be a multiple of m1
+ m1 = max(1, min(padded_size // block_size, 32))
+
+ # size must be a multiple of m2
+ m2 = max(1, min(padded_size // block_size, 32))
+ # m2 must also be a power of two
+ m2 = 2**math.floor(math.log2(m2))
+ if padded_size % m2 != 0:
+ raise RuntimeError("error in setting default m2")
+
+ # should be <= 64. texture size must be a multiple of m3
+ m3 = min(min(m1, m2), 2)
+ else:
+ m1, m2, m3 = block_params
+
+ if m1 > padded_size // block_size:
+ raise ValueError("m1 too large. must be <= arr.shape[0] // 32")
+ if m2 > padded_size // block_size:
+ raise ValueError("m2 too large. must be <= arr.shape[1] // 32")
+ for m in (m1, m2, m3):
+ if padded_size % m != 0:
+ raise ValueError(
+ f"Largest dimension of image ({padded_size}) must be evenly "
+ f"disivible by each element of block_params: {(m1, m2, m3)}."
+ )
+
+ shape_max = max(arr.shape)
+ if shape_max <= 32768:
+ int_dtype = cupy.int16
+ pixel_int2_type = 'short2'
+ else:
+ if shape_max > (1 << 24):
+ # limit coordinate range to 2**24 due to use of __mul24 in
+ # coordinate TOID macro
+ raise ValueError(
+ f"maximum axis size of {1 << 24} exceeded, for image with "
+ f"shape {arr.shape}"
+ )
+ int_dtype = cupy.int32
+ pixel_int2_type = 'int2'
+
+ marker = _init_marker(int_dtype)
+
+ orig_sy, orig_sx = arr.shape
+ padding_width = _determine_padding(arr.shape, padded_size, block_size)
+ if padding_width is not None:
+ arr = cupy.pad(arr, padding_width, mode='constant', constant_values=1)
+ size = arr.shape[0]
+
+ input_arr = _pack_int2(arr, marker=marker, int_dtype=int_dtype)
+ output = cupy.zeros_like(input_arr)
+
+ int2_dtype = cupy.dtype({'names': ['x', 'y'], 'formats': [int_dtype] * 2})
+ margin = cupy.empty((2 * m1 * size,), dtype=int2_dtype)
+
+ # phase 1 of PBA. m1 must divide texture size and be <= 64
+ pba2d = cupy.RawModule(
+ code=get_pba2d_src(
+ block_size_2d=block_size,
+ marker=marker,
+ pixel_int2_t=pixel_int2_type,
+ )
+ )
+ kernelFloodDown = pba2d.get_function('kernelFloodDown')
+ kernelFloodUp = pba2d.get_function('kernelFloodUp')
+ kernelPropagateInterband = pba2d.get_function('kernelPropagateInterband')
+ kernelUpdateVertical = pba2d.get_function('kernelUpdateVertical')
+ kernelProximatePoints = pba2d.get_function('kernelProximatePoints')
+ kernelCreateForwardPointers = pba2d.get_function(
+ 'kernelCreateForwardPointers'
+ )
+ kernelMergeBands = pba2d.get_function('kernelMergeBands')
+ kernelDoubleToSingleList = pba2d.get_function('kernelDoubleToSingleList')
+ kernelColor = pba2d.get_function('kernelColor')
+
+ block = (block_size, 1, 1)
+ grid = (math.ceil(size / block[0]), m1, 1)
+ bandSize1 = size // m1
+ # kernelFloodDown modifies input_arr in-place
+ kernelFloodDown(
+ grid,
+ block,
+ (input_arr, input_arr, size, bandSize1),
+ )
+ # kernelFloodUp modifies input_arr in-place
+ kernelFloodUp(
+ grid,
+ block,
+ (input_arr, input_arr, size, bandSize1),
+ )
+ # kernelPropagateInterband fills values into margin
+ kernelPropagateInterband(
+ grid,
+ block,
+ (input_arr, margin, size, bandSize1),
+ )
+ # kernelUpdateVertical stores output into an intermediate array of
+ # transposed shape
+ kernelUpdateVertical(
+ grid,
+ block,
+ (input_arr, margin, output, size, bandSize1),
+ )
+
+ # phase 2
+ block = (block_size, 1, 1)
+ grid = (math.ceil(size / block[0]), m2, 1)
+ bandSize2 = size // m2
+ kernelProximatePoints(
+ grid,
+ block,
+ (output, input_arr, size, bandSize2),
+ )
+ kernelCreateForwardPointers(
+ grid,
+ block,
+ (input_arr, input_arr, size, bandSize2),
+ )
+ # Repeatedly merge two bands into one
+ noBand = m2
+ while noBand > 1:
+ grid = (math.ceil(size / block[0]), noBand // 2)
+ kernelMergeBands(
+ grid,
+ block,
+ (output, input_arr, input_arr, size, size // noBand),
+ )
+ noBand //= 2
+ # Replace the forward link with the X coordinate of the seed to remove
+ # the need of looking at the other texture. We need it for coloring.
+ grid = (math.ceil(size / block[0]), size)
+ kernelDoubleToSingleList(
+ grid,
+ block,
+ (output, input_arr, input_arr, size),
+ )
+
+ # Phase 3 of PBA
+ block = (block_size, m3, 1)
+ grid = (math.ceil(size / block[0]), 1, 1)
+ kernelColor(
+ grid,
+ block,
+ (input_arr, output, size),
+ )
+
+ output = _unpack_int2(output, make_copy=False, int_dtype=int_dtype)
+ # make sure to crop any padding that was added here!
+ x = output[:orig_sy, :orig_sx, 0]
+ y = output[:orig_sy, :orig_sx, 1]
+
+ # raise NotImplementedError("TODO")
+ vals = ()
+ if return_distances:
+ # TODO: custom kernel for more efficient distance computation
+ y0, x0 = cupy.meshgrid(
+ *(
+ cupy.arange(s, dtype=cupy.int32)
+ for s in (orig_sy, orig_sx)
+ ),
+ indexing='ij',
+ sparse=True,
+ )
+ tmp = (x - x0)
+ dist = tmp * tmp
+ tmp = (y - y0)
+ dist += tmp * tmp
+ if float64_distances:
+ dist = cupy.sqrt(dist)
+ else:
+ dist = dist.astype(cupy.float32)
+ cupy.sqrt(dist, out=dist)
+ vals = vals + (dist,)
+ if return_indices:
+ indices = cupy.stack((y, x), axis=0)
+ vals = vals + (indices,)
+ return vals
diff --git a/python/cucim/src/cucim/core/operations/morphology/_pba_3d.py b/python/cucim/src/cucim/core/operations/morphology/_pba_3d.py
new file mode 100644
index 000000000..dab484e90
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/morphology/_pba_3d.py
@@ -0,0 +1,311 @@
+import functools
+import math
+import numbers
+import os
+
+import cupy
+import numpy as np
+
+from ._pba_2d import _get_block_size
+
+try:
+    # math.lcm was introduced in Python 3.9
+    from math import lcm
+except ImportError:
+
+    # Fallback implementation of least common multiple (lcm) that mirrors
+    # math.lcm: lcm() == 1 and the result is 0 if any argument is 0.
+    #
+    # TODO: remove once minimum Python requirement is >= 3.9
+
+    def _lcm(a, b):
+        """Return lcm(a, b) >= 0; 0 when either argument is 0."""
+        # gcd(0, 0) == 0, so guard against the resulting ZeroDivisionError
+        return abs(b * (a // math.gcd(a, b))) if a and b else 0
+
+    @functools.lru_cache()
+    def lcm(*args):
+        """Return the least common multiple of the integer arguments.
+
+        Raises TypeError if any argument is not an integer.
+        """
+        if not all(isinstance(a, numbers.Integral) for a in args):
+            raise TypeError("all arguments must be integers")
+        res = 1
+        for x in args:
+            res = _lcm(res, int(x))
+        return res
+
+
+pba3d_defines_template = """
+
+#define MARKER {marker}
+#define MAX_INT {max_int}
+#define BLOCKSIZE {block_size_3d}
+
+"""
+
+# For efficiency, the original PBA+ packs three 10-bit integers and two binary
+# flags into a single 32-bit integer. The defines in
+# `pba3d_defines_encode_32bit` handle this format.
+pba3d_defines_encode_32bit = """
+// Sites : ENCODE(x, y, z, 0, 0)
+// Not sites : ENCODE(0, 0, 0, 1, 0) or MARKER
+#define ENCODED_INT_TYPE int
+#define ZERO 0
+#define ONE 1
+#define ENCODE(x, y, z, a, b) (((x) << 20) | ((y) << 10) | (z) | ((a) << 31) | ((b) << 30))
+#define DECODE(value, x, y, z) \
+ x = ((value) >> 20) & 0x3ff; \
+ y = ((value) >> 10) & 0x3ff; \
+ z = (value) & 0x3ff
+
+#define NOTSITE(value) (((value) >> 31) & 1)
+#define HASNEXT(value) (((value) >> 30) & 1)
+
+#define GET_X(value) (((value) >> 20) & 0x3ff)
+#define GET_Y(value) (((value) >> 10) & 0x3ff)
+#define GET_Z(value) ((NOTSITE((value))) ? MAX_INT : ((value) & 0x3ff))
+
+""" # noqa
+
+
+# 64bit version of ENCODE/DECODE to allow a 20-bit integer per coordinate axis.
+pba3d_defines_encode_64bit = """
+// Sites : ENCODE(x, y, z, 0, 0)
+// Not sites : ENCODE(0, 0, 0, 1, 0) or MARKER
+#define ENCODED_INT_TYPE long long
+#define ZERO 0LL  // LL suffix: literal has the width of ENCODED_INT_TYPE
+#define ONE 1LL  // (1L << 61 would overflow where long is only 32 bits)
+#define ENCODE(x, y, z, a, b) (((x) << 40) | ((y) << 20) | (z) | ((a) << 61) | ((b) << 60))
+#define DECODE(value, x, y, z) \
+    x = ((value) >> 40) & 0xfffff; \
+    y = ((value) >> 20) & 0xfffff; \
+    z = (value) & 0xfffff
+
+#define NOTSITE(value) (((value) >> 61) & 1)
+#define HASNEXT(value) (((value) >> 60) & 1)
+
+#define GET_X(value) (((value) >> 40) & 0xfffff)
+#define GET_Y(value) (((value) >> 20) & 0xfffff)
+#define GET_Z(value) ((NOTSITE((value))) ? MAX_INT : ((value) & 0xfffff))
+
+"""  # noqa
+
+
+@cupy.memoize(True)
+def get_pba3d_src(block_size_3d=32, marker=-2147483648, max_int=2147483647,
+                  size_max=1024):
+    """Return CUDA source: generated defines + cuda/pba_kernels_3d.h."""
+    pba3d_code = pba3d_defines_template.format(
+        block_size_3d=block_size_3d, marker=marker, max_int=max_int
+    )
+    # 10 bits per coordinate (32-bit packing) only covers sizes up to 1024
+    pba3d_code += (pba3d_defines_encode_64bit if size_max > 1024
+                   else pba3d_defines_encode_32bit)
+    kernel_directory = os.path.join(os.path.dirname(__file__), 'cuda')
+    with open(os.path.join(kernel_directory, 'pba_kernels_3d.h'), 'rt') as f:
+        # read() keeps the header verbatim; '\n'.join(f.readlines()) doubled
+        # every newline because readlines() retains the line terminators
+        return pba3d_code + f.read()
+
+
+# TODO: custom kernel for encode3d
+def encode3d(arr, marker=-2147483648, bit_depth=32, size_max=1024):
+    if arr.ndim != 3:
+        raise ValueError("only 3d arr suppported")
+    if bit_depth not in [32, 64]:  # only validated; size_max picks the dtype
+        raise ValueError("only bit_depth of 32 or 64 is supported")
+    if size_max > 1024:
+        dtype = np.int64
+    else:
+        dtype = np.int32
+    image = cupy.zeros(arr.shape, dtype=dtype, order='C')
+    cond = arr == 0  # zero-valued voxels are the sites that get encoded
+    z, y, x = cupy.where(cond)
+    # Pack each site's own (x, y, z) into one integer; arr axes are ordered
+    # z, y, x so that x is contiguous (must match TOID macro in the C++ code!)
+    if size_max > 1024:
+        image[cond] = (((x) << 40) | ((y) << 20) | (z))
+    else:
+        image[cond] = (((x) << 20) | ((y) << 10) | (z))
+    image[arr != 0] = marker  # non-sites; default marker is -(1 << 31)
+    return image
+
+
+# TODO: custom kernel for decode3d
+def decode3d(output, size_max=1024):
+    if size_max > 1024:  # 64-bit packing: 20 bits per coordinate
+        x = (output >> 40) & 0xfffff
+        y = (output >> 20) & 0xfffff
+        z = output & 0xfffff
+    else:  # 32-bit packing: 10 bits per coordinate
+        x = (output >> 20) & 0x3ff
+        y = (output >> 10) & 0x3ff
+        z = output & 0x3ff
+    return (x, y, z)  # unpacked coordinate arrays, x first
+
+
+def _determine_padding(shape, block_size, m1, m2, m3, blockx, blocky):
+    # TODO: can possibly revise to consider only particular factors for LCM on
+    # a given axis
+    LCM = lcm(block_size, m1, m2, m3, blockx, blocky)
+    orig_sz, orig_sy, orig_sx = shape
+    round_up = False  # set when any axis is not already a multiple of LCM
+    if orig_sx % LCM != 0:
+        # round up size to the next multiple of LCM
+        round_up = True
+        sx = LCM * math.ceil(orig_sx / LCM)
+    else:
+        sx = orig_sx
+    if orig_sy % LCM != 0:
+        # round up size to the next multiple of LCM
+        round_up = True
+        sy = LCM * math.ceil(orig_sy / LCM)
+    else:
+        sy = orig_sy
+    if orig_sz % LCM != 0:
+        # round up size to the next multiple of LCM
+        round_up = True
+        sz = LCM * math.ceil(orig_sz / LCM)
+    else:
+        sz = orig_sz
+
+    aniso = not (sx == sy == sz)  # True when the rounded sizes differ
+    if aniso or round_up:
+        smax = max(sz, sy, sx)  # every axis is padded to this common size
+        padding_width = (
+            (0, smax - orig_sz), (0, smax - orig_sy), (0, smax - orig_sx)
+        )  # (before, after) per axis in z, y, x order; pads at the end only
+    else:
+        padding_width = None  # shape is already a cube with side % LCM == 0
+    return padding_width
+
+
+def _pba_3d(arr, sampling=None, return_distances=True, return_indices=False,
+            block_params=None, check_warp_size=False, *,
+            float64_distances=False):  # 3D Euclidean distance transform (PBA+)
+    if arr.ndim != 3:
+        raise ValueError(f"expected a 3D array, got {arr.ndim}D")
+
+    if sampling is not None:
+        raise NotImplementedError("sampling not yet supported")
+        # if len(sampling) != 3:
+        #     raise ValueError("sampling must be a sequence of three values.")
+
+    if block_params is None:
+        m1 = 1
+        m2 = 1
+        m3 = 2
+    else:
+        m1, m2, m3 = block_params
+    # m1 and m2 only enter the padding LCM below; m3 is also the block y-size
+    # reduce blockx for small inputs
+    s_min = min(arr.shape)
+    if s_min <= 4:
+        blockx = 4
+    elif s_min <= 8:
+        blockx = 8
+    elif s_min <= 16:
+        blockx = 16
+    else:
+        blockx = 32
+    blocky = 4
+
+    block_size = _get_block_size(check_warp_size)
+
+    orig_sz, orig_sy, orig_sx = arr.shape
+    padding_width = _determine_padding(
+        arr.shape, block_size, m1, m2, m3, blockx, blocky
+    )
+    if padding_width is not None:
+        arr = cupy.pad(arr, padding_width, mode='constant', constant_values=1)
+    size = arr.shape[0]  # all three axes have this size at this point
+
+    # Coordinates are packed into one integer per voxel (see encode3d):
+    # input_arr is C-contiguous, int32, or int64 when size_max > 1024
+    size_max = max(arr.shape)
+    input_arr = encode3d(arr, size_max=size_max)
+    buffer_idx = 0
+    output = cupy.zeros_like(input_arr)
+    pba_images = [input_arr, output]  # the kernels ping-pong between these
+
+    block = (blockx, blocky, 1)
+    grid = (size // block[0], size // block[1], 1)
+    pba3d = cupy.RawModule(
+        code=get_pba3d_src(block_size_3d=block_size, size_max=size_max)
+    )
+
+    kernelFloodZ = pba3d.get_function('kernelFloodZ')
+    kernelMaurerAxis = pba3d.get_function('kernelMaurerAxis')
+    kernelColorAxis = pba3d.get_function('kernelColorAxis')
+
+    kernelFloodZ(
+        grid,
+        block,
+        (pba_images[buffer_idx], pba_images[1 - buffer_idx], size)
+    )
+    buffer_idx = 1 - buffer_idx  # now indexes the buffer kernelFloodZ wrote
+    # first kernelMaurerAxis / kernelColorAxis pass
+    block = (blockx, blocky, 1)
+    grid = (size // block[0], size // block[1], 1)
+    kernelMaurerAxis(
+        grid,
+        block,
+        (pba_images[buffer_idx], pba_images[1 - buffer_idx], size),
+    )
+
+    block = (block_size, m3, 1)
+    grid = (size // block[0], size, 1)
+    kernelColorAxis(
+        grid,
+        block,
+        (pba_images[1 - buffer_idx], pba_images[buffer_idx], size),
+    )
+    # second pass: kernelColorAxis writes x/y-transposed, so repeat the pair
+    block = (blockx, blocky, 1)
+    grid = (size // block[0], size // block[1], 1)
+    kernelMaurerAxis(
+        grid,
+        block,
+        (pba_images[buffer_idx], pba_images[1 - buffer_idx], size),
+    )
+
+    block = (block_size, m3, 1)
+    grid = (size // block[0], size, 1)
+    kernelColorAxis(
+        grid,
+        block,
+        (pba_images[1 - buffer_idx], pba_images[buffer_idx], size),
+    )
+
+    output = pba_images[buffer_idx]
+    if return_distances or return_indices:
+        x, y, z = decode3d(output[:orig_sz, :orig_sy, :orig_sx],
+                           size_max=size_max)  # the slicing crops the padding
+
+    vals = ()  # stays empty if neither distances nor indices are requested
+    if return_distances:
+        # TODO: custom kernel for more efficient distance computation
+        orig_shape = (orig_sz, orig_sy, orig_sx)
+        z0, y0, x0 = cupy.meshgrid(
+            *(cupy.arange(s, dtype=cupy.int32) for s in orig_shape),
+            indexing='ij',
+            sparse=True
+        )
+        tmp = (x - x0)  # offset from each voxel to its decoded coordinate
+        dist = tmp * tmp
+        tmp = (y - y0)
+        dist += tmp * tmp
+        tmp = (z - z0)
+        dist += tmp * tmp
+        if float64_distances:
+            dist = cupy.sqrt(dist)
+        else:
+            dist = dist.astype(cupy.float32)
+            cupy.sqrt(dist, out=dist)  # in-place, stays float32
+        vals = vals + (dist,)
+    if return_indices:
+        indices = cupy.stack((z, y, x), axis=0)  # axis 0 ordered z, y, x
+        vals = vals + (indices,)
+    return vals
diff --git a/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_2d.h b/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_2d.h
new file mode 100644
index 000000000..61677c682
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_2d.h
@@ -0,0 +1,451 @@
+// Euclidean Distance Transform
+//
+// Kernels for the 2D version of the Parallel Banding Algorithm (PBA+).
+//
+// MIT license: see 3rdparty/LICENSE.pba+
+// Copyright: (c) 2019 School of Computing, National University of Singapore
+//
+// Modifications by Gregory Lee (2022) (NVIDIA)
+// - add user-defined pixel_int2_t so the pixel type (default short2) can be overridden
+// - replace __mul24 operations with standard multiplication operator
+
+
+// START OF DEFINITIONS OVERRIDDEN BY THE PYTHON SCRIPT
+
+// The values included in this header file are those defined in the original
+// PBA+ implementation
+
+// However, the Python code generation can potentially override them, e.g. by
+// defining pixel_int2_t and make_pixel for a different integer pixel type
+// than the default short2 (see the #ifndef guards below).
+
+#ifndef MARKER
+#define MARKER -32768
+#endif
+
+#ifndef BLOCKSIZE
+#define BLOCKSIZE 32
+#endif
+
+#ifndef pixel_int2_t
+#define pixel_int2_t short2
+#define make_pixel(x, y) make_short2(x, y)
+#endif
+
+// END OF DEFINITIONS OVERRIDDEN BY THE PYTHON SCRIPT
+
+
+#define TOID(x, y, size) ((y) * (size) + (x))
+
+#define LL long long  // local shorthand, removed again by the #undef below
+__device__ bool dominate(LL x1, LL y1, LL x2, LL y2, LL x3, LL y3, LL x0)
+{   // presumably: true when site 2 is dominated by sites 1 and 3 for column x0 -- TODO confirm against the PBA+ paper
+    LL k1 = y2 - y1, k2 = y3 - y2;
+    return (k1 * (y1 + y2) + (x2 - x1) * ((x1 + x2) - (x0 << 1))) * k2 > \
+           (k2 * (y2 + y3) + (x3 - x2) * ((x2 + x3) - (x0 << 1))) * k1;
+}
+#undef LL
+
+
+extern "C"{
+
+__global__ void kernelFloodDown(pixel_int2_t *input, pixel_int2_t *output, int size, int bandSize)
+{   // One thread per column and band: sweep the band's rows in increasing y.
+    int tx = blockIdx.x * blockDim.x + threadIdx.x;  // column of this thread
+    int ty = blockIdx.y * bandSize;                  // first row of this band
+    int id = TOID(tx, ty, size);
+
+    pixel_int2_t pixel1, pixel2;
+
+    pixel1 = make_pixel(MARKER, MARKER);  // nothing found yet
+
+    for (int i = 0; i < bandSize; i++, id += size) {
+        pixel2 = input[id];
+
+        if (pixel2.x != MARKER)
+            pixel1 = pixel2;
+
+        output[id] = pixel1;  // last non-MARKER pixel seen so far in this band
+    }
+}
+
+__global__ void kernelFloodUp(pixel_int2_t *input, pixel_int2_t *output, int size, int bandSize)
+{   // One thread per column and band: sweep the band's rows in decreasing y.
+    int tx = blockIdx.x * blockDim.x + threadIdx.x;
+    int ty = (blockIdx.y+1) * bandSize - 1;  // last row of this band
+    int id = TOID(tx, ty, size);
+
+    pixel_int2_t pixel1, pixel2;
+    int dist1, dist2;
+
+    pixel1 = make_pixel(MARKER, MARKER);
+
+    for (int i = 0; i < bandSize; i++, id -= size) {
+        dist1 = abs(pixel1.y - ty + i);  // |pixel1.y - (ty - i)|, (ty - i) being the current row
+
+        pixel2 = input[id];
+        dist2 = abs(pixel2.y - ty + i);
+
+        if (dist2 < dist1)  // keep whichever candidate is closer in y
+            pixel1 = pixel2;
+
+        output[id] = pixel1;
+    }
+}
+
+__global__ void kernelPropagateInterband(pixel_int2_t *input, pixel_int2_t *margin_out, int size, int bandSize)
+{   // For the first and last row of each band, pick the closer (in y) of the band's own value and one from another band.
+    int tx = blockIdx.x * blockDim.x + threadIdx.x;
+    int inc = bandSize * size;  // offset between the same row of adjacent bands
+    int ny, nid, nDist;
+    pixel_int2_t pixel;
+
+    // Top row, look backward
+    int ty = blockIdx.y * bandSize;
+    int topId = TOID(tx, ty, size);
+    int bottomId = TOID(tx, ty + bandSize - 1, size);
+    int tid = blockIdx.y * size + tx;         // margin slot for this band's top row
+    int bid = tid + (size * size / bandSize);  // margin slot for this band's bottom row
+
+    pixel = input[topId];
+    int myDist = abs(pixel.y - ty);
+    margin_out[tid] = pixel;
+
+    for (nid = bottomId - inc; nid >= 0; nid -= inc) {  // bottom rows of the preceding bands
+        pixel = input[nid];
+
+        if (pixel.x != MARKER) {
+            nDist = abs(pixel.y - ty);
+
+            if (nDist < myDist)
+                margin_out[tid] = pixel;
+
+            break;  // only the first non-MARKER candidate is considered
+        }
+    }
+
+    // Last row, look downward
+    ty = ty + bandSize - 1;
+    pixel = input[bottomId];
+    myDist = abs(pixel.y - ty);
+    margin_out[bid] = pixel;
+
+    for (ny = ty + 1, nid = topId + inc; ny < size; ny += bandSize, nid += inc) {  // top rows of the following bands
+        pixel = input[nid];
+
+        if (pixel.x != MARKER) {
+            nDist = abs(pixel.y - ty);
+
+            if (nDist < myDist)
+                margin_out[bid] = pixel;
+
+            break;
+        }
+    }
+}
+
+__global__ void kernelUpdateVertical(pixel_int2_t *color, pixel_int2_t *margin, pixel_int2_t *output, int size, int bandSize)
+{   // Combine each pixel with its band's top/bottom margin values and write the result transposed.
+    __shared__ pixel_int2_t block[BLOCKSIZE][BLOCKSIZE];  // staging tile for the transpose
+
+    int tx = blockIdx.x * blockDim.x + threadIdx.x;
+    int ty = blockIdx.y * bandSize;
+
+    pixel_int2_t top = margin[blockIdx.y * size + tx];
+    pixel_int2_t bottom = margin[(blockIdx.y + size / bandSize) * size + tx];
+    pixel_int2_t pixel;
+
+    int dist, myDist;
+
+    int id = TOID(tx, ty, size);
+
+    int n_step = bandSize / blockDim.x;  // NOTE(review): 0 if bandSize < blockDim.x; presumably the caller guarantees a multiple
+    for(int step = 0; step < n_step; ++step) {
+        int y_start = blockIdx.y * bandSize + step * blockDim.x;
+        int y_end = y_start + blockDim.x;
+
+        for (ty = y_start; ty < y_end; ++ty, id += size) {
+            pixel = color[id];
+            myDist = abs(pixel.y - ty);
+
+            dist = abs(top.y - ty);
+            if (dist < myDist) { myDist = dist; pixel = top; }
+
+            dist = abs(bottom.y - ty);
+            if (dist < myDist) pixel = bottom;
+
+            // temporary result is stored in block (with x and y components swapped)
+            block[threadIdx.x][ty - y_start] = make_pixel(pixel.y, pixel.x);
+        }
+
+        __syncthreads();
+
+        // block is written to a transposed location in the output
+
+        int tid = TOID(blockIdx.y * bandSize + step * blockDim.x + threadIdx.x, \
+                       blockIdx.x * blockDim.x, size);
+
+        for(int i = 0; i < blockDim.x; ++i, tid += size) {
+            output[tid] = block[i][threadIdx.x];
+        }
+
+        __syncthreads();  // tile is reused by the next step
+    }
+}
+
+__global__ void kernelProximatePoints(pixel_int2_t *input, pixel_int2_t *stack, int size, int bandSize)
+{   // Per column and band: build a back-linked stack of sites, dropping entries for which dominate() is true.
+    int tx = blockIdx.x * blockDim.x + threadIdx.x;
+    int ty = blockIdx.y * bandSize;
+    int id = TOID(tx, ty, size);
+    int lasty = -1;  // row of the current stack top (-1: stack empty)
+    pixel_int2_t last1, last2, current;
+
+    last1.y = -1; last2.y = -1;  // a negative y means "no previous entry"
+
+    for (int i = 0; i < bandSize; i++, id += size) {
+        current = input[id];
+
+        if (current.x != MARKER) {
+            while (last2.y >= 0) {
+                if (!dominate(last1.x, last2.y, last2.x, \
+                              lasty, current.x, current.y, tx))
+                    break;
+
+                lasty = last2.y; last2 = last1;  // pop the stack top
+
+                if (last1.y >= 0)
+                    last1 = stack[TOID(tx, last1.y, size)];
+            }
+
+            last1 = last2; last2 = make_pixel(current.x, lasty); lasty = current.y;  // push: (site x, back pointer)
+
+            stack[id] = last2;
+        }
+    }
+
+    // Store the pointer to the tail at the last pixel of this band
+    if (lasty != ty + bandSize - 1)
+        stack[TOID(tx, ty + bandSize - 1, size)] = make_pixel(MARKER, lasty);
+}
+
+__global__ void kernelCreateForwardPointers(pixel_int2_t *input, pixel_int2_t *output, int size, int bandSize)
+{   // Per column and band: follow the back pointers from the band's tail and record a forward pointer in .x.
+    int tx = blockIdx.x * blockDim.x + threadIdx.x;
+    int ty = (blockIdx.y+1) * bandSize - 1;  // last row of this band
+    int id = TOID(tx, ty, size);
+    int lasty = -1, nexty;
+    pixel_int2_t current;
+
+    // Get the tail pointer
+    current = input[id];
+
+    if (current.x == MARKER)
+        nexty = current.y;  // last row holds a tail pointer rather than a list entry
+    else
+        nexty = ty;
+
+    for (int i = 0; i < bandSize; i++, id -= size)
+        if (ty - i == nexty) {  // this row is the next entry of the list
+            current = make_pixel(lasty, input[id].y);  // (forward pointer, back pointer)
+            output[id] = current;
+
+            lasty = nexty;
+            nexty = current.y;
+        }
+
+    // Store the pointer to the head at the first pixel of this band
+    if (lasty != ty - bandSize + 1)
+        output[id + size] = make_pixel(lasty, MARKER);  // id is one row above the band here
+}
+
+__global__ void kernelMergeBands(pixel_int2_t *color, pixel_int2_t *link, pixel_int2_t *output, int size, int bandSize)
+{   // Per column: merge the linked lists of two adjacent bands (2*blockIdx.y and the next one) into one.
+    int tx = blockIdx.x * blockDim.x + threadIdx.x;
+    int band1 = blockIdx.y * 2;
+    int band2 = band1 + 1;
+    int firsty, lasty;
+    pixel_int2_t last1, last2, current;
+    // last1 and last2: x component store the x coordinate of the site,
+    // y component store the backward pointer
+    // current: y component store the x coordinate of the site,
+    // x component store the forward pointer
+
+    // Get the two last items of the first list
+    lasty = band2 * bandSize - 1;  // last row of band1
+    last2 = make_pixel(color[TOID(tx, lasty, size)].x,
+                       link[TOID(tx, lasty, size)].y);
+
+    if (last2.x == MARKER) {  // last row holds a tail pointer, not a site
+        lasty = last2.y;
+
+        if (lasty >= 0)
+            last2 = make_pixel(color[TOID(tx, lasty, size)].x,
+                               link[TOID(tx, lasty, size)].y);
+        else
+            last2 = make_pixel(MARKER, MARKER);
+    }
+
+    if (last2.y >= 0) {
+        // Second item at the top of the stack
+        last1 = make_pixel(color[TOID(tx, last2.y, size)].x,
+                           link[TOID(tx, last2.y, size)].y);
+    }
+
+    // Get the first item of the second band
+    firsty = band2 * bandSize;
+    current = make_pixel(link[TOID(tx, firsty, size)].x,
+                         color[TOID(tx, firsty, size)].x);
+
+    if (current.y == MARKER) {  // first row holds a head pointer, not a site
+        firsty = current.x;
+
+        if (firsty >= 0)
+            current = make_pixel(link[TOID(tx, firsty, size)].x,
+                                 color[TOID(tx, firsty, size)].x);
+        else
+            current = make_pixel(MARKER, MARKER);
+    }
+
+    // Count the number of item in the second band that survive so far.
+    // Once it reaches 2, we can stop.
+    int top = 0;
+
+    while (top < 2 && current.y >= 0) {
+        // While there's still something on the left
+        while (last2.y >= 0) {
+
+            if (!dominate(last1.x, last2.y, last2.x, \
+                          lasty, current.y, firsty, tx))
+                break;
+
+            lasty = last2.y; last2 = last1;  // pop the stack top
+            top--;
+
+            if (last1.y >= 0)
+                last1 = make_pixel(color[TOID(tx, last1.y, size)].x,
+                                   link[TOID(tx, last1.y, size)].y);
+        }
+
+        // Update the current pointer
+        output[TOID(tx, firsty, size)] = make_pixel(current.x, lasty);
+
+        if (lasty >= 0)
+            output[TOID(tx, lasty, size)] = make_pixel(firsty, last2.y);
+
+        last1 = last2; last2 = make_pixel(current.y, lasty); lasty = firsty;  // push current
+        firsty = current.x;
+
+        top = max(1, top + 1);
+
+        // Advance the current pointer to the next one
+        if (firsty >= 0)
+            current = make_pixel(link[TOID(tx, firsty, size)].x,
+                                 color[TOID(tx, firsty, size)].x);
+        else
+            current = make_pixel(MARKER, MARKER);
+    }
+
+    // Update the head and tail pointer.
+    firsty = band1 * bandSize;
+    lasty = band2 * bandSize;
+    current = link[TOID(tx, firsty, size)];
+
+    if (current.y == MARKER && current.x < 0) { // No head?
+        last1 = link[TOID(tx, lasty, size)];
+
+        if (last1.y == MARKER)
+            current.x = last1.x;
+        else
+            current.x = lasty;
+
+        output[TOID(tx, firsty, size)] = current;
+    }
+
+    firsty = band1 * bandSize + bandSize - 1;
+    lasty = band2 * bandSize + bandSize - 1;
+    current = link[TOID(tx, lasty, size)];
+
+    if (current.x == MARKER && current.y < 0) { // No tail?
+        last1 = link[TOID(tx, firsty, size)];
+
+        if (last1.x == MARKER)
+            current.y = last1.y;
+        else
+            current.y = firsty;
+
+        output[TOID(tx, lasty, size)] = current;
+    }
+}
+
+__global__ void kernelDoubleToSingleList(pixel_int2_t *color, pixel_int2_t *link, pixel_int2_t *output, int size)
+{   // One thread per pixel: output takes .x from color and .y from link.
+    int tx = blockIdx.x * blockDim.x + threadIdx.x;
+    int ty = blockIdx.y;  // one grid row per image row
+    int id = TOID(tx, ty, size);
+
+    output[id] = make_pixel(color[id].x, link[id].y);
+}
+
+__global__ void kernelColor(pixel_int2_t *input, pixel_int2_t *output, int size)
+{   // Per column: walk the linked list from its tail and assign each row the list entry with the smallest squared distance.
+    __shared__ pixel_int2_t block[BLOCKSIZE][BLOCKSIZE];  // staging tile for the transposed write
+
+    int col = threadIdx.x;
+    int tid = threadIdx.y;
+    int tx = blockIdx.x * blockDim.x + col;
+    int dx, dy, lasty;
+    unsigned int best, dist;
+    pixel_int2_t last1, last2;
+
+    lasty = size - 1;
+
+    last2 = input[TOID(tx, lasty, size)];
+
+    if (last2.x == MARKER) {  // last row stores a tail pointer instead of an entry
+        lasty = last2.y;
+        last2 = input[TOID(tx, lasty, size)];
+    }
+
+    if (last2.y >= 0)
+        last1 = input[TOID(tx, last2.y, size)];
+
+    int y_start, y_end, n_step = size / blockDim.x;
+    for(int step = 0; step < n_step; ++step) {
+        y_start = size - step * blockDim.x - 1;    // rows are processed from the bottom up,
+        y_end = size - (step + 1) * blockDim.x;    // blockDim.x rows per step
+
+        for (int ty = y_start - tid; ty >= y_end; ty -= blockDim.y) {
+            dx = last2.x - tx; dy = lasty - ty;
+            best = dist = dx * dx + dy * dy;
+
+            while (last2.y >= 0) {
+                dx = last1.x - tx; dy = last2.y - ty;
+                dist = dx * dx + dy * dy;
+
+                if (dist > best)
+                    break;
+
+                best = dist; lasty = last2.y; last2 = last1;  // the previous entry is at least as close
+
+                if (last2.y >= 0)
+                    last1 = input[TOID(tx, last2.y, size)];
+            }
+
+            block[threadIdx.x][ty - y_end] = make_pixel(lasty, last2.x);
+        }
+
+        __syncthreads();
+
+        // note: transposes back to original shape here
+        if(!threadIdx.y) {
+            int id = TOID(y_end + threadIdx.x, blockIdx.x * blockDim.x, size);
+            for(int i = 0; i < blockDim.x; ++i, id+=size) {
+                output[id] = block[i][threadIdx.x];
+            }
+        }
+
+        __syncthreads();  // tile is reused by the next step
+    }
+}
+} // extern C
diff --git a/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_3d.h b/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_3d.h
new file mode 100644
index 000000000..c09f4b51f
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/morphology/cuda/pba_kernels_3d.h
@@ -0,0 +1,237 @@
+// Euclidean Distance Transform
+//
+// Kernels for the 3D version of the Parallel Banding Algorithm (PBA+).
+//
+// MIT license: see 3rdparty/LICENSE.pba+
+//
+// Modifications by Gregory Lee (2022) (NVIDIA)
+// - allow user-defined ENCODED_INT_TYPE, ENCODE, DECODE
+
+
+// START OF DEFINITIONS OVERRIDDEN BY THE PYTHON SCRIPT
+
+// The values included in this header file are those defined in the original
+// PBA+ implementation
+
+// However, the Python code generation can potentially generate a different
+// ENCODE/DECODE that use 20 bits per coordinates instead of 10 bits per
+// coordinate with ENCODED_INT_TYPE as `long long`.
+
+
+#ifndef MARKER
+#define MARKER -2147483648
+#endif // MARKER
+
+#ifndef MAX_INT
+#define MAX_INT 2147483647
+#endif
+
+#ifndef BLOCKSIZE
+#define BLOCKSIZE 32
+#endif
+
+#ifndef ENCODE
+
+// Sites : ENCODE(x, y, z, 0, 0)
+// Not sites : ENCODE(0, 0, 0, 1, 0) or MARKER
+#define ENCODED_INT_TYPE int
+#define ZERO 0
+#define ONE 1
+#define ENCODE(x, y, z, a, b) (((x) << 20) | ((y) << 10) | (z) | ((a) << 31) | ((b) << 30))
+#define DECODE(value, x, y, z) \
+ x = ((value) >> 20) & 0x3ff; \
+ y = ((value) >> 10) & 0x3ff; \
+ z = (value) & 0x3ff
+
+#define NOTSITE(value) (((value) >> 31) & 1)
+#define HASNEXT(value) (((value) >> 30) & 1)
+
+#define GET_X(value) (((value) >> 20) & 0x3ff)
+#define GET_Y(value) (((value) >> 10) & 0x3ff)
+#define GET_Z(value) ((NOTSITE((value))) ? MAX_INT : ((value) & 0x3ff))
+
+#endif // ENCODE
+
+// END OF DEFINITIONS OVERRIDDEN BY THE PYTHON SCRIPT
+
+
+#define LL long long  // local shorthand, removed again by the #undef below
+__device__ bool dominate(LL x_1, LL y_1, LL z_1, LL x_2, LL y_2, LL z_2, LL x_3, LL y_3, LL z_3, LL x_0, LL z_0)
+{   // presumably: true when site 2 is dominated by sites 1 and 3 along the line (x_0, *, z_0) -- TODO confirm against the PBA+ paper
+    LL k_1 = y_2 - y_1, k_2 = y_3 - y_2;
+
+    return (((y_1 + y_2) * k_1 + ((x_2 - x_1) * (x_1 + x_2 - (x_0 << 1)) + (z_2 - z_1) * (z_1 + z_2 - (z_0 << 1)))) * k_2 > \
+            ((y_2 + y_3) * k_2 + ((x_3 - x_2) * (x_2 + x_3 - (x_0 << 1)) + (z_3 - z_2) * (z_2 + z_3 - (z_0 << 1)))) * k_1);
+}
+#undef LL
+
+#define TOID(x, y, z, size) ((((z) * (size)) + (y)) * (size) + (x))
+
+
+extern "C"{
+
+__global__ void kernelFloodZ(ENCODED_INT_TYPE *input, ENCODED_INT_TYPE *output, int size)
+{
+    // One thread per (x, y) column: sweep along z in both directions, keeping the site closest in z.
+    int tx = blockIdx.x * blockDim.x + threadIdx.x;
+    int ty = blockIdx.y * blockDim.y + threadIdx.y;
+    int tz = 0;
+
+    int plane = size * size;  // element offset between consecutive z planes
+    int id = TOID(tx, ty, tz, size);
+    ENCODED_INT_TYPE pixel1, pixel2;
+
+    pixel1 = ENCODE(ZERO,ZERO,ZERO,ONE,ZERO);  // "not a site"
+
+    // Sweep down
+    for (int i = 0; i < size; i++, id += plane) {
+        pixel2 = input[id];
+
+        if (!NOTSITE(pixel2))
+            pixel1 = pixel2;
+
+        output[id] = pixel1;
+    }
+
+    ENCODED_INT_TYPE dist1, dist2, nz;
+
+    id -= plane + plane;  // id was one plane past the end; move to plane size - 2
+
+    // Sweep up
+    for (int i = size - 2; i >= 0; i--, id -= plane) {
+        nz = GET_Z(pixel1);  // GET_Z yields MAX_INT for a non-site
+        dist1 = abs(nz - (tz + i));
+
+        pixel2 = output[id];
+        nz = GET_Z(pixel2);
+        dist2 = abs(nz - (tz + i));
+
+        if (dist2 < dist1)
+            pixel1 = pixel2;
+
+        output[id] = pixel1;
+    }
+}
+
+
+__global__ void kernelMaurerAxis(ENCODED_INT_TYPE *input, ENCODED_INT_TYPE *stack, int size)
+{   // One thread per (x, z) line: sweep along y and build a back-linked stack of sites, dropping entries for which dominate() is true.
+    int tx = blockIdx.x * blockDim.x + threadIdx.x;
+    int tz = blockIdx.y * blockDim.y + threadIdx.y;
+    int ty = 0;
+
+    int id = TOID(tx, ty, tz, size);
+
+    ENCODED_INT_TYPE lasty = 0;
+    ENCODED_INT_TYPE x1, y1, z1, x2, y2, z2, nx, ny, nz;
+    ENCODED_INT_TYPE p = ENCODE(ZERO,ZERO,ZERO,ONE,ZERO), s1 = ENCODE(ZERO,ZERO,ZERO,ONE,ZERO), s2 = ENCODE(ZERO,ZERO,ZERO,ONE,ZERO);
+    ENCODED_INT_TYPE flag = 0;  // becomes ONE after the first site; stored as the HASNEXT bit of later entries
+
+    for (ty = 0; ty < size; ++ty, id += size) {
+        p = input[id];
+
+        if (!NOTSITE(p)) {
+
+            while (HASNEXT(s2)) {
+                DECODE(s1, x1, y1, z1);
+                DECODE(s2, x2, y2, z2);
+                DECODE(p, nx, ny, nz);
+
+                if (!dominate(x1, y2, z1, x2, lasty, z2, nx, ty, nz, tx, tz))
+                    break;
+
+                lasty = y2; s2 = s1; y2 = y1;  // pop the stack top
+
+                if (HASNEXT(s2))
+                    s1 = stack[TOID(tx, y2, tz, size)];
+            }
+
+            DECODE(p, nx, ny, nz);
+            s1 = s2;
+            s2 = ENCODE(nx, lasty, nz, ZERO, flag);  // y field holds the back pointer (row of the previous entry)
+            y2 = lasty;
+            lasty = ty;
+
+            stack[id] = s2;
+
+            flag = ONE;
+        }
+    }
+
+    if (NOTSITE(p))  // last voxel of the line is not a site: store a tail pointer in the last row
+        stack[TOID(tx, ty - 1, tz, size)] = ENCODE(ZERO, lasty, ZERO, ONE, flag);
+}
+
+__global__ void kernelColorAxis(ENCODED_INT_TYPE *input, ENCODED_INT_TYPE *output, int size)
+{   // Per (x, z) line: walk the stack from its tail, pick the entry with the smallest squared distance for each y, write x/y-transposed.
+    __shared__ ENCODED_INT_TYPE block[BLOCKSIZE][BLOCKSIZE];  // staging tile for the transposed write
+
+    int col = threadIdx.x;
+    int tid = threadIdx.y;
+    int tx = blockIdx.x * blockDim.x + col;
+    int tz = blockIdx.y;  // one grid row per z plane
+
+    ENCODED_INT_TYPE x1, y1, z1, x2, y2, z2;
+    ENCODED_INT_TYPE last1 = ENCODE(ZERO,ZERO,ZERO,ONE,ZERO), last2 = ENCODE(ZERO,ZERO,ZERO,ONE,ZERO), lasty;
+    long long dx, dy, dz, best, dist;
+
+    lasty = size - 1;
+
+    last2 = input[TOID(tx, lasty, tz, size)];
+    DECODE(last2, x2, y2, z2);
+
+    if (NOTSITE(last2)) {  // last row stores a tail pointer instead of an entry
+        lasty = y2;
+        if(HASNEXT(last2)) {
+            last2 = input[TOID(tx, lasty, tz, size)];
+            DECODE(last2, x2, y2, z2);
+        }
+    }
+
+    if (HASNEXT(last2)) {
+        last1 = input[TOID(tx, y2, tz, size)];
+        DECODE(last1, x1, y1, z1);
+    }
+
+    int y_start, y_end, n_step = size / blockDim.x;
+    for(int step = 0; step < n_step; ++step) {
+        y_start = size - step * blockDim.x - 1;    // rows are processed from the end of the line backwards,
+        y_end = size - (step + 1) * blockDim.x;    // blockDim.x rows per step
+
+        for (int ty = y_start - tid; ty >= y_end; ty -= blockDim.y) {
+            dx = x2 - tx; dy = lasty - ty; dz = z2 - tz;
+            best = dx * dx + dy * dy + dz * dz;
+
+            while (HASNEXT(last2)) {
+                dx = x1 - tx; dy = y2 - ty; dz = z1 - tz;
+                dist = dx * dx + dy * dy + dz * dz;
+
+                if(dist > best) break;
+
+                best = dist; lasty = y2; last2 = last1;  // the previous entry is at least as close
+                DECODE(last2, x2, y2, z2);
+
+                if (HASNEXT(last2)) {
+                    last1 = input[TOID(tx, y2, tz, size)];
+                    DECODE(last1, x1, y1, z1);
+                }
+            }
+
+            block[threadIdx.x][ty - y_end] = ENCODE(lasty, x2, z2, NOTSITE(last2), ZERO);  // x and y fields are swapped
+        }
+
+        __syncthreads();
+
+        if(!threadIdx.y) {  // one row of threads copies the tile out, to a location with x and y swapped
+            int id = TOID(y_end + threadIdx.x, blockIdx.x * blockDim.x, tz, size);
+            for(int i = 0; i < blockDim.x; i++, id+=size) {
+                output[id] = block[i][threadIdx.x];
+            }
+        }
+
+        __syncthreads();  // tile is reused by the next step
+    }
+}
+
+
+} // extern C
diff --git a/python/cucim/src/cucim/core/operations/morphology/tests/test_distance_transform.py b/python/cucim/src/cucim/core/operations/morphology/tests/test_distance_transform.py
new file mode 100644
index 000000000..ab46ae1fb
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/morphology/tests/test_distance_transform.py
@@ -0,0 +1,145 @@
+from copy import copy
+
+import cupy as cp
+import numpy as np
+import pytest
+import scipy.ndimage as ndi_cpu
+
+from cucim.core.operations.morphology import distance_transform_edt
+
+
+def binary_image(shape, pct_true=50):  # random boolean test image, fixed seed
+    rng = cp.random.default_rng(123)
+    x = rng.integers(0, 100, size=shape, dtype=cp.uint8)
+    return x >= pct_true  # NOTE: `>=` makes ~(100 - pct_true)% of entries True
+
+
+def assert_percentile_equal(arr1, arr2, pct=95):
+    """Assert that more than pct% of the entries in arr1 and arr2 are equal."""
+    pct_mismatch = (100 - pct) / 100  # largest tolerated mismatch fraction
+    arr1 = cp.asnumpy(arr1)
+    arr2 = cp.asnumpy(arr2)
+    mismatch = np.sum(arr1 != arr2) / arr1.size
+    assert mismatch < pct_mismatch  # strict: exactly (100 - pct)% fails
+
+
+@pytest.mark.parametrize('return_indices', [False, True])
+@pytest.mark.parametrize('return_distances', [False, True])
+@pytest.mark.parametrize(
+    'shape, sampling',
+    [
+        ((256, 128), None),
+        ((384, 256), (1.5, 1.5)),
+        ((14, 32, 50), None),
+        ((50, 32, 24), (2, 2, 2)),
+    ]
+)
+@pytest.mark.parametrize('density', [5, 50, 95])
+@pytest.mark.parametrize('block_params', [None, (1, 1, 1)])
+def test_distance_transform_edt(
+    shape, sampling, return_distances, return_indices, density, block_params
+):
+    """Compare distances and/or indices against scipy.ndimage (2D and 3D)."""
+    if not (return_indices or return_distances):
+        return  # nothing is requested, so there is nothing to compare
+
+    kwargs_scipy = dict(
+        sampling=sampling,
+        return_distances=return_distances,
+        return_indices=return_indices,
+    )
+    kwargs_cucim = copy(kwargs_scipy)
+    kwargs_cucim['block_params'] = block_params  # not a SciPy argument
+    img = binary_image(shape, pct_true=density)
+    out = distance_transform_edt(img, **kwargs_cucim)
+    expected = ndi_cpu.distance_transform_edt(cp.asnumpy(img), **kwargs_scipy)
+    if return_indices and return_distances:
+        assert len(out) == 2
+        cp.testing.assert_allclose(out[0], expected[0])
+        # May differ at a small % of coordinates where multiple points were
+        # equidistant.
+        assert_percentile_equal(out[1], expected[1], pct=95)
+    elif return_distances:
+        cp.testing.assert_allclose(out, expected)
+    elif return_indices:
+        assert_percentile_equal(out, expected, pct=95)
+
+
+@pytest.mark.parametrize('return_indices', [False, True])
+@pytest.mark.parametrize('return_distances', [False, True])
+@pytest.mark.parametrize(
+    'shape, sampling',
+    [
+        ((384, 256), (1, 3)),
+        ((50, 32, 24), (1, 2, 4)),
+    ]
+)
+@pytest.mark.parametrize('density', [5, 50, 95])
+def test_distance_transform_edt_nonuniform_sampling(
+    shape, sampling, return_distances, return_indices, density
+):
+    """Non-uniform sampling is expected to raise NotImplementedError."""
+    if not (return_indices or return_distances):
+        return
+
+    kwargs_scipy = dict(
+        sampling=sampling,
+        return_distances=return_distances,
+        return_indices=return_indices,
+    )
+    kwargs_cucim = copy(kwargs_scipy)
+    img = binary_image(shape, pct_true=density)
+    if sampling is not None and len(np.unique(sampling)) != 1:
+        with pytest.raises(NotImplementedError):
+            distance_transform_edt(img, **kwargs_cucim)
+        return  # NOTE: if the condition were False nothing would be checked
+
+
+@pytest.mark.parametrize('value', [0, 1, 3])
+@pytest.mark.parametrize('ndim', [2, 3])
+def test_distance_transform_edt_uniform_valued(value, ndim):
+    """ensure correct output when all but one pixel share a single value."""
+    img = cp.full((48, ) * ndim, value, dtype=cp.uint8)
+    # ensure there is at least 1 pixel at background intensity
+    img[(slice(24, 25),) * ndim] = 0  # for value == 0 the image is all zeros
+    out = distance_transform_edt(img)
+    expected = ndi_cpu.distance_transform_edt(cp.asnumpy(img))
+    cp.testing.assert_allclose(out, expected)
+
+
+@pytest.mark.parametrize('sx', list(range(16)))
+@pytest.mark.parametrize('sy', list(range(16)))
+def test_distance_transform_edt_2d_aniso(sx, sy):
+    """ensure default block_params is robust to anisotropic shape."""
+    shape = (128 + sy, 128 + sx)  # every size combination in 128..143
+    img = binary_image(shape, pct_true=80)
+    out = distance_transform_edt(img)
+    expected = ndi_cpu.distance_transform_edt(cp.asnumpy(img))
+    cp.testing.assert_allclose(out, expected)
+
+
+@pytest.mark.parametrize('sx', list(range(4)))
+@pytest.mark.parametrize('sy', list(range(4)))
+@pytest.mark.parametrize('sz', list(range(4)))
+def test_distance_transform_edt_3d_aniso(sx, sy, sz):
+    """ensure default block_params is robust to anisotropic shape."""
+    shape = (16 + sz, 32 + sy, 48 + sx)  # three different base sizes per axis
+    img = binary_image(shape, pct_true=80)
+    out = distance_transform_edt(img)
+    expected = ndi_cpu.distance_transform_edt(cp.asnumpy(img))
+    cp.testing.assert_allclose(out, expected)
+
+
+@pytest.mark.parametrize('ndim', [1, 4, 5])
+def test_distance_transform_edt_unsupported_ndim(ndim):
+    with pytest.raises(NotImplementedError):  # ndim values other than 2 or 3
+        distance_transform_edt(cp.zeros((8,) * ndim))
+
+
+@pytest.mark.skip(reason="excessive memory requirement")
+def test_distance_transform_edt_3d_int64():
+    shape = (1280, 1280, 1280)  # sides > 1024, i.e. beyond 10 bits per axis
+    img = binary_image(shape, pct_true=80)
+    distance_transform_edt(img)  # smoke test: only checks that the call runs
+    # Note: no validation vs. scipy.ndimage due to excessive run time
+    return
diff --git a/python/cucim/src/cucim/skimage/_shared/_gradient.py b/python/cucim/src/cucim/skimage/_shared/_gradient.py
new file mode 100644
index 000000000..2b0b67334
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/_shared/_gradient.py
@@ -0,0 +1,121 @@
+"""
+Simplified version of cupy.gradient
+
+This version doesn't support non-unit spacing or 2nd order edges.
+
+Importantly, this version does not promote all integer dtypes to float64, but
+instead will promote 8 and 16-bit integer types to float32.
+"""
+import cupy
+
+from cucim.skimage._shared.utils import _supported_float_type
+
+
+def gradient(f, axis=None, output_as_array=False):
+    """Return the gradient of an N-dimensional array.
+
+    The gradient is computed using second order accurate central differences
+    at the interior points and first order accurate one-sided (forward or
+    backward) differences at the boundaries. Unit spacing is assumed along
+    every axis. The returned gradient has the same shape as the input array.
+
+    Args:
+        f (cupy.ndarray): An N-dimensional array containing samples of a scalar
+            function.
+        axis (None or int or tuple of ints, optional): The gradient is
+            calculated only along the given axis or axes. The default
+            (axis = None) is to calculate the gradient for all the axes of the
+            input array. axis may be negative, in which case it counts from the
+            last to the first axis.
+        output_as_array (bool, optional): If True, the derivatives are
+            written into one array whose leading axis indexes the requested
+            axes (in the order given) instead of being collected in a list.
+
+    Returns:
+        gradient (cupy.ndarray or list of cupy.ndarray): A set of ndarrays
+            (or a single ndarray if only one axis was requested)
+            corresponding to the derivatives of f with respect to each
+            requested dimension. Each derivative has the same shape as f.
+
+    Raises:
+        ValueError: If an axis is outside ``[-f.ndim, f.ndim)`` or if f has
+            fewer than 2 elements along a requested axis.
+
+    """
+    ndim = f.ndim  # number of dimensions
+    if axis is None:
+        axes = tuple(range(ndim))
+    else:
+        if cupy.isscalar(axis):
+            axis = (axis,)
+        for ax in axis:
+            # valid axes are -ndim, ..., ndim - 1
+            if ax < -ndim or ax >= ndim:
+                raise ValueError(f"invalid axis: {ax}")
+        axes = tuple(ax + ndim if ax < 0 else ax for ax in axis)
+    len_axes = len(axes)
+
+    # use central differences on interior and one-sided differences on the
+    # endpoints.
+
+    # create slice objects --- initially all are [:, :, ..., :]
+    slice1 = [slice(None)] * ndim  # destination
+    slice2 = [slice(None)] * ndim  # first operand
+    slice3 = [slice(None)] * ndim  # second operand
+
+    otype = f.dtype
+    if not cupy.issubdtype(otype, cupy.inexact):
+        # All other types produce a floating point result.
+        float_dtype = _supported_float_type(otype)
+        if cupy.issubdtype(otype, cupy.integer):
+            # avoid modular integer arithmetic when taking differences
+            f = f.astype(float_dtype)
+        otype = float_dtype
+
+    if output_as_array:
+        # one slot per requested axis, in the order the axes were given
+        out = cupy.empty((len_axes,) + f.shape, dtype=otype)
+        outvals = out
+    else:
+        outvals = []
+
+    for i, axis in enumerate(axes):
+        if f.shape[axis] < 2:
+            raise ValueError(
+                "Shape of array too small to calculate a numerical gradient, "
+                "at least 2 elements are required."
+            )
+        # result allocation
+        if not output_as_array:
+            out = cupy.empty_like(f, dtype=otype)
+        # index prefix selecting this axis' slot in the stacked output
+        prefix = (i,) if output_as_array else ()
+
+        # Numerical differentiation: 2nd order interior
+        # 1D equivalent -- out[1:-1] = (f[2:] - f[:-2]) / 2
+        slice1[axis] = slice(1, -1)
+        slice2[axis] = slice(2, None)
+        slice3[axis] = slice(None, -2)
+        out[prefix + tuple(slice1)] = (
+            f[tuple(slice2)] - f[tuple(slice3)]
+        ) / 2.0
+
+        # Numerical differentiation: 1st order edges
+        # 1D equivalent -- out[0] = f[1] - f[0]
+        slice1[axis] = 0
+        slice2[axis] = 1
+        slice3[axis] = 0
+        out[prefix + tuple(slice1)] = f[tuple(slice2)] - f[tuple(slice3)]
+
+        # 1D equivalent -- out[-1] = f[-1] - f[-2]
+        slice1[axis] = -1
+        slice2[axis] = -1
+        slice3[axis] = -2
+        out[prefix + tuple(slice1)] = f[tuple(slice2)] - f[tuple(slice3)]
+        if not output_as_array:
+            outvals.append(out)
+
+        # reset the slice object in this dimension to ":"
+        slice1[axis] = slice(None)
+        slice2[axis] = slice(None)
+        slice3[axis] = slice(None)
+
+    if len_axes == 1:
+        return outvals[0]
+    else:
+        return outvals
diff --git a/python/cucim/src/cucim/skimage/_shared/filters.py b/python/cucim/src/cucim/skimage/_shared/filters.py
index 2f68221b4..56cee8000 100644
--- a/python/cucim/src/cucim/skimage/_shared/filters.py
+++ b/python/cucim/src/cucim/skimage/_shared/filters.py
@@ -7,7 +7,8 @@
from collections.abc import Iterable
import cupy as cp
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
+
from .._shared import utils
from .._shared.utils import _supported_float_type, convert_to_float
diff --git a/python/cucim/src/cucim/skimage/_shared/tests/test_utils.py b/python/cucim/src/cucim/skimage/_shared/tests/test_utils.py
index f7135a0e3..c7d817b84 100644
--- a/python/cucim/src/cucim/skimage/_shared/tests/test_utils.py
+++ b/python/cucim/src/cucim/skimage/_shared/tests/test_utils.py
@@ -66,13 +66,13 @@ def bar(arg0, arg1=0, arg2=1):
def test_deprecate_kwarg():
- @deprecate_kwarg({'old_arg1': 'new_arg1'}, '0.19')
+ @deprecate_kwarg({'old_arg1': 'new_arg1'}, '22.02.00')
def foo(arg0, new_arg1=1, arg2=None):
"""Expected docstring"""
return arg0, new_arg1, arg2
@deprecate_kwarg({'old_arg1': 'new_arg1'},
- deprecated_version='0.19',
+ deprecated_version='22.02.00',
warning_msg="Custom warning message")
def bar(arg0, new_arg1=1, arg2=None):
"""Expected docstring"""
@@ -116,7 +116,7 @@ def bar(arg0, new_arg1=1, arg2=None):
old_arg1 : DEPRECATED
Deprecated in favor of `new_arg1`.
- .. deprecated:: 0.19
+ .. deprecated:: 22.02.00
"""
assert len(recorded) == 0
diff --git a/python/cucim/src/cucim/skimage/_shared/utils.py b/python/cucim/src/cucim/skimage/_shared/utils.py
index 6798628c7..1e014ef3a 100644
--- a/python/cucim/src/cucim/skimage/_shared/utils.py
+++ b/python/cucim/src/cucim/skimage/_shared/utils.py
@@ -95,10 +95,11 @@ def __call__(self, func):
parameters = inspect.signature(func).parameters
arg_idx = list(parameters.keys()).index(self.arg_name)
warning_msg = (
- f"{self.arg_name} argument is deprecated and will be removed "
- f"in version {self.changed_version}. To avoid this warning, "
- f"please do not use the {self.arg_name} argument. Please "
- f"see {func.__name__} documentation for more details.")
+ f"{self.arg_name} argument is deprecated in upstream scikit-image "
+ f"and will be removed in cuCIM {self.changed_version}. To avoid "
+ f"this warning, please do not use the {self.arg_name} argument. "
+ f"Please see {func.__name__} documentation for more details."
+ )
if self.help_msg is not None:
warning_msg += f" {self.help_msg}"
@@ -206,7 +207,7 @@ def __init__(self, kwarg_mapping, deprecated_version, warning_msg=None,
self.warning_msg = ("`{old_arg}` is a deprecated argument name "
"for `{func_name}`. ")
if removed_version is not None:
- self.warning_msg += (f'It will be removed in '
+ self.warning_msg += (f'It will be removed in cuCIM '
f'version {removed_version}.')
self.warning_msg += "Please use `{new_arg}` instead."
else:
@@ -248,10 +249,11 @@ class deprecate_multichannel_kwarg(deprecate_kwarg):
"""
- def __init__(self, removed_version='1.0', multichannel_position=None):
+ def __init__(self, removed_version='2023.02.00',
+ multichannel_position=None):
super().__init__(
kwarg_mapping={'multichannel': 'channel_axis'},
- deprecated_version='0.19',
+ deprecated_version='22.02.00',
warning_msg=None,
removed_version=removed_version)
self.position = multichannel_position
@@ -293,7 +295,7 @@ def fixed_func(*args, **kwargs):
if func.__doc__ is not None:
newdoc = docstring_add_deprecated(
- func, {'multichannel': 'channel_axis'}, '0.19')
+ func, {'multichannel': 'channel_axis'}, '22.02.00')
fixed_func.__doc__ = newdoc
return fixed_func
diff --git a/python/cucim/src/cucim/skimage/_vendored/_internal.py b/python/cucim/src/cucim/skimage/_vendored/_internal.py
index b6d163a36..196486d94 100644
--- a/python/cucim/src/cucim/skimage/_vendored/_internal.py
+++ b/python/cucim/src/cucim/skimage/_vendored/_internal.py
@@ -1,3 +1,7 @@
+import math
+from functools import reduce
+from operator import mul
+
import cupy
import numpy
@@ -61,3 +65,11 @@ def _normalize_axis_indices(axes, ndim): # NOQA
res.append(axis)
return tuple(sorted(res))
+
+
+try:
+    # use the standard library implementation when this interpreter has one
+    prod = math.prod
+except AttributeError:
+
+    def prod(iterable, *, start=1):
+        """Multiply the items of ``iterable`` together, starting at ``start``."""
+        return reduce(mul, iterable, start)
diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters.py
index 1ce475371..e4dc1311d 100644
--- a/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters.py
+++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters.py
@@ -1,8 +1,224 @@
"""A vendored subset of cupyx.scipy.ndimage._filters"""
+import warnings
import cupy
+import numpy
+from cucim.skimage._vendored import _internal as internal
from cucim.skimage._vendored import _ndimage_filters_core as _filters_core
+from cucim.skimage._vendored import _ndimage_util as _util
+
+
+def correlate(input, weights, output=None, mode='reflect', cval=0.0, origin=0):
+    """Correlate an N-dimensional array with a kernel.
+
+    Args:
+        input (cupy.ndarray): Array to filter.
+        weights (cupy.ndarray): Kernel with the same number of dimensions as
+            ``input``.
+        output (cupy.ndarray, dtype or None): Array (or dtype of the array)
+            that receives the result.
+        mode (str): Border handling; one of ``'reflect'``, ``'constant'``,
+            ``'nearest'``, ``'mirror'`` or ``'wrap'``. Default is
+            ``'reflect'``.
+        cval (scalar): Fill value used past the edges when ``mode`` is
+            ``'constant'``. Default is ``0.0``.
+        origin (scalar or tuple of scalar): Placement of the filter relative
+            to the center of the current element. The default ``0`` is
+            equivalent to ``(0,)*input.ndim``.
+
+    Returns:
+        cupy.ndarray: The correlated array.
+
+    .. seealso:: :func:`scipy.ndimage.correlate`
+
+    .. note::
+        For an integral output dtype (requested explicitly, or implied by an
+        integral input when no output is given) results may differ slightly
+        from SciPy because intermediate values are rounded in floating point.
+    """
+    return _correlate_or_convolve(
+        input, weights, output, mode, cval, origin, convolution=False
+    )
+
+
+def convolve(input, weights, output=None, mode='reflect', cval=0.0, origin=0):
+    """Convolve an N-dimensional array with a kernel.
+
+    Args:
+        input (cupy.ndarray): Array to filter.
+        weights (cupy.ndarray): Kernel with the same number of dimensions as
+            ``input``.
+        output (cupy.ndarray, dtype or None): Array (or dtype of the array)
+            that receives the result.
+        mode (str): Border handling; one of ``'reflect'``, ``'constant'``,
+            ``'nearest'``, ``'mirror'`` or ``'wrap'``. Default is
+            ``'reflect'``.
+        cval (scalar): Fill value used past the edges when ``mode`` is
+            ``'constant'``. Default is ``0.0``.
+        origin (scalar or tuple of scalar): Placement of the filter relative
+            to the center of the current element. The default ``0`` is
+            equivalent to ``(0,)*input.ndim``.
+
+    Returns:
+        cupy.ndarray: The convolved array.
+
+    .. seealso:: :func:`scipy.ndimage.convolve`
+
+    .. note::
+        For an integral output dtype (requested explicitly, or implied by an
+        integral input when no output is given) results may differ slightly
+        from SciPy because intermediate values are rounded in floating point.
+    """
+    return _correlate_or_convolve(
+        input, weights, output, mode, cval, origin, convolution=True
+    )
+
+
+def correlate1d(input, weights, axis=-1, output=None, mode="reflect", cval=0.0,
+                origin=0, *, algorithm=None):
+    """Correlate an array with a 1D kernel along one axis.
+
+    Args:
+        input (cupy.ndarray): Array to filter.
+        weights (cupy.ndarray): One-dimensional kernel.
+        axis (int): Axis of ``input`` to filter along. Default is -1.
+        output (cupy.ndarray, dtype or None): Array (or dtype of the array)
+            that receives the result. By default the result has the dtype of
+            the input.
+        mode (str): Border handling; one of ``'reflect'``, ``'constant'``,
+            ``'nearest'``, ``'mirror'`` or ``'wrap'``. Default is
+            ``'reflect'``.
+        cval (scalar): Fill value used past the edges when ``mode`` is
+            ``'constant'``. Default is ``0.0``.
+        origin (int): Placement of the filter relative to the center of the
+            current element. Default is ``0``.
+        algorithm ({'shared_memory', 'elementwise', None}): Keyword-only
+            choice of implementation, forwarded unchanged to
+            ``_correlate_or_convolve1d``.
+
+    Returns:
+        cupy.ndarray: The result of the 1D correlation.
+
+    .. seealso:: :func:`scipy.ndimage.correlate1d`
+
+    .. note::
+        For an integral output dtype (requested explicitly, or implied by an
+        integral input when no output is given) results may differ slightly
+        from SciPy because intermediate values are rounded in floating point.
+    """
+    return _correlate_or_convolve1d(
+        input, weights, axis, output, mode, cval, origin,
+        convolution=False, algorithm=algorithm,
+    )
+
+
+def convolve1d(input, weights, axis=-1, output=None, mode="reflect", cval=0.0,
+               origin=0, *, algorithm=None):
+    """Convolve an array with a 1D kernel along one axis.
+
+    Args:
+        input (cupy.ndarray): Array to filter.
+        weights (cupy.ndarray): One-dimensional kernel.
+        axis (int): Axis of ``input`` to filter along. Default is -1.
+        output (cupy.ndarray, dtype or None): Array (or dtype of the array)
+            that receives the result. By default the result has the dtype of
+            the input.
+        mode (str): Border handling; one of ``'reflect'``, ``'constant'``,
+            ``'nearest'``, ``'mirror'`` or ``'wrap'``. Default is
+            ``'reflect'``.
+        cval (scalar): Fill value used past the edges when ``mode`` is
+            ``'constant'``. Default is ``0.0``.
+        origin (int): Placement of the filter relative to the center of the
+            current element. Default is ``0``.
+        algorithm ({'shared_memory', 'elementwise', None}): Keyword-only
+            choice of implementation, forwarded unchanged to
+            ``_correlate_or_convolve1d``.
+
+    Returns:
+        cupy.ndarray: The result of the 1D convolution.
+
+    .. seealso:: :func:`scipy.ndimage.convolve1d`
+
+    .. note::
+        For an integral output dtype (requested explicitly, or implied by an
+        integral input when no output is given) results may differ slightly
+        from SciPy because intermediate values are rounded in floating point.
+    """
+    return _correlate_or_convolve1d(
+        input, weights, axis, output, mode, cval, origin,
+        convolution=True, algorithm=algorithm,
+    )
+
+
+def _correlate_or_convolve(input, weights, output, mode, cval, origin,
+                           convolution=False):
+    """Shared N-dimensional implementation behind correlate and convolve.
+
+    With ``convolution=True`` the kernel is reversed along every axis and the
+    origins are negated (and shifted by one more for even-sized axes) before
+    the correlation kernel is run. Returns ``cupy.zeros_like(input)`` when
+    ``weights`` is empty.
+    """
+    origins, int_type = _filters_core._check_nd_args(input, weights,
+                                                     mode, origin)
+    if weights.size == 0:
+        return cupy.zeros_like(input)
+
+    _util._check_cval(mode, cval, _util._is_integer_output(output, input))
+
+    if convolution:
+        # reverse the kernel along every axis
+        weights = weights[tuple([slice(None, None, -1)] * weights.ndim)]
+        origins = list(origins)
+        for i, wsize in enumerate(weights.shape):
+            # mirror the origin; an even-sized axis needs one extra step
+            origins[i] = -origins[i]
+            if wsize % 2 == 0:
+                origins[i] -= 1
+        origins = tuple(origins)
+    elif weights.dtype.kind == "c":
+        # numpy.correlate conjugates weights rather than input.
+        weights = weights.conj()
+    weights_dtype = _util._get_weights_dtype(input, weights, use_cucim_casting=True)  # noqa
+    offsets = _filters_core._origins_to_offsets(origins, weights.shape)
+    kernel = _get_correlate_kernel(mode, weights.shape, int_type,
+                                   offsets, cval)
+    output = _filters_core._call_kernel(kernel, input, weights, output,
+                                        weights_dtype=weights_dtype)
+    return output
+
+
+def _correlate_or_convolve1d(input, weights, axis, output, mode, cval, origin,
+                             convolution=False, algorithm=None):
+    """Dispatch a 1D correlation/convolution to one of two implementations.
+
+    ``algorithm=None`` selects ``'shared_memory'`` for 2D input with at most
+    256 weights and ``'elementwise'`` otherwise. The shared-memory path falls
+    back to the vendored elementwise ``_correlate_or_convolve`` (with a
+    warning) when it raises ``ResourceLimitError``.
+    """
+    if algorithm is None:
+        small_2d = input.ndim == 2 and weights.size <= 256
+        algorithm = 'shared_memory' if small_2d else 'elementwise'
+    elif algorithm not in ('shared_memory', 'elementwise'):
+        raise ValueError(
+            "algorithm must be 'shared_memory', 'elementwise' or None"
+        )
+
+    if mode == 'wrap':
+        mode = 'grid-wrap'
+
+    if algorithm == 'shared_memory':
+        from cucim.skimage.filters._separable_filtering import (
+            ResourceLimitError, _shmem_convolve1d)
+        if input.ndim not in (2, 3):
+            raise NotImplementedError(
+                f"shared_memory not implemented for ndim={input.ndim}"
+            )
+        try:
+            return _shmem_convolve1d(input, weights, axis=axis, output=output,
+                                     mode=mode, cval=cval, origin=origin,
+                                     convolution=convolution)
+        except ResourceLimitError:
+            # not enough shared memory: continue with the elementwise path
+            warnings.warn(
+                "Inadequate resources for algorithm='shared_memory: "
+                "falling back to the elementwise implementation"
+            )
+
+    # elementwise path (requested directly or reached through the fallback)
+    nd_weights, nd_origins = _filters_core._convert_1d_args(
+        input.ndim, weights, origin, axis
+    )
+    return _correlate_or_convolve(
+        input, nd_weights, output, mode, cval, nd_origins, convolution
+    )
@cupy.memoize(for_each_device=True)
@@ -13,3 +229,949 @@ def _get_correlate_kernel(mode, w_shape, int_type, offsets, cval):
'sum += cast({value}) * wval;',
'y = cast(sum);',
mode, w_shape, int_type, offsets, cval, ctype='W')
+
+
+def _run_1d_correlates(input, params, get_weights, output, mode, cval,
+                       origin=0, **filter_kwargs):
+    """
+    Variant of _run_1d_filters that always filters with correlate1d.
+
+    ``params`` holds one value per entry to hand to ``get_weights``, which
+    must return a 1D weights array (or None, in which case no filter is used
+    for that entry). ``get_weights`` is called only once per distinct value;
+    repeated values reuse the weights from the first call.
+    """
+    # dict.fromkeys keeps first-seen order and drops repeated params
+    cache = {p: get_weights(p) for p in dict.fromkeys(params)}
+    per_param_weights = [cache[p] for p in params]
+    filters = [
+        None if w is None else correlate1d for w in per_param_weights
+    ]
+    return _filters_core._run_1d_filters(
+        filters, input, per_param_weights, output, mode, cval, origin,
+        **filter_kwargs)
+
+
+def uniform_filter1d(input, size, axis=-1, output=None, mode="reflect",
+                     cval=0.0, origin=0, *, algorithm=None):
+    """Apply a 1D uniform (box) filter of length ``size`` along one axis.
+
+    Args:
+        input (cupy.ndarray): Array to filter.
+        size (int): Length of the uniform filter.
+        axis (int): Axis of ``input`` to filter along. Default is -1.
+        output (cupy.ndarray, dtype or None): Array (or dtype of the array)
+            that receives the result. By default the result has the dtype of
+            the input.
+        mode (str): Border handling; one of ``'reflect'``, ``'constant'``,
+            ``'nearest'``, ``'mirror'`` or ``'wrap'``. Default is
+            ``'reflect'``.
+        cval (scalar): Fill value used past the edges when ``mode`` is
+            ``'constant'``. Default is ``0.0``.
+        origin (int): Placement of the filter relative to the center of the
+            current element. Default is ``0``.
+        algorithm ({'shared_memory', 'elementwise', None}): Keyword-only;
+            passed through to :func:`correlate1d`.
+
+    Returns:
+        cupy.ndarray: The filtered array.
+
+    .. seealso:: :func:`scipy.ndimage.uniform_filter1d`
+
+    .. note::
+        For an integral output dtype (requested explicitly, or implied by an
+        integral input when no output is given) results may differ slightly
+        from SciPy because intermediate values are rounded in floating point.
+    """
+    box = cupy.full(
+        size, 1 / size, dtype=cupy.promote_types(input.dtype, cupy.float32)
+    )
+    return correlate1d(
+        input, box, axis, output, mode, cval, origin, algorithm=algorithm
+    )
+
+
+def uniform_filter(input, size=3, output=None, mode="reflect", cval=0.0,
+                   origin=0, *, algorithm=None):
+    """Apply a uniform (box) filter along every axis.
+
+    Args:
+        input (cupy.ndarray): Array to filter.
+        size (int or sequence of int): Filter length per dimension; a single
+            value is used for all axes. Axes with a length of 1 or less get
+            no filter.
+        output (cupy.ndarray, dtype or None): Array (or dtype of the array)
+            that receives the result. By default the result has the dtype of
+            the input.
+        mode (str): Border handling; one of ``'reflect'``, ``'constant'``,
+            ``'nearest'``, ``'mirror'`` or ``'wrap'``. Default is
+            ``'reflect'``.
+        cval (scalar): Fill value used past the edges when ``mode`` is
+            ``'constant'``. Default is ``0.0``.
+        origin (int or sequence of int): Placement of the filter relative to
+            the center of the current element. The default ``0`` is
+            equivalent to ``(0,)*input.ndim``.
+        algorithm ({'shared_memory', 'elementwise', None}): Keyword-only;
+            passed through to :func:`correlate1d` for each axis.
+
+    Returns:
+        cupy.ndarray: The filtered array.
+
+    .. seealso:: :func:`scipy.ndimage.uniform_filter`
+
+    .. note::
+        For an integral output dtype (requested explicitly, or implied by an
+        integral input when no output is given) results may differ slightly
+        from SciPy because intermediate values are rounded in floating point.
+    """
+    sizes = _util._fix_sequence_arg(size, input.ndim, 'size', int)
+    weights_dtype = cupy.promote_types(input.dtype, cupy.float32)
+
+    def _box_weights(length):
+        if length <= 1:
+            # a length-1 box is the identity, so skip this axis
+            return None
+        return cupy.full(length, 1 / length, dtype=weights_dtype)
+
+    return _run_1d_correlates(
+        input, sizes, _box_weights, output, mode, cval, origin,
+        algorithm=algorithm,
+    )
+
+
+def gaussian_filter1d(input, sigma, axis=-1, order=0, output=None,
+                      mode="reflect", cval=0.0, truncate=4.0, *,
+                      algorithm=None):
+    """Apply a 1D Gaussian filter of standard deviation ``sigma`` along an axis.
+
+    Args:
+        input (cupy.ndarray): Array to filter.
+        sigma (scalar): Standard deviation of the Gaussian kernel.
+        axis (int): Axis of ``input`` to filter along. Default is -1.
+        order (int): ``0`` (the default) filters with the Gaussian itself; a
+            positive value filters with that derivative of the Gaussian.
+        output (cupy.ndarray, dtype or None): Array (or dtype of the array)
+            that receives the result. By default the result has the dtype of
+            the input.
+        mode (str): Border handling; one of ``'reflect'``, ``'constant'``,
+            ``'nearest'``, ``'mirror'`` or ``'wrap'``. Default is
+            ``'reflect'``.
+        cval (scalar): Fill value used past the edges when ``mode`` is
+            ``'constant'``. Default is ``0.0``.
+        truncate (float): The kernel is cut off at this many standard
+            deviations. Default is ``4.0``.
+        algorithm ({'shared_memory', 'elementwise', None}): Keyword-only;
+            passed through to :func:`correlate1d`.
+
+    Returns:
+        cupy.ndarray: The filtered array.
+
+    .. seealso:: :func:`scipy.ndimage.gaussian_filter1d`
+
+    .. note::
+        For an integral output dtype (requested explicitly, or implied by an
+        integral input when no output is given) results may differ slightly
+        from SciPy because intermediate values are rounded in floating point.
+    """
+    # kernel half-width, rounded to the nearest integer
+    half_width = int(float(truncate) * float(sigma) + 0.5)
+    kernel_dtype = cupy.promote_types(input.dtype, cupy.float32)
+    kernel = _gaussian_kernel1d(sigma, int(order), half_width, kernel_dtype)
+    return correlate1d(
+        input, kernel, axis, output, mode, cval, algorithm=algorithm
+    )
+
+
+def gaussian_filter(input, sigma, order=0, output=None, mode="reflect",
+                    cval=0.0, truncate=4.0, *, algorithm=None):
+    """Multi-dimensional Gaussian filter.
+
+    Args:
+        input (cupy.ndarray): The input array.
+        sigma (scalar or sequence of scalar): Standard deviations for each axis
+            of Gaussian kernel. A single value applies to all axes.
+        order (int or sequence of scalar): An order of ``0``, the default,
+            corresponds to convolution with a Gaussian kernel. A positive order
+            corresponds to convolution with that derivative of a Gaussian. A
+            single value applies to all axes.
+        output (cupy.ndarray, dtype or None): The array in which to place the
+            output. Default is the same dtype as the input.
+        mode (str): The array borders are handled according to the given mode
+            (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+            ``'wrap'``). Default is ``'reflect'``.
+        cval (scalar): Value to fill past edges of input if mode is
+            ``'constant'``. Default is ``0.0``.
+        truncate (float): Truncate the filter at this many standard deviations.
+            Default is ``4.0``.
+        algorithm ({'shared_memory', 'elementwise', None}): Keyword-only;
+            passed through to :func:`correlate1d` for each axis.
+
+    Returns:
+        cupy.ndarray: The result of the filtering.
+
+    .. seealso:: :func:`scipy.ndimage.gaussian_filter`
+
+    .. note::
+        When the output data type is integral (or when no output is provided
+        and input is integral) the results may not perfectly match the results
+        from SciPy due to floating-point rounding of intermediate results.
+    """
+    sigmas = _util._fix_sequence_arg(sigma, input.ndim, 'sigma', float)
+    orders = _util._fix_sequence_arg(order, input.ndim, 'order', int)
+    truncate = float(truncate)
+    # promote from the array's dtype (not the array object itself), exactly
+    # as the other filters in this module do
+    weights_dtype = cupy.promote_types(input.dtype, cupy.float32)
+
+    def get(param, dtype=weights_dtype):
+        sigma, order = param
+        radius = int(truncate * float(sigma) + 0.5)
+        if radius <= 0:
+            # kernel would be a single sample: skip filtering on this axis
+            return None
+        return _gaussian_kernel1d(sigma, order, radius, dtype)
+
+    return _run_1d_correlates(input, list(zip(sigmas, orders)), get, output,
+                              mode, cval, 0, algorithm=algorithm)
+
+
+def _gaussian_kernel1d(sigma, order, radius, dtype=cupy.float64):
+    """
+    Computes a 1-D Gaussian correlation kernel.
+
+    The kernel has ``2 * radius + 1`` samples and is built on the host with
+    NumPy before being converted to a CuPy array. Raises ValueError for a
+    negative ``order``.
+
+    NOTE(review): ``dtype`` is only applied on the ``order > 0`` return path;
+    the ``order == 0`` kernel is returned with whatever dtype NumPy produced.
+    Confirm whether that is intended.
+    """
+    if order < 0:
+        raise ValueError('order must be non-negative')
+    sigma2 = sigma * sigma
+    x = numpy.arange(-radius, radius + 1)
+    phi_x = numpy.exp(-0.5 / sigma2 * x ** 2)
+    # normalize so the samples sum to 1
+    phi_x /= phi_x.sum()
+
+    if order == 0:
+        return cupy.asarray(phi_x)
+
+    # f(x) = q(x) * phi(x) = q(x) * exp(p(x))
+    # f'(x) = (q'(x) + q(x) * p'(x)) * phi(x)
+    # p'(x) = -1 / sigma ** 2
+    # Implement q'(x) + q(x) * p'(x) as a matrix operator and apply to the
+    # coefficients of q(x)
+    exponent_range = numpy.arange(order + 1)
+    q = numpy.zeros(order + 1)
+    q[0] = 1
+    D = numpy.diag(exponent_range[1:], 1)  # D @ q(x) = q'(x)
+    P = numpy.diag(numpy.ones(order) / -sigma2, -1)  # P @ q(x) = q(x) * p'(x)
+    Q_deriv = D + P
+    # apply the derivative operator once per requested order
+    for _ in range(order):
+        q = Q_deriv.dot(q)
+    # evaluate the polynomial q at every sample position
+    q = (x[:, None] ** exponent_range).dot(q)
+    # the samples are reversed before being returned
+    return cupy.asarray((q * phi_x)[::-1], order='C', dtype=dtype)
+
+
+def prewitt(input, axis=-1, output=None, mode="reflect", cval=0.0, *,
+            algorithm=None):
+    """Apply a Prewitt filter along ``axis``.
+
+    Args:
+        input (cupy.ndarray): Array to filter.
+        axis (int): Axis of ``input`` to filter along. Default is -1.
+        output (cupy.ndarray, dtype or None): Array (or dtype of the array)
+            that receives the result. By default the result has the dtype of
+            the input.
+        mode (str): Border handling; one of ``'reflect'``, ``'constant'``,
+            ``'nearest'``, ``'mirror'`` or ``'wrap'``. Default is
+            ``'reflect'``.
+        cval (scalar): Fill value used past the edges when ``mode`` is
+            ``'constant'``. Default is ``0.0``.
+        algorithm ({'shared_memory', 'elementwise', None}): Keyword-only;
+            passed through to the underlying 1D correlations.
+
+    Returns:
+        cupy.ndarray: The filtered array.
+
+    .. seealso:: :func:`scipy.ndimage.prewitt`
+
+    .. note::
+        For an integral output dtype (requested explicitly, or implied by an
+        integral input when no output is given) results may differ slightly
+        from SciPy because intermediate values are rounded in floating point.
+    """
+    # weights handed to _prewitt_or_sobel: three ones
+    ones_kernel = cupy.ones(
+        3, dtype=cupy.promote_types(input.dtype, cupy.float32)
+    )
+    return _prewitt_or_sobel(
+        input, axis, output, mode, cval, ones_kernel, algorithm
+    )
+
+
+def sobel(input, axis=-1, output=None, mode="reflect", cval=0.0, *,
+          algorithm=None):
+    """Apply a Sobel filter along ``axis``.
+
+    Args:
+        input (cupy.ndarray): Array to filter.
+        axis (int): Axis of ``input`` to filter along. Default is -1.
+        output (cupy.ndarray, dtype or None): Array (or dtype of the array)
+            that receives the result. By default the result has the dtype of
+            the input.
+        mode (str): Border handling; one of ``'reflect'``, ``'constant'``,
+            ``'nearest'``, ``'mirror'`` or ``'wrap'``. Default is
+            ``'reflect'``.
+        cval (scalar): Fill value used past the edges when ``mode`` is
+            ``'constant'``. Default is ``0.0``.
+        algorithm ({'shared_memory', 'elementwise', None}): Keyword-only;
+            passed through to the underlying 1D correlations.
+
+    Returns:
+        cupy.ndarray: The filtered array.
+
+    .. seealso:: :func:`scipy.ndimage.sobel`
+
+    .. note::
+        For an integral output dtype (requested explicitly, or implied by an
+        integral input when no output is given) results may differ slightly
+        from SciPy because intermediate values are rounded in floating point.
+    """
+    # weights handed to _prewitt_or_sobel: [1, 2, 1]
+    triangle_kernel = cupy.array(
+        [1, 2, 1], dtype=cupy.promote_types(input.dtype, cupy.float32)
+    )
+    return _prewitt_or_sobel(
+        input, axis, output, mode, cval, triangle_kernel, algorithm
+    )
+
+
+def _prewitt_or_sobel(input, axis, output, mode, cval, weights, algorithm):
+    """Correlate with ``[-1, 0, 1]`` along ``axis`` and ``weights`` elsewhere."""
+    axis = internal._normalize_axis_index(axis, input.ndim)
+    diff_dtype = cupy.promote_types(input.dtype, cupy.float32)
+
+    def _kernel_for(is_diff_axis):
+        # the derivative axis gets the central difference, others `weights`
+        if is_diff_axis:
+            return cupy.array([-1, 0, 1], dtype=diff_dtype)
+        return weights
+
+    axis_flags = [ax == axis for ax in range(input.ndim)]
+    return _run_1d_correlates(input, axis_flags, _kernel_for, output, mode,
+                              cval, algorithm=algorithm)
+
+
+def generic_laplace(input, derivative2, output=None, mode="reflect",
+                    cval=0.0, extra_arguments=(), extra_keywords=None):
+    """Multi-dimensional Laplace filter using a provided second derivative
+    function.
+
+    Args:
+        input (cupy.ndarray): The input array.
+        derivative2 (callable): Function or other callable with the following
+            signature that is called once per axis::
+
+                derivative2(input, axis, output, mode, cval,
+                            *extra_arguments, **extra_keywords)
+
+            where ``input`` and ``output`` are ``cupy.ndarray``, ``axis`` is an
+            ``int`` from ``0`` to the number of dimensions, and ``mode``,
+            ``cval``, ``extra_arguments``, ``extra_keywords`` are the values
+            given to this function. The callable must write its result into
+            ``output``; its return value is ignored.
+        output (cupy.ndarray, dtype or None): The array in which to place the
+            output. Default is the same dtype as the input.
+        mode (str): The array borders are handled according to the given mode
+            (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+            ``'wrap'``). Default is ``'reflect'``.
+        cval (scalar): Value to fill past edges of input if mode is
+            ``'constant'``. Default is ``0.0``.
+        extra_arguments (sequence, optional):
+            Sequence of extra positional arguments to pass to ``derivative2``.
+        extra_keywords (dict, optional):
+            dict of extra keyword arguments to pass ``derivative2``.
+
+    Returns:
+        cupy.ndarray: The result of the filtering.
+
+    .. seealso:: :func:`scipy.ndimage.generic_laplace`
+
+    .. note::
+        When the output data type is integral (or when no output is provided
+        and input is integral) the results may not perfectly match the results
+        from SciPy due to floating-point rounding of intermediate results.
+    """
+    if extra_keywords is None:
+        extra_keywords = {}
+    ndim = input.ndim
+    modes = _util._fix_sequence_arg(mode, ndim, 'mode',
+                                    _util._check_mode)
+    output = _util._get_output(output, input)
+    if ndim == 0:
+        # a 0-d array cannot be indexed with ``[:]``; Ellipsis works for
+        # any number of dimensions
+        output[...] = input
+        return output
+    # the first axis writes straight into the output ...
+    derivative2(input, 0, output, modes[0], cval,
+                *extra_arguments, **extra_keywords)
+    if ndim > 1:
+        # ... the remaining axes go through a scratch array and are summed
+        tmp = _util._get_output(output.dtype, input)
+        for i in range(1, ndim):
+            derivative2(input, i, tmp, modes[i], cval,
+                        *extra_arguments, **extra_keywords)
+            output += tmp
+    return output
+
+
+def laplace(input, output=None, mode="reflect", cval=0.0, *, algorithm=None):
+    """Apply a Laplace filter built from approximate second derivatives.
+
+    Each axis is correlated with ``[1, -2, 1]`` and the per-axis results are
+    combined by :func:`generic_laplace`.
+
+    Args:
+        input (cupy.ndarray): Array to filter.
+        output (cupy.ndarray, dtype or None): Array (or dtype of the array)
+            that receives the result. By default the result has the dtype of
+            the input.
+        mode (str): Border handling; one of ``'reflect'``, ``'constant'``,
+            ``'nearest'``, ``'mirror'`` or ``'wrap'``. Default is
+            ``'reflect'``.
+        cval (scalar): Fill value used past the edges when ``mode`` is
+            ``'constant'``. Default is ``0.0``.
+        algorithm ({'shared_memory', 'elementwise', None}): Keyword-only;
+            passed through to :func:`correlate1d` for each axis.
+
+    Returns:
+        cupy.ndarray: The filtered array.
+
+    .. seealso:: :func:`scipy.ndimage.laplace`
+
+    .. note::
+        For an integral output dtype (requested explicitly, or implied by an
+        integral input when no output is given) results may differ slightly
+        from SciPy because intermediate values are rounded in floating point.
+    """
+    second_diff = cupy.array(
+        [1, -2, 1], dtype=cupy.promote_types(input.dtype, cupy.float32)
+    )
+
+    def _second_difference(arr, axis, out, mode, cval):
+        return correlate1d(
+            arr, second_diff, axis, out, mode, cval, algorithm=algorithm
+        )
+
+    return generic_laplace(input, _second_difference, output, mode, cval)
+
+
+def gaussian_laplace(input, sigma, output=None, mode="reflect",
+ cval=0.0, *, algorithm=None, **kwargs):
+ """Multi-dimensional Laplace filter using Gaussian second derivatives.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ sigma (scalar or sequence of scalar): Standard deviations for each axis
+ of Gaussian kernel. A single value applies to all axes.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output. Default is the same dtype as the input.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``'constant'``. Default is ``0.0``.
+ kwargs (dict, optional):
+ dict of extra keyword arguments to pass to ``gaussian_filter()``.
+
+ Returns:
+ cupy.ndarray: The result of the filtering.
+
+ .. seealso:: :func:`scipy.ndimage.gaussian_laplace`
+
+ .. note::
+ When the output data type is integral (or when no output is provided
+ and input is integral) the results may not perfectly match the results
+ from SciPy due to floating-point rounding of intermediate results.
+ """
+ def derivative2(input, axis, output, mode, cval):
+ order = [0] * input.ndim
+ order[axis] = 2
+ return gaussian_filter(input, sigma, order, output, mode, cval,
+ algorithm=algorithm, **kwargs)
+ return generic_laplace(input, derivative2, output, mode, cval)
+
+
+def generic_gradient_magnitude(input, derivative, output=None,
+ mode="reflect", cval=0.0,
+ extra_arguments=(), extra_keywords=None):
+ """Multi-dimensional gradient magnitude filter using a provided derivative
+ function.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ derivative (callable): Function or other callable with the following
+ signature that is called once per axis::
+
+ derivative(input, axis, output, mode, cval,
+ *extra_arguments, **extra_keywords)
+
+ where ``input`` and ``output`` are ``cupy.ndarray``, ``axis`` is an
+ ``int`` from ``0`` to the number of dimensions, and ``mode``,
+ ``cval``, ``extra_arguments``, ``extra_keywords`` are the values
+ given to this function.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output. Default is the same dtype as the input.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``'constant'``. Default is ``0.0``.
+ extra_arguments (sequence, optional):
+ Sequence of extra positional arguments to pass to ``derivative``.
+ extra_keywords (dict, optional):
+ dict of extra keyword arguments to pass to ``derivative``.
+
+ Returns:
+ cupy.ndarray: The result of the filtering.
+
+ .. seealso:: :func:`scipy.ndimage.generic_gradient_magnitude`
+
+ .. note::
+ When the output data type is integral (or when no output is provided
+ and input is integral) the results may not perfectly match the results
+ from SciPy due to floating-point rounding of intermediate results.
+ """
+ if extra_keywords is None:
+ extra_keywords = {}
+ ndim = input.ndim
+ modes = _util._fix_sequence_arg(mode, ndim, 'mode',
+ _util._check_mode)
+ output = _util._get_output(output, input)
+ if ndim == 0:
+ output[:] = input
+ return output
+ derivative(input, 0, output, modes[0], cval,
+ *extra_arguments, **extra_keywords)
+ output *= output
+ if ndim > 1:
+ tmp = _util._get_output(output.dtype, input)
+ for i in range(1, ndim):
+ derivative(input, i, tmp, modes[i], cval,
+ *extra_arguments, **extra_keywords)
+ tmp *= tmp
+ output += tmp
+ return cupy.sqrt(output, output, casting='unsafe')
+
+
+def gaussian_gradient_magnitude(input, sigma, output=None, mode="reflect",
+ cval=0.0, *, algorithm=None, **kwargs):
+ """Multi-dimensional gradient magnitude using Gaussian derivatives.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ sigma (scalar or sequence of scalar): Standard deviations for each axis
+ of Gaussian kernel. A single value applies to all axes.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output. Default is the same dtype as the input.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``'constant'``. Default is ``0.0``.
+ kwargs (dict, optional):
+ dict of extra keyword arguments to pass to ``gaussian_filter()``.
+
+ Returns:
+ cupy.ndarray: The result of the filtering.
+
+ .. seealso:: :func:`scipy.ndimage.gaussian_gradient_magnitude`
+
+ .. note::
+ When the output data type is integral (or when no output is provided
+ and input is integral) the results may not perfectly match the results
+ from SciPy due to floating-point rounding of intermediate results.
+ """
+ def derivative(input, axis, output, mode, cval):
+ order = [0] * input.ndim
+ order[axis] = 1
+ return gaussian_filter(input, sigma, order, output, mode, cval,
+ algorithm=algorithm, **kwargs)
+ return generic_gradient_magnitude(input, derivative, output, mode, cval)
+
+
+def minimum_filter(input, size=None, footprint=None, output=None,
+ mode="reflect", cval=0.0, origin=0):
+ """Multi-dimensional minimum filter.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (int or sequence of int): One of ``size`` or ``footprint`` must be
+ provided. If ``footprint`` is given, ``size`` is ignored. Otherwise
+ ``footprint = cupy.ones(size)`` with ``size`` automatically made to
+ match the number of dimensions in ``input``.
+ footprint (cupy.ndarray): a boolean array which specifies which of the
+ elements within this shape will get passed to the filter function.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output. Default is the same dtype as the input.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``'constant'``. Default is ``0.0``.
+ origin (int or sequence of int): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The result of the filtering.
+
+ .. seealso:: :func:`scipy.ndimage.minimum_filter`
+ """
+ return _min_or_max_filter(input, size, footprint, None, output, mode,
+ cval, origin, 'min')
+
+
+def maximum_filter(input, size=None, footprint=None, output=None,
+ mode="reflect", cval=0.0, origin=0):
+ """Multi-dimensional maximum filter.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (int or sequence of int): One of ``size`` or ``footprint`` must be
+ provided. If ``footprint`` is given, ``size`` is ignored. Otherwise
+ ``footprint = cupy.ones(size)`` with ``size`` automatically made to
+ match the number of dimensions in ``input``.
+ footprint (cupy.ndarray): a boolean array which specifies which of the
+ elements within this shape will get passed to the filter function.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output. Default is the same dtype as the input.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``'constant'``. Default is ``0.0``.
+ origin (int or sequence of int): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The result of the filtering.
+
+ .. seealso:: :func:`scipy.ndimage.maximum_filter`
+ """
+ return _min_or_max_filter(input, size, footprint, None, output, mode,
+ cval, origin, 'max')
+
+
+def _min_or_max_filter(input, size, ftprnt, structure, output, mode, cval,
+ origin, func):
+ # structure is used by morphology.grey_erosion() and grey_dilation()
+ # and not by the regular min/max filters
+
+ sizes, ftprnt, structure = _filters_core._check_size_footprint_structure(
+ input.ndim, size, ftprnt, structure)
+ if cval is cupy.nan:
+ raise NotImplementedError("NaN cval is unsupported")
+
+ if sizes is not None:
+ # Separable filter, run as a series of 1D filters
+ fltr = minimum_filter1d if func == 'min' else maximum_filter1d
+ return _filters_core._run_1d_filters(
+ [fltr if size > 1 else None for size in sizes],
+ input, sizes, output, mode, cval, origin)
+
+ origins, int_type = _filters_core._check_nd_args(input, ftprnt,
+ mode, origin, 'footprint')
+ if structure is not None and structure.ndim != input.ndim:
+ raise RuntimeError('structure array has incorrect shape')
+
+ if ftprnt.size == 0:
+ return cupy.zeros_like(input)
+ offsets = _filters_core._origins_to_offsets(origins, ftprnt.shape)
+ kernel = _get_min_or_max_kernel(mode, ftprnt.shape, func,
+ offsets, float(cval), int_type,
+ has_structure=structure is not None,
+ has_central_value=bool(ftprnt[offsets]))
+ return _filters_core._call_kernel(kernel, input, ftprnt, output,
+ structure, weights_dtype=bool)
+
+
+def minimum_filter1d(input, size, axis=-1, output=None, mode="reflect",
+ cval=0.0, origin=0):
+ """Compute the minimum filter along a single axis.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (int): Length of the minimum filter.
+ axis (int): The axis of input along which to calculate. Default is -1.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output. Default is the same dtype as the input.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``'constant'``. Default is ``0.0``.
+ origin (int): The origin parameter controls the placement of the
+ filter, relative to the center of the current element of the
+ input. Default is ``0``.
+
+ Returns:
+ cupy.ndarray: The result of the filtering.
+
+ .. seealso:: :func:`scipy.ndimage.minimum_filter1d`
+ """
+ return _min_or_max_1d(input, size, axis, output, mode, cval, origin, 'min')
+
+
+def maximum_filter1d(input, size, axis=-1, output=None, mode="reflect",
+ cval=0.0, origin=0):
+ """Compute the maximum filter along a single axis.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (int): Length of the maximum filter.
+ axis (int): The axis of input along which to calculate. Default is -1.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output. Default is the same dtype as the input.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``'constant'``. Default is ``0.0``.
+ origin (int): The origin parameter controls the placement of the
+ filter, relative to the center of the current element of the
+ input. Default is ``0``.
+
+ Returns:
+ cupy.ndarray: The result of the filtering.
+
+ .. seealso:: :func:`scipy.ndimage.maximum_filter1d`
+ """
+ return _min_or_max_1d(input, size, axis, output, mode, cval, origin, 'max')
+
+
+def _min_or_max_1d(input, size, axis=-1, output=None, mode="reflect", cval=0.0,
+ origin=0, func='min'):
+ ftprnt = cupy.ones(size, dtype=bool)
+ ftprnt, origin = _filters_core._convert_1d_args(input.ndim, ftprnt,
+ origin, axis)
+ origins, int_type = _filters_core._check_nd_args(input, ftprnt,
+ mode, origin, 'footprint')
+ offsets = _filters_core._origins_to_offsets(origins, ftprnt.shape)
+ kernel = _get_min_or_max_kernel(mode, ftprnt.shape, func, offsets,
+ float(cval), int_type, has_weights=False)
+ return _filters_core._call_kernel(kernel, input, None, output,
+ weights_dtype=bool)
+
+
+@cupy._util.memoize(for_each_device=True)
+def _get_min_or_max_kernel(mode, w_shape, func, offsets, cval, int_type,
+ has_weights=True, has_structure=False,
+ has_central_value=True):
+ # When there are no 'weights' (the footprint, for the 1D variants) then
+ # we need to make sure intermediate results are stored as doubles for
+ # consistent results with scipy.
+ ctype = 'X' if has_weights else 'double'
+ value = '{value}'
+ if not has_weights:
+ value = 'cast({})'.format(value)
+
+ # Having a non-flat structure biases the values
+ if has_structure:
+ value += ('-' if func == 'min' else '+') + 'cast(sval)'
+
+ if has_central_value:
+ pre = '{} value = x[i];'
+ found = 'value = {func}({value}, value);'
+ else:
+ # If the central pixel is not included in the footprint we cannot
+ # assume `x[i]` is not below the min or above the max and thus cannot
+ # seed with that value. Instead we keep track of having set `value`.
+ pre = '{} value; bool set = false;'
+ found = 'value = set ? {func}({value}, value) : {value}; set=true;'
+
+ return _filters_core._generate_nd_kernel(
+ func, pre.format(ctype),
+ found.format(func=func, value=value), 'y = cast(value);',
+ mode, w_shape, int_type, offsets, cval, ctype=ctype,
+ has_weights=has_weights, has_structure=has_structure)
+
+
+def rank_filter(input, rank, size=None, footprint=None, output=None,
+ mode="reflect", cval=0.0, origin=0):
+ """Multi-dimensional rank filter.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ rank (int): The rank of the element to get. Can be negative to count
+ from the largest value, e.g. ``-1`` indicates the largest value.
+ size (int or sequence of int): One of ``size`` or ``footprint`` must be
+ provided. If ``footprint`` is given, ``size`` is ignored. Otherwise
+ ``footprint = cupy.ones(size)`` with ``size`` automatically made to
+ match the number of dimensions in ``input``.
+ footprint (cupy.ndarray): a boolean array which specifies which of the
+ elements within this shape will get passed to the filter function.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output. Default is the same dtype as the input.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``'constant'``. Default is ``0.0``.
+ origin (int or sequence of int): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The result of the filtering.
+
+ .. seealso:: :func:`scipy.ndimage.rank_filter`
+ """
+ rank = int(rank)
+ return _rank_filter(input, lambda fs: rank + fs if rank < 0 else rank,
+ size, footprint, output, mode, cval, origin)
+
+
+def median_filter(input, size=None, footprint=None, output=None,
+ mode="reflect", cval=0.0, origin=0):
+ """Multi-dimensional median filter.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (int or sequence of int): One of ``size`` or ``footprint`` must be
+ provided. If ``footprint`` is given, ``size`` is ignored. Otherwise
+ ``footprint = cupy.ones(size)`` with ``size`` automatically made to
+ match the number of dimensions in ``input``.
+ footprint (cupy.ndarray): a boolean array which specifies which of the
+ elements within this shape will get passed to the filter function.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output. Default is the same dtype as the input.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``'constant'``. Default is ``0.0``.
+ origin (int or sequence of int): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The result of the filtering.
+
+ .. seealso:: :func:`scipy.ndimage.median_filter`
+ """
+ return _rank_filter(input, lambda fs: fs // 2,
+ size, footprint, output, mode, cval, origin)
+
+
+def percentile_filter(input, percentile, size=None, footprint=None,
+ output=None, mode="reflect", cval=0.0, origin=0):
+ """Multi-dimensional percentile filter.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ percentile (scalar): The percentile of the element to get (from ``0``
+ to ``100``). Can be negative, thus ``-20`` equals ``80``.
+ size (int or sequence of int): One of ``size`` or ``footprint`` must be
+ provided. If ``footprint`` is given, ``size`` is ignored. Otherwise
+ ``footprint = cupy.ones(size)`` with ``size`` automatically made to
+ match the number of dimensions in ``input``.
+ footprint (cupy.ndarray): a boolean array which specifies which of the
+ elements within this shape will get passed to the filter function.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output. Default is the same dtype as the input.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``'constant'``. Default is ``0.0``.
+ origin (int or sequence of int): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The result of the filtering.
+
+ .. seealso:: :func:`scipy.ndimage.percentile_filter`
+ """
+ percentile = float(percentile)
+ if percentile < 0.0:
+ percentile += 100.0
+ if percentile < 0.0 or percentile > 100.0:
+ raise RuntimeError('invalid percentile')
+ if percentile == 100.0:
+ def get_rank(fs):
+ return fs - 1
+ else:
+ def get_rank(fs):
+ return int(float(fs) * percentile / 100.0)
+ return _rank_filter(input, get_rank,
+ size, footprint, output, mode, cval, origin)
+
+
+def _rank_filter(input, get_rank, size=None, footprint=None, output=None,
+ mode="reflect", cval=0.0, origin=0):
+ _, footprint, _ = _filters_core._check_size_footprint_structure(
+ input.ndim, size, footprint, None, force_footprint=True)
+ if cval is cupy.nan:
+ raise NotImplementedError("NaN cval is unsupported")
+ origins, int_type = _filters_core._check_nd_args(input, footprint,
+ mode, origin, 'footprint')
+ if footprint.size == 0:
+ return cupy.zeros_like(input)
+ filter_size = int(footprint.sum())
+ rank = get_rank(filter_size)
+ if rank < 0 or rank >= filter_size:
+ raise RuntimeError('rank not within filter footprint size')
+ if rank == 0:
+ return _min_or_max_filter(input, None, footprint, None, output, mode,
+ cval, origins, 'min')
+ if rank == filter_size - 1:
+ return _min_or_max_filter(input, None, footprint, None, output, mode,
+ cval, origins, 'max')
+ offsets = _filters_core._origins_to_offsets(origins, footprint.shape)
+ kernel = _get_rank_kernel(filter_size, rank, mode, footprint.shape,
+ offsets, float(cval), int_type)
+ return _filters_core._call_kernel(kernel, input, footprint, output,
+ weights_dtype=bool)
+
+
+__SHELL_SORT = '''
+__device__ void sort(X *array, int size) {{
+ int gap = {gap};
+ while (gap > 1) {{
+ gap /= 3;
+ for (int i = gap; i < size; ++i) {{
+ X value = array[i];
+ int j = i - gap;
+ while (j >= 0 && value < array[j]) {{
+ array[j + gap] = array[j];
+ j -= gap;
+ }}
+ array[j + gap] = value;
+ }}
+ }}
+}}'''
+
+
+@cupy._util.memoize()
+def _get_shell_gap(filter_size):
+ gap = 1
+ while gap < filter_size:
+ gap = 3 * gap + 1
+ return gap
+
+
+@cupy._util.memoize(for_each_device=True)
+def _get_rank_kernel(filter_size, rank, mode, w_shape, offsets, cval,
+ int_type):
+ s_rank = min(rank, filter_size - rank - 1)
+ # The threshold was set based on the measurements on a V100
+ # TODO(leofang, anaruse): Use Optuna to automatically tune the threshold,
+ # as it may vary depending on the GPU in use, compiler version, dtype,
+ # filter size, etc.
+ if s_rank <= 80:
+ # When s_rank is small and register usage is low, this partial
+ # selection sort approach is faster than general sorting approach
+ # using shell sort.
+ if s_rank == rank:
+ comp_op = '<'
+ else:
+ comp_op = '>'
+ array_size = s_rank + 2
+ found_post = '''
+ if (iv > {rank} + 1) {{{{
+ int target_iv = 0;
+ X target_val = values[0];
+ for (int jv = 1; jv <= {rank} + 1; jv++) {{{{
+ if (target_val {comp_op} values[jv]) {{{{
+ target_val = values[jv];
+ target_iv = jv;
+ }}}}
+ }}}}
+ if (target_iv <= {rank}) {{{{
+ values[target_iv] = values[{rank} + 1];
+ }}}}
+ iv = {rank} + 1;
+ }}}}'''.format(rank=s_rank, comp_op=comp_op)
+ post = '''
+ X target_val = values[0];
+ for (int jv = 1; jv <= {rank}; jv++) {{
+ if (target_val {comp_op} values[jv]) {{
+ target_val = values[jv];
+ }}
+ }}
+ y=cast(target_val);'''.format(rank=s_rank, comp_op=comp_op)
+ sorter = ''
+ else:
+ array_size = filter_size
+ found_post = ''
+ post = 'sort(values,{});\ny=cast(values[{}]);'.format(
+ filter_size, rank)
+ sorter = __SHELL_SORT.format(gap=_get_shell_gap(filter_size))
+
+ return _filters_core._generate_nd_kernel(
+ 'rank_{}_{}'.format(filter_size, rank),
+ 'int iv = 0;\nX values[{}];'.format(array_size),
+ 'values[iv++] = {value};' + found_post, post,
+ mode, w_shape, int_type, offsets, cval, preamble=sorter)
diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters_core.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters_core.py
index 60aec68cf..469dff9d9 100644
--- a/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters_core.py
+++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_filters_core.py
@@ -1,24 +1,172 @@
"""A vendored subset of cupyx.scipy.ndimage._filters_core"""
+import warnings
import cupy
import numpy
+from cucim.skimage._vendored import _internal as internal
from cucim.skimage._vendored import _ndimage_util as _util
-includes = r'''
-// workaround for HIP: line begins with #include
+
+def _origins_to_offsets(origins, w_shape):
+ return tuple(x // 2 + o for x, o in zip(w_shape, origins))
+
+
+def _check_size_footprint_structure(ndim, size, footprint, structure,
+ stacklevel=3, force_footprint=False):
+ if structure is None and footprint is None:
+ if size is None:
+ raise RuntimeError("no footprint or filter size provided")
+ sizes = _util._fix_sequence_arg(size, ndim, 'size', int)
+ if force_footprint:
+ return None, cupy.ones(sizes, bool), None
+ return sizes, None, None
+ if size is not None:
+ warnings.warn("ignoring size because {} is set".format(
+ 'structure' if footprint is None else 'footprint'),
+ UserWarning, stacklevel=stacklevel + 1)
+
+ if footprint is not None:
+ footprint = cupy.array(footprint, bool, True, 'C')
+ if not footprint.any():
+ raise ValueError("all-zero footprint is not supported")
+
+ if structure is None:
+ if not force_footprint and footprint.all():
+ if footprint.ndim != ndim:
+ raise RuntimeError("size must have length equal to input rank")
+ return footprint.shape, None, None
+ return None, footprint, None
+
+ structure = cupy.ascontiguousarray(structure)
+ if footprint is None:
+ footprint = cupy.ones(structure.shape, bool)
+ return None, footprint, structure
+
+
+def _convert_1d_args(ndim, weights, origin, axis):
+ if weights.ndim != 1 or weights.size < 1:
+ raise RuntimeError('incorrect filter size')
+ axis = internal._normalize_axis_index(axis, ndim)
+ w_shape = [1] * ndim
+ w_shape[axis] = weights.size
+ weights = weights.reshape(w_shape)
+ origins = [0] * ndim
+ origins[axis] = _util._check_origin(origin, weights.size)
+ return weights, tuple(origins)
+
+
+def _check_nd_args(input, weights, mode, origin, wghts_name='filter weights'):
+ _util._check_mode(mode)
+ # Weights must always be less than 2 GiB
+ if weights.nbytes >= (1 << 31):
+ raise RuntimeError('weights must be 2 GiB or less, use FFTs instead')
+ weight_dims = [x for x in weights.shape if x != 0]
+ if len(weight_dims) != input.ndim:
+ raise RuntimeError('{} array has incorrect shape'.format(wghts_name))
+ origins = _util._fix_sequence_arg(origin, len(weight_dims), 'origin', int)
+ for origin, width in zip(origins, weight_dims):
+ _util._check_origin(origin, width)
+ return tuple(origins), _util._get_inttype(input)
+
+
+def _run_1d_filters(filters, input, args, output, mode, cval, origin=0,
+ **filter_kwargs):
+ """
+ Runs a series of 1D filters forming an nd filter. The filters must be a
+ list of callables that take input, arg, axis, output, mode, cval, origin.
+ The args is a list of values that are passed for the arg value to the
+ filter. Individual filters can be None causing that axis to be skipped.
+ """
+ output = _util._get_output(output, input)
+ modes = _util._fix_sequence_arg(mode, input.ndim, 'mode',
+ _util._check_mode)
+ # for filters, "wrap" is a synonym for "grid-wrap".
+ modes = ['grid-wrap' if m == 'wrap' else m for m in modes]
+ origins = _util._fix_sequence_arg(origin, input.ndim, 'origin', int)
+ n_filters = sum(filter is not None for filter in filters)
+ if n_filters == 0:
+ output[:] = input
+ return output
+ # We can't operate in-place efficiently, so use a 2-buffer system
+ temp = _util._get_output(output.dtype, input) if n_filters > 1 else None # noqa
+ iterator = zip(filters, args, modes, origins)
+ for axis, (fltr, arg, mode, origin) in enumerate(iterator):
+ if fltr is None:
+ continue
+ else:
+ break
+ if n_filters % 2 == 0:
+ fltr(input, arg, axis, temp, mode, cval, origin, **filter_kwargs)
+ input = temp
+ else:
+ fltr(input, arg, axis, output, mode, cval, origin, **filter_kwargs)
+ if n_filters == 1:
+ return output
+ input, output = output, temp
+ for axis, (fltr, arg, mode, origin) in enumerate(iterator, start=axis + 1):
+ if fltr is None:
+ continue
+ fltr(input, arg, axis, output, mode, cval, origin, **filter_kwargs)
+ input, output = output, input
+ return input
+
+
+def _call_kernel(kernel, input, weights, output, structure=None,
+ weights_dtype=numpy.float64, structure_dtype=numpy.float64):
+ """
+ Calls a constructed ElementwiseKernel. The kernel must take an input image,
+ an optional array of weights, an optional array for the structure, and an
+ output array.
+
+ weights and structure can be given as None (structure defaults to None) in
+ which case they are not passed to the kernel at all. If the output is given
+ as None then it will be allocated in this function.
+
+ This function deals with making sure that the weights and structure are
+ contiguous and float64 (or bool for weights that are footprints)*, that the
+ output is allocated and appropriately shaped. This also deals with the
+ situation that the input and output arrays overlap in memory.
+
+ * weights is always cast to float64 or bool in order to get an output
+ compatible with SciPy, though float32 might be sufficient when input dtype
+ is low precision. If weights_dtype is passed as weights.dtype then no
+ dtype conversion will occur. The input and output are never converted.
+ """
+ args = [input]
+ complex_output = input.dtype.kind == 'c'
+ if weights is not None:
+ weights = cupy.ascontiguousarray(weights, weights_dtype)
+ complex_output = complex_output or weights.dtype.kind == 'c'
+ args.append(weights)
+ if structure is not None:
+ structure = cupy.ascontiguousarray(structure, structure_dtype)
+ args.append(structure)
+ output = _util._get_output(output, input, None, complex_output) # noqa
+ needs_temp = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS')
+ if needs_temp:
+ output, temp = _util._get_output(output.dtype, input, None, complex_output), output # noqa
+ args.append(output)
+ kernel(*args)
+ if needs_temp:
+ output[:] = temp
+ output = temp
+ return output
+
+
+_ndimage_includes = r'''
#include // let Jitify handle this
#include
-'''
-
-_CAST_FUNCTION = """
-// Implements a casting function to make it compatible with scipy
-// Use like cast(value)
template<> struct std::is_floating_point : std::true_type {};
template<> struct std::is_signed : std::true_type {};
template struct std::is_signed> : std::is_signed {};
+'''
+
+_ndimage_CAST_FUNCTION = """
+// Implements a casting function to make it compatible with scipy
+// Use like cast(value)
template
__device__ __forceinline__
typename std::enable_if<(!std::is_floating_point::value
@@ -148,7 +296,7 @@ def _generate_nd_kernel(name, pre, found, post, mode, w_shape, int_type,
name += '_with_structure'
if has_mask:
name += '_with_mask'
- preamble = includes + _CAST_FUNCTION + preamble
+ preamble = _ndimage_includes + _ndimage_CAST_FUNCTION + preamble
options += ('--std=c++11', '-DCUPY_USE_JITIFY')
return cupy.ElementwiseKernel(in_params, out_params, operation, name,
reduce_dims=False, preamble=preamble,
diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_interp_kernels.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_interp_kernels.py
new file mode 100644
index 000000000..fa65eed36
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_interp_kernels.py
@@ -0,0 +1,598 @@
+import cupy
+import numpy
+
+from cucim.skimage._vendored import \
+ _ndimage_spline_kernel_weights as _spline_kernel_weights
+from cucim.skimage._vendored import \
+ _ndimage_spline_prefilter_core as _spline_prefilter_core
+from cucim.skimage._vendored import _ndimage_util as _util
+
# Preamble prepended to every interpolation kernel built in this module. The
# header supplies the CUDART_NAN / CUDART_INF macros that
# _generate_interp_custom emits for non-finite ``cval`` values.
math_constants_preamble = r'''
// workaround for HIP: line begins with #include
#include <cupy/math_constants.h>
'''
+
# Alias for the spline-weight code templates. _generate_interp_custom indexes
# this by spline order and fills the template in with ``str.format``.
spline_weights_inline = _spline_kernel_weights.spline_weights_inline
+
+
+def _get_coord_map(ndim, nprepad=0):
+ """Extract target coordinate from coords array (for map_coordinates).
+
+ Notes
+ -----
+ Assumes the following variables have been initialized on the device::
+
+ coords (ndarray): array of shape (ncoords, ndim) containing the target
+ coordinates.
+ c_j: variables to hold the target coordinates
+
+ computes::
+
+ c_j = coords[i + j * ncoords];
+
+ ncoords is determined by the size of the output array, y.
+ y will be indexed by the CIndexer, _ind.
+ Thus ncoords = _ind.size();
+
+ """
+ ops = []
+ ops.append('ptrdiff_t ncoords = _ind.size();')
+ pre = f" + (W){nprepad}" if nprepad > 0 else ''
+ for j in range(ndim):
+ ops.append(f'''
+ W c_{j} = coords[i + {j} * ncoords]{pre};''')
+ return ops
+
+
+def _get_coord_zoom_and_shift(ndim, nprepad=0):
+ """Compute target coordinate based on a shift followed by a zoom.
+
+ This version zooms from the center of the edge pixels.
+
+ Notes
+ -----
+ Assumes the following variables have been initialized on the device::
+
+ in_coord[ndim]: array containing the source coordinate
+ zoom[ndim]: array containing the zoom for each axis
+ shift[ndim]: array containing the zoom for each axis
+
+ computes::
+
+ c_j = zoom[j] * (in_coord[j] - shift[j])
+
+ """
+ ops = []
+ pre = f" + (W){nprepad}" if nprepad > 0 else ''
+ for j in range(ndim):
+ ops.append(f'''
+ W c_{j} = zoom[{j}] * ((W)in_coord[{j}] - shift[{j}]){pre};''')
+ return ops
+
+
+def _get_coord_zoom_and_shift_grid(ndim, nprepad=0):
+ """Compute target coordinate based on a shift followed by a zoom.
+
+ This version zooms from the outer edges of the grid pixels.
+
+ Notes
+ -----
+ Assumes the following variables have been initialized on the device::
+
+ in_coord[ndim]: array containing the source coordinate
+ zoom[ndim]: array containing the zoom for each axis
+ shift[ndim]: array containing the zoom for each axis
+
+ computes::
+
+ c_j = zoom[j] * (in_coord[j] - shift[j] + 0.5) - 0.5
+
+ """
+ ops = []
+ pre = f" + (W){nprepad}" if nprepad > 0 else ''
+ for j in range(ndim):
+ ops.append(f'''
+ W c_{j} = zoom[{j}] * ((W)in_coord[{j}] - shift[j] + 0.5) - 0.5{pre};''')
+ return ops
+
+
+def _get_coord_zoom(ndim, nprepad=0):
+ """Compute target coordinate based on a zoom.
+
+ This version zooms from the center of the edge pixels.
+
+ Notes
+ -----
+ Assumes the following variables have been initialized on the device::
+
+ in_coord[ndim]: array containing the source coordinate
+ zoom[ndim]: array containing the zoom for each axis
+
+ computes::
+
+ c_j = zoom[j] * in_coord[j]
+
+ """
+ ops = []
+ pre = f" + (W){nprepad}" if nprepad > 0 else ''
+ for j in range(ndim):
+ ops.append(f'''
+ W c_{j} = zoom[{j}] * (W)in_coord[{j}]{pre};''')
+ return ops
+
+
+def _get_coord_zoom_grid(ndim, nprepad=0):
+ """Compute target coordinate based on a zoom (grid_mode=True version).
+
+ This version zooms from the outer edges of the grid pixels.
+
+ Notes
+ -----
+ Assumes the following variables have been initialized on the device::
+
+ in_coord[ndim]: array containing the source coordinate
+ zoom[ndim]: array containing the zoom for each axis
+
+ computes::
+
+ c_j = zoom[j] * (in_coord[j] + 0.5) - 0.5
+
+ """
+ ops = []
+ pre = f" + (W){nprepad}" if nprepad > 0 else ''
+ for j in range(ndim):
+ ops.append(f'''
+ W c_{j} = zoom[{j}] * ((W)in_coord[{j}] + 0.5) - 0.5{pre};''')
+ return ops
+
+
+def _get_coord_shift(ndim, nprepad=0):
+ """Compute target coordinate based on a shift.
+
+ Notes
+ -----
+ Assumes the following variables have been initialized on the device::
+
+ in_coord[ndim]: array containing the source coordinate
+ shift[ndim]: array containing the zoom for each axis
+
+ computes::
+
+ c_j = in_coord[j] - shift[j]
+
+ """
+ ops = []
+ pre = f" + (W){nprepad}" if nprepad > 0 else ''
+ for j in range(ndim):
+ ops.append(f'''
+ W c_{j} = (W)in_coord[{j}] - shift[{j}]{pre};''')
+ return ops
+
+
+def _get_coord_affine(ndim, nprepad=0):
+ """Compute target coordinate based on a homogeneous transformation matrix.
+
+ The homogeneous matrix has shape (ndim, ndim + 1). It corresponds to
+ affine matrix where the last row of the affine is assumed to be:
+ ``[0] * ndim + [1]``.
+
+ Notes
+ -----
+ Assumes the following variables have been initialized on the device::
+
+ mat(array): array containing the (ndim, ndim + 1) transform matrix.
+ in_coords(array): coordinates of the input
+
+ For example, in 2D:
+
+ c_0 = mat[0] * in_coords[0] + mat[1] * in_coords[1] + aff[2];
+ c_1 = mat[3] * in_coords[0] + mat[4] * in_coords[1] + aff[5];
+
+ """
+ ops = []
+ pre = f" + (W){nprepad}" if nprepad > 0 else ''
+ ncol = ndim + 1
+ for j in range(ndim):
+ ops.append(f'''
+ W c_{j} = (W)0.0;''')
+ for k in range(ndim):
+ ops.append(f'''
+ c_{j} += mat[{ncol * j + k}] * (W)in_coord[{k}];''')
+ ops.append(f'''
+ c_{j} += mat[{ncol * j + ndim}]{pre};''')
+ return ops
+
+
+def _unravel_loop_index(shape, uint_t='unsigned int'):
+ """
+ declare a multi-index array in_coord and unravel the 1D index, i into it.
+ This code assumes that the array is a C-ordered array.
+ """
+ ndim = len(shape)
+ code = [f'''
+ {uint_t} in_coord[{ndim}];
+ {uint_t} s, t, idx = i;''']
+ for j in range(ndim - 1, 0, -1):
+ code.append(f'''
+ s = {shape[j]};
+ t = idx / s;
+ in_coord[{j}] = idx - t * s;
+ idx = t;''')
+ code.append('''
+ in_coord[0] = idx;''')
+ return '\n'.join(code)
+
+
+def _generate_interp_custom(coord_func, ndim, large_int, yshape, mode, cval,
+ order, name='', integer_output=False, nprepad=0,
+ omit_in_coord=False):
+ """
+ Args:
+ coord_func (function): generates code to do the coordinate
+ transformation. See for example, `_get_coord_shift`.
+ ndim (int): The number of dimensions.
+ large_int (bool): If true use Py_ssize_t instead of int for indexing.
+ yshape (tuple): Shape of the output array.
+ mode (str): Signal extension mode to use at the array boundaries
+ cval (float): constant value used when `mode == 'constant'`.
+ name (str): base name for the interpolation kernel
+ integer_output (bool): boolean indicating whether the output has an
+ integer type.
+ nprepad (int): integer indicating the amount of prepadding at the
+ boundaries.
+
+ Returns:
+ operation (str): code body for the ElementwiseKernel
+ name (str): name for the ElementwiseKernel
+ """
+
+ ops = []
+ internal_dtype = 'double' if integer_output else 'Y'
+ ops.append(f'{internal_dtype} out = 0.0;')
+
+ if large_int:
+ uint_t = 'size_t'
+ int_t = 'ptrdiff_t'
+ else:
+ uint_t = 'unsigned int'
+ int_t = 'int'
+
+ # determine strides for x along each axis
+ for j in range(ndim):
+ ops.append(f'const {int_t} xsize_{j} = x.shape()[{j}];')
+ ops.append(f'const {uint_t} sx_{ndim - 1} = 1;')
+ for j in range(ndim - 1, 0, -1):
+ ops.append(f'const {uint_t} sx_{j - 1} = sx_{j} * xsize_{j};')
+
+ if not omit_in_coord:
+ # create in_coords array to store the unraveled indices
+ ops.append(_unravel_loop_index(yshape, uint_t))
+
+ # compute the transformed (target) coordinates, c_j
+ ops = ops + coord_func(ndim, nprepad)
+
+ if cval is numpy.nan:
+ cval = '(Y)CUDART_NAN'
+ elif cval == numpy.inf:
+ cval = '(Y)CUDART_INF'
+ elif cval == -numpy.inf:
+ cval = '(Y)(-CUDART_INF)'
+ else:
+ cval = f'({internal_dtype}){cval}'
+
+ if mode == 'constant':
+ # use cval if coordinate is outside the bounds of x
+ _cond = ' || '.join(
+ [f'(c_{j} < 0) || (c_{j} > xsize_{j} - 1)' for j in range(ndim)])
+ ops.append(f'''
+ if ({_cond})
+ {{
+ out = {cval};
+ }}
+ else
+ {{''')
+
+ if order == 0:
+ if mode == 'wrap':
+ ops.append('double dcoord;') # mode 'wrap' requires this to work
+ for j in range(ndim):
+ # determine nearest neighbor
+ if mode == 'wrap':
+ ops.append(f'''
+ dcoord = c_{j};''')
+ else:
+ ops.append(f'''
+ {int_t} cf_{j} = ({int_t})floor((double)c_{j} + 0.5);''')
+
+ # handle boundary
+ if mode != 'constant':
+ if mode == 'wrap':
+ ixvar = 'dcoord'
+ float_ix = True
+ else:
+ ixvar = f'cf_{j}'
+ float_ix = False
+ ops.append(
+ _util._generate_boundary_condition_ops(
+ mode, ixvar, f'xsize_{j}', int_t, float_ix))
+ if mode == 'wrap':
+ ops.append(f'''
+ {int_t} cf_{j} = ({int_t})floor(dcoord + 0.5);''')
+
+ # sum over ic_j will give the raveled coordinate in the input
+ ops.append(f'''
+ {int_t} ic_{j} = cf_{j} * sx_{j};''')
+ _coord_idx = ' + '.join([f'ic_{j}' for j in range(ndim)])
+ if mode == 'grid-constant':
+ _cond = ' || '.join([f'(ic_{j} < 0)' for j in range(ndim)])
+ ops.append(f'''
+ if ({_cond}) {{
+ out = {cval};
+ }} else {{
+ out = ({internal_dtype})x[{_coord_idx}];
+ }}''')
+ else:
+ ops.append(f'''
+ out = ({internal_dtype})x[{_coord_idx}];''')
+
+ elif order == 1:
+ for j in range(ndim):
+ # get coordinates for linear interpolation along axis j
+ ops.append(f'''
+ {int_t} cf_{j} = ({int_t})floor((double)c_{j});
+ {int_t} cc_{j} = cf_{j} + 1;
+ {int_t} n_{j} = (c_{j} == cf_{j}) ? 1 : 2; // points needed
+ ''')
+
+ if mode == 'wrap':
+ ops.append(f'''
+ double dcoordf = c_{j};
+ double dcoordc = c_{j} + 1;''')
+ else:
+ # handle boundaries for extension modes.
+ ops.append(f'''
+ {int_t} cf_bounded_{j} = cf_{j};
+ {int_t} cc_bounded_{j} = cc_{j};''')
+
+ if mode != 'constant':
+ if mode == 'wrap':
+ ixvar = 'dcoordf'
+ float_ix = True
+ else:
+ ixvar = f'cf_bounded_{j}'
+ float_ix = False
+ ops.append(
+ _util._generate_boundary_condition_ops(
+ mode, ixvar, f'xsize_{j}', int_t, float_ix))
+
+ ixvar = 'dcoordc' if mode == 'wrap' else f'cc_bounded_{j}'
+ ops.append(
+ _util._generate_boundary_condition_ops(
+ mode, ixvar, f'xsize_{j}', int_t, float_ix))
+ if mode == 'wrap':
+ ops.append(
+ f'''
+ {int_t} cf_bounded_{j} = ({int_t})floor(dcoordf);;
+ {int_t} cc_bounded_{j} = ({int_t})floor(dcoordf + 1);;
+ '''
+ )
+
+ ops.append(f'''
+ for (int s_{j} = 0; s_{j} < n_{j}; s_{j}++)
+ {{
+ W w_{j};
+ {int_t} ic_{j};
+ if (s_{j} == 0)
+ {{
+ w_{j} = (W)cc_{j} - c_{j};
+ ic_{j} = cf_bounded_{j} * sx_{j};
+ }} else
+ {{
+ w_{j} = c_{j} - (W)cf_{j};
+ ic_{j} = cc_bounded_{j} * sx_{j};
+ }}''')
+ elif order > 1:
+ if mode == 'grid-constant':
+ spline_mode = 'constant'
+ elif mode == 'nearest':
+ spline_mode = 'nearest'
+ else:
+ spline_mode = _spline_prefilter_core._get_spline_mode(mode)
+
+ # wx, wy are temporary variables used during spline weight computation
+ ops.append(f'''
+ W wx, wy;
+ {int_t} start;''')
+ for j in range(ndim):
+ # determine weights along the current axis
+ ops.append(f'''
+ W weights_{j}[{order + 1}];''')
+ ops.append(spline_weights_inline[order].format(j=j, order=order))
+
+ # get starting coordinate for spline interpolation along axis j
+ if mode in ['wrap']:
+ ops.append(f'double dcoord = c_{j};')
+ coord_var = 'dcoord'
+ ops.append(
+ _util._generate_boundary_condition_ops(
+ mode, coord_var, f'xsize_{j}', int_t, True))
+ else:
+ coord_var = f'(double)c_{j}'
+
+ if order & 1:
+ op_str = '''
+ start = ({int_t})floor({coord_var}) - {order_2};'''
+ else:
+ op_str = '''
+ start = ({int_t})floor({coord_var} + 0.5) - {order_2};'''
+ ops.append(
+ op_str.format(
+ int_t=int_t, coord_var=coord_var, order_2=order // 2
+ ))
+
+ # set of coordinate values within spline footprint along axis j
+ ops.append(f'''{int_t} ci_{j}[{order + 1}];''')
+ for k in range(order + 1):
+ ixvar = f'ci_{j}[{k}]'
+ ops.append(f'''
+ {ixvar} = start + {k};''')
+ ops.append(
+ _util._generate_boundary_condition_ops(
+ spline_mode, ixvar, f'xsize_{j}', int_t))
+
+ # loop over the order + 1 values in the spline filter
+ ops.append(f'''
+ W w_{j};
+ {int_t} ic_{j};
+ for (int k_{j} = 0; k_{j} <= {order}; k_{j}++)
+ {{
+ w_{j} = weights_{j}[k_{j}];
+ ic_{j} = ci_{j}[k_{j}] * sx_{j};
+ ''')
+
+ if order > 0:
+
+ _weight = ' * '.join([f'w_{j}' for j in range(ndim)])
+ _coord_idx = ' + '.join([f'ic_{j}' for j in range(ndim)])
+ if mode == 'grid-constant' or (order > 1 and mode == 'constant'):
+ _cond = ' || '.join([f'(ic_{j} < 0)' for j in range(ndim)])
+ ops.append(f'''
+ if ({_cond}) {{
+ out += {cval} * ({internal_dtype})({_weight});
+ }} else {{
+ {internal_dtype} val = ({internal_dtype})x[{_coord_idx}];
+ out += val * ({internal_dtype})({_weight});
+ }}''')
+ else:
+ ops.append(f'''
+ {internal_dtype} val = ({internal_dtype})x[{_coord_idx}];
+ out += val * ({internal_dtype})({_weight});''')
+
+ ops.append('}' * ndim)
+
+ if mode == 'constant':
+ ops.append('}')
+
+ if integer_output:
+ ops.append('y = (Y)rint((double)out);')
+ else:
+ ops.append('y = (Y)out;')
+ operation = '\n'.join(ops)
+
+ mode_str = mode.replace('-', '_') # avoid hyphen in kernel name
+ name = 'cupyx_scipy_ndimage_interpolate_{}_order{}_{}_{}d_y{}'.format(
+ name, order, mode_str, ndim, '_'.join([f'{j}' for j in yshape]),
+ )
+ if uint_t == 'size_t':
+ name += '_i64'
+ return operation, name
+
+
@cupy._util.memoize(for_each_device=True)
def _get_map_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                    integer_output=False, nprepad=0):
    """Return the (memoized) ElementwiseKernel used by ``map_coordinates``.

    The kernel takes the raw input ``x`` and the raw coordinate array
    ``coords`` and writes the interpolated value to ``y``. All arguments are
    forwarded to `_generate_interp_custom`.
    """
    # The target coordinates are read straight from ``coords``, so the
    # unravelled output index (in_coord) is not needed.
    code, kernel_name = _generate_interp_custom(
        coord_func=_get_coord_map,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='map',
        integer_output=integer_output,
        nprepad=nprepad,
        omit_in_coord=True,
    )
    return cupy.ElementwiseKernel(
        'raw X x, raw W coords', 'Y y', code, kernel_name,
        preamble=math_constants_preamble)
+
+
@cupy._util.memoize(for_each_device=True)
def _get_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                      integer_output=False, nprepad=0):
    """Return the (memoized) ElementwiseKernel applying a per-axis shift.

    The kernel takes the raw input ``x`` and the raw ``shift`` array and
    writes the interpolated value to ``y``. All arguments are forwarded to
    `_generate_interp_custom`.
    """
    code, kernel_name = _generate_interp_custom(
        coord_func=_get_coord_shift,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='shift',
        integer_output=integer_output,
        nprepad=nprepad,
    )
    return cupy.ElementwiseKernel(
        'raw X x, raw W shift', 'Y y', code, kernel_name,
        preamble=math_constants_preamble)
+
+
@cupy._util.memoize(for_each_device=True)
def _get_zoom_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                           integer_output=False, grid_mode=False, nprepad=0):
    """Return the (memoized) ElementwiseKernel for a shift followed by a zoom.

    The kernel takes the raw input ``x`` and the raw ``shift`` and ``zoom``
    arrays and writes the interpolated value to ``y``. ``grid_mode`` selects
    between the two coordinate conventions (`_get_coord_zoom_and_shift_grid`
    versus `_get_coord_zoom_and_shift`); the remaining arguments are
    forwarded to `_generate_interp_custom`.
    """
    if grid_mode:
        coord_func, label = _get_coord_zoom_and_shift_grid, "zoom_shift_grid"
    else:
        coord_func, label = _get_coord_zoom_and_shift, "zoom_shift"
    code, kernel_name = _generate_interp_custom(
        coord_func=coord_func,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name=label,
        integer_output=integer_output,
        nprepad=nprepad,
    )
    return cupy.ElementwiseKernel(
        'raw X x, raw W shift, raw W zoom', 'Y y', code, kernel_name,
        preamble=math_constants_preamble)
+
+
@cupy._util.memoize(for_each_device=True)
def _get_zoom_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                     integer_output=False, grid_mode=False, nprepad=0):
    """Return the (memoized) ElementwiseKernel applying a per-axis zoom.

    The kernel takes the raw input ``x`` and the raw ``zoom`` array and
    writes the interpolated value to ``y``. ``grid_mode`` selects between
    `_get_coord_zoom_grid` and `_get_coord_zoom`; the remaining arguments are
    forwarded to `_generate_interp_custom`.
    """
    if grid_mode:
        coord_func, label = _get_coord_zoom_grid, "zoom_grid"
    else:
        coord_func, label = _get_coord_zoom, "zoom"
    code, kernel_name = _generate_interp_custom(
        coord_func=coord_func,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name=label,
        integer_output=integer_output,
        nprepad=nprepad,
    )
    return cupy.ElementwiseKernel(
        'raw X x, raw W zoom', 'Y y', code, kernel_name,
        preamble=math_constants_preamble)
+
+
@cupy._util.memoize(for_each_device=True)
def _get_affine_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                       integer_output=False, nprepad=0):
    """Return the (memoized) ElementwiseKernel applying an affine transform.

    The kernel takes the raw input ``x`` and the raw matrix ``mat`` (see
    `_get_coord_affine` for its layout) and writes the interpolated value to
    ``y``. All arguments are forwarded to `_generate_interp_custom`.
    """
    code, kernel_name = _generate_interp_custom(
        coord_func=_get_coord_affine,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='affine',
        integer_output=integer_output,
        nprepad=nprepad,
    )
    return cupy.ElementwiseKernel(
        'raw X x, raw W mat', 'Y y', code, kernel_name,
        preamble=math_constants_preamble)
diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_interpolation.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_interpolation.py
new file mode 100644
index 000000000..18275213b
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_interpolation.py
@@ -0,0 +1,757 @@
+import cmath
+import math
+import warnings
+
+import cupy
+import numpy
+from cupy import _core
+from cupy.cuda import runtime
+
+from cucim.skimage._vendored import _ndimage_interp_kernels as _interp_kernels
+from cucim.skimage._vendored import \
+ _ndimage_spline_prefilter_core as _spline_prefilter_core
+from cucim.skimage._vendored import _ndimage_util as _util
+from cucim.skimage._vendored._internal import _normalize_axis_index, prod
+
+
+def _check_parameter(func_name, order, mode):
+ if order is None:
+ warnings.warn(f'Currently the default order of {func_name} is 1. In a '
+ 'future release this may change to 3 to match '
+ 'scipy.ndimage ')
+ elif order < 0 or 5 < order:
+ raise ValueError('spline order is not supported')
+
+ if mode not in ('constant', 'grid-constant', 'nearest', 'mirror',
+ 'reflect', 'grid-mirror', 'wrap', 'grid-wrap', 'opencv',
+ '_opencv_edge'):
+ raise ValueError('boundary mode ({}) is not supported'.format(mode))
+
+
def _get_spline_output(input, output):
    """Create workspace array, temp, and the final dtype for the output.

    Differs from SciPy by not always forcing the internal floating point dtype
    to be double precision.

    Args:
        input (cupy.ndarray): The array to be filtered.
        output (cupy.ndarray, dtype or None): Requested output array or
            dtype. ``None`` means "same dtype as ``input``".

    Returns:
        tuple: ``(temp, float_dtype, output_dtype)`` where ``temp`` is a
        C-contiguous workspace holding a copy of ``input`` in
        ``float_dtype`` (it is ``output`` itself when that array can be
        used directly), and ``output_dtype`` is the dtype of the final result.

    Raises:
        ValueError: If ``input`` is complex and ``output`` is a non-complex
            array.
    """
    is_complex = input.dtype.kind == 'c'
    min_float_dtype = cupy.complex64 if is_complex else cupy.float32

    output_is_array = isinstance(output, cupy.ndarray)
    if output_is_array:
        if is_complex and output.dtype.kind != 'c':
            raise ValueError(
                'output must have complex dtype for complex inputs'
            )
        output_dtype = output.dtype
        float_dtype = cupy.promote_types(output.dtype, min_float_dtype)
    else:
        if output is None:
            output = output_dtype = input.dtype
        else:
            output_dtype = cupy.dtype(output)
        float_dtype = cupy.promote_types(output, min_float_dtype)

    # The user-provided array can serve as the workspace only when it already
    # has the internal floating point dtype and is C-contiguous.
    reuse_output = (output_is_array
                    and output.dtype == float_dtype == output_dtype
                    and output.flags.c_contiguous)
    if reuse_output:
        if output is not input:
            _core.elementwise_copy(input, output)
        return output, float_dtype, output_dtype

    temp = cupy.ascontiguousarray(input.astype(float_dtype, copy=False))
    # never hand back a workspace that may alias the caller's input
    if cupy.shares_memory(temp, input, 'MAY_SHARE_BOUNDS'):
        temp = temp.copy()
    return temp, float_dtype, output_dtype
+
+
def spline_filter1d(input, order=3, axis=-1, output=cupy.float64,
                    mode='mirror'):
    """
    Calculate a 1-D spline filter along the given axis.

    The lines of the array along the given axis are filtered by a
    spline filter. Orders 0 and 1 are accepted, but for them the input is
    simply copied to the output.

    Args:
        input (cupy.ndarray): The input array.
        order (int): The order of the spline interpolation, default is 3. Must
            be in the range 0-5.
        axis (int): The axis along which the spline filter is applied. Default
            is the last axis.
        output (cupy.ndarray or dtype, optional): The array in which to place
            the output, or the dtype of the returned array. Default is
            ``numpy.float64``.
        mode (str): Points outside the boundaries of the input are filled
            according to the given mode (``'constant'``, ``'nearest'``,
            ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``,
            ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``).

    Returns:
        cupy.ndarray: The result of prefiltering the input.

    Raises:
        RuntimeError: If ``order`` is outside the range 0-5.

    .. seealso:: :func:`scipy.ndimage.spline_filter1d`
    """
    if order < 0 or order > 5:
        raise RuntimeError('spline order not supported')
    ndim = input.ndim
    axis = _normalize_axis_index(axis, ndim)

    # order 0, 1 don't require reshaping as no CUDA kernel will be called
    # scalar or size 1 arrays also don't need to be filtered
    if order < 2 or ndim == 0 or input.shape[axis] == 1:
        result = _util._get_output(output, input)
        _core.elementwise_copy(input, result)
        return result

    temp, data_dtype, output_dtype = _get_spline_output(input, output)
    data_type = cupy._core._scalar.get_typename(temp.dtype)
    pole_type = cupy._core._scalar.get_typename(temp.real.dtype)

    index_type = _util._get_inttype(input)
    index_dtype = cupy.int32 if index_type == 'int' else cupy.int64

    n_samples = input.shape[axis]
    n_signals = input.size // n_samples
    info = cupy.array((n_signals, n_samples) + input.shape,
                      dtype=index_dtype)

    # empirical choice of block size that seemed to work well
    block_size = max(2 ** math.ceil(numpy.log2(n_samples / 32)), 8)
    kern = _spline_prefilter_core.get_raw_spline1d_kernel(
        axis,
        ndim,
        mode,
        order=order,
        index_type=index_type,
        data_type=data_type,
        pole_type=pole_type,
        block_size=block_size,
    )

    # Due to recursive nature, a given line of data must be processed by a
    # single thread. n_signals lines will be processed in total.
    n_blocks = (n_signals + block_size - 1) // block_size

    # apply prefilter gain
    poles = _spline_prefilter_core.get_poles(order=order)
    temp *= _spline_prefilter_core.get_gain(poles)

    # apply causal + anti-causal IIR spline filters
    kern((n_blocks,), (block_size,), (temp, info))

    if isinstance(output, cupy.ndarray) and temp is not output:
        # copy kernel output into the user-provided output array
        _core.elementwise_copy(temp, output)
        return output
    return temp.astype(output_dtype, copy=False)
+
+
def spline_filter(input, order=3, output=cupy.float64, mode='mirror'):
    """Multidimensional spline filter.

    Applies `spline_filter1d` along every axis of ``input`` in turn.

    Args:
        input (cupy.ndarray): The input array.
        order (int): The order of the spline interpolation, default is 3. Must
            be in the range 2-5.
        output (cupy.ndarray or dtype, optional): The array in which to place
            the output, or the dtype of the returned array. Default is
            ``numpy.float64``.
        mode (str): Points outside the boundaries of the input are filled
            according to the given mode (``'constant'``, ``'nearest'``,
            ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``,
            ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``).

    Returns:
        cupy.ndarray: The result of prefiltering the input.

    Raises:
        RuntimeError: If ``order`` is outside the range 2-5.

    .. seealso:: :func:`scipy.ndimage.spline_filter`
    """
    if order < 2 or order > 5:
        raise RuntimeError('spline order not supported')

    temp, _, output_dtype = _get_spline_output(input, output)
    # ``order`` is known to be >= 2 here, so only 0-d inputs skip filtering.
    if input.ndim > 0:
        x = input
        for axis in range(input.ndim):
            # The first pass reads ``input``; later passes filter the
            # workspace in place.
            spline_filter1d(x, order, axis, output=temp, mode=mode)
            x = temp
    if isinstance(output, cupy.ndarray):
        _core.elementwise_copy(temp, output)
    else:
        output = temp
    if output.dtype != output_dtype:
        output = output.astype(output_dtype)
    return output
+
+
def _check_coordinates(coordinates, order, allow_float32=True):
    """Validate the coordinate array and return it C-contiguous.

    Args:
        coordinates (cupy.ndarray): Coordinate array with a floating point
            or integer dtype.
        order (int): Spline order; integer coordinates are converted to
            floating point only when ``order > 1``.
        allow_float32 (bool): If true the minimum floating point dtype is
            float32, otherwise float64.

    Returns:
        cupy.ndarray: The (possibly converted) C-contiguous coordinates.

    Raises:
        ValueError: If the dtype is neither floating point nor integer.
    """
    min_float = cupy.float32 if allow_float32 else cupy.float64
    kind = coordinates.dtype.kind
    if kind == 'f':
        target = cupy.promote_types(coordinates.dtype, min_float)
        coordinates = coordinates.astype(target, copy=False)
    elif kind in 'iu':
        if order > 1:
            # order > 1 (spline) kernels require floating-point coordinates
            target = cupy.promote_types(coordinates.dtype, min_float)
            coordinates = coordinates.astype(target)
    else:
        raise ValueError('coordinates should have floating point dtype')

    if coordinates.flags.c_contiguous:
        return coordinates
    return cupy.ascontiguousarray(coordinates)
+
+
def _prepad_for_spline_filter(input, mode, cval):
    """Pad ``input`` for the modes that need it before spline filtering.

    Args:
        input (cupy.ndarray): The array to be filtered.
        mode (str): Boundary mode. Only ``'nearest'`` and
            ``'grid-constant'`` cause padding.
        cval (scalar): Fill value used when padding for ``'grid-constant'``.

    Returns:
        tuple: ``(padded, npad)``, the padded array (``input`` itself when
        no padding is applied) and the pad width per edge.
    """
    # these modes need padding to get accurate boundary values
    npad = 12  # empirical factor chosen by SciPy
    if mode == 'grid-constant':
        return cupy.pad(input, npad, mode='constant',
                        constant_values=cval), npad
    if mode == 'nearest':
        return cupy.pad(input, npad, mode='edge'), npad
    return input, 0
+
+
def _filter_input(image, prefilter, mode, cval, order):
    """Perform spline prefiltering when needed.

    Spline orders > 1 need a prefiltering stage to preserve resolution.

    For boundary modes without analytical spline boundary conditions, some
    prepadding of the input with cupy.pad is used to maintain accuracy.

    Args:
        image (cupy.ndarray): The array to interpolate.
        prefilter (bool): Whether prefiltering was requested.
        mode (str): Boundary mode.
        cval (scalar): Fill value forwarded to the prepadding step.
        order (int): Spline order.

    Returns:
        tuple: ``(array, npad)``, a C-contiguous array (prefiltered when
        ``prefilter`` is true and ``order >= 2``) and the amount of padding
        that was added at each edge of the array.
    """
    if not prefilter or order < 2:
        return (cupy.ascontiguousarray(image), 0)

    padded_image, pad_width = _prepad_for_spline_filter(image, mode, cval)
    work_dtype = cupy.promote_types(image.dtype, cupy.float32)
    prefiltered = spline_filter(
        padded_image, order, output=work_dtype, mode=mode)
    return cupy.ascontiguousarray(prefiltered), pad_width
+
+
def map_coordinates(input, coordinates, output=None, order=3,
                    mode='constant', cval=0.0, prefilter=True):
    """Map the input array to new coordinates by interpolation.

    The array of coordinates is used to find, for each point in the output, the
    corresponding coordinates in the input. The value of the input at those
    coordinates is determined by spline interpolation of the requested order.

    The shape of the output is derived from that of the coordinate array by
    dropping the first axis. The values of the array along the first axis are
    the coordinates in the input array at which the output value is found.

    Args:
        input (cupy.ndarray): The input array.
        coordinates (array_like): The coordinates at which ``input`` is
            evaluated.
        output (cupy.ndarray or ~cupy.dtype): The array in which to place the
            output, or the dtype of the returned array.
        order (int): The order of the spline interpolation, default is 3. Must
            be in the range 0-5.
        mode (str): Points outside the boundaries of the input are filled
            according to the given mode (``'constant'``, ``'nearest'``,
            ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``,
            ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``).
        cval (scalar): Value used for points outside the boundaries of
            the input if ``mode='constant'`` or ``mode='opencv'``. Default is
            0.0
        prefilter (bool): Whether to apply the spline prefilter to ``input``
            before interpolation. It only has an effect for ``order > 1``.

    Returns:
        cupy.ndarray:
            The result of transforming the input. The shape of the output is
            derived from that of ``coordinates`` by dropping the first axis.

    .. seealso:: :func:`scipy.ndimage.map_coordinates`
    """

    _check_parameter('map_coordinates', order, mode)

    if mode in ('opencv', '_opencv_edge'):
        # Surround the input with a one-pixel border of ``cval``, offset the
        # coordinates to match and continue with plain 'constant' handling.
        border = [(1, 1)] * input.ndim
        input = cupy.pad(input, border, 'constant', constant_values=cval)
        coordinates = cupy.add(coordinates, 1)
        mode = 'constant'

    ret = _util._get_output(output, input, coordinates.shape[1:])
    integer_output = ret.dtype.kind in 'iu'
    _util._check_cval(mode, cval, integer_output)

    if input.dtype.kind in 'iu':
        input = input.astype(cupy.float32)
    coordinates = _check_coordinates(coordinates, order)
    filtered, nprepad = _filter_input(input, prefilter, mode, cval, order)

    large_int = max(prod(input.shape), coordinates.shape[0]) > 1 << 31
    kernel = _interp_kernels._get_map_kernel(
        input.ndim,
        large_int,
        yshape=coordinates.shape,
        mode=mode,
        cval=cval,
        order=order,
        integer_output=integer_output,
        nprepad=nprepad,
    )
    kernel(filtered, coordinates, ret)
    return ret
+
+
def affine_transform(input, matrix, offset=0.0, output_shape=None, output=None,
                     order=3, mode='constant', cval=0.0, prefilter=True, *,
                     texture_memory=False):
    """Apply an affine transformation.

    Given an output image pixel index vector ``o``, the pixel value is
    determined from the input image at position
    ``cupy.dot(matrix, o) + offset``.

    Args:
        input (cupy.ndarray): The input array.
        matrix (cupy.ndarray): The inverse coordinate transformation matrix,
            mapping output coordinates to input coordinates. If ``ndim`` is the
            number of dimensions of ``input``, the given matrix must have one
            of the following shapes:

                - ``(ndim, ndim)``: the linear transformation matrix for each
                  output coordinate.
                - ``(ndim,)``: assume that the 2D transformation matrix is
                  diagonal, with the diagonal specified by the given value.
                - ``(ndim + 1, ndim + 1)``: assume that the transformation is
                  specified using homogeneous coordinates. In this case, any
                  value passed to ``offset`` is ignored.
                - ``(ndim, ndim + 1)``: as above, but the bottom row of a
                  homogeneous transformation matrix is always
                  ``[0, 0, ..., 1]``, and may be omitted.

        offset (float or sequence): The offset into the array where the
            transform is applied. If a float, ``offset`` is the same for each
            axis. If a sequence, ``offset`` should contain one value for each
            axis.
        output_shape (tuple of ints): Shape tuple. Defaults to the shape of
            ``input``.
        output (cupy.ndarray or ~cupy.dtype): The array in which to place the
            output, or the dtype of the returned array.
        order (int): The order of the spline interpolation, default is 3. Must
            be in the range 0-5.
        mode (str): Points outside the boundaries of the input are filled
            according to the given mode (``'constant'``, ``'nearest'``,
            ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``,
            ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``).
        cval (scalar): Value used for points outside the boundaries of
            the input if ``mode='constant'`` or ``mode='opencv'``. Default is
            0.0
        prefilter (bool): Whether to apply the spline prefilter to ``input``
            before interpolation. It only has an effect for ``order > 1``.
        texture_memory (bool): If True, uses GPU texture memory. Supports only:

            - 2D and 3D float32 arrays as input
            - ``(ndim + 1, ndim + 1)`` homogeneous float32 transformation
              matrix
            - ``mode='constant'`` and ``mode='nearest'``
            - ``order=0`` (nearest neighbor) and ``order=1`` (linear
              interpolation)
            - NVIDIA CUDA GPUs

    Returns:
        cupy.ndarray:
            The transformed input. When ``output`` is an array, the result is
            written into it.

    Raises:
        RuntimeError: If ``matrix`` does not have a supported shape, or if
            ``texture_memory`` is requested on HIP.

    .. seealso:: :func:`scipy.ndimage.affine_transform`
    """

    if texture_memory:
        # _texture only available in CuPy 10.x so delay the import
        # We do not use this texture-based implementation in cuCIM.
        from cucim.skimage._vendored import _texture

        if runtime.is_hip:
            raise RuntimeError(
                'HIP currently does not support texture acceleration')
        tm_interp = 'linear' if order > 0 else 'nearest'
        return _texture.affine_transformation(data=input,
                                              transformation_matrix=matrix,
                                              output_shape=output_shape,
                                              output=output,
                                              interpolation=tm_interp,
                                              mode=mode,
                                              border_value=cval)

    _check_parameter('affine_transform', order, mode)

    offset = _util._fix_sequence_arg(offset, input.ndim, 'offset', float)

    if matrix.ndim not in [1, 2] or matrix.shape[0] < 1:
        raise RuntimeError('no proper affine matrix provided')
    if matrix.ndim == 2:
        # split homogeneous matrices into their linear part and offset
        if matrix.shape[0] == matrix.shape[1] - 1:
            offset = matrix[:, -1]
            matrix = matrix[:, :-1]
        elif matrix.shape[0] == input.ndim + 1:
            offset = matrix[:-1, -1]
            matrix = matrix[:-1, :-1]
        if matrix.shape != (input.ndim, input.ndim):
            raise RuntimeError('improper affine shape')

    if mode == 'opencv':
        m = cupy.zeros((input.ndim + 1, input.ndim + 1))
        # A 1-D matrix specifies the diagonal. It has to be expanded before
        # being stored, otherwise the assignment broadcasts the vector across
        # every row and the matrix inverted below is singular.
        m[:-1, :-1] = cupy.diag(matrix) if matrix.ndim == 1 else matrix
        m[:-1, -1] = offset
        m[-1, -1] = 1
        m = cupy.linalg.inv(m)
        # swap the first two rows, then the first two columns of that block
        m[:2] = cupy.roll(m[:2], 1, axis=0)
        m[:2, :2] = cupy.roll(m[:2, :2], 1, axis=1)
        matrix = m[:-1, :-1]
        offset = m[:-1, -1]

    if output_shape is None:
        output_shape = input.shape

    if mode == 'opencv' or mode == '_opencv_edge':
        # these modes are evaluated through map_coordinates on an explicit
        # coordinate grid
        if matrix.ndim == 1:
            matrix = cupy.diag(matrix)
        coordinates = cupy.indices(output_shape, dtype=cupy.float64)
        coordinates = cupy.dot(matrix, coordinates.reshape((input.ndim, -1)))
        coordinates += cupy.expand_dims(cupy.asarray(offset), -1)
        ret = _util._get_output(output, input, shape=output_shape)
        ret[:] = map_coordinates(input, coordinates, ret.dtype, order, mode,
                                 cval, prefilter).reshape(output_shape)
        return ret

    matrix = matrix.astype(cupy.float64, copy=False)
    ndim = input.ndim
    output = _util._get_output(output, input, shape=output_shape)
    if input.dtype.kind in 'iu':
        input = input.astype(cupy.float32)
    filtered, nprepad = _filter_input(input, prefilter, mode, cval, order)

    integer_output = output.dtype.kind in 'iu'
    _util._check_cval(mode, cval, integer_output)
    large_int = max(prod(input.shape), prod(output_shape)) > 1 << 31
    if matrix.ndim == 1:
        # A diagonal matrix is a zoom plus shift. The zoom/shift kernel
        # computes zoom * (coord - shift), hence the rescaled, negated offset.
        offset = cupy.asarray(offset, dtype=cupy.float64)
        offset = -offset / matrix
        kern = _interp_kernels._get_zoom_shift_kernel(
            ndim, large_int, output_shape, mode, cval=cval, order=order,
            integer_output=integer_output, nprepad=nprepad)
        kern(filtered, offset, matrix, output)
    else:
        kern = _interp_kernels._get_affine_kernel(
            ndim, large_int, output_shape, mode, cval=cval, order=order,
            integer_output=integer_output, nprepad=nprepad)
        # pack the linear part and the offset into one (ndim, ndim + 1) array
        m = cupy.zeros((ndim, ndim + 1), dtype=cupy.float64)
        m[:, :-1] = matrix
        m[:, -1] = cupy.asarray(offset, dtype=cupy.float64)
        kern(filtered, m, output)
    return output
+
+
+def _minmax(coor, minc, maxc):
+    """Grow a 2-D bounding box in place so that it covers ``coor``.
+
+    For each of the first two components, ``minc`` is lowered when ``coor``
+    lies below it; otherwise ``maxc`` is raised when ``coor`` lies above it.
+
+    Args:
+        coor: Indexable holding the point's two coordinates.
+        minc: Mutable indexable with the current per-axis minima.
+        maxc: Mutable indexable with the current per-axis maxima.
+
+    Returns:
+        tuple: The same ``(minc, maxc)`` objects, updated in place.
+    """
+    for axis in (0, 1):
+        value = coor[axis]
+        if value < minc[axis]:
+            minc[axis] = value
+        elif value > maxc[axis]:
+            maxc[axis] = value
+    return minc, maxc
+
+
+def rotate(input, angle, axes=(1, 0), reshape=True, output=None, order=3,
+           mode='constant', cval=0.0, prefilter=True):
+    """Rotate an array.
+
+    The array is rotated in the plane defined by the two axes given by the
+    ``axes`` parameter using spline interpolation of the requested order.
+    The rotation matrix, offset and output shape are computed on the host
+    with NumPy; the resampling itself is delegated to
+    :func:`affine_transform`.
+
+    Args:
+        input (cupy.ndarray): The input array.
+        angle (float): The rotation angle in degrees.
+        axes (tuple of 2 ints): The two axes that define the plane of rotation.
+            Negative values count from the last axis. Default is the first
+            two axes.
+        reshape (bool): If ``reshape`` is True, the output shape is adapted so
+            that the input array is contained completely in the output. Default
+            is True.
+        output (cupy.ndarray or ~cupy.dtype): The array in which to place the
+            output, or the dtype of the returned array.
+        order (int): The order of the spline interpolation, default is 3. Must
+            be in the range 0-5.
+        mode (str): Points outside the boundaries of the input are filled
+            according to the given mode (``'constant'``, ``'nearest'``,
+            ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``,
+            ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``).
+        cval (scalar): Value used for points outside the boundaries of
+            the input if ``mode='constant'`` or ``mode='opencv'``. Default is
+            0.0
+        prefilter (bool): Forwarded unchanged to :func:`affine_transform`.
+
+    Returns:
+        cupy.ndarray or None:
+            The rotated input, as returned by :func:`affine_transform`.
+
+    Raises:
+        ValueError: If ``axes`` does not name two valid axes of ``input``.
+
+    .. seealso:: :func:`scipy.ndimage.rotate`
+    """
+
+    _check_parameter('rotate', order, mode)
+
+    if mode == 'opencv':
+        mode = '_opencv_edge'
+
+    input_arr = input
+    axes = list(axes)
+    if axes[0] < 0:
+        axes[0] += input_arr.ndim
+    if axes[1] < 0:
+        axes[1] += input_arr.ndim
+    if axes[0] > axes[1]:
+        axes = [axes[1], axes[0]]
+    if axes[0] < 0 or input_arr.ndim <= axes[1]:
+        raise ValueError('invalid rotation plane specified')
+
+    ndim = input_arr.ndim
+    rad = math.radians(angle)
+    sincos = cmath.rect(1, rad)
+    cos, sin = sincos.real, sincos.imag
+
+    # determine offsets and output shape as in scipy.ndimage.rotate
+    rot_matrix = numpy.array([[cos, sin],
+                              [-sin, cos]])
+
+    img_shape = numpy.asarray(input_arr.shape)
+    in_plane_shape = img_shape[axes]
+    if reshape:
+        # Compute transformed input bounds
+        iy, ix = in_plane_shape
+        out_bounds = rot_matrix @ [[0, 0, iy, iy],
+                                   [0, ix, 0, ix]]
+        # Compute the shape of the transformed input plane.
+        # numpy.ptp() is used instead of the ndarray.ptp() method, which was
+        # removed in NumPy 2.0.
+        out_plane_shape = (
+            numpy.ptp(out_bounds, axis=1) + 0.5).astype(cupy.int64)
+    else:
+        out_plane_shape = img_shape[axes]
+
+    out_center = rot_matrix @ ((out_plane_shape - 1) / 2)
+    in_center = (in_plane_shape - 1) / 2
+
+    # img_shape is a fresh host array, so it can be updated in place.
+    output_shape = img_shape
+    output_shape[axes] = out_plane_shape
+    output_shape = tuple(output_shape)
+
+    # Embed the 2-D rotation into an identity matrix over all axes.
+    matrix = numpy.identity(ndim)
+    matrix[axes[0], axes[0]] = cos
+    matrix[axes[0], axes[1]] = sin
+    matrix[axes[1], axes[0]] = -sin
+    matrix[axes[1], axes[1]] = cos
+
+    offset = numpy.zeros(ndim, dtype=cupy.float64)
+    offset[axes] = in_center - out_center
+
+    matrix = cupy.asarray(matrix)
+    offset = cupy.asarray(offset)
+
+    return affine_transform(input, matrix, offset, output_shape, output, order,
+                            mode, cval, prefilter)
+
+
+def shift(input, shift, output=None, order=3, mode='constant', cval=0.0,
+          prefilter=True):
+    """Translate an array using spline interpolation.
+
+    Samples that fall outside the input are filled according to ``mode``.
+
+    Args:
+        input (cupy.ndarray): Array to shift.
+        shift (float or sequence): Displacement along each axis. A scalar is
+            applied to every axis; a sequence must hold one value per axis.
+        output (cupy.ndarray or ~cupy.dtype): Destination array, or the dtype
+            of the array to allocate.
+        order (int): Spline interpolation order, default 3. Must be in the
+            range 0-5.
+        mode (str): Boundary handling: ``'constant'``, ``'nearest'``,
+            ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``,
+            ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``.
+        cval (scalar): Fill value used when ``mode='constant'`` or
+            ``mode='opencv'``. Default is 0.0
+        prefilter (bool): Forwarded to ``_filter_input`` or, for
+            ``mode='opencv'``, to :func:`affine_transform`.
+
+    Returns:
+        cupy.ndarray or None:
+            The shifted input.
+
+    .. seealso:: :func:`scipy.ndimage.shift`
+    """
+
+    _check_parameter('shift', order, mode)
+
+    shift = _util._fix_sequence_arg(shift, input.ndim, 'shift', float)
+
+    if mode == 'opencv':
+        # The opencv variant is expressed as an affine transform with unit
+        # scale and the negated shift as offset.
+        unit_scale = cupy.ones(input.ndim, input.dtype)
+        neg_shift = cupy.negative(cupy.asarray(shift))
+        return affine_transform(input, unit_scale, neg_shift, None, output,
+                                order, '_opencv_edge', cval, prefilter)
+
+    result = _util._get_output(output, input)
+    # Integer inputs are interpolated in float32.
+    src = input.astype(cupy.float32) if input.dtype.kind in 'iu' else input
+    filtered, nprepad = _filter_input(src, prefilter, mode, cval, order)
+    integer_output = result.dtype.kind in 'iu'
+    _util._check_cval(mode, cval, integer_output)
+    large_int = prod(src.shape) > (1 << 31)
+    kern = _interp_kernels._get_shift_kernel(
+        src.ndim, large_int, src.shape, mode, cval=cval, order=order,
+        integer_output=integer_output, nprepad=nprepad)
+    shift_arr = cupy.asarray(shift, dtype=cupy.float64, order='C')
+    if shift_arr.ndim != 1:
+        raise ValueError('shift must be 1d')
+    if shift_arr.size != filtered.ndim:
+        raise ValueError('len(shift) must equal input.ndim')
+    kern(filtered, shift_arr, result)
+    return result
+
+
+def zoom(input, zoom, output=None, order=3, mode='constant', cval=0.0,
+         prefilter=True, *, grid_mode=False):
+    """Resample an array by per-axis zoom factors using spline interpolation.
+
+    The output extent along each axis is ``int(round(size * factor))``.
+
+    Args:
+        input (cupy.ndarray): Array to zoom.
+        zoom (float or sequence): Zoom factor along each axis. A scalar is
+            applied to every axis; a sequence must hold one value per axis.
+        output (cupy.ndarray or ~cupy.dtype): Destination array, or the dtype
+            of the array to allocate.
+        order (int): Spline interpolation order, default 3. Must be in the
+            range 0-5.
+        mode (str): Boundary handling: ``'constant'``, ``'nearest'``,
+            ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``,
+            ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``.
+        cval (scalar): Fill value used when ``mode='constant'`` or
+            ``mode='opencv'``. Default is 0.0
+        prefilter (bool): Forwarded to ``_filter_input`` or, for
+            ``mode='opencv'``, to :func:`affine_transform`.
+        grid_mode (bool, optional): If False, distances between pixel
+            centers are zoomed. If True, the full pixel extent is zoomed.
+            A 1d signal of length 5 therefore has length 4 when
+            ``grid_mode`` is False and length 5 when it is True:
+
+            .. code-block:: text
+
+                | pixel 1 | pixel 2 | pixel 3 | pixel 4 | pixel 5 |
+                     |<-------------------------------------->|
+                                        vs.
+                |<----------------------------------------------->|
+
+            The start of each arrow corresponds to coordinate location 0 in
+            that mode.
+
+    Returns:
+        cupy.ndarray or None:
+            The zoomed input.
+
+    .. seealso:: :func:`scipy.ndimage.zoom`
+    """
+
+    _check_parameter('zoom', order, mode)
+
+    zoom = _util._fix_sequence_arg(zoom, input.ndim, 'zoom', float)
+
+    output_shape = tuple(
+        int(round(size * factor)) for size, factor in zip(input.shape, zoom))
+
+    if mode == 'opencv':
+        # Express the resize as a diagonal affine transform with a
+        # half-pixel style offset; degenerate axes get zero scale and offset.
+        scales = []
+        offsets = []
+        for in_size, out_size in zip(input.shape, output_shape):
+            if out_size > 1:
+                scale = float(in_size) / out_size
+                scales.append(scale)
+                offsets.append((scale - 1) / 2.0)
+            else:
+                scales.append(0)
+                offsets.append(0)
+        return affine_transform(input, cupy.asarray(scales), offsets,
+                                output_shape, output, order, 'nearest', cval,
+                                prefilter)
+
+    if grid_mode:
+        # warn about modes that may have surprising behavior
+        suggest_mode = None
+        if mode in ('constant', 'wrap'):
+            suggest_mode = 'grid-' + mode
+        if suggest_mode is not None:
+            warnings.warn(
+                f'It is recommended to use mode = {suggest_mode} instead '
+                f'of {mode} when grid_mode is True.')
+
+    # Per-axis input/output coordinate ratio handed to the kernel.
+    ratios = []
+    for in_size, out_size in zip(input.shape, output_shape):
+        if grid_mode and out_size > 0:
+            ratios.append(in_size / out_size)
+        elif out_size > 1:
+            ratios.append((in_size - 1) / (out_size - 1))
+        else:
+            ratios.append(0)
+
+    result = _util._get_output(output, input, shape=output_shape)
+    # Integer inputs are interpolated in float32.
+    src = input.astype(cupy.float32) if input.dtype.kind in 'iu' else input
+    filtered, nprepad = _filter_input(src, prefilter, mode, cval, order)
+    integer_output = result.dtype.kind in 'iu'
+    _util._check_cval(mode, cval, integer_output)
+    large_int = max(prod(src.shape), prod(output_shape)) > (1 << 31)
+    # NOTE(review): cval is not forwarded to _get_zoom_kernel here, unlike
+    # the shift/affine kernel getters -- confirm this is intended.
+    kern = _interp_kernels._get_zoom_kernel(
+        src.ndim, large_int, output_shape, mode, order=order,
+        integer_output=integer_output, grid_mode=grid_mode,
+        nprepad=nprepad)
+    ratios = cupy.asarray(ratios, dtype=cupy.float64)
+    kern(filtered, ratios, result)
+    return result
diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_morphology.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_morphology.py
new file mode 100644
index 000000000..5c6462cff
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_morphology.py
@@ -0,0 +1,1016 @@
+import operator
+import warnings
+
+import cupy
+import numpy
+from cupy import _core
+
+from cucim.skimage._vendored import _ndimage_filters as _filters
+from cucim.skimage._vendored import _ndimage_filters_core as _filters_core
+from cucim.skimage._vendored import _ndimage_util as _util
+
+
+@cupy.memoize(for_each_device=True)
+def _get_binary_erosion_kernel(
+ w_shape, int_type, offsets, center_is_true, border_value, invert, masked,
+ all_weights_nonzero
+):
+ """Build the kernel used for binary erosion (or, inverted, dilation).
+
+ The generated kernel is memoized per device on the argument tuple.
+
+ Args:
+ w_shape: Shape of the structuring element.
+ int_type: Passed through to ``_generate_nd_kernel``.
+ offsets: Passed through to ``_generate_nd_kernel``.
+ center_is_true: Enables the early exit for pixels equal to the
+ "false" value.
+ border_value: Truthiness decides whether out-of-bounds neighbors
+ force a "false" result.
+ invert: If true, the true/false output values are swapped and
+ ``border_value`` is negated.
+ masked: If true, the kernel takes a mask and copies the input through
+ wherever the mask is false.
+ all_weights_nonzero: Passed through to ``_generate_nd_kernel``.
+
+ Returns:
+ The kernel object returned by ``_filters_core._generate_nd_kernel``.
+ """
+ # With invert set, the border value is negated and the values written
+ # for "true" and "false" are swapped.
+ if invert:
+ border_value = int(not border_value)
+ true_val = 0
+ false_val = 1
+ else:
+ true_val = 1
+ false_val = 0
+
+ # Per-pixel prologue: copy the input through for unmasked pixels, and exit
+ # early when the center is part of the structure and the pixel already
+ # equals the "false" value.
+ if masked:
+ pre = """
+ bool mv = (bool)mask[i];
+ bool _in = (bool)x[i];
+ if (!mv) {{
+ y = cast(_in);
+ return;
+ }} else if ({center_is_true} && _in == {false_val}) {{
+ y = cast(_in);
+ return;
+ }}""".format(center_is_true=int(center_is_true),
+ false_val=false_val)
+ else:
+ pre = """
+ bool _in = (bool)x[i];
+ if ({center_is_true} && _in == {false_val}) {{
+ y = cast(_in);
+ return;
+ }}""".format(center_is_true=int(center_is_true),
+ false_val=false_val)
+ # Default result is "true"; the neighbor loop below overrides it.
+ pre = pre + """
+ y = cast({true_val});""".format(true_val=true_val)
+
+ # Per-neighbor body: {cond} and {value} are left as placeholders for
+ # _generate_nd_kernel (presumably the boundary test and the neighbor
+ # value -- confirm against that helper).
+ # {{{{ required because format is called again within _generate_nd_kernel
+ found = """
+ if ({{cond}}) {{{{
+ if (!{border_value}) {{{{
+ y = cast({false_val});
+ return;
+ }}}}
+ }}}} else {{{{
+ bool nn = {{value}} ? {true_val} : {false_val};
+ if (!nn) {{{{
+ y = cast({false_val});
+ return;
+ }}}}
+ }}}}""".format(true_val=int(true_val),
+ false_val=int(false_val),
+ border_value=int(border_value),)
+
+ # The inverted variant gets a distinct kernel name.
+ name = 'binary_erosion'
+ if false_val:
+ name += '_invert'
+ return _filters_core._generate_nd_kernel(
+ name,
+ pre,
+ found,
+ '',
+ 'constant', w_shape, int_type, offsets, 0, ctype='Y', has_weights=True,
+ has_structure=False, has_mask=masked, binary_morphology=True,
+ all_weights_nonzero=all_weights_nonzero)
+
+
+def _center_is_true(structure, origin):
+    """Return whether ``structure`` is set at its origin-shifted center.
+
+    The probed index along each axis is ``origin + size // 2``. Converting
+    the selected element to a Python bool synchronizes the device.
+    """
+    center = tuple(
+        shift + extent // 2 for extent, shift in zip(structure.shape, origin)
+    )
+    return bool(structure[center])  # device synchronization
+
+
+def iterate_structure(structure, iterations, origin=None):
+    """Iterate a structure by dilating it with itself.
+
+    Args:
+        structure(array_like): Structuring element (an array of bools,
+            for example), to be dilated with itself.
+        iterations(int): The number of dilations performed on the structure
+            with itself.
+        origin(int or tuple of int, optional): If origin is None, only the
+            iterated structure is returned. If not, a tuple of the iterated
+            structure and the modified origin is returned.
+
+    Returns:
+        cupy.ndarray: A new structuring element obtained by dilating
+        ``structure`` (``iterations`` - 1) times with itself. When
+        ``iterations`` is less than 2 a copy of ``structure`` is returned,
+        regardless of ``origin``. Otherwise, when ``origin`` is given, the
+        result is a tuple ``(structure, origin)`` in which each origin
+        component has been multiplied by ``iterations``.
+
+    .. seealso:: :func:`scipy.ndimage.iterate_structure`
+    """
+    if iterations < 2:
+        return structure.copy()
+    ni = iterations - 1
+    # Every additional dilation grows each axis by (size - 1).
+    shape = [size + ni * (size - 1) for size in structure.shape]
+    pos = [ni * (size // 2) for size in structure.shape]
+    # Place the original structure in the middle of the enlarged array.
+    slc = tuple(
+        slice(start, start + size, None)
+        for start, size in zip(pos, structure.shape)
+    )
+    out = cupy.zeros(shape, bool)
+    out[slc] = structure != 0
+    # brute_force=True is required here: for more than one iteration
+    # _binary_erosion raises NotImplementedError when brute_force is False
+    # and the structure's center element is set.
+    out = binary_dilation(out, structure, iterations=ni, brute_force=True)
+    if origin is None:
+        return out
+    origin = _util._fix_sequence_arg(origin, structure.ndim, 'origin', int)
+    origin = [iterations * o for o in origin]
+    return out, origin
+
+
+def generate_binary_structure(rank, connectivity):
+    """Create a 3-wide structuring element for binary morphology.
+
+    The element is computed on the host with NumPy and then transferred to
+    the device.
+
+    Args:
+        rank(int): Number of dimensions of the array the element will be
+            applied to, as returned by ``np.ndim``.
+        connectivity(int): Selects which elements count as neighbors of the
+            center: those whose summed absolute offset from the center is at
+            most ``connectivity``. For a 3-wide element this equals the
+            squared distance. Values below 1 are treated as 1;
+            ``connectivity`` may range from 1 (no diagonal neighbors) to
+            ``rank`` (all elements are neighbors).
+
+    Returns:
+        cupy.ndarray: Boolean structuring element with ``rank`` dimensions,
+        each of size 3. For ``rank < 1`` a 0-dimensional ``True`` array is
+        returned.
+
+    .. seealso:: :func:`scipy.ndimage.generate_binary_structure`
+    """
+    if connectivity < 1:
+        connectivity = 1
+    if rank < 1:
+        return cupy.asarray(True, dtype=bool)
+    # Offsets of every element from the center along each axis.
+    offsets = numpy.indices([3] * rank) - 1
+    distance = numpy.abs(offsets).sum(axis=0)
+    return cupy.asarray(distance <= connectivity)
+
+
+def _binary_erosion(input, structure, iterations, mask, output, border_value,
+ origin, invert, brute_force=True):
+ """Shared implementation of binary erosion and (inverted) dilation.
+
+ Args:
+ input (cupy.ndarray): Array to process; complex dtypes are rejected.
+ structure (cupy.ndarray or None): Structuring element. If None, the
+ element from ``generate_binary_structure(input.ndim, 1)`` is used.
+ iterations (int): Number of passes. Values below 1 repeat until the
+ result stops changing.
+ mask (cupy.ndarray or None): Optional mask with the shape of ``input``.
+ output (cupy.ndarray or other): Destination array. Anything that is
+ not a ``cupy.ndarray`` is replaced by ``bool`` before allocation.
+ border_value: Forwarded to the kernel factory.
+ origin (int or sequence of int): Normalized to one int per axis.
+ invert: Forwarded to the kernel factory; ``binary_dilation`` passes 1.
+ brute_force (bool): Must be true for multi-pass operation when the
+ structure's center is set.
+
+ Returns:
+ cupy.ndarray: The processed array.
+
+ Raises:
+ TypeError: Non-integer ``iterations`` or complex input/output.
+ RuntimeError: Structure/mask shape problems or an empty structure.
+ NotImplementedError: ``iterations != 1`` with a true center and
+ ``brute_force`` false.
+ ValueError: Output and input overlap in the iterative branch.
+ """
+ try:
+ iterations = operator.index(iterations)
+ except TypeError:
+ raise TypeError('iterations parameter should be an integer')
+
+ if input.dtype.kind == 'c':
+ raise TypeError('Complex type not supported')
+ if structure is None:
+ structure = generate_binary_structure(input.ndim, 1)
+ all_weights_nonzero = input.ndim == 1
+ center_is_true = True
+ default_structure = True
+ else:
+ structure = structure.astype(dtype=bool, copy=False)
+ # transfer to CPU for use in determining if it is fully dense
+ # structure_cpu = cupy.asnumpy(structure)
+ default_structure = False
+ if structure.ndim != input.ndim:
+ raise RuntimeError('structure and input must have same dimensionality')
+ if not structure.flags.c_contiguous:
+ structure = cupy.ascontiguousarray(structure)
+ if structure.size < 1:
+ raise RuntimeError('structure must not be empty')
+
+ if mask is not None:
+ if mask.shape != input.shape:
+ raise RuntimeError('mask and input must have equal sizes')
+ if not mask.flags.c_contiguous:
+ mask = cupy.ascontiguousarray(mask)
+ masked = True
+ else:
+ masked = False
+ origin = _util._fix_sequence_arg(origin, input.ndim, 'origin', int)
+
+ # Only a user-supplied array is honored as output; anything else
+ # (None or a dtype) results in a freshly allocated bool array.
+ if isinstance(output, cupy.ndarray):
+ if output.dtype.kind == 'c':
+ raise TypeError('Complex output type not supported')
+ else:
+ output = bool
+ output = _util._get_output(output, input)
+ temp_needed = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS')
+ if temp_needed:
+ # input and output arrays cannot share memory
+ # Work in a scratch array; the result is copied into ``temp`` (the
+ # caller's array) at the end of the function.
+ temp = output
+ output = _util._get_output(output.dtype, input)
+ if structure.ndim == 0:
+ # kernel doesn't handle ndim=0, so special case it here
+ # NOTE(review): this returns before the copy-back to ``temp`` below.
+ if float(structure):
+ output[...] = cupy.asarray(input, dtype=bool)
+ else:
+ output[...] = ~cupy.asarray(input, dtype=bool)
+ return output
+ origin = tuple(origin)
+ int_type = _util._get_inttype(input)
+ offsets = _filters_core._origins_to_offsets(origin, structure.shape)
+ if not default_structure:
+ # synchronize required to determine if all weights are non-zero
+ nnz = int(cupy.count_nonzero(structure))
+ all_weights_nonzero = nnz == structure.size
+ if all_weights_nonzero:
+ center_is_true = True
+ else:
+ center_is_true = _center_is_true(structure, origin)
+
+ erode_kernel = _get_binary_erosion_kernel(
+ structure.shape, int_type, offsets, center_is_true, border_value,
+ invert, masked, all_weights_nonzero,
+ )
+
+ if iterations == 1:
+ # Single pass: one kernel launch straight into ``output``.
+ if masked:
+ output = erode_kernel(input, structure, mask, output)
+ else:
+ output = erode_kernel(input, structure, output)
+ elif center_is_true and not brute_force:
+ raise NotImplementedError(
+ 'only brute_force iteration has been implemented'
+ )
+ else:
+ # Multi-pass: ping-pong between ``output`` and a scratch buffer.
+ if cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS'):
+ raise ValueError('output and input may not overlap in memory')
+ tmp_in = cupy.empty_like(input, dtype=output.dtype)
+ tmp_out = output
+ # For an even iteration count start in the scratch buffer so that the
+ # last of ``iterations`` passes writes into ``output``.
+ if iterations >= 1 and not iterations & 1:
+ tmp_in, tmp_out = tmp_out, tmp_in
+ if masked:
+ tmp_out = erode_kernel(input, structure, mask, tmp_out)
+ else:
+ tmp_out = erode_kernel(input, structure, tmp_out)
+ # TODO: kernel doesn't return the changed status, so determine it here
+ changed = not (input == tmp_out).all() # synchronize!
+ ii = 1
+ # Run the remaining fixed passes, or keep going until nothing changes
+ # when iterations < 1.
+ while ii < iterations or ((iterations < 1) and changed):
+ tmp_in, tmp_out = tmp_out, tmp_in
+ if masked:
+ tmp_out = erode_kernel(tmp_in, structure, mask, tmp_out)
+ else:
+ tmp_out = erode_kernel(tmp_in, structure, tmp_out)
+ changed = not (tmp_in == tmp_out).all()
+ ii += 1
+ if not changed and (not ii & 1): # synchronize!
+ # can exit early if nothing changed
+ # (only do this after even number of tmp_in/out swaps)
+ break
+ output = tmp_out
+ if temp_needed:
+ # Copy the scratch result into the array the caller supplied.
+ _core.elementwise_copy(output, temp)
+ output = temp
+ return output
+
+
+def binary_erosion(input, structure=None, iterations=1, mask=None, output=None,
+                   border_value=0, origin=0, brute_force=False):
+    """Erode a binary array with a structuring element.
+
+    This is a thin wrapper that calls ``_binary_erosion`` without inversion.
+
+    Args:
+        input(cupy.ndarray): Binary array to erode; non-zero elements are
+            treated as True.
+        structure(cupy.ndarray, optional): Structuring element; non-zero
+            elements are treated as True. If omitted, an element with square
+            connectivity one is generated. (Default value = None).
+        iterations(int, optional): Number of times the erosion is applied
+            (one, by default). A value below 1 repeats the erosion until the
+            result no longer changes. Must be an integer.
+        mask(cupy.ndarray or None, optional): If given, only elements whose
+            mask value is True are modified at each iteration.
+            (Default value = None)
+        output(cupy.ndarray, optional): Array of the same shape as input that
+            receives the result. By default, a new array is created.
+        border_value(int (cast to 0 or 1), optional): Value at the border in
+            the output array. (Default value = 0)
+        origin(int or tuple of ints, optional): Placement of the filter, by
+            default 0.
+        brute_force(boolean, optional): Memory condition: if False, only
+            pixels changed in the previous iteration are candidates for the
+            current one; if True, every pixel is a candidate. With
+            ``iterations != 1`` and a structure whose center is set,
+            ``_binary_erosion`` raises ``NotImplementedError`` unless this is
+            True.
+
+    Returns:
+        cupy.ndarray: The result of binary erosion.
+
+    .. warning::
+
+        This function may synchronize the device.
+
+    .. seealso:: :func:`scipy.ndimage.binary_erosion`
+    """
+    return _binary_erosion(
+        input, structure, iterations, mask, output, border_value, origin,
+        invert=0, brute_force=brute_force)
+
+
+def binary_dilation(input, structure=None, iterations=1, mask=None,
+                    output=None, border_value=0, origin=0, brute_force=False):
+    """Dilate a binary array with a structuring element.
+
+    The dilation is carried out by ``_binary_erosion`` in inverted mode,
+    using the structure reversed along every axis and a mirrored origin.
+
+    Args:
+        input(cupy.ndarray): Binary array to dilate; non-zero elements are
+            treated as True.
+        structure(cupy.ndarray, optional): Structuring element; non-zero
+            elements are treated as True. If omitted, an element with square
+            connectivity one is generated. (Default value = None).
+        iterations(int, optional): Number of times the dilation is applied
+            (one, by default). A value below 1 repeats the dilation until the
+            result no longer changes. Must be an integer.
+        mask(cupy.ndarray or None, optional): If given, only elements whose
+            mask value is True are modified at each iteration.
+            (Default value = None)
+        output(cupy.ndarray, optional): Array of the same shape as input that
+            receives the result. By default, a new array is created.
+        border_value(int (cast to 0 or 1), optional): Value at the border in
+            the output array. (Default value = 0)
+        origin(int or tuple of ints, optional): Placement of the filter, by
+            default 0.
+        brute_force(boolean, optional): Memory condition: if False, only
+            pixels changed in the previous iteration are candidates for the
+            current one; if True, every pixel is a candidate. With
+            ``iterations != 1`` and a structure whose center is set,
+            ``_binary_erosion`` raises ``NotImplementedError`` unless this is
+            True.
+
+    Returns:
+        cupy.ndarray: The result of binary dilation.
+
+    .. warning::
+
+        This function may synchronize the device.
+
+    .. seealso:: :func:`scipy.ndimage.binary_dilation`
+    """
+    if structure is None:
+        structure = generate_binary_structure(input.ndim, 1)
+    origin = _util._fix_sequence_arg(origin, input.ndim, 'origin', int)
+    # Reverse the structure along every axis.
+    reverse_all = (slice(None, None, -1),) * structure.ndim
+    structure = structure[reverse_all]
+    # Mirror the origin; even-sized axes need one extra step.
+    origin = [
+        -shift - 1 if not structure.shape[axis] & 1 else -shift
+        for axis, shift in enumerate(origin)
+    ]
+    return _binary_erosion(
+        input, structure, iterations, mask, output, border_value, origin,
+        invert=1, brute_force=brute_force)
+
+
+def binary_opening(input, structure=None, iterations=1, output=None, origin=0,
+                   mask=None, border_value=0, brute_force=False):
+    """
+    Open a binary array with a structuring element.
+
+    Opening is an *erosion* followed by a *dilation* with the same
+    structuring element; both steps receive the same ``iterations``,
+    ``mask``, ``border_value``, ``origin`` and ``brute_force``.
+
+    Args:
+        input(cupy.ndarray): Binary array to open; non-zero elements are
+            treated as True.
+        structure(cupy.ndarray, optional): Structuring element; non-zero
+            elements are treated as True. If omitted, an element with square
+            connectivity one is generated. (Default value = None).
+        iterations(int, optional): Number of times each step is applied
+            (one, by default). A value below 1 repeats each step until its
+            result no longer changes. Must be an integer.
+        output(cupy.ndarray, optional): Array of the same shape as input that
+            receives the result. By default, a new array is created.
+        origin(int or tuple of ints, optional): Placement of the filter, by
+            default 0.
+        mask(cupy.ndarray or None, optional): If given, only elements whose
+            mask value is True are modified at each iteration.
+            (Default value = None)
+        border_value(int (cast to 0 or 1), optional): Value at the border in
+            the output array. (Default value = 0)
+        brute_force(boolean, optional): Memory condition: if False, only
+            pixels changed in the previous iteration are candidates for the
+            current one; if True, every pixel is a candidate.
+
+    Returns:
+        cupy.ndarray: The result of binary opening.
+
+    .. warning::
+
+        This function may synchronize the device.
+
+    .. seealso:: :func:`scipy.ndimage.binary_opening`
+    """
+    if structure is None:
+        structure = generate_binary_structure(input.ndim, 1)
+    # The intermediate erosion always goes to a fresh array.
+    eroded = binary_erosion(
+        input, structure, iterations, mask=mask, output=None,
+        border_value=border_value, origin=origin, brute_force=brute_force)
+    return binary_dilation(
+        eroded, structure, iterations, mask=mask, output=output,
+        border_value=border_value, origin=origin, brute_force=brute_force)
+
+
+def binary_closing(input, structure=None, iterations=1, output=None, origin=0,
+                   mask=None, border_value=0, brute_force=False):
+    """
+    Close a binary array with a structuring element.
+
+    Closing is a *dilation* followed by an *erosion* with the same
+    structuring element; both steps receive the same ``iterations``,
+    ``mask``, ``border_value``, ``origin`` and ``brute_force``.
+
+    Args:
+        input(cupy.ndarray): Binary array to close; non-zero elements are
+            treated as True.
+        structure(cupy.ndarray, optional): Structuring element; non-zero
+            elements are treated as True. If omitted, an element with square
+            connectivity one is generated. (Default value = None).
+        iterations(int, optional): Number of times each step is applied
+            (one, by default). A value below 1 repeats each step until its
+            result no longer changes. Must be an integer.
+        output(cupy.ndarray, optional): Array of the same shape as input that
+            receives the result. By default, a new array is created.
+        origin(int or tuple of ints, optional): Placement of the filter, by
+            default 0.
+        mask(cupy.ndarray or None, optional): If given, only elements whose
+            mask value is True are modified at each iteration.
+            (Default value = None)
+        border_value(int (cast to 0 or 1), optional): Value at the border in
+            the output array. (Default value = 0)
+        brute_force(boolean, optional): Memory condition: if False, only
+            pixels changed in the previous iteration are candidates for the
+            current one; if True, every pixel is a candidate.
+
+    Returns:
+        cupy.ndarray: The result of binary closing.
+
+    .. warning::
+
+        This function may synchronize the device.
+
+    .. seealso:: :func:`scipy.ndimage.binary_closing`
+    """
+    if structure is None:
+        structure = generate_binary_structure(input.ndim, 1)
+    # The intermediate dilation always goes to a fresh array.
+    dilated = binary_dilation(
+        input, structure, iterations, mask=mask, output=None,
+        border_value=border_value, origin=origin, brute_force=brute_force)
+    return binary_erosion(
+        dilated, structure, iterations, mask=mask, output=output,
+        border_value=border_value, origin=origin, brute_force=brute_force)
+
+
+def binary_hit_or_miss(input, structure1=None, structure2=None, output=None,
+                       origin1=0, origin2=None):
+    """
+    Multidimensional binary hit-or-miss transform.
+
+    Locates occurrences of a pattern in ``input``: positions where
+    ``structure1`` fits the foreground and ``structure2`` misses it.
+
+    Args:
+        input (cupy.ndarray): Binary image in which to detect the pattern.
+        structure1 (cupy.ndarray, optional): Part of the structuring element
+            that must fit the foreground (non-zero elements) of ``input``.
+            Defaults to a structure of square connectivity 1.
+        structure2 (cupy.ndarray, optional): Part of the structuring element
+            that must miss the foreground completely. Defaults to the
+            logical complement of ``structure1``.
+        output (cupy.ndarray, dtype or None, optional): Array of the same
+            shape as input that receives the result. By default, a new array
+            is created.
+        origin1 (int or tuple of ints, optional): Placement of
+            ``structure1``, by default 0 for a centered structure.
+        origin2 (int or tuple of ints or None, optional): Placement of
+            ``structure2``. When None, the normalized ``origin1`` is used.
+
+    Returns:
+        cupy.ndarray: Hit-or-miss transform of ``input`` with the given
+        structuring element (``structure1``, ``structure2``). When
+        ``output`` is an array the result is written into it and nothing is
+        returned.
+
+    .. warning::
+
+        This function may synchronize the device.
+
+    .. seealso:: :func:`scipy.ndimage.binary_hit_or_miss`
+    """
+    if structure1 is None:
+        structure1 = generate_binary_structure(input.ndim, 1)
+    if structure2 is None:
+        structure2 = cupy.logical_not(structure1)
+    origin1 = _util._fix_sequence_arg(origin1, input.ndim, 'origin1', int)
+    if origin2 is not None:
+        origin2 = _util._fix_sequence_arg(origin2, input.ndim, 'origin2', int)
+    else:
+        origin2 = origin1
+
+    # "Hit": plain erosion of the input by structure1.
+    hit = _binary_erosion(input, structure1, 1, None, None, 0, origin1, 0,
+                          False)
+    inplace = isinstance(output, cupy.ndarray)
+    # "Miss": inverted erosion by structure2, negated below.
+    miss = _binary_erosion(input, structure2, 1, None, output, 0, origin2, 1,
+                           False)
+    if not inplace:
+        cupy.logical_not(miss, miss)
+        return cupy.logical_and(hit, miss)
+    cupy.logical_not(output, output)
+    cupy.logical_and(hit, output, output)
+
+
+def binary_propagation(input, structure=None, mask=None, output=None,
+                       border_value=0, origin=0):
+    """
+    Multidimensional binary propagation with the given structuring element.
+
+    Implemented as a brute-force :func:`binary_dilation` with
+    ``iterations=-1``, i.e. repeated until the result stops changing.
+
+    Args:
+        input (cupy.ndarray): Binary image to propagate inside ``mask``.
+        structure (cupy.ndarray, optional): Structuring element used for the
+            successive dilations. The output may depend on it, especially if
+            ``mask`` has several connected components. If omitted, an
+            element with a squared connectivity equal to one is generated.
+        mask (cupy.ndarray, optional): Binary mask defining the region into
+            which ``input`` is allowed to propagate.
+        output (cupy.ndarray, optional): Array of the same shape as input
+            that receives the result. By default, a new array is created.
+        border_value (int, optional): Value at the border in the output
+            array. The value is cast to 0 or 1.
+        origin (int or tuple of ints, optional): Placement of the filter.
+
+    Returns:
+        cupy.ndarray : Binary propagation of ``input`` inside ``mask``.
+
+    .. warning::
+
+        This function may synchronize the device.
+
+    .. seealso:: :func:`scipy.ndimage.binary_propagation`
+    """
+    return binary_dilation(
+        input, structure, iterations=-1, mask=mask, output=output,
+        border_value=border_value, origin=origin, brute_force=True)
+
+
+def binary_fill_holes(input, structure=None, output=None, origin=0):
+ """Fill the holes in binary objects.
+
+ Args:
+ input (cupy.ndarray): N-D binary array with holes to be filled.
+ structure (cupy.ndarray, optional): Structuring element used in the
+ computation; large-size elements make computations faster but may
+ miss holes separated from the background by thin regions. The
+ default element (with a square connectivity equal to one) yields
+ the intuitive result where all holes in the input have been filled.
+ output (cupy.ndarray, dtype or None, optional): Array of the same shape
+ as input, into which the output is placed. By default, a new array
+ is created.
+ origin (int or tuple of ints, optional): Position of the structuring
+ element.
+
+ Returns:
+ cupy.ndarray: Transformation of the initial image ``input`` where holes
+ have been filled.
+
+ .. warning::
+
+ This function may synchronize the device.
+
+ .. seealso:: :func:`scipy.ndimage.binary_fill_holes`
+ """
+ mask = cupy.logical_not(input)
+ tmp = cupy.zeros(mask.shape, bool)
+ inplace = isinstance(output, cupy.ndarray)
+ # TODO (grlee77): set brute_force=False below once implemented
+ if inplace:
+ binary_dilation(tmp, structure, -1, mask, output, 1, origin,
+ brute_force=True)
+ cupy.logical_not(output, output)
+ else:
+ output = binary_dilation(tmp, structure, -1, mask, None, 1, origin,
+ brute_force=True)
+ cupy.logical_not(output, output)
+ return output
+
+
+def grey_erosion(input, size=None, footprint=None, structure=None, output=None,
+ mode='reflect', cval=0.0, origin=0):
+ """Calculates a greyscale erosion.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (tuple of ints): Shape of a flat and full structuring element used
+ for the greyscale erosion. Optional if ``footprint`` or
+ ``structure`` is provided.
+ footprint (array of ints): Positions of non-infinite elements of a flat
+ structuring element used for greyscale erosion. Non-zero values
+ give the set of neighbors of the center over which minimum is
+ chosen.
+ structure (array of ints): Structuring element used for the greyscale
+ erosion. ``structure`` may be a non-flat structuring element.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``constant``. Default is ``0.0``.
+ origin (scalar or tuple of scalar): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The result of greyscale erosion.
+
+ .. seealso:: :func:`scipy.ndimage.grey_erosion`
+ """
+
+ if size is None and footprint is None and structure is None:
+ raise ValueError('size, footprint or structure must be specified')
+
+ return _filters._min_or_max_filter(input, size, footprint, structure,
+ output, mode, cval, origin, 'min')
+
+
+def grey_dilation(input, size=None, footprint=None, structure=None,
+ output=None, mode='reflect', cval=0.0, origin=0):
+ """Calculates a greyscale dilation.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (tuple of ints): Shape of a flat and full structuring element used
+ for the greyscale dilation. Optional if ``footprint`` or
+ ``structure`` is provided.
+ footprint (array of ints): Positions of non-infinite elements of a flat
+ structuring element used for greyscale dilation. Non-zero values
+ give the set of neighbors of the center over which maximum is
+ chosen.
+ structure (array of ints): Structuring element used for the greyscale
+ dilation. ``structure`` may be a non-flat structuring element.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``constant``. Default is ``0.0``.
+ origin (scalar or tuple of scalar): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The result of greyscale dilation.
+
+ .. seealso:: :func:`scipy.ndimage.grey_dilation`
+ """
+
+ if size is None and footprint is None and structure is None:
+ raise ValueError('size, footprint or structure must be specified')
+ if structure is not None:
+ structure = cupy.array(structure)
+ structure = structure[tuple([slice(None, None, -1)] * structure.ndim)]
+ if footprint is not None:
+ footprint = cupy.array(footprint)
+ footprint = footprint[tuple([slice(None, None, -1)] * footprint.ndim)]
+
+ origin = _util._fix_sequence_arg(origin, input.ndim, 'origin', int)
+ for i in range(len(origin)):
+ origin[i] = -origin[i]
+ if footprint is not None:
+ sz = footprint.shape[i]
+ elif structure is not None:
+ sz = structure.shape[i]
+ elif numpy.isscalar(size):
+ sz = size
+ else:
+ sz = size[i]
+ if sz % 2 == 0:
+ origin[i] -= 1
+
+ return _filters._min_or_max_filter(input, size, footprint, structure,
+ output, mode, cval, origin, 'max')
+
+
+def grey_closing(input, size=None, footprint=None, structure=None,
+ output=None, mode='reflect', cval=0.0, origin=0):
+ """Calculates a multi-dimensional greyscale closing.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (tuple of ints): Shape of a flat and full structuring element used
+ for the greyscale closing. Optional if ``footprint`` or
+ ``structure`` is provided.
+ footprint (array of ints): Positions of non-infinite elements of a flat
+ structuring element used for greyscale closing. Non-zero values
+ give the set of neighbors of the center over which closing is
+ chosen.
+ structure (array of ints): Structuring element used for the greyscale
+ closing. ``structure`` may be a non-flat structuring element.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``constant``. Default is ``0.0``.
+ origin (scalar or tuple of scalar): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The result of greyscale closing.
+
+ .. seealso:: :func:`scipy.ndimage.grey_closing`
+ """
+ if (size is not None) and (footprint is not None):
+ warnings.warn('ignoring size because footprint is set', UserWarning,
+ stacklevel=2)
+ tmp = grey_dilation(input, size, footprint, structure, None, mode, cval,
+ origin)
+ return grey_erosion(tmp, size, footprint, structure, output, mode, cval,
+ origin)
+
+
+def grey_opening(input, size=None, footprint=None, structure=None,
+ output=None, mode='reflect', cval=0.0, origin=0):
+ """Calculates a multi-dimensional greyscale opening.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (tuple of ints): Shape of a flat and full structuring element used
+ for the greyscale opening. Optional if ``footprint`` or
+ ``structure`` is provided.
+ footprint (array of ints): Positions of non-infinite elements of a flat
+ structuring element used for greyscale opening. Non-zero values
+ give the set of neighbors of the center over which opening is
+ chosen.
+ structure (array of ints): Structuring element used for the greyscale
+ opening. ``structure`` may be a non-flat structuring element.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``constant``. Default is ``0.0``.
+ origin (scalar or tuple of scalar): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The result of greyscale opening.
+
+ .. seealso:: :func:`scipy.ndimage.grey_opening`
+ """
+ if (size is not None) and (footprint is not None):
+ warnings.warn('ignoring size because footprint is set', UserWarning,
+ stacklevel=2)
+ tmp = grey_erosion(input, size, footprint, structure, None, mode, cval,
+ origin)
+ return grey_dilation(tmp, size, footprint, structure, output, mode, cval,
+ origin)
+
+
+def morphological_gradient(
+ input,
+ size=None,
+ footprint=None,
+ structure=None,
+ output=None,
+ mode='reflect',
+ cval=0.0,
+ origin=0,
+):
+ """
+ Multidimensional morphological gradient.
+
+ The morphological gradient is calculated as the difference between a
+ dilation and an erosion of the input with a given structuring element.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (tuple of ints): Shape of a flat and full structuring element used
+ for the morphological gradient. Optional if ``footprint`` or
+ ``structure`` is provided.
+ footprint (array of ints): Positions of non-infinite elements of a flat
+ structuring element used for the morphological gradient. Non-zero
+ values give the set of neighbors of the center over which the
+ dilation and erosion are computed.
+ structure (array of ints): Structuring element used for the
+ morphological gradient. ``structure`` may be a non-flat
+ structuring element.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``constant``. Default is ``0.0``.
+ origin (scalar or tuple of scalar): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The morphological gradient of the input.
+
+ .. seealso:: :func:`scipy.ndimage.morphological_gradient`
+ """
+ tmp = grey_dilation(
+ input, size, footprint, structure, None, mode, cval, origin
+ )
+ if isinstance(output, cupy.ndarray):
+ grey_erosion(
+ input, size, footprint, structure, output, mode, cval, origin
+ )
+ return cupy.subtract(tmp, output, output)
+ else:
+ return tmp - grey_erosion(
+ input, size, footprint, structure, None, mode, cval, origin
+ )
+
+
+def morphological_laplace(
+ input,
+ size=None,
+ footprint=None,
+ structure=None,
+ output=None,
+ mode='reflect',
+ cval=0.0,
+ origin=0,
+):
+ """
+ Multidimensional morphological laplace.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (tuple of ints): Shape of a flat and full structuring element used
+ for the morphological laplace. Optional if ``footprint`` or
+ ``structure`` is provided.
+ footprint (array of ints): Positions of non-infinite elements of a flat
+ structuring element used for the morphological laplace. Non-zero
+ values give the set of neighbors of the center over which the
+ dilation and erosion are computed.
+ structure (array of ints): Structuring element used for the
+ morphological laplace. ``structure`` may be a non-flat
+ structuring element.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``constant``. Default is ``0.0``.
+ origin (scalar or tuple of scalar): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: The morphological laplace of the input.
+
+ .. seealso:: :func:`scipy.ndimage.morphological_laplace`
+ """
+ tmp1 = grey_dilation(
+ input, size, footprint, structure, None, mode, cval, origin
+ )
+ if isinstance(output, cupy.ndarray):
+ grey_erosion(
+ input, size, footprint, structure, output, mode, cval, origin
+ )
+ cupy.add(tmp1, output, output)
+ cupy.subtract(output, input, output)
+ return cupy.subtract(output, input, output)
+ else:
+ tmp2 = grey_erosion(
+ input, size, footprint, structure, None, mode, cval, origin
+ )
+ cupy.add(tmp1, tmp2, tmp2)
+ cupy.subtract(tmp2, input, tmp2)
+ cupy.subtract(tmp2, input, tmp2)
+ return tmp2
+
+
+def white_tophat(
+ input,
+ size=None,
+ footprint=None,
+ structure=None,
+ output=None,
+ mode='reflect',
+ cval=0.0,
+ origin=0,
+):
+ """
+ Multidimensional white tophat filter.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (tuple of ints): Shape of a flat and full structuring element used
+ for the white tophat. Optional if ``footprint`` or ``structure`` is
+ provided.
+ footprint (array of ints): Positions of non-infinite elements of a flat
+ structuring element used for the white tophat. Non-zero values
+ give the set of neighbors of the center over which opening is
+ chosen.
+ structure (array of ints): Structuring element used for the white
+ tophat. ``structure`` may be a non-flat structuring element.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``constant``. Default is ``0.0``.
+ origin (scalar or tuple of scalar): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: Result of the filter of ``input`` with ``structure``.
+
+ .. seealso:: :func:`scipy.ndimage.white_tophat`
+ """
+ if (size is not None) and (footprint is not None):
+ warnings.warn(
+ 'ignoring size because footprint is set', UserWarning, stacklevel=2
+ )
+ tmp = grey_erosion(
+ input, size, footprint, structure, None, mode, cval, origin
+ )
+ tmp = grey_dilation(
+ tmp, size, footprint, structure, output, mode, cval, origin
+ )
+ if input.dtype == numpy.bool_ and tmp.dtype == numpy.bool_:
+ cupy.bitwise_xor(input, tmp, out=tmp)
+ else:
+ cupy.subtract(input, tmp, out=tmp)
+ return tmp
+
+
+def black_tophat(
+ input,
+ size=None,
+ footprint=None,
+ structure=None,
+ output=None,
+ mode='reflect',
+ cval=0.0,
+ origin=0,
+):
+ """
+ Multidimensional black tophat filter.
+
+ Args:
+ input (cupy.ndarray): The input array.
+ size (tuple of ints): Shape of a flat and full structuring element used
+ for the black tophat. Optional if ``footprint`` or ``structure`` is
+ provided.
+ footprint (array of ints): Positions of non-infinite elements of a flat
+ structuring element used for the black tophat. Non-zero values
+ give the set of neighbors of the center over which the closing is
+ computed.
+ structure (array of ints): Structuring element used for the black
+ tophat. ``structure`` may be a non-flat structuring element.
+ output (cupy.ndarray, dtype or None): The array in which to place the
+ output.
+ mode (str): The array borders are handled according to the given mode
+ (``'reflect'``, ``'constant'``, ``'nearest'``, ``'mirror'``,
+ ``'wrap'``). Default is ``'reflect'``.
+ cval (scalar): Value to fill past edges of input if mode is
+ ``constant``. Default is ``0.0``.
+ origin (scalar or tuple of scalar): The origin parameter controls the
+ placement of the filter, relative to the center of the current
+ element of the input. Default of 0 is equivalent to
+ ``(0,)*input.ndim``.
+
+ Returns:
+ cupy.ndarray: Result of the filter of ``input`` with ``structure``.
+
+ .. seealso:: :func:`scipy.ndimage.black_tophat`
+ """
+ if (size is not None) and (footprint is not None):
+ warnings.warn(
+ 'ignoring size because footprint is set', UserWarning, stacklevel=2
+ )
+ tmp = grey_dilation(
+ input, size, footprint, structure, None, mode, cval, origin
+ )
+ tmp = grey_erosion(
+ tmp, size, footprint, structure, output, mode, cval, origin
+ )
+ if input.dtype == numpy.bool_ and tmp.dtype == numpy.bool_:
+ cupy.bitwise_xor(tmp, input, out=tmp)
+ else:
+ cupy.subtract(tmp, input, out=tmp)
+ return tmp
diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_kernel_weights.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_kernel_weights.py
new file mode 100644
index 000000000..b2fc84449
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_kernel_weights.py
@@ -0,0 +1,73 @@
+"""Determination of spline kernel weights (adapted from SciPy)
+
+See more verbose comments for each case there:
+https://github.com/scipy/scipy/blob/eba29d69846ab1299976ff4af71c106188397ccc/scipy/ndimage/src/ni_splines.c#L7 # NOQA
+
+``spline_weights_inline`` is a dict where the key is the spline order and the
+value is the spline weight initialization code.
+"""
+
+spline_weights_inline = {}
+
+# Note: This order = 1 case is currently unused (order = 1 has a different code
+# path in _interp_kernels.py). I think that existing code is a bit more
+# efficient.
+spline_weights_inline[1] = '''
+wx = c_{j} - floor({order} & 1 ? c_{j} : c_{j} + 0.5);
+weights_{j}[0] = 1.0 - wx;
+weights_{j}[1] = wx;
+'''
+
+spline_weights_inline[2] = '''
+wx = c_{j} - floor({order} & 1 ? c_{j} : c_{j} + 0.5);
+weights_{j}[1] = 0.75 - wx * wx;
+wy = 0.5 - wx;
+weights_{j}[0] = 0.5 * wy * wy;
+weights_{j}[2] = 1.0 - weights_{j}[0] - weights_{j}[1];
+'''
+
+spline_weights_inline[3] = '''
+wx = c_{j} - floor({order} & 1 ? c_{j} : c_{j} + 0.5);
+wy = 1.0 - wx;
+weights_{j}[1] = (wx * wx * (wx - 2.0) * 3.0 + 4.0) / 6.0;
+weights_{j}[2] = (wy * wy * (wy - 2.0) * 3.0 + 4.0) / 6.0;
+weights_{j}[0] = wy * wy * wy / 6.0;
+weights_{j}[3] = 1.0 - weights_{j}[0] - weights_{j}[1] - weights_{j}[2];
+'''
+
+spline_weights_inline[4] = '''
+wx = c_{j} - floor({order} & 1 ? c_{j} : c_{j} + 0.5);
+wy = wx * wx;
+weights_{j}[2] = wy * (wy * 0.25 - 0.625) + 115.0 / 192.0;
+wy = 1.0 + wx;
+weights_{j}[1] = wy * (wy * (wy * (5.0 - wy) / 6.0 - 1.25) + 5.0 / 24.0) +
+ 55.0 / 96.0;
+wy = 1.0 - wx;
+weights_{j}[3] = wy * (wy * (wy * (5.0 - wy) / 6.0 - 1.25) + 5.0 / 24.0) +
+ 55.0 / 96.0;
+wy = 0.5 - wx;
+wy = wy * wy;
+weights_{j}[0] = wy * wy / 24.0;
+weights_{j}[4] = 1.0 - weights_{j}[0] - weights_{j}[1]
+ - weights_{j}[2] - weights_{j}[3];
+'''
+
+spline_weights_inline[5] = '''
+wx = c_{j} - floor({order} & 1 ? c_{j} : c_{j} + 0.5);
+wy = wx * wx;
+weights_{j}[2] = wy * (wy * (0.25 - wx / 12.0) - 0.5) + 0.55;
+wy = 1.0 - wx;
+wy = wy * wy;
+weights_{j}[3] = wy * (wy * (0.25 - (1.0 - wx) / 12.0) - 0.5) + 0.55;
+wy = wx + 1.0;
+weights_{j}[1] = wy * (wy * (wy * (wy * (wy / 24.0 - 0.375) + 1.25) - 1.75)
+ + 0.625) + 0.425;
+wy = 2.0 - wx;
+weights_{j}[4] = wy * (wy * (wy * (wy * (wy / 24.0 - 0.375) + 1.25) - 1.75)
+ + 0.625) + 0.425;
+wy = 1.0 - wx;
+wy = wy * wy;
+weights_{j}[0] = (1.0 - wx) * wy * wy / 120.0;
+weights_{j}[5] = 1.0 - weights_{j}[0] - weights_{j}[1] - weights_{j}[2]
+ - weights_{j}[3] - weights_{j}[4];
+'''
diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_prefilter_core.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_prefilter_core.py
new file mode 100644
index 000000000..c44df836e
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_spline_prefilter_core.py
@@ -0,0 +1,256 @@
+"""
+Spline poles and boundary handling implemented as in SciPy
+
+https://github.com/scipy/scipy/blob/ee6ae72f83a0995aeb34929aed881d3f36fccfda/scipy/ndimage/src/ni_splines.c # noqa
+"""
+import functools
+import math
+import operator
+import textwrap
+
+import cupy
+
+
+def get_poles(order):
+ if order == 2:
+ # sqrt(8.0) - 3.0
+ return (-0.171572875253809902396622551580603843,)
+ elif order == 3:
+ # sqrt(3.0) - 2.0
+ return (-0.267949192431122706472553658494127633,)
+ elif order == 4:
+ # sqrt(664.0 - sqrt(438976.0)) + sqrt(304.0) - 19.0
+ # sqrt(664.0 + sqrt(438976.0)) - sqrt(304.0) - 19.0
+ return (-0.361341225900220177092212841325675255,
+ -0.013725429297339121360331226939128204)
+ elif order == 5:
+ # sqrt(67.5 - sqrt(4436.25)) + sqrt(26.25) - 6.5
+ # sqrt(67.5 + sqrt(4436.25)) - sqrt(26.25) - 6.5
+ return (-0.430575347099973791851434783493520110,
+ -0.043096288203264653822712376822550182)
+ else:
+ raise ValueError('only order 2-5 supported')
+
+
+def get_gain(poles):
+ return functools.reduce(operator.mul,
+ [(1.0 - z) * (1.0 - 1.0 / z) for z in poles])
+
+
+def _causal_init_code(mode):
+ """Code for causal initialization step of IIR filtering.
+
+ c is a 1d array of length n and z is a filter pole
+ """
+ code = f'''
+ // causal init for mode={mode}'''
+ if mode == 'mirror':
+ code += '''
+ z_i = z;
+ z_n_1 = pow(z, (P)(n - 1));
+
+ c[0] = c[0] + z_n_1 * c[(n - 1) * element_stride];
+ for (i = 1; i < min(n - 1, static_cast<idx_t>({n_boundary})); ++i) {{
+ c[0] += z_i * (c[i * element_stride] +
+ z_n_1 * c[(n - 1 - i) * element_stride]);
+ z_i *= z;
+ }}
+ c[0] /= 1 - z_n_1 * z_n_1;'''
+ elif mode == 'grid-wrap':
+ code += '''
+ z_i = z;
+
+ for (i = 1; i < min(n, static_cast<idx_t>({n_boundary})); ++i) {{
+ c[0] += z_i * c[(n - i) * element_stride];
+ z_i *= z;
+ }}
+ c[0] /= 1 - z_i; /* z_i = pow(z, n) */'''
+ elif mode == 'reflect':
+ code += '''
+ z_i = z;
+ z_n = pow(z, (P)n);
+ c0 = c[0];
+
+ c[0] = c[0] + z_n * c[(n - 1) * element_stride];
+ for (i = 1; i < min(n, static_cast<idx_t>({n_boundary})); ++i) {{
+ c[0] += z_i * (c[i * element_stride] +
+ z_n * c[(n - 1 - i) * element_stride]);
+ z_i *= z;
+ }}
+ c[0] *= z / (1 - z_n * z_n);
+ c[0] += c0;'''
+ else:
+ raise ValueError('invalid mode: {}'.format(mode))
+ return code
+
+
+def _anticausal_init_code(mode):
+ """Code for the anti-causal initialization step of IIR filtering.
+
+ c is a 1d array of length n and z is a filter pole
+ """
+ code = f'''
+ // anti-causal init for mode={mode}'''
+ if mode == 'mirror':
+ code += '''
+ c[(n - 1) * element_stride] = (
+ z * c[(n - 2) * element_stride] +
+ c[(n - 1) * element_stride]) * z / (z * z - 1);'''
+ elif mode == 'grid-wrap':
+ code += '''
+ z_i = z;
+
+ for (i = 0; i < min(n - 1, static_cast<idx_t>({n_boundary})); ++i) {{
+ c[(n - 1) * element_stride] += z_i * c[i * element_stride];
+ z_i *= z;
+ }}
+ c[(n - 1) * element_stride] *= z / (z_i - 1); /* z_i = pow(z, n) */'''
+ elif mode == 'reflect':
+ code += '''
+ c[(n - 1) * element_stride] *= z / (z - 1);'''
+ else:
+ raise ValueError('invalid mode: {}'.format(mode))
+ return code
+
+
+def _get_spline_mode(mode):
+ """spline boundary mode for interpolation with order >= 2."""
+ if mode in ['mirror', 'reflect', 'grid-wrap']:
+ # exact analytic boundary conditions exist for these modes.
+ return mode
+ elif mode == 'grid-mirror':
+ # grid-mirror is a synonym for 'reflect'
+ return 'reflect'
+ # No exact analytical spline boundary condition implemented. Reflect gives
+ # lower error than using mirror or wrap for mode 'nearest'. Otherwise, a
+ # mirror spline boundary condition is used.
+ return 'reflect' if mode == 'nearest' else 'mirror'
+
+
+def _get_spline1d_code(mode, poles, n_boundary):
+ """Generates the code required for IIR filtering of a single 1d signal.
+
+ Prefiltering is done by causal filtering followed by anti-causal filtering.
+ Multiple boundary conditions have been implemented.
+ """
+ code = ['''
+ __device__ void spline_prefilter1d(
+ T* __restrict__ c, idx_t signal_length, idx_t element_stride)
+ {{''']
+
+ # variables common to all boundary modes
+ code.append('''
+ idx_t i, n = signal_length;
+ P z, z_i;''')
+
+ # retrieve the spline boundary extension mode to use
+ mode = _get_spline_mode(mode)
+
+ if mode == 'mirror':
+ # variables specific to mirror boundary mode
+ code.append('''
+ P z_n_1;''')
+ elif mode == 'reflect':
+ # variables specific to reflect boundary mode
+ code.append('''
+ P z_n;
+ T c0;''')
+
+ for pole in poles:
+
+ code.append(f'''
+ // select the current pole
+ z = {pole};''')
+
+ # initialize and apply the causal filter
+ code.append(_causal_init_code(mode))
+ code.append('''
+ // apply the causal filter for the current pole
+ for (i = 1; i < n; ++i) {{
+ c[i * element_stride] += z * c[(i - 1) * element_stride];
+ }}''')
+ # initialize and apply the anti-causal filter
+ code.append(_anticausal_init_code(mode))
+ code.append('''
+ // apply the anti-causal filter for the current pole
+ for (i = n - 2; i >= 0; --i) {{
+ c[i * element_stride] = z * (c[(i + 1) * element_stride] -
+ c[i * element_stride]);
+ }}''')
+
+ code += ['''
+ }}''']
+ return textwrap.dedent('\n'.join(code)).format(n_boundary=n_boundary)
+
+
+_FILTER_GENERAL = '''
+#include "cupy/carray.cuh"
+#include "cupy/complex.cuh"
+typedef {data_type} T;
+typedef {pole_type} P;
+typedef {index_type} idx_t;
+template<typename T>
+__device__ T* row(
+ T* ptr, idx_t i, idx_t axis, idx_t ndim, const idx_t* shape) {{
+ idx_t index = 0, stride = 1;
+ for (idx_t a = ndim - 1; a > 0; --a) {{
+ if (a != axis) {{
+ index += (i % shape[a]) * stride;
+ i /= shape[a];
+ }}
+ stride *= shape[a];
+ }}
+ return ptr + index + stride * i;
+}}
+'''
+
+
+_batch_spline1d_strided_template = """
+extern "C" __global__
+__launch_bounds__({block_size})
+void {kernel_name}(T* __restrict__ y, const idx_t* __restrict__ info) {{
+ const idx_t n_signals = info[0], n_samples = info[1],
+ * __restrict__ shape = info+2;
+ idx_t y_elem_stride = 1;
+ for (int a = {ndim} - 1; a > {axis}; --a) {{ y_elem_stride *= shape[a]; }}
+ idx_t unraveled_idx = blockDim.x * blockIdx.x + threadIdx.x;
+ idx_t batch_idx = unraveled_idx;
+ if (batch_idx < n_signals)
+ {{
+ T* __restrict__ y_i = row(y, batch_idx, {axis}, {ndim}, shape);
+ spline_prefilter1d(y_i, n_samples, y_elem_stride);
+ }}
+}}
+"""
+
+
+@cupy.memoize(for_each_device=True)
+def get_raw_spline1d_kernel(axis, ndim, mode, order, index_type='int',
+ data_type='double', pole_type='double',
+ block_size=128):
+ """Generate a kernel for applying a spline prefilter along a given axis."""
+ poles = get_poles(order)
+
+ # determine number of samples for the boundary approximation
+ # (SciPy uses n_boundary = n_samples but this is excessive)
+ largest_pole = max([abs(p) for p in poles])
+ # tol < 1e-7 fails test cases comparing to SciPy at atol = rtol = 1e-5
+ tol = 1e-10 if pole_type == 'float' else 1e-18
+ n_boundary = math.ceil(math.log(tol, largest_pole))
+
+ # headers and general utility function for extracting rows of data
+ code = _FILTER_GENERAL.format(index_type=index_type,
+ data_type=data_type,
+ pole_type=pole_type)
+
+ # generate source for a 1d function for a given boundary mode and poles
+ code += _get_spline1d_code(mode, poles, n_boundary)
+
+ # generate code handling batch operation of the 1d filter
+ mode_str = mode.replace('-', '_') # cannot have '-' in kernel name
+ kernel_name = (f'cupyx_scipy_ndimage_spline_filter_{ndim}d_ord{order}_'
+ f'axis{axis}_{mode_str}')
+ code += _batch_spline1d_strided_template.format(ndim=ndim, axis=axis,
+ block_size=block_size,
+ kernel_name=kernel_name)
+ return cupy.RawKernel(code, kernel_name)
diff --git a/python/cucim/src/cucim/skimage/_vendored/_ndimage_util.py b/python/cucim/src/cucim/skimage/_vendored/_ndimage_util.py
index 1d87b3256..1a6a830a5 100644
--- a/python/cucim/src/cucim/skimage/_vendored/_ndimage_util.py
+++ b/python/cucim/src/cucim/skimage/_vendored/_ndimage_util.py
@@ -5,6 +5,33 @@
import numpy
+def _is_integer_output(output, input):
+ if output is None:
+ return input.dtype.kind in 'iu'
+ elif isinstance(output, cupy.ndarray):
+ return output.dtype.kind in 'iu'
+ return cupy.dtype(output).kind in 'iu'
+
+
+def _check_cval(mode, cval, integer_output):
+ if mode == 'constant' and integer_output and not cupy.isfinite(cval):
+ raise NotImplementedError("Non-finite cval is not supported for "
+ "outputs with integer dtype.")
+
+
+def _get_weights_dtype(input, weights, use_cucim_casting=False):
+ if weights.dtype.kind == "c" or input.dtype.kind == "c":
+ return cupy.promote_types(input.real.dtype, cupy.complex64)
+ elif weights.dtype.kind in 'iub':
+ if use_cucim_casting:
+ from cucim.skimage._shared.utils import _supported_float_type
+ return _supported_float_type(weights.dtype)
+ else:
+ # convert integer dtype weights to double as in SciPy
+ return cupy.float64
+ return cupy.promote_types(input.real.dtype, cupy.float32)
+
+
def _get_output(output, input, shape=None, complex_output=False):
shape = input.shape if shape is None else shape
if output is None:
@@ -12,17 +39,17 @@ def _get_output(output, input, shape=None, complex_output=False):
_dtype = cupy.promote_types(input.dtype, cupy.complex64)
else:
_dtype = input.dtype
- output = cupy.zeros(shape, dtype=_dtype)
+ output = cupy.empty(shape, dtype=_dtype)
elif isinstance(output, (type, cupy.dtype)):
if complex_output and cupy.dtype(output).kind != 'c':
warnings.warn("promoting specified output dtype to complex")
output = cupy.promote_types(output, cupy.complex64)
- output = cupy.zeros(shape, dtype=output)
+ output = cupy.empty(shape, dtype=output)
elif isinstance(output, str):
output = numpy.sctypeDict[output]
if complex_output and cupy.dtype(output).kind != 'c':
raise RuntimeError("output must have complex dtype")
- output = cupy.zeros(shape, dtype=output)
+ output = cupy.empty(shape, dtype=output)
elif output.shape != shape:
raise RuntimeError("output shape not correct")
elif complex_output and output.dtype.kind != 'c':
@@ -44,6 +71,13 @@ def _fix_sequence_arg(arg, ndim, name, conv=lambda x: x):
return lst
+def _check_origin(origin, width):
+ origin = int(origin)
+ if (width // 2 + origin < 0) or (width // 2 + origin >= width):
+ raise ValueError('invalid origin')
+ return origin
+
+
def _check_mode(mode):
if mode not in ('reflect', 'constant', 'nearest', 'mirror', 'wrap',
'grid-mirror', 'grid-wrap', 'grid-reflect'):
@@ -52,13 +86,6 @@ def _check_mode(mode):
return mode
-def _check_origin(origin, width):
- origin = int(origin)
- if (width // 2 + origin < 0) or (width // 2 + origin >= width):
- raise ValueError('invalid origin')
- return origin
-
-
def _get_inttype(input):
# The integer type to use for indices in the input array
# The indices actually use byte positions and we can't just use
@@ -70,53 +97,118 @@ def _get_inttype(input):
def _generate_boundary_condition_ops(mode, ix, xsize, int_t="int",
- float_ix=False):
+ float_ix=False, separate=False):
+ """Generate boundary conditions
+
+ If separate = True, a pair of conditions for the (lower, upper) boundary
+ are provided instead of a single expression.
+ """
min_func = "fmin" if float_ix else "min"
max_func = "fmax" if float_ix else "max"
if mode in ['reflect', 'grid-mirror']:
- ops = '''
- if ({ix} < 0) {{
- {ix} = - 1 -{ix};
- }}
- {ix} %= {xsize} * 2;
- {ix} = {min}({ix}, 2 * {xsize} - 1 - {ix});'''.format(
- ix=ix, xsize=xsize, min=min_func)
- elif mode == 'mirror':
- ops = '''
- if ({xsize} == 1) {{
- {ix} = 0;
- }} else {{
+ if separate:
+ ops_upper = f'''
+ {ix} %= {xsize} * 2;
+ {ix} = {min_func}({ix}, 2 * {xsize} - 1 - {ix});
+ '''
+ ops_lower = f'''
if ({ix} < 0) {{
- {ix} = -{ix};
+ {ix} = - 1 -{ix};
}}
- {ix} = 1 + ({ix} - 1) % (({xsize} - 1) * 2);
- {ix} = {min}({ix}, 2 * {xsize} - 2 - {ix});
- }}'''.format(ix=ix, xsize=xsize, min=min_func)
+ ''' + ops_upper
+ ops = (ops_lower, ops_upper)
+ else:
+ ops = f'''
+ if ({ix} < 0) {{
+ {ix} = - 1 -{ix};
+ }}
+ {ix} %= {xsize} * 2;
+ {ix} = {min_func}({ix}, 2 * {xsize} - 1 - {ix});'''
+ elif mode == 'mirror':
+ if separate:
+ temp1 = f'''
+ if ({xsize} == 1) {{
+ {ix} = 0;
+ }} else {{
+ '''
+ temp2 = f'''
+ if ({ix} < 0) {{
+ {ix} = -{ix};
+ }}
+ '''
+ temp3 = f'''
+ {ix} = 1 + ({ix} - 1) % (({xsize} - 1) * 2);
+ {ix} = {min_func}({ix}, 2 * {xsize} - 2 - {ix});
+ }}'''
+ ops_lower = temp1 + temp2 + temp3
+ ops_upper = temp1 + temp3
+ ops = (ops_lower, ops_upper)
+ else:
+ ops = f'''
+ if ({xsize} == 1) {{
+ {ix} = 0;
+ }} else {{
+ if ({ix} < 0) {{
+ {ix} = -{ix};
+ }}
+ {ix} = 1 + ({ix} - 1) % (({xsize} - 1) * 2);
+ {ix} = {min_func}({ix}, 2 * {xsize} - 2 - {ix});
+ }}'''
elif mode == 'nearest':
- ops = '''
- {ix} = {min}({max}(({T}){ix}, ({T})0), ({T})({xsize} - 1));'''.format(
- ix=ix, xsize=xsize, min=min_func, max=max_func,
- # force using 64-bit signed integer for ptrdiff_t,
- # see cupy/cupy#6048
- T=('int' if int_t == 'int' else 'long long'))
+ T = 'int' if int_t == 'int' else 'long long'
+ if separate:
+ ops_lower = f'''{ix} = {max_func}(({T}){ix}, ({T})0);'''
+ ops_upper = f'''{ix} = {min_func}(({T}){ix}, ({T})({xsize} - 1));''' # noqa
+ ops = (ops_lower, ops_upper)
+ else:
+ ops = f'''{ix} = {min_func}({max_func}(({T}){ix}, ({T})0), ({T})({xsize} - 1));''' # noqa
elif mode == 'grid-wrap':
- ops = '''
- {ix} %= {xsize};
- if ({ix} < 0) {{
- {ix} += {xsize};
- }}'''.format(ix=ix, xsize=xsize)
+ if separate:
+ ops_upper = f'''
+ {ix} %= {xsize};
+ '''
+ ops_lower = ops_upper + f'''
+ if ({ix} < 0) {{
+ {ix} += {xsize};
+ }}'''
+ ops = (ops_lower, ops_upper)
+ else:
+ ops = f'''
+ {ix} %= {xsize};
+ if ({ix} < 0) {{
+ {ix} += {xsize};
+ }}'''
+
elif mode == 'wrap':
- ops = '''
- if ({ix} < 0) {{
- {ix} += ({sz} - 1) * (({int_t})(-{ix} / ({sz} - 1)) + 1);
- }} else if ({ix} > ({sz} - 1)) {{
- {ix} -= ({sz} - 1) * ({int_t})({ix} / ({sz} - 1));
- }};'''.format(ix=ix, sz=xsize, int_t=int_t)
+ if separate:
+ ops_lower = f'''{ix} += ({xsize} - 1) * (({int_t})(-{ix} / ({xsize} - 1)) + 1);''' # noqa
+ ops_upper = f'''{ix} -= ({xsize} - 1) * ({int_t})({ix} / ({xsize} - 1));''' # noqa
+ ops = (ops_lower, ops_upper)
+ else:
+ ops = f'''
+ if ({ix} < 0) {{
+ {ix} += ({xsize} - 1) * (({int_t})(-{ix} / ({xsize} - 1)) + 1);
+ }} else if ({ix} > ({xsize} - 1)) {{
+ {ix} -= ({xsize} - 1) * ({int_t})({ix} / ({xsize} - 1));
+ }};'''
elif mode in ['constant', 'grid-constant']:
- ops = '''
- if (({ix} < 0) || {ix} >= {xsize}) {{
- {ix} = -1;
- }}'''.format(ix=ix, xsize=xsize)
+ if separate:
+ ops_lower = f'''
+ if ({ix} < 0) {{
+ {ix} = -1;
+ }}'''
+ ops_upper = f'''
+ if ({ix} >= {xsize}) {{
+ {ix} = -1;
+ }}'''
+ ops = (ops_lower, ops_upper)
+ else:
+ ops = f'''
+ if (({ix} < 0) || {ix} >= {xsize}) {{
+ {ix} = -1;
+ }}'''
+ if separate:
+ ops = (ops, ops)
return ops
diff --git a/python/cucim/src/cucim/skimage/_vendored/_texture.py b/python/cucim/src/cucim/skimage/_vendored/_texture.py
new file mode 100644
index 000000000..5c18bce71
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/_vendored/_texture.py
@@ -0,0 +1,194 @@
+import cupy
+from cupy import _core
+from cupy.cuda import runtime, texture
+
+_affine_transform_2d_array_kernel = _core.ElementwiseKernel(
+ 'U texObj, raw float32 m, uint64 width', 'T transformed_image',
+ '''
+ float3 pixel = make_float3(
+ (float)(i / width),
+ (float)(i % width),
+ 1.0f
+ );
+ float x = dot(pixel, make_float3(m[0], m[1], m[2])) + .5f;
+ float y = dot(pixel, make_float3(m[3], m[4], m[5])) + .5f;
+ transformed_image = tex2D(texObj, y, x);
+ ''',
+ 'cupyx_texture_affine_transformation_2d_array',
+ preamble='''
+ inline __host__ __device__ float dot(float3 a, float3 b)
+ {
+ return a.x * b.x + a.y * b.y + a.z * b.z;
+ }
+ ''')
+
+
+_affine_transform_3d_array_kernel = _core.ElementwiseKernel(
+ 'U texObj, raw float32 m, uint64 height, uint64 width',
+ 'T transformed_volume',
+ '''
+ float4 voxel = make_float4(
+ (float)(i / (width * height)),
+ (float)((i % (width * height)) / width),
+ (float)((i % (width * height)) % width),
+ 1.0f
+ );
+ float x = dot(voxel, make_float4(m[0], m[1], m[2], m[3])) + .5f;
+ float y = dot(voxel, make_float4(m[4], m[5], m[6], m[7])) + .5f;
+ float z = dot(voxel, make_float4(m[8], m[9], m[10], m[11])) + .5f;
+ transformed_volume = tex3D(texObj, z, y, x);
+ ''',
+ 'cupyx_texture_affine_transformation_3d_array',
+ preamble='''
+ inline __host__ __device__ float dot(float4 a, float4 b)
+ {
+ return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
+ }
+ ''')
+
+
+def _create_texture_object(data,
+ address_mode: str,
+ filter_mode: str,
+ read_mode: str,
+ border_color=0):
+
+ if cupy.issubdtype(data.dtype, cupy.unsignedinteger):
+ fmt_kind = runtime.cudaChannelFormatKindUnsigned
+ elif cupy.issubdtype(data.dtype, cupy.integer):
+ fmt_kind = runtime.cudaChannelFormatKindSigned
+ elif cupy.issubdtype(data.dtype, cupy.floating):
+ fmt_kind = runtime.cudaChannelFormatKindFloat
+ else:
+ raise ValueError(f'Unsupported data type {data.dtype}')
+
+ if address_mode == 'nearest':
+ address_mode = runtime.cudaAddressModeClamp
+ elif address_mode == 'constant':
+ address_mode = runtime.cudaAddressModeBorder
+ else:
+ raise ValueError(
+ f'Unsupported address mode {address_mode} '
+ '(supported: constant, nearest)')
+
+ if filter_mode == 'nearest':
+ filter_mode = runtime.cudaFilterModePoint
+ elif filter_mode == 'linear':
+ filter_mode = runtime.cudaFilterModeLinear
+ else:
+ raise ValueError(
+ f'Unsupported filter mode {filter_mode} '
+ f'(supported: nearest, linear)')
+
+ if read_mode == 'element_type':
+ read_mode = runtime.cudaReadModeElementType
+ elif read_mode == 'normalized_float':
+ read_mode = runtime.cudaReadModeNormalizedFloat
+ else:
+ raise ValueError(
+ f'Unsupported read mode {read_mode} '
+ '(supported: element_type, normalized_float)')
+
+ texture_fmt = texture.ChannelFormatDescriptor(
+ data.itemsize * 8, 0, 0, 0, fmt_kind)
+ # CUDAArray: last dimension is the fastest changing dimension
+ array = texture.CUDAarray(texture_fmt, *data.shape[::-1])
+ res_desc = texture.ResourceDescriptor(
+ runtime.cudaResourceTypeArray, cuArr=array)
+ # TODO(the-lay): each dimension can have a different addressing mode
+ # TODO(the-lay): border color/value can be defined for up to 4 channels
+ tex_desc = texture.TextureDescriptor(
+ (address_mode, ) * data.ndim, filter_mode, read_mode,
+ borderColors=(border_color, ))
+ tex_obj = texture.TextureObject(res_desc, tex_desc)
+ array.copy_from(data)
+
+ return tex_obj
+
+
+def affine_transformation(data,
+ transformation_matrix,
+ output_shape=None,
+ output=None,
+ interpolation: str = 'linear',
+ mode: str = 'constant',
+ border_value=0):
+ """
+ Apply an affine transformation.
+
+ The method uses texture memory and supports only 2D and 3D float32 arrays
+ without channel dimension.
+
+ Args:
+ data (cupy.ndarray): The input array or texture object.
+ transformation_matrix (cupy.ndarray): Affine transformation matrix.
+            Must be homogeneous and have shape ``(ndim + 1, ndim + 1)``.
+ output_shape (tuple of ints): Shape of output. If not specified,
+ the input array shape is used. Default is None.
+ output (cupy.ndarray or ~cupy.dtype): The array in which to place the
+ output, or the dtype of the returned array. If not specified,
+ creates the output array with shape of ``output_shape``. Default is
+ None.
+ interpolation (str): Specifies interpolation mode: ``'linear'`` or
+ ``'nearest'``. Default is ``'linear'``.
+ mode (str): Specifies addressing mode for points outside of the array:
+            (``'constant'``, ``'nearest'``). Default is ``'constant'``.
+ border_value: Specifies value to be used for coordinates outside
+ of the array for ``'constant'`` mode. Default is 0.
+
+ Returns:
+ cupy.ndarray:
+ The transformed input.
+
+ .. seealso:: :func:`cupyx.scipy.ndimage.affine_transform`
+ """
+
+ ndim = data.ndim
+ if (ndim < 2) or (ndim > 3):
+ raise ValueError(
+ 'Texture memory affine transformation is defined only for '
+ '2D and 3D arrays without channel dimension.')
+
+ dtype = data.dtype
+ if dtype != cupy.float32:
+ raise ValueError(f'Texture memory affine transformation is available '
+ f'only for float32 data type (not {dtype})')
+
+ if interpolation not in ['linear', 'nearest']:
+ raise ValueError(
+ f'Unsupported interpolation {interpolation} '
+ f'(supported: linear, nearest)')
+
+ if transformation_matrix.shape != (ndim + 1, ndim + 1):
+ raise ValueError('Matrix must be have shape (ndim + 1, ndim + 1)')
+
+ texture_object = _create_texture_object(data,
+ address_mode=mode,
+ filter_mode=interpolation,
+ read_mode='element_type',
+ border_color=border_value)
+
+ if ndim == 2:
+ kernel = _affine_transform_2d_array_kernel
+ else:
+ kernel = _affine_transform_3d_array_kernel
+
+ if output_shape is None:
+ output_shape = data.shape
+
+ if output is None:
+ output = cupy.zeros(output_shape, dtype=dtype)
+ elif isinstance(output, (type, cupy.dtype)):
+ if output != cupy.float32:
+ raise ValueError(f'Texture memory affine transformation is '
+ f'available only for float32 data type (not '
+ f'{output})')
+ output = cupy.zeros(output_shape, dtype=output)
+ elif isinstance(output, cupy.ndarray):
+ if output.shape != output_shape:
+ raise ValueError('Output shapes do not match')
+ else:
+ raise ValueError('Output must be None, cupy.ndarray or cupy.dtype')
+
+ kernel(texture_object, transformation_matrix, *output_shape[1:], output)
+ return output
diff --git a/python/cucim/src/cucim/skimage/_vendored/ndimage.py b/python/cucim/src/cucim/skimage/_vendored/ndimage.py
new file mode 100644
index 000000000..cd97578a8
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/_vendored/ndimage.py
@@ -0,0 +1,82 @@
+# locally defined filters that are more efficient than in CuPy
+from cucim.skimage._vendored._ndimage_filters import correlate # NOQA
+from cucim.skimage._vendored._ndimage_filters import convolve # NOQA
+from cucim.skimage._vendored._ndimage_filters import correlate1d # NOQA
+from cucim.skimage._vendored._ndimage_filters import convolve1d # NOQA
+from cucim.skimage._vendored._ndimage_filters import uniform_filter1d # NOQA
+from cucim.skimage._vendored._ndimage_filters import uniform_filter # NOQA
+from cucim.skimage._vendored._ndimage_filters import gaussian_filter1d # NOQA
+from cucim.skimage._vendored._ndimage_filters import gaussian_filter # NOQA
+from cucim.skimage._vendored._ndimage_filters import prewitt # NOQA
+from cucim.skimage._vendored._ndimage_filters import sobel # NOQA
+from cucim.skimage._vendored._ndimage_filters import generic_laplace # NOQA
+from cucim.skimage._vendored._ndimage_filters import laplace # NOQA
+from cucim.skimage._vendored._ndimage_filters import gaussian_laplace # NOQA
+from cucim.skimage._vendored._ndimage_filters import generic_gradient_magnitude # NOQA
+from cucim.skimage._vendored._ndimage_filters import gaussian_gradient_magnitude # NOQA
+from cucim.skimage._vendored._ndimage_filters import minimum_filter # NOQA
+from cucim.skimage._vendored._ndimage_filters import maximum_filter # NOQA
+from cucim.skimage._vendored._ndimage_filters import minimum_filter1d # NOQA
+from cucim.skimage._vendored._ndimage_filters import maximum_filter1d # NOQA
+from cucim.skimage._vendored._ndimage_filters import median_filter # NOQA
+from cucim.skimage._vendored._ndimage_filters import rank_filter # NOQA
+from cucim.skimage._vendored._ndimage_filters import percentile_filter # NOQA
+
+# interpolation
+from cucim.skimage._vendored._ndimage_interpolation import affine_transform # NOQA
+from cucim.skimage._vendored._ndimage_interpolation import map_coordinates # NOQA
+from cucim.skimage._vendored._ndimage_interpolation import rotate # NOQA
+from cucim.skimage._vendored._ndimage_interpolation import shift # NOQA
+from cucim.skimage._vendored._ndimage_interpolation import spline_filter # NOQA
+from cucim.skimage._vendored._ndimage_interpolation import spline_filter1d # NOQA
+from cucim.skimage._vendored._ndimage_interpolation import zoom # NOQA
+
+# morphology
+from cucim.skimage._vendored._ndimage_morphology import generate_binary_structure # NOQA
+from cucim.skimage._vendored._ndimage_morphology import iterate_structure # NOQA
+from cucim.skimage._vendored._ndimage_morphology import binary_erosion # NOQA
+from cucim.skimage._vendored._ndimage_morphology import binary_dilation # NOQA
+from cucim.skimage._vendored._ndimage_morphology import binary_opening # NOQA
+from cucim.skimage._vendored._ndimage_morphology import binary_closing # NOQA
+from cucim.skimage._vendored._ndimage_morphology import binary_hit_or_miss # NOQA
+from cucim.skimage._vendored._ndimage_morphology import binary_fill_holes # NOQA
+from cucim.skimage._vendored._ndimage_morphology import binary_propagation # NOQA
+from cucim.skimage._vendored._ndimage_morphology import grey_erosion # NOQA
+from cucim.skimage._vendored._ndimage_morphology import grey_dilation # NOQA
+from cucim.skimage._vendored._ndimage_morphology import grey_closing # NOQA
+from cucim.skimage._vendored._ndimage_morphology import grey_opening # NOQA
+from cucim.skimage._vendored._ndimage_morphology import morphological_gradient # NOQA
+from cucim.skimage._vendored._ndimage_morphology import morphological_laplace # NOQA
+from cucim.skimage._vendored._ndimage_morphology import white_tophat # NOQA
+from cucim.skimage._vendored._ndimage_morphology import black_tophat # NOQA
+
+# Import the rest of the cupyx.scipy.ndimage API here
+
+# additional filters
+from cupyx.scipy.ndimage import generic_filter # NOQA
+from cupyx.scipy.ndimage import generic_filter1d # NOQA
+
+# fourier filters
+from cupyx.scipy.ndimage import fourier_ellipsoid # NOQA
+from cupyx.scipy.ndimage import fourier_gaussian # NOQA
+from cupyx.scipy.ndimage import fourier_shift # NOQA
+from cupyx.scipy.ndimage import fourier_uniform # NOQA
+
+# measurements
+from cupyx.scipy.ndimage import label # NOQA
+try:
+ from cupyx.scipy.ndimage import sum_labels # NOQA
+except ImportError:
+ from cupyx.scipy.ndimage import sum as sum_labels # NOQA
+from cupyx.scipy.ndimage import mean # NOQA
+from cupyx.scipy.ndimage import variance # NOQA
+from cupyx.scipy.ndimage import standard_deviation # NOQA
+from cupyx.scipy.ndimage import minimum # NOQA
+from cupyx.scipy.ndimage import maximum # NOQA
+from cupyx.scipy.ndimage import minimum_position # NOQA
+from cupyx.scipy.ndimage import maximum_position # NOQA
+from cupyx.scipy.ndimage import median # NOQA
+from cupyx.scipy.ndimage import extrema # NOQA
+from cupyx.scipy.ndimage import center_of_mass # NOQA
+from cupyx.scipy.ndimage import histogram # NOQA
+from cupyx.scipy.ndimage import labeled_comprehension # NOQA
diff --git a/python/cucim/src/cucim/skimage/_vendored/signaltools.py b/python/cucim/src/cucim/skimage/_vendored/signaltools.py
index c54415e99..3184055fa 100644
--- a/python/cucim/src/cucim/skimage/_vendored/signaltools.py
+++ b/python/cucim/src/cucim/skimage/_vendored/signaltools.py
@@ -424,7 +424,7 @@ def choose_conv_method(in1, in2, mode="full", measure=False):
returns `direct` (e.g., to protect against floating point integer
precision).
- .. versionadded:: 0.19
+ .. versionadded:: 22.02.00
Examples
--------
diff --git a/python/cucim/src/cucim/skimage/exposure/exposure.py b/python/cucim/src/cucim/skimage/exposure/exposure.py
index f7c0eeb4f..2b16508f2 100644
--- a/python/cucim/src/cucim/skimage/exposure/exposure.py
+++ b/python/cucim/src/cucim/skimage/exposure/exposure.py
@@ -29,9 +29,7 @@ def _offset_array(arr, low_boundary, high_boundary):
# prevent overflow errors when offsetting
arr = arr.astype(offset_dtype)
arr = arr - offset
- else:
- offset = 0
- return arr, offset
+ return arr
def _bincount_histogram_centers(image, source_range):
@@ -75,8 +73,10 @@ def _bincount_histogram(image, source_range, bin_centers=None):
if bin_centers is None:
bin_centers = _bincount_histogram_centers(image, source_range)
image_min, image_max = bin_centers[0], bin_centers[-1]
- image, offset = _offset_array(image, image_min.item(), image_max.item()) # synchronize # noqa
- hist = cp.bincount(image.ravel(), minlength=image_max - image_min + 1)
+ image = _offset_array(image, image_min.item(), image_max.item()) # synchronize # noqa
+ hist = cp.bincount(
+ image.ravel(), minlength=image_max - min(image_min, 0) + 1
+ )
if source_range == 'image':
idx = max(image_min, 0)
hist = hist[idx:]
diff --git a/python/cucim/src/cucim/skimage/exposure/tests/test_exposure.py b/python/cucim/src/cucim/skimage/exposure/tests/test_exposure.py
index f27c8fff7..c0f6792ff 100644
--- a/python/cucim/src/cucim/skimage/exposure/tests/test_exposure.py
+++ b/python/cucim/src/cucim/skimage/exposure/tests/test_exposure.py
@@ -21,7 +21,9 @@
def test_wrong_source_range():
im = cp.array([-1, 100], dtype=cp.int8)
with pytest.raises(ValueError):
- frequencies, bin_centers = exposure.histogram(im, source_range="foobar")
+ frequencies, bin_centers = exposure.histogram(
+ im, source_range="foobar"
+ )
def test_negative_overflow():
@@ -50,6 +52,15 @@ def test_int_range_image():
assert bin_centers[-1] == 100
+def test_multichannel_int_range_image():
+ im = cp.array([[10, 5], [100, 102]], dtype=np.int8)
+ frequencies, bin_centers = exposure.histogram(im, channel_axis=-1)
+ for ch in range(im.shape[-1]):
+ assert len(frequencies[ch]) == len(bin_centers)
+ assert bin_centers[0] == 5
+ assert bin_centers[-1] == 102
+
+
def test_peak_uint_range_dtype():
im = cp.array([10, 100], dtype=cp.uint8)
frequencies, bin_centers = exposure.histogram(im, source_range="dtype")
@@ -286,11 +297,7 @@ def test_rescale_in_range_clip():
def test_rescale_out_range(dtype):
"""Check that output range is correct.
- .. versionchanged:: 0.17
- This function used to return dtype matching the input dtype. It now
- matches the output.
-
- .. versionchanged:: 0.19
+ .. versionchanged:: 22.02.00
float16 and float32 inputs now result in float32 output. Formerly they
would give float64 outputs.
"""
diff --git a/python/cucim/src/cucim/skimage/feature/_basic_features.py b/python/cucim/src/cucim/skimage/feature/_basic_features.py
index 6953b846d..9807906d7 100644
--- a/python/cucim/src/cucim/skimage/feature/_basic_features.py
+++ b/python/cucim/src/cucim/skimage/feature/_basic_features.py
@@ -6,6 +6,7 @@
import cupy as cp
import numpy as np
+from .._shared._gradient import gradient
from cucim.skimage import feature, filters
from cucim.skimage._shared import utils
from cucim.skimage.util import img_as_float32
@@ -14,7 +15,7 @@
def _texture_filter(gaussian_filtered):
combos = combinations_with_replacement
H_elems = [
- cp.gradient(cp.gradient(gaussian_filtered)[ax0], axis=ax1)
+ gradient(gradient(gaussian_filtered)[ax0], axis=ax1)
for ax0, ax1 in combos(range(gaussian_filtered.ndim), 2)
]
eigvals = feature.hessian_matrix_eigvals(H_elems)
diff --git a/python/cucim/src/cucim/skimage/feature/_canny.py b/python/cucim/src/cucim/skimage/feature/_canny.py
index b9d0e7c80..56d0f5cab 100644
--- a/python/cucim/src/cucim/skimage/feature/_canny.py
+++ b/python/cucim/src/cucim/skimage/feature/_canny.py
@@ -12,7 +12,7 @@
Original author: Lee Kamentsky
"""
import cupy as cp
-import cupyx.scipy.ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from cucim.skimage.util import dtype_limits
@@ -201,7 +201,7 @@ def _generate_nonmaximum_suppression_op(large_int=False):
@cp.memoize(for_each_device=True)
def _get_nonmax_kernel(large_int=False):
in_params = ('raw T isobel, raw T jsobel, raw T magnitude, '
- 'raw uint8 eroded_mask, T low_threshold')
+ 'raw uint8 eroded_mask, float64 low_threshold')
out_params = 'T out'
name = 'cupyx_skimage_canny_nonmaximum_suppression'
if large_int:
diff --git a/python/cucim/src/cucim/skimage/feature/corner.py b/python/cucim/src/cucim/skimage/feature/corner.py
index 65b90315f..f52f98ad7 100644
--- a/python/cucim/src/cucim/skimage/feature/corner.py
+++ b/python/cucim/src/cucim/skimage/feature/corner.py
@@ -3,12 +3,13 @@
import cupy as cp
import numpy as np
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from scipy import spatial # TODO: use RAPIDS cuSpatial?
from cucim.skimage.util import img_as_float
# from ..transform import integral_image
+from .._shared._gradient import gradient
from .._shared.utils import _supported_float_type
from .peak import peak_local_max
from .util import _prepare_grayscale_input_nD
@@ -217,14 +218,14 @@ def hessian_matrix(image, sigma=1, mode='constant', cval=0, order='rc'):
gaussian_filtered = gaussian(image, sigma=sigma, mode=mode, cval=cval,
channel_axis=channel_axis)
- gradients = cp.gradient(gaussian_filtered)
+ gradients = gradient(gaussian_filtered)
axes = range(image.ndim)
if order == "rc":
axes = reversed(axes)
H_elems = [
- cp.gradient(gradients[ax0], axis=ax1)
+ gradient(gradients[ax0], axis=ax1)
for ax0, ax1 in combinations_with_replacement(axes, 2)
]
diff --git a/python/cucim/src/cucim/skimage/feature/peak.py b/python/cucim/src/cucim/skimage/feature/peak.py
index eff5f6735..fbb2570f7 100644
--- a/python/cucim/src/cucim/skimage/feature/peak.py
+++ b/python/cucim/src/cucim/skimage/feature/peak.py
@@ -1,7 +1,7 @@
from warnings import warn
import cupy as cp
-import cupyx.scipy.ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
import numpy as np
from scipy.ndimage import find_objects as cpu_find_objects
diff --git a/python/cucim/src/cucim/skimage/feature/tests/test_canny.py b/python/cucim/src/cucim/skimage/feature/tests/test_canny.py
index a10006b95..6e06114fc 100644
--- a/python/cucim/src/cucim/skimage/feature/tests/test_canny.py
+++ b/python/cucim/src/cucim/skimage/feature/tests/test_canny.py
@@ -1,6 +1,5 @@
-import unittest
-
import cupy as cp
+import pytest
from cupy.testing import assert_array_equal
from cupyx.scipy.ndimage import binary_dilation, binary_erosion
from skimage import data
@@ -9,18 +8,18 @@
from cucim.skimage.util import img_as_float
-class TestCanny(unittest.TestCase):
+class TestCanny():
def test_00_00_zeros(self):
"""Test that the Canny filter finds no points for a blank field"""
result = feature.canny(cp.zeros((20, 20)), 4, 0, 0, cp.ones((20, 20),
bool))
- self.assertFalse(cp.any(result))
+ assert not cp.any(result)
def test_00_01_zeros_mask(self):
"""Test that the Canny filter finds no points in a masked image"""
result = (feature.canny(cp.random.uniform(size=(20, 20)), 4, 0, 0,
cp.zeros((20, 20), bool)))
- self.assertFalse(cp.any(result))
+ assert not cp.any(result)
def test_01_01_circle(self):
"""Test that the Canny filter finds the outlines of a circle"""
@@ -36,7 +35,7 @@ def test_01_01_circle(self):
cd = binary_dilation(c, iterations=3, brute_force=True)
ce = binary_erosion(c, iterations=3, brute_force=True)
cde = cp.logical_and(cd, cp.logical_not(ce))
- self.assertTrue(cp.all(cde[result]))
+ assert cp.all(cde[result])
#
# The circle has a radius of 100. There are two rings here, one
# for the inside edge and one for the outside. So that's
@@ -44,8 +43,8 @@ def test_01_01_circle(self):
# The edge contains both pixels if there's a tie, so we
# bump the count a little.
point_count = cp.sum(result)
- self.assertTrue(point_count > 1200)
- self.assertTrue(point_count < 1600)
+ assert point_count > 1200
+ assert point_count < 1600
def test_01_02_circle_with_noise(self):
"""Test that the Canny filter finds the circle outlines
@@ -62,24 +61,30 @@ def test_01_02_circle_with_noise(self):
cd = binary_dilation(c, iterations=4, brute_force=True)
ce = binary_erosion(c, iterations=4, brute_force=True)
cde = cp.logical_and(cd, cp.logical_not(ce))
- self.assertTrue(cp.all(cde[result]))
+ assert cp.all(cde[result])
point_count = cp.sum(result)
- self.assertTrue(point_count > 1200)
- self.assertTrue(point_count < 1600)
+ assert point_count > 1200
+ assert point_count < 1600
def test_image_shape(self):
- self.assertRaises(ValueError, feature.canny, cp.zeros((20, 20, 20)), 4,
- 0, 0)
+ with pytest.raises(ValueError):
+ feature.canny(cp.zeros((20, 20, 20)), 4, 0, 0)
def test_mask_none(self):
result1 = feature.canny(cp.zeros((20, 20)), 4, 0, 0, cp.ones((20, 20),
bool))
result2 = feature.canny(cp.zeros((20, 20)), 4, 0, 0)
- self.assertTrue(cp.all(result1 == result2))
+ assert cp.all(result1 == result2)
@cp.testing.with_requires("scikit-image>=0.18")
- def test_use_quantiles(self):
- image = img_as_float(cp.asarray(data.camera()[::100, ::100]))
+ @pytest.mark.parametrize('image_dtype', [cp.uint8, cp.int64, cp.float32,
+ cp.float64])
+ def test_use_quantiles(self, image_dtype):
+ dtype = cp.dtype(image_dtype)
+ image = cp.asarray(data.camera()[::100, ::100])
+ if dtype.kind == 'f':
+ image = img_as_float(image)
+ image = image.astype(dtype)
# Correct output produced manually with quantiles
# of 0.8 and 0.6 for high and low respectively
@@ -96,24 +101,33 @@ def test_use_quantiles(self):
assert_array_equal(result, correct_output)
+ def test_img_all_ones(self):
+ image = cp.ones((10, 10))
+ assert cp.all(feature.canny(image) == 0)
+
def test_invalid_use_quantiles(self):
image = img_as_float(cp.array(data.camera()[::50, ::50]))
- self.assertRaises(ValueError, feature.canny, image, use_quantiles=True,
+ with pytest.raises(ValueError):
+ feature.canny(image, use_quantiles=True,
low_threshold=0.5, high_threshold=3.6)
- self.assertRaises(ValueError, feature.canny, image, use_quantiles=True,
+ with pytest.raises(ValueError):
+ feature.canny(image, use_quantiles=True,
low_threshold=-5, high_threshold=0.5)
- self.assertRaises(ValueError, feature.canny, image, use_quantiles=True,
+ with pytest.raises(ValueError):
+ feature.canny(image, use_quantiles=True,
low_threshold=99, high_threshold=0.9)
- self.assertRaises(ValueError, feature.canny, image, use_quantiles=True,
+ with pytest.raises(ValueError):
+ feature.canny(image, use_quantiles=True,
low_threshold=0.5, high_threshold=-100)
# Example from issue #4282
image = data.camera()
- self.assertRaises(ValueError, feature.canny, image, use_quantiles=True,
+ with pytest.raises(ValueError):
+ feature.canny(image, use_quantiles=True,
low_threshold=50, high_threshold=150)
def test_dtype(self):
diff --git a/python/cucim/src/cucim/skimage/filters/_gabor.py b/python/cucim/src/cucim/skimage/filters/_gabor.py
index e55720357..df4c041c4 100644
--- a/python/cucim/src/cucim/skimage/filters/_gabor.py
+++ b/python/cucim/src/cucim/skimage/filters/_gabor.py
@@ -1,7 +1,8 @@
import math
import cupy as cp
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
+
from .._shared.utils import _supported_float_type, check_nD, warn
diff --git a/python/cucim/src/cucim/skimage/filters/_median.py b/python/cucim/src/cucim/skimage/filters/_median.py
index d0da048c9..2041f4161 100644
--- a/python/cucim/src/cucim/skimage/filters/_median.py
+++ b/python/cucim/src/cucim/skimage/filters/_median.py
@@ -1,15 +1,27 @@
from warnings import warn
+import cupy as cp
import numpy as np
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from .._shared.utils import deprecate_kwarg
+from ._median_hist import _can_use_histogram, _median_hist, KernelResourceError
+try:
+ from math import prod
+except ImportError:
+ from functools import reduce
+ from operator import mul
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+ def prod(x):
+ return reduce(mul, x)
+
+
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00",
+ deprecated_version="22.02.00")
def median(image, footprint=None, out=None, mode='nearest', cval=0.0,
- behavior='ndimage'):
+ behavior='ndimage', *, algorithm='auto', algorithm_kwargs={}):
"""Return local median of an image.
Parameters
@@ -47,6 +59,20 @@ def median(image, footprint=None, out=None, mode='nearest', cval=0.0,
.. versionchanged:: 0.16
Default ``behavior`` has been changed from 'rank' to 'ndimage'
+ Other Parameters
+ ----------------
+ algorithm : {'auto', 'histogram', 'sorting'}
+ Determines which algorithm is used to compute the median. The default
+ of 'auto' will attempt to use a histogram-based algorithm for 2D
+ images with 8 or 16-bit integer data types. Otherwise a sorting-based
+        algorithm will be used. Note: this parameter is cuCIM-specific and does
+ not exist in upstream scikit-image.
+ algorithm_kwargs : dict
+ Any additional algorithm-specific keywords. Currently can only be used
+ to set the number of parallel partitions for the 'histogram' algorithm.
+        (e.g. ``algorithm_kwargs={'partitions': 256}``). Note: this parameter is
+ cuCIM-specific and does not exist in upstream scikit-image.
+
Returns
-------
out : 2-D array (same dtype as input image)
@@ -58,6 +84,22 @@ def median(image, footprint=None, out=None, mode='nearest', cval=0.0,
filtering offering more flexibility with additional parameters but
dedicated for unsigned integer images.
+ Notes
+ -----
+ An efficient, histogram-based median filter as described in [1]_ is faster
+ than the sorting based approach for larger kernel sizes (e.g. greater than
+ 13x13 or so in 2D). It has near-constant run time regardless of the kernel
+ size. The algorithm presented in [1]_ has been adapted to additional bit
+ depths here. When algorithm='auto', the histogram-based algorithm will be
+ chosen for integer-valued images with sufficiently large footprint size.
+ Otherwise, the sorting-based approach is used.
+
+ References
+ ----------
+ .. [1] O. Green, "Efficient Scalable Median Filtering Using Histogram-Based
+ Operations," in IEEE Transactions on Image Processing, vol. 27, no. 5,
+ pp. 2217-2228, May 2018, https://doi.org/10.1109/TIP.2017.2781375.
+
Examples
--------
>>> import cupy as cp
@@ -79,5 +121,65 @@ def median(image, footprint=None, out=None, mode='nearest', cval=0.0,
if footprint is None:
footprint = ndi.generate_binary_structure(image.ndim, image.ndim)
+
+ if algorithm == 'sorting':
+ can_use_histogram = False
+ elif algorithm in ['auto', 'histogram']:
+ can_use_histogram, reason = _can_use_histogram(image, footprint)
+ else:
+ raise ValueError(f"unknown algorithm: {algorithm}")
+
+ if algorithm == 'histogram' and not can_use_histogram:
+ raise ValueError(
+ "The histogram-based algorithm was requested, but it cannot "
+ f"be used for this image and footprint (reason: {reason})."
+ )
+
+ # The sorting-based implementation in CuPy is faster for small footprints.
+    # Empirically, shapes of (13, 13) and above on RTX A6000 have faster
+ # execution for the histogram-based approach.
+ use_histogram = can_use_histogram
+ if algorithm == 'auto':
+ # prefer sorting-based algorithm if footprint shape is small
+ use_histogram = use_histogram and prod(footprint.shape) > 150
+
+ if use_histogram:
+ try:
+ # as in SciPy, a user-provided `out` can be an array or a dtype
+ output_array_provided = False
+ out_dtype = None
+ if out is not None:
+ output_array_provided = isinstance(out, cp.ndarray)
+ if not output_array_provided:
+ try:
+ out_dtype = cp.dtype(out)
+ except TypeError:
+ raise TypeError(
+ "out must be either a cupy.array or a valid input "
+ "to cupy.dtype"
+ )
+
+ # TODO: Can't currently pass an output array into _median_hist as a
+ # new array currently needs to be created during padding.
+ temp = _median_hist(image, footprint, mode=mode, cval=cval,
+ **algorithm_kwargs)
+ if output_array_provided:
+ out[:] = temp
+ else:
+ if out_dtype is not None:
+ temp = temp.astype(out_dtype, copy=False)
+ out = temp
+ return out
+ except KernelResourceError as e:
+ # Fall back to sorting-based implementation if we encounter a
+ # resource limit (e.g. insufficient shared memory per block).
+ warn("Kernel resource error encountered in histogram-based "
+ f"median kerne: {e}\n"
+ "Falling back to sorting-based median instead.")
+
+ if algorithm_kwargs:
+ warn(f"algorithm_kwargs={algorithm_kwargs} ignored for sorting-based "
+ f"algorithm")
+
return ndi.median_filter(image, footprint=footprint, output=out, mode=mode,
cval=cval)
diff --git a/python/cucim/src/cucim/skimage/filters/_median_hist.py b/python/cucim/src/cucim/skimage/filters/_median_hist.py
new file mode 100644
index 000000000..0700238c0
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/filters/_median_hist.py
@@ -0,0 +1,515 @@
+import math
+import os
+from collections import namedtuple
+from textwrap import dedent
+
+import cupy as cp
+import numpy as np
+
+from .._shared.utils import _to_np_mode
+
+# Prefer the standard library's ``math.prod`` when this Python provides it and
+# fall back to NumPy's product otherwise.
+prod = getattr(math, 'prod', None)
+if prod is None:
+    prod = np.prod
+
+
+def _dtype_to_CUDA_int_type(dtype):
+    """Return the CUDA C++ integer type name corresponding to `dtype`.
+
+    Parameters
+    ----------
+    dtype : dtype-like
+        Any object accepted by ``cupy.dtype``.
+
+    Returns
+    -------
+    str
+        The C++ type name (e.g. ``'unsigned char'`` for ``cupy.uint8``).
+
+    Raises
+    ------
+    ValueError
+        If `dtype` is not one of the 8, 16, 32 or 64-bit signed or unsigned
+        integer types.
+    """
+    scalar_type = cp.dtype(dtype).type
+    type_names = {
+        # unsigned integers
+        cp.uint8: 'unsigned char',
+        cp.uint16: 'unsigned short',
+        cp.uint32: 'unsigned int',
+        cp.uint64: 'unsigned long long',
+        # signed integers
+        cp.int8: 'signed char',
+        cp.int16: 'short',
+        cp.int32: 'int',
+        cp.int64: 'long long',
+    }
+    cuda_name = type_names.get(scalar_type)
+    if cuda_name is None:
+        raise ValueError(f"unrecognized dtype: {scalar_type}")
+    return cuda_name
+
+
+def _get_hist_dtype(footprint_shape):
+    """Determine C++ type and cupy.dtype to use for the histogram.
+
+    The narrowest signed integer type whose positive range can hold the
+    number of elements in the footprint is selected.
+
+    Parameters
+    ----------
+    footprint_shape : tuple of int
+        Shape of the footprint; its product is the largest count a single
+        histogram bin is assumed to reach.
+
+    Returns
+    -------
+    hist_int_t : str
+        C++ name of the histogram integer type.
+    hist_dtype : type
+        The matching CuPy scalar type (int8, int16 or int32).
+    """
+    n_elements = prod(footprint_shape)
+
+    # (exclusive upper bound on the count, dtype able to store such counts)
+    for exclusive_limit, candidate in ((128, cp.int8), (32768, cp.int16)):
+        if n_elements < exclusive_limit:
+            hist_dtype = candidate
+            break
+    else:
+        hist_dtype = cp.int32
+    return _dtype_to_CUDA_int_type(hist_dtype), hist_dtype
+
+
+def _gen_global_definitions(
+    image_t='unsigned char',
+    hist_offset=0,
+    hist_int_t='int',
+    hist_size=256,
+    hist_size_coarse=8
+):
+    """Generate C++ #define statements needed for the CUDA kernels.
+
+    The definitions used depend on the number of histogram bins and the
+    histogram data type.
+
+    Parameters
+    ----------
+    image_t : str, optional
+        C++ type name substituted for ``IMAGE_T``.
+    hist_offset : int, optional
+        Value substituted for ``HIST_OFFSET``.
+    hist_int_t : str, optional
+        C++ type name substituted for ``HIST_INT_T``.
+    hist_size : int, optional
+        Total number of histogram bins. Must be a multiple of
+        `hist_size_coarse`.
+    hist_size_coarse : int, optional
+        Number of coarse bins. Both this value and
+        ``hist_size // hist_size_coarse`` (the number of fine bins) must be
+        powers of two.
+
+    Returns
+    -------
+    global_defs : str
+        A block of ``#define`` lines.
+
+    Raises
+    ------
+    ValueError
+        If `hist_size_coarse` is not positive, if `hist_size` is not a multiple
+        of it, or if either the coarse or fine bin count is not a power of two.
+    """
+
+    def _exact_log2(value, name):
+        # Exact integer power-of-two test; the error names the offending
+        # quantity rather than its logarithm.
+        as_int = int(value)
+        if as_int != value or as_int < 1 or as_int & (as_int - 1):
+            raise ValueError(f"`{name}` must be a positive power of two")
+        return as_int.bit_length() - 1
+
+    if hist_size_coarse < 1:
+        # also avoids a ZeroDivisionError in the modulo below
+        raise ValueError("`hist_size_coarse` must be a positive integer")
+    if hist_size % hist_size_coarse != 0:
+        raise ValueError(
+            "`hist_size` must be a multiple of `hist_size_coarse`"
+        )
+    hist_size_fine = hist_size // hist_size_coarse
+    log2_coarse = _exact_log2(hist_size_coarse, 'hist_size_coarse')
+    log2_fine = _exact_log2(hist_size_fine, 'hist_size_fine')
+
+    global_defs = f"""
+#define HIST_SIZE {hist_size}
+#define HIST_SIZE_COARSE {hist_size_coarse}
+#define HIST_SIZE_FINE {hist_size_fine}
+#define HIST_INT_T {hist_int_t}
+#define HIST_OFFSET {hist_offset}
+#define IMAGE_T {image_t}
+#define LOG2_COARSE {log2_coarse}
+#define LOG2_FINE {log2_fine}
+    """
+    return global_defs
+
+
+# TODO: look into using CUB's Block-wise collectives (e.g. BlockScan)
+def _gen_preamble_median(hist_size_coarse, hist_size_fine):
+ """Generate bin size-dependent reduction functions.
+
+ This helper generates the C++ code for the following two functions.
+
+ histogramMedianParCoarseLookupOnly
+ histogramMedianParFineLookupOnly
+
+ For each of these the number of calls to scanNeighbor is equal to
+ log2 of the number of bins.
+ """
+ n_log2_coarse = math.log2(hist_size_coarse)
+ if hist_size_coarse < 2 or n_log2_coarse % 1.0 != 0:
+ raise ValueError("hist_size_coarse must be a positive power of 2")
+
+ n_log2_fine = math.log2(hist_size_fine)
+ if hist_size_fine < 2 or n_log2_fine % 1.0 != 0:
+ raise ValueError("hist_size_fine must be a positive power of 2")
+
+ ops = """
+
+ #define scanNeighbor(array, range, index, threadIndex) \\
+ { \\
+ HIST_INT_T v = 0; \\
+ if (index <= threadIndex && threadIndex < range) \\
+ v = array[threadIndex] + array[threadIndex-index]; \\
+ __syncthreads(); \\
+ if (index <= threadIndex && threadIndex < range) \\
+ array[threadIndex] = v; \\
+ }
+
+ #define findMedian(array, range, threadIndex, result, count, position) \\
+ if (threadIndex < range) \\
+ { \\
+ if (array[threadIndex+1] > position && array[threadIndex] <= position) \\
+ { \\
+ *result = threadIndex+1; \\
+ *count = array[threadIndex]; \\
+ } \\
+ }
+
+ __device__ void histogramMedianParCoarseLookupOnly(HIST_INT_T* H, HIST_INT_T* Hscan, const int medPos, int* retval, int* countAtMed){
+ int tx=threadIdx.x;
+ *retval=*countAtMed=0;
+ if(tx= 3"
+
+ # footprint radius can't be larger than the image
+ # TODO: need to check if we need this exact restriction
+ # (may be specific to OpenCV's boundary handling)
+ radii = tuple(s // 2 for s in footprint.shape)
+ if any(r > s for r, s in zip(radii, image.shape)):
+ return False, "footprint half-width cannot exceed the image extent"
+
+ # only fully populated footprint is supported
+ if not np.all(footprint): # synchronizes!
+ return False, "footprint must be 1 everywhere"
+
+ return True, None
+
+
+class KernelResourceError(RuntimeError):
+    """Raised when a kernel's launch requirements exceed a device limit."""
+
+
+def _get_kernel_params(image, footprint_shape, value_range='auto',
+                       partitions=None, hist_size_coarse=None):
+    """Determine kernel launch parameters and #define values for its code.
+
+    Parameters
+    ----------
+    image : cupy.ndarray
+        The histogram bin range will depend on the image dtype unless specified
+        explicitly via `value_range`.
+    footprint_shape : tuple of int
+        The shape of the footprint. The dtype used for storing the histogram
+        depends on the footprint size (see `_get_hist_dtype`): int8 for small
+        footprints, then int16, and int32 for the largest ones.
+    value_range : {'auto', 'dtype', 'image'} or (min, max) sequence, optional
+        When ``value_range='dtype'``, the range will be determined based on
+        the maximal range of the data type. When ``value_range='image'``, the
+        minimum and maximum intensities present in the image will be used.
+        When set to 'auto', 'dtype' is used for 8-bit images and otherwise
+        'image' is used. A length-2 sequence is taken as an explicit
+        ``(min, max)`` range.
+    partitions : positive int, optional
+        The grid size used during kernel launch will be (partitions, 1, 1).
+        Increasing this will increase parallelism (and thus performance), but
+        at cost of additional GPU memory usage. When None, a value of at most
+        ``image.shape[0] // 2`` (but never less than 1) is chosen. An explicit
+        value is truncated to ``image.shape[0]``.
+    hist_size_coarse : int or None, optional
+        Can be used to override the default choice of the number of coarse
+        histogram bins. It is not generally recommended to set this as
+        infeasible values can easily be chosen. Using None, will give
+        automatically selected values that have been validated in previous
+        testing.
+
+    Returns
+    -------
+    CUDAParams : namedtuple
+        Various parameters used in kernel code generation and at launch time.
+        See the comments next to the field names in the namedtuple
+        declaration below for details.
+
+    Raises
+    ------
+    ValueError
+        If `value_range` is not one of the accepted values, or if 'dtype' is
+        requested for an image whose elements are wider than 16 bits.
+    KernelResourceError
+        If the required block size or shared memory exceeds the device limit.
+    """
+
+    if value_range == 'auto':
+        if image.dtype.itemsize < 2:
+            value_range = 'dtype'
+        else:
+            # to save memory, try using actual value range for >8-bit images
+            # (e.g. DICOM images often have 12-bit range)
+            value_range = 'image'
+
+    if value_range == 'dtype':
+        if image.dtype.itemsize > 2:
+            raise ValueError(
+                "dtype range only supported for 8 and 16-bit integer dtypes."
+            )
+        iinfo = cp.iinfo(image.dtype)
+        minv, maxv = iinfo.min, iinfo.max
+    elif value_range == 'image':
+        minv = int(image.min())
+        maxv = int(image.max())
+    else:
+        if len(value_range) != 2:
+            raise ValueError(
+                "value_range must be either 'dtype', 'image' or a "
+                "(min, max) sequence."
+            )
+        minv, maxv = value_range
+
+    if image.dtype.kind == 'u':
+        # cannot subtract a positive offset in the unsigned case
+        minv = min(minv, 0)
+    hist_offset = 0 if minv == 0 else -minv
+    hist_size = maxv - minv + 1
+    hist_size = max(hist_size, 256)  # use at least 256 bins
+    # round hist_size up to the nearest power of 2
+    hist_size = round(2**math.ceil(math.log2(hist_size)))
+
+    if hist_size_coarse is None:
+        # Empirically, robust to choose 32-fold less bins for hist_size coarse
+        hist_size_coarse = hist_size // 32
+
+    # have to set block[0] large enough that histogramMedianParFineLookupOnly
+    # and histogramMedianParCoarseLookupOnly search sizes fit within the number
+    # of threads in the block.
+    # Use the maximum of the coarse and fine sizes, rounded up to the nearest
+    # multiple of 32.
+    hist_size_fine = hist_size // hist_size_coarse
+    hist_size_max = max(hist_size_fine, hist_size_coarse)
+    # block0 must be at least the warp size
+    block0 = 32 * math.ceil(hist_size_max / 32)
+    if block0 > 256:
+        d = cp.cuda.Device()
+        max_block_x = d.attributes["MaxBlockDimX"]
+        if block0 > max_block_x:
+            raise KernelResourceError(
+                f"The requested block size of {block0} for the first dimension"
+                f", exceeds MaxBlockDimX={max_block_x} for this device."
+            )
+
+    if partitions is None:
+        # Substantial overhead in computing the first line, so need at least
+        # two lines per partition for best performance. Limit to bins_max
+        # partitions by default to avoid overly excessive memory overhead.
+        bins_max = max(16, 512 // (hist_size // 256))
+        # A single-row image gives image.shape[0] // 2 == 0; always keep at
+        # least one partition so that the launch grid is never empty.
+        partitions = max(1, min(image.shape[0] // 2, bins_max))
+    else:
+        # cannot exceed the number of image rows
+        partitions = min(partitions, image.shape[0])
+
+    grid = (partitions, 1, 1)
+    block = (block0, 1, 1)
+
+    hist_int_t, hist_dtype = _get_hist_dtype(footprint_shape)
+
+    # All recent GPUs (CC>=3.5) allow at least 48k of shared memory per block,
+    # so don't bother checking the requirements unless thousands of histogram
+    # bins are requested.
+    # https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications  # noqa
+    if hist_size >= 8192:
+        smem_size = _check_shared_memory_requirement_bytes(
+            hist_dtype, hist_size_coarse, hist_size_fine
+        )
+        d = cp.cuda.Device()
+        smem_available = d.attributes['MaxSharedMemoryPerBlock']
+        if smem_size > smem_available:
+            raise KernelResourceError(
+                f"Shared memory requirement of {smem_size} bytes per block "
+                f"exceeds the device limit of {smem_available}."
+            )
+    CUDAParams = namedtuple(
+        'HistogramMedianKernelParams',
+        [
+            'grid',
+            'block',
+            'hist_size',  # total number of histogram bins
+            'hist_size_coarse',  # number of coarse-level histogram bins
+            'hist_dtype',  # cupy.dtype of the histogram
+            'hist_int_t',  # C++ type of the histogram
+            'hist_offset',  # offset from 0 for the first bin
+            'partitions'  # number of parallel bands to use
+        ]
+    )
+    return CUDAParams(
+        grid,
+        block,
+        hist_size,
+        hist_size_coarse,
+        hist_dtype,
+        hist_int_t,
+        hist_offset,
+        partitions,
+    )
+
+
+def _median_hist(image, footprint, output=None, mode='mirror', cval=0,
+                 value_range='auto', partitions=None):
+    """Median filter an image with the histogram-based CUDA kernel.
+
+    Parameters
+    ----------
+    image : cupy.ndarray
+        Integer or boolean image. It is made C-contiguous before the kernel
+        is launched; boolean data is viewed as uint8.
+    footprint : array
+        Footprint of the filter. It must pass `_can_use_histogram`.
+    output : None, optional
+        Must be None. User-provided output arrays are not implemented.
+    mode : str, optional
+        Boundary mode. The image is padded according to this mode before
+        filtering. For 'nearest' only the last axis is padded because the
+        kernel already assumes 'nearest' along axis 0.
+    cval : scalar, optional
+        Value passed to the padding when the converted mode is 'constant'.
+    value_range, partitions : optional
+        Forwarded to `_get_kernel_params`.
+
+    Returns
+    -------
+    out : cupy.ndarray
+        The filtered image with any padding removed again.
+
+    Raises
+    ------
+    NotImplementedError
+        If `output` is not None.
+    ValueError
+        If the image/footprint combination is unsupported or the image does
+        not have an integer dtype.
+    """
+    if output is not None:
+        raise NotImplementedError(
+            "Use of a user-defined output array has not been implemented"
+        )
+
+    supported, why_not = _can_use_histogram(image, footprint)
+    if not supported:
+        raise ValueError(why_not)
+
+    # kernel pointer offset calculations assume C-contiguous image data
+    image = cp.ascontiguousarray(image)
+    n_rows, n_cols = image.shape[:2]
+    if image.dtype.kind == 'b':
+        image = image.view(cp.uint8)
+    if image.dtype.kind not in 'iu':
+        raise ValueError("only integer-type images are accepted")
+
+    radii = tuple(extent // 2 for extent in footprint.shape)
+    # index of the median among the footprint elements
+    # (valid because every element of the footprint is assumed to be True)
+    med_pos = footprint.size // 2
+
+    params = _get_kernel_params(
+        image, footprint.shape, value_range, partitions
+    )
+
+    # Pad to avoid boundary artifacts. Axis 0 needs no padding for 'nearest'
+    # as that is what the kernel assumes internally.
+    pad_all_axes = mode != 'nearest'
+    if pad_all_axes:
+        pad_width = tuple((r, r) for r in radii)
+    else:
+        last_radius = radii[-1]
+        pad_width = ((0, 0),) * (image.ndim - 1) + (
+            (last_radius, last_radius),
+        )
+    mode = _to_np_mode(mode)
+    pad_kwargs = {'mode': mode}
+    if mode == 'constant':
+        pad_kwargs['constant_values'] = cval
+    image = cp.pad(image, pad_width, **pad_kwargs)
+    # the padded extents are the ones the kernel has to work with
+    n_rows, n_cols = image.shape[:2]
+
+    # generate the kernel
+    kern = _get_median_rawkernel(
+        image_t=_dtype_to_CUDA_int_type(image.dtype),
+        hist_offset=params.hist_offset,
+        hist_int_t=params.hist_int_t,
+        hist_size=params.hist_size,
+        hist_size_coarse=params.hist_size_coarse,
+    )
+
+    # allocate the output plus zero-initialized scratch histograms
+    out = cp.empty_like(image)
+    columns_times_partitions = n_cols * params.hist_size * params.partitions
+    hist = cp.zeros((columns_times_partitions,), params.hist_dtype)
+    coarse_hist = cp.zeros(
+        (n_cols * params.hist_size_coarse * params.partitions,),
+        params.hist_dtype,
+    )
+
+    # launch the kernel
+    r0, r1 = radii[:2]
+    kern(
+        params.grid,
+        params.block,
+        (image, out, hist, coarse_hist, r0, r1, med_pos, n_rows, n_cols),
+    )
+
+    # strip the padding that was added above
+    if pad_all_axes:
+        return out[tuple(slice(r, -r) for r in radii)]
+    return out[..., radii[-1]:-radii[-1]]
diff --git a/python/cucim/src/cucim/skimage/filters/_separable_filtering.py b/python/cucim/src/cucim/skimage/filters/_separable_filtering.py
new file mode 100644
index 000000000..eb8dad28a
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/filters/_separable_filtering.py
@@ -0,0 +1,990 @@
+import math
+
+import cupy as cp
+
+from cucim.skimage._vendored import _ndimage_util as util
+from cucim.skimage._vendored._internal import _normalize_axis_index, prod
+from cucim.skimage._vendored._ndimage_filters_core import (
+ _ndimage_CAST_FUNCTION, _ndimage_includes)
+
+
+def _get_constants(ndim, axis, kernel_size, anchor, patch_per_block=None):
+    """Choose the block shape, patches per block and halo size for a kernel.
+
+    Parameters
+    ----------
+    ndim : int
+        Number of image dimensions (2 or 3).
+    axis : int
+        Axis along which filtering occurs, in the range ``[0, ndim)``. In 2D,
+        axis 0 is "y" and axis 1 is "x"; in 3D, axis 0 is "z", axis 1 is "y"
+        and axis 2 is "x".
+    kernel_size : int
+        Length of the 1D filter kernel.
+    anchor : int or None
+        Anchor position within the kernel. ``kernel_size // 2`` is used when
+        None.
+    patch_per_block : int or None, optional
+        Requested number of patches processed per block (default: 4). The
+        returned value is raised to at least `halo_size`.
+
+    Returns
+    -------
+    block : tuple of int
+        ``(block_x, block_y, block_z)`` thread block dimensions.
+    patch_per_block : int
+        Patches per block, never smaller than `halo_size`.
+    halo_size : int
+        Halo extent, in units of the block size along `axis`. For simplicity
+        the same halo size is used at both the start and the end.
+
+    Raises
+    ------
+    NotImplementedError
+        If `ndim` is neither 2 nor 3.
+    ValueError
+        If `axis` is outside ``[0, ndim)``.
+    """
+    if ndim not in (2, 3):
+        raise NotImplementedError("Only 2D and 3D are currently supported")
+    if not 0 <= axis < ndim:
+        # previously this surfaced as an UnboundLocalError further below
+        raise ValueError(
+            f"axis must be in the range [0, {ndim}), got {axis}"
+        )
+
+    if anchor is None:
+        anchor = kernel_size // 2
+    halo_pixels_needed = max(kernel_size - anchor, anchor)
+    if patch_per_block is None:
+        patch_per_block = 4
+
+    # (block_x, block_y, block_z) for each (ndim, axis) combination. The
+    # shapes follow the original notes referring to OpenCV's row_filter.hpp
+    # and column_filter.hpp.
+    block_shapes = {
+        (2, 0): (32, 8, 1),
+        (2, 1): (16, 16, 1),
+        (3, 0): (32, 4, 4),
+        (3, 1): (32, 4, 4),
+        (3, 2): (16, 4, 4),
+    }
+    block = block_shapes[(ndim, axis)]
+
+    # `block` is ordered (x, y, z) whereas array axes are ordered (z, y, x),
+    # so the block extent along `axis` sits at index ``ndim - 1 - axis``.
+    block_extent = block[ndim - 1 - axis]
+    halo_size = math.ceil(halo_pixels_needed / block_extent)
+
+    # can have out of bounds access unless patch_per_block >= halo_size
+    patch_per_block = max(patch_per_block, halo_size)
+    return block, patch_per_block, halo_size
+
+
+def _get_smem_shape(ndim, axis, block, patch_per_block, halo_size, anchor=None,
+                    image_dtype=cp.float32):
+    """Compute the shape and size in bytes of the shared memory tile.
+
+    Parameters
+    ----------
+    ndim : int
+        Number of image dimensions (2 or 3).
+    axis : int
+        Axis along which filtering occurs, in the range ``[0, ndim)``.
+    block : tuple of int
+        ``(block_x, block_y, block_z)`` thread block dimensions.
+    patch_per_block : int
+        Number of patches processed per block.
+    halo_size : int
+        Halo extent on each side, in units of the block size along `axis`.
+    anchor : int or None, optional
+        Unused. Retained so that existing callers keep working.
+    image_dtype : dtype-like, optional
+        Element type stored in the tile.
+
+    Returns
+    -------
+    shape : tuple of int
+        Tile shape in array axis order, i.e. ``(y, x)`` or ``(z, y, x)``.
+    nbytes : int
+        Number of bytes required to store the tile.
+
+    Raises
+    ------
+    NotImplementedError
+        If `ndim` is neither 2 nor 3.
+    ValueError
+        If `axis` is outside ``[0, ndim)``.
+    """
+    if ndim not in (2, 3):
+        raise NotImplementedError("Only 2D and 3D are currently supported")
+    if not 0 <= axis < ndim:
+        # previously `shape` was left unassigned for such an axis
+        raise ValueError(
+            f"axis must be in the range [0, {ndim}), got {axis}"
+        )
+
+    bx, by, bz = block
+    # reorder the block extents from (x, y, z) to array axis order
+    extents = [by, bx] if ndim == 2 else [bz, by, bx]
+    # along the filtered axis the tile holds all patches plus both halos
+    extents[axis] = (patch_per_block + 2 * halo_size) * extents[axis]
+    shape = tuple(extents)
+    nbytes = cp.dtype(image_dtype).itemsize * prod(shape)
+    return shape, nbytes
+
+
+def _get_warp_size(device_id=None):
+    """Return the warp size listed in the properties of a CUDA device.
+
+    Parameters
+    ----------
+    device_id : int or None, optional
+        Device to query. The current device is used when None.
+    """
+    runtime = cp.cuda.runtime
+    target = runtime.getDevice() if device_id is None else device_id
+    return runtime.getDeviceProperties(target)['warpSize']
+
+
+def _get_shmem_limits(device_id=None):
+    """Gather shared memory related entries from the CUDA device properties.
+
+    Parameters
+    ----------
+    device_id : int or None, optional
+        Device to query. The current device is used when None.
+
+    Returns
+    -------
+    dict
+        Contains 'device_id' plus one entry per queried property. An entry is
+        None when the property is missing from the device properties.
+    """
+    if device_id is None:
+        device_id = cp.cuda.runtime.getDevice()
+    device_props = cp.cuda.runtime.getDeviceProperties(device_id)
+
+    # (key in the returned dict, name of the device property)
+    queried = (
+        ('shared_mp', 'sharedMemPerMultiprocessor'),
+        ('shared_block', 'sharedMemPerBlock'),
+        ('shared_block_optin', 'sharedMemPerBlockOptin'),
+        ('global_l1_cache_supported', 'globalL1CacheSupported'),
+        ('local_l1_cache_supported', 'localL1CacheSupported'),
+        ('l2_size', 'l2CacheSize'),
+        ('warp_size', 'warpSize'),
+        ('regs_per_block', 'regsPerBlock'),
+    )
+    limits = {'device_id': device_id}
+    for key, property_name in queried:
+        limits[key] = device_props.get(property_name, None)
+    return limits
+
+
+class ResourceLimitError(RuntimeError):
+    """Raised when a kernel needs more shared memory than is available."""
+
+
+@cp.memoize(for_each_device=True)
+def _check_smem_availability(ndim, axis, kernel_size, anchor=None,
+                             patch_per_block=None, image_dtype=cp.float32,
+                             device_id=None):
+    """Raise if the shared memory tile would exceed the per-block limit.
+
+    Parameters
+    ----------
+    ndim : int
+        Number of image dimensions.
+    axis : int
+        Axis along which filtering occurs.
+    kernel_size : int
+        Length of the 1D filter kernel.
+    anchor : int or None, optional
+        Anchor position within the kernel.
+    patch_per_block : int or None, optional
+        Requested number of patches per block.
+    image_dtype : dtype-like, optional
+        Element type that will be stored in shared memory.
+    device_id : int or None, optional
+        Device whose limits are checked. The current device is used when None.
+
+    Raises
+    ------
+    ResourceLimitError
+        If the tile needs more bytes than the device's 'sharedMemPerBlock'.
+    """
+    block, patch_per_block, halo_size = _get_constants(
+        ndim, axis, kernel_size, anchor=anchor, patch_per_block=patch_per_block
+    )
+    # `image_dtype` has to be passed by keyword: the sixth positional
+    # parameter of `_get_smem_shape` is `anchor`, so passing the dtype
+    # positionally left the size computed for the default float32.
+    shape, nbytes = _get_smem_shape(
+        ndim, axis, block, patch_per_block, halo_size, anchor=anchor,
+        image_dtype=image_dtype
+    )
+    props = _get_shmem_limits(device_id=device_id)
+    smem_limit = props['shared_block']
+    # the limit is None when the property was not reported for the device;
+    # there is nothing to compare against in that case
+    if smem_limit is not None and nbytes > smem_limit:
+        raise ResourceLimitError("inadequate shared memory available")
+
+
+# Lookup table from single-character dtype codes to C/C++ type names.
+# NOTE(review): 'F' and 'D' (the complex64/complex128 codes) both map to a
+# bare 'complex'; presumably template arguments were lost from these two
+# entries -- confirm against the upstream file.
+_dtype_char_to_c_types = {
+ 'e': 'float16',
+ 'f': 'float',
+ 'd': 'double',
+ 'F': 'complex',
+ 'D': 'complex',
+ '?': 'char',
+ 'b': 'char',
+ 'h': 'short',
+ 'i': 'int',
+ 'l': 'long long',
+ 'B': 'unsigned char',
+ 'H': 'unsigned short',
+ 'I': 'unsigned int',
+ 'L': 'unsigned long long',
+}
+
+
+def _get_code_stage1_shared_memory_load_2d(ndim, axis, mode, cval):
+ """Generates the first stage of the function body.
+
+ This involves just copying from the `src` array into the `smem` shared
+ memory array followed by a call to __syncthreads(). All boundary
+ handling also occurs within this function.
+
+ Parameters
+ ----------
+ ndim : int
+ Number of image dimensions. Only ``ndim == 2`` is handled here.
+ axis : int
+ Axis being filtered (0 or 1). Selects which code template is emitted.
+ mode : str
+ Boundary mode. For 'constant', out-of-range elements are filled with
+ `cval`; otherwise the boundary index code is obtained from
+ ``util._generate_boundary_condition_ops``.
+ cval : scalar
+ Fill value interpolated into the generated code for 'constant' mode.
+
+ Returns
+ -------
+ code : str
+ Source fragment that fills `smem` and ends with __syncthreads().
+ """
+
+ # NOTE(review): 'grid-constant' skips generation of the boundary code below,
+ # but the later checks only test for 'constant'; presumably callers map
+ # 'grid-constant' to 'constant' beforehand -- TODO confirm.
+ # NOTE(review): `code` is only assigned for (ndim, axis) equal to (2, 0) or
+ # (2, 1); other combinations look unsupported here -- TODO confirm.
+ if ndim == 2 and axis == 0:
+ if mode not in ['constant', 'grid-constant']:
+ boundary_code_lower, boundary_code_upper = util._generate_boundary_condition_ops(mode, 'row', 'n_rows', separate=True) # noqa
+
+ # as in OpenCV's column_filter.hpp
+ code = """
+ __shared__ T smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X];
+ const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;
+ if (x >= n_cols){
+ return;
+ }
+ const T* src_col = &src[x];
+ const int yStart = blockIdx.y * (BLOCK_DIM_Y * PATCH_PER_BLOCK) + threadIdx.y;
+
+ // memory is contiguous along last (columns) axis
+ const int row_stride = n_cols; // stride (in elements) along axis 0
+ int row;
+
+ if (blockIdx.y > 0)
+ {
+ //Upper halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y) * row_stride];
+ }
+ else
+ {
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j) {
+ row = yStart - (HALO_SIZE - j) * BLOCK_DIM_Y;
+ """ # noqa
+ # lower boundary: either a constant fill or mode-specific index remapping
+ if mode == 'constant':
+ code += f"""
+ if (row < 0)
+ smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_lower
+ code += """
+ smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = src_col[row * row_stride];
+ }
+ }
+
+ if (blockIdx.y + 2 < gridDim.y) // Note: +2 here assumes HALO_SIZE <= PATCH_PER_BLOCK so we ensure that elsewhere
+ {
+ //Main data
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j)
+ smem[threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart + j * BLOCK_DIM_Y) * row_stride];
+
+ //Lower halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y) * row_stride];
+ }
+ else
+ {
+ //Main data
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j) {
+ row = yStart + j * BLOCK_DIM_Y;
+ """ # noqa
+ # upper boundary handling for the main data
+ if mode == 'constant':
+ code += f"""
+ if (row >= n_rows)
+ smem[threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_upper
+ code += """
+ smem[threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[row * row_stride];
+ }
+
+ //Lower halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ {
+ row = yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y;
+ """ # noqa
+ # upper boundary handling for the lower halo
+ if mode == 'constant':
+ code += f"""
+ if (row >= n_rows)
+ smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_upper
+ code += """
+ smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[row * row_stride];
+ }
+ }
+ """ # noqa
+ elif ndim == 2 and axis == 1:
+ if mode not in ['constant', 'grid-constant']:
+ boundary_code_lower, boundary_code_upper = util._generate_boundary_condition_ops(mode, 'col', 'n_cols', separate=True) # noqa
+
+ # as in OpenCV's row_filter.hpp
+ code = """
+ __shared__ T smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];
+ const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
+ if (y >= n_rows) {
+ return;
+ }
+ const int row_stride = n_cols; // stride (in elements) along axis 0
+ int col;
+ const T* src_row = &src[y * row_stride];
+ const int xStart = blockIdx.x * (PATCH_PER_BLOCK * BLOCK_DIM_X) + threadIdx.x;
+
+ if (blockIdx.x > 0)
+ {
+ //Load left halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X];
+ }
+ else
+ {
+ //Load left halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j){
+ col = xStart - (HALO_SIZE - j) * BLOCK_DIM_X;
+ """ # noqa
+ # lower boundary: either a constant fill or mode-specific index remapping
+ if mode == 'constant':
+ code += f"""
+ if (col < 0)
+ smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_lower
+ code += """
+ smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = src_row[col];
+ }
+ }
+ if (blockIdx.x + 2 < gridDim.x) // Note: +2 here assumes HALO_SIZE <= PATCH_PER_BLOCK so we ensure that elsewhere
+ {
+ //Load main data
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j)
+ smem[threadIdx.y][threadIdx.x + (HALO_SIZE + j)* BLOCK_DIM_X] = src_row[xStart + j * BLOCK_DIM_X];
+
+ //Load right halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X];
+ }
+ else
+ {
+ //Load main data
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j) {
+ col = xStart + j * BLOCK_DIM_X;
+ """ # noqa
+ # upper boundary handling for the main data
+ if mode == 'constant':
+ code += f"""
+ if (col >= n_cols)
+ smem[threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_upper
+ code += """
+ smem[threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X] = src_row[col];
+ }
+
+ //Load right halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j){
+ col = xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X;
+ """ # noqa
+ # upper boundary handling for the right halo
+ if mode == 'constant':
+ code += f"""
+ if (col >= n_cols)
+ smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_upper
+ code += """
+ smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = src_row[col];
+ }
+ }
+ """ # noqa
+
+ # barrier so that the whole tile is loaded before any thread proceeds
+ code += """
+ __syncthreads();
+ """
+ return code
+
+
+def _get_code_stage1_shared_memory_load_3d(ndim, axis, mode, cval):
+ """Generates the first stage of the function body.
+
+ This involves just copying from the `src` array into the `smem` shared
+ memory array followed by a call to __syncthreads(). All boundary
+ handling also occurs within this function.
+
+ Parameters
+ ----------
+ ndim : int
+ Number of image dimensions. Only ``ndim == 3`` is handled here.
+ axis : int
+ Axis being filtered (0, 1 or 2). Selects which code template is emitted.
+ mode : str
+ Boundary mode. For 'constant', out-of-range elements are filled with
+ `cval`; otherwise the boundary index code is obtained from
+ ``util._generate_boundary_condition_ops``.
+ cval : scalar
+ Fill value interpolated into the generated code for 'constant' mode.
+
+ Returns
+ -------
+ code : str
+ Source fragment that fills `smem` and ends with __syncthreads().
+ """
+
+ # NOTE(review): 'grid-constant' skips generation of the boundary code below,
+ # but the later checks only test for 'constant'; presumably callers map
+ # 'grid-constant' to 'constant' beforehand -- TODO confirm.
+ # NOTE(review): `code` is only assigned for ndim == 3 with axis in
+ # {0, 1, 2}; other combinations look unsupported here -- TODO confirm.
+ if ndim == 3 and axis == 0:
+ if mode not in ['constant', 'grid-constant']:
+ boundary_code_lower, boundary_code_upper = util._generate_boundary_condition_ops(mode, 'row', 's_0', separate=True) # noqa
+
+ # as in OpenCV's column_filter.hpp
+ code = """
+ __shared__ T smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Z][BLOCK_DIM_Y][BLOCK_DIM_X];
+ const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;
+ const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
+ if ((x >= s_2) || (y >= s_1)) {
+ return;
+ }
+ // memory is contiguous along last (columns) axis
+ const int stride_0 = s_1 * s_2; // stride (in elements) along axis 0
+ const int stride_1 = s_2; // stride (in elements) along axis 1
+
+ const T* src_col = &src[x + stride_1 * y];
+ const int zStart = blockIdx.z * (BLOCK_DIM_Z * PATCH_PER_BLOCK) + threadIdx.z;
+
+ int row;
+
+ if (blockIdx.z > 0)
+ {
+ //Upper halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ smem[threadIdx.z + j * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[(zStart - (HALO_SIZE - j) * BLOCK_DIM_Z) * stride_0];
+ }
+ else
+ {
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j) {
+ row = zStart - (HALO_SIZE - j) * BLOCK_DIM_Z;
+ """ # noqa
+ # lower boundary: either a constant fill or mode-specific index remapping
+ if mode == 'constant':
+ code += f"""
+ if (row < 0)
+ smem[threadIdx.z + j * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_lower
+ code += """
+ smem[threadIdx.z + j * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[row * stride_0];
+ }
+ }
+
+ if (blockIdx.z + 2 < gridDim.z) // Note: +2 here assumes HALO_SIZE <= PATCH_PER_BLOCK so we ensure that elsewhere
+ {
+ //Main data
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j)
+ smem[threadIdx.z + (HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[(zStart + j * BLOCK_DIM_Z) * stride_0];
+
+ //Lower halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ smem[threadIdx.z + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[(zStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Z) * stride_0];
+ }
+ else
+ {
+ //Main data
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j) {
+ row = zStart + j * BLOCK_DIM_Z;
+ """ # noqa
+ # upper boundary handling for the main data
+ if mode == 'constant':
+ code += f"""
+ if (row >= s_0)
+ smem[threadIdx.z + (HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_upper
+ code += """
+ smem[threadIdx.z + (HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[row * stride_0];
+ }
+
+ //Lower halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ {
+ row = zStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Z;
+ """ # noqa
+ # upper boundary handling for the lower halo
+ if mode == 'constant':
+ code += f"""
+ if (row >= s_0)
+ smem[threadIdx.z + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_upper
+ code += """
+ smem[threadIdx.z + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Z][threadIdx.y][threadIdx.x] = src_col[row * stride_0];
+ }
+ }
+ """ # noqa
+ elif ndim == 3 and axis == 1:
+ if mode not in ['constant', 'grid-constant']:
+ boundary_code_lower, boundary_code_upper = util._generate_boundary_condition_ops(mode, 'row', 's_1', separate=True) # noqa
+
+ # as in OpenCV's column_filter.hpp
+ code = """
+ __shared__ T smem[BLOCK_DIM_Z][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X];
+ const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;
+ const int z = blockIdx.z * BLOCK_DIM_Z + threadIdx.z;
+ if ((x >= s_2) || (z >= s_0)) {
+ return;
+ }
+ // memory is contiguous along last (columns) axis
+ const int stride_0 = s_1 * s_2; // stride (in elements) along axis 0
+ const int stride_1 = s_2; // stride (in elements) along axis 1
+
+ const T* src_col = &src[x + stride_0 * z];
+ const int yStart = blockIdx.y * (BLOCK_DIM_Y * PATCH_PER_BLOCK) + threadIdx.y;
+
+ int row;
+
+ if (blockIdx.y > 0)
+ {
+ //Upper halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ smem[threadIdx.z][threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y) * stride_1];
+ }
+ else
+ {
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j) {
+ row = yStart - (HALO_SIZE - j) * BLOCK_DIM_Y;
+ """ # noqa
+ # lower boundary: either a constant fill or mode-specific index remapping
+ if mode == 'constant':
+ code += f"""
+ if (row < 0)
+ smem[threadIdx.z][threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_lower
+ code += """
+ smem[threadIdx.z][threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = src_col[row * stride_1];
+ }
+ }
+
+ if (blockIdx.y + 2 < gridDim.y) // Note: +2 here assumes HALO_SIZE <= PATCH_PER_BLOCK so we ensure that elsewhere
+ {
+ //Main data
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j)
+ smem[threadIdx.z][threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart + j * BLOCK_DIM_Y) * stride_1];
+
+ //Lower halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ smem[threadIdx.z][threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y) * stride_1];
+ }
+ else
+ {
+ //Main data
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j) {
+ row = yStart + j * BLOCK_DIM_Y;
+ """ # noqa
+ # upper boundary handling for the main data
+ if mode == 'constant':
+ code += f"""
+ if (row >= s_1)
+ smem[threadIdx.z][threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_upper
+ code += """
+ smem[threadIdx.z][threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[row * stride_1];
+ }
+
+ //Lower halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ {
+ row = yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y;
+ """ # noqa
+ # upper boundary handling for the lower halo
+ if mode == 'constant':
+ code += f"""
+ if (row >= s_1)
+ smem[threadIdx.z][threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_upper
+ code += """
+ smem[threadIdx.z][threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_Y][threadIdx.x] = src_col[row * stride_1];
+ }
+ }
+ """ # noqa
+ elif ndim == 3 and axis == 2:
+ if mode not in ['constant', 'grid-constant']:
+ boundary_code_lower, boundary_code_upper = util._generate_boundary_condition_ops(mode, 'col', 's_2', separate=True) # noqa
+
+ # as in OpenCV's row_filter.hpp
+ code = """
+ __shared__ T smem[BLOCK_DIM_Z][BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];
+ const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
+ const int z = blockIdx.z * BLOCK_DIM_Z + threadIdx.z;
+ if ((y >= s_1) || (z >= s_0)) {
+ return;
+ }
+ const int stride_0 = s_1 * s_2; // stride (in elements) along axis 0
+ const int stride_1 = s_2; // stride (in elements) along axis 1
+ int col;
+ const T* src_row = &src[z * stride_0 + y * stride_1];
+ const int xStart = blockIdx.x * (PATCH_PER_BLOCK * BLOCK_DIM_X) + threadIdx.x;
+
+ if (blockIdx.x > 0)
+ {
+ //Load left halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ smem[threadIdx.z][threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X];
+ }
+ else
+ {
+ //Load left halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j){
+ col = xStart - (HALO_SIZE - j) * BLOCK_DIM_X;
+ """ # noqa
+ # lower boundary: either a constant fill or mode-specific index remapping
+ if mode == 'constant':
+ code += f"""
+ if (col < 0)
+ smem[threadIdx.z][threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_lower
+ code += """
+ smem[threadIdx.z][threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = src_row[col];
+ }
+ }
+ if (blockIdx.x + 2 < gridDim.x) // Note: +2 here assumes HALO_SIZE <= PATCH_PER_BLOCK so we ensure that elsewhere
+ {
+ //Load main data
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j)
+ smem[threadIdx.z][threadIdx.y][threadIdx.x + (HALO_SIZE + j)* BLOCK_DIM_X] = src_row[xStart + j * BLOCK_DIM_X];
+
+ //Load right halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j)
+ smem[threadIdx.z][threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X];
+ }
+ else
+ {
+ //Load main data
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j) {
+ col = xStart + j * BLOCK_DIM_X;
+ """ # noqa
+ # upper boundary handling for the main data
+ if mode == 'constant':
+ code += f"""
+ if (col >= s_2)
+ smem[threadIdx.z][threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_upper
+ code += """
+ smem[threadIdx.z][threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X] = src_row[col];
+ }
+
+ //Load right halo
+ #pragma unroll
+ for (int j = 0; j < HALO_SIZE; ++j){
+ col = xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X;
+ """ # noqa
+ # upper boundary handling for the right halo
+ if mode == 'constant':
+ code += f"""
+ if (col >= s_2)
+ smem[threadIdx.z][threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = static_cast({cval});
+ else
+ """ # noqa
+ else:
+ code += boundary_code_upper
+ code += """
+ smem[threadIdx.z][threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE + j) * BLOCK_DIM_X] = src_row[col];
+ }
+ }
+ """ # noqa
+
+ # barrier so that the whole tile is loaded before any thread proceeds
+ code += """
+ __syncthreads();
+ """
+ return code
+
+
+@cp.memoize(for_each_device=False)
+def _get_code_stage1_shared_memory_load(ndim, axis, mode, cval):
+ if ndim == 2:
+ return _get_code_stage1_shared_memory_load_2d(ndim, axis, mode, cval)
+ elif ndim == 3:
+ return _get_code_stage1_shared_memory_load_3d(ndim, axis, mode, cval)
+
+
+def _get_code_stage2_convolve_2d(ndim, axis, flip_kernel):
+ code = """
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j)
+ {
+ """
+ if flip_kernel:
+ kernel_idx = "KSIZE - 1 - k"
+ else:
+ kernel_idx = "k"
+
+ if ndim == 2 and axis == 0:
+ code += """
+ const int y = yStart + j * BLOCK_DIM_Y;
+
+ if (y < n_rows)
+ {
+ """
+ inner = f"""
+ sum = sum + static_cast(smem[threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y - anchor + k][threadIdx.x]) * kernel[{kernel_idx}];
+ """ # noqa
+ elif ndim == 2 and axis == 1:
+ code += """
+ const int x = xStart + j * BLOCK_DIM_X;
+
+ if (x < n_cols)
+ {
+ """
+ inner = f"""
+ sum = sum + static_cast(smem[threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X - anchor + k]) * kernel[{kernel_idx}];
+ """ # noqa
+ code += f"""
+ W sum = static_cast(0);
+
+ #pragma unroll
+ for (int k = 0; k < KSIZE; ++k) {{
+ {inner}
+ }}
+ dst[y * row_stride + x] = cast(sum);
+ }}
+ }}
+ """
+ return code
+
+
+def _get_code_stage2_convolve_3d(ndim, axis, flip_kernel):
+ code = """
+ #pragma unroll
+ for (int j = 0; j < PATCH_PER_BLOCK; ++j)
+ {
+ """
+ if flip_kernel:
+ kernel_idx = "KSIZE - 1 - k"
+ else:
+ kernel_idx = "k"
+
+ if ndim == 3 and axis == 0:
+ code += """
+ const int z = zStart + j * BLOCK_DIM_Z;
+
+ if (z < s_0)
+ {
+ """
+ inner = f"""
+ sum = sum + static_cast(smem[threadIdx.z + (HALO_SIZE + j) * BLOCK_DIM_Z - anchor + k][threadIdx.y][threadIdx.x]) * kernel[{kernel_idx}];
+ """ # noqa
+ elif ndim == 3 and axis == 1:
+ code += """
+ const int y = yStart + j * BLOCK_DIM_Y;
+
+ if (y < s_1)
+ {
+ """
+ inner = f"""
+ sum = sum + static_cast(smem[threadIdx.z][threadIdx.y + (HALO_SIZE + j) * BLOCK_DIM_Y - anchor + k][threadIdx.x]) * kernel[{kernel_idx}];
+ """ # noqa
+ elif ndim == 3 and axis == 2:
+ code += """
+ const int x = xStart + j * BLOCK_DIM_X;
+
+ if (x < s_2)
+ {
+ """
+ inner = f"""
+ sum = sum + static_cast(smem[threadIdx.z][threadIdx.y][threadIdx.x + (HALO_SIZE + j) * BLOCK_DIM_X - anchor + k]) * kernel[{kernel_idx}];
+ """ # noqa
+ code += f"""
+ W sum = static_cast(0);
+
+ #pragma unroll
+ for (int k = 0; k < KSIZE; ++k) {{
+ {inner}
+ }}
+ dst[z * stride_0 + y * stride_1 + x] = cast(sum);
+ }}
+ }}
+ """
+ return code
+
+
+@cp.memoize(for_each_device=False)
+def _get_code_stage2_convolve(ndim, axis, flip_kernel):
+ if ndim == 2:
+ return _get_code_stage2_convolve_2d(ndim, axis, flip_kernel)
+ elif ndim == 3:
+ return _get_code_stage2_convolve_3d(ndim, axis, flip_kernel)
+
+
+@cp.memoize(for_each_device=True)
+def _get_separable_conv_kernel_src(
+ kernel_size, axis, ndim, anchor, image_c_type, kernel_c_type,
+ output_c_type, mode, cval, patch_per_block=None, flip_kernel=False
+):
+ """Generate the CUDA source of a 1D separable filtering kernel.
+
+ The source is assembled from a preamble of compile-time constants and
+ typedefs (T: image type, D: output type, W: weights type), a kernel
+ signature that depends on `ndim` (2 or 3), the stage-1 shared memory
+ load code and the stage-2 convolution code.
+
+ Returns
+ -------
+ func_name : str
+ Kernel name. It encodes convolve/correlate, kernel size, ndim, axis,
+ boundary mode (plus `cval` for 'constant' modes), the three C types,
+ the patch count and the halo size.
+ blocks : tuple
+ (block_x, block_y, block_z) as returned by `_get_constants`.
+ patch_per_block : int
+ Value returned by `_get_constants`.
+ code : str
+ The full kernel source.
+ """
+ blocks, patch_per_block, halo_size = _get_constants(
+ ndim, axis, kernel_size, anchor, patch_per_block
+ )
+ block_x, block_y, block_z = blocks
+
+ # build a mode suffix that is usable inside a C identifier: cval is
+ # formatted with two decimals and '.' / '-' are replaced by '_'
+ mode_str = mode
+ if 'constant' in mode_str:
+ mode_str += f'_{cval:0.2f}'.replace('.', '_')
+ mode_str = mode_str.replace('-', '_')
+ if flip_kernel:
+ func_name = f'convolve_s{kernel_size}_{ndim}d_ax{axis}_{mode_str}'
+ else:
+ func_name = f'correlate_s{kernel_size}_{ndim}d_ax{axis}_{mode_str}'
+ # abbreviate the C type names so they can be embedded in the kernel name
+ func_name += f"_T{image_c_type}_W{kernel_c_type}_D{output_c_type}".replace('complex<', 'c').replace('>', '').replace('long ', 'l').replace('unsigned ', 'u') # noqa
+ func_name += f"_patch{patch_per_block}_halo{halo_size}"
+ # func_name += f"_bx{block_x}_by{block_y}" // these are fixed per axis
+
+ code = """
+ #include "cupy/carray.cuh" // for float16
+ #include "cupy/complex.cuh" // for complex
+ """
+
+ # SciPy-style float -> unsigned integer casting for the output
+ # (use cast<D>(sum) instead of static_cast<D>(sum) for the output)
+ code += _ndimage_includes + _ndimage_CAST_FUNCTION
+
+ code += f"""
+ const int KSIZE = {kernel_size};
+ const int BLOCK_DIM_X = {block_x};
+ const int BLOCK_DIM_Y = {block_y};
+ const int BLOCK_DIM_Z = {block_z};
+ const int PATCH_PER_BLOCK = {patch_per_block};
+ const int HALO_SIZE = {halo_size};
+ typedef {image_c_type} T;
+ typedef {output_c_type} D;
+ typedef {kernel_c_type} W;
+ """
+
+ # the kernel signature takes the image shape as trailing int arguments
+ if ndim == 2:
+ code += f"""
+ extern "C"{{
+ __global__ void {func_name}(const T *src, D *dst, const W* kernel, const int anchor, int n_rows, int n_cols)
+ {{
+ """ # noqa
+ elif ndim == 3:
+ code += f"""
+ extern "C"{{
+ __global__ void {func_name}(const T *src, D *dst, const W* kernel, const int anchor, int s_0, int s_1, int s_2)
+ {{
+ """ # noqa
+ code += _get_code_stage1_shared_memory_load(ndim, axis, mode, cval)
+ code += _get_code_stage2_convolve(ndim, axis, flip_kernel)
+ code += """
+ } // end of function
+ } // extern "C"
+ """
+ return func_name, blocks, patch_per_block, code
+
+
+@cp.memoize(for_each_device=True)
+def _get_separable_conv_kernel(kernel_size, axis, ndim, image_c_type,
+ kernel_c_type, output_c_type, anchor=None,
+ mode='nearest', cval=0,
+ patch_per_block=None, flip_kernel=False):
+ func_name, block, patch_per_block, code = _get_separable_conv_kernel_src(
+ kernel_size=kernel_size,
+ axis=axis,
+ ndim=ndim,
+ image_c_type=image_c_type,
+ kernel_c_type=kernel_c_type,
+ output_c_type=output_c_type,
+ anchor=anchor,
+ mode=mode,
+ cval=cval,
+ patch_per_block=patch_per_block,
+ flip_kernel=flip_kernel,
+ )
+ options = ('--std=c++11', '-DCUPY_USE_JITIFY')
+ m = cp.RawModule(code=code, options=options)
+ return m.get_function(func_name), block, patch_per_block
+
+
+def _get_grid(shape, block, axis, patch_per_block):
+ """Determine grid size from image shape and block parameters"""
+ ndim = len(shape)
+ if ndim == 2:
+ if axis == 0:
+ # column filter
+ grid = (
+ math.ceil(shape[1] / block[0]),
+ math.ceil(shape[0] / (block[1] * patch_per_block)),
+ 1,
+ )
+ elif axis == 1:
+ # row filter
+ grid = (
+ math.ceil(shape[1] / (block[0] * patch_per_block)),
+ math.ceil(shape[0] / block[1]),
+ 1,
+ )
+ else:
+ raise ValueError(f"invalid axis: {axis}")
+ elif ndim == 3:
+ if axis == 0:
+ # column filter
+ grid = (
+ math.ceil(shape[2] / block[0]),
+ math.ceil(shape[1] / block[1]),
+ math.ceil(shape[0] / (block[2] * patch_per_block)),
+ )
+ elif axis == 1:
+ # row filter
+ grid = (
+ math.ceil(shape[2] / block[0]),
+ math.ceil(shape[1] / (block[1] * patch_per_block)),
+ math.ceil(shape[0] / block[2]),
+ )
+ elif axis == 2:
+ # row filter
+ grid = (
+ math.ceil(shape[2] / (block[0] * patch_per_block)),
+ math.ceil(shape[1] / block[1]),
+ math.ceil(shape[0] / block[2]),
+ )
+ else:
+ raise ValueError(f"invalid axis: {axis}")
+ else:
+ raise NotImplementedError(f"unsupported ndim: {ndim}")
+ return grid
+
+
+def _shmem_convolve1d(image, weights, axis=-1, output=None, mode="reflect",
+ cval=0.0, origin=0, convolution=False):
+
+ ndim = image.ndim
+ if weights.ndim != 1:
+ raise ValueError("expected 1d weight array")
+ axis = _normalize_axis_index(axis, ndim)
+ origin = util._check_origin(origin, weights.size)
+ if weights.size == 0:
+ return cp.zeros_like(input)
+ util._check_mode(mode)
+
+ if convolution:
+ # use flip_kernel to avoid cp.ascontiguousarray(weights[::-1]))
+ origin = -origin
+ if weights.size % 2 == 0:
+ origin -= 1
+ elif weights.dtype.kind == "c":
+ # numpy.correlate conjugates weights rather than input.
+ weights = weights.conj()
+
+ anchor = weights.size // 2 + origin
+
+ if weights.size > 32:
+ # For large kernels, make sure we have adequate shared memory
+ _check_smem_availability(ndim, axis, weights.size, anchor=anchor,
+ patch_per_block=None, image_dtype=image.dtype,
+ device_id=None)
+
+ # CUDA kernels assume C-contiguous memory layout
+ if not image.flags.c_contiguous:
+ image = cp.ascontiguousarray(image)
+
+ complex_output = image.dtype.kind == 'c'
+ # Note: important to set use_cucim_casting=True for performance with
+ # 8 and 16-bit integer types. This causes the weights to get cast to
+ # float32 rather than float64.
+ weights_dtype = util._get_weights_dtype(
+ image, weights, use_cucim_casting=True
+ )
+ if not weights.flags.c_contiguous or weights.dtype != weights_dtype:
+ weights = cp.ascontiguousarray(weights, weights_dtype)
+
+ # promote output to nearest complex dtype if necessary
+ complex_output = complex_output or weights.dtype.kind == 'c'
+ output = util._get_output(output, image, None, complex_output)
+
+ # handle potential overlap between input and output arrays
+ needs_temp = cp.shares_memory(output, image, 'MAY_SHARE_BOUNDS')
+ if needs_temp:
+ output, temp = util._get_output(output.dtype, input), output
+
+ # index_c_type = util._get_inttype(image)
+ image_c_type = _dtype_char_to_c_types[image.dtype.char]
+ weights_c_type = _dtype_char_to_c_types[weights.dtype.char]
+ output_c_type = _dtype_char_to_c_types[output.dtype.char]
+
+ conv_axis_kernel, block, patch_per_block = _get_separable_conv_kernel(
+ weights.size,
+ axis=axis,
+ ndim=ndim,
+ anchor=anchor,
+ image_c_type=image_c_type,
+ kernel_c_type=weights_c_type,
+ output_c_type=output_c_type,
+ mode=mode,
+ cval=cval,
+ patch_per_block=None,
+ flip_kernel=convolution,
+ )
+ grid = _get_grid(image.shape, block, axis, patch_per_block)
+ args = (image, output, weights, anchor) + image.shape[:ndim]
+ conv_axis_kernel(
+ grid,
+ block,
+ args,
+ )
+ if needs_temp:
+ output[:] = temp
+ output = temp
+ return output
diff --git a/python/cucim/src/cucim/skimage/filters/cuda/histogram_median.cu b/python/cucim/src/cucim/skimage/filters/cuda/histogram_median.cu
new file mode 100644
index 000000000..b084383b4
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/filters/cuda/histogram_median.cu
@@ -0,0 +1,251 @@
+/* Several functions and the primary kernel used for the histogram-based
+ * median are in this file.
+ *
+ * Note that this file cannot be compiled standalone as various definitions
+ * and a couple of the supporting functions get dynamically generated based
+ * on the actual histogram sizes. See the Python function
+ * `_get_median_rawkernel` defined in `_median_hist.py`. This function will
+ * generate the full kernel code given a set of parameters.
+ */
+
+// Coarse histogram update: for each bin handled by this thread, add the
+// count from hist_colAdd and remove the count from hist_colSub.
+__device__ void histogramAddAndSubCoarse(HIST_INT_T* H,
+                                         const HIST_INT_T* hist_colAdd,
+                                         const HIST_INT_T* hist_colSub) {
+  const int bin = threadIdx.x;
+  // threads beyond the coarse histogram size have nothing to do
+  if (bin >= HIST_SIZE_COARSE) return;
+  H[bin] += hist_colAdd[bin] - hist_colSub[bin];
+}
+
+// Accumulate `histCount` consecutive coarse column histograms into H.
+// Histogram i starts at offset (i << LOG2_COARSE) within hist_col.
+__device__ void histogramMultipleAddCoarse(HIST_INT_T* H,
+                                           const HIST_INT_T* hist_col,
+                                           int histCount) {
+  const int bin = threadIdx.x;
+  // threads beyond the coarse histogram size have nothing to do
+  if (bin >= HIST_SIZE_COARSE) return;
+  HIST_INT_T acc = H[bin];
+  int col = 0;
+  while (col < histCount) {
+    acc += hist_col[(col << LOG2_COARSE) + bin];
+    ++col;
+  }
+  H[bin] = acc;
+}
+
+// Zero the coarse histogram H (one bin per thread).
+__device__ void histogramClearCoarse(HIST_INT_T* H) {
+  const int bin = threadIdx.x;
+  if (bin >= HIST_SIZE_COARSE) return;
+  H[bin] = 0;
+}
+
+// Add the coarse column histogram hist_col to H (one bin per thread).
+__device__ void histogramAddCoarse(HIST_INT_T* H, const HIST_INT_T* hist_col) {
+  const int bin = threadIdx.x;
+  if (bin >= HIST_SIZE_COARSE) return;
+  H[bin] += hist_col[bin];
+}
+
+// Subtract the coarse column histogram hist_col from H (one bin per thread).
+__device__ void histogramSubCoarse(HIST_INT_T* H, const HIST_INT_T* hist_col) {
+  const int bin = threadIdx.x;
+  if (bin >= HIST_SIZE_COARSE) return;
+  H[bin] -= hist_col[bin];
+}
+
+// Add the fine column histogram hist_col to H (one bin per thread).
+__device__ void histogramAddFine(HIST_INT_T* H, const HIST_INT_T* hist_col) {
+  const int bin = threadIdx.x;
+  if (bin >= HIST_SIZE_FINE) return;
+  H[bin] += hist_col[bin];
+}
+
+// Fine histogram update: for each bin handled by this thread, add the
+// count from hist_colAdd and remove the count from hist_colSub.
+__device__ void histogramAddAndSubFine(HIST_INT_T* H,
+                                       const HIST_INT_T* hist_colAdd,
+                                       const HIST_INT_T* hist_colSub) {
+  const int bin = threadIdx.x;
+  // threads beyond the fine histogram size have nothing to do
+  if (bin >= HIST_SIZE_FINE) return;
+  H[bin] += hist_colAdd[bin] - hist_colSub[bin];
+}
+
+// Zero the fine histogram H (one bin per thread).
+__device__ void histogramClearFine(HIST_INT_T* H) {
+  const int bin = threadIdx.x;
+  if (bin >= HIST_SIZE_FINE) return;
+  H[bin] = 0;
+}
+
+// Reset the `luc` table (one entry per coarse bin, one entry per thread).
+__device__ void lucClearCoarse(int* luc) {
+  const int bin = threadIdx.x;
+  if (bin >= HIST_SIZE_COARSE) return;
+  luc[bin] = 0;
+}
+
+// Histogram-based rank filter kernel.
+//
+// Each thread block processes a contiguous range of image rows
+// [startRow, stopRow). Per-column histograms are kept in global memory
+// (`histPar` for the full histograms, `coarseHistGrid` for the coarse ones,
+// one slice per block) and are updated row by row. For every output pixel
+// the coarse window histogram (HCoarse) locates the coarse bin containing
+// rank `medPos_`, and the fine histogram of that bin (HFine) resolves the
+// value, which is written to `dest`.
+//
+// r0 and r1 are the window half-sizes used along rows and columns
+// respectively. Output is only written for columns r1 <= j < cols - r1.
+// NOTE(review): the caller presumably pads the image so that this covers
+// the region of interest - confirm against the Python wrapper.
+// histogramMedianParCoarseLookupOnly / histogramMedianParFineLookupOnly are
+// not defined in this file (see the file header about generated functions).
+extern "C" __global__ void cuRankFilterMultiBlock(IMAGE_T* src, IMAGE_T* dest,
+ HIST_INT_T* histPar,
+ HIST_INT_T* coarseHistGrid,
+ int r0, int r1, int medPos_,
+ int rows, int cols) {
+ __shared__ HIST_INT_T HCoarse[HIST_SIZE_COARSE];
+ __shared__ HIST_INT_T HCoarseScan[HIST_SIZE_FINE];
+ __shared__ HIST_INT_T HFine[HIST_SIZE_COARSE][HIST_SIZE_FINE];
+
+ // luc[b]: column index up to which HFine[b] has been accumulated
+ __shared__ int luc[HIST_SIZE_COARSE];
+
+ __shared__ int firstBin, countAtMed, retval;
+
+ // the image is indexed as a dense 2D array: one row spans `cols` elements
+ const int row_stride = cols; // stride (in elements) along axis 0
+
+ int extraRowThread = rows % gridDim.x;
+ int doExtraRow = blockIdx.x < extraRowThread;
+ int startRow = 0, stopRow = 0;
+ int rowsPerBlock = rows / gridDim.x + doExtraRow;
+
+ // The following code partitions the work to the blocks. Some blocks will do
+ // one row more than other blocks. This code is responsible for doing that
+ // balancing
+ if (doExtraRow) {
+ startRow = rowsPerBlock * blockIdx.x;
+ stopRow = min(rows, startRow + rowsPerBlock);
+ } else {
+ startRow = (rowsPerBlock + 1) * extraRowThread +
+ (rowsPerBlock) * (blockIdx.x - extraRowThread);
+ stopRow = min(rows, startRow + rowsPerBlock);
+ }
+
+ // per-block slices of the global column histograms
+ HIST_INT_T* hist = histPar + cols * HIST_SIZE * blockIdx.x;
+ HIST_INT_T* histCoarse =
+ coarseHistGrid + cols * HIST_SIZE_COARSE * blockIdx.x;
+
+ if (blockIdx.x == (gridDim.x - 1)) stopRow = rows;
+ __syncthreads();
+ int initNeeded = 0, initStartRow, initStopRow;
+ HIST_INT_T initVal;
+
+ if (blockIdx.x == 0) {
+ // Note: skips one iteration in the initialization loop by starting at
+ // row 1 instead of 0 and using initVal r0+2 instead of r0+1.
+ initNeeded = 1;
+ initVal = r0 + 2;
+ initStartRow = 1;
+ initStopRow = r0;
+ } else if (startRow < (r0 + 2)) {
+ initNeeded = 1;
+ initVal = r0 + 2 - startRow;
+ initStartRow = 1;
+ initStopRow = r0 + startRow;
+ } else {
+ initNeeded = 0;
+ initVal = 0;
+ initStartRow = startRow - (r0 + 1);
+ initStopRow = r0 + startRow;
+ }
+ __syncthreads();
+
+ // In the original algorithm an initialization phase was required as part of
+ // the window was outside the image. In this parallel version, the
+ // initialization is required for all thread blocks for which part of the
+ // median filter window is outside the image. For all threads in the block
+ // the same code will be executed.
+ if (initNeeded) {
+ for (int j = threadIdx.x; j < (cols); j += blockDim.x) {
+ hist[j * HIST_SIZE + src[j] + HIST_OFFSET] = initVal;
+ histCoarse[j * HIST_SIZE_COARSE + ((src[j] + HIST_OFFSET) >> LOG2_FINE)] =
+ initVal;
+ }
+ }
+ __syncthreads();
+
+ // For all remaining rows in the median filter, add the values to the
+ // histogram
+ for (int j = threadIdx.x; j < cols; j += blockDim.x) {
+ for (int i = initStartRow; i < initStopRow; i++) {
+ int pos = min(i, rows - 1);
+ hist[j * HIST_SIZE + src[pos * row_stride + j] + HIST_OFFSET]++;
+ histCoarse[j * HIST_SIZE_COARSE +
+ ((src[pos * row_stride + j] + HIST_OFFSET) >> LOG2_FINE)]++;
+ }
+ }
+ __syncthreads();
+ // Going through all the rows that the block is responsible for.
+ int inc = blockDim.x * HIST_SIZE;
+ int incCoarse = blockDim.x * HIST_SIZE_COARSE;
+ for (int i = startRow; i < stopRow; i++) {
+ // For every new row that is started the global histogram for the entire
+ // window is restarted.
+
+ histogramClearCoarse(HCoarse);
+ lucClearCoarse(luc);
+ // Computing some necessary indices
+ int possub = max(0, i - r0 - 1), posadd = min(rows - 1, i + r0);
+ int histPos = threadIdx.x * HIST_SIZE;
+ int histCoarsePos = threadIdx.x * HIST_SIZE_COARSE;
+ // Going through all the elements of a specific row. For each histogram, a
+ // value is taken out and one value is added.
+ for (int j = threadIdx.x; j < cols; j += blockDim.x) {
+ hist[histPos + src[possub * row_stride + j] + HIST_OFFSET]--;
+ hist[histPos + src[posadd * row_stride + j] + HIST_OFFSET]++;
+ histCoarse[histCoarsePos +
+ ((src[possub * row_stride + j] + HIST_OFFSET) >> LOG2_FINE)]--;
+ histCoarse[histCoarsePos +
+ ((src[posadd * row_stride + j] + HIST_OFFSET) >> LOG2_FINE)]++;
+
+ histPos += inc;
+ histCoarsePos += incCoarse;
+ }
+ __syncthreads();
+
+ // seed the coarse window histogram with the first 2 * r1 + 1 columns
+ histogramMultipleAddCoarse(HCoarse, histCoarse, 2 * r1 + 1);
+ int cols_m_1 = cols - 1;
+
+ for (int j = r1; j < cols - r1; j++) {
+ int possub = max(j - r1, 0);
+ int posadd = min(j + 1 + r1, cols_m_1);
+ int medPos = medPos_;
+ __syncthreads();
+
+ histogramMedianParCoarseLookupOnly(HCoarse, HCoarseScan, medPos,
+ &firstBin, &countAtMed);
+ __syncthreads();
+
+ // bring the fine histogram of the selected coarse bin up to date:
+ // rebuild it from scratch if it is stale, otherwise slide it forward
+ int loopIndex = luc[firstBin];
+ if (loopIndex <= (j - r1)) {
+ histogramClearFine(HFine[firstBin]);
+ for (loopIndex = j - r1; loopIndex < min(j + r1 + 1, cols);
+ loopIndex++) {
+ histogramAddFine(HFine[firstBin], hist + (loopIndex * HIST_SIZE +
+ (firstBin << LOG2_FINE)));
+ }
+ } else {
+ for (; loopIndex < (j + r1 + 1); loopIndex++) {
+ histogramAddAndSubFine(
+ HFine[firstBin],
+ hist + (min(loopIndex, cols_m_1) * HIST_SIZE +
+ (firstBin << LOG2_FINE)),
+ hist + (max(loopIndex - 2 * r1 - 1, 0) * HIST_SIZE +
+ (firstBin << LOG2_FINE)));
+ __syncthreads();
+ }
+ }
+ __syncthreads();
+ luc[firstBin] = loopIndex;
+
+ int leftOver = medPos - countAtMed;
+ if (leftOver >= 0) {
+ histogramMedianParFineLookupOnly(HFine[firstBin], HCoarseScan, leftOver,
+ &retval, &countAtMed);
+ } else
+ retval = 0;
+ __syncthreads();
+
+ // a single thread writes the result; HIST_OFFSET is removed again
+ if (threadIdx.x == 0) {
+ dest[i * row_stride + j] =
+ (firstBin << LOG2_FINE) + retval - HIST_OFFSET;
+ }
+ // slide the coarse window histogram one column to the right
+ histogramAddAndSubCoarse(HCoarse,
+ histCoarse + (int)(posadd << LOG2_COARSE),
+ histCoarse + (int)(possub << LOG2_COARSE));
+
+ __syncthreads();
+ }
+ __syncthreads();
+ }
+}
diff --git a/python/cucim/src/cucim/skimage/filters/edges.py b/python/cucim/src/cucim/skimage/filters/edges.py
index 6b4dcff37..193da4f0d 100644
--- a/python/cucim/src/cucim/skimage/filters/edges.py
+++ b/python/cucim/src/cucim/skimage/filters/edges.py
@@ -13,7 +13,7 @@
import cupy as cp
import numpy as np
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from .._shared.utils import _supported_float_type, check_nD
from ..restoration.uft import laplacian
diff --git a/python/cucim/src/cucim/skimage/filters/tests/test_median.py b/python/cucim/src/cucim/skimage/filters/tests/test_median.py
index 3507032bf..6b0d8e002 100644
--- a/python/cucim/src/cucim/skimage/filters/tests/test_median.py
+++ b/python/cucim/src/cucim/skimage/filters/tests/test_median.py
@@ -2,10 +2,20 @@
import pytest
from cupy.testing import assert_allclose
from cupyx.scipy import ndimage
+from skimage import data
from cucim.skimage._shared.testing import expected_warnings
from cucim.skimage.filters import median
+try:
+ from math import prod
+except ImportError:
+ from functools import reduce
+ from operator import mul
+
+ def prod(x):
+ return reduce(mul, x)
+
@pytest.fixture
def image():
@@ -17,6 +27,11 @@ def image():
dtype=cp.uint8)
+@pytest.fixture
+def camera():
+ return cp.array(data.camera())
+
+
# TODO: mode='rank' disabled until it has been implmented
@pytest.mark.parametrize(
"mode, cval, behavior, warning_type",
@@ -41,12 +56,167 @@ def test_selem_kwarg_deprecation(image):
# TODO: update if rank.median implemented
@pytest.mark.parametrize(
- "behavior, func, params",
- [('ndimage', ndimage.median_filter, {'size': (3, 3)})]
+ 'behavior, func', [('ndimage', ndimage.median_filter)],
# ('rank', rank.median, {'footprint': cp.ones((3, 3), dtype=cp.uint8)})]
)
-def test_median_behavior(image, behavior, func, params):
- assert_allclose(median(image, behavior=behavior), func(image, **params))
+@pytest.mark.parametrize(
+ 'mode', ['reflect', 'mirror', 'nearest', 'constant', 'wrap']
+)
+# include even shapes and singleton shape that force non-histogram code path.
+# include some large shapes that always take the histogram-based code path.
+@pytest.mark.parametrize(
+ 'footprint_shape', [
+ (3, 3), (5, 5), (9, 15), (2, 2), (1, 1), (2, 7), (23, 23), (15, 35),
+ ]
+)
+@pytest.mark.parametrize('out', [None, cp.uint8, cp.float32, 'array'])
+def test_median_behavior(camera, behavior, func, mode, footprint_shape, out):
+ footprint = cp.ones(footprint_shape, dtype=bool)
+ cam2 = camera[:, :177] # use anisotropic size
+ assert cam2.dtype == cp.uint8
+ if out == 'array':
+ out = cp.zeros_like(cam2)
+ assert_allclose(
+ median(cam2, footprint, mode=mode, behavior=behavior, out=out),
+ func(cam2, size=footprint.shape, mode=mode, output=out),
+ )
+
+
+@pytest.mark.parametrize(
+ 'mode', ['reflect', 'mirror', 'nearest', 'constant', 'wrap']
+)
+# use an anisotropic footprint large enough to trigger the histogram-based path
+@pytest.mark.parametrize('footprint_shape', [(3, 3), (3, 5), (15, 23)])
+@pytest.mark.parametrize(
+ 'int_dtype', [cp.uint8, cp.int8, cp.uint16, cp.int16]
+)
+@pytest.mark.parametrize(
+ 'algorithm', ['auto', 'histogram', 'sorting']
+)
+@pytest.mark.parametrize(
+ 'algorithm_kwargs', [{}, {'partitions': 32}]
+)
+def test_median_hist_dtypes(
+ mode, footprint_shape, int_dtype, algorithm, algorithm_kwargs,
+):
+ footprint = cp.ones(footprint_shape, dtype=bool)
+ rng = cp.random.default_rng(123)
+ shape = (350, 407)
+ if int_dtype == cp.uint8:
+ img = rng.integers(0, 256, shape, dtype=cp.uint8)
+ elif int_dtype == cp.int8:
+ img = rng.integers(-128, 128, shape, dtype=int).astype(cp.int8)
+ elif int_dtype == cp.uint16:
+ if False:
+ # test with 12-bit range stored in 16-bit integers (e.g. DICOM)
+ img = rng.integers(0, 4096, shape, dtype=cp.uint16)
+ else:
+ # smaller dynamic range
+ # (range 4096 fails only on CI, but couldn't reproduce locally)
+ img = rng.integers(0, 1024, shape, dtype=cp.uint16)
+ elif int_dtype == cp.int16:
+ # chose a limited range of values to test 512 hist_size case
+ img = rng.integers(-128, 384, shape, dtype=int).astype(cp.int16)
+
+ # 150 is the value used to auto-select between sorting vs. histogram
+ small_kernel = prod(footprint_shape) < 150
+ if algorithm_kwargs and (
+ algorithm == 'sorting'
+ or (algorithm == 'auto' and small_kernel)
+ ):
+ msg = ["algorithm_kwargs={'partitions': 32} ignored"]
+ else:
+ msg = []
+ with expected_warnings(msg):
+ out = median(img, footprint, mode=mode, behavior='ndimage',
+ algorithm=algorithm, algorithm_kwargs=algorithm_kwargs)
+ expected = ndimage.median_filter(img, size=footprint.shape, mode=mode)
+ assert_allclose(expected, out)
+
+
+# TODO: Determine source of isolated remote test failures when 16-bit range
+# is > 1024. Could not reproduce locally.
+@pytest.mark.parametrize('mode', ['reflect', ])
+# use an anisotropic footprint large enough to trigger the histogram-based path
+@pytest.mark.parametrize('footprint_shape', [(7, 11)])
+# each case is (dtype, (low, high)); `high` is passed as the upper bound of
+# rng.integers
+@pytest.mark.parametrize(
+ 'int_dtype, irange',
+ [
+ (cp.uint16, (0, 256)),
+ (cp.uint16, (0, 15)),
+ (cp.uint16, (128, 384)),
+ (cp.uint16, (0, 200)),
+ (cp.uint16, (0, 510)),
+ (cp.uint16, (500, 550)),
+ (cp.uint16, (0, 1024)),
+ pytest.param(cp.uint16, (0, 2048), marks=pytest.mark.skip(reason="isolated failure on CI only")), # noqa
+ pytest.param(cp.uint16, (1024, 3185), marks=pytest.mark.skip(reason="isolated failure on CI only")), # noqa
+ (cp.int16, (0, 256)),
+ (cp.int16, (-15, 15)),
+ (cp.int16, (128, 384)),
+ (cp.int16, (-128, 384)),
+ (cp.int16, (-400, 400)),
+ pytest.param(cp.int16, (-1024, 2048), marks=pytest.mark.skip(reason="isolated failure on CI only")), # noqa
+ pytest.param(cp.int16, (150, 2048), marks=pytest.mark.skip(reason="isolated failure on CI only")), # noqa
+ ]
+)
+def test_median_hist_16bit_offsets(mode, footprint_shape, int_dtype, irange):
+ """Make sure 16-bit cases are robust to various value ranges"""
+ footprint = cp.ones(footprint_shape, dtype=bool)
+ rng = cp.random.default_rng(123)
+ shape = (350, 407)
+ if int_dtype == cp.uint16:
+ # unsigned values can be drawn directly in the target dtype
+ img = rng.integers(irange[0], irange[1], shape, dtype=cp.uint16)
+ elif int_dtype == cp.int16:
+ # signed values are drawn as `int` and then converted to int16
+ img = rng.integers(irange[0], irange[1], shape, dtype=int)
+ img = img.astype(cp.int16)
+ # force the histogram-based algorithm and compare against ndimage
+ out = median(img, footprint, mode=mode, behavior='ndimage',
+ algorithm='histogram')
+ expected = ndimage.median_filter(img, size=footprint.shape, mode=mode)
+ assert_allclose(expected, out)
+
+
+@pytest.mark.parametrize('int_dtype', [cp.uint16, cp.int16])
+def test_median_hist_kernel_resource_limit_try_except(int_dtype):
+ # use an anisotropic footprint large enough to trigger
+ # the histogram-based path
+ footprint = cp.ones((15, 23), dtype=bool)
+ mode = 'nearest'
+ rng = cp.random.default_rng(123)
+ shape = (350, 407) # use anisotropic size
+ if int_dtype == cp.uint16:
+ # test with range likely to exceed the shared memory limit
+ img = rng.integers(0, 65536, shape, dtype=cp.uint16)
+ elif int_dtype == cp.int16:
+ # test with range likely to exceed the shared memory limit
+ img = rng.integers(-32768, 32767, shape, dtype=int).astype(cp.int16)
+ out = median(img, footprint, mode=mode)
+ expected = ndimage.median_filter(img, size=footprint.shape, mode=mode)
+ assert_allclose(expected, out)
+
+
+@pytest.mark.parametrize(
+ 'algorithm', ['auto', 'histogram', 'sorting', 'invalid']
+)
+def test_median_algorithm_parameter(algorithm):
+ """Call all algorithms for float32 input.
+ """
+ footprint = cp.ones((15, 23), dtype=bool)
+ mode = 'nearest'
+ rng = cp.random.default_rng(123)
+ shape = (350, 407) # use anisotropic size
+ img = rng.standard_normal(shape, dtype=cp.float32)
+ if algorithm in ['invalid', 'histogram']:
+ # histogram supports only integer-valued dtypes
+ # 'invalid' is an uncrecognized algorithm
+ with pytest.raises(ValueError):
+ median(img, footprint, mode=mode, algorithm=algorithm)
+ else:
+ out = median(img, footprint, mode=mode, algorithm=algorithm)
+ expected = ndimage.median_filter(img, size=footprint.shape, mode=mode)
+ assert_allclose(expected, out)
@pytest.mark.parametrize(
diff --git a/python/cucim/src/cucim/skimage/filters/tests/test_separable_filtering.py b/python/cucim/src/cucim/skimage/filters/tests/test_separable_filtering.py
new file mode 100644
index 000000000..0f2c5897f
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/filters/tests/test_separable_filtering.py
@@ -0,0 +1,333 @@
+import cupy as cp
+import pytest
+
+from cucim.skimage._vendored.ndimage import (convolve1d, correlate1d,
+ gaussian_filter, gaussian_filter1d,
+ gaussian_gradient_magnitude,
+ gaussian_laplace, laplace, prewitt,
+ sobel, uniform_filter,
+ uniform_filter1d)
+
+
+def _get_image(shape, dtype, seed=123):  # seeded random array of `dtype`
+    rng = cp.random.default_rng(seed)
+    dtype = cp.dtype(dtype)
+    if dtype.kind == 'b':
+        # `high` is exclusive, so 2 is needed to get both False and True
+        image = rng.integers(0, 2, shape, dtype=cp.uint8).astype(bool)
+    elif dtype.kind in 'iu':
+        image = rng.integers(0, 128, shape, dtype=dtype)
+    elif dtype.kind in 'c':
+        real_dtype = cp.asarray([], dtype=dtype).real.dtype
+        image = rng.standard_normal(shape, dtype=real_dtype)
+        image = image + 1j * rng.standard_normal(shape, dtype=real_dtype)
+    elif dtype == cp.float16:
+        image = rng.standard_normal(shape).astype(dtype)
+    else:
+        image = rng.standard_normal(shape, dtype=dtype)
+    return image
+
+
+def _get_rtol_atol(dtype):
+    """Return (rtol, atol) chosen from the real part's precision."""
+    real_dtype = cp.array([], dtype=dtype).real.dtype
+    if real_dtype == cp.float64:
+        tol = 1e-12
+    else:
+        tol = 1e-3 if real_dtype == cp.float16 else 1e-5
+    return tol, tol
+
+
+def _compare_implementations(
+    shape, kernel_size, axis, dtype, mode, cval=0.0, origin=0,
+    output_dtype=None, kernel_dtype=None, output_preallocated=False,
+    function=convolve1d,
+):
+    """Check that both 1D filtering algorithms give matching output.
+
+    `function` is run on the same random image and kernel once with
+    ``algorithm='elementwise'`` and once with ``algorithm='shared_memory'``.
+    """
+    dtype = cp.dtype(dtype)
+    image = _get_image(shape, dtype)
+    # the kernel uses the image dtype unless one is explicitly requested
+    kernel = _get_image(
+        (kernel_size,), dtype if kernel_dtype is None else kernel_dtype
+    )
+    rtol, atol = _get_rtol_atol(kernel.dtype)
+    kwargs = dict(axis=axis, mode=mode, cval=cval, origin=origin)
+    if output_dtype is not None:
+        output_dtype = cp.dtype(output_dtype)
+    elif output_preallocated:
+        output_dtype = image.dtype
+    results = []
+    for algorithm in ('elementwise', 'shared_memory'):
+        if output_preallocated:
+            # the filter writes into a caller-provided array
+            out = cp.empty(image.shape, dtype=output_dtype)
+            function(image, kernel, output=out, algorithm=algorithm, **kwargs)
+        else:
+            out = function(
+                image, kernel, output=output_dtype, algorithm=algorithm,
+                **kwargs
+            )
+        results.append(out)
+    cp.testing.assert_allclose(*results, rtol=rtol, atol=atol)
+
+
+def _compare_implementations_other(
+    shape, dtype, mode, cval=0.0,
+    output_dtype=None, kernel_dtype=None, output_preallocated=False,
+    function=convolve1d, func_kwargs=None,
+):
+    """Compare both algorithms for a filter called without a kernel."""
+    dtype = cp.dtype(dtype)
+    image = _get_image(shape, dtype)
+    rtol, atol = _get_rtol_atol(image.dtype)
+    kwargs = dict(mode=mode, cval=cval)
+    kwargs.update(func_kwargs or {})  # None default avoids a shared dict
+    if output_dtype is not None:
+        output_dtype = cp.dtype(output_dtype)
+    if output_preallocated:
+        if output_dtype is None:
+            output_dtype = image.dtype
+        output1 = cp.empty(image.shape, dtype=output_dtype)
+        output2 = cp.empty(image.shape, dtype=output_dtype)
+        function(image, output=output1, algorithm='elementwise', **kwargs)
+        function(image, output=output2, algorithm='shared_memory', **kwargs)
+        cp.testing.assert_allclose(output1, output2, rtol=rtol, atol=atol)
+        return
+    output1 = function(
+        image, output=output_dtype, algorithm='elementwise', **kwargs
+    )
+    output2 = function(
+        image, output=output_dtype, algorithm='shared_memory', **kwargs
+    )
+    cp.testing.assert_allclose(output1, output2, rtol=rtol, atol=atol)
+    return
+
+
+@pytest.mark.parametrize('shape', ((64, 57), (1000, 500)))
+@pytest.mark.parametrize('axis', (0, 1))
+@pytest.mark.parametrize('origin', ('min', 0, 'max'))
+@pytest.mark.parametrize('kernel_size', tuple(range(1, 17)))
+@pytest.mark.parametrize('function', [convolve1d, correlate1d])
+def test_separable_kernel_sizes_and_origins(
+ shape, axis, origin, kernel_size, function
+):
+ if kernel_size == 1:
+ origin = 0
+ elif origin == 'min':
+ origin = -(kernel_size // 2)
+ elif origin == 'max':
+ origin = kernel_size // 2
+ if kernel_size % 2 == 0:
+ origin -= 1
+ _compare_implementations(
+ shape,
+ kernel_size=kernel_size,
+ axis=axis,
+ dtype=cp.float32,
+ mode='nearest',
+ origin=origin,
+ function=function,
+ )
+
+
+@pytest.mark.parametrize('shape', ((64, 57), (1000, 500)))
+@pytest.mark.parametrize('axis', (0, 1))
+@pytest.mark.parametrize(
+ 'kernel_size',
+ tuple(range(17, 129, 11)) + tuple(range(145, 275, 41))
+)
+def test_separable_kernel_larger_sizes(shape, axis, kernel_size):
+ _compare_implementations(
+ shape,
+ kernel_size=kernel_size,
+ axis=axis,
+ dtype=cp.float32,
+ mode='reflect',
+ origin=0,
+ )
+
+
+@pytest.mark.parametrize('shape', ((1000, 500),))
+@pytest.mark.parametrize('axis', (0, 1))
+def test_separable_elementwise_very_large_size_fallback(shape, axis):
+ """Very large kernel to make it likely shared memory will be exceeded."""
+ _compare_implementations(
+ shape,
+ kernel_size=901,
+ axis=axis,
+ dtype=cp.float64,
+ mode='nearest',
+ origin=0,
+ )
+
+
+@pytest.mark.parametrize('shape', ((4000, 2000), (1, 1), (5, 500), (1500, 5)))
+@pytest.mark.parametrize('axis', (-1, -2))
+@pytest.mark.parametrize('kernel_size', (1, 38, 129))
+@pytest.mark.parametrize(
+ 'mode',
+ ('nearest', 'reflect', 'wrap', 'mirror', 'constant', ('constant', 1)),
+)
+def test_separable_image_shapes_and_modes(shape, axis, kernel_size, mode):
+
+ if isinstance(mode, tuple):
+ mode, cval = mode
+ else:
+ cval = 0
+
+ _compare_implementations(
+ shape,
+ kernel_size=kernel_size,
+ axis=axis,
+ dtype=cp.float32,
+ mode=mode,
+ cval=cval,
+ origin=0,
+ )
+
+
+image_dtypes_tested = (
+ cp.float16, cp.float32, cp.float64, cp.complex64, cp.complex128, bool,
+ cp.int8, cp.uint8, cp.int16, cp.uint16, cp.int32, cp.uint32, cp.int64,
+ cp.uint64,
+)
+
+
+@pytest.mark.parametrize('axis', (0, 1))
+@pytest.mark.parametrize('image_dtype', image_dtypes_tested)
+@pytest.mark.parametrize(
+ 'kernel_dtype', (None, cp.float32, cp.uint8, cp.complex64)
+)
+def test_separable_image_and_kernel_dtypes(axis, image_dtype, kernel_dtype):
+ """Test many kernel and image dtype combinations"""
+
+ _compare_implementations(
+ (64, 32),
+ kernel_size=3,
+ axis=axis,
+ dtype=image_dtype,
+ mode='nearest',
+ origin=0,
+ kernel_dtype=kernel_dtype,
+ )
+
+
+@pytest.mark.parametrize('axis', (0, 1))
+@pytest.mark.parametrize('image_dtype', image_dtypes_tested)
+@pytest.mark.parametrize(
+    'output_dtype', (None, cp.float32, cp.int32, cp.complex64)
+)
+@pytest.mark.parametrize('output_preallocated', (False, True))
+def test_separable_input_and_output_dtypes(
+    axis, image_dtype, output_dtype, output_preallocated
+):
+    """Test many image and output dtype combinations"""
+    if cp.dtype(image_dtype).kind == 'c' and output_dtype is not None:
+        if not cp.dtype(output_dtype).kind == 'c':
+            pytest.skip('cannot cast complex values to real')
+    _compare_implementations(
+        (64, 32),
+        kernel_size=3,
+        axis=axis,
+        dtype=image_dtype,
+        mode='nearest',
+        origin=0,
+        kernel_dtype=None,
+        output_dtype=output_dtype,
+        output_preallocated=output_preallocated,
+    )
+
+
+@pytest.mark.parametrize('shape', ((64, 57),))
+@pytest.mark.parametrize(
+    'function, func_kwargs',
+    [
+        (gaussian_filter, dict(sigma=1.5)),
+        (gaussian_filter1d, dict(sigma=1.5, axis=0)),
+        (gaussian_filter1d, dict(sigma=1.5, axis=1)),
+        (gaussian_gradient_magnitude, dict(sigma=3.5)),
+        (gaussian_laplace, dict(sigma=2.5)),
+        (laplace, {}),
+        (prewitt, {}),
+        (sobel, {}),
+        (uniform_filter, dict(size=7)),
+        (uniform_filter1d, dict(size=7, axis=0)),
+        (uniform_filter1d, dict(size=7, axis=1)),
+    ]
+)
+def test_separable_internal_kernel(shape, function, func_kwargs):
+    """
+    Test case to make sure the 'algorithm' kwarg works for all other separable
+    ndimage filters as well.
+
+    Filters that act along a single axis get it from ``func_kwargs``; no
+    separate 'axis' or 'origin' parametrization is used because it would only
+    repeat identical calls.
+    """
+    _compare_implementations_other(
+        shape,
+        dtype=cp.float32,
+        mode='nearest',
+        function=function,
+        func_kwargs=func_kwargs,
+    )
+
+
+@pytest.mark.parametrize('shape', ((16, 24, 32), (192, 128, 160)))
+@pytest.mark.parametrize('axis', (0, 1, 2))
+@pytest.mark.parametrize('kernel_size', tuple(range(1, 17, 3)))
+@pytest.mark.parametrize('function', [convolve1d, correlate1d])
+def test_separable_kernel_sizes_3d(
+ shape, axis, kernel_size, function
+):
+ _compare_implementations(
+ shape,
+ kernel_size=kernel_size,
+ axis=axis,
+ dtype=cp.float32,
+ mode='nearest',
+ origin=0,
+ function=function,
+ )
+
+
+@pytest.mark.parametrize('axis', (0, 1, 2))
+@pytest.mark.parametrize('kernel_size', (65, 129, 198))
+def test_separable_large_kernel_3d(axis, kernel_size):
+ _compare_implementations(
+ shape=(256, 128, 96),
+ kernel_size=kernel_size,
+ axis=axis,
+ dtype=cp.float32,
+ mode='reflect',
+ origin=0,
+ )
+
+
+@pytest.mark.parametrize(
+ 'shape', ((64, 5, 64), (5, 64, 64), (64, 64, 5), (32, 32, 32))
+)
+@pytest.mark.parametrize('axis', (-1, -2, -3))
+@pytest.mark.parametrize('kernel_size', (9,))
+@pytest.mark.parametrize(
+ 'mode',
+ ('nearest', 'reflect', 'wrap', 'mirror', 'constant', ('constant', 1)),
+)
+def test_separable_image_shapes_and_modes_3d(shape, axis, kernel_size, mode):
+ if isinstance(mode, tuple):
+ mode, cval = mode
+ else:
+ cval = 0
+ _compare_implementations(
+ shape,
+ kernel_size=kernel_size,
+ axis=axis,
+ dtype=cp.float32,
+ mode=mode,
+ cval=cval,
+ origin=0,
+ )
diff --git a/python/cucim/src/cucim/skimage/filters/thresholding.py b/python/cucim/src/cucim/skimage/filters/thresholding.py
index 8ca274209..14acc342e 100644
--- a/python/cucim/src/cucim/skimage/filters/thresholding.py
+++ b/python/cucim/src/cucim/skimage/filters/thresholding.py
@@ -6,7 +6,7 @@
import cupy as cp
import numpy as np
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from skimage.filters import threshold_isodata as _threshold_isodata_cpu
from skimage.filters import threshold_minimum as _threshold_minimum_cpu
from skimage.filters import threshold_multiotsu as _threshold_multiotsu_cpu
@@ -735,8 +735,8 @@ def threshold_li(image, *, tolerance=None, initial_guess=None,
return threshold
-@deprecate_kwarg({'max_iter': 'max_num_iter'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg({'max_iter': 'max_num_iter'}, removed_version="23.02.00",
+ deprecated_version="22.02.00")
def threshold_minimum(image=None, nbins=256, max_num_iter=10000, *, hist=None):
"""Return threshold value based on minimum method.
diff --git a/python/cucim/src/cucim/skimage/measure/_blur_effect.py b/python/cucim/src/cucim/skimage/measure/_blur_effect.py
index 4e0fe6fe9..69421512e 100644
--- a/python/cucim/src/cucim/skimage/measure/_blur_effect.py
+++ b/python/cucim/src/cucim/skimage/measure/_blur_effect.py
@@ -1,5 +1,5 @@
import cupy as cp
-import cupyx.scipy.ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from ..color import rgb2gray
from ..util import img_as_float
diff --git a/python/cucim/src/cucim/skimage/measure/_label.py b/python/cucim/src/cucim/skimage/measure/_label.py
index 9dc2fc2bd..c9969cdff 100644
--- a/python/cucim/src/cucim/skimage/measure/_label.py
+++ b/python/cucim/src/cucim/skimage/measure/_label.py
@@ -16,8 +16,8 @@ def _get_structure(ndim, connectivity):
# TODO: currently uses int32 for the labels. should add int64 option as well
@deprecate_kwarg({'input': 'label_image'},
- deprecated_version='0.19',
- removed_version='1.0')
+ deprecated_version='22.02.00',
+ removed_version='23.02.00')
def label(label_image, background=None, return_num=False, connectivity=None):
r"""Label connected regions of an integer array.
diff --git a/python/cucim/src/cucim/skimage/measure/profile.py b/python/cucim/src/cucim/skimage/measure/profile.py
index 6643dc034..fc0b0cf1f 100644
--- a/python/cucim/src/cucim/skimage/measure/profile.py
+++ b/python/cucim/src/cucim/skimage/measure/profile.py
@@ -2,7 +2,7 @@
import cupy as cp
import numpy as np
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from .._shared.utils import _fix_ndimage_mode, _validate_interpolation_order
diff --git a/python/cucim/src/cucim/skimage/measure/tests/test_blur_effect.py b/python/cucim/src/cucim/skimage/measure/tests/test_blur_effect.py
index ff5b2ac5e..3b0013005 100644
--- a/python/cucim/src/cucim/skimage/measure/tests/test_blur_effect.py
+++ b/python/cucim/src/cucim/skimage/measure/tests/test_blur_effect.py
@@ -1,6 +1,7 @@
import cupy as cp
+import pytest
from cupy.testing import assert_array_equal
-from skimage.data import astronaut, cells3d
+from skimage.data import astronaut
from cucim.skimage.color import rgb2gray
from cucim.skimage.filters import gaussian
@@ -47,6 +48,7 @@ def test_blur_effect_channel_axis():
def test_blur_effect_3d():
"""Test that the blur metric works on a 3D image."""
+ cells3d = pytest.importorskip('skimage.data.cells3d')
image_3d = cp.array(cells3d()[:, 1, :, :]) # grab just the nuclei
B0 = blur_effect(image_3d)
B1 = blur_effect(gaussian(image_3d, sigma=1))
diff --git a/python/cucim/src/cucim/skimage/metrics/_structural_similarity.py b/python/cucim/src/cucim/skimage/metrics/_structural_similarity.py
index 6bfa07d31..44d0e28e5 100644
--- a/python/cucim/src/cucim/skimage/metrics/_structural_similarity.py
+++ b/python/cucim/src/cucim/skimage/metrics/_structural_similarity.py
@@ -1,7 +1,7 @@
import functools
import cupy as cp
-from cupyx.scipy.ndimage import uniform_filter
+import cucim.skimage._vendored.ndimage as ndi
from .._shared import utils
from .._shared.filters import gaussian
@@ -187,7 +187,7 @@ def structural_similarity(im1, im2,
filter_func = gaussian
filter_args = {'sigma': sigma, 'truncate': truncate, 'mode': 'reflect'}
else:
- filter_func = uniform_filter
+ filter_func = ndi.uniform_filter
filter_args = {'size': win_size}
# ndimage filters need floating point data
diff --git a/python/cucim/src/cucim/skimage/morphology/__init__.py b/python/cucim/src/cucim/skimage/morphology/__init__.py
index e3a5423b4..cf1fbb774 100644
--- a/python/cucim/src/cucim/skimage/morphology/__init__.py
+++ b/python/cucim/src/cucim/skimage/morphology/__init__.py
@@ -1,4 +1,4 @@
-from ._skeletonize import thin
+from ._skeletonize import medial_axis, thin
from .binary import (binary_closing, binary_dilation, binary_erosion,
binary_opening)
from .footprints import (ball, cube, diamond, disk, octagon, octahedron,
@@ -32,4 +32,5 @@
"remove_small_objects",
"remove_small_holes",
"thin",
+ "medial_axis",
]
diff --git a/python/cucim/src/cucim/skimage/morphology/_medial_axis_lookup.py b/python/cucim/src/cucim/skimage/morphology/_medial_axis_lookup.py
new file mode 100644
index 000000000..37f40b13e
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/morphology/_medial_axis_lookup.py
@@ -0,0 +1,67 @@
+import numpy as np
+
+# medial axis lookup tables (independent of image content)
+#
+# Note: lookup table generated using scikit-image code from
+# https://github.com/scikit-image/scikit-image/blob/38b595d60befe3a0b4c0742995b9737200a079c6/skimage/morphology/_skeletonize.py#L449-L458 # noqa
+
+lookup_table = np.array(
+ [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1,
+ 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
+ 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0
+ ],
+ dtype=bool,
+)
+
+
+cornerness_table = np.array(
+ [
+ 9, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 7, 7, 6, 7, 6,
+ 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5,
+ 6, 5, 5, 4, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 7,
+ 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 7, 6, 6, 5, 6, 5, 5, 4,
+ 6, 5, 5, 4, 5, 4, 4, 3, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4,
+ 4, 3, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 8, 7, 7, 6,
+ 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5,
+ 5, 4, 5, 4, 4, 3, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3,
+ 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 7, 6, 6, 5, 6, 5,
+ 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3,
+ 4, 3, 3, 2, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 5, 4,
+ 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, 8, 7, 7, 6, 7, 6, 6, 5,
+ 7, 6, 6, 5, 6, 5, 5, 4, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4,
+ 4, 3, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 6, 5, 5, 4,
+ 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5,
+ 5, 4, 5, 4, 4, 3, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2,
+ 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 5, 4, 4, 3, 4, 3,
+ 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4,
+ 5, 4, 4, 3, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 6, 5,
+ 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, 5, 4, 4, 3, 4, 3, 3, 2,
+ 4, 3, 3, 2, 3, 2, 2, 1, 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3,
+ 3, 2, 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, 5, 4, 4, 3,
+ 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, 4, 3, 3, 2, 3, 2, 2, 1, 3, 2,
+ 2, 1, 2, 1, 1, 0
+ ],
+ dtype=np.uint8,
+)
diff --git a/python/cucim/src/cucim/skimage/morphology/_skeletonize.py b/python/cucim/src/cucim/skimage/morphology/_skeletonize.py
index 4a93622de..2bc06dec1 100644
--- a/python/cucim/src/cucim/skimage/morphology/_skeletonize.py
+++ b/python/cucim/src/cucim/skimage/morphology/_skeletonize.py
@@ -1,8 +1,15 @@
+import warnings
+
import cupy as cp
-import cupyx.scipy.ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
import numpy as np
+from cucim.core.operations.morphology import distance_transform_edt
+
from .._shared.utils import check_nD, deprecate_kwarg
+from ._medial_axis_lookup import \
+ cornerness_table as _medial_axis_cornerness_table
+from ._medial_axis_lookup import lookup_table as _medial_axis_lookup_table
# --------- Skeletonization and thinning based on Guo and Hall 1989 ---------
@@ -39,8 +46,8 @@
0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=bool)
-@deprecate_kwarg({'max_iter': 'max_num_iter'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg({"max_iter": "max_num_iter"}, removed_version="23.02.00",
+ deprecated_version="22.02.00")
def thin(image, max_num_iter=None):
"""
Perform morphological thinning of a binary image.
@@ -62,7 +69,7 @@ def thin(image, max_num_iter=None):
See Also
--------
- skeletonize, medial_axis
+ medial_axis
Notes
-----
@@ -131,7 +138,7 @@ def thin(image, max_num_iter=None):
# perform the two "subiterations" described in the paper
for lut in [G123_LUT, G123P_LUT]:
# correlate image with neighborhood mask
- N = ndi.correlate(skel, mask, mode='constant')
+ N = ndi.correlate(skel, mask, mode="constant")
# take deletion decision from this subiteration's LUT
D = cp.take(lut, N)
# perform deletion
@@ -141,3 +148,218 @@ def thin(image, max_num_iter=None):
num_iter += 1
return skel.astype(bool)
+
+
+# --------- Skeletonization by medial axis transform --------
+
+
+def _get_tiebreaker(n, random_seed):
+    # CuPy generator doesn't currently have the permutation method, so
+    # fall back to cp.random.permutation instead.
+    cp.random.seed(random_seed)
+    if n < 2 ** 31:  # values 0..n-1 must fit in int32 (max 2**31 - 1)
+        dtype = np.int32
+    else:
+        dtype = np.intp
+    tiebreaker = cp.random.permutation(cp.arange(n, dtype=dtype))
+    return tiebreaker
+
+
+def medial_axis(image, mask=None, return_distance=False, *, random_state=None):
+    """Compute the medial axis transform of a binary image.
+
+    Parameters
+    ----------
+    image : binary ndarray, shape (M, N)
+        The image of the shape to be skeletonized.
+    mask : binary ndarray, shape (M, N), optional
+        If a mask is given, only those elements in `image` with a true
+        value in `mask` are used for computing the medial axis.
+    return_distance : bool, optional
+        If true, the distance transform is returned as well as the skeleton.
+    random_state : {None, int}, optional
+        Seed for the random tie-breaking order of pixels that share the
+        same distance and cornerness. The value is passed unchanged to
+        ``cupy.random.seed``, so calling this function reseeds
+        ``cupy.random`` and only seeds accepted there are supported.
+        NOTE(review): a ``numpy.random.Generator`` instance is presumably
+        not a valid seed; confirm against ``cupy.random.seed``.
+
+        .. versionadded:: 0.19
+
+    Returns
+    -------
+    out : ndarray of bools
+        Medial axis transform of the image
+    dist : ndarray, optional
+        Distance transform of the image (only returned if `return_distance`
+        is True)
+
+    See Also
+    --------
+    thin
+
+    Notes
+    -----
+    This algorithm computes the medial axis transform of an image
+    as the ridges of its distance transform.
+
+    The different steps of the algorithm are as follows
+     * A lookup table is used, that assigns 0 or 1 to each configuration of
+       the 3x3 binary square, whether the central pixel should be removed
+       or kept. We want a point to be removed if it has more than one neighbor
+       and if removing it does not change the number of connected components.
+
+     * The distance transform to the background is computed, as well as
+       the cornerness of the pixel.
+
+     * The foreground (value of 1) points are ordered by
+       the distance transform, then the cornerness.
+
+     * scikit-image's Cython function ``_skeletonize_loop`` is called on the
+       CPU to reduce the image to its skeleton. It processes pixels in the
+       order determined at the previous step, and removes or maintains a
+       pixel according to the lookup table. Because of the ordering, it is
+       possible to process all pixels in only one pass.
+
+    Examples
+    --------
+    >>> square = cp.zeros((7, 7), dtype=cp.uint8)
+    >>> square[1:-1, 2:-2] = 1
+    >>> square
+    array([[0, 0, 0, 0, 0, 0, 0],
+           [0, 0, 1, 1, 1, 0, 0],
+           [0, 0, 1, 1, 1, 0, 0],
+           [0, 0, 1, 1, 1, 0, 0],
+           [0, 0, 1, 1, 1, 0, 0],
+           [0, 0, 1, 1, 1, 0, 0],
+           [0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
+    >>> medial_axis(square).astype(cp.uint8)
+    array([[0, 0, 0, 0, 0, 0, 0],
+           [0, 0, 1, 0, 1, 0, 0],
+           [0, 0, 0, 1, 0, 0, 0],
+           [0, 0, 0, 1, 0, 0, 0],
+           [0, 0, 0, 1, 0, 0, 0],
+           [0, 0, 1, 0, 1, 0, 0],
+           [0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
+
+    """
+    try:
+        from skimage.morphology._skeletonize_cy import _skeletonize_loop
+    except ImportError as e:
+        warnings.warn(
+            "Could not find required private skimage Cython function:\n"
+            "\tskimage.morphology._skeletonize_cy._skeletonize_loop\n"
+        )
+        raise e
+
+    if mask is None:
+        # masked_image is modified in-place later so make a copy of the input
+        masked_image = image.astype(bool, copy=True)
+    else:
+        masked_image = image.astype(bool, copy=True)
+        masked_image[~mask] = False
+
+    # Load precomputed lookup table based on three conditions:
+    # 1. Keep only positive pixels
+    # AND
+    # 2. Keep if removing the pixel results in a different connectivity
+    # (if the number of connected components is different with and
+    # without the central pixel)
+    # OR
+    # 3. Keep if # pixels in neighborhood is 2 or less
+    # Note that this table is independent of the image
+    table = _medial_axis_lookup_table
+
+    # Build distance transform
+    distance = distance_transform_edt(masked_image)
+    if return_distance:
+        store_distance = distance.copy()
+
+    # Corners
+    # The processing order along the edge is critical to the shape of the
+    # resulting skeleton: if you process a corner first, that corner will
+    # be eroded and the skeleton will miss the arm from that corner. Pixels
+    # with fewer neighbors are more "cornery" and should be processed last.
+    # We use a cornerness_table lookup table where the score of a
+    # configuration is the number of background (0-value) pixels in the
+    # 3x3 neighborhood
+    cornerness_table = cp.asarray(_medial_axis_cornerness_table)
+    corner_score = _table_lookup(masked_image, cornerness_table)
+
+    # Define arrays for inner loop
+    distance = distance[masked_image]
+    i, j = cp.where(masked_image)
+
+    # Determine the order in which pixels are processed.
+    # We use a random # for tiebreaking. Assign each pixel in the image a
+    # predictable, random # so that masking doesn't affect arbitrary choices
+    # of skeletons
+    tiebreaker = _get_tiebreaker(n=distance.size, random_seed=random_state)
+    order = cp.lexsort(
+        cp.stack(
+            (tiebreaker, corner_score[masked_image], distance),
+            axis=0
+        )
+    )
+
+    # Call _skeletonize_loop on the CPU. It requires a single pass over the
+    # full array using a specific pixel order, so cannot be run multithreaded!
+    order = cp.asnumpy(order.astype(cp.int32, copy=False))
+    table = cp.asnumpy(table.astype(cp.uint8, copy=False))
+    i = cp.asnumpy(i).astype(dtype=np.intp, copy=False)
+    j = cp.asnumpy(j).astype(dtype=np.intp, copy=False)
+    result = cp.asnumpy(masked_image)
+    # Remove pixels not belonging to the medial axis
+    _skeletonize_loop(result.view(np.uint8), i, j, order, table)
+    result = cp.asarray(result.view(bool), dtype=bool)
+
+    if mask is not None:
+        result[~mask] = image[~mask]
+    if return_distance:
+        return result, store_distance
+    else:
+        return result
+
+
+def _table_lookup(image, table):
+    """
+    Perform a morphological transform on an image, directed by its
+    neighbors
+
+    Parameters
+    ----------
+    image : ndarray
+        A binary image
+    table : ndarray
+        A 512-element table giving the transform of each pixel given
+        the values of that pixel and its 8-connected neighbors.
+
+    Returns
+    -------
+    result : ndarray of same shape as `image`
+        Transformed image
+
+    Notes
+    -----
+    The pixels are numbered like this::
+
+      0 1 2
+      3 4 5
+      6 7 8
+
+    The index at a pixel is the sum of 2**<pixel number> for pixels
+    that evaluate to true.
+    """
+    #
+    # We accumulate into the indexer to get the index into the table
+    # at each point in the image
+    #
+    # max possible value of indexer is 511, so just use int16 dtype
+    kernel = cp.array(
+        [[256, 128, 64], [32, 16, 8], [4, 2, 1]],
+        dtype=cp.int16
+    )
+    indexer = ndi.convolve(image, kernel, output=np.int16, mode="constant")
+    image = table[indexer]
+    return image
diff --git a/python/cucim/src/cucim/skimage/morphology/binary.py b/python/cucim/src/cucim/skimage/morphology/binary.py
index 56c5b6654..1df98fe00 100644
--- a/python/cucim/src/cucim/skimage/morphology/binary.py
+++ b/python/cucim/src/cucim/skimage/morphology/binary.py
@@ -4,7 +4,7 @@
import functools
import cupy as cp
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from .._shared.utils import deprecate_kwarg
from .footprints import _footprint_is_sequence
@@ -37,8 +37,9 @@ def _iterate_binary_func(binary_func, image, footprint, out):
# default with the same dimension as the input image and size 3 along each
# axis.
@default_footprint
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00",
+ deprecated_version="22.02.00")
def binary_erosion(image, footprint=None, out=None):
"""Return fast binary morphological erosion of an image.
@@ -94,8 +95,9 @@ def binary_erosion(image, footprint=None, out=None):
@default_footprint
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00",
+ deprecated_version="22.02.00")
def binary_dilation(image, footprint=None, out=None):
"""Return fast binary morphological dilation of an image.
@@ -149,8 +151,9 @@ def binary_dilation(image, footprint=None, out=None):
@default_footprint
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00",
+ deprecated_version="22.02.00")
def binary_opening(image, footprint=None, out=None):
"""Return fast binary morphological opening of an image.
@@ -199,8 +202,9 @@ def binary_opening(image, footprint=None, out=None):
@default_footprint
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00",
+ deprecated_version="22.02.00")
def binary_closing(image, footprint=None, out=None):
"""Return fast binary morphological closing of an image.
diff --git a/python/cucim/src/cucim/skimage/morphology/footprints.py b/python/cucim/src/cucim/skimage/morphology/footprints.py
index 744f11535..70e8fda7c 100644
--- a/python/cucim/src/cucim/skimage/morphology/footprints.py
+++ b/python/cucim/src/cucim/skimage/morphology/footprints.py
@@ -175,8 +175,8 @@ def _decompose_size(size, kernel_size=3):
@deprecate_kwarg({'height': 'ncols', 'width': 'nrows'},
- deprecated_version='0.18.0',
- removed_version='0.20.0')
+ deprecated_version='21.06.00',
+ removed_version='22.02.00')
def rectangle(nrows, ncols, dtype=cp.uint8, *, decomposition=None):
"""Generates a flat, rectangular-shaped footprint.
diff --git a/python/cucim/src/cucim/skimage/morphology/gray.py b/python/cucim/src/cucim/skimage/morphology/gray.py
index ca6fa2845..ce5daf759 100644
--- a/python/cucim/src/cucim/skimage/morphology/gray.py
+++ b/python/cucim/src/cucim/skimage/morphology/gray.py
@@ -4,7 +4,7 @@
import functools
import cupy as cp
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from .._shared.utils import deprecate_kwarg
from ..util import crop
@@ -157,8 +157,8 @@ def func_out(image, footprint, out=None, *args, **kwargs):
@default_footprint
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00", deprecated_version="22.02.00")
def erosion(image, footprint=None, out=None, shift_x=False, shift_y=False):
"""Return grayscale morphological erosion of an image.
@@ -236,8 +236,8 @@ def erosion(image, footprint=None, out=None, shift_x=False, shift_y=False):
@default_footprint
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00", deprecated_version="22.02.00")
def dilation(image, footprint=None, out=None, shift_x=False, shift_y=False):
"""Return grayscale morphological dilation of an image.
@@ -324,8 +324,8 @@ def dilation(image, footprint=None, out=None, shift_x=False, shift_y=False):
return out
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00", deprecated_version="22.02.00")
@default_footprint
@pad_for_eccentric_footprints
def opening(image, footprint=None, out=None):
@@ -390,8 +390,8 @@ def opening(image, footprint=None, out=None):
return out
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00", deprecated_version="22.02.00")
@default_footprint
@pad_for_eccentric_footprints
def closing(image, footprint=None, out=None):
@@ -474,8 +474,8 @@ def _white_tophat_seqence(image, footprints, out):
@default_footprint
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00", deprecated_version="22.02.00")
def white_tophat(image, footprint=None, out=None):
"""Return white top hat of an image.
@@ -565,8 +565,8 @@ def white_tophat(image, footprint=None, out=None):
@default_footprint
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00", deprecated_version="22.02.00")
def black_tophat(image, footprint=None, out=None):
"""Return black top hat of an image.
diff --git a/python/cucim/src/cucim/skimage/morphology/grayreconstruct.py b/python/cucim/src/cucim/skimage/morphology/grayreconstruct.py
index 8847aaaf0..646503f13 100644
--- a/python/cucim/src/cucim/skimage/morphology/grayreconstruct.py
+++ b/python/cucim/src/cucim/skimage/morphology/grayreconstruct.py
@@ -15,8 +15,8 @@
from .._shared.utils import deprecate_kwarg
-@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg(kwarg_mapping={'selem': 'footprint'},
+ removed_version="23.02.00", deprecated_version="22.02.00")
def reconstruction(seed, mask, method='dilation', footprint=None, offset=None):
"""Perform a morphological reconstruction of an image.
diff --git a/python/cucim/src/cucim/skimage/morphology/misc.py b/python/cucim/src/cucim/skimage/morphology/misc.py
index 62ff9bfd0..bf3215a3f 100644
--- a/python/cucim/src/cucim/skimage/morphology/misc.py
+++ b/python/cucim/src/cucim/skimage/morphology/misc.py
@@ -2,7 +2,7 @@
import functools
import cupy as cp
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from .._shared.utils import remove_arg, warn
@@ -73,7 +73,7 @@ def remove_small_objects(ar, min_size=64, connectivity=1, in_place=False,
labelling if `ar` is bool.
in_place : bool, optional (default: False)
If ``True``, remove the objects in the input array itself.
- Otherwise, make a copy. Deprecated since version 0.19. Please
+ Otherwise, make a copy. Deprecated since version 22.02.00. Please
use `out` instead.
out : ndarray
Array of the same shape as `ar`, into which the output is
@@ -172,7 +172,7 @@ def remove_small_holes(ar, area_threshold=64, connectivity=1, in_place=False,
The connectivity defining the neighborhood of a pixel.
in_place : bool, optional (default: False)
If `True`, remove the connected components in the input array
- itself. Otherwise, make a copy. Deprecated since version 0.19.
+ itself. Otherwise, make a copy. Deprecated since version 22.02.00.
Please use `out` instead.
out : ndarray
Array of the same shape as `ar` and bool dtype, into which the
diff --git a/python/cucim/src/cucim/skimage/morphology/tests/test_skeletonize.py b/python/cucim/src/cucim/skimage/morphology/tests/test_skeletonize.py
index 532271c84..2fbafafc1 100644
--- a/python/cucim/src/cucim/skimage/morphology/tests/test_skeletonize.py
+++ b/python/cucim/src/cucim/skimage/morphology/tests/test_skeletonize.py
@@ -5,20 +5,25 @@
from skimage.morphology import thin as thin_cpu
from cucim.skimage._shared._warnings import expected_warnings
-from cucim.skimage.morphology import thin
+from cucim.skimage.morphology import medial_axis, thin
-class TestThin():
+class TestThin:
@property
def input_image(self):
"""image to test thinning with"""
- ii = cp.array([[0, 0, 0, 0, 0, 0, 0],
- [0, 1, 1, 1, 1, 1, 0],
- [0, 1, 0, 1, 1, 1, 0],
- [0, 1, 1, 1, 1, 1, 0],
- [0, 1, 1, 1, 1, 1, 0],
- [0, 1, 1, 1, 1, 1, 0],
- [0, 0, 0, 0, 0, 0, 0]], dtype=cp.uint8)
+ ii = cp.array(
+ [
+ [0, 0, 0, 0, 0, 0, 0],
+ [0, 1, 1, 1, 1, 1, 0],
+ [0, 1, 0, 1, 1, 1, 0],
+ [0, 1, 1, 1, 1, 1, 0],
+ [0, 1, 1, 1, 1, 1, 0],
+ [0, 1, 1, 1, 1, 1, 0],
+ [0, 0, 0, 0, 0, 0, 0],
+ ],
+ dtype=cp.uint8,
+ )
return ii
def test_zeros(self):
@@ -26,13 +31,18 @@ def test_zeros(self):
def test_iter_1(self):
result = thin(self.input_image, 1).astype(cp.uint8)
- expected = cp.array([[0, 0, 0, 0, 0, 0, 0],
- [0, 0, 1, 0, 0, 0, 0],
- [0, 1, 0, 1, 1, 0, 0],
- [0, 0, 1, 1, 1, 0, 0],
- [0, 0, 1, 1, 1, 0, 0],
- [0, 0, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 0, 0]], dtype=cp.uint8)
+ expected = cp.array(
+ [
+ [0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 1, 0, 0, 0, 0],
+ [0, 1, 0, 1, 1, 0, 0],
+ [0, 0, 1, 1, 1, 0, 0],
+ [0, 0, 1, 1, 1, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0],
+ ],
+ dtype=cp.uint8,
+ )
assert_array_equal(result, expected)
def test_max_iter_kwarg_deprecation(self):
@@ -43,13 +53,18 @@ def test_max_iter_kwarg_deprecation(self):
def test_noiter(self):
result = thin(self.input_image).astype(cp.uint8)
- expected = cp.array([[0, 0, 0, 0, 0, 0, 0],
- [0, 0, 1, 0, 0, 0, 0],
- [0, 1, 0, 1, 0, 0, 0],
- [0, 0, 1, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 0, 0]], dtype=cp.uint8)
+ expected = cp.array(
+ [
+ [0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 1, 0, 0, 0, 0],
+ [0, 1, 0, 1, 0, 0, 0],
+ [0, 0, 1, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0],
+ ],
+ dtype=cp.uint8,
+ )
assert_array_equal(result, expected)
def test_baddim(self):
@@ -57,7 +72,7 @@ def test_baddim(self):
with pytest.raises(ValueError):
thin(ii)
- @pytest.mark.parametrize('invert', [False, True])
+ @pytest.mark.parametrize("invert", [False, True])
def test_compare_skimage(self, invert):
h = data.horse()
if invert:
@@ -65,3 +80,84 @@ def test_compare_skimage(self, invert):
result = thin(cp.asarray(h))
expected = thin_cpu(h)
assert_array_equal(result, expected)
+
+
+class TestMedialAxis:
+    def test_00_00_zeros(self):
+        """Test medial_axis on an array of all zeros"""
+        result = medial_axis(cp.zeros((10, 10), bool))
+        assert not cp.any(result)
+
+    def test_00_01_zeros_masked(self):
+        """Test medial_axis on an array that is completely masked"""
+        result = medial_axis(cp.zeros((10, 10), bool), cp.zeros((10, 10), bool))
+        assert not cp.any(result)
+
+    def test_vertical_line(self):
+        """Test a thick vertical line, issue #3861"""
+        img = cp.zeros((9, 9))
+        img[:, 2] = 1
+        img[:, 3] = 1
+        img[:, 4] = 1
+
+        expected = cp.full(img.shape, False)
+        expected[:, 3] = True
+
+        result = medial_axis(img)
+        assert_array_equal(result, expected)
+
+    def test_01_01_rectangle(self):
+        """Test medial_axis on a rectangle"""
+        image = cp.zeros((9, 15), bool)
+        image[1:-1, 1:-1] = True
+        #
+        # The result should be four diagonals from the
+        # corners, meeting in a horizontal line
+        #
+        expected = cp.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
+                [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+                [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
+                [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
+                [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
+                [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+                [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+            ],
+            dtype=bool,
+        )
+        result = medial_axis(image)
+        assert cp.all(result == expected)
+        result, distance = medial_axis(image, return_distance=True)
+        assert distance.max() == 4
+
+    def test_01_02_hole(self):
+        """Test medial_axis on a rectangle with a hole in the middle"""
+        image = cp.zeros((9, 15), bool)
+        image[1:-1, 1:-1] = True
+        image[4, 4:-4] = False
+        expected = cp.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
+                [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
+                [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+                [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+                [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+                [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
+                [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+            ],
+            dtype=bool,
+        )
+        result = medial_axis(image)
+        assert cp.all(result == expected)
+
+    def test_narrow_image(self):
+        """Test medial_axis on a 1-pixel thin strip"""
+        image = cp.zeros((1, 5), bool)
+        image[:, 1:-1] = True
+        result = medial_axis(image)
+        assert cp.all(result == image)
diff --git a/python/cucim/src/cucim/skimage/registration/_optical_flow.py b/python/cucim/src/cucim/skimage/registration/_optical_flow.py
index 93ce39a5e..efbf3d7c3 100644
--- a/python/cucim/src/cucim/skimage/registration/_optical_flow.py
+++ b/python/cucim/src/cucim/skimage/registration/_optical_flow.py
@@ -9,6 +9,7 @@
import cupy as cp
from cupyx.scipy import ndimage as ndi
+from .._shared._gradient import gradient
from .._shared.utils import _supported_float_type
from ..transform import warp
from ._optical_flow_utils import coarse_to_fine, get_warp_points
@@ -79,7 +80,8 @@ def _tvl1(reference_image, moving_image, flow0, attachment, tightness,
image1_warp = warp(moving_image, get_warp_points(grid, flow_current),
mode='edge')
- grad = cp.stack(cp.gradient(image1_warp))
+ # output_as_array=True stacks the gradients along the first axis
+ grad = gradient(image1_warp, output_as_array=True)
NI = (grad * grad).sum(0)
NI[NI == 0] = 1
@@ -288,7 +290,8 @@ def _ilk(reference_image, moving_image, flow0, radius, num_warp, gaussian,
moving_image_warp = warp(moving_image, get_warp_points(grid, flow),
mode='edge')
- grad = cp.stack(cp.gradient(moving_image_warp), axis=0)
+ # output_as_array=True stacks the gradients along the first axis
+ grad = gradient(moving_image_warp, output_as_array=True)
error_image = ((grad * flow).sum(axis=0)
+ reference_image - moving_image_warp)
diff --git a/python/cucim/src/cucim/skimage/registration/tests/test_masked_phase_cross_correlation.py b/python/cucim/src/cucim/skimage/registration/tests/test_masked_phase_cross_correlation.py
index 0bafa2c2f..404f64132 100644
--- a/python/cucim/src/cucim/skimage/registration/tests/test_masked_phase_cross_correlation.py
+++ b/python/cucim/src/cucim/skimage/registration/tests/test_masked_phase_cross_correlation.py
@@ -4,7 +4,7 @@
from cupyx.scipy.ndimage import fourier_shift
from cupyx.scipy.ndimage import shift as real_shift
from numpy.testing import assert_almost_equal
-from skimage.data import brain, camera
+from skimage.data import camera
from skimage.io import imread
from cucim.skimage._shared.fft import fftmodule as fft
@@ -67,6 +67,7 @@ def test_masked_registration_random_masks():
def test_masked_registration_3d_contiguous_mask():
"""masked_register_translation should be able to register translations
between volumes with contiguous masks."""
+ brain = pytest.importorskip('skimage.data.brain')
ref_vol = cp.array(brain()[:, ::2, ::2])
offset = (1, -5, 10)
diff --git a/python/cucim/src/cucim/skimage/restoration/_denoise.py b/python/cucim/src/cucim/skimage/restoration/_denoise.py
index 614c5e7b3..392fc4ad2 100644
--- a/python/cucim/src/cucim/skimage/restoration/_denoise.py
+++ b/python/cucim/src/cucim/skimage/restoration/_denoise.py
@@ -91,8 +91,9 @@ def _denoise_tv_chambolle_nd(image, weight=0.1, eps=2.0e-4, max_num_iter=200):
return out
-@utils.deprecate_kwarg({'n_iter_max': 'max_num_iter'}, removed_version="1.0",
- deprecated_version="0.19.2")
+@utils.deprecate_kwarg({'n_iter_max': 'max_num_iter'},
+ removed_version="23.02.00",
+ deprecated_version="22.06.00")
@utils.deprecate_multichannel_kwarg(multichannel_position=4)
def denoise_tv_chambolle(image, weight=0.1, eps=2.0e-4, max_num_iter=200,
multichannel=False, *, channel_axis=None):
diff --git a/python/cucim/src/cucim/skimage/restoration/deconvolution.py b/python/cucim/src/cucim/skimage/restoration/deconvolution.py
index 9809637eb..2a1cfa67c 100644
--- a/python/cucim/src/cucim/skimage/restoration/deconvolution.py
+++ b/python/cucim/src/cucim/skimage/restoration/deconvolution.py
@@ -383,8 +383,8 @@ def unsupervised_wiener(image, psf, reg=None, user_params=None, is_real=True,
return (x_postmean, {'noise': gn_chain, 'prior': gx_chain})
-@deprecate_kwarg({'iterations': 'num_iter'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg({'iterations': 'num_iter'}, removed_version="23.02.00",
+ deprecated_version="22.02.00")
def richardson_lucy(image, psf, num_iter=50, clip=True, filter_epsilon=None):
"""Richardson-Lucy deconvolution.
diff --git a/python/cucim/src/cucim/skimage/restoration/j_invariant.py b/python/cucim/src/cucim/skimage/restoration/j_invariant.py
index 19f86a444..2add11e72 100644
--- a/python/cucim/src/cucim/skimage/restoration/j_invariant.py
+++ b/python/cucim/src/cucim/skimage/restoration/j_invariant.py
@@ -3,7 +3,7 @@
import cupy as cp
import numpy as np
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from .._shared.utils import _supported_float_type
from ..metrics import mean_squared_error
diff --git a/python/cucim/src/cucim/skimage/restoration/tests/test_j_invariant.py b/python/cucim/src/cucim/skimage/restoration/tests/test_j_invariant.py
index b6a697789..58f3bc0a1 100644
--- a/python/cucim/src/cucim/skimage/restoration/tests/test_j_invariant.py
+++ b/python/cucim/src/cucim/skimage/restoration/tests/test_j_invariant.py
@@ -25,7 +25,7 @@
# TODO: replace with CuPy version once completed
def _denoise_wavelet(image, rescale_sigma=True, **kwargs):
- if 'channel_axis' in kwargs and skimage.__version__ < '0.19':
+    if 'channel_axis' in kwargs and skimage.__version__ < '0.19':
# convert channel_axis to older multichannel kwarg for skimage 0.18.x
channel_axis = kwargs.pop('channel_axis')
kwargs['multichannel'] = False if channel_axis is None else True
@@ -62,7 +62,7 @@ def test_invariant_denoise_color(dtype):
def test_invariant_denoise_color_deprecated():
- # can remove \A\Z here if only testing with scikit-image 0.19
+ # can remove \A\Z here if only testing with cuCIM 22.02.00
with expected_warnings([r"`multichannel` is a deprecated argument|\A\Z"]):
denoised_img_color = _invariant_denoise(
noisy_img_color, _denoise_wavelet,
diff --git a/python/cucim/src/cucim/skimage/segmentation/__init__.py b/python/cucim/src/cucim/skimage/segmentation/__init__.py
index b5527b8a1..55b65058f 100644
--- a/python/cucim/src/cucim/skimage/segmentation/__init__.py
+++ b/python/cucim/src/cucim/skimage/segmentation/__init__.py
@@ -1,4 +1,6 @@
+from ._chan_vese import chan_vese
from ._clear_border import clear_border
+from ._expand_labels import expand_labels
from ._join import join_segmentations, relabel_sequential
from .boundaries import find_boundaries, mark_boundaries
from .morphsnakes import (checkerboard_level_set, disk_level_set,
@@ -7,12 +9,14 @@
from .random_walker_segmentation import random_walker
__all__ = [
+ "expand_labels",
"random_walker",
"find_boundaries",
"mark_boundaries",
"clear_border",
"join_segmentations",
"relabel_sequential",
+ "chan_vese",
"morphological_geodesic_active_contour",
"morphological_chan_vese",
"inverse_gaussian_gradient",
diff --git a/python/cucim/src/cucim/skimage/segmentation/_chan_vese.py b/python/cucim/src/cucim/skimage/segmentation/_chan_vese.py
new file mode 100644
index 000000000..0401c1f07
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/segmentation/_chan_vese.py
@@ -0,0 +1,442 @@
+import cupy as cp
+import numpy as np
+from cupyx import rsqrt # reciprocal sqrt
+
+from cucim.core.operations.morphology import distance_transform_edt
+
+from .._shared.utils import _supported_float_type, deprecate_kwarg
+
+
+@cp.fuse()  # fused elementwise kernel for the level-set curvature
+def _fused_curvature(phi, x_start, x_end, y_start, y_end, ul, ur, ll, lr):
+    fy = (y_end - y_start) / 2.0  # central first difference in y
+    fx = (x_end - x_start) / 2.0  # central first difference in x
+    fyy = y_end + y_start - 2 * phi  # second difference in y
+    fxx = x_end + x_start - 2 * phi  # second difference in x
+    fxy = .25 * (lr + ul - ur - ll)  # mixed difference from the corners
+    grad2 = fx**2 + fy**2  # squared gradient magnitude
+    K = (fxx * fy**2 - 2 * fxy * fx * fy + fyy * fx**2)
+    K /= (grad2 * cp.sqrt(grad2) + 1e-8)  # 1e-8 guards against grad2 == 0
+    return K
+
+
+def _cv_curvature(phi):
+    """Compute the 'curvature' of the level set `phi`.
+    """
+    padded = cp.pad(phi, 1, mode='edge')
+    # 4-connected neighbours of every pixel (views of the padded array)
+    y_prev = padded[:-2, 1:-1]
+    y_next = padded[2:, 1:-1]
+    x_prev = padded[1:-1, :-2]
+    x_next = padded[1:-1, 2:]
+    # diagonal neighbours, used for the mixed second difference
+    corner_ul = padded[:-2, :-2]
+    corner_ur = padded[:-2, 2:]
+    corner_ll = padded[2:, :-2]
+    corner_lr = padded[2:, 2:]
+    return _fused_curvature(phi, x_prev, x_next, y_prev, y_next,
+                            corner_ul, corner_ur, corner_ll, corner_lr)
+
+
+@cp.fuse()  # fused kernel: neighbour weights C1..C4 and weighted sum K
+def _fused_variance_kernel1(eta, x_start, x_mid, x_end, y_start, y_mid, y_end):
+    phixp = x_end - x_mid  # forward difference in x
+    phixn = x_mid - x_start  # backward difference in x
+    phix0 = x_end - x_start  # central difference in x (halved below)
+    phix0 /= 2.0
+    phixp *= phixp  # square each difference in place
+    phixn *= phixn
+    phix0 *= phix0
+
+    phiyp = y_end - y_mid  # forward difference in y
+    phiyn = y_mid - y_start  # backward difference in y
+    phiy0 = y_end - y_start  # central difference in y (halved below)
+    phiy0 /= 2.0
+    phiyp *= phiyp  # square each difference in place
+    phiyn *= phiyn
+    phiy0 *= phiy0
+
+    C1 = rsqrt(eta + phixp + phiy0)  # eta keeps the rsqrt argument nonzero
+    C2 = rsqrt(eta + phixn + phiy0)
+    C3 = rsqrt(eta + phix0 + phiyp)
+    C4 = rsqrt(eta + phix0 + phiyn)
+
+    K = x_end * C1  # each neighbour value weighted by its coefficient
+    K += x_start * C2
+    K += y_end * C3
+    K += y_start * C4
+
+    Csum = C1  # sum of the four coefficients
+    Csum += C2
+    Csum += C3
+    Csum += C4
+
+    return K, Csum
+
+
+@cp.fuse()  # fused kernel: sharp (non-regularised) mask and its complement
+def _fused_hphi_hinv(phi):
+    Hphi = (phi > 0).astype(phi.dtype)  # 1 where phi > 0, else 0
+    Hinv = 1.0 - Hphi  # complement of Hphi
+    return Hphi, Hinv
+
+
+@cp.fuse()  # fused kernel: combines all terms into the updated level set
+def _fused_variance_kernel2(
+    image, c1, c2, lam1, lam2, phi, K, dt, mu, delta_phi, Csum
+):
+    difference_term = image - c1
+    difference_term *= difference_term
+    difference_term *= -lam1  # -lam1 * (image - c1)**2
+
+    term2 = image - c2
+    term2 *= term2
+    term2 *= lam2  # +lam2 * (image - c2)**2
+    difference_term += term2
+
+    new_phi = phi + (dt * delta_phi) * (mu * K + difference_term)
+    out = new_phi / (1 + mu * dt * delta_phi * Csum)
+    return out
+
+
+def _cv_calculate_variation(image, phi, mu, lambda1, lambda2, dt):
+    """Return the level set obtained by applying one update step to `phi`.
+    """
+    padded = cp.pad(phi, 1, mode='edge')
+    inner = slice(1, -1)
+
+    # neighbours along the second axis (views of the edge-padded array)
+    x_prev = padded[inner, :-2]
+    x_here = padded[inner, inner]
+    x_next = padded[inner, 2:]
+    # neighbours along the first axis
+    y_prev = padded[:-2, inner]
+    y_here = padded[inner, inner]
+    y_next = padded[2:, inner]
+
+    nbr_term, coeff_sum = _fused_variance_kernel1(
+        1e-16, x_prev, x_here, x_next, y_prev, y_here, y_next
+    )
+    inside, outside = _fused_hphi_hinv(phi)
+    mean_in, mean_out = _cv_calculate_averages(image, inside, outside)
+    return _fused_variance_kernel2(
+        image, mean_in, mean_out, lambda1, lambda2, phi, nbr_term, dt, mu,
+        _cv_delta(phi), coeff_sum
+    )
+
+
+@cp.fuse()
+def _cv_heavyside(x, eps=1.):
+    """Returns the result of a regularised Heaviside step function of the
+    input value(s); `eps` controls how gradual the step is.
+    """
+    return 0.5 * (1. + (2. / cp.pi) * cp.arctan(x / eps))
+
+
+@cp.fuse()
+def _cv_delta(x, eps=1.):
+    """Returns the result of a regularised Dirac delta function of the
+    input value(s), computed as ``eps / (eps**2 + x**2)``.
+    """
+    return eps / (eps * eps + x * x)
+
+
+@cp.fuse()  # fused kernel: num <- num / (denom + eps), both updated in place
+def _fused_inplace_eps_div(num, denom, eps):
+    denom += eps  # NOTE: in-place, so the caller's `denom` is changed too
+    num /= denom
+    return
+
+
+def _cv_calculate_averages(image, H, Hinv):
+    """Compute the average image values 'inside' (H) and 'outside' (Hinv).
+    """
+    eps = 10 * cp.finfo(image.dtype).eps
+    inside_weight = cp.sum(H)
+    outside_weight = cp.sum(Hinv)
+    mean_inside = cp.sum(image * H)
+    mean_outside = cp.sum(image * Hinv)
+    # divide the weighted sums in place; eps avoids division by zero
+    _fused_inplace_eps_div(mean_inside, inside_weight, eps)
+    _fused_inplace_eps_div(mean_outside, outside_weight, eps)
+    return (mean_inside, mean_outside)
+
+
+@cp.fuse()  # fused kernel: lam * h * (image - c)**2
+def _fused_difference_op1(image, c, h, lam):
+    out = image - c
+    out *= out  # squared difference from the region average `c`
+    out *= h  # weight by the region indicator
+    out *= lam
+    return out
+
+
+def _cv_difference_from_average_term(image, Hphi, lambda_pos, lambda_neg):
+    """Compute, at each point, the 'energy' contribution due to the
+    difference from the average value within its region.
+    """
+    complement = 1. - Hphi
+    mean_in, mean_out = _cv_calculate_averages(image, Hphi, complement)
+    energy = _fused_difference_op1(image, mean_in, Hphi, lambda_pos)
+    energy += _fused_difference_op1(image, mean_out, complement, lambda_neg)
+    return energy
+
+
+def _cv_edge_length_term(phi, mu):
+    """Compute, at each point, the 'energy' contribution due to the length
+    of the edge between regions: the curvature of `phi` scaled by `mu`.
+    """
+    scaled_curvature = _cv_curvature(phi)
+    scaled_curvature *= mu
+    return scaled_curvature
+
+
+def _cv_energy(image, phi, mu, lambda1, lambda2):
+    """Compute the total 'energy' of the level set function `phi`.
+    """
+    hphi = _cv_heavyside(phi)
+    region = _cv_difference_from_average_term(image, hphi, lambda1, lambda2)
+    length = _cv_edge_length_term(phi, mu)
+    return cp.sum(region) + cp.sum(length)
+
+
+def _cv_checkerboard(image_size, square_size, dtype=cp.float64):
+    """Build a checkerboard-patterned initial level set.
+
+    The result is ``sin(pi * x / square_size) * sin(pi * y / square_size)``;
+    according to Pascal Getreuer, such a level set has fast convergence.
+    """
+    scale = cp.pi / square_size
+    rows = cp.arange(image_size[0], dtype=dtype)[:, None]
+    cols = cp.arange(image_size[1], dtype=dtype)[None, :]
+    # scale and take the sine in place so the requested dtype is kept
+    for axis_vals in (cols, rows):
+        axis_vals *= scale
+        cp.sin(axis_vals, out=axis_vals)
+    return cols * rows
+
+
+def _cv_large_disk(image_size):
+    """Build a disk-shaped initial level set.
+
+    The disk covers the whole image along its smallest dimension.
+    """
+    center_row = int((image_size[0] - 1) / 2)
+    center_col = int((image_size[1] - 1) / 2)
+    radius = float(min(center_col, center_row))
+    seed = cp.ones(image_size, dtype=bool)
+    seed[center_row, center_col] = 0.  # lone zero marks the disk center
+    level_set = radius - distance_transform_edt(seed)
+    level_set /= radius
+    return level_set
+
+
+def _cv_small_disk(image_size):
+    """Build a small disk-shaped initial level set.
+
+    The disk covers half of the image along its smallest dimension.
+    """
+    center_row = int((image_size[0] - 1) / 2)
+    center_col = int((image_size[1] - 1) / 2)
+    radius = float(min(center_col, center_row)) / 2.0
+    seed = cp.ones(image_size, dtype=bool)
+    seed[center_row, center_col] = 0.  # lone zero marks the disk center
+    level_set = radius - distance_transform_edt(seed)
+    level_set /= radius * 3
+    return level_set
+
+
+def _cv_init_level_set(init_level_set, image_shape, dtype=cp.float64):
+    """Return the initial level set (named preset or given array) as `dtype`.
+    """
+    if isinstance(init_level_set, str):  # str subclasses are presets too
+        if init_level_set == 'checkerboard':
+            res = _cv_checkerboard(image_shape, 5, dtype)
+        elif init_level_set == 'disk':
+            res = _cv_large_disk(image_shape)
+        elif init_level_set == 'small disk':
+            res = _cv_small_disk(image_shape)
+        else:
+            raise ValueError("Incorrect name for starting level set preset.")
+    else:
+        res = init_level_set  # user-supplied level set, validated by caller
+    return res.astype(dtype, copy=False)
+
+
+@deprecate_kwarg({'max_iter': 'max_num_iter'}, removed_version="23.02.00",
+                 deprecated_version="22.02.00")
+def chan_vese(image, mu=0.25, lambda1=1.0, lambda2=1.0, tol=1e-3,
+              max_num_iter=500, dt=0.5, init_level_set='checkerboard',
+              extended_output=False):
+    """Chan-Vese segmentation algorithm.
+
+    Active contour model by evolving a level set. Can be used to
+    segment objects without clearly defined boundaries.
+
+    Parameters
+    ----------
+    image : (M, N) ndarray
+        Grayscale image to be segmented.
+    mu : float, optional
+        'edge length' weight parameter. Higher `mu` values will
+        produce a 'round' edge, while values closer to zero will
+        detect smaller objects.
+    lambda1 : float, optional
+        'difference from average' weight parameter for the output
+        region with value 'True'. If it is lower than `lambda2`, this
+        region will have a larger range of values than the other.
+    lambda2 : float, optional
+        'difference from average' weight parameter for the output
+        region with value 'False'. If it is lower than `lambda1`, this
+        region will have a larger range of values than the other.
+    tol : float, positive, optional
+        Level set variation tolerance between iterations. If the
+        L2 norm difference between the level sets of successive
+        iterations normalized by the area of the image is below this
+        value, the algorithm will assume that the solution was
+        reached.
+    max_num_iter : uint, optional
+        Maximum number of iterations allowed before the algorithm
+        interrupts itself.
+    dt : float, optional
+        A multiplication factor applied at calculations for each step,
+        serves to accelerate the algorithm. While higher values may
+        speed up the algorithm, they may also lead to convergence
+        problems.
+    init_level_set : str or (M, N) ndarray, optional
+        Defines the starting level set used by the algorithm.
+        If a string is inputted, a level set that matches the image
+        size will automatically be generated. Alternatively, it is
+        possible to define a custom level set, which should be an
+        array of float values, with the same shape as 'image'.
+        Accepted string values are as follows.
+
+        'checkerboard'
+            the starting level set is defined as
+            sin(x/5*pi)*sin(y/5*pi), where x and y are pixel
+            coordinates. This level set has fast convergence, but may
+            fail to detect implicit edges.
+        'disk'
+            the starting level set is defined as the opposite
+            of the distance from the center of the image minus half of
+            the minimum value between image width and image height.
+            This is somewhat slower, but is more likely to properly
+            detect implicit edges.
+        'small disk'
+            the starting level set is defined as the
+            opposite of the distance from the center of the image
+            minus a quarter of the minimum value between image width
+            and image height.
+    extended_output : bool, optional
+        If set to True, the return value will be a tuple containing
+        the three return values (see below). If set to False which
+        is the default value, only the 'segmentation' array will be
+        returned.
+
+    Returns
+    -------
+    segmentation : (M, N) ndarray, bool
+        Segmentation produced by the algorithm.
+    phi : (M, N) ndarray of floats
+        Final level set computed by the algorithm.
+    energies : list of floats
+        Shows the evolution of the 'energy' for each step of the
+        algorithm. This should allow to check whether the algorithm
+        converged.
+
+    Notes
+    -----
+    The Chan-Vese Algorithm is designed to segment objects without
+    clearly defined boundaries. This algorithm is based on level sets
+    that are evolved iteratively to minimize an energy, which is
+    defined by weighted values corresponding to the sum of differences
+    intensity from the average value outside the segmented region, the
+    sum of differences from the average value inside the segmented
+    region, and a term which is dependent on the length of the
+    boundary of the segmented region.
+
+    This algorithm was first proposed by Tony Chan and Luminita Vese,
+    in a publication entitled "An Active Contour Model Without Edges"
+    [1]_.
+
+    This implementation of the algorithm is somewhat simplified in the
+    sense that the area factor 'nu' described in the original paper is
+    not implemented, and is only suitable for grayscale images.
+
+    Typical values for `lambda1` and `lambda2` are 1. If the
+    'background' is very different from the segmented object in terms
+    of distribution (for example, a uniform black image with figures
+    of varying intensity), then these values should be different from
+    each other.
+
+    Typical values for mu are between 0 and 1, though higher values
+    can be used when dealing with shapes with very ill-defined
+    contours.
+
+    The 'energy' which this algorithm tries to minimize is defined
+    as the sum of the differences from the average within the region
+    squared and weighed by the 'lambda' factors to which is added the
+    length of the contour multiplied by the 'mu' factor.
+
+    Supports 2D grayscale images only, and does not implement the area
+    term described in the original article.
+
+    References
+    ----------
+    .. [1] An Active Contour Model without Edges, Tony Chan and
+           Luminita Vese, Scale-Space Theories in Computer Vision,
+           1999, :DOI:`10.1007/3-540-48236-9_13`
+    .. [2] Chan-Vese Segmentation, Pascal Getreuer Image Processing On
+           Line, 2 (2012), pp. 214-224,
+           :DOI:`10.5201/ipol.2012.g-cv`
+    .. [3] The Chan-Vese Algorithm - Project Report, Rami Cohen, 2011
+           :arXiv:`1107.2782`
+    """
+    if len(image.shape) != 2:
+        raise ValueError("Input image should be a 2D array.")
+
+    float_dtype = _supported_float_type(image.dtype)
+    phi = _cv_init_level_set(init_level_set, image.shape, dtype=float_dtype)
+    if not isinstance(phi, cp.ndarray) or phi.shape != image.shape:
+        raise ValueError("The dimensions of initial level set do not "
+                         "match the dimensions of image.")
+
+    image = image.astype(float_dtype, copy=False)
+    image = image - cp.min(image)  # shift so that the minimum is 0
+    image_max = cp.max(image)  # computed once and reused for the rescaling
+    if image_max != 0:
+        image = image / image_max
+
+    i = 0
+    if extended_output:
+        old_energy = _cv_energy(image, phi, mu, lambda1, lambda2)
+        energies = []
+    phivar = tol + 1  # guarantees that the first iteration is executed
+
+    while phivar > tol and i < max_num_iter:
+        # Save old level set values
+        oldphi = phi
+
+        # Calculate new level set
+        phi = _cv_calculate_variation(image, phi, mu, lambda1, lambda2, dt)
+        phivar = phi - oldphi
+        phivar *= phivar
+        phivar = cp.sqrt(phivar.mean())  # RMS change of the level set
+
+        if extended_output:
+            # Extract energy
+            new_energy = _cv_energy(image, phi, mu, lambda1, lambda2)
+            # Could compare energy to the previous level set to see if
+            # continuing is necessary
+
+            # Save old energy values
+            energies.append(old_energy)
+            old_energy = new_energy
+        i += 1
+
+    segmentation = phi > 0
+
+    if extended_output:
+        return (segmentation, phi, energies)
+    else:
+        return segmentation
diff --git a/python/cucim/src/cucim/skimage/segmentation/_expand_labels.py b/python/cucim/src/cucim/skimage/segmentation/_expand_labels.py
new file mode 100644
index 000000000..8ec47a7be
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/segmentation/_expand_labels.py
@@ -0,0 +1,96 @@
+import cupy as cp
+
+from cucim.core.operations.morphology import distance_transform_edt
+
+
+def expand_labels(label_image, distance=1):
+    """Expand labels in label image by ``distance`` pixels without overlapping.
+
+    Given a label image, ``expand_labels`` grows label regions (connected
+    components) outwards by up to ``distance`` pixels without overflowing into
+    neighboring regions. More specifically, each background pixel that is
+    within Euclidean distance of <= ``distance`` pixels of a connected
+    component is assigned the label of that connected component.
+    Where multiple connected components are within ``distance`` pixels of a
+    background pixel, the label value of the closest connected component will
+    be assigned (see Notes for the case of multiple labels at equal distance).
+
+    Parameters
+    ----------
+    label_image : ndarray of dtype int
+        Label image. Pixels equal to zero are treated as background, i.e.
+        they are the only pixels that can receive a new label.
+    distance : float
+        Euclidean distance in pixels by which to grow the labels. Default is
+        one.
+
+    Returns
+    -------
+    enlarged_labels : ndarray of dtype int
+        Labeled array, where all connected regions have been enlarged
+
+    Notes
+    -----
+    Where labels are spaced more than ``distance`` pixels apart, this is
+    equivalent to a morphological dilation with a disc or hyperball of radius
+    ``distance``. However, in contrast to a morphological dilation,
+    ``expand_labels`` will not expand a label region into a neighboring
+    region.
+
+    This implementation of ``expand_labels`` is derived from CellProfiler
+    [1]_, where it is known as module "IdentifySecondaryObjects (Distance-N)"
+    [2]_.
+
+    There is an important edge case when a pixel has the same distance to
+    multiple regions, as it is not defined which region expands into that
+    space. Here, the exact behavior depends on the implementation of
+    ``cucim.core.operations.morphology.distance_transform_edt``.
+
+    See Also
+    --------
+    :func:`cucim.skimage.measure.label`, :func:`cucim.skimage.morphology.dilation`  # noqa
+
+    References
+    ----------
+    .. [1] https://cellprofiler.org
+    .. [2] https://github.com/CellProfiler/CellProfiler/blob/082930ea95add7b72243a4fa3d39ae5145995e9c/cellprofiler/modules/identifysecondaryobjects.py#L559  # noqa
+
+    Examples
+    --------
+    >>> import cupy as cp
+    >>> labels = cp.array([0, 1, 0, 0, 0, 0, 2])
+    >>> expand_labels(labels, distance=1)
+    array([1, 1, 1, 0, 0, 2, 2])
+
+    Labels will not overwrite each other:
+
+    >>> expand_labels(labels, distance=3)
+    array([1, 1, 1, 1, 2, 2, 2])
+
+    In case of ties, behavior is undefined, but currently resolves to the
+    label closest to ``(0,) * ndim`` in lexicographical order.
+
+    >>> labels_tied = cp.array([0, 1, 0, 2, 0])
+    >>> expand_labels(labels_tied, 1)
+    array([1, 1, 1, 2, 2])
+    >>> labels2d = cp.array(
+    ...     [[0, 1, 0, 0],
+    ...      [2, 0, 0, 0],
+    ...      [0, 3, 0, 0]]
+    ... )
+    >>> expand_labels(labels2d, 1)
+    array([[2, 1, 1, 0],
+           [2, 2, 0, 0],
+           [2, 3, 3, 0]])
+    """
+
+    # Euclidean distance to, and coordinates of, the nearest labeled pixel
+    background = label_image == 0
+    edt, nearest_idx = distance_transform_edt(background, return_indices=True)
+    within_reach = edt <= distance
+    # Gather, per axis, the coordinates of the nearest labeled pixel. In
+    # contrast to [1] this implementation supports arrays of any dimension.
+    source = tuple(axis_idx[within_reach] for axis_idx in nearest_idx)
+    expanded = cp.zeros_like(label_image)
+    expanded[within_reach] = label_image[source]
+    return expanded
diff --git a/python/cucim/src/cucim/skimage/segmentation/boundaries.py b/python/cucim/src/cucim/skimage/segmentation/boundaries.py
index 62724d9fc..d631c28e9 100644
--- a/python/cucim/src/cucim/skimage/segmentation/boundaries.py
+++ b/python/cucim/src/cucim/skimage/segmentation/boundaries.py
@@ -1,5 +1,5 @@
import cupy as cp
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from .._shared.utils import _supported_float_type
from ..color import gray2rgb
diff --git a/python/cucim/src/cucim/skimage/segmentation/morphsnakes.py b/python/cucim/src/cucim/skimage/segmentation/morphsnakes.py
index 9938cb9d1..dd3f60a82 100644
--- a/python/cucim/src/cucim/skimage/segmentation/morphsnakes.py
+++ b/python/cucim/src/cucim/skimage/segmentation/morphsnakes.py
@@ -3,10 +3,12 @@
import cupy as cp
import numpy as np
-from cupyx.scipy import ndimage as ndi
+from cupyx import rsqrt
+import cucim.skimage._vendored.ndimage as ndi
from cucim import _misc
+from .._shared._gradient import gradient
from .._shared.utils import check_nD, deprecate_kwarg
__all__ = ['morphological_chan_vese',
@@ -29,64 +31,53 @@ def __call__(self, *args, **kwargs):
# SI and IS operators for 2D and 3D.
-_P2 = [np.eye(3),
- np.array([[0, 1, 0]] * 3),
- np.flipud(np.eye(3)),
- np.rot90([[0, 1, 0]] * 3)]
-_P3 = [np.zeros((3, 3, 3)) for i in range(9)]
-
-_P3[0][:, :, 1] = 1
-_P3[1][:, 1, :] = 1
-_P3[2][1, :, :] = 1
-_P3[3][:, [0, 1, 2], [0, 1, 2]] = 1
-_P3[4][:, [0, 1, 2], [2, 1, 0]] = 1
-_P3[5][[0, 1, 2], :, [0, 1, 2]] = 1
-_P3[6][[0, 1, 2], :, [2, 1, 0]] = 1
-_P3[7][[0, 1, 2], [0, 1, 2], :] = 1
-_P3[8][[0, 1, 2], [2, 1, 0], :] = 1
-
-
-def sup_inf(u):
+def _get_P2():
+ _P2 = [cp.eye(3),
+ cp.array([[0, 1, 0]] * 3),
+ cp.array(np.flipud(np.eye(3))),
+ cp.array(np.rot90([[0, 1, 0]] * 3))]
+ return _P2
+
+
+def _get_P3():
+ _P3 = [np.zeros((3, 3, 3)) for i in range(9)]
+
+ _P3[0][:, :, 1] = 1
+ _P3[1][:, 1, :] = 1
+ _P3[2][1, :, :] = 1
+ _P3[3][:, [0, 1, 2], [0, 1, 2]] = 1
+ _P3[4][:, [0, 1, 2], [2, 1, 0]] = 1
+ _P3[5][[0, 1, 2], :, [0, 1, 2]] = 1
+ _P3[6][[0, 1, 2], :, [2, 1, 0]] = 1
+ _P3[7][[0, 1, 2], [0, 1, 2], :] = 1
+ _P3[8][[0, 1, 2], [2, 1, 0], :] = 1
+ return [cp.array(p) for p in _P3]
+
+
+def sup_inf(u, footprints, workspace=None):
"""SI operator."""
-
- if _misc.ndim(u) == 2:
- P = _P2
- elif _misc.ndim(u) == 3:
- P = _P3
+ if workspace is None:
+ erosions = cp.empty(((len(footprints),) + u.shape), dtype=u.dtype)
else:
- raise ValueError("u has an invalid number of dimensions "
- "(should be 2 or 3)")
+ erosions = workspace
+ for i, footprint in enumerate(footprints):
+ erosions[i, ...] = ndi.binary_erosion(u, footprint)
+ return erosions.max(0)
- erosions = []
- for P_i in P:
- e = ndi.binary_erosion(u, cp.asarray(P_i)).astype(np.int8, copy=False)
- erosions.append(e)
- return cp.stack(erosions, axis=0).max(0)
-
-
-def inf_sup(u):
+def inf_sup(u, footprints, workspace=None):
"""IS operator."""
-
- if _misc.ndim(u) == 2:
- P = _P2
- elif _misc.ndim(u) == 3:
- P = _P3
+ if workspace is None:
+ dilations = cp.empty(((len(footprints),) + u.shape), dtype=u.dtype)
else:
- raise ValueError("u has an invalid number of dimensions "
- "(should be 2 or 3)")
-
- dilations = []
- for P_i in P:
- d = ndi.binary_dilation(u, cp.asarray(P_i)).astype(np.int8,
- copy=False)
- dilations.append(d)
-
- return cp.stack(dilations, axis=0).min(0)
+ dilations = workspace
+ for i, footprint in enumerate(footprints):
+ dilations[i, ...] = ndi.binary_dilation(u, footprint)
+ return dilations.min(0)
-_curvop = _fcycle([lambda u: sup_inf(inf_sup(u)), # SIoIS
- lambda u: inf_sup(sup_inf(u))]) # ISoSI
+_curvop = _fcycle([lambda u, f, w: sup_inf(inf_sup(u, f, w), f, w), # SIoIS
+ lambda u, f, w: inf_sup(sup_inf(u, f, w), f, w)]) # ISoSI
def _check_input(image, init_level_set):
@@ -187,6 +178,11 @@ def checkerboard_level_set(image_shape, square_size=5):
return res
+@cp.fuse()
+def _fused_inverse_kernel(gradnorm, alpha):
+ return rsqrt(1.0 + alpha * gradnorm)
+
+
def inverse_gaussian_gradient(image, alpha=100.0, sigma=5.0):
"""Inverse of gradient magnitude.
@@ -216,11 +212,35 @@ def inverse_gaussian_gradient(image, alpha=100.0, sigma=5.0):
`morphological_geodesic_active_contour`.
"""
gradnorm = ndi.gaussian_gradient_magnitude(image, sigma, mode='nearest')
- return 1.0 / cp.sqrt(1.0 + alpha * gradnorm)
+ return _fused_inverse_kernel(gradnorm, alpha)
+
+
+@cp.fuse()
+def _abs_grad_kernel(gx, gy):
+ return cp.abs(gx) + cp.abs(gy)
-@deprecate_kwarg({'iterations': 'num_iter'}, removed_version="1.0",
- deprecated_version="0.19")
+@cp.fuse()
+def _fused_variance_kernel(
+ image, c1, c2, lam1, lam2, abs_du,
+):
+ difference_term = image - c1
+ difference_term *= difference_term
+ difference_term *= lam1
+ term2 = image - c2
+ term2 *= term2
+ term2 *= lam2
+ difference_term -= term2
+
+ aux = abs_du * difference_term
+ aux_lt0 = aux < 0
+ aux_gt0 = aux > 0
+ return aux_lt0, aux_gt0
+
+
+@deprecate_kwarg({'iterations': 'num_iter'},
+ removed_version="23.02.00",
+ deprecated_version="22.02.00")
def morphological_chan_vese(image, num_iter, init_level_set='checkerboard',
smoothing=1, lambda1=1, lambda2=1,
iter_callback=lambda x: None):
@@ -298,36 +318,46 @@ def morphological_chan_vese(image, num_iter, init_level_set='checkerboard',
u = (init_level_set > 0).astype(cp.int8)
- iter_callback(u)
+ if _misc.ndim(u) == 2:
+ footprints = _get_P2()
+ elif _misc.ndim(u) == 3:
+ footprints = _get_P3()
+ else:
+ raise ValueError("u has an invalid number of dimensions "
+ "(should be 2 or 3)")
+ workspace = cp.empty(((len(footprints),) + u.shape), dtype=u.dtype)
- for _ in range(num_iter):
+ iter_callback(u)
+ for i in range(num_iter):
# inside = u > 0
# outside = u <= 0
- c0 = (image * (1 - u)).sum() / float((1 - u).sum() + 1e-8)
- c1 = (image * u).sum() / float(u.sum() + 1e-8)
+ c0 = (image * (1 - u)).sum()
+ c0 /= float((1 - u).sum() + 1e-8)
+ c1 = (image * u).sum()
+ c1 /= float(u.sum() + 1e-8)
# Image attachment
- du = cp.gradient(u)
- abs_du = cp.abs(cp.stack(du, axis=0)).sum(0)
- aux = abs_du * (
- lambda1 * (image - c1) ** 2 - lambda2 * (image - c0) ** 2
+ du = gradient(u)
+ abs_du = _abs_grad_kernel(du[0], du[1])
+ aux_lt0, aux_gt0 = _fused_variance_kernel(
+ image, c1, c0, lambda1, lambda2, abs_du
)
-
- u[aux < 0] = 1
- u[aux > 0] = 0
+ u[aux_lt0] = 1
+ u[aux_gt0] = 0
# Smoothing
for _ in range(smoothing):
- u = _curvop(u)
+ u = _curvop(u, footprints, workspace)
iter_callback(u)
return u
-@deprecate_kwarg({'iterations': 'num_iter'}, removed_version="1.0",
- deprecated_version="0.19")
+@deprecate_kwarg({'iterations': 'num_iter'},
+ removed_version="23.02.00",
+ deprecated_version="22.02.00")
def morphological_geodesic_active_contour(gimage, num_iter,
init_level_set='disk', smoothing=1,
threshold='auto', balloon=0,
@@ -418,13 +448,22 @@ def morphological_geodesic_active_contour(gimage, num_iter,
threshold = cp.percentile(image, 40)
structure = cp.ones((3,) * len(image.shape), dtype=cp.int8)
- dimage = cp.gradient(image)
+ dimage = gradient(image)
# threshold_mask = image > threshold
if balloon != 0:
threshold_mask_balloon = image > threshold / cp.abs(balloon)
u = (init_level_set > 0).astype(cp.int8)
+ if _misc.ndim(u) == 2:
+ footprints = _get_P2()
+ elif _misc.ndim(u) == 3:
+ footprints = _get_P3()
+ else:
+ raise ValueError("u has an invalid number of dimensions "
+ "(should be 2 or 3)")
+ workspace = cp.empty(((len(footprints),) + u.shape), dtype=u.dtype)
+
iter_callback(u)
for _ in range(num_iter):
@@ -439,7 +478,7 @@ def morphological_geodesic_active_contour(gimage, num_iter,
# Image attachment
aux = cp.zeros_like(image)
- du = cp.gradient(u)
+ du = gradient(u)
for el1, el2 in zip(dimage, du):
aux += el1 * el2
u[aux > 0] = 1
@@ -447,7 +486,7 @@ def morphological_geodesic_active_contour(gimage, num_iter,
# Smoothing
for _ in range(smoothing):
- u = _curvop(u)
+ u = _curvop(u, footprints, workspace)
iter_callback(u)
diff --git a/python/cucim/src/cucim/skimage/segmentation/random_walker_segmentation.py b/python/cucim/src/cucim/skimage/segmentation/random_walker_segmentation.py
index 2acaaf8ba..0ec9ed408 100644
--- a/python/cucim/src/cucim/skimage/segmentation/random_walker_segmentation.py
+++ b/python/cucim/src/cucim/skimage/segmentation/random_walker_segmentation.py
@@ -9,7 +9,7 @@
import cupy as cp
import numpy as np
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from cupyx.scipy import sparse
from cupyx.scipy.sparse.linalg import cg, spsolve
@@ -343,12 +343,6 @@ def random_walker(data, labels, beta=130, mode='cg_j', tol=1.e-3, copy=True,
probability that label `label_nb` reaches the pixel `(i, j)`
first.
- See Also
- --------
- skimage.morphology.watershed : watershed segmentation
- A segmentation algorithm based on mathematical morphology
- and "flooding" of regions from markers.
-
Notes
-----
Multichannel inputs are scaled with all channel data combined. Ensure all
diff --git a/python/cucim/src/cucim/skimage/segmentation/tests/test_chan_vese.py b/python/cucim/src/cucim/skimage/segmentation/tests/test_chan_vese.py
new file mode 100644
index 000000000..15b991883
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/segmentation/tests/test_chan_vese.py
@@ -0,0 +1,105 @@
+import cupy as cp
+import pytest
+from cupy.testing import assert_array_equal
+
+from cucim.skimage._shared._warnings import expected_warnings
+from cucim.skimage._shared.utils import _supported_float_type
+from cucim.skimage.segmentation import chan_vese
+
+
+@pytest.mark.parametrize('dtype', [cp.float32, cp.float64])
+def test_chan_vese_flat_level_set(dtype):
+ # because the algorithm evolves the level set around the
+ # zero-level, if the level-set has no zero level, the algorithm
+ # will not produce results in theory. However, since a continuous
+ # approximation of the delta function is used, the algorithm
+ # still affects the entirety of the level-set. Therefore with
+ # infinite time, the segmentation will still converge.
+ img = cp.zeros((10, 10), dtype=dtype)
+ img[3:6, 3:6] = 1
+ ls = cp.full((10, 10), 1000, dtype=dtype)
+ result = chan_vese(img, mu=0.0, tol=1e-3, init_level_set=ls)
+ assert_array_equal(result.astype(float), cp.ones((10, 10)))
+ result = chan_vese(img, mu=0.0, tol=1e-3, init_level_set=-ls)
+ assert_array_equal(result.astype(float), cp.zeros((10, 10)))
+
+
+def test_chan_vese_small_disk_level_set():
+ img = cp.zeros((10, 10))
+ img[3:6, 3:6] = 1
+ result = chan_vese(img, mu=0.0, tol=1e-3, init_level_set="small disk")
+ assert_array_equal(result.astype(float), img)
+
+
+def test_chan_vese_simple_shape():
+ img = cp.zeros((10, 10))
+ img[3:6, 3:6] = 1
+ result = chan_vese(img, mu=0.0, tol=1e-8).astype(float)
+ assert_array_equal(result, img)
+
+
+@pytest.mark.parametrize(
+ 'dtype', [cp.uint8, cp.float16, cp.float32, cp.float64]
+)
+def test_chan_vese_extended_output(dtype):
+ img = cp.zeros((10, 10), dtype=dtype)
+ img[3:6, 3:6] = 1
+ result = chan_vese(img, mu=0.0, tol=1e-8, extended_output=True)
+ float_dtype = _supported_float_type(dtype)
+ assert result[1].dtype == float_dtype
+ assert all(arr.dtype == float_dtype for arr in result[2])
+ assert_array_equal(len(result), 3)
+
+
+def test_chan_vese_remove_noise():
+ ref = cp.zeros((10, 10))
+ ref[1:6, 1:6] = cp.array([[0, 1, 1, 1, 0],
+ [1, 1, 1, 1, 1],
+ [1, 1, 1, 1, 1],
+ [1, 1, 1, 1, 1],
+ [0, 1, 1, 1, 0]])
+ img = ref.copy()
+ img[8, 3] = 1
+ result = chan_vese(img, mu=0.3, tol=1e-3, max_num_iter=100, dt=10,
+ init_level_set="disk").astype(float)
+ assert_array_equal(result, ref)
+
+
+def test_chan_vese_incorrect_image_type():
+ img = cp.zeros((10, 10, 3))
+ ls = cp.zeros((10, 9))
+ with pytest.raises(ValueError):
+ chan_vese(img, mu=0.0, init_level_set=ls)
+
+
+def test_chan_vese_gap_closing():
+ ref = cp.zeros((20, 20))
+ ref[8:15, :] = cp.ones((7, 20))
+ img = ref.copy()
+ img[:, 6] = cp.zeros((20))
+ result = chan_vese(img, mu=0.7, tol=1e-3, max_num_iter=1000, dt=1000,
+ init_level_set="disk").astype(float)
+ assert_array_equal(result, ref)
+
+
+def test_chan_vese_max_iter_deprecation():
+ img = cp.zeros((20, 20))
+ with expected_warnings(["`max_iter` is a deprecated argument"]):
+ chan_vese(img, max_iter=10)
+
+
+def test_chan_vese_incorrect_level_set():
+ img = cp.zeros((10, 10))
+ ls = cp.zeros((10, 9))
+ with pytest.raises(ValueError):
+ chan_vese(img, mu=0.0, init_level_set=ls)
+ with pytest.raises(ValueError):
+ chan_vese(img, mu=0.0, init_level_set="a")
+
+
+def test_chan_vese_blank_image():
+ img = cp.zeros((10, 10))
+ level_set = cp.random.rand(10, 10)
+ ref = level_set > 0
+ result = chan_vese(img, mu=0.0, tol=0.0, init_level_set=level_set)
+ assert_array_equal(result, ref)
diff --git a/python/cucim/src/cucim/skimage/segmentation/tests/test_expand_labels.py b/python/cucim/src/cucim/skimage/segmentation/tests/test_expand_labels.py
new file mode 100644
index 000000000..2fe1df30c
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/segmentation/tests/test_expand_labels.py
@@ -0,0 +1,226 @@
+import cupy as cp
+import pytest
+from cupy.testing import assert_array_equal
+
+from cucim.core.operations.morphology import distance_transform_edt
+from cucim.skimage import data, measure
+from cucim.skimage.segmentation import expand_labels
+
+SAMPLE1D = cp.array([0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0])
+SAMPLE1D_EXPANDED_3 = cp.array(
+ [4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]
+)
+
+# Some pixels are important edge cases with undefined behaviour:
+# these are the pixels that are at the same distance from
+# multiple labels. Ideally the label would be chosen at random
+# to avoid bias, but as we are relying on the index map returned
+# by the scipy.ndimage distance transform, what actually happens
+# is determined by the upstream implementation of the distance
+# transform, thus we don't give any guarantees for the edge case pixels.
+#
+# Regardless, it seems prudent to have a test including an edge case
+# so we can detect whether future upstream changes in scipy.ndimage
+# modify the behaviour.
+
+EDGECASE1D = cp.array([0, 0, 4, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0])
+EDGECASE1D_EXPANDED_3 = cp.array(
+ [4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]
+)
+
+SAMPLE2D = cp.array(
+ [
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0],
+ [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
+ [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ ]
+)
+
+SAMPLE2D_EXPANDED_3 = cp.array(
+ [
+ [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 0],
+ [1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2],
+ [1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2],
+ [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
+ [1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2],
+ [1, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2],
+ [0, 0, 1, 0, 0, 0, 0, 2, 2, 2, 2],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0],
+ ]
+)
+
+# non-integer expansion
+SAMPLE2D_EXPANDED_1_5 = cp.array(
+ [
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1, 0, 0, 0, 2, 2, 2],
+ [1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2],
+ [0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2],
+ [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ ]
+)
+
+
+EDGECASE2D = cp.array(
+ [
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0],
+ [0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 0],
+ [0, 1, 1, 1, 0, 2, 0, 0, 0, 0, 0],
+ [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
+ ]
+)
+
+EDGECASE2D_EXPANDED_4 = cp.array(
+ [
+ [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
+ [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2],
+ [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2],
+ [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0],
+ [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0],
+ ]
+)
+
+SAMPLE3D = cp.array(
+ [
+ [
+ [0, 0, 0, 0],
+ [0, 3, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ ],
+
+ [
+ [0, 0, 0, 0],
+ [0, 3, 3, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ ],
+
+ [
+ [0, 0, 0, 0],
+ [0, 3, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 5, 0],
+ ],
+
+ [
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 5, 0],
+ ]
+ ]
+)
+
+SAMPLE3D_EXPANDED_2 = cp.array(
+ [
+ [
+ [3, 3, 3, 3],
+ [3, 3, 3, 3],
+ [3, 3, 3, 3],
+ [0, 3, 5, 0],
+ ],
+
+ [
+ [3, 3, 3, 3],
+ [3, 3, 3, 3],
+ [3, 3, 3, 3],
+ [0, 5, 5, 5],
+ ],
+
+ [
+ [3, 3, 3, 3],
+ [3, 3, 3, 3],
+ [3, 3, 5, 5],
+ [5, 5, 5, 5],
+ ],
+
+ [
+ [3, 3, 3, 0],
+ [3, 3, 3, 0],
+ [3, 3, 5, 5],
+ [5, 5, 5, 5],
+ ]
+ ]
+)
+
+SAMPLE_EDGECASE_BEHAVIOUR = cp.array(
+ [[0, 1, 0, 0], [2, 0, 0, 0], [0, 3, 0, 0]]
+)
+
+
+@pytest.mark.parametrize(
+ "input_array, expected_output, expand_distance",
+ [
+ (SAMPLE1D, SAMPLE1D_EXPANDED_3, 3),
+ (SAMPLE2D, SAMPLE2D_EXPANDED_3, 3),
+ (SAMPLE2D, SAMPLE2D_EXPANDED_1_5, 1.5),
+ (EDGECASE1D, EDGECASE1D_EXPANDED_3, 3),
+ (EDGECASE2D, EDGECASE2D_EXPANDED_4, 4),
+ (SAMPLE3D, SAMPLE3D_EXPANDED_2, 2)
+ ]
+)
+def test_expand_labels(input_array, expected_output, expand_distance):
+ if input_array.ndim == 1:
+ with pytest.raises(NotImplementedError):
+ expand_labels(input_array, expand_distance)
+ else:
+ expanded = expand_labels(input_array, expand_distance)
+ assert_array_equal(expanded, expected_output)
+
+
+@pytest.mark.parametrize('ndim', [2, 3])
+@pytest.mark.parametrize('distance', range(6))
+def test_binary_blobs(ndim, distance):
+ """Check some invariants with label expansion.
+
+ - New labels array should exactly contain the original labels array.
+ - Distance to old labels array within new labels should never exceed input
+ distance.
+ - Distance beyond the expanded labels should always exceed the input
+ distance.
+ """
+ img = data.binary_blobs(length=64, blob_size_fraction=0.05, n_dim=ndim)
+ labels = measure.label(img)
+ expanded = expand_labels(labels, distance=distance)
+ original_mask = labels != 0
+ assert_array_equal(labels[original_mask], expanded[original_mask])
+ expanded_only_mask = (expanded - labels).astype(bool)
+ distance_map = distance_transform_edt(~original_mask)
+ expanded_distances = distance_map[expanded_only_mask]
+ if expanded_distances.size > 0:
+ assert cp.all(expanded_distances <= distance)
+ beyond_expanded_distances = distance_map[~expanded.astype(bool)]
+ if beyond_expanded_distances.size > 0:
+ assert cp.all(beyond_expanded_distances > distance)
+
+
+def test_edge_case_behaviour():
+ """ Check edge case behavior to detect upstream changes
+
+ For edge cases where a pixel has the same distance to several regions,
+ lexicographical order seems to determine which region gets to expand
+ into this pixel given the current upstream behaviour in
+ scipy.ndimage.distance_transform_edt.
+
+ As a result, we expect different results when transposing the array.
+ If this test fails, something has changed upstream.
+ """
+ expanded = expand_labels(SAMPLE_EDGECASE_BEHAVIOUR, 1)
+ expanded_transpose = expand_labels(SAMPLE_EDGECASE_BEHAVIOUR.T, 1)
+ assert not cp.all(expanded == expanded_transpose.T)
diff --git a/python/cucim/src/cucim/skimage/transform/_geometric.py b/python/cucim/src/cucim/skimage/transform/_geometric.py
index 372cf3b84..91211834b 100644
--- a/python/cucim/src/cucim/skimage/transform/_geometric.py
+++ b/python/cucim/src/cucim/skimage/transform/_geometric.py
@@ -1421,10 +1421,8 @@ def __init__(self, matrix=None, scale=None, rotation=None,
if translation is None:
translation = (0,) * dimensionality
if dimensionality == 2:
- ax = (0, 1)
c, s = _cos(rotation), _sin(rotation)
- matrix[ax, ax] = c
- matrix[ax, ax[::-1]] = -s, s
+ matrix[:2, :2] = xp.array([[c, -s], [s, c]], dtype=float)
else: # 3D rotation
matrix[:3, :3] = xp.asarray(_euler_rotation_matrix(rotation))
diff --git a/python/cucim/src/cucim/skimage/transform/_warps.py b/python/cucim/src/cucim/skimage/transform/_warps.py
index 05aea7f24..7f2f69b2b 100644
--- a/python/cucim/src/cucim/skimage/transform/_warps.py
+++ b/python/cucim/src/cucim/skimage/transform/_warps.py
@@ -2,7 +2,7 @@
import cupy as cp
import numpy as np
-from cupyx.scipy import ndimage as ndi
+import cucim.skimage._vendored.ndimage as ndi
from .._shared.utils import (_to_ndimage_mode, _validate_interpolation_order,
channel_as_last_axis, convert_to_float,
@@ -260,8 +260,8 @@ def rescale(image, scale, order=None, mode='reflect', cval=0, clip=True,
Otherwise, this parameter indicates which axis of the array corresponds
to channels.
- .. versionadded:: 0.19
- ``channel_axis`` was added in 0.19.
+ .. versionadded:: 22.02.00
+ ``channel_axis`` was added in 22.02.00.
Notes
-----
@@ -766,19 +766,33 @@ def _clip_warp_output(input_image, output_image, mode, cval, clip):
"""
if clip:
min_val = input_image.min().item()
- max_val = input_image.max().item()
-
- preserve_cval = (mode == 'constant' and not
- (min_val <= cval <= max_val))
+ if np.isnan(min_val):
+ # NaNs detected, use NaN-safe min/max
+ min_func = cp.nanmin
+ max_func = cp.nanmax
+ min_val = min_func(input_image).item()
+ else:
+ min_func = cp.min
+ max_func = cp.max
+ max_val = max_func(input_image).item()
+
+ # Check if cval has been used such that it expands the effective input
+ # range
+ preserve_cval = (
+ mode == 'constant'
+ and not min_val <= cval <= max_val
+ and min_func(output_image) <= cval <= max_func(output_image)
+ )
+ # expand min/max range to account for cval
if preserve_cval:
- cval_mask = output_image == cval
+ # cast cval to the same dtype as the input image
+ cval = input_image.dtype.type(cval)
+ min_val = min(min_val, cval)
+ max_val = max(max_val, cval)
cp.clip(output_image, min_val, max_val, out=output_image)
- if preserve_cval:
- output_image[cval_mask] = cval
-
def warp(image, inverse_map, map_args={}, output_shape=None, order=None,
mode='constant', cval=0., clip=True, preserve_range=False):
@@ -1080,8 +1094,8 @@ def warp_polar(image, center=None, *, radius=None, output_shape=None,
Otherwise, this parameter indicates which axis of the array corresponds
to channels.
- .. versionadded:: 0.19
- ``channel_axis`` was added in 0.19.
+ .. versionadded:: 22.02.00
+ ``channel_axis`` was added in 22.02.00.
**kwargs : keyword arguments
Passed to `transform.warp`.
diff --git a/python/cucim/src/cucim/skimage/transform/tests/test_warps.py b/python/cucim/src/cucim/skimage/transform/tests/test_warps.py
index 6504c082d..8c8717f51 100644
--- a/python/cucim/src/cucim/skimage/transform/tests/test_warps.py
+++ b/python/cucim/src/cucim/skimage/transform/tests/test_warps.py
@@ -118,6 +118,78 @@ def test_warp_clip():
assert_almost_equal(float(outx.max()), 1)
+@pytest.mark.parametrize('order', [0, 1])
+def test_warp_clip_image_containing_nans(order):
+ # Test that clipping works as intended on an image with NaNs
+ # Orders >1 do not produce good output when the input image has
+ # NaNs, so those orders are not tested.
+
+ x = cp.ones((15, 15), dtype=cp.float64)
+ x[7, 7] = cp.nan
+
+ outx = rotate(x, 45, order=order, cval=2, resize=True, clip=True)
+
+ assert_almost_equal(cp.nanmin(outx).item(), 1)
+ assert_almost_equal(cp.nanmax(outx).item(), 2)
+
+
+@pytest.mark.parametrize('order', [0, 1])
+def test_warp_clip_cval_is_nan(order):
+ # Test that clipping works as intended when cval is NaN
+ # Orders > 1 do not produce good output when cval is NaN, so those
+ # orders are not tested.
+
+ x = cp.ones((15, 15), dtype=cp.float64)
+ x[5:-5, 5:-5] = 2
+
+ outx = rotate(x, 45, order=order, cval=cp.nan, resize=True, clip=True)
+
+ assert_almost_equal(cp.nanmin(outx).item(), 1)
+ assert_almost_equal(cp.nanmax(outx).item(), 2)
+
+
+@pytest.mark.parametrize('order', range(6))
+def test_warp_clip_cval_outside_input_range(order):
+ # Test that clipping behavior considers cval part of the input range
+
+ x = cp.ones((15, 15), dtype=cp.float64)
+
+ # Specify a cval that is outside the input range to check clipping
+ outx = rotate(x, 45, order=order, cval=2, resize=True, clip=True)
+
+ # The corners should be cval for all interpolation orders
+ outx = cp.asnumpy(outx)
+ assert_array_almost_equal([outx[0, 0], outx[0, -1],
+ outx[-1, 0], outx[-1, -1]], 2)
+
+ # For all interpolation orders other than nearest-neighbor, the clipped
+ # output should have some pixels with values between the input (1) and
+ # cval (2) (i.e., clipping should not set them to 1)
+ if order > 0:
+ assert np.sum(np.less(1, outx) * np.less(outx, 2)) > 0
+
+
+@pytest.mark.parametrize('order', range(6))
+def test_warp_clip_cval_not_used(order):
+ # Test that clipping does not consider cval part of the input range if it
+ # is not used in the output image
+
+ x = cp.ones((15, 15), dtype=cp.float64)
+ x[5:-5, 5:-5] = 2
+
+ # Transform the image by stretching it out by one pixel on each side so
+ # that cval will not actually be used
+ scale = 15 / (15 + 2)
+ transform = AffineTransform(scale=scale, translation=(1, 1))
+ outx = warp(x, transform, mode='constant', order=order, cval=0, clip=True)
+
+ # At higher orders of interpolation, the transformed image has overshoots
+ # beyond the input range that should be clipped to the range 1 to 2. Even
+ # though cval=0, the minimum value of the clipped output image should be
+ # 1 and not affected by the unused cval.
+ assert_array_almost_equal(outx.min(), 1)
+
+
def test_homography():
x = cp.zeros((5, 5), dtype=cp.double)
x[1, 1] = 1
diff --git a/python/cucim/tests/fixtures/testimage.py b/python/cucim/tests/fixtures/testimage.py
index c50b8eb6f..343698676 100644
--- a/python/cucim/tests/fixtures/testimage.py
+++ b/python/cucim/tests/fixtures/testimage.py
@@ -21,9 +21,9 @@
from ..util.gen_image import ImageGenerator
-def gen_image(tmpdir_factory, recipe):
+def gen_image(tmpdir_factory, recipe, resolution=None):
dataset_path = tmpdir_factory.mktemp('datasets').strpath
- dataset_gen = ImageGenerator(dataset_path, [recipe])
+ dataset_gen = ImageGenerator(dataset_path, [recipe], [resolution])
image_path = dataset_gen.gen()
return (dataset_path, image_path[0])
@@ -63,9 +63,8 @@ def testimg_tiff_stripe_32x24_16_raw(tmpdir_factory):
def testimg_tiff_stripe_32x24_16(request):
return request.param
-# tiff_stripe_4096x4096_256
-
+# tiff_stripe_4096x4096_256
@pytest.fixture(scope='session')
def testimg_tiff_stripe_4096x4096_256_jpeg(tmpdir_factory):
dataset_path, image_path = gen_image(
@@ -137,3 +136,45 @@ def testimg_tiff_stripe_100000x100000_256_raw(tmpdir_factory):
])
def testimg_tiff_stripe_100000x100000_256(request):
return request.param
+
+
+# testimg_tiff_stripe_4096_4096_256_jpeg_resolution
+@pytest.fixture(scope='session')
+def testimg_tiff_stripe_4096_4096_256_jpeg_resolution_3_5_centimeter(
+ tmpdir_factory):
+ resolution = (0.3, 0.5, "CENTIMETER")
+ dataset_path, image_path = gen_image(
+ tmpdir_factory, 'tiff::stripe:4096x4096:256:jpeg', resolution)
+ yield image_path, resolution
+ # Clean up fake dataset folder
+ shutil.rmtree(dataset_path)
+
+
+@pytest.fixture(scope='session')
+def testimg_tiff_stripe_4096_4096_256_jpeg_resolution_4_7_inch(tmpdir_factory):
+ resolution = (0.4, 0.7, "INCH")
+ dataset_path, image_path = gen_image(
+ tmpdir_factory, 'tiff::stripe:4096x4096:256:jpeg', resolution)
+ yield image_path, resolution
+ # Clean up fake dataset folder
+ shutil.rmtree(dataset_path)
+
+
+@pytest.fixture(scope='session')
+def testimg_tiff_stripe_4096_4096_256_jpeg_resolution_9_1_none(tmpdir_factory):
+ resolution = (9, 1, "NONE")
+ dataset_path, image_path = gen_image(
+ tmpdir_factory, 'tiff::stripe:4096x4096:256:jpeg', resolution)
+ yield image_path, resolution
+ # Clean up fake dataset folder
+ shutil.rmtree(dataset_path)
+
+
+@pytest.fixture(scope='session', params=[
+ lazy_fixture(
+ 'testimg_tiff_stripe_4096_4096_256_jpeg_resolution_3_5_centimeter'),
+ lazy_fixture('testimg_tiff_stripe_4096_4096_256_jpeg_resolution_4_7_inch'),
+ lazy_fixture('testimg_tiff_stripe_4096_4096_256_jpeg_resolution_9_1_none'),
+])
+def testimg_tiff_stripe_4096_4096_256_jpeg_resolution(request):
+ return request.param
diff --git a/python/cucim/tests/unit/clara/test_load_image_metadata.py b/python/cucim/tests/unit/clara/test_load_image_metadata.py
index 65e7d926b..2452d4324 100644
--- a/python/cucim/tests/unit/clara/test_load_image_metadata.py
+++ b/python/cucim/tests/unit/clara/test_load_image_metadata.py
@@ -14,6 +14,7 @@
#
from ...util.io import open_image_cucim
+import math
def test_load_image_metadata(testimg_tiff_stripe_32x24_16):
@@ -45,7 +46,7 @@ def test_load_image_metadata(testimg_tiff_stripe_32x24_16):
# Returns physical size in tuple.
assert img.spacing() == [1.0, 1.0, 1.0]
# Units for each spacing element (size is same with `ndim`).
- assert img.spacing_units() == ['micrometer', 'micrometer', 'color']
+ assert img.spacing_units() == ['', '', 'color']
# Physical location of (0, 0, 0) (size is always 3).
assert img.origin == [0.0, 0.0, 0.0]
# Direction cosines (size is always 3x3).
@@ -71,6 +72,54 @@ def test_load_image_metadata(testimg_tiff_stripe_32x24_16):
assert img.raw_metadata == '{"axes": "YXC", "shape": [24, 32, 3]}'
+def test_load_image_resolution_metadata(testimg_tiff_stripe_4096_4096_256_jpeg_resolution): # noqa: E501
+ image, resolution = testimg_tiff_stripe_4096_4096_256_jpeg_resolution
+ img = open_image_cucim(image)
+
+ x_resolution, y_resolution, resolution_unit = resolution
+
+ if resolution_unit == "CENTIMETER":
+ x_spacing = 10000.0 / x_resolution
+ y_spacing = 10000.0 / y_resolution
+ spacing_unit = "micrometer"
+ elif resolution_unit == "INCH":
+ x_spacing = 25400.0 / x_resolution
+ y_spacing = 25400.0 / y_resolution
+ spacing_unit = "micrometer"
+ else:
+ x_spacing = x_resolution
+ y_spacing = y_resolution
+ spacing_unit = ""
+
+ # Returns physical size in tuple.
+ assert all(map(lambda a, b: math.isclose(a, b, rel_tol=0.1),
+ img.spacing(), (y_spacing, x_spacing, 1.0)))
+ # Units for each spacing element (size is same with `ndim`).
+ assert img.spacing_units() == [spacing_unit, spacing_unit, 'color']
+
+ # A metadata object as `dict`
+ metadata = img.metadata
+ print(metadata)
+ assert isinstance(metadata, dict)
+ assert len(metadata) == 2 # 'cucim' and 'tiff'
+ assert math.isclose(metadata['tiff']['x_resolution'],
+ x_resolution, rel_tol=0.00001)
+ assert math.isclose(metadata['tiff']['y_resolution'],
+ y_resolution, rel_tol=0.00001)
+ unit_value = resolution_unit.lower() if resolution_unit != "NONE" else ""
+ assert metadata['tiff']['resolution_unit'] == unit_value
+
+ # Check each level's spacing is the base spacing / its downsample factor.
+ num_levels = img.resolutions['level_count']
+ for level in range(num_levels):
+ lowres_img = img.read_region((0, 0), (100, 100), level=level)
+ lowres_downsample = img.resolutions["level_downsamples"][level]
+ assert all(map(lambda a, b: math.isclose(a, b, rel_tol=0.1),
+ lowres_img.spacing(),
+ (y_spacing / lowres_downsample,
+ x_spacing / lowres_downsample, 1.0)))
+
+
def test_load_rgba_image_metadata(tmpdir):
"""Test accessing RGBA image's metadata.
diff --git a/python/cucim/tests/unit/core/test_stain_normalizer.py b/python/cucim/tests/unit/core/test_stain_normalizer.py
index 4b50a4121..ee66ecf67 100644
--- a/python/cucim/tests/unit/core/test_stain_normalizer.py
+++ b/python/cucim/tests/unit/core/test_stain_normalizer.py
@@ -22,15 +22,15 @@
class TestStainExtractorMacenko():
@pytest.mark.parametrize(
- 'image',
+ 'image, ErrorClass',
[
- cp.full((3, 2, 4), -1), # negative value
- cp.full((3, 2, 4), 256), # out of range value
- None,
- cp.full((3, 2, 4), 240), # uniformly below the beta threshold
- ]
+ (cp.full((3, 2, 4), -1), ValueError), # negative value
+ (cp.full((3, 2, 4), 256), ValueError), # out of range value
+ (None, TypeError),
+ (cp.full((3, 2, 4), 240), ValueError), # uniformly below the beta threshold # noqa
+ ],
)
- def test_transparent_image(self, image):
+ def test_transparent_image(self, image, ErrorClass):
"""
Test HE stain extraction on an image that comprises
only transparent pixels - pixels with absorbance below the
@@ -38,12 +38,8 @@ def test_transparent_image(self, image):
since once the transparent pixels are removed, there are no
remaining pixels to compute eigenvectors.
"""
- if image is None:
- with pytest.raises(TypeError):
- stain_extraction_pca(image)
- else:
- with pytest.raises(ValueError):
- stain_extraction_pca(image)
+ with pytest.raises(ErrorClass):
+ stain_extraction_pca(image)
@pytest.mark.parametrize(
'image',
diff --git a/python/cucim/tests/util/gen_image.py b/python/cucim/tests/util/gen_image.py
index 8eea5dc81..645db1f79 100644
--- a/python/cucim/tests/util/gen_image.py
+++ b/python/cucim/tests/util/gen_image.py
@@ -16,6 +16,7 @@
import argparse
import logging
import os
+import tifffile
try:
from .gen_tiff import TiffGenerator
@@ -28,16 +29,23 @@
class ImageGenerator:
- def __init__(self, dest, recipes, logger=None):
+ def __init__(self, dest, recipes, resolutions=None, logger=None):
self.logger = logger or logging.getLogger(__name__)
self.dest = dest
self.recipes = recipes
+ if resolutions is None:
+ resolutions = [(1, 1, "CENTIMETER")] * len(recipes)
+ if len(resolutions) != len(recipes):
+ raise RuntimeError(
+ 'Number of resolutions must be equal to number of recipes')
+ self.resolutions = resolutions
+
def gen(self):
results = []
- for recipe in self.recipes:
+ for recipe, resolution in zip(self.recipes, self.resolutions):
items = recipe.split(':')
item_len = len(items)
if not (1 <= item_len <= 6):
@@ -69,10 +77,16 @@ def gen(self):
raise RuntimeError(
f'No data generated from [pattern={pattern},'
+ f' image_size={image_size}, tile_size={tile_size},'
- + f' compression={compression}].')
+ + f' compression={compression}, resolution={resolution}].')
file_name = f'{kind}_{pattern}_{image_size_str}_{tile_size}'
-
+ if resolution is None or len(resolution) == 2:
+ unit = None
+ elif len(resolution) == 3:
+ unit = resolution[2]
+ resolution = resolution[:2]
+ if unit is None:
+ unit = tifffile.RESUNIT.NONE
image_path = generator_obj.save_image(image_data,
dest_folder,
file_name=file_name,
@@ -81,7 +95,9 @@ def gen(self):
pattern=pattern,
image_size=image_size,
tile_size=tile_size,
- compression=compression)
+ compression=compression,
+ resolution=resolution,
+ resolutionunit=unit)
self.logger.info(' Generated %s...', image_path)
results.append(image_path)
diff --git a/python/cucim/tests/util/gen_tiff.py b/python/cucim/tests/util/gen_tiff.py
index 713679657..324f8db14 100644
--- a/python/cucim/tests/util/gen_tiff.py
+++ b/python/cucim/tests/util/gen_tiff.py
@@ -38,7 +38,8 @@ def get_image(self, pattern, image_size):
return None
def save_image(self, image_data, dest_folder, file_name, kind, subpath,
- pattern, image_size, tile_size, compression):
+ pattern, image_size, tile_size, compression, resolution,
+ resolutionunit):
# You can add pyramid images (0: largest resolution)
if isinstance(image_data, list):
arr_stack = image_data
@@ -55,10 +56,15 @@ def save_image(self, image_data, dest_folder, file_name, kind, subpath,
tiff_file_name = str(
(Path(dest_folder) / f'{file_name}.tif').absolute())
+ level_resolution = None
with TiffWriter(tiff_file_name, bigtiff=True) as tif:
for level in range(len(arr_stack)): # save from the largest image
src_arr = arr_stack[level]
+ if resolution:
+ level_resolution = (resolution[0] / (level + 1),
+ resolution[1] / (level + 1))
+
tif.write(
src_arr,
software="tifffile",
@@ -68,6 +74,8 @@ def save_image(self, image_data, dest_folder, file_name, kind, subpath,
planarconfig="CONTIG",
compression=compression, # requires imagecodecs
subfiletype=1 if level else 0,
+ resolution=level_resolution,
+ resolutionunit=resolutionunit,
)
return tiff_file_name
diff --git a/run b/run
index 8033eaf18..6fe8ce919 100755
--- a/run
+++ b/run
@@ -806,7 +806,9 @@ test() {
install_python_test_deps_() {
if [ -n "${CONDA_PREFIX}" ]; then
- run_command conda install -c conda-forge -y \
+ # https://github.com/rapidsai/cucim/pull/349#issuecomment-1203335731
+ # Do not update or change already-installed dependencies.
+ run_command conda install -c conda-forge -y --freeze-installed \
--file ${TOP}/python/cucim/requirements-test.txt
else
if [ -n "${VIRTUAL_ENV}" ]; then