ref: unite initialization code of background computers in base class
paulmueller committed Nov 13, 2023
1 parent feb4fee commit 478b2c2
Showing 4 changed files with 38 additions and 57 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
@@ -7,6 +7,7 @@
- fix: BackgroundSparseMed did not work for datasets of length < 100
- fix: bad f-string in BackgroundSparseMed
- enh: create a default basin-based output file for background computation
- ref: unite initialization code of background computers in base class
- ref: remove functools.cache decorator from HDF5Data
- tests: add tests for BackgroundSparseMed
0.12.3
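The chunk length of min(100, event_count) introduced in the base class below ties in with the "datasets of length < 100" fix listed above: h5py rejects chunk shapes larger than a fixed-size dataset's shape, so the chunk length has to be clipped to the event count. A minimal standalone sketch of that constraint (file name and image dimensions are hypothetical):

import h5py
import numpy as np

# Hypothetical dataset: 42 events of 80x250 grayscale images.
images = np.zeros((42, 80, 250), dtype=np.uint8)

with h5py.File("chunking_sketch.h5", "w") as h5:
    # A hard-coded chunk length of 100 would raise a ValueError here,
    # because the first chunk dimension would exceed the dataset's first
    # dimension. Clipping to the event count avoids that.
    chunk_len = min(100, images.shape[0])
    h5.create_dataset(
        "events/image_bg",
        data=images,
        chunks=(chunk_len, images.shape[1], images.shape[2]),
        fletcher32=True,
    )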
38 changes: 35 additions & 3 deletions dcnum/feat/feat_background/base.py
@@ -2,15 +2,19 @@
import inspect
import multiprocessing as mp
import pathlib
import uuid

import h5py
import hdf5plugin
import numpy as np

from ...meta import ppid
from ...write import create_with_basins


class Background(abc.ABC):
def __init__(self, input_data, output_path, num_cpus=None, **kwargs):
def __init__(self, input_data, output_path, compress=True, num_cpus=None,
**kwargs):
"""
Parameters
@@ -24,6 +28,9 @@ def __init__(self, input_data, output_path, num_cpus=None, **kwargs):
set `output_path` to the same path to write directly to the
input file. The data are written in the "events/image_bg"
dataset in the output file.
compress: bool
Whether to compress background data. Set this to False
for faster processing.
num_cpus: int
Number of CPUs to use for median computation. Defaults to
`multiprocessing.cpu_count()`.
@@ -70,6 +77,13 @@ def __init__(self, input_data, output_path, num_cpus=None, **kwargs):
else:
self.input_data = input_data

#: unique identifier
self.name = str(uuid.uuid4())
#: shape of event images
self.image_shape = self.input_data[0].shape
#: total number of events
self.event_count = len(self.input_data)

if self.h5out is None:
if not output_path.exists():
# If the output path does not exist, then we create
@@ -78,10 +92,28 @@ def __init__(self, input_data, output_path, num_cpus=None, **kwargs):
basin_paths=self.paths_ref)
# TODO:
# - properly setup HDF5 caching
# - create image_bg here instead of in subclasses
# "a", because output file is already an .rtdc file
# "a", because output file already exists
self.h5out = h5py.File(output_path, "a", libver="latest")

# Initialize background data
if compress:
compression_kwargs = hdf5plugin.Zstd(clevel=5)
else:
compression_kwargs = {}
h5bg = self.h5out.require_dataset(
"events/image_bg",
shape=self.input_data.shape,
dtype=np.uint8,
chunks=(min(100, self.event_count),
self.image_shape[0],
self.image_shape[1]),
fletcher32=True,
**compression_kwargs,
)
h5bg.attrs.create('CLASS', np.string_('IMAGE'))
h5bg.attrs.create('IMAGE_VERSION', np.string_('1.2'))
h5bg.attrs.create('IMAGE_SUBCLASS', np.string_('IMAGE_GRAYSCALE'))

@staticmethod
def get_kwargs_from_ppid(bg_ppid):
"""Return keyword arguments for any subclass from a PPID string"""
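The net effect of the base.py change is that Background.__init__ now provisions the events/image_bg dataset once for all subclasses. The sketch below mirrors that setup outside of dcnum; provision_image_bg and the file name are illustrative only, and np.bytes_ stands in for the np.string_ alias used in the diff (that alias was removed in NumPy 2.0):

import h5py
import hdf5plugin
import numpy as np


def provision_image_bg(h5out, event_count, image_shape, compress=True):
    """Create an "events/image_bg" dataset the way the base class now does."""
    # Zstd compression is optional; disabling it trades file size for speed.
    compression_kwargs = hdf5plugin.Zstd(clevel=5) if compress else {}
    h5bg = h5out.require_dataset(
        "events/image_bg",
        shape=(event_count, image_shape[0], image_shape[1]),
        dtype=np.uint8,
        # Clip the chunk length so datasets with fewer than 100 events work.
        chunks=(min(100, event_count), image_shape[0], image_shape[1]),
        fletcher32=True,
        **compression_kwargs,
    )
    # Mark the dataset as a grayscale HDF5 image, as in the diff.
    h5bg.attrs.create("CLASS", np.bytes_("IMAGE"))
    h5bg.attrs.create("IMAGE_VERSION", np.bytes_("1.2"))
    h5bg.attrs.create("IMAGE_SUBCLASS", np.bytes_("IMAGE_GRAYSCALE"))
    return h5bg


# Usage with hypothetical values:
with h5py.File("background_sketch.h5", "a") as h5:
    provision_image_bg(h5, event_count=42, image_shape=(80, 250),
                       compress=False)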
26 changes: 1 addition & 25 deletions dcnum/feat/feat_background/bg_roll_median.py
@@ -1,9 +1,7 @@
import multiprocessing as mp
import queue
import time
import uuid

import hdf5plugin
import numpy as np
from scipy import ndimage

@@ -60,6 +58,7 @@ def __init__(self, input_data, output_path, kernel_size=100,
super(BackgroundRollMed, self).__init__(
input_data=input_data,
output_path=output_path,
compress=compress,
num_cpus=num_cpus,
kernel_size=kernel_size,
batch_size=batch_size)
@@ -69,12 +68,6 @@ def __init__(self, input_data, output_path, kernel_size=100,
#: number of events processed at once
self.batch_size = batch_size

#: unique identifier
self.name = str(uuid.uuid4())
#: shape of event images
self.image_shape = self.input_data[0].shape
#: total number of events
self.event_count = len(self.input_data)
#: mp.RawArray for temporary batch input data
self.shared_input_raw = mp_spawn.RawArray(
np.ctypeslib.ctypes.c_uint8,
@@ -112,23 +105,6 @@ def __init__(self, input_data, output_path, kernel_size=100,
for _ in range(self.num_cpus)]
[w.start() for w in self.workers]

# Initialize background data
if compress:
compression_kwargs = hdf5plugin.Zstd(clevel=5)
else:
compression_kwargs = {}
h5bg = self.h5out.require_dataset(
"events/image_bg",
shape=self.input_data.shape,
dtype=np.uint8,
chunks=(100, self.image_shape[0], self.image_shape[1]),
fletcher32=True,
**compression_kwargs,
)
h5bg.attrs.create('CLASS', np.string_('IMAGE'))
h5bg.attrs.create('IMAGE_VERSION', np.string_('1.2'))
h5bg.attrs.create('IMAGE_SUBCLASS', np.string_('IMAGE_GRAYSCALE'))

def __enter__(self):
return self

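This subclass diff and the next one reduce to the same pattern: drop the duplicated uuid/hdf5plugin setup and dataset creation, and forward compress to the base class instead. A schematic of that pattern (simplified stand-ins, not dcnum code):

import abc
import uuid

import numpy as np


class BackgroundBaseSketch(abc.ABC):
    """Stand-in for dcnum's Background base class (simplified)."""

    def __init__(self, input_data, compress=True, **kwargs):
        # Shared state previously duplicated in every subclass.
        self.input_data = input_data
        self.compress = compress
        self.name = str(uuid.uuid4())            # unique identifier
        self.image_shape = input_data[0].shape   # shape of event images
        self.event_count = len(input_data)       # total number of events

    @abc.abstractmethod
    def process(self):
        """Algorithm-specific background computation."""


class RollMedSketch(BackgroundBaseSketch):
    def __init__(self, input_data, kernel_size=100, compress=True):
        # The subclass only forwards `compress` and keeps its own options.
        super().__init__(input_data=input_data, compress=compress)
        self.kernel_size = kernel_size

    def process(self):
        pass  # rolling-median computation would go here


bg = RollMedSketch(np.zeros((42, 80, 250), dtype=np.uint8), compress=False)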
30 changes: 1 addition & 29 deletions dcnum/feat/feat_background/bg_sparse_median.py
@@ -2,9 +2,7 @@
import multiprocessing as mp
import queue
import time
import uuid

import hdf5plugin
import numpy as np
from scipy import ndimage

@@ -73,6 +71,7 @@ def __init__(self, input_data, output_path, kernel_size=200,
super(BackgroundSparseMed, self).__init__(
input_data=input_data,
output_path=output_path,
compress=compress,
num_cpus=num_cpus,
kernel_size=kernel_size,
split_time=split_time,
@@ -88,14 +87,6 @@ def __init__(self, input_data, output_path, kernel_size=200,
#: keep at least this many background images from the series
self.frac_cleansing = frac_cleansing

#: unique identifier
self.name = str(uuid.uuid4())
#: shape of event images
self.image_shape = self.input_data[0].shape

#: total number of events
self.event_count = len(self.input_data)

# time axis
self.time = None
if self.h5in is not None:
@@ -166,25 +157,6 @@ def __init__(self, input_data, output_path, kernel_size=200,
for _ in range(self.num_cpus)]
[w.start() for w in self.workers]

# Initialize background data
if compress:
compression_kwargs = hdf5plugin.Zstd(clevel=5)
else:
compression_kwargs = {}
h5bg = self.h5out.require_dataset(
"events/image_bg",
shape=self.input_data.shape,
dtype=np.uint8,
chunks=(min(100, self.event_count),
self.image_shape[0],
self.image_shape[1]),
fletcher32=True,
**compression_kwargs,
)
h5bg.attrs.create('CLASS', np.string_('IMAGE'))
h5bg.attrs.create('IMAGE_VERSION', np.string_('1.2'))
h5bg.attrs.create('IMAGE_SUBCLASS', np.string_('IMAGE_GRAYSCALE'))

def __enter__(self):
return self

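With the dataset setup centralized in the base class, every background computer now produces an output file with the same events/image_bg layout. A quick h5py inspection sketch (the file path is hypothetical):

import h5py

with h5py.File("output.rtdc", "r") as h5:
    bg = h5["events/image_bg"]
    print(bg.shape, bg.dtype)   # (event_count, height, width), uint8
    print(bg.chunks)            # chunk length clipped to the event count
    print(bg.fletcher32)        # True: checksums enabled
    print(dict(bg.attrs))       # CLASS, IMAGE_VERSION, IMAGE_SUBCLASS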
