Skip to content

Commit

Permalink
tests: add tests for #11
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Nov 10, 2023
1 parent 8a0fd65 commit b0070cb
Show file tree
Hide file tree
Showing 3 changed files with 186 additions and 21 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
0.13.0
- feat: support writing file-based HDF5 basins
- feat: support reading file-based HDF5 basins
- feat: support writing file-based HDF5 basins (#11)
- feat: support reading file-based HDF5 basins (#11)
- fix: correctly support passing an ndarray to BackgroundSparseMed
instead of an input file path
- fix: BackgroundSparseMed did not work for datasets of length < 100
Expand Down
51 changes: 32 additions & 19 deletions dcnum/read/hdf5_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,10 @@ def __exit__(self, exc_type, exc_val, exc_tb):
def __getitem__(self, feat):
if feat in ["image", "image_bg", "mask"]:
return self.get_image_cache(feat)
elif feat in self._cache_scalar:
elif feat in self._cache_scalar: # check for scalar cached
return self._cache_scalar[feat]
elif len(self.h5["events"][feat].shape) == 1:
elif (feat in self.h5["events"]
and len(self.h5["events"][feat].shape) == 1): # cache scalar
self._cache_scalar[feat] = self.h5["events"][feat][:]
return self._cache_scalar[feat]
else:
Expand Down Expand Up @@ -183,8 +184,11 @@ def image_bg(self):
@property
def image_corr(self):
if "image_corr" not in self._image_cache:
self._image_cache["image_corr"] = ImageCorrCache(self.image,
self.image_bg)
if self.image is not None and self.image_bg is not None:
image_corr = ImageCorrCache(self.image, self.image_bg)
else:
image_corr = None
self._image_cache["image_corr"] = image_corr
return self._image_cache["image_corr"]

@property
Expand Down Expand Up @@ -229,8 +233,9 @@ def features_scalar_frame(self):

def close(self):
"""Close the underlying HDF5 file"""
for bn, _ in self._basin_data:
bn.close()
for bn, _ in self._basin_data.values():
if bn is not None:
bn.close()
self._image_cache.clear()
self._basin_data.clear()
self.h5.close()
Expand All @@ -248,22 +253,23 @@ def get_basin_data(self, index):
if index not in self._basin_data:
bn_dict = self.basins[index]
pdir = pathlib.Path(self.path).parent
for pp in bn_dict["paths"]:
# first, try relative path (to avoid getting path from WDIR)
prel = pdir / pp
if prel.exists():
path = prel
break
# second, try absolute path
if pathlib.Path(pp).exists():
for ff in bn_dict["paths"]:
pp = pathlib.Path(ff)
if pp.is_absolute() and pp.exists():
path = pp
break
else:
# try relative path (to avoid getting path from WDIR)
prel = pdir / pp
if prel.exists():
path = prel
break
else:
path = None
if path is None:
self._basin_data[index] = (None, None)
else:
h5dat = HDF5Data(self.basins)
h5dat = HDF5Data(path)
features = bn_dict.get("features")
if features is None:
features = sorted(h5dat.h5["events"].keys())
Expand All @@ -282,9 +288,10 @@ def get_image_cache(self, feat):
# search all basins
for idx in range(len(self.basins)):
bndat, features = self.get_basin_data(idx)
if feat in features:
ds = bndat.h5[f"events/{feat}"]
break
if features is not None:
if feat in features:
ds = bndat.h5[f"events/{feat}"]
break
else:
ds = None

Expand All @@ -301,7 +308,13 @@ def get_image_cache(self, feat):

def keys(self):
if self._keys is None:
self._keys = sorted(self.h5["/events"].keys())
features = sorted(self.h5["/events"].keys())
# add basin features
for ii in range(len(self.basins)):
_, bfeats = self.get_basin_data(ii)
if bfeats:
features += bfeats
self._keys = sorted(set(features))
return self._keys


Expand Down
152 changes: 152 additions & 0 deletions tests/test_read_basin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import h5py
import numpy as np

from dcnum.write import HDF5Writer
from dcnum.read import HDF5Data


from helper_methods import retrieve_data


def test_basin_not_available():
    """Image accessors must degrade to None when no basin path resolves."""
    path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
    path_scalar = path_orig.with_name("smaller.rtdc")

    # Write a reduced copy that holds only scalar features plus basin info.
    with h5py.File(path_orig) as src, HDF5Writer(path_scalar, "w") as hw:
        dst = hw.h5
        dst.require_group("events")
        # copy every feature except the image-type ones
        for feat in src["events"]:
            if feat in ["image", "image_bg", "mask"]:
                continue
            dst["events"][feat] = src["events"][feat][:]
        # register the basin: one bogus path plus the (soon deleted) original
        hw.store_basin(name="test",
                       paths=["fake.rtdc",  # fake path
                              str(path_orig),  # absolute path name
                              ])
        # sanity checks
        assert "deform" in dst["events"]
        assert "image" not in dst["events"]

    # delete the basin target so that neither stored path resolves
    path_orig.unlink()

    # Every image-type accessor must now report "not available".
    with HDF5Data(path_scalar) as hd:
        assert "image" not in hd
        assert hd.image is None
        assert hd.image_bg is None
        assert hd.image_corr is None
        assert hd.mask is None
        _, features = hd.get_basin_data(0)
        assert not features


def test_basin_nothing_available():
    """An unresolvable basin yields ``(None, None)`` from ``get_basin_data``.

    The stored basin references only a non-existent path, so opening the
    reduced file must not fail, but the basin data must be unavailable.
    """
    h5path = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
    h5path_small = h5path.with_name("smaller.rtdc")

    # Dataset creation
    with h5py.File(h5path) as src, HDF5Writer(h5path_small, "w") as hw:
        dst = hw.h5
        # consistent with test_basin_not_available (idempotent if present)
        dst.require_group("events")
        # first, copy all the scalar features to the new file
        for feat in src["events"]:
            if feat not in ["image", "image_bg", "mask"]:
                dst["events"][feat] = src["events"][feat][:]
        # Next, store the basin information in the new dataset; the only
        # path given does not exist anywhere.
        hw.store_basin(name="test",
                       paths=["fake.rtdc",  # fake path
                              ])

        # sanity checks
        assert "deform" in dst["events"]
        assert "image" not in dst["events"]

    h5path.unlink()

    # Now open the scalar dataset and check that the basin is missing
    with HDF5Data(h5path_small) as hd:
        assert "image" not in hd
        bndat, features = hd.get_basin_data(0)
        # fix: the original test fetched the basin data without asserting
        # anything about it; an unresolvable basin is stored as (None, None)
        assert bndat is None
        assert features is None


def test_basin_path_absolute():
    """A basin referenced by an absolute path must be found and served."""
    input_path = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
    scalar_path = input_path.with_name("smaller.rtdc")

    # Build the reduced file: scalar features only, images stay in the basin.
    with h5py.File(input_path) as src, HDF5Writer(scalar_path, "w") as hw:
        dst = hw.h5
        excluded = ["image", "image_bg", "mask"]
        for feat in src["events"]:
            if feat not in excluded:
                dst["events"][feat] = src["events"][feat][:]
        # reference the original file through its absolute location
        hw.store_basin(name="test",
                       paths=["fake.rtdc",  # fake path
                              str(input_path.resolve())
                              ])

    # The basin must resolve and transparently provide the image feature.
    with HDF5Data(scalar_path) as hd:
        assert "image" in hd.get_basin_data(0)[1]
        assert "image" in hd.keys()
        assert np.median(hd["image"][0]) == 187


def test_basin_relative():
    """A basin referenced by a path relative to the dataset must resolve."""
    input_path = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
    scalar_path = input_path.with_name("smaller.rtdc")

    # Build the reduced file: scalar features only, images stay in the basin.
    with h5py.File(input_path) as src, HDF5Writer(scalar_path, "w") as hw:
        dst = hw.h5
        excluded = ["image", "image_bg", "mask"]
        for feat in src["events"]:
            if feat not in excluded:
                dst["events"][feat] = src["events"][feat][:]
        # reference the original file by bare name (relative to scalar_path)
        hw.store_basin(name="test",
                       paths=["fake.rtdc",  # fake path
                              input_path.name
                              ])

    # The basin must resolve and serve image data via item access,
    # the image property, and the background-corrected image.
    with HDF5Data(scalar_path) as hd:
        assert "image" in hd.get_basin_data(0)[1]
        assert "image" in hd.keys()
        assert np.median(hd["image"][0]) == 187
        assert np.median(hd.image[0]) == 187
        assert np.median(hd.image_corr[0]) == 1


def test_basin_scalar_features():
    """Scalar features missing locally are looked up in the basin.

    Only "deform" is copied into the small file; "area_um" and the
    image-type features must be served from the relative-path basin.
    """
    h5path = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
    h5path_small = h5path.with_name("smaller.rtdc")

    # Dataset creation
    with h5py.File(h5path) as src, HDF5Writer(h5path_small, "w") as hw:
        dst = hw.h5
        # only copy one feature
        dst["events"]["deform"] = src["events"]["deform"][:]
        # Next, store the basin information in the new dataset
        hw.store_basin(name="test",
                       paths=["fake.rtdc",  # fake path
                              h5path.name  # relative to h5path_small
                              ])

    # Now open the scalar dataset and check whether basins are defined
    with HDF5Data(h5path_small) as hd:
        # "image" exists only in the basin
        assert "image" in hd.get_basin_data(0)[1]
        assert "image" in hd.keys()
        # "area_um" comes from the basin; "deform" is stored locally
        assert "area_um" in hd.keys()
        assert "deform" in hd.keys()
        assert np.median(hd["image"][0]) == 187
        assert np.median(hd.image[0]) == 187
        assert np.median(hd.image_corr[0]) == 1
        assert np.allclose(hd["deform"][0], 0.0740563677588885)
        assert np.allclose(hd["area_um"][0], 0.559682)
        assert np.allclose(hd["area_um"][1], 91.193185875)

0 comments on commit b0070cb

Please sign in to comment.