From a76eb938ff19da01652605d59dceadbaaff2dcde Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Paul=20M=C3=BCller?=
Date: Fri, 24 Nov 2023 23:28:12 +0100
Subject: [PATCH] feat: implement HDF5Writer.store_log

---
NOTE(review): the line breaks of this patch were reconstructed from a copy
in which all whitespace had been collapsed. Indentation of the context
lines and the wrapping of the CHANGELOG context are assumptions, and the
post-image blob hashes in the "index" lines no longer match the modified
content - regenerate the patch with `git format-patch` before applying.

 CHANGELOG                  |  1 +
 src/dcnum/write/writer.py  | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/test_write_writer.py | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 95 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index 8ac4c5b..56c24ba 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,7 @@
  - BREAKING CHANGE: Remove preselection capabilities, because it is not
    well integrated into the pipeline. For more information, please see
    issue #15.
+ - feat: implement HDF5Writer.store_log
  - enh: add Segmenter.hardware_processor property
  - enh: introduce pipeline identifier for data pixel size
  - enh: reduce pixel_size accuracy to 8 digits after the decimal point
diff --git a/src/dcnum/write/writer.py b/src/dcnum/write/writer.py
index fc25d23..86b8b5e 100644
--- a/src/dcnum/write/writer.py
+++ b/src/dcnum/write/writer.py
@@ -136,6 +136,59 @@ def store_feature_chunk(self, feat, data):
         ds.resize(offset + dsize, axis=0)
         ds[offset:offset + dsize] = data
 
+    def store_log(self,
+                  log: str,
+                  data: List[str],
+                  override: bool = False):
+        """Store log data
+
+        Store the log data under the key `log` as a dataset of
+        fixed-width byte strings in the "logs" group.
+
+        Parameters
+        ----------
+        log: str
+            Name (key) of the log entry
+        data: list of str
+            Lines of the log; at least one line is required.
+            Each line is stored UTF-8 encoded.
+        override: bool
+            If the log entry already exists, `ValueError` is
+            raised unless `override` is set to True; then the
+            existing entry is replaced.
+
+        Raises
+        ------
+        ValueError
+            If `data` is empty or if the log entry already
+            exists and `override` is False.
+        """
+        # Validate and encode before the file is modified, so that
+        # invalid `data` can never delete an existing log entry.
+        # `len` (not truthiness) so array-like input keeps working.
+        if len(data) == 0:
+            raise ValueError(
+                f"Cannot store empty log '{log}' in {self.h5.filename}!")
+        # The width of an "S" dtype counts bytes, not characters, and
+        # non-ASCII characters take more than one byte in UTF-8.
+        lines = [ll.encode("utf-8") if isinstance(ll, str) else ll
+                 for ll in data]
+        # avoid a zero-width dtype if all lines are empty
+        width = max(1, max(len(ll) for ll in lines))
+        logs = self.h5.require_group("logs")
+        if log in logs:
+            if not override:
+                raise ValueError(
+                    f"Log '{log}' already exists in {self.h5.filename}!")
+            del logs[log]
+        logs.create_dataset(
+            name=log,
+            data=lines,
+            shape=(len(lines),),
+            dtype=f"S{width}",
+            chunks=True,
+            **self.ds_kwds)
+
 
 def create_with_basins(
         path_out: str | pathlib.Path,
diff --git a/tests/test_write_writer.py b/tests/test_write_writer.py
index 764675c..aca579d 100644
--- a/tests/test_write_writer.py
+++ b/tests/test_write_writer.py
@@ -216,3 +216,44 @@ def test_writer_basin_file_relative():
     assert data_dict["type"] == "file"
     assert data_dict["format"] == "hdf5"
     assert data_dict["features"] == ["deform", "area_um"]
+
+
+def test_writer_logs(tmp_path):
+    path_test = tmp_path / "test.h5"
+    # Create a file that only contains a log.
+    with write.HDF5Writer(path_test) as hw:
+        hw.store_log("peter", ["McNulty", "Freamon", "Omar"])
+
+    with read.HDF5Data(path_test) as hd:
+        assert hd.logs["peter"] == ["McNulty", "Freamon", "Omar"]
+
+
+def test_writer_logs_override(tmp_path):
+    path_test = tmp_path / "test.h5"
+    # Create a file that only contains a log.
+    with write.HDF5Writer(path_test) as hw:
+        hw.store_log("peter", ["McNulty", "Freamon", "Omar"])
+
+    with read.HDF5Data(path_test) as hd:
+        assert hd.logs["peter"] == ["McNulty", "Freamon", "Omar"]
+
+    with write.HDF5Writer(path_test) as hw:
+        with pytest.raises(ValueError, match="peter"):
+            hw.store_log("peter", ["Omar", "McNulty", "Freamon"])
+        hw.store_log("peter", ["Omar", "McNulty", "Freamon"],
+                     override=True)
+
+    with read.HDF5Data(path_test) as hd:
+        assert hd.logs["peter"] == ["Omar", "McNulty", "Freamon"]
+
+
+def test_writer_logs_override_invalid_data(tmp_path):
+    path_test = tmp_path / "test.h5"
+    with write.HDF5Writer(path_test) as hw:
+        hw.store_log("peter", ["McNulty", "Freamon", "Omar"])
+        # A rejected store must not delete the existing log.
+        with pytest.raises(ValueError, match="peter"):
+            hw.store_log("peter", [], override=True)
+
+    with read.HDF5Data(path_test) as hd:
+        assert hd.logs["peter"] == ["McNulty", "Freamon", "Omar"]