From aa318285c54db94bb6786dcade546824aeee9610 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 21:41:55 -0400 Subject: [PATCH 1/9] [pre-commit.ci] pre-commit autoupdate (#807) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4148102c2..04ae1161a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-yaml - id: end-of-file-fixer From def8e572c15409a95b592dd7073f929f00e7f31f Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Tue, 9 Apr 2024 12:09:24 -0400 Subject: [PATCH 2/9] fix daily test readme badge (#808) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1480a074a..60e68dc1c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![PyPI version](https://badge.fury.io/py/neuroconv.svg)](https://badge.fury.io/py/neuroconv.svg) -![Full Tests](https://github.com/catalystneuro/neuroconv/actions/workflows/testing.yml/badge.svg) +![Daily Tests](https://github.com/catalystneuro/neuroconv/actions/workflows/dailies.yml/badge.svg) ![Auto-release](https://github.com/catalystneuro/neuroconv/actions/workflows/auto-publish.yml/badge.svg) [![codecov](https://codecov.io/github/catalystneuro/neuroconv/coverage.svg?branch=main)](https://codecov.io/github/catalystneuro/neuroconv?branch=main) [![documentation](https://readthedocs.org/projects/neuroconv/badge/?version=main)](https://neuroconv.readthedocs.io/en/main/) From 0d03d891e68c80c91b0f9bd3509e581e1e898412 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Wed, 10 Apr 2024 14:20:44 -0400 Subject: [PATCH 3/9] Extend dev testing workflow to include probeinterface (#810) Co-authored-by: Heberto Mayorquin --- .github/workflows/dev-testing.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dev-testing.yml b/.github/workflows/dev-testing.yml index a56a8033c..7bcbbc510 100644 --- a/.github/workflows/dev-testing.yml +++ b/.github/workflows/dev-testing.yml @@ -51,8 +51,10 @@ jobs: run: pip install --no-cache-dir git+https://github.com/dandi/dandi-cli@master - name: Dev gallery - PyNWB run: pip install --no-cache-dir git+https://github.com/NeurodataWithoutBorders/pynwb@dev + - name: Dev gallery - ProbeInterface + run: pip install --no-cache-dir git+https://github.com/spikeinterface/probeinterface@main - name: Dev gallery - SpikeInterface - run: pip install --no-cache-dir git+https://github.com/spikeinterface/spikeinterface@main + run: pip install --no-cache-dir "spikeinterface[test_core] @ git+https://github.com/spikeinterface/spikeinterface@main" - name: Dev gallery - NEO run: pip install --no-cache-dir git+https://github.com/NeuralEnsemble/python-neo@master - name: Dev gallery - HDMF From 866bfeadb5e592911d2e02183768dc4e257ba3b2 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Thu, 11 Apr 2024 14:00:06 -0400 Subject: [PATCH 4/9] [Backend Configuration IVa] Add backend control to context tool (#800) Co-authored-by: Heberto Mayorquin Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- 
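Note (not part of the committed diff): a minimal usage sketch of the `backend` argument this patch adds to `make_or_load_nwbfile`, following the pattern of the new Zarr write test further below. The output path, metadata contents, and TimeSeries are illustrative placeholders; a timezone-aware session_start_time is the only strictly required metadata field, and appending (overwrite=False) with the Zarr backend is not yet supported.

from datetime import datetime, timezone

import numpy as np
from pynwb import TimeSeries

from neuroconv.tools.nwb_helpers import make_or_load_nwbfile

# Placeholder metadata; a timezone-aware session_start_time is required.
metadata = dict(NWBFile=dict(session_start_time=datetime.now(timezone.utc)))

# Placeholder data to write.
time_series = TimeSeries(name="example", data=np.arange(10), unit="a.u.", starting_time=0.0, rate=1.0)

# Appending (overwrite=False) is not yet supported for the Zarr backend, so write a fresh file.
with make_or_load_nwbfile(
    nwbfile_path="example.nwb.zarr", metadata=metadata, overwrite=True, backend="zarr"
) as nwbfile:
    nwbfile.add_acquisition(time_series)

Reading the result back then goes through hdmf_zarr's NWBZarrIO rather than NWBHDF5IO, as exercised by the new test_make_or_load_nwbfile_write_zarr test.
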
CHANGELOG.md | 3 +- .../nwb_helpers/_metadata_and_file_helpers.py | 38 ++++++-- .../test_tools/test_context_tools.py | 90 +++++++++++++++++-- 3 files changed, 116 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 919bb46a2..8fbe63f6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,14 @@ ### Deprecations * Removed `stream_id` as an argument from `IntanRecordingInterface` [PR #794](https://github.com/catalystneuro/neuroconv/pull/794) -* Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) ### Features * Released the first official Docker images for the package on the GitHub Container Repository (GHCR). [PR #383](https://github.com/catalystneuro/neuroconv/pull/383) +* Added `backend` control to the `make_or_load_nwbfile` helper method in `neuroconv.tools.nwb_helpers`. [PR #800](https://github.com/catalystneuro/neuroconv/pull/800) ### Bug fixes * Fixed writing waveforms directly to file [PR #799](https://github.com/catalystneuro/neuroconv/pull/799) +* Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) diff --git a/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py b/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py index 85598c6bf..75fcfc9ed 100644 --- a/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py +++ b/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py @@ -5,11 +5,11 @@ from copy import deepcopy from datetime import datetime from pathlib import Path -from typing import Optional +from typing import Literal, Optional from warnings import warn from pydantic import FilePath -from pynwb import NWBHDF5IO, NWBFile +from pynwb import NWBFile from pynwb.file import Subject from ...utils.dict import DeepDict, load_dict_from_file @@ -133,6 +133,7 @@ def make_or_load_nwbfile( nwbfile: Optional[NWBFile] = None, metadata: Optional[dict] = None, overwrite: bool = False, + backend: Literal["hdf5", "zarr"] = "hdf5", verbose: bool = True, ): """ @@ -150,11 +151,17 @@ def make_or_load_nwbfile( overwrite: bool, optional Whether to overwrite the NWBFile if one exists at the nwbfile_path. The default is False (append mode). + backend : "hdf5" or "zarr", default: "hdf5" + The type of backend used to create the file. verbose: bool, optional If 'nwbfile_path' is specified, informs user after a successful write operation. The default is True. """ + from . import BACKEND_NWB_IO + nwbfile_path_in = Path(nwbfile_path) if nwbfile_path else None + backend_io_class = BACKEND_NWB_IO[backend] + assert not (nwbfile_path is None and nwbfile is None and metadata is None), ( "You must specify either an 'nwbfile_path', or an in-memory 'nwbfile' object, " "or provide the metadata for creating one." @@ -163,17 +170,36 @@ def make_or_load_nwbfile( "'nwbfile_path' exists at location, 'overwrite' is False (append mode), but an in-memory 'nwbfile' object was " "passed! Cannot reconcile which nwbfile object to write." 
) - + if overwrite is False and backend == "zarr": + # TODO: remove when https://github.com/hdmf-dev/hdmf-zarr/issues/182 is resolved + raise NotImplementedError("Appending a Zarr file is not yet supported!") load_kwargs = dict() success = True - file_initially_exists = nwbfile_path_in.is_file() if nwbfile_path_in is not None else None + file_initially_exists = nwbfile_path_in.exists() if nwbfile_path_in is not None else None if nwbfile_path_in: - load_kwargs.update(path=nwbfile_path_in) + load_kwargs.update(path=str(nwbfile_path_in)) if file_initially_exists and not overwrite: load_kwargs.update(mode="r+", load_namespaces=True) else: load_kwargs.update(mode="w") - io = NWBHDF5IO(**load_kwargs) + + backends_that_can_read = [ + backend_name + for backend_name, backend_io_class in BACKEND_NWB_IO.items() + if backend_io_class.can_read(path=str(nwbfile_path_in)) + ] + # Future-proofing: raise an error if more than one backend can read the file + assert ( + len(backends_that_can_read) <= 1 + ), "More than one backend is capable of reading the file! Please raise an issue describing your file." + if load_kwargs["mode"] == "r+" and backend not in backends_that_can_read: + raise IOError( + f"The chosen backend ('{backend}') is unable to read the file! " + f"Please select '{backends_that_can_read[0]}' instead." + ) + + io = backend_io_class(**load_kwargs) + try: if load_kwargs.get("mode", "") == "r+": nwbfile = io.read() diff --git a/tests/test_minimal/test_tools/test_context_tools.py b/tests/test_minimal/test_tools/test_context_tools.py index fd72a79f0..fc2ce7d18 100644 --- a/tests/test_minimal/test_tools/test_context_tools.py +++ b/tests/test_minimal/test_tools/test_context_tools.py @@ -6,6 +6,7 @@ from unittest.mock import patch from hdmf.testing import TestCase +from hdmf_zarr import NWBZarrIO from pynwb import NWBHDF5IO, TimeSeries from neuroconv.tools.nwb_helpers import make_nwbfile_from_metadata, make_or_load_nwbfile @@ -74,14 +75,26 @@ def test_make_or_load_nwbfile_no_file_save_on_error_in_context(self): pass assert not nwbfile_path.exists() - def test_make_or_load_nwbfile_write(self): + def test_make_or_load_nwbfile_write_hdf5(self): nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_write.nwb" - with make_or_load_nwbfile(nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True) as nwbfile: + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="hdf5" + ) as nwbfile: nwbfile.add_acquisition(self.time_series_1) with NWBHDF5IO(path=nwbfile_path, mode="r") as io: nwbfile_out = io.read() assert "test1" in nwbfile_out.acquisition + def test_make_or_load_nwbfile_write_zarr(self): + nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_write.nwb.zarr" + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="zarr" + ) as nwbfile: + nwbfile.add_acquisition(self.time_series_1) + with NWBZarrIO(path=str(nwbfile_path), mode="r") as io: + nwbfile_out = io.read() + assert "test1" in nwbfile_out.acquisition + def test_make_or_load_nwbfile_closure(self): nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_closure.nwb" with make_or_load_nwbfile(nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True) as nwbfile: @@ -91,28 +104,89 @@ def test_make_or_load_nwbfile_closure(self): self.assertCountEqual(nwbfile_out.acquisition["test1"].data, self.time_series_1.data) assert not nwbfile_out.acquisition["test1"].data # A closed h5py.Dataset returns false - def 
test_make_or_load_nwbfile_overwrite(self): + def test_make_or_load_nwbfile_overwrite_hdf5(self): nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_overwrite.nwb" - with make_or_load_nwbfile(nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True) as nwbfile: + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="hdf5" + ) as nwbfile: nwbfile.add_acquisition(self.time_series_1) - with make_or_load_nwbfile(nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True) as nwbfile: + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="hdf5" + ) as nwbfile: nwbfile.add_acquisition(self.time_series_2) with NWBHDF5IO(path=nwbfile_path, mode="r") as io: nwbfile_out = io.read() assert "test1" not in nwbfile_out.acquisition assert "test2" in nwbfile_out.acquisition - def test_make_or_load_nwbfile_append(self): + def test_make_or_load_nwbfile_overwrite_zarr(self): + nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_overwrite.nwb.zarr" + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="zarr" + ) as nwbfile: + nwbfile.add_acquisition(self.time_series_1) + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="zarr" + ) as nwbfile: + nwbfile.add_acquisition(self.time_series_2) + with NWBZarrIO(path=str(nwbfile_path), mode="r") as io: + nwbfile_out = io.read() + assert "test1" not in nwbfile_out.acquisition + assert "test2" in nwbfile_out.acquisition + + def test_make_or_load_nwbfile_append_hdf5(self): nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_append.nwb" - with make_or_load_nwbfile(nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True) as nwbfile: + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="hdf5" + ) as nwbfile: nwbfile.add_acquisition(self.time_series_1) - with make_or_load_nwbfile(nwbfile_path=nwbfile_path) as nwbfile: + with make_or_load_nwbfile(nwbfile_path=nwbfile_path, overwrite=False, backend="hdf5") as nwbfile: nwbfile.add_acquisition(self.time_series_2) with NWBHDF5IO(path=nwbfile_path, mode="r") as io: nwbfile_out = io.read() assert "test1" in nwbfile_out.acquisition assert "test2" in nwbfile_out.acquisition + # TODO: re-include when https://github.com/hdmf-dev/hdmf-zarr/issues/182 is resolved + # def test_make_or_load_nwbfile_append_zarr(self): + # nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_append.nwb.zarr" + # with make_or_load_nwbfile( + # nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="zarr" + # ) as nwbfile: + # nwbfile.add_acquisition(self.time_series_1) + # with make_or_load_nwbfile(nwbfile_path=nwbfile_path, overwrite=False, backend="zarr") as nwbfile: + # nwbfile.add_acquisition(self.time_series_2) + # with NWBZarrIO(path=str(nwbfile_path), mode="r") as io: + # nwbfile_out = io.read() + # assert "test1" in nwbfile_out.acquisition + # assert "test2" in nwbfile_out.acquisition + # + # def test_make_or_load_nwbfile_append_hdf5_using_zarr_error(self): + # nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_append.nwb" + # with make_or_load_nwbfile( + # nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="hdf5" + # ) as nwbfile: + # nwbfile.add_acquisition(self.time_series_1) + # with self.assertRaisesWith( + # exc_type=IOError, + # exc_msg="The chosen backend (zarr) is unable to read the file! 
Please select a different backend.", + # ): + # with make_or_load_nwbfile(nwbfile_path=nwbfile_path, overwrite=False, backend="zarr") as nwbfile: + # nwbfile.add_acquisition(self.time_series_2) + + def test_make_or_load_nwbfile_append_zarr_using_hdf5_error(self): + nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_append.nwb.zarr" + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="zarr" + ) as nwbfile: + nwbfile.add_acquisition(self.time_series_1) + with self.assertRaisesWith( + exc_type=IOError, + exc_msg="The chosen backend ('hdf5') is unable to read the file! Please select 'zarr' instead.", + ): + with make_or_load_nwbfile(nwbfile_path=nwbfile_path, overwrite=False, backend="hdf5") as nwbfile: + nwbfile.add_acquisition(self.time_series_2) + def test_make_or_load_nwbfile_pass_nwbfile(self): nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_pass_nwbfile.nwb" nwbfile_in = make_nwbfile_from_metadata(metadata=self.metadata) From 6ec538bbc056330f48829730900564543f8a36b0 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 11 Apr 2024 12:52:58 -0600 Subject: [PATCH 5/9] Avoid modifying metadata in-place in the `VideoInterface` (#814) --- CHANGELOG.md | 3 +-- .../behavior/video/videodatainterface.py | 6 +++-- src/neuroconv/tools/neo/neo.py | 25 ++++++++++--------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fbe63f6b..81f479245 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,9 @@ ### Bug fixes * Fixed writing waveforms directly to file [PR #799](https://github.com/catalystneuro/neuroconv/pull/799) +* Avoid in-place modification of the metadata in the `VideoInterface` and on neo tools. [PR #814](https://github.com/catalystneuro/neuroconv/pull/814) * Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) - - # v0.4.8 (March 20, 2024) ### Bug fixes diff --git a/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py b/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py index 26a6f9070..341de83e8 100644 --- a/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py @@ -1,3 +1,4 @@ +from copy import deepcopy from pathlib import Path from typing import List, Literal, Optional from warnings import warn @@ -303,9 +304,10 @@ def add_to_nwbfile( file_paths = self.source_data["file_paths"] - videos_metadata = metadata.get("Behavior", dict()).get("Videos", None) + # Be sure to copy metadata at this step to avoid mutating in-place + videos_metadata = deepcopy(metadata).get("Behavior", dict()).get("Videos", None) if videos_metadata is None: - videos_metadata = self.get_metadata()["Behavior"]["Videos"] + videos_metadata = deepcopy(self.get_metadata()["Behavior"]["Videos"]) assert len(videos_metadata) == self._number_of_files, ( "Incomplete metadata " diff --git a/src/neuroconv/tools/neo/neo.py b/src/neuroconv/tools/neo/neo.py index 477836991..a56236365 100644 --- a/src/neuroconv/tools/neo/neo.py +++ b/src/neuroconv/tools/neo/neo.py @@ -159,17 +159,16 @@ def add_icephys_electrode(neo_reader, nwbfile, metadata: dict = None): ... 
] """ + metadata_copy = deepcopy(metadata) if metadata is not None else dict() + assert isinstance(nwbfile, pynwb.NWBFile), "'nwbfile' should be of type pynwb.NWBFile" if len(nwbfile.devices) == 0: warnings.warn("When adding Icephys Electrode, no Devices were found on nwbfile. Creating a Device now...") - add_device_from_metadata(nwbfile=nwbfile, modality="Icephys", metadata=metadata) - - if metadata is None: - metadata = dict() + add_device_from_metadata(nwbfile=nwbfile, modality="Icephys", metadata=metadata_copy) - if "Icephys" not in metadata: - metadata["Icephys"] = dict() + if "Icephys" not in metadata_copy: + metadata_copy["Icephys"] = dict() defaults = [ dict( @@ -180,15 +179,15 @@ def add_icephys_electrode(neo_reader, nwbfile, metadata: dict = None): for elec_id in range(get_number_of_electrodes(neo_reader)) ] - if "Electrodes" not in metadata["Icephys"] or len(metadata["Icephys"]["Electrodes"]) == 0: - metadata["Icephys"]["Electrodes"] = deepcopy(defaults) + if "Electrodes" not in metadata_copy["Icephys"] or len(metadata_copy["Icephys"]["Electrodes"]) == 0: + metadata_copy["Icephys"]["Electrodes"] = defaults assert all( - [isinstance(x, dict) for x in metadata["Icephys"]["Electrodes"]] + [isinstance(x, dict) for x in metadata_copy["Icephys"]["Electrodes"]] ), "Expected metadata['Icephys']['Electrodes'] to be a list of dictionaries!" # Create Icephys electrode from metadata - for elec in metadata["Icephys"]["Electrodes"]: + for elec in metadata_copy["Icephys"]["Electrodes"]: if elec.get("name", defaults[0]["name"]) not in nwbfile.icephys_electrodes: device_name = elec.pop("device_name", None) or elec.pop("device", defaults[0]["device_name"]) # elec.pop("device_name", 0) @@ -286,8 +285,10 @@ def add_icephys_recordings( else: sessions_offset = 0 - relative_session_start_time = metadata["Icephys"]["Sessions"][sessions_offset]["relative_session_start_time"] - session_stimulus_type = metadata["Icephys"]["Sessions"][sessions_offset]["stimulus_type"] + relative_session_start_time = deepcopy( + metadata["Icephys"]["Sessions"][sessions_offset]["relative_session_start_time"] + ) + session_stimulus_type = deepcopy(metadata["Icephys"]["Sessions"][sessions_offset]["stimulus_type"]) # Sequential icephys recordings simultaneous_recordings = list() From 88576d652a9e428a87c9a10605245b88e35fde8b Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 11 Apr 2024 17:19:40 -0600 Subject: [PATCH 6/9] Add general test to avoid in-place modification of metadata by interfaces (#815) Co-authored-by: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> --- CHANGELOG.md | 5 +++++ .../ecephys/basesortingextractorinterface.py | 6 ++++-- .../tools/testing/data_interface_mixins.py | 13 ++++++++++--- tests/test_on_data/test_behavior_interfaces.py | 2 +- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81f479245..fc6eb273b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,11 @@ * Avoid in-place modification of the metadata in the `VideoInterface` and on neo tools. [PR #814](https://github.com/catalystneuro/neuroconv/pull/814) * Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) +### Testing +* Add general test for metadata in-place modification by interfaces. 
[PR #815](https://github.com/catalystneuro/neuroconv/pull/815) + + + # v0.4.8 (March 20, 2024) ### Bug fixes diff --git a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py index ed333b48c..fbc672b67 100644 --- a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py @@ -1,3 +1,4 @@ +from copy import deepcopy from typing import List, Literal, Optional, Union import numpy as np @@ -307,8 +308,9 @@ def add_to_nwbfile( """ from ...tools.spikeinterface import add_sorting + metadata_copy = deepcopy(metadata) if write_ecephys_metadata: - self.add_channel_metadata_to_nwb(nwbfile=nwbfile, metadata=metadata) + self.add_channel_metadata_to_nwb(nwbfile=nwbfile, metadata=metadata_copy) if stub_test: sorting_extractor = self.subset_sorting() @@ -316,7 +318,7 @@ def add_to_nwbfile( sorting_extractor = self.sorting_extractor property_descriptions = dict() - for metadata_column in metadata["Ecephys"].get("UnitProperties", []): + for metadata_column in metadata_copy["Ecephys"].get("UnitProperties", []): property_descriptions.update({metadata_column["name"]: metadata_column["description"]}) for unit_id in sorting_extractor.get_unit_ids(): # Special condition for wrapping electrode group pointers to actual object ids rather than string names diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index bde8898cf..a9840617f 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -2,6 +2,7 @@ import json import tempfile from abc import abstractmethod +from copy import deepcopy from datetime import datetime from pathlib import Path from typing import List, Optional, Type, Union @@ -83,13 +84,19 @@ def check_metadata(self): validate(metadata_for_validation, schema) self.check_extracted_metadata(metadata) - def run_conversion(self, nwbfile_path: str): + def check_run_conversion(self, nwbfile_path: str): metadata = self.interface.get_metadata() metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) + + metadata_in = deepcopy(metadata) + self.interface.run_conversion( nwbfile_path=nwbfile_path, overwrite=True, metadata=metadata, **self.conversion_options ) + # Test the the metadata was not altered by run conversin which calls add_to_nwbfile + assert metadata == metadata_in + @abstractmethod def check_read_nwb(self, nwbfile_path: str): """Read the produced NWB file and compare it to the interface.""" @@ -117,7 +124,7 @@ def test_conversion_as_lone_interface(self): self.check_conversion_options_schema_valid() self.check_metadata() self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb") - self.run_conversion(nwbfile_path=self.nwbfile_path) + self.check_run_conversion(nwbfile_path=self.nwbfile_path) self.check_read_nwb(nwbfile_path=self.nwbfile_path) # Any extra custom checks to run @@ -516,7 +523,7 @@ def test_conversion_as_lone_interface(self): self.check_conversion_options_schema_valid() self.check_metadata() self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb") - self.run_conversion(nwbfile_path=self.nwbfile_path) + self.check_run_conversion(nwbfile_path=self.nwbfile_path) self.check_read_nwb(nwbfile_path=self.nwbfile_path) # Any extra custom checks to run diff --git a/tests/test_on_data/test_behavior_interfaces.py 
b/tests/test_on_data/test_behavior_interfaces.py index 17b956b6c..8877f1bf1 100644 --- a/tests/test_on_data/test_behavior_interfaces.py +++ b/tests/test_on_data/test_behavior_interfaces.py @@ -333,7 +333,7 @@ class TestDeepLabCutInterface(DeepLabCutInterfaceMixin, unittest.TestCase): save_directory = OUTPUT_PATH - def run_conversion(self, nwbfile_path: str): + def check_run_conversion(self, nwbfile_path: str): metadata = self.interface.get_metadata() metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) From dd9af163069fb30470a9eae363bfd9b82f6bd8b7 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Sun, 14 Apr 2024 13:04:07 -0400 Subject: [PATCH 7/9] [Backend Configuration IVb] Swap internal variable name for consistency (#817) --- .../tools/nwb_helpers/_configure_backend.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/_configure_backend.py b/src/neuroconv/tools/nwb_helpers/_configure_backend.py index 6bc055a04..fd97ea529 100644 --- a/src/neuroconv/tools/nwb_helpers/_configure_backend.py +++ b/src/neuroconv/tools/nwb_helpers/_configure_backend.py @@ -23,21 +23,22 @@ def configure_backend( # TODO: update buffer shape in iterator, if present - nwbfile_object = nwbfile_objects[object_id] - is_dataset_linked = isinstance(nwbfile_object.fields.get(dataset_name), TimeSeries) + neurodata_object = nwbfile_objects[object_id] + is_dataset_linked = isinstance(neurodata_object.fields.get(dataset_name), TimeSeries) # Table columns - if isinstance(nwbfile_object, Data): - nwbfile_object.set_data_io(data_io_class=data_io_class, data_io_kwargs=data_io_kwargs) + if isinstance(neurodata_object, Data): + neurodata_object.set_data_io(data_io_class=data_io_class, data_io_kwargs=data_io_kwargs) # TimeSeries data or timestamps - elif isinstance(nwbfile_object, TimeSeries) and not is_dataset_linked: - nwbfile_object.set_data_io( + elif isinstance(neurodata_object, TimeSeries) and not is_dataset_linked: + neurodata_object.set_data_io( dataset_name=dataset_name, data_io_class=data_io_class, data_io_kwargs=data_io_kwargs ) # Skip the setting of a DataIO when target dataset is a link (assume it will be found in parent) - elif isinstance(nwbfile_object, TimeSeries) and is_dataset_linked: + elif isinstance(neurodata_object, TimeSeries) and is_dataset_linked: continue # Strictly speaking, it would be odd if a backend_configuration led to this, but might as well be safe else: raise NotImplementedError( - f"Unsupported object type {type(nwbfile_object)} for backend configuration of {nwbfile_object.name}!" + f"Unsupported object type {type(neurodata_object)} for backend configuration " + f"of {neurodata_object.name}!" 
) From 3711c63e0061d072e1cab0d14d2c4734182c9675 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 15 Apr 2024 02:06:47 -0400 Subject: [PATCH 8/9] [Backend Configuration IVc] Integrate backend control as interface method (#801) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 9 +- src/neuroconv/basedatainterface.py | 90 ++++++++++++++++++- .../nwb_helpers/_backend_configuration.py | 4 +- .../_configuration_models/_base_dataset_io.py | 7 +- .../tools/nwb_helpers/_configure_backend.py | 13 ++- .../nwb_helpers/_dataset_configuration.py | 35 +++++--- .../nwb_helpers/_metadata_and_file_helpers.py | 21 ++--- .../tools/testing/data_interface_mixins.py | 62 +++++++++++-- .../test_on_data/test_behavior_interfaces.py | 36 +++++--- 9 files changed, 225 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc6eb273b..ea0978ed2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,15 +3,16 @@ ### Deprecations * Removed `stream_id` as an argument from `IntanRecordingInterface` [PR #794](https://github.com/catalystneuro/neuroconv/pull/794) -### Features -* Released the first official Docker images for the package on the GitHub Container Repository (GHCR). [PR #383](https://github.com/catalystneuro/neuroconv/pull/383) -* Added `backend` control to the `make_or_load_nwbfile` helper method in `neuroconv.tools.nwb_helpers`. [PR #800](https://github.com/catalystneuro/neuroconv/pull/800) - ### Bug fixes * Fixed writing waveforms directly to file [PR #799](https://github.com/catalystneuro/neuroconv/pull/799) * Avoid in-place modification of the metadata in the `VideoInterface` and on neo tools. [PR #814](https://github.com/catalystneuro/neuroconv/pull/814) * Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) +### Features +* Added `backend` control to the `make_or_load_nwbfile` helper method in `neuroconv.tools.nwb_helpers`. [PR #800](https://github.com/catalystneuro/neuroconv/pull/800) +* Added `get_default_backend_configuration` method to all `DataInterface` classes. Also added HDF5 `backend` control to all standalone `.run_conversion(...)` methods for those interfaces. [PR #801](https://github.com/catalystneuro/neuroconv/pull/801) +* Released the first official Docker images for the package on the GitHub Container Repository (GHCR). [PR #383](https://github.com/catalystneuro/neuroconv/pull/383) + ### Testing * Add general test for metadata in-place modification by interfaces. 
[PR #815](https://github.com/catalystneuro/neuroconv/pull/815) diff --git a/src/neuroconv/basedatainterface.py b/src/neuroconv/basedatainterface.py index b46875f9e..aafe7a63a 100644 --- a/src/neuroconv/basedatainterface.py +++ b/src/neuroconv/basedatainterface.py @@ -3,12 +3,19 @@ import warnings from abc import ABC, abstractmethod from pathlib import Path -from typing import Optional, Tuple, Union +from typing import Literal, Optional, Tuple, Union from jsonschema.validators import validate from pynwb import NWBFile -from .tools.nwb_helpers import make_nwbfile_from_metadata, make_or_load_nwbfile +from .tools.nwb_helpers import ( + HDF5BackendConfiguration, + ZarrBackendConfiguration, + configure_backend, + get_default_backend_configuration, + make_nwbfile_from_metadata, + make_or_load_nwbfile, +) from .utils import ( NWBMetaDataEncoder, get_schema_from_method_signature, @@ -60,13 +67,43 @@ def validate_metadata(self, metadata: dict) -> None: decoded_metadata = json.loads(serialized_metadata) validate(instance=decoded_metadata, schema=self.get_metadata_schema()) - def create_nwbfile(self, metadata=None, **conversion_options) -> NWBFile: + def create_nwbfile(self, metadata: Optional[dict] = None, **conversion_options) -> NWBFile: + """ + Create and return an in-memory pynwb.NWBFile object with this interface's data added to it. + + Parameters + ---------- + metadata : dict, optional + Metadata dictionary with information used to create the NWBFile. + **conversion_options + Additional keyword arguments to pass to the `.add_to_nwbfile` method. + + Returns + ------- + nwbfile : pynwb.NWBFile + The in-memory object with this interface's data added to it. + """ + if metadata is None: + metadata = self.get_metadata() + nwbfile = make_nwbfile_from_metadata(metadata) self.add_to_nwbfile(nwbfile, metadata=metadata, **conversion_options) return nwbfile @abstractmethod def add_to_nwbfile(self, nwbfile: NWBFile, **conversion_options) -> None: + """ + Define a protocol for mapping the data from this interface to NWB neurodata objects. + + These neurodata objects should also be added to the in-memory pynwb.NWBFile object in this step. + + Parameters + ---------- + nwbfile : pynwb.NWBFile + The in-memory object to add the data to. + **conversion_options + Additional keyword arguments to pass to the `.add_to_nwbfile` method. + """ raise NotImplementedError def run_conversion( @@ -75,6 +112,11 @@ def run_conversion( nwbfile: Optional[NWBFile] = None, metadata: Optional[dict] = None, overwrite: bool = False, + # TODO: when all H5DataIO prewraps are gone, introduce Zarr safely + # backend: Union[Literal["hdf5", "zarr"]], + # backend_configuration: Optional[Union[HDF5BackendConfiguration, ZarrBackendConfiguration]] = None, + backend: Literal["hdf5"] = "hdf5", + backend_configuration: Optional[HDF5BackendConfiguration] = None, **conversion_options, ): """ @@ -92,6 +134,13 @@ def run_conversion( overwrite : bool, default: False Whether to overwrite the NWBFile if one exists at the nwbfile_path. The default is False (append mode). + backend : "hdf5" or a HDF5BackendConfiguration, default: "hdf5" + The type of backend to use when writing the file. + backend_configuration : HDF5BackendConfiguration, optional + The configuration model to use when configuring the datasets for this backend. + To customize, call the `.get_default_backend_configuration(...)` method, modify the returned + BackendConfiguration object, and pass that instead. + Otherwise, all datasets will use default configuration settings. 
""" if nwbfile_path is None: warnings.warn( # TODO: remove on or after 6/21/2024 @@ -99,6 +148,8 @@ def run_conversion( "NWBFile object in memory, use DataInterface.create_nwbfile. To append to an existing NWBFile object," " use DataInterface.add_to_nwbfile." ) + if backend_configuration is not None and nwbfile is None: + raise ValueError("When specifying a custom `backend_configuration`, you must also provide an `nwbfile`.") if metadata is None: metadata = self.get_metadata() @@ -108,6 +159,37 @@ def run_conversion( nwbfile=nwbfile, metadata=metadata, overwrite=overwrite, + backend=backend, verbose=getattr(self, "verbose", False), ) as nwbfile_out: - self.add_to_nwbfile(nwbfile_out, metadata=metadata, **conversion_options) + if backend_configuration is None: + # In this case, assume the relevant data has already been added to the NWBFile + self.add_to_nwbfile(nwbfile_out, metadata=metadata, **conversion_options) + + backend_configuration = self.get_default_backend_configuration(nwbfile=nwbfile_out, backend=backend) + + configure_backend(nwbfile=nwbfile_out, backend_configuration=backend_configuration) + + @staticmethod + def get_default_backend_configuration( + nwbfile: NWBFile, + # TODO: when all H5DataIO prewraps are gone, introduce Zarr safely + # backend: Union[Literal["hdf5", "zarr"]], + backend: Literal["hdf5"] = "hdf5", + ) -> Union[HDF5BackendConfiguration, ZarrBackendConfiguration]: + """ + Fill and return a default backend configuration to serve as a starting point for further customization. + + Parameters + ---------- + nwbfile : pynwb.NWBFile + The in-memory object with this interface's data already added to it. + backend : "hdf5" or "zarr", default: "hdf5" + The type of backend to use when creating the file. + + Returns + ------- + backend_configuration : HDF5BackendConfiguration or ZarrBackendConfiguration + The default configuration for the specified backend type. 
+ """ + return get_default_backend_configuration(nwbfile=nwbfile, backend=backend) diff --git a/src/neuroconv/tools/nwb_helpers/_backend_configuration.py b/src/neuroconv/tools/nwb_helpers/_backend_configuration.py index 0b35fdb87..d9c701c3b 100644 --- a/src/neuroconv/tools/nwb_helpers/_backend_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_backend_configuration.py @@ -12,7 +12,9 @@ def get_default_backend_configuration( nwbfile: NWBFile, backend: Literal["hdf5", "zarr"] ) -> Union[HDF5BackendConfiguration, ZarrBackendConfiguration]: """Fill a default backend configuration to serve as a starting point for further customization.""" - from ..nwb_helpers import BACKEND_CONFIGURATIONS + from ..nwb_helpers import ( + BACKEND_CONFIGURATIONS, # Locally scoped to avoid circular import + ) BackendConfigurationClass = BACKEND_CONFIGURATIONS[backend] diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py index 9562fa83e..fbb85a858 100644 --- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py +++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py @@ -260,9 +260,10 @@ def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Litera ) compression_method = "gzip" elif dtype == np.dtype("object"): # Unclear what default chunking/compression should be for compound objects - chunk_shape = None - buffer_shape = None - compression_method = None + raise NotImplementedError( + f"Unable to create a `DatasetIOConfiguration` for the dataset at '{location_in_file}'" + f"for neurodata object '{neurodata_object}' of type '{type(neurodata_object)}'!" + ) return cls( object_id=neurodata_object.object_id, diff --git a/src/neuroconv/tools/nwb_helpers/_configure_backend.py b/src/neuroconv/tools/nwb_helpers/_configure_backend.py index fd97ea529..8e7a2f17b 100644 --- a/src/neuroconv/tools/nwb_helpers/_configure_backend.py +++ b/src/neuroconv/tools/nwb_helpers/_configure_backend.py @@ -12,9 +12,19 @@ def configure_backend( nwbfile: NWBFile, backend_configuration: Union[HDF5BackendConfiguration, ZarrBackendConfiguration] ) -> None: - """Configure all datasets specified in the `backend_configuration` with their appropriate DataIO and options.""" + """ + Configure all datasets specified in the `backend_configuration` with their appropriate DataIO and options. + + Parameters + ---------- + nwbfile : pynwb.NWBFile + The in-memory pynwb.NWBFile object to configure. + backend_configuration : "hdf5" or "zarr", default: "hdf5" + The configuration model to use when configuring the datasets for this backend. 
+ """ nwbfile_objects = nwbfile.objects + # Set all DataIO based on the configuration data_io_class = backend_configuration.data_io_class for dataset_configuration in backend_configuration.dataset_configurations.values(): object_id = dataset_configuration.object_id @@ -25,6 +35,7 @@ def configure_backend( neurodata_object = nwbfile_objects[object_id] is_dataset_linked = isinstance(neurodata_object.fields.get(dataset_name), TimeSeries) + # Table columns if isinstance(neurodata_object, Data): neurodata_object.set_data_io(data_io_class=data_io_class, data_io_kwargs=data_io_kwargs) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 51f414244..35eb44f23 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -5,10 +5,11 @@ import h5py import numpy as np import zarr +from hdmf import Container from hdmf.data_utils import DataIO from hdmf_zarr import NWBZarrIO from pynwb import NWBHDF5IO, NWBFile -from pynwb.base import DynamicTable +from pynwb.base import DynamicTable, TimeSeriesReferenceVectorData from ._configuration_models._base_dataset_io import DatasetIOConfiguration @@ -52,7 +53,7 @@ def get_default_dataset_io_configurations( """ Generate DatasetIOConfiguration objects for wrapping NWB file objects with a specific backend. - This method automatically detects all objects in an NWB file that can be wrapped in a DataIO. + This method automatically detects all objects in an NWB file that can be wrapped in a hdmf.DataIO. If the NWB file is in append mode, it supports auto-detection of the backend. Otherwise, it requires a backend specification. @@ -66,9 +67,11 @@ def get_default_dataset_io_configurations( Yields ------ DatasetIOConfiguration - A summary of each detected object that can be wrapped in a DataIO. + A summary of each detected object that can be wrapped in a hdmf.DataIO. 
""" - from ..nwb_helpers import DATASET_IO_CONFIGURATIONS + from ..nwb_helpers import ( + DATASET_IO_CONFIGURATIONS, # Locally scoped to avoid circular import + ) DatasetIOConfigurationClass = DATASET_IO_CONFIGURATIONS[backend] @@ -100,17 +103,25 @@ def get_default_dataset_io_configurations( for neurodata_object in nwbfile.objects.values(): if isinstance(neurodata_object, DynamicTable): - dynamic_table = neurodata_object # for readability + dynamic_table = neurodata_object # For readability for column in dynamic_table.columns: candidate_dataset = column.data # VectorData object if _is_dataset_written_to_file( candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file ): - continue # skip + continue # Skip # Skip over columns that are already wrapped in DataIO if isinstance(candidate_dataset, DataIO): + continue # Skip + + # Skip over columns whose values are links, such as the 'group' of an ElectrodesTable + if any(isinstance(value, Container) for value in candidate_dataset): + continue # Skip + + # Skip when columns whose values are a reference type + if isinstance(column, TimeSeriesReferenceVectorData): continue dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object( @@ -121,25 +132,27 @@ def get_default_dataset_io_configurations( else: # Primarily for TimeSeries, but also any extended class that has 'data' or 'timestamps' # The most common example of this is ndx-events Events/LabeledEvents types - time_series = neurodata_object # for readability + time_series = neurodata_object # For readability for dataset_name in ("data", "timestamps"): - if dataset_name not in time_series.fields: # timestamps is optional + if dataset_name not in time_series.fields: # The 'timestamps' field is optional continue candidate_dataset = getattr(time_series, dataset_name) + + # Skip if already written to file if _is_dataset_written_to_file( candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file ): - continue # skip + continue # Skip over datasets that are already wrapped in DataIO if isinstance(candidate_dataset, DataIO): continue - # Edge case of in-memory ImageSeries with external mode; data is in fields and is empty array + # Skip edge case of in-memory ImageSeries with external mode; data is in fields and is empty array if isinstance(candidate_dataset, np.ndarray) and candidate_dataset.size == 0: - continue # skip + continue dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object( neurodata_object=time_series, dataset_name=dataset_name diff --git a/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py b/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py index 75fcfc9ed..1e243489e 100644 --- a/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py +++ b/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py @@ -76,7 +76,7 @@ def make_nwbfile_from_metadata(metadata: dict) -> NWBFile: return NWBFile(**nwbfile_kwargs) -def add_device_from_metadata(nwbfile: NWBFile, modality: str = "Ecephys", metadata: dict = None): +def add_device_from_metadata(nwbfile: NWBFile, modality: str = "Ecephys", metadata: Optional[dict] = None): """ Add device information from metadata to NWBFile object. @@ -105,6 +105,8 @@ def add_device_from_metadata(nwbfile: NWBFile, modality: str = "Ecephys", metada ] Missing keys in an element of metadata['Ecephys']['Device'] will be auto-populated with defaults. 
""" + metadata_copy = deepcopy(metadata) if metadata is not None else dict() + assert isinstance(nwbfile, NWBFile), "'nwbfile' should be of type pynwb.NWBFile" assert modality in [ "Ecephys", @@ -115,16 +117,14 @@ def add_device_from_metadata(nwbfile: NWBFile, modality: str = "Ecephys", metada defaults = dict(name=f"Device{modality}", description=f"{modality} device. Automatically generated.") - if metadata is None: - metadata = dict() - if modality not in metadata: - metadata[modality] = dict() - if "Device" not in metadata[modality]: - metadata[modality]["Device"] = [defaults] + if modality not in metadata_copy: + metadata_copy[modality] = dict() + if "Device" not in metadata_copy[modality]: + metadata_copy[modality]["Device"] = [defaults] - for dev in metadata[modality]["Device"]: - if dev.get("name", defaults["name"]) not in nwbfile.devices: - nwbfile.create_device(**dict(defaults, **dev)) + for device_metadata in metadata_copy[modality]["Device"]: + if device_metadata.get("name", defaults["name"]) not in nwbfile.devices: + nwbfile.create_device(**dict(defaults, **device_metadata)) @contextmanager @@ -173,6 +173,7 @@ def make_or_load_nwbfile( if overwrite is False and backend == "zarr": # TODO: remove when https://github.com/hdmf-dev/hdmf-zarr/issues/182 is resolved raise NotImplementedError("Appending a Zarr file is not yet supported!") + load_kwargs = dict() success = True file_initially_exists = nwbfile_path_in.exists() if nwbfile_path_in is not None else None diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index a9840617f..01168da13 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -5,13 +5,15 @@ from copy import deepcopy from datetime import datetime from pathlib import Path -from typing import List, Optional, Type, Union +from typing import List, Literal, Optional, Type, Union import numpy as np from hdmf.testing import TestCase as HDMFTestCase +from hdmf_zarr import NWBZarrIO from jsonschema.validators import Draft7Validator, validate from numpy.testing import assert_array_equal from pynwb import NWBHDF5IO +from pynwb.testing.mock.file import mock_NWBFile from spikeinterface.core.testing import check_recordings_equal, check_sortings_equal from neuroconv.basedatainterface import BaseDataInterface @@ -84,24 +86,54 @@ def check_metadata(self): validate(metadata_for_validation, schema) self.check_extracted_metadata(metadata) - def check_run_conversion(self, nwbfile_path: str): + def check_no_metadata_mutation(self): + """Ensure the metadata object was not altered by `add_to_nwbfile` method.""" metadata = self.interface.get_metadata() metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) metadata_in = deepcopy(metadata) + nwbfile = mock_NWBFile() + self.interface.add_to_nwbfile(nwbfile=nwbfile, metadata=metadata, **self.conversion_options) + + assert metadata == metadata_in + + def check_run_conversion_default_backend(self, nwbfile_path: str, backend: Literal["hdf5", "zarr"] = "hdf5"): + metadata = self.interface.get_metadata() + if "session_start_time" not in metadata["NWBFile"]: + metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) + self.interface.run_conversion( - nwbfile_path=nwbfile_path, overwrite=True, metadata=metadata, **self.conversion_options + nwbfile_path=nwbfile_path, overwrite=True, metadata=metadata, backend=backend, **self.conversion_options ) - # Test the the metadata was 
not altered by run conversin which calls add_to_nwbfile - assert metadata == metadata_in + def check_run_conversion_custom_backend(self, nwbfile_path: str, backend: Literal["hdf5", "zarr"] = "hdf5"): + metadata = self.interface.get_metadata() + if "session_start_time" not in metadata["NWBFile"]: + metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) + + nwbfile = self.interface.create_nwbfile(metadata=metadata, **self.conversion_options) + backend_configuration = self.interface.get_default_backend_configuration(nwbfile=nwbfile, backend=backend) + self.interface.run_conversion( + nwbfile_path=nwbfile_path, + overwrite=True, + nwbfile=nwbfile, + metadata=metadata, + backend=backend, + backend_configuration=backend_configuration, + **self.conversion_options, + ) @abstractmethod def check_read_nwb(self, nwbfile_path: str): """Read the produced NWB file and compare it to the interface.""" pass + def check_basic_zarr_read(self, nwbfile_path: str): + """Ensure NWBZarrIO can read the file.""" + with NWBZarrIO(path=nwbfile_path, mode="r") as io: + io.read() + def check_extracted_metadata(self, metadata: dict): """Override this method to make assertions about specific extracted metadata values.""" pass @@ -124,9 +156,20 @@ def test_conversion_as_lone_interface(self): self.check_conversion_options_schema_valid() self.check_metadata() self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb") - self.check_run_conversion(nwbfile_path=self.nwbfile_path) + + self.check_no_metadata_mutation() + + self.check_run_conversion_default_backend(nwbfile_path=self.nwbfile_path, backend="hdf5") + self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="hdf5") + self.check_read_nwb(nwbfile_path=self.nwbfile_path) + # TODO: enable when all H5DataIO prewraps are gone + # self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb.zarr") + # self.check_run_conversion(nwbfile_path=self.nwbfile_path, backend="zarr") + # self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="zarr") + # self.check_basic_zarr_read(nwbfile_path=self.nwbfile_path) + # Any extra custom checks to run self.run_custom_checks() @@ -523,7 +566,12 @@ def test_conversion_as_lone_interface(self): self.check_conversion_options_schema_valid() self.check_metadata() self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb") - self.check_run_conversion(nwbfile_path=self.nwbfile_path) + + self.check_no_metadata_mutation() + + self.check_run_conversion_default_backend(nwbfile_path=self.nwbfile_path, backend="hdf5") + self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="hdf5") + self.check_read_nwb(nwbfile_path=self.nwbfile_path) # Any extra custom checks to run diff --git a/tests/test_on_data/test_behavior_interfaces.py b/tests/test_on_data/test_behavior_interfaces.py index 8877f1bf1..3986233db 100644 --- a/tests/test_on_data/test_behavior_interfaces.py +++ b/tests/test_on_data/test_behavior_interfaces.py @@ -333,17 +333,36 @@ class TestDeepLabCutInterface(DeepLabCutInterfaceMixin, unittest.TestCase): save_directory = OUTPUT_PATH - def check_run_conversion(self, nwbfile_path: str): + def run_custom_checks(self): + self.check_custom_timestamps(nwbfile_path=self.nwbfile_path) + + def check_custom_timestamps(self, nwbfile_path: str): + # TODO: Peel out into separate test class and replace this part with check_read_nwb + if self.case != 1: # set custom timestamps + return + metadata = 
self.interface.get_metadata() metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) - if self.case == 1: # set custom timestamps - self.interface.set_aligned_timestamps(self._custom_timestamps_case_1) - assert len(self.interface._timestamps) == 2330 + self.interface.set_aligned_timestamps(self._custom_timestamps_case_1) + assert len(self.interface._timestamps) == 2330 - self.interface.run_conversion(nwbfile_path=nwbfile_path, overwrite=True, metadata=metadata) + self.interface.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata, overwrite=True) - def check_read_nwb(self, nwbfile_path: str): # This is currently structured to be file-specific + with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: + nwbfile = io.read() + assert "behavior" in nwbfile.processing + processing_module_interfaces = nwbfile.processing["behavior"].data_interfaces + assert "PoseEstimation" in processing_module_interfaces + + pose_estimation_series_in_nwb = processing_module_interfaces["PoseEstimation"].pose_estimation_series + + for pose_estimation in pose_estimation_series_in_nwb.values(): + pose_timestamps = pose_estimation.timestamps + np.testing.assert_array_equal(pose_timestamps, self._custom_timestamps_case_1) + + def check_read_nwb(self, nwbfile_path: str): + # TODO: move this to the upstream mixin with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: nwbfile = io.read() assert "behavior" in nwbfile.processing @@ -359,11 +378,6 @@ def check_read_nwb(self, nwbfile_path: str): # This is currently structured to assert all(expected_pose_estimation_series_are_in_nwb_file) - if self.case == 1: # custom timestamps - for pose_estimation in pose_estimation_series_in_nwb.values(): - pose_timestamps = pose_estimation.timestamps - np.testing.assert_array_equal(pose_timestamps, self._custom_timestamps_case_1) - class TestSLEAPInterface(DataInterfaceTestMixin, TemporalAlignmentMixin, unittest.TestCase): data_interface_cls = SLEAPInterface From 3273e518f12a8f2639f9c126b0ed84af01ebfa74 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 18:15:16 -0400 Subject: [PATCH 9/9] [pre-commit.ci] pre-commit autoupdate (#819) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 04ae1161a..cadf89204 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black - rev: 24.3.0 + rev: 24.4.0 hooks: - id: black exclude: ^docs/
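
Note on PATCH 8/9 (not part of any diff above): a sketch of the interface-level workflow it introduces for customizing a backend configuration, following check_run_conversion_custom_backend from the testing mixin. The choice of VideoInterface, the source file path, the output path, and the comment about editable options are illustrative assumptions; any DataInterface subclass follows the same pattern.

from datetime import datetime

from neuroconv.datainterfaces import VideoInterface

# Placeholder source data; any DataInterface subclass follows the same pattern.
interface = VideoInterface(file_paths=["path/to/session_video.avi"])

metadata = interface.get_metadata()
metadata["NWBFile"].update(session_start_time=datetime.now().astimezone())

# Build the NWBFile in memory, then fetch the default configuration for the chosen backend.
nwbfile = interface.create_nwbfile(metadata=metadata)
backend_configuration = interface.get_default_backend_configuration(nwbfile=nwbfile, backend="hdf5")
# Per-dataset options (chunking, compression, ...) live in
# backend_configuration.dataset_configurations and can be edited here before writing.

# Passing a custom backend_configuration requires also passing the in-memory nwbfile.
interface.run_conversion(
    nwbfile_path="session.nwb",
    nwbfile=nwbfile,
    metadata=metadata,
    overwrite=True,
    backend="hdf5",
    backend_configuration=backend_configuration,
)

When backend_configuration is omitted, run_conversion adds the interface's data itself and applies the default configuration for the chosen backend, so the simple one-call form keeps working unchanged.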