From aa318285c54db94bb6786dcade546824aeee9610 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 21:41:55 -0400 Subject: [PATCH 1/9] [pre-commit.ci] pre-commit autoupdate (#807) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4148102c2..04ae1161a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-yaml - id: end-of-file-fixer From def8e572c15409a95b592dd7073f929f00e7f31f Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Tue, 9 Apr 2024 12:09:24 -0400 Subject: [PATCH 2/9] fix daily test readme badge (#808) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1480a074a..60e68dc1c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![PyPI version](https://badge.fury.io/py/neuroconv.svg)](https://badge.fury.io/py/neuroconv.svg) -![Full Tests](https://github.com/catalystneuro/neuroconv/actions/workflows/testing.yml/badge.svg) +![Daily Tests](https://github.com/catalystneuro/neuroconv/actions/workflows/dailies.yml/badge.svg) ![Auto-release](https://github.com/catalystneuro/neuroconv/actions/workflows/auto-publish.yml/badge.svg) [![codecov](https://codecov.io/github/catalystneuro/neuroconv/coverage.svg?branch=main)](https://codecov.io/github/catalystneuro/neuroconv?branch=main) [![documentation](https://readthedocs.org/projects/neuroconv/badge/?version=main)](https://neuroconv.readthedocs.io/en/main/) From 0d03d891e68c80c91b0f9bd3509e581e1e898412 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Wed, 10 Apr 2024 14:20:44 -0400 Subject: [PATCH 3/9] Extend dev testing workflow to include probeinterface (#810) Co-authored-by: Heberto Mayorquin --- .github/workflows/dev-testing.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dev-testing.yml b/.github/workflows/dev-testing.yml index a56a8033c..7bcbbc510 100644 --- a/.github/workflows/dev-testing.yml +++ b/.github/workflows/dev-testing.yml @@ -51,8 +51,10 @@ jobs: run: pip install --no-cache-dir git+https://github.com/dandi/dandi-cli@master - name: Dev gallery - PyNWB run: pip install --no-cache-dir git+https://github.com/NeurodataWithoutBorders/pynwb@dev + - name: Dev gallery - ProbeInterface + run: pip install --no-cache-dir git+https://github.com/spikeinterface/probeinterface@main - name: Dev gallery - SpikeInterface - run: pip install --no-cache-dir git+https://github.com/spikeinterface/spikeinterface@main + run: pip install --no-cache-dir "spikeinterface[test_core] @ git+https://github.com/spikeinterface/spikeinterface@main" - name: Dev gallery - NEO run: pip install --no-cache-dir git+https://github.com/NeuralEnsemble/python-neo@master - name: Dev gallery - HDMF From 866bfeadb5e592911d2e02183768dc4e257ba3b2 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Thu, 11 Apr 2024 14:00:06 -0400 Subject: [PATCH 4/9] [Backend Configuration IVa] Add backend control to context tool (#800) Co-authored-by: Heberto Mayorquin Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- 
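Note (not part of the committed diff): a minimal usage sketch of the `backend` argument this patch adds to `make_or_load_nwbfile`, following the pattern of the new Zarr write test further below. The output path, metadata contents, and TimeSeries are illustrative placeholders; a timezone-aware session_start_time is the only strictly required metadata field, and appending (overwrite=False) with the Zarr backend is not yet supported.

from datetime import datetime, timezone

import numpy as np
from pynwb import TimeSeries

from neuroconv.tools.nwb_helpers import make_or_load_nwbfile

# Placeholder metadata; a timezone-aware session_start_time is required.
metadata = dict(NWBFile=dict(session_start_time=datetime.now(timezone.utc)))

# Placeholder data to write.
time_series = TimeSeries(name="example", data=np.arange(10), unit="a.u.", starting_time=0.0, rate=1.0)

# Appending (overwrite=False) is not yet supported for the Zarr backend, so write a fresh file.
with make_or_load_nwbfile(
    nwbfile_path="example.nwb.zarr", metadata=metadata, overwrite=True, backend="zarr"
) as nwbfile:
    nwbfile.add_acquisition(time_series)

Reading the result back then goes through hdmf_zarr's NWBZarrIO rather than NWBHDF5IO, as exercised by the new test_make_or_load_nwbfile_write_zarr test.
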
CHANGELOG.md | 3 +- .../nwb_helpers/_metadata_and_file_helpers.py | 38 ++++++-- .../test_tools/test_context_tools.py | 90 +++++++++++++++++-- 3 files changed, 116 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 919bb46a2..8fbe63f6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,14 @@ ### Deprecations * Removed `stream_id` as an argument from `IntanRecordingInterface` [PR #794](https://github.com/catalystneuro/neuroconv/pull/794) -* Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) ### Features * Released the first official Docker images for the package on the GitHub Container Repository (GHCR). [PR #383](https://github.com/catalystneuro/neuroconv/pull/383) +* Added `backend` control to the `make_or_load_nwbfile` helper method in `neuroconv.tools.nwb_helpers`. [PR #800](https://github.com/catalystneuro/neuroconv/pull/800) ### Bug fixes * Fixed writing waveforms directly to file [PR #799](https://github.com/catalystneuro/neuroconv/pull/799) +* Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) diff --git a/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py b/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py index 85598c6bf..75fcfc9ed 100644 --- a/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py +++ b/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py @@ -5,11 +5,11 @@ from copy import deepcopy from datetime import datetime from pathlib import Path -from typing import Optional +from typing import Literal, Optional from warnings import warn from pydantic import FilePath -from pynwb import NWBHDF5IO, NWBFile +from pynwb import NWBFile from pynwb.file import Subject from ...utils.dict import DeepDict, load_dict_from_file @@ -133,6 +133,7 @@ def make_or_load_nwbfile( nwbfile: Optional[NWBFile] = None, metadata: Optional[dict] = None, overwrite: bool = False, + backend: Literal["hdf5", "zarr"] = "hdf5", verbose: bool = True, ): """ @@ -150,11 +151,17 @@ def make_or_load_nwbfile( overwrite: bool, optional Whether to overwrite the NWBFile if one exists at the nwbfile_path. The default is False (append mode). + backend : "hdf5" or "zarr", default: "hdf5" + The type of backend used to create the file. verbose: bool, optional If 'nwbfile_path' is specified, informs user after a successful write operation. The default is True. """ + from . import BACKEND_NWB_IO + nwbfile_path_in = Path(nwbfile_path) if nwbfile_path else None + backend_io_class = BACKEND_NWB_IO[backend] + assert not (nwbfile_path is None and nwbfile is None and metadata is None), ( "You must specify either an 'nwbfile_path', or an in-memory 'nwbfile' object, " "or provide the metadata for creating one." @@ -163,17 +170,36 @@ def make_or_load_nwbfile( "'nwbfile_path' exists at location, 'overwrite' is False (append mode), but an in-memory 'nwbfile' object was " "passed! Cannot reconcile which nwbfile object to write." 
) - + if overwrite is False and backend == "zarr": + # TODO: remove when https://github.com/hdmf-dev/hdmf-zarr/issues/182 is resolved + raise NotImplementedError("Appending a Zarr file is not yet supported!") load_kwargs = dict() success = True - file_initially_exists = nwbfile_path_in.is_file() if nwbfile_path_in is not None else None + file_initially_exists = nwbfile_path_in.exists() if nwbfile_path_in is not None else None if nwbfile_path_in: - load_kwargs.update(path=nwbfile_path_in) + load_kwargs.update(path=str(nwbfile_path_in)) if file_initially_exists and not overwrite: load_kwargs.update(mode="r+", load_namespaces=True) else: load_kwargs.update(mode="w") - io = NWBHDF5IO(**load_kwargs) + + backends_that_can_read = [ + backend_name + for backend_name, backend_io_class in BACKEND_NWB_IO.items() + if backend_io_class.can_read(path=str(nwbfile_path_in)) + ] + # Future-proofing: raise an error if more than one backend can read the file + assert ( + len(backends_that_can_read) <= 1 + ), "More than one backend is capable of reading the file! Please raise an issue describing your file." + if load_kwargs["mode"] == "r+" and backend not in backends_that_can_read: + raise IOError( + f"The chosen backend ('{backend}') is unable to read the file! " + f"Please select '{backends_that_can_read[0]}' instead." + ) + + io = backend_io_class(**load_kwargs) + try: if load_kwargs.get("mode", "") == "r+": nwbfile = io.read() diff --git a/tests/test_minimal/test_tools/test_context_tools.py b/tests/test_minimal/test_tools/test_context_tools.py index fd72a79f0..fc2ce7d18 100644 --- a/tests/test_minimal/test_tools/test_context_tools.py +++ b/tests/test_minimal/test_tools/test_context_tools.py @@ -6,6 +6,7 @@ from unittest.mock import patch from hdmf.testing import TestCase +from hdmf_zarr import NWBZarrIO from pynwb import NWBHDF5IO, TimeSeries from neuroconv.tools.nwb_helpers import make_nwbfile_from_metadata, make_or_load_nwbfile @@ -74,14 +75,26 @@ def test_make_or_load_nwbfile_no_file_save_on_error_in_context(self): pass assert not nwbfile_path.exists() - def test_make_or_load_nwbfile_write(self): + def test_make_or_load_nwbfile_write_hdf5(self): nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_write.nwb" - with make_or_load_nwbfile(nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True) as nwbfile: + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="hdf5" + ) as nwbfile: nwbfile.add_acquisition(self.time_series_1) with NWBHDF5IO(path=nwbfile_path, mode="r") as io: nwbfile_out = io.read() assert "test1" in nwbfile_out.acquisition + def test_make_or_load_nwbfile_write_zarr(self): + nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_write.nwb.zarr" + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="zarr" + ) as nwbfile: + nwbfile.add_acquisition(self.time_series_1) + with NWBZarrIO(path=str(nwbfile_path), mode="r") as io: + nwbfile_out = io.read() + assert "test1" in nwbfile_out.acquisition + def test_make_or_load_nwbfile_closure(self): nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_closure.nwb" with make_or_load_nwbfile(nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True) as nwbfile: @@ -91,28 +104,89 @@ def test_make_or_load_nwbfile_closure(self): self.assertCountEqual(nwbfile_out.acquisition["test1"].data, self.time_series_1.data) assert not nwbfile_out.acquisition["test1"].data # A closed h5py.Dataset returns false - def 
test_make_or_load_nwbfile_overwrite(self): + def test_make_or_load_nwbfile_overwrite_hdf5(self): nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_overwrite.nwb" - with make_or_load_nwbfile(nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True) as nwbfile: + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="hdf5" + ) as nwbfile: nwbfile.add_acquisition(self.time_series_1) - with make_or_load_nwbfile(nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True) as nwbfile: + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="hdf5" + ) as nwbfile: nwbfile.add_acquisition(self.time_series_2) with NWBHDF5IO(path=nwbfile_path, mode="r") as io: nwbfile_out = io.read() assert "test1" not in nwbfile_out.acquisition assert "test2" in nwbfile_out.acquisition - def test_make_or_load_nwbfile_append(self): + def test_make_or_load_nwbfile_overwrite_zarr(self): + nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_overwrite.nwb.zarr" + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="zarr" + ) as nwbfile: + nwbfile.add_acquisition(self.time_series_1) + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="zarr" + ) as nwbfile: + nwbfile.add_acquisition(self.time_series_2) + with NWBZarrIO(path=str(nwbfile_path), mode="r") as io: + nwbfile_out = io.read() + assert "test1" not in nwbfile_out.acquisition + assert "test2" in nwbfile_out.acquisition + + def test_make_or_load_nwbfile_append_hdf5(self): nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_append.nwb" - with make_or_load_nwbfile(nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True) as nwbfile: + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="hdf5" + ) as nwbfile: nwbfile.add_acquisition(self.time_series_1) - with make_or_load_nwbfile(nwbfile_path=nwbfile_path) as nwbfile: + with make_or_load_nwbfile(nwbfile_path=nwbfile_path, overwrite=False, backend="hdf5") as nwbfile: nwbfile.add_acquisition(self.time_series_2) with NWBHDF5IO(path=nwbfile_path, mode="r") as io: nwbfile_out = io.read() assert "test1" in nwbfile_out.acquisition assert "test2" in nwbfile_out.acquisition + # TODO: re-include when https://github.com/hdmf-dev/hdmf-zarr/issues/182 is resolved + # def test_make_or_load_nwbfile_append_zarr(self): + # nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_append.nwb.zarr" + # with make_or_load_nwbfile( + # nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="zarr" + # ) as nwbfile: + # nwbfile.add_acquisition(self.time_series_1) + # with make_or_load_nwbfile(nwbfile_path=nwbfile_path, overwrite=False, backend="zarr") as nwbfile: + # nwbfile.add_acquisition(self.time_series_2) + # with NWBZarrIO(path=str(nwbfile_path), mode="r") as io: + # nwbfile_out = io.read() + # assert "test1" in nwbfile_out.acquisition + # assert "test2" in nwbfile_out.acquisition + # + # def test_make_or_load_nwbfile_append_hdf5_using_zarr_error(self): + # nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_append.nwb" + # with make_or_load_nwbfile( + # nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="hdf5" + # ) as nwbfile: + # nwbfile.add_acquisition(self.time_series_1) + # with self.assertRaisesWith( + # exc_type=IOError, + # exc_msg="The chosen backend (zarr) is unable to read the file! 
Please select a different backend.", + # ): + # with make_or_load_nwbfile(nwbfile_path=nwbfile_path, overwrite=False, backend="zarr") as nwbfile: + # nwbfile.add_acquisition(self.time_series_2) + + def test_make_or_load_nwbfile_append_zarr_using_hdf5_error(self): + nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_append.nwb.zarr" + with make_or_load_nwbfile( + nwbfile_path=nwbfile_path, metadata=self.metadata, overwrite=True, backend="zarr" + ) as nwbfile: + nwbfile.add_acquisition(self.time_series_1) + with self.assertRaisesWith( + exc_type=IOError, + exc_msg="The chosen backend ('hdf5') is unable to read the file! Please select 'zarr' instead.", + ): + with make_or_load_nwbfile(nwbfile_path=nwbfile_path, overwrite=False, backend="hdf5") as nwbfile: + nwbfile.add_acquisition(self.time_series_2) + def test_make_or_load_nwbfile_pass_nwbfile(self): nwbfile_path = self.tmpdir / "test_make_or_load_nwbfile_pass_nwbfile.nwb" nwbfile_in = make_nwbfile_from_metadata(metadata=self.metadata) From 6ec538bbc056330f48829730900564543f8a36b0 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 11 Apr 2024 12:52:58 -0600 Subject: [PATCH 5/9] Avoid modifying metadata in-place in the `VideoInterface` (#814) --- CHANGELOG.md | 3 +-- .../behavior/video/videodatainterface.py | 6 +++-- src/neuroconv/tools/neo/neo.py | 25 ++++++++++--------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fbe63f6b..81f479245 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,9 @@ ### Bug fixes * Fixed writing waveforms directly to file [PR #799](https://github.com/catalystneuro/neuroconv/pull/799) +* Avoid in-place modification of the metadata in the `VideoInterface` and on neo tools. [PR #814](https://github.com/catalystneuro/neuroconv/pull/814) * Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) - - # v0.4.8 (March 20, 2024) ### Bug fixes diff --git a/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py b/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py index 26a6f9070..341de83e8 100644 --- a/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py @@ -1,3 +1,4 @@ +from copy import deepcopy from pathlib import Path from typing import List, Literal, Optional from warnings import warn @@ -303,9 +304,10 @@ def add_to_nwbfile( file_paths = self.source_data["file_paths"] - videos_metadata = metadata.get("Behavior", dict()).get("Videos", None) + # Be sure to copy metadata at this step to avoid mutating in-place + videos_metadata = deepcopy(metadata).get("Behavior", dict()).get("Videos", None) if videos_metadata is None: - videos_metadata = self.get_metadata()["Behavior"]["Videos"] + videos_metadata = deepcopy(self.get_metadata()["Behavior"]["Videos"]) assert len(videos_metadata) == self._number_of_files, ( "Incomplete metadata " diff --git a/src/neuroconv/tools/neo/neo.py b/src/neuroconv/tools/neo/neo.py index 477836991..a56236365 100644 --- a/src/neuroconv/tools/neo/neo.py +++ b/src/neuroconv/tools/neo/neo.py @@ -159,17 +159,16 @@ def add_icephys_electrode(neo_reader, nwbfile, metadata: dict = None): ... 
] """ + metadata_copy = deepcopy(metadata) if metadata is not None else dict() + assert isinstance(nwbfile, pynwb.NWBFile), "'nwbfile' should be of type pynwb.NWBFile" if len(nwbfile.devices) == 0: warnings.warn("When adding Icephys Electrode, no Devices were found on nwbfile. Creating a Device now...") - add_device_from_metadata(nwbfile=nwbfile, modality="Icephys", metadata=metadata) - - if metadata is None: - metadata = dict() + add_device_from_metadata(nwbfile=nwbfile, modality="Icephys", metadata=metadata_copy) - if "Icephys" not in metadata: - metadata["Icephys"] = dict() + if "Icephys" not in metadata_copy: + metadata_copy["Icephys"] = dict() defaults = [ dict( @@ -180,15 +179,15 @@ def add_icephys_electrode(neo_reader, nwbfile, metadata: dict = None): for elec_id in range(get_number_of_electrodes(neo_reader)) ] - if "Electrodes" not in metadata["Icephys"] or len(metadata["Icephys"]["Electrodes"]) == 0: - metadata["Icephys"]["Electrodes"] = deepcopy(defaults) + if "Electrodes" not in metadata_copy["Icephys"] or len(metadata_copy["Icephys"]["Electrodes"]) == 0: + metadata_copy["Icephys"]["Electrodes"] = defaults assert all( - [isinstance(x, dict) for x in metadata["Icephys"]["Electrodes"]] + [isinstance(x, dict) for x in metadata_copy["Icephys"]["Electrodes"]] ), "Expected metadata['Icephys']['Electrodes'] to be a list of dictionaries!" # Create Icephys electrode from metadata - for elec in metadata["Icephys"]["Electrodes"]: + for elec in metadata_copy["Icephys"]["Electrodes"]: if elec.get("name", defaults[0]["name"]) not in nwbfile.icephys_electrodes: device_name = elec.pop("device_name", None) or elec.pop("device", defaults[0]["device_name"]) # elec.pop("device_name", 0) @@ -286,8 +285,10 @@ def add_icephys_recordings( else: sessions_offset = 0 - relative_session_start_time = metadata["Icephys"]["Sessions"][sessions_offset]["relative_session_start_time"] - session_stimulus_type = metadata["Icephys"]["Sessions"][sessions_offset]["stimulus_type"] + relative_session_start_time = deepcopy( + metadata["Icephys"]["Sessions"][sessions_offset]["relative_session_start_time"] + ) + session_stimulus_type = deepcopy(metadata["Icephys"]["Sessions"][sessions_offset]["stimulus_type"]) # Sequential icephys recordings simultaneous_recordings = list() From 88576d652a9e428a87c9a10605245b88e35fde8b Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 11 Apr 2024 17:19:40 -0600 Subject: [PATCH 6/9] Add general test to avoid in-place modification of metadata by interfaces (#815) Co-authored-by: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> --- CHANGELOG.md | 5 +++++ .../ecephys/basesortingextractorinterface.py | 6 ++++-- .../tools/testing/data_interface_mixins.py | 13 ++++++++++--- tests/test_on_data/test_behavior_interfaces.py | 2 +- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81f479245..fc6eb273b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,11 @@ * Avoid in-place modification of the metadata in the `VideoInterface` and on neo tools. [PR #814](https://github.com/catalystneuro/neuroconv/pull/814) * Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) +### Testing +* Add general test for metadata in-place modification by interfaces. 
[PR #815](https://github.com/catalystneuro/neuroconv/pull/815) + + + # v0.4.8 (March 20, 2024) ### Bug fixes diff --git a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py index ed333b48c..fbc672b67 100644 --- a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py @@ -1,3 +1,4 @@ +from copy import deepcopy from typing import List, Literal, Optional, Union import numpy as np @@ -307,8 +308,9 @@ def add_to_nwbfile( """ from ...tools.spikeinterface import add_sorting + metadata_copy = deepcopy(metadata) if write_ecephys_metadata: - self.add_channel_metadata_to_nwb(nwbfile=nwbfile, metadata=metadata) + self.add_channel_metadata_to_nwb(nwbfile=nwbfile, metadata=metadata_copy) if stub_test: sorting_extractor = self.subset_sorting() @@ -316,7 +318,7 @@ def add_to_nwbfile( sorting_extractor = self.sorting_extractor property_descriptions = dict() - for metadata_column in metadata["Ecephys"].get("UnitProperties", []): + for metadata_column in metadata_copy["Ecephys"].get("UnitProperties", []): property_descriptions.update({metadata_column["name"]: metadata_column["description"]}) for unit_id in sorting_extractor.get_unit_ids(): # Special condition for wrapping electrode group pointers to actual object ids rather than string names diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index bde8898cf..a9840617f 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -2,6 +2,7 @@ import json import tempfile from abc import abstractmethod +from copy import deepcopy from datetime import datetime from pathlib import Path from typing import List, Optional, Type, Union @@ -83,13 +84,19 @@ def check_metadata(self): validate(metadata_for_validation, schema) self.check_extracted_metadata(metadata) - def run_conversion(self, nwbfile_path: str): + def check_run_conversion(self, nwbfile_path: str): metadata = self.interface.get_metadata() metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) + + metadata_in = deepcopy(metadata) + self.interface.run_conversion( nwbfile_path=nwbfile_path, overwrite=True, metadata=metadata, **self.conversion_options ) + # Test the the metadata was not altered by run conversin which calls add_to_nwbfile + assert metadata == metadata_in + @abstractmethod def check_read_nwb(self, nwbfile_path: str): """Read the produced NWB file and compare it to the interface.""" @@ -117,7 +124,7 @@ def test_conversion_as_lone_interface(self): self.check_conversion_options_schema_valid() self.check_metadata() self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb") - self.run_conversion(nwbfile_path=self.nwbfile_path) + self.check_run_conversion(nwbfile_path=self.nwbfile_path) self.check_read_nwb(nwbfile_path=self.nwbfile_path) # Any extra custom checks to run @@ -516,7 +523,7 @@ def test_conversion_as_lone_interface(self): self.check_conversion_options_schema_valid() self.check_metadata() self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb") - self.run_conversion(nwbfile_path=self.nwbfile_path) + self.check_run_conversion(nwbfile_path=self.nwbfile_path) self.check_read_nwb(nwbfile_path=self.nwbfile_path) # Any extra custom checks to run diff --git a/tests/test_on_data/test_behavior_interfaces.py 
b/tests/test_on_data/test_behavior_interfaces.py index 17b956b6c..8877f1bf1 100644 --- a/tests/test_on_data/test_behavior_interfaces.py +++ b/tests/test_on_data/test_behavior_interfaces.py @@ -333,7 +333,7 @@ class TestDeepLabCutInterface(DeepLabCutInterfaceMixin, unittest.TestCase): save_directory = OUTPUT_PATH - def run_conversion(self, nwbfile_path: str): + def check_run_conversion(self, nwbfile_path: str): metadata = self.interface.get_metadata() metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) From dd9af163069fb30470a9eae363bfd9b82f6bd8b7 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Sun, 14 Apr 2024 13:04:07 -0400 Subject: [PATCH 7/9] [Backend Configuration IVb] Swap internal variable name for consistency (#817) --- .../tools/nwb_helpers/_configure_backend.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/neuroconv/tools/nwb_helpers/_configure_backend.py b/src/neuroconv/tools/nwb_helpers/_configure_backend.py index 6bc055a04..fd97ea529 100644 --- a/src/neuroconv/tools/nwb_helpers/_configure_backend.py +++ b/src/neuroconv/tools/nwb_helpers/_configure_backend.py @@ -23,21 +23,22 @@ def configure_backend( # TODO: update buffer shape in iterator, if present - nwbfile_object = nwbfile_objects[object_id] - is_dataset_linked = isinstance(nwbfile_object.fields.get(dataset_name), TimeSeries) + neurodata_object = nwbfile_objects[object_id] + is_dataset_linked = isinstance(neurodata_object.fields.get(dataset_name), TimeSeries) # Table columns - if isinstance(nwbfile_object, Data): - nwbfile_object.set_data_io(data_io_class=data_io_class, data_io_kwargs=data_io_kwargs) + if isinstance(neurodata_object, Data): + neurodata_object.set_data_io(data_io_class=data_io_class, data_io_kwargs=data_io_kwargs) # TimeSeries data or timestamps - elif isinstance(nwbfile_object, TimeSeries) and not is_dataset_linked: - nwbfile_object.set_data_io( + elif isinstance(neurodata_object, TimeSeries) and not is_dataset_linked: + neurodata_object.set_data_io( dataset_name=dataset_name, data_io_class=data_io_class, data_io_kwargs=data_io_kwargs ) # Skip the setting of a DataIO when target dataset is a link (assume it will be found in parent) - elif isinstance(nwbfile_object, TimeSeries) and is_dataset_linked: + elif isinstance(neurodata_object, TimeSeries) and is_dataset_linked: continue # Strictly speaking, it would be odd if a backend_configuration led to this, but might as well be safe else: raise NotImplementedError( - f"Unsupported object type {type(nwbfile_object)} for backend configuration of {nwbfile_object.name}!" + f"Unsupported object type {type(neurodata_object)} for backend configuration " + f"of {neurodata_object.name}!" 
) From 3711c63e0061d072e1cab0d14d2c4734182c9675 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 15 Apr 2024 02:06:47 -0400 Subject: [PATCH 8/9] [Backend Configuration IVc] Integrate backend control as interface method (#801) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 9 +- src/neuroconv/basedatainterface.py | 90 ++++++++++++++++++- .../nwb_helpers/_backend_configuration.py | 4 +- .../_configuration_models/_base_dataset_io.py | 7 +- .../tools/nwb_helpers/_configure_backend.py | 13 ++- .../nwb_helpers/_dataset_configuration.py | 35 +++++--- .../nwb_helpers/_metadata_and_file_helpers.py | 21 ++--- .../tools/testing/data_interface_mixins.py | 62 +++++++++++-- .../test_on_data/test_behavior_interfaces.py | 36 +++++--- 9 files changed, 225 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc6eb273b..ea0978ed2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,15 +3,16 @@ ### Deprecations * Removed `stream_id` as an argument from `IntanRecordingInterface` [PR #794](https://github.com/catalystneuro/neuroconv/pull/794) -### Features -* Released the first official Docker images for the package on the GitHub Container Repository (GHCR). [PR #383](https://github.com/catalystneuro/neuroconv/pull/383) -* Added `backend` control to the `make_or_load_nwbfile` helper method in `neuroconv.tools.nwb_helpers`. [PR #800](https://github.com/catalystneuro/neuroconv/pull/800) - ### Bug fixes * Fixed writing waveforms directly to file [PR #799](https://github.com/catalystneuro/neuroconv/pull/799) * Avoid in-place modification of the metadata in the `VideoInterface` and on neo tools. [PR #814](https://github.com/catalystneuro/neuroconv/pull/814) * Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) +### Features +* Added `backend` control to the `make_or_load_nwbfile` helper method in `neuroconv.tools.nwb_helpers`. [PR #800](https://github.com/catalystneuro/neuroconv/pull/800) +* Added `get_default_backend_configuration` method to all `DataInterface` classes. Also added HDF5 `backend` control to all standalone `.run_conversion(...)` methods for those interfaces. [PR #801](https://github.com/catalystneuro/neuroconv/pull/801) +* Released the first official Docker images for the package on the GitHub Container Repository (GHCR). [PR #383](https://github.com/catalystneuro/neuroconv/pull/383) + ### Testing * Add general test for metadata in-place modification by interfaces. 
[PR #815](https://github.com/catalystneuro/neuroconv/pull/815) diff --git a/src/neuroconv/basedatainterface.py b/src/neuroconv/basedatainterface.py index b46875f9e..aafe7a63a 100644 --- a/src/neuroconv/basedatainterface.py +++ b/src/neuroconv/basedatainterface.py @@ -3,12 +3,19 @@ import warnings from abc import ABC, abstractmethod from pathlib import Path -from typing import Optional, Tuple, Union +from typing import Literal, Optional, Tuple, Union from jsonschema.validators import validate from pynwb import NWBFile -from .tools.nwb_helpers import make_nwbfile_from_metadata, make_or_load_nwbfile +from .tools.nwb_helpers import ( + HDF5BackendConfiguration, + ZarrBackendConfiguration, + configure_backend, + get_default_backend_configuration, + make_nwbfile_from_metadata, + make_or_load_nwbfile, +) from .utils import ( NWBMetaDataEncoder, get_schema_from_method_signature, @@ -60,13 +67,43 @@ def validate_metadata(self, metadata: dict) -> None: decoded_metadata = json.loads(serialized_metadata) validate(instance=decoded_metadata, schema=self.get_metadata_schema()) - def create_nwbfile(self, metadata=None, **conversion_options) -> NWBFile: + def create_nwbfile(self, metadata: Optional[dict] = None, **conversion_options) -> NWBFile: + """ + Create and return an in-memory pynwb.NWBFile object with this interface's data added to it. + + Parameters + ---------- + metadata : dict, optional + Metadata dictionary with information used to create the NWBFile. + **conversion_options + Additional keyword arguments to pass to the `.add_to_nwbfile` method. + + Returns + ------- + nwbfile : pynwb.NWBFile + The in-memory object with this interface's data added to it. + """ + if metadata is None: + metadata = self.get_metadata() + nwbfile = make_nwbfile_from_metadata(metadata) self.add_to_nwbfile(nwbfile, metadata=metadata, **conversion_options) return nwbfile @abstractmethod def add_to_nwbfile(self, nwbfile: NWBFile, **conversion_options) -> None: + """ + Define a protocol for mapping the data from this interface to NWB neurodata objects. + + These neurodata objects should also be added to the in-memory pynwb.NWBFile object in this step. + + Parameters + ---------- + nwbfile : pynwb.NWBFile + The in-memory object to add the data to. + **conversion_options + Additional keyword arguments to pass to the `.add_to_nwbfile` method. + """ raise NotImplementedError def run_conversion( @@ -75,6 +112,11 @@ def run_conversion( nwbfile: Optional[NWBFile] = None, metadata: Optional[dict] = None, overwrite: bool = False, + # TODO: when all H5DataIO prewraps are gone, introduce Zarr safely + # backend: Union[Literal["hdf5", "zarr"]], + # backend_configuration: Optional[Union[HDF5BackendConfiguration, ZarrBackendConfiguration]] = None, + backend: Literal["hdf5"] = "hdf5", + backend_configuration: Optional[HDF5BackendConfiguration] = None, **conversion_options, ): """ @@ -92,6 +134,13 @@ def run_conversion( overwrite : bool, default: False Whether to overwrite the NWBFile if one exists at the nwbfile_path. The default is False (append mode). + backend : "hdf5" or a HDF5BackendConfiguration, default: "hdf5" + The type of backend to use when writing the file. + backend_configuration : HDF5BackendConfiguration, optional + The configuration model to use when configuring the datasets for this backend. + To customize, call the `.get_default_backend_configuration(...)` method, modify the returned + BackendConfiguration object, and pass that instead. + Otherwise, all datasets will use default configuration settings. 
""" if nwbfile_path is None: warnings.warn( # TODO: remove on or after 6/21/2024 @@ -99,6 +148,8 @@ def run_conversion( "NWBFile object in memory, use DataInterface.create_nwbfile. To append to an existing NWBFile object," " use DataInterface.add_to_nwbfile." ) + if backend_configuration is not None and nwbfile is None: + raise ValueError("When specifying a custom `backend_configuration`, you must also provide an `nwbfile`.") if metadata is None: metadata = self.get_metadata() @@ -108,6 +159,37 @@ def run_conversion( nwbfile=nwbfile, metadata=metadata, overwrite=overwrite, + backend=backend, verbose=getattr(self, "verbose", False), ) as nwbfile_out: - self.add_to_nwbfile(nwbfile_out, metadata=metadata, **conversion_options) + if backend_configuration is None: + # In this case, assume the relevant data has already been added to the NWBFile + self.add_to_nwbfile(nwbfile_out, metadata=metadata, **conversion_options) + + backend_configuration = self.get_default_backend_configuration(nwbfile=nwbfile_out, backend=backend) + + configure_backend(nwbfile=nwbfile_out, backend_configuration=backend_configuration) + + @staticmethod + def get_default_backend_configuration( + nwbfile: NWBFile, + # TODO: when all H5DataIO prewraps are gone, introduce Zarr safely + # backend: Union[Literal["hdf5", "zarr"]], + backend: Literal["hdf5"] = "hdf5", + ) -> Union[HDF5BackendConfiguration, ZarrBackendConfiguration]: + """ + Fill and return a default backend configuration to serve as a starting point for further customization. + + Parameters + ---------- + nwbfile : pynwb.NWBFile + The in-memory object with this interface's data already added to it. + backend : "hdf5" or "zarr", default: "hdf5" + The type of backend to use when creating the file. + + Returns + ------- + backend_configuration : HDF5BackendConfiguration or ZarrBackendConfiguration + The default configuration for the specified backend type. 
+ """ + return get_default_backend_configuration(nwbfile=nwbfile, backend=backend) diff --git a/src/neuroconv/tools/nwb_helpers/_backend_configuration.py b/src/neuroconv/tools/nwb_helpers/_backend_configuration.py index 0b35fdb87..d9c701c3b 100644 --- a/src/neuroconv/tools/nwb_helpers/_backend_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_backend_configuration.py @@ -12,7 +12,9 @@ def get_default_backend_configuration( nwbfile: NWBFile, backend: Literal["hdf5", "zarr"] ) -> Union[HDF5BackendConfiguration, ZarrBackendConfiguration]: """Fill a default backend configuration to serve as a starting point for further customization.""" - from ..nwb_helpers import BACKEND_CONFIGURATIONS + from ..nwb_helpers import ( + BACKEND_CONFIGURATIONS, # Locally scoped to avoid circular import + ) BackendConfigurationClass = BACKEND_CONFIGURATIONS[backend] diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py index 9562fa83e..fbb85a858 100644 --- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py +++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py @@ -260,9 +260,10 @@ def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Litera ) compression_method = "gzip" elif dtype == np.dtype("object"): # Unclear what default chunking/compression should be for compound objects - chunk_shape = None - buffer_shape = None - compression_method = None + raise NotImplementedError( + f"Unable to create a `DatasetIOConfiguration` for the dataset at '{location_in_file}'" + f"for neurodata object '{neurodata_object}' of type '{type(neurodata_object)}'!" + ) return cls( object_id=neurodata_object.object_id, diff --git a/src/neuroconv/tools/nwb_helpers/_configure_backend.py b/src/neuroconv/tools/nwb_helpers/_configure_backend.py index fd97ea529..8e7a2f17b 100644 --- a/src/neuroconv/tools/nwb_helpers/_configure_backend.py +++ b/src/neuroconv/tools/nwb_helpers/_configure_backend.py @@ -12,9 +12,19 @@ def configure_backend( nwbfile: NWBFile, backend_configuration: Union[HDF5BackendConfiguration, ZarrBackendConfiguration] ) -> None: - """Configure all datasets specified in the `backend_configuration` with their appropriate DataIO and options.""" + """ + Configure all datasets specified in the `backend_configuration` with their appropriate DataIO and options. + + Parameters + ---------- + nwbfile : pynwb.NWBFile + The in-memory pynwb.NWBFile object to configure. + backend_configuration : "hdf5" or "zarr", default: "hdf5" + The configuration model to use when configuring the datasets for this backend. 
+ """ nwbfile_objects = nwbfile.objects + # Set all DataIO based on the configuration data_io_class = backend_configuration.data_io_class for dataset_configuration in backend_configuration.dataset_configurations.values(): object_id = dataset_configuration.object_id @@ -25,6 +35,7 @@ def configure_backend( neurodata_object = nwbfile_objects[object_id] is_dataset_linked = isinstance(neurodata_object.fields.get(dataset_name), TimeSeries) + # Table columns if isinstance(neurodata_object, Data): neurodata_object.set_data_io(data_io_class=data_io_class, data_io_kwargs=data_io_kwargs) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py index 51f414244..35eb44f23 100644 --- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -5,10 +5,11 @@ import h5py import numpy as np import zarr +from hdmf import Container from hdmf.data_utils import DataIO from hdmf_zarr import NWBZarrIO from pynwb import NWBHDF5IO, NWBFile -from pynwb.base import DynamicTable +from pynwb.base import DynamicTable, TimeSeriesReferenceVectorData from ._configuration_models._base_dataset_io import DatasetIOConfiguration @@ -52,7 +53,7 @@ def get_default_dataset_io_configurations( """ Generate DatasetIOConfiguration objects for wrapping NWB file objects with a specific backend. - This method automatically detects all objects in an NWB file that can be wrapped in a DataIO. + This method automatically detects all objects in an NWB file that can be wrapped in a hdmf.DataIO. If the NWB file is in append mode, it supports auto-detection of the backend. Otherwise, it requires a backend specification. @@ -66,9 +67,11 @@ def get_default_dataset_io_configurations( Yields ------ DatasetIOConfiguration - A summary of each detected object that can be wrapped in a DataIO. + A summary of each detected object that can be wrapped in a hdmf.DataIO. 
""" - from ..nwb_helpers import DATASET_IO_CONFIGURATIONS + from ..nwb_helpers import ( + DATASET_IO_CONFIGURATIONS, # Locally scoped to avoid circular import + ) DatasetIOConfigurationClass = DATASET_IO_CONFIGURATIONS[backend] @@ -100,17 +103,25 @@ def get_default_dataset_io_configurations( for neurodata_object in nwbfile.objects.values(): if isinstance(neurodata_object, DynamicTable): - dynamic_table = neurodata_object # for readability + dynamic_table = neurodata_object # For readability for column in dynamic_table.columns: candidate_dataset = column.data # VectorData object if _is_dataset_written_to_file( candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file ): - continue # skip + continue # Skip # Skip over columns that are already wrapped in DataIO if isinstance(candidate_dataset, DataIO): + continue # Skip + + # Skip over columns whose values are links, such as the 'group' of an ElectrodesTable + if any(isinstance(value, Container) for value in candidate_dataset): + continue # Skip + + # Skip when columns whose values are a reference type + if isinstance(column, TimeSeriesReferenceVectorData): continue dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object( @@ -121,25 +132,27 @@ def get_default_dataset_io_configurations( else: # Primarily for TimeSeries, but also any extended class that has 'data' or 'timestamps' # The most common example of this is ndx-events Events/LabeledEvents types - time_series = neurodata_object # for readability + time_series = neurodata_object # For readability for dataset_name in ("data", "timestamps"): - if dataset_name not in time_series.fields: # timestamps is optional + if dataset_name not in time_series.fields: # The 'timestamps' field is optional continue candidate_dataset = getattr(time_series, dataset_name) + + # Skip if already written to file if _is_dataset_written_to_file( candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file ): - continue # skip + continue # Skip over datasets that are already wrapped in DataIO if isinstance(candidate_dataset, DataIO): continue - # Edge case of in-memory ImageSeries with external mode; data is in fields and is empty array + # Skip edge case of in-memory ImageSeries with external mode; data is in fields and is empty array if isinstance(candidate_dataset, np.ndarray) and candidate_dataset.size == 0: - continue # skip + continue dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object( neurodata_object=time_series, dataset_name=dataset_name diff --git a/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py b/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py index 75fcfc9ed..1e243489e 100644 --- a/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py +++ b/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py @@ -76,7 +76,7 @@ def make_nwbfile_from_metadata(metadata: dict) -> NWBFile: return NWBFile(**nwbfile_kwargs) -def add_device_from_metadata(nwbfile: NWBFile, modality: str = "Ecephys", metadata: dict = None): +def add_device_from_metadata(nwbfile: NWBFile, modality: str = "Ecephys", metadata: Optional[dict] = None): """ Add device information from metadata to NWBFile object. @@ -105,6 +105,8 @@ def add_device_from_metadata(nwbfile: NWBFile, modality: str = "Ecephys", metada ] Missing keys in an element of metadata['Ecephys']['Device'] will be auto-populated with defaults. 
""" + metadata_copy = deepcopy(metadata) if metadata is not None else dict() + assert isinstance(nwbfile, NWBFile), "'nwbfile' should be of type pynwb.NWBFile" assert modality in [ "Ecephys", @@ -115,16 +117,14 @@ def add_device_from_metadata(nwbfile: NWBFile, modality: str = "Ecephys", metada defaults = dict(name=f"Device{modality}", description=f"{modality} device. Automatically generated.") - if metadata is None: - metadata = dict() - if modality not in metadata: - metadata[modality] = dict() - if "Device" not in metadata[modality]: - metadata[modality]["Device"] = [defaults] + if modality not in metadata_copy: + metadata_copy[modality] = dict() + if "Device" not in metadata_copy[modality]: + metadata_copy[modality]["Device"] = [defaults] - for dev in metadata[modality]["Device"]: - if dev.get("name", defaults["name"]) not in nwbfile.devices: - nwbfile.create_device(**dict(defaults, **dev)) + for device_metadata in metadata_copy[modality]["Device"]: + if device_metadata.get("name", defaults["name"]) not in nwbfile.devices: + nwbfile.create_device(**dict(defaults, **device_metadata)) @contextmanager @@ -173,6 +173,7 @@ def make_or_load_nwbfile( if overwrite is False and backend == "zarr": # TODO: remove when https://github.com/hdmf-dev/hdmf-zarr/issues/182 is resolved raise NotImplementedError("Appending a Zarr file is not yet supported!") + load_kwargs = dict() success = True file_initially_exists = nwbfile_path_in.exists() if nwbfile_path_in is not None else None diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index a9840617f..01168da13 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -5,13 +5,15 @@ from copy import deepcopy from datetime import datetime from pathlib import Path -from typing import List, Optional, Type, Union +from typing import List, Literal, Optional, Type, Union import numpy as np from hdmf.testing import TestCase as HDMFTestCase +from hdmf_zarr import NWBZarrIO from jsonschema.validators import Draft7Validator, validate from numpy.testing import assert_array_equal from pynwb import NWBHDF5IO +from pynwb.testing.mock.file import mock_NWBFile from spikeinterface.core.testing import check_recordings_equal, check_sortings_equal from neuroconv.basedatainterface import BaseDataInterface @@ -84,24 +86,54 @@ def check_metadata(self): validate(metadata_for_validation, schema) self.check_extracted_metadata(metadata) - def check_run_conversion(self, nwbfile_path: str): + def check_no_metadata_mutation(self): + """Ensure the metadata object was not altered by `add_to_nwbfile` method.""" metadata = self.interface.get_metadata() metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) metadata_in = deepcopy(metadata) + nwbfile = mock_NWBFile() + self.interface.add_to_nwbfile(nwbfile=nwbfile, metadata=metadata, **self.conversion_options) + + assert metadata == metadata_in + + def check_run_conversion_default_backend(self, nwbfile_path: str, backend: Literal["hdf5", "zarr"] = "hdf5"): + metadata = self.interface.get_metadata() + if "session_start_time" not in metadata["NWBFile"]: + metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) + self.interface.run_conversion( - nwbfile_path=nwbfile_path, overwrite=True, metadata=metadata, **self.conversion_options + nwbfile_path=nwbfile_path, overwrite=True, metadata=metadata, backend=backend, **self.conversion_options ) - # Test the the metadata was 
not altered by run conversin which calls add_to_nwbfile - assert metadata == metadata_in + def check_run_conversion_custom_backend(self, nwbfile_path: str, backend: Literal["hdf5", "zarr"] = "hdf5"): + metadata = self.interface.get_metadata() + if "session_start_time" not in metadata["NWBFile"]: + metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) + + nwbfile = self.interface.create_nwbfile(metadata=metadata, **self.conversion_options) + backend_configuration = self.interface.get_default_backend_configuration(nwbfile=nwbfile, backend=backend) + self.interface.run_conversion( + nwbfile_path=nwbfile_path, + overwrite=True, + nwbfile=nwbfile, + metadata=metadata, + backend=backend, + backend_configuration=backend_configuration, + **self.conversion_options, + ) @abstractmethod def check_read_nwb(self, nwbfile_path: str): """Read the produced NWB file and compare it to the interface.""" pass + def check_basic_zarr_read(self, nwbfile_path: str): + """Ensure NWBZarrIO can read the file.""" + with NWBZarrIO(path=nwbfile_path, mode="r") as io: + io.read() + def check_extracted_metadata(self, metadata: dict): """Override this method to make assertions about specific extracted metadata values.""" pass @@ -124,9 +156,20 @@ def test_conversion_as_lone_interface(self): self.check_conversion_options_schema_valid() self.check_metadata() self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb") - self.check_run_conversion(nwbfile_path=self.nwbfile_path) + + self.check_no_metadata_mutation() + + self.check_run_conversion_default_backend(nwbfile_path=self.nwbfile_path, backend="hdf5") + self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="hdf5") + self.check_read_nwb(nwbfile_path=self.nwbfile_path) + # TODO: enable when all H5DataIO prewraps are gone + # self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb.zarr") + # self.check_run_conversion(nwbfile_path=self.nwbfile_path, backend="zarr") + # self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="zarr") + # self.check_basic_zarr_read(nwbfile_path=self.nwbfile_path) + # Any extra custom checks to run self.run_custom_checks() @@ -523,7 +566,12 @@ def test_conversion_as_lone_interface(self): self.check_conversion_options_schema_valid() self.check_metadata() self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb") - self.check_run_conversion(nwbfile_path=self.nwbfile_path) + + self.check_no_metadata_mutation() + + self.check_run_conversion_default_backend(nwbfile_path=self.nwbfile_path, backend="hdf5") + self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="hdf5") + self.check_read_nwb(nwbfile_path=self.nwbfile_path) # Any extra custom checks to run diff --git a/tests/test_on_data/test_behavior_interfaces.py b/tests/test_on_data/test_behavior_interfaces.py index 8877f1bf1..3986233db 100644 --- a/tests/test_on_data/test_behavior_interfaces.py +++ b/tests/test_on_data/test_behavior_interfaces.py @@ -333,17 +333,36 @@ class TestDeepLabCutInterface(DeepLabCutInterfaceMixin, unittest.TestCase): save_directory = OUTPUT_PATH - def check_run_conversion(self, nwbfile_path: str): + def run_custom_checks(self): + self.check_custom_timestamps(nwbfile_path=self.nwbfile_path) + + def check_custom_timestamps(self, nwbfile_path: str): + # TODO: Peel out into separate test class and replace this part with check_read_nwb + if self.case != 1: # set custom timestamps + return + metadata = 
self.interface.get_metadata() metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) - if self.case == 1: # set custom timestamps - self.interface.set_aligned_timestamps(self._custom_timestamps_case_1) - assert len(self.interface._timestamps) == 2330 + self.interface.set_aligned_timestamps(self._custom_timestamps_case_1) + assert len(self.interface._timestamps) == 2330 - self.interface.run_conversion(nwbfile_path=nwbfile_path, overwrite=True, metadata=metadata) + self.interface.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata, overwrite=True) - def check_read_nwb(self, nwbfile_path: str): # This is currently structured to be file-specific + with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: + nwbfile = io.read() + assert "behavior" in nwbfile.processing + processing_module_interfaces = nwbfile.processing["behavior"].data_interfaces + assert "PoseEstimation" in processing_module_interfaces + + pose_estimation_series_in_nwb = processing_module_interfaces["PoseEstimation"].pose_estimation_series + + for pose_estimation in pose_estimation_series_in_nwb.values(): + pose_timestamps = pose_estimation.timestamps + np.testing.assert_array_equal(pose_timestamps, self._custom_timestamps_case_1) + + def check_read_nwb(self, nwbfile_path: str): + # TODO: move this to the upstream mixin with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: nwbfile = io.read() assert "behavior" in nwbfile.processing @@ -359,11 +378,6 @@ def check_read_nwb(self, nwbfile_path: str): # This is currently structured to assert all(expected_pose_estimation_series_are_in_nwb_file) - if self.case == 1: # custom timestamps - for pose_estimation in pose_estimation_series_in_nwb.values(): - pose_timestamps = pose_estimation.timestamps - np.testing.assert_array_equal(pose_timestamps, self._custom_timestamps_case_1) - class TestSLEAPInterface(DataInterfaceTestMixin, TemporalAlignmentMixin, unittest.TestCase): data_interface_cls = SLEAPInterface From 3273e518f12a8f2639f9c126b0ed84af01ebfa74 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 18:15:16 -0400 Subject: [PATCH 9/9] [pre-commit.ci] pre-commit autoupdate (#819) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 04ae1161a..cadf89204 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black - rev: 24.3.0 + rev: 24.4.0 hooks: - id: black exclude: ^docs/
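
Note on PATCH 8/9 (not part of any diff above): a sketch of the interface-level workflow it introduces for customizing a backend configuration, following check_run_conversion_custom_backend from the testing mixin. The choice of VideoInterface, the source file path, the output path, and the comment about editable options are illustrative assumptions; any DataInterface subclass follows the same pattern.

from datetime import datetime

from neuroconv.datainterfaces import VideoInterface

# Placeholder source data; any DataInterface subclass follows the same pattern.
interface = VideoInterface(file_paths=["path/to/session_video.avi"])

metadata = interface.get_metadata()
metadata["NWBFile"].update(session_start_time=datetime.now().astimezone())

# Build the NWBFile in memory, then fetch the default configuration for the chosen backend.
nwbfile = interface.create_nwbfile(metadata=metadata)
backend_configuration = interface.get_default_backend_configuration(nwbfile=nwbfile, backend="hdf5")
# Per-dataset options (chunking, compression, ...) live in
# backend_configuration.dataset_configurations and can be edited here before writing.

# Passing a custom backend_configuration requires also passing the in-memory nwbfile.
interface.run_conversion(
    nwbfile_path="session.nwb",
    nwbfile=nwbfile,
    metadata=metadata,
    overwrite=True,
    backend="hdf5",
    backend_configuration=backend_configuration,
)

When backend_configuration is omitted, run_conversion adds the interface's data itself and applies the default configuration for the chosen backend, so the simple one-call form keeps working unchanged.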