From cca7b209b7a42929819869130dc277b80485c238 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 6 Nov 2024 17:49:46 -0800 Subject: [PATCH 1/9] Fix #225 Add NWBZarrIO.read_nwb convenience function --- src/hdmf_zarr/nwb.py | 28 +++++++++++++- tests/unit/test_fsspec_streaming.py | 32 ++++++++++++---- tests/unit/test_nwbzarrio.py | 57 +++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 8 deletions(-) create mode 100644 tests/unit/test_nwbzarrio.py diff --git a/src/hdmf_zarr/nwb.py b/src/hdmf_zarr/nwb.py index 582f1a63..762b6b3c 100644 --- a/src/hdmf_zarr/nwb.py +++ b/src/hdmf_zarr/nwb.py @@ -1,6 +1,7 @@ """Module with Zarr backend for NWB for integration with PyNWB""" from warnings import warn -from .backend import ZarrIO +from pathlib import Path +from .backend import ZarrIO, SUPPORTED_ZARR_STORES from hdmf.utils import (docval, popargs, @@ -63,5 +64,30 @@ def export(self, **kwargs): kwargs['container'] = nwbfile super().export(**kwargs) + @staticmethod + @docval({'name': 'path', + 'type': (str, Path, *SUPPORTED_ZARR_STORES), + 'doc': 'the path to the Zarr file or a supported Zarr store'}, + is_method=False) + def read_nwb(**kwargs): + """ + Helper factory method for reading an NWB file and return the NWBFile object + """ + # Retrieve the filepath + path = popargs('path', kwargs) + if isinstance(path, Path): + path = str(path) + # determine default storage options to use when opening a file from S3 + storage_options = {} + if isinstance(path, str) and path.startswith(("s3://")): + storage_options = dict(anon=True) + + # open the file with NWBZarrIO and rad the file + io = NWBZarrIO(path=path, mode="r", load_namespaces=True, storage_options=storage_options) + nwbfile = io.read() + + # return the NWBFile object + return nwbfile + except ImportError: warn("PyNWB is not installed. Support for NWBZarrIO is disabled.") diff --git a/tests/unit/test_fsspec_streaming.py b/tests/unit/test_fsspec_streaming.py index cbb0bf06..1864f4a3 100644 --- a/tests/unit/test_fsspec_streaming.py +++ b/tests/unit/test_fsspec_streaming.py @@ -8,17 +8,20 @@ class TestFSSpecStreaming(unittest.TestCase): - @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed") - def test_fsspec_streaming(self): + + def setUp(self): # PLACEHOLDER test file from Allen Institute for Neural Dynamics # TODO: store a small test file and use it to speed up testing - remote_path = ( + self.s3_aind_path = ( "s3://aind-open-data/ecephys_625749_2022-08-03_15-15-06_nwb_2023-05-16_16-34-55/" "ecephys_625749_2022-08-03_15-15-06_nwb/" "ecephys_625749_2022-08-03_15-15-06_experiment1_recording1.nwb.zarr/" ) + self.https_s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/ccefbc9f-30e7-4a4c-b044-5b59d300040b/" - with NWBZarrIO(remote_path, mode="r", storage_options=dict(anon=True)) as io: + @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed") + def test_fsspec_streaming(self): + with NWBZarrIO(self.s3_aind_path, mode="r", storage_options=dict(anon=True)) as io: nwbfile = io.read() self.assertEqual(nwbfile.identifier, "ecephys_625749_2022-08-03_15-15-06") @@ -32,10 +35,25 @@ def test_s3_open_with_consolidated_(self): """ The file is a Zarr file with consolidated metadata. """ - s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/ccefbc9f-30e7-4a4c-b044-5b59d300040b/" - with NWBZarrIO(s3_path, mode='r') as read_io: + with NWBZarrIO(self.https_s3_path, mode='r') as read_io: read_io.open() self.assertIsInstance(read_io.file.store, zarr.storage.ConsolidatedMetadataStore) - with NWBZarrIO(s3_path, mode='-r') as read_io: + with NWBZarrIO(self.https_s3_path, mode='-r') as read_io: read_io.open() self.assertIsInstance(read_io.file.store, zarr.storage.FSStore) + + + @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed") + def test_fsspec_streaming_via_read_nwb(self): + """ + Test reading from s3 using the convenience function NWBZarrIO.read_nwb + """ + # Test with a s3:// URL + nwbfile = NWBZarrIO.read_nwb(self.s3_aind_path) + self.assertEqual(nwbfile.identifier, "ecephys_625749_2022-08-03_15-15-06") + self.assertEqual(nwbfile.institution, "AIND") + + # Try a https:// URL as well + nwbfile = NWBZarrIO.read_nwb(self.https_s3_path) + self.assertIsNotNone(nwbfile) + diff --git a/tests/unit/test_nwbzarrio.py b/tests/unit/test_nwbzarrio.py new file mode 100644 index 00000000..10a98802 --- /dev/null +++ b/tests/unit/test_nwbzarrio.py @@ -0,0 +1,57 @@ +import unittest +from hdmf_zarr import NWBZarrIO +import os +import shutil +import datetime +from dateutil.tz import tzlocal + +try: + from pynwb import NWBFile + PYNWB_AVAILABLE = True +except ImportError: + PYNWB_AVAILABLE = False + + +@unittest.skipIf(not PYNWB_AVAILABLE, "PyNWB not installed") +class TestNWBZarrIO(unittest.TestCase): + + def setUp(self): + self.filepath = "test_io.zarr" + + def tearDown(self): + if os.path.exists(self.filepath): + shutil.rmtree(self.filepath) + + def write_test_file(self): + # Create the NWBFile + nwbfile = NWBFile( + session_description="my first synthetic recording", + identifier="EXAMPLE_ID", + session_start_time=datetime.now(tzlocal()), + experimenter="Dr. Bilbo Baggins", + lab="Bag End Laboratory", + institution="University of Middle Earth at the Shire", + experiment_description="I went on an adventure with thirteen dwarves " + "to reclaim vast treasures.", + session_id="LONELYMTN", + ) + + # Create a device + device = nwbfile.create_device( + name="array", description="the best array", manufacturer="Probe Company 9000" + ) + with NWBZarrIO(path=self.filepath, mode="w") as io: + io.write(nwbfile) + + @unittest.skip + def test_read_nwb(self): + """ + Test reading a local file with NWBZarrIO.read_nwb. + + NOTE: See TestFSSpecStreaming.test_fsspec_streaming_via_read_nwb for corresponding tests + for reading a remote file with NWBZarrIO.read_nwb + """ + self.write_test_file() + nwbfile = NWBZarrIO.read_nwb(path=self.filepath, mode="r") + self.assertEqual(len(nwbfile.devices), 1) + self.assertEqual(nwbfile.experimenter, "Dr. Bilbo Baggins") From b153da0e12134cd4a4ac8b10bd275a3ed1580dce Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 6 Nov 2024 17:52:19 -0800 Subject: [PATCH 2/9] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ed84f2f..537387b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * Added test for opening file with consolidated metadata from DANDI. @mavaylon1 [#206](https://github.com/hdmf-dev/hdmf-zarr/pull/206) * Add dimension labels compatible with xarray. @mavaylon1 [#207](https://github.com/hdmf-dev/hdmf-zarr/pull/207) * Added link_data --> clear_cache relationship to support repacking zarr nwbfiles: [#215](https://github.com/hdmf-dev/hdmf-zarr/pull/215) +* Added `NWBZarrIO.read_nwb` convenience method to simplify reading an NWB file. @oruebel [#226](https://github.com/hdmf-dev/hdmf-zarr/pull/226) ## 0.8.0 (June 4, 2024) ### Bug Fixes From 8062b054910e2a234cd6cea19594583be37c63c5 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 6 Nov 2024 17:53:48 -0800 Subject: [PATCH 3/9] Fix ruff --- tests/unit/test_nwbzarrio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_nwbzarrio.py b/tests/unit/test_nwbzarrio.py index 10a98802..237df618 100644 --- a/tests/unit/test_nwbzarrio.py +++ b/tests/unit/test_nwbzarrio.py @@ -37,7 +37,7 @@ def write_test_file(self): ) # Create a device - device = nwbfile.create_device( + _ = nwbfile.create_device( name="array", description="the best array", manufacturer="Probe Company 9000" ) with NWBZarrIO(path=self.filepath, mode="w") as io: From 82b74c2730a84bc0671647d2ba843b4f0e999558 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 6 Nov 2024 18:01:21 -0800 Subject: [PATCH 4/9] Remove broken test --- tests/unit/test_fsspec_streaming.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/unit/test_fsspec_streaming.py b/tests/unit/test_fsspec_streaming.py index 1864f4a3..ca80bb72 100644 --- a/tests/unit/test_fsspec_streaming.py +++ b/tests/unit/test_fsspec_streaming.py @@ -52,8 +52,3 @@ def test_fsspec_streaming_via_read_nwb(self): nwbfile = NWBZarrIO.read_nwb(self.s3_aind_path) self.assertEqual(nwbfile.identifier, "ecephys_625749_2022-08-03_15-15-06") self.assertEqual(nwbfile.institution, "AIND") - - # Try a https:// URL as well - nwbfile = NWBZarrIO.read_nwb(self.https_s3_path) - self.assertIsNotNone(nwbfile) - From 44b036dcb03d204c0339d935b48140faa07c7fb2 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 6 Nov 2024 18:03:20 -0800 Subject: [PATCH 5/9] Remove extra skip on unittest --- tests/unit/test_nwbzarrio.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_nwbzarrio.py b/tests/unit/test_nwbzarrio.py index 237df618..12e4f5f0 100644 --- a/tests/unit/test_nwbzarrio.py +++ b/tests/unit/test_nwbzarrio.py @@ -43,7 +43,6 @@ def write_test_file(self): with NWBZarrIO(path=self.filepath, mode="w") as io: io.write(nwbfile) - @unittest.skip def test_read_nwb(self): """ Test reading a local file with NWBZarrIO.read_nwb. From f76889dbcc11e427aa9bf54b783bc63484cfe0e0 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 6 Nov 2024 18:06:06 -0800 Subject: [PATCH 6/9] Fix import in unit test --- tests/unit/test_nwbzarrio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_nwbzarrio.py b/tests/unit/test_nwbzarrio.py index 12e4f5f0..ef283d85 100644 --- a/tests/unit/test_nwbzarrio.py +++ b/tests/unit/test_nwbzarrio.py @@ -2,7 +2,7 @@ from hdmf_zarr import NWBZarrIO import os import shutil -import datetime +from datetime import datetime from dateutil.tz import tzlocal try: From 4c6f9dc64c09f5b679b94fd0a2580fe98d6f9eda Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 6 Nov 2024 18:08:46 -0800 Subject: [PATCH 7/9] Fix extra arg in unittest --- tests/unit/test_nwbzarrio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_nwbzarrio.py b/tests/unit/test_nwbzarrio.py index ef283d85..270f04e2 100644 --- a/tests/unit/test_nwbzarrio.py +++ b/tests/unit/test_nwbzarrio.py @@ -51,6 +51,6 @@ def test_read_nwb(self): for reading a remote file with NWBZarrIO.read_nwb """ self.write_test_file() - nwbfile = NWBZarrIO.read_nwb(path=self.filepath, mode="r") + nwbfile = NWBZarrIO.read_nwb(path=self.filepath) self.assertEqual(len(nwbfile.devices), 1) self.assertEqual(nwbfile.experimenter, "Dr. Bilbo Baggins") From 2d7b77fe6330fe0dc3a2f2f2c3d5e60331eddf4c Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 6 Nov 2024 18:11:20 -0800 Subject: [PATCH 8/9] Fix asset in unit test --- tests/unit/test_nwbzarrio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_nwbzarrio.py b/tests/unit/test_nwbzarrio.py index 270f04e2..ff408594 100644 --- a/tests/unit/test_nwbzarrio.py +++ b/tests/unit/test_nwbzarrio.py @@ -53,4 +53,4 @@ def test_read_nwb(self): self.write_test_file() nwbfile = NWBZarrIO.read_nwb(path=self.filepath) self.assertEqual(len(nwbfile.devices), 1) - self.assertEqual(nwbfile.experimenter, "Dr. Bilbo Baggins") + self.assertTupleEqual(nwbfile.experimenter, ('Dr. Bilbo Baggins',)) From 79f90e1c564dd34b1941f26fbf5f048d6d1dda19 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Thu, 7 Nov 2024 15:58:42 -0800 Subject: [PATCH 9/9] replace file --- tests/unit/test_fsspec_streaming.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_fsspec_streaming.py b/tests/unit/test_fsspec_streaming.py index ca80bb72..05dc249c 100644 --- a/tests/unit/test_fsspec_streaming.py +++ b/tests/unit/test_fsspec_streaming.py @@ -17,7 +17,8 @@ def setUp(self): "ecephys_625749_2022-08-03_15-15-06_nwb/" "ecephys_625749_2022-08-03_15-15-06_experiment1_recording1.nwb.zarr/" ) - self.https_s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/ccefbc9f-30e7-4a4c-b044-5b59d300040b/" + # DANDISET: 000719/icephys_9_27_2024 + self.https_s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/7515c603-9940-4598-aa1b-8bf32dc9b10c/" @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed") def test_fsspec_streaming(self):