From 8d469e7798e57b7c20bf23c5ff6b3e538aa4c70f Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Thu, 27 Jun 2024 08:06:01 -0700 Subject: [PATCH 01/15] Zarr append --- src/hdmf/data_utils.py | 5 ++++- tests/unit/utils_test/test_data_utils.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 tests/unit/utils_test/test_data_utils.py diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index f4ac6541f..71f5bdf6d 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -5,17 +5,20 @@ from warnings import warn from typing import Tuple from itertools import product, chain +from zarr import Array as ZarrArray import h5py import numpy as np from .utils import docval, getargs, popargs, docval_macro, get_data_shape - def append_data(data, arg): if isinstance(data, (list, DataIO)): data.append(arg) return data + elif isinstance(data, ZarrArray): + data.append([arg], axis=0) + return data elif type(data).__name__ == 'TermSetWrapper': # circular import data.append(arg) return data diff --git a/tests/unit/utils_test/test_data_utils.py b/tests/unit/utils_test/test_data_utils.py new file mode 100644 index 000000000..2e0df7ba8 --- /dev/null +++ b/tests/unit/utils_test/test_data_utils.py @@ -0,0 +1,14 @@ +from hdmf.data_utils import append_data +from hdmf.testing import TestCase + +import numpy as np +from numpy.testing import assert_array_equal +import zarr + +class TestAppendData(TestCase): + + def test_append_data_zarr(self): + zarr_array = zarr.array([1,2,3]) + new = append_data(zarr_array, 4) + + assert_array_equal(new[:], np.array([1,2,3,4])) From 2f37abf3fdc4b3051978e93fd5c3fdf2185c3014 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 27 Jun 2024 08:32:27 -0700 Subject: [PATCH 02/15] Update CHANGELOG.md --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae753b98b..5f5db4918 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,12 @@ # HDMF Changelog -## HDMF 3.14.2 (???) +## HDMF 3.14.2 (Upcoming) ### Bug fixes - Fix iterator increment causing an extra +1 added after the end of completion. @CodyCBakerPhD [#1128](https://github.com/hdmf-dev/hdmf/pull/1128) - +### Enhancements +- Support appending to zarr arrays. @mavaylon1 [#1136](https://github.com/hdmf-dev/hdmf/pull/1136) ## HDMF 3.14.1 (June 6, 2024) From 755d097744cd9940b99985984ed42c2bbd2f35dd Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 27 Jun 2024 11:07:48 -0700 Subject: [PATCH 03/15] Update pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 3a0034087..3f00ab243 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "pandas>=1.0.5", "ruamel.yaml>=0.16", "scipy>=1.4", + "zarr >= 2.12.0", "importlib-resources; python_version < '3.9'", # TODO: remove when minimum python version is 3.9 ] dynamic = ["version"] From ce58fe79508b64a4deab3074c5ab9c3a5e42ecbc Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 27 Jun 2024 11:19:32 -0700 Subject: [PATCH 04/15] Update pyproject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3f00ab243..9138a8e24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,6 @@ dependencies = [ dynamic = ["version"] [project.optional-dependencies] -zarr = ["zarr>=2.12.0"] tqdm = ["tqdm>=4.41.0"] termset = ["linkml-runtime>=1.5.5; python_version >= '3.9'", "schemasheets>=0.1.23; python_version >= '3.9'", From b1ee26f52742cffc40f75cee787dcfe6adea75e1 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 27 Jun 2024 11:20:38 -0700 Subject: [PATCH 05/15] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9138a8e24..817b3c438 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "pandas>=1.0.5", "ruamel.yaml>=0.16", "scipy>=1.4", - "zarr >= 2.12.0", + # "zarr >= 2.12.0", "importlib-resources; python_version < '3.9'", # TODO: remove when minimum python version is 3.9 ] dynamic = ["version"] From 058296fbf8a747004010f7d2d6c9e934686e579f Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 27 Jun 2024 11:24:14 -0700 Subject: [PATCH 06/15] Update pyproject.toml --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 817b3c438..a089113c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "pandas>=1.0.5", "ruamel.yaml>=0.16", "scipy>=1.4", - # "zarr >= 2.12.0", + "zarr >= 2.12.0", "importlib-resources; python_version < '3.9'", # TODO: remove when minimum python version is 3.9 ] dynamic = ["version"] @@ -117,7 +117,7 @@ omit = [ # force-exclude = "src/hdmf/common/hdmf-common-schema|docs/gallery" [tool.ruff] -select = ["E", "F", "T100", "T201", "T203"] +lint.select = ["E", "F", "T100", "T201", "T203"] exclude = [ ".git", ".tox", @@ -132,11 +132,11 @@ exclude = [ ] line-length = 120 -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "docs/gallery/*" = ["E402", "T201"] "src/*/__init__.py" = ["F401"] "setup.py" = ["T201"] "test_gallery.py" = ["T201"] -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 17 From 8851b3465807790277c9357c3c21a9c12f2f99b5 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 27 Jun 2024 11:26:22 -0700 Subject: [PATCH 07/15] Update pyproject.toml --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index a089113c0..4b3c29e5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,6 +118,9 @@ omit = [ [tool.ruff] lint.select = ["E", "F", "T100", "T201", "T203"] +ignore = [ + "E721" # We use it for good reasons +] exclude = [ ".git", ".tox", From 1410481d73ba6342a3968cd6c07a1bde3b98a321 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 28 Jun 2024 07:38:01 -0700 Subject: [PATCH 08/15] Update pyproject.toml --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4b3c29e5f..a089113c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,9 +118,6 @@ omit = [ [tool.ruff] lint.select = ["E", "F", "T100", "T201", "T203"] -ignore = [ - "E721" # We use it for good reasons -] exclude = [ ".git", ".tox", From 5673c01e06ef1b13b1e560394e9b9350cdab9d4b Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 28 Jun 2024 07:39:19 -0700 Subject: [PATCH 09/15] Update validator.py --- src/hdmf/validate/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index bdfc15f8f..294a237f4 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -164,7 +164,7 @@ def get_type(data, builder_dtype=None): # Empty array else: # Empty string array - if data.dtype.metadata["vlen"] == str: + if isinstance(data.dtype.metadata["vlen"], str): return "utf", None # Undetermined variable length data type. else: # pragma: no cover From c4d1be4ae3865553d7249c3cf9e511bdfa19d764 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 28 Jun 2024 07:40:32 -0700 Subject: [PATCH 10/15] Update testcase.py --- src/hdmf/testing/testcase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/testing/testcase.py b/src/hdmf/testing/testcase.py index 798df6fe4..1be4bcecd 100644 --- a/src/hdmf/testing/testcase.py +++ b/src/hdmf/testing/testcase.py @@ -174,7 +174,7 @@ def _assert_array_equal(self, :param message: custom additional message to show when assertions as part of this assert are failing """ array_data_types = tuple([i for i in get_docval_macro('array_data') - if (i != list and i != tuple and i != AbstractDataChunkIterator)]) + if (i is not list and i is not tuple and i is not AbstractDataChunkIterator)]) # We construct array_data_types this way to avoid explicit dependency on h5py, Zarr and other # I/O backends. Only list and tuple do not support [()] slicing, and AbstractDataChunkIterator # should never occur here. The effective value of array_data_types is then: From a778369120d2e4c585bd1b15b51c624dc74bf8ed Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 28 Jun 2024 07:41:17 -0700 Subject: [PATCH 11/15] Update objectmapper.py --- src/hdmf/build/objectmapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index fed678d41..52fbfec49 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -1164,7 +1164,7 @@ def __get_subspec_values(self, builder, spec, manager): if not isinstance(builder, DatasetBuilder): # pragma: no cover raise ValueError("__get_subspec_values - must pass DatasetBuilder with DatasetSpec") if (spec.shape is None and getattr(builder.data, 'shape', None) == (1,) and - type(builder.data[0]) != np.void): + type(builder.data[0]) is not np.void): # if a scalar dataset is expected and a 1-element non-compound dataset is given, then read the dataset builder['data'] = builder.data[0] # use dictionary reference instead of .data to bypass error ret[spec] = self.__check_ref_resolver(builder.data) From 135a15079dcce716f9a2535a05a95d606b885044 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 28 Jun 2024 07:43:41 -0700 Subject: [PATCH 12/15] Update h5tools.py --- src/hdmf/backends/hdf5/h5tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 0604881bb..d2cc1b94a 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -728,7 +728,7 @@ def __read_dataset(self, h5obj, name=None): def _check_str_dtype(self, h5obj): dtype = h5obj.dtype if dtype.kind == 'O': - if dtype.metadata.get('vlen') == str and H5PY_3: + if isinstance(dtype.metadata.get('vlen'), str) and H5PY_3: return StrDataset(h5obj, None) return h5obj From 91570a914902073973f2778260dda8958539858b Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 28 Jun 2024 07:47:52 -0700 Subject: [PATCH 13/15] Update h5tools.py --- src/hdmf/backends/hdf5/h5tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index d2cc1b94a..0604881bb 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -728,7 +728,7 @@ def __read_dataset(self, h5obj, name=None): def _check_str_dtype(self, h5obj): dtype = h5obj.dtype if dtype.kind == 'O': - if isinstance(dtype.metadata.get('vlen'), str) and H5PY_3: + if dtype.metadata.get('vlen') == str and H5PY_3: return StrDataset(h5obj, None) return h5obj From 7efedf7c27537f726f517c4b8a223391b2e7e88d Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 28 Jun 2024 07:50:02 -0700 Subject: [PATCH 14/15] Update h5tools.py --- src/hdmf/backends/hdf5/h5tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 0604881bb..8135d75e7 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -728,7 +728,7 @@ def __read_dataset(self, h5obj, name=None): def _check_str_dtype(self, h5obj): dtype = h5obj.dtype if dtype.kind == 'O': - if dtype.metadata.get('vlen') == str and H5PY_3: + if dtype.metadata.get('vlen') is str and H5PY_3: return StrDataset(h5obj, None) return h5obj From 57a0981c38f3f9eafcae94a77dc1a6b4925331f1 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 28 Jun 2024 16:19:48 -0700 Subject: [PATCH 15/15] Update src/hdmf/validate/validator.py Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --- src/hdmf/validate/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index 294a237f4..e39011d9f 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -164,7 +164,7 @@ def get_type(data, builder_dtype=None): # Empty array else: # Empty string array - if isinstance(data.dtype.metadata["vlen"], str): + if data.dtype.metadata["vlen"] is str: return "utf", None # Undetermined variable length data type. else: # pragma: no cover