Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Zarr append for datasets (non-reference) #1136

Merged
merged 15 commits into from
Jun 28, 2024
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# HDMF Changelog

## HDMF 3.14.2 (???)
## HDMF 3.14.2 (Upcoming)

### Bug fixes
- Fix iterator increment causing an extra +1 added after the end of completion. @CodyCBakerPhD [#1128](https://github.com/hdmf-dev/hdmf/pull/1128)


### Enhancements
- Support appending to zarr arrays. @mavaylon1 [#1136](https://github.com/hdmf-dev/hdmf/pull/1136)

## HDMF 3.14.1 (June 6, 2024)

Expand Down
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ dependencies = [
"pandas>=1.0.5",
"ruamel.yaml>=0.16",
"scipy>=1.4",
"zarr >= 2.12.0",
"importlib-resources; python_version < '3.9'", # TODO: remove when minimum python version is 3.9
]
dynamic = ["version"]

[project.optional-dependencies]
zarr = ["zarr>=2.12.0"]
tqdm = ["tqdm>=4.41.0"]
termset = ["linkml-runtime>=1.5.5; python_version >= '3.9'",
"schemasheets>=0.1.23; python_version >= '3.9'",
Expand Down Expand Up @@ -117,7 +117,7 @@ omit = [
# force-exclude = "src/hdmf/common/hdmf-common-schema|docs/gallery"

[tool.ruff]
select = ["E", "F", "T100", "T201", "T203"]
lint.select = ["E", "F", "T100", "T201", "T203"]
exclude = [
".git",
".tox",
Expand All @@ -132,11 +132,11 @@ exclude = [
]
line-length = 120

[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
"docs/gallery/*" = ["E402", "T201"]
"src/*/__init__.py" = ["F401"]
"setup.py" = ["T201"]
"test_gallery.py" = ["T201"]

[tool.ruff.mccabe]
[tool.ruff.lint.mccabe]
max-complexity = 17
2 changes: 1 addition & 1 deletion src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ def __read_dataset(self, h5obj, name=None):
def _check_str_dtype(self, h5obj):
    """Return *h5obj* wrapped in a StrDataset when it stores variable-length
    str data and h5py 3+ is in use; otherwise return it unchanged.

    Under h5py 3, variable-length string datasets are read back as bytes
    unless wrapped, so the wrapper restores str semantics for callers.
    """
    obj_dtype = h5obj.dtype
    # Object ('O') kind plus a vlen metadata entry of the built-in str type
    # identifies an HDF5 variable-length string dataset.
    is_vlen_str = (obj_dtype.kind == 'O'
                   and obj_dtype.metadata.get('vlen') is str)
    if is_vlen_str and H5PY_3:
        return StrDataset(h5obj, None)
    return h5obj

Expand Down
2 changes: 1 addition & 1 deletion src/hdmf/build/objectmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ def __get_subspec_values(self, builder, spec, manager):
if not isinstance(builder, DatasetBuilder): # pragma: no cover
raise ValueError("__get_subspec_values - must pass DatasetBuilder with DatasetSpec")
if (spec.shape is None and getattr(builder.data, 'shape', None) == (1,) and
type(builder.data[0]) != np.void):
type(builder.data[0]) is not np.void):
# if a scalar dataset is expected and a 1-element non-compound dataset is given, then read the dataset
builder['data'] = builder.data[0] # use dictionary reference instead of .data to bypass error
ret[spec] = self.__check_ref_resolver(builder.data)
Expand Down
5 changes: 4 additions & 1 deletion src/hdmf/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@
from warnings import warn
from typing import Tuple
from itertools import product, chain
from zarr import Array as ZarrArray

import h5py
import numpy as np

from .utils import docval, getargs, popargs, docval_macro, get_data_shape


def append_data(data, arg):
if isinstance(data, (list, DataIO)):
data.append(arg)
return data
elif isinstance(data, ZarrArray):
data.append([arg], axis=0)
return data
elif type(data).__name__ == 'TermSetWrapper': # circular import
data.append(arg)
return data
Expand Down
2 changes: 1 addition & 1 deletion src/hdmf/testing/testcase.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def _assert_array_equal(self,
:param message: custom additional message to show when assertions as part of this assert are failing
"""
array_data_types = tuple([i for i in get_docval_macro('array_data')
if (i != list and i != tuple and i != AbstractDataChunkIterator)])
if (i is not list and i is not tuple and i is not AbstractDataChunkIterator)])
# We construct array_data_types this way to avoid explicit dependency on h5py, Zarr and other
# I/O backends. Only list and tuple do not support [()] slicing, and AbstractDataChunkIterator
# should never occur here. The effective value of array_data_types is then:
Expand Down
2 changes: 1 addition & 1 deletion src/hdmf/validate/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def get_type(data, builder_dtype=None):
# Empty array
else:
# Empty string array
if data.dtype.metadata["vlen"] == str:
if data.dtype.metadata["vlen"] is str:
return "utf", None
# Undetermined variable length data type.
else: # pragma: no cover
Expand Down
14 changes: 14 additions & 0 deletions tests/unit/utils_test/test_data_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from hdmf.data_utils import append_data
from hdmf.testing import TestCase

import numpy as np
from numpy.testing import assert_array_equal
import zarr

class TestAppendData(TestCase):
    """Unit tests for :func:`hdmf.data_utils.append_data`."""

    def test_append_data_zarr(self):
        """Appending a scalar to a zarr array extends it along axis 0."""
        original = zarr.array([1, 2, 3])
        result = append_data(original, 4)
        expected = np.array([1, 2, 3, 4])
        assert_array_equal(result[:], expected)
Loading