From 9e53812db9d05056e6abf7214464b604f603e31d Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Fri, 22 Nov 2024 10:27:19 -0800
Subject: [PATCH 01/13] initial fix

---
 .../_configuration_models/_base_dataset_io.py | 27 +++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
index 8b4e98436..7a70d7fbb 100644
--- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
+++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
@@ -7,6 +7,7 @@
 import h5py
 import numcodecs
 import numpy as np
+import pynwb
 import zarr
 from hdmf import Container
 from hdmf.utils import get_data_shape
@@ -258,9 +259,24 @@ def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Litera
         and `timestamps`, each of which can be configured separately.
         """
         location_in_file = _find_location_in_memory_nwbfile(neurodata_object=neurodata_object, field_name=dataset_name)
-        candidate_dataset = getattr(neurodata_object, dataset_name)
-        full_shape = get_data_shape(data=candidate_dataset)
+
+        manager = pynwb.get_manager()
+        namespace_catalog = manager.type_map.namespace_catalog
+        for namespace in namespace_catalog.namespaces:
+            try:
+                spec = namespace_catalog.get_spec(namespace, neurodata_object.parent.neurodata_type)
+                break
+            except ValueError:
+                continue
+        spec = spec.get_dataset(neurodata_object.name)
+        spec = spec if spec is not None else {}
+        dtype = spec.get("dtype", None)
+        if isinstance(dtype, list):  # compound dtype
+            full_shape = (len(candidate_dataset),)
+        else:
+            full_shape = get_data_shape(data=candidate_dataset)
+
         dtype = _infer_dtype(dataset=candidate_dataset)
 
         if isinstance(candidate_dataset, GenericDataChunkIterator):
@@ -312,3 +328,10 @@ def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Litera
             buffer_shape=buffer_shape,
             compression_method=compression_method,
         )
+
+
+def get_spec(namespace_catalog, namespace, neurodata_type, default=None):
+    try:
+        return namespace_catalog.get_spec(namespace, neurodata_type)
+    except ValueError:
+        return default
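A minimal standalone sketch of the namespace-catalog lookup used in [PATCH 01/13] (illustrative only, not part of the patch; the `TimeIntervals`/`timeseries` pair is an assumed example of a compound-dtype dataset from the core namespace):

```python
# Sketch of the [PATCH 01/13] spec lookup, assuming the core namespace's
# TimeIntervals type; its 'timeseries' dataset spec carries a compound dtype.
import pynwb

namespace_catalog = pynwb.get_manager().type_map.namespace_catalog

spec = None
for namespace in namespace_catalog.namespaces:
    try:
        spec = namespace_catalog.get_spec(namespace, "TimeIntervals")
        break
    except ValueError:  # the type is not defined in this namespace
        continue

dataset_spec = spec.get_dataset("timeseries") if spec is not None else None
dtype = getattr(dataset_spec, "dtype", None)
print(isinstance(dtype, list))  # a list of sub-dtypes marks a compound dtype
```

The series later abandons this spec-based approach, since the built form of a dataset (not its spec) is what determines the on-disk dtype.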
From ff479cb3c5e29998b5ccd0d82d1ceda5703d760d Mon Sep 17 00:00:00 2001
From: Paul Adkisson
Date: Sat, 23 Nov 2024 05:34:52 +1100
Subject: [PATCH 02/13] add inputs to dev tests workflow dispatch

---
 .github/workflows/dev-testing.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/dev-testing.yml b/.github/workflows/dev-testing.yml
index 65c011653..4173d3b63 100644
--- a/.github/workflows/dev-testing.yml
+++ b/.github/workflows/dev-testing.yml
@@ -1,6 +1,11 @@
 name: Dev Branch Testing
 on:
   workflow_dispatch:
+    inputs:
+      python-versions:
+        description: 'List of Python versions to use in matrix, as JSON string'
+        required: true
+        type: string
   workflow_call:
     inputs:
       python-versions:

From 9fb42ee146a812352ad9f6225677c49de30530be Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Fri, 22 Nov 2024 10:53:00 -0800
Subject: [PATCH 03/13] removed unused get_spec fn

---
 .../nwb_helpers/_configuration_models/_base_dataset_io.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
index 7a70d7fbb..865dd4f10 100644
--- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
+++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
@@ -328,10 +328,3 @@ def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Litera
             buffer_shape=buffer_shape,
             compression_method=compression_method,
         )
-
-
-def get_spec(namespace_catalog, namespace, neurodata_type, default=None):
-    try:
-        return namespace_catalog.get_spec(namespace, neurodata_type)
-    except ValueError:
-        return default
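The [PATCH 02/13] dispatch input arrives as a JSON string; inside the workflow, something like GitHub's `fromJson` would expand it into the test matrix (assumed here, since the matrix wiring is not shown in this hunk). An equivalent expansion in Python:

```python
# Equivalent of expanding the 'python-versions' input into a matrix entry;
# the example value is assumed, not taken from the workflow file.
import json

python_versions = '["3.9", "3.10", "3.11", "3.12"]'
matrix = {"python-version": json.loads(python_versions)}
print(matrix)  # {'python-version': ['3.9', '3.10', '3.11', '3.12']}
```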
From 5298401f3dbad04453c09ec67f3bf8e2750fa1d0 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Wed, 4 Dec 2024 14:06:16 -0800
Subject: [PATCH 04/13] implemented builder-based fix

---
 .../_configuration_models/_base_dataset_io.py | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
index 865dd4f10..d254b65aa 100644
--- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
+++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
@@ -10,6 +10,10 @@
 import pynwb
 import zarr
 from hdmf import Container
+from hdmf.build.builders import (
+    DatasetBuilder,
+    GroupBuilder,
+)
 from hdmf.utils import get_data_shape
 from pydantic import (
     BaseModel,
@@ -262,16 +266,14 @@ def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Litera
         location_in_file = _find_location_in_memory_nwbfile(neurodata_object=neurodata_object, field_name=dataset_name)
         candidate_dataset = getattr(neurodata_object, dataset_name)
 
         manager = pynwb.get_manager()
-        namespace_catalog = manager.type_map.namespace_catalog
-        for namespace in namespace_catalog.namespaces:
-            try:
-                spec = namespace_catalog.get_spec(namespace, neurodata_object.parent.neurodata_type)
-                break
-            except ValueError:
-                continue
-        spec = spec.get_dataset(neurodata_object.name)
-        spec = spec if spec is not None else {}
-        dtype = spec.get("dtype", None)
+        builder = manager.build(neurodata_object)
+        if isinstance(builder, GroupBuilder):
+            dtype = builder.datasets[dataset_name].dtype
+        elif isinstance(builder, DatasetBuilder):
+            dtype = builder.dtype
+        else:
+            raise NotImplementedError(f"Builder Type {type(builder)} not supported!")
+
         if isinstance(dtype, list):  # compound dtype
             full_shape = (len(candidate_dataset),)
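A minimal sketch of the builder-based check above, run against a standalone `pynwb.TimeSeries` (illustrative; not taken from the repository's tests):

```python
# Build a single neurodata object and inspect the resulting builder's dtype,
# as [PATCH 04/13] does. A list-valued dtype would indicate a compound dtype.
import numpy as np
import pynwb

time_series = pynwb.TimeSeries(name="ts", data=np.arange(10.0), unit="n.a.", rate=1.0)
builder = pynwb.get_manager().build(time_series)  # a GroupBuilder for a TimeSeries
dtype = builder.datasets["data"].dtype
print(isinstance(dtype, list))  # False: plain float data is not compound
```

Building each neurodata object separately is what the next commit replaces: building the whole file once and passing the builder down avoids repeated `manager.build` calls and handles objects whose dataset lives outside their own builder.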
From 6e05f7d453d570fc204ae37aa8ccc0218daba045 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Wed, 4 Dec 2024 16:44:46 -0800
Subject: [PATCH 05/13] implemented builder-based compound dtype check

---
 .../_configuration_models/_base_dataset_io.py | 33 +++++++++++--------
 .../nwb_helpers/_dataset_configuration.py     |  8 +++--
 2 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
index d254b65aa..76d8c926c 100644
--- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
+++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
@@ -7,12 +7,10 @@
 import h5py
 import numcodecs
 import numpy as np
-import pynwb
 import zarr
 from hdmf import Container
 from hdmf.build.builders import (
-    DatasetBuilder,
-    GroupBuilder,
+    BaseBuilder,
 )
 from hdmf.utils import get_data_shape
 from pydantic import (
@@ -249,7 +247,9 @@ def model_json_schema(cls, **kwargs) -> dict[str, Any]:
         return super().model_json_schema(mode="validation", schema_generator=PureJSONSchemaGenerator, **kwargs)
 
     @classmethod
-    def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Literal["data", "timestamps"]) -> Self:
+    def from_neurodata_object(
+        cls, neurodata_object: Container, dataset_name: Literal["data", "timestamps"], builder: BaseBuilder
+    ) -> Self:
         """
         Construct an instance of a DatasetIOConfiguration for a dataset in a neurodata object in an NWBFile.
 
@@ -265,16 +265,7 @@ def from_neurodata_object(
         location_in_file = _find_location_in_memory_nwbfile(neurodata_object=neurodata_object, field_name=dataset_name)
         candidate_dataset = getattr(neurodata_object, dataset_name)
 
-        manager = pynwb.get_manager()
-        builder = manager.build(neurodata_object)
-        if isinstance(builder, GroupBuilder):
-            dtype = builder.datasets[dataset_name].dtype
-        elif isinstance(builder, DatasetBuilder):
-            dtype = builder.dtype
-        else:
-            raise NotImplementedError(f"Builder Type {type(builder)} not supported!")
-
-        if isinstance(dtype, list):  # compound dtype
+        if has_compound_dtype(builder, location_in_file):
             full_shape = (len(candidate_dataset),)
         else:
             full_shape = get_data_shape(data=candidate_dataset)
@@ -330,3 +321,17 @@ def from_neurodata_object(
             buffer_shape=buffer_shape,
             compression_method=compression_method,
         )
+
+
+def has_compound_dtype(builder, location_in_file):
+    split_location = iter(location_in_file.split("/"))
+    location = next(split_location)
+    while location in builder.groups:
+        builder = builder.groups[location]
+        location = next(split_location)
+
+    if location in builder.datasets:
+        builder = builder.datasets[location]
+    else:
+        raise ValueError(f"Could not find location '{location}' in builder.")
+    return isinstance(builder.dtype, list)

diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py
index f3d8e7560..2827b2e84 100644
--- a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py
+++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py
@@ -9,7 +9,7 @@
 from hdmf.data_utils import DataIO
 from hdmf.utils import get_data_shape
 from hdmf_zarr import NWBZarrIO
-from pynwb import NWBHDF5IO, NWBFile
+from pynwb import NWBHDF5IO, NWBFile, get_manager
 from pynwb.base import DynamicTable, TimeSeriesReferenceVectorData
 from pynwb.file import NWBContainer
 
@@ -102,6 +102,8 @@ def get_default_dataset_io_configurations(
     )
     known_dataset_fields = ("data", "timestamps")
 
+    manager = get_manager()
+    builder = manager.build(nwbfile)
     for neurodata_object in nwbfile.objects.values():
         if isinstance(neurodata_object, DynamicTable):
             dynamic_table = neurodata_object  # For readability
@@ -134,7 +136,7 @@ def get_default_dataset_io_configurations(
                     continue
 
                 dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object(
-                    neurodata_object=column, dataset_name=dataset_name
+                    neurodata_object=column, dataset_name=dataset_name, builder=builder
                 )

                 yield dataset_io_configuration
@@ -168,7 +170,7 @@ def get_default_dataset_io_configurations(
                 continue

             dataset_io_configuration = DatasetIOConfigurationClass.from_neurodata_object(
-                neurodata_object=neurodata_object, dataset_name=known_dataset_field
+                neurodata_object=neurodata_object, dataset_name=known_dataset_field, builder=builder
             )

             yield dataset_io_configuration

From 9890484310880bb5d2b02ca2b1cf4604bf225268 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Wed, 4 Dec 2024 16:47:23 -0800
Subject: [PATCH 06/13] added docstrings

---
 .../_configuration_models/_base_dataset_io.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
index 76d8c926c..e3abcf0fd 100644
--- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
+++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
@@ -261,6 +261,8 @@ def from_neurodata_object(
             The name of the field that will become a dataset when written to disk.
             Some neurodata objects can have multiple such fields, such as `pynwb.TimeSeries` which can have both `data`
             and `timestamps`, each of which can be configured separately.
+        builder : hdmf.build.builders.BaseBuilder
+            The builder object that would be used to construct the NWBFile object.
         """
         location_in_file = _find_location_in_memory_nwbfile(neurodata_object=neurodata_object, field_name=dataset_name)
         candidate_dataset = getattr(neurodata_object, dataset_name)
@@ -323,7 +325,22 @@ def from_neurodata_object(
         )
 
 
-def has_compound_dtype(builder, location_in_file):
+def has_compound_dtype(builder: BaseBuilder, location_in_file: str) -> bool:
+    """
+    Determine if the dataset at the given location in the file has a compound dtype.
+
+    Parameters
+    ----------
+    builder : hdmf.build.builders.BaseBuilder
+        The builder object that would be used to construct the NWBFile object.
+    location_in_file : str
+        The location of the dataset within the NWBFile, e.g. 'acquisition/ElectricalSeries/data'.
+
+    Returns
+    -------
+    bool
+        Whether the dataset has a compound dtype.
+    """
     split_location = iter(location_in_file.split("/"))
     location = next(split_location)
     while location in builder.groups:

From 330dd220da5331d2c55b11e7aa55cdbfdac57d1b Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Thu, 5 Dec 2024 14:17:00 -0800
Subject: [PATCH 07/13] added fix for top-level datasets like electrodes

---
 .../_configuration_models/_base_dataset_io.py | 49 ++++++++++++++++---
 1 file changed, 41 insertions(+), 8 deletions(-)

diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
index e3abcf0fd..55e1fb8ce 100644
--- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
+++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
@@ -342,13 +342,46 @@ def has_compound_dtype(builder: BaseBuilder, location_in_file: str) -> bool:
         Whether the dataset has a compound dtype.
     """
     split_location = iter(location_in_file.split("/"))
-    location = next(split_location)
-    while location in builder.groups:
-        builder = builder.groups[location]
-        location = next(split_location)
-
-    if location in builder.datasets:
-        builder = builder.datasets[location]
+    name = next(split_location)
+
+    if name not in builder.groups:
+        # Items in defined top-level places like electrodes may not be in the groups of the nwbfile-level builder,
+        # but rather in hidden locations like general/extracellular_ephys/electrodes
+        builder = _find_sub_builder(builder, name)
+        name = next(split_location)
+
+    # To find the appropriate builder for the dataset, we traverse the groups in the location_in_file until we reach
+    # a DatasetBuilder
+    while name in builder.groups:
+        builder = builder.groups[name]
+        name = next(split_location)
+    if name in builder.datasets:
+        builder = builder.datasets[name]
     else:
-        raise ValueError(f"Could not find location '{location}' in builder.")
+        raise ValueError(f"Could not find location '{location_in_file}' in builder.")
+
     return isinstance(builder.dtype, list)
+
+
+def _find_sub_builder(builder: BaseBuilder, name: str) -> BaseBuilder:
+    """Recursively search for a sub-builder by name in a builder object.
+
+    Parameters
+    ----------
+    builder : hdmf.build.builders.BaseBuilder
+        The builder object to search for the sub-builder in.
+    name : str
+        The name of the sub-builder to search for.
+
+    Returns
+    -------
+    hdmf.build.builders.BaseBuilder
+        The sub-builder with the given name, or None if it could not be found.
+    """
+    for sub_builder in builder.groups.values():
+        if sub_builder.name == name:
+            return sub_builder
+        output_builder = _find_sub_builder(builder=sub_builder, name=name)
+        if output_builder is not None:
+            return output_builder
+    return None
From 541eafb223a2b0e07bca0e1f89dce12cc5146df0 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Thu, 5 Dec 2024 16:09:59 -0800
Subject: [PATCH 08/13] added fix for stimulus

---
 .../_configuration_models/_base_dataset_io.py | 61 ++++++++++++++-----
 1 file changed, 45 insertions(+), 16 deletions(-)

diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
index 55e1fb8ce..896966926 100644
--- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
+++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
@@ -341,26 +341,55 @@ def has_compound_dtype(builder: BaseBuilder, location_in_file: str) -> bool:
     bool
         Whether the dataset has a compound dtype.
     """
+    dataset_builder = get_dataset_builder(builder, location_in_file)
+    return isinstance(dataset_builder.dtype, list)
+
+
+def get_dataset_builder(builder, location_in_file):
+    """Find the appropriate sub-builder for the dataset at the given location in the file.
+
+    This function will traverse the groups in the location_in_file until it reaches a DatasetBuilder,
+    and then return that builder.
+
+    Parameters
+    ----------
+    builder : hdmf.build.builders.BaseBuilder
+        The builder object that would be used to construct the NWBFile object.
+    location_in_file : str
+        The location of the dataset within the NWBFile, e.g. 'acquisition/ElectricalSeries/data'.
+
+    Returns
+    -------
+    hdmf.build.builders.BaseBuilder
+        The builder object for the dataset at the given location.
+
+    Raises
+    ------
+    ValueError
+        If the location_in_file is not found in the builder.
+
+    Notes
+    -----
+    Items in defined top-level places like electrodes may not be in the groups of the nwbfile-level builder,
+    but rather in hidden locations like general/extracellular_ephys/electrodes.
+    Also, some items in these top-level locations may interrupt the order of the location_in_file.
+    For example, when location_in_file is 'stimulus/AcousticWaveformSeries/data', the builder for that dataset is
+    located at 'stimulus/presentation/AcousticWaveformSeries/data'.
+    For this reason, we recursively search for the appropriate sub-builder for each name in the location_in_file.
+    """
     split_location = iter(location_in_file.split("/"))
     name = next(split_location)
 
-    if name not in builder.groups:
-        # Items in defined top-level places like electrodes may not be in the groups of the nwbfile-level builder,
-        # but rather in hidden locations like general/extracellular_ephys/electrodes
+    while name not in builder.datasets:
         builder = _find_sub_builder(builder, name)
-        name = next(split_location)
-
-    # To find the appropriate builder for the dataset, we traverse the groups in the location_in_file until we reach
-    # a DatasetBuilder
-    while name in builder.groups:
-        builder = builder.groups[name]
-        name = next(split_location)
-    if name in builder.datasets:
-        builder = builder.datasets[name]
-    else:
-        raise ValueError(f"Could not find location '{location_in_file}' in builder.")
-
-    return isinstance(builder.dtype, list)
+        if builder is None:
+            raise ValueError(f"Could not find location '{location_in_file}' in builder.")
+        try:
+            name = next(split_location)
+        except StopIteration:
+            raise ValueError(f"Could not find location '{location_in_file}' in builder.")
+    builder = builder.datasets[name]
+    return builder
 
 
 def _find_sub_builder(builder: BaseBuilder, name: str) -> BaseBuilder:
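A sketch exercising `has_compound_dtype` and the traversal from [PATCH 05/13] through [PATCH 08/13]. This is illustrative only; it assumes pynwb's mock helpers, that `add_trial` accepts the `timeseries` argument, and hdmf >= 3.14.6, which builds the trials `timeseries` column with a compound dtype:

```python
# Illustrative only; the import path follows where the helper is defined above.
import numpy as np
from pynwb import TimeSeries, get_manager
from pynwb.testing.mock.ecephys import mock_ElectricalSeries
from pynwb.testing.mock.file import mock_NWBFile

from neuroconv.tools.nwb_helpers._configuration_models._base_dataset_io import (
    has_compound_dtype,
)

nwbfile = mock_NWBFile()
mock_ElectricalSeries(nwbfile=nwbfile)  # also creates the electrodes table
time_series = TimeSeries(name="ts", data=np.arange(10.0), unit="n.a.", rate=1.0)
nwbfile.add_acquisition(time_series)
nwbfile.add_trial(start_time=0.0, stop_time=1.0, timeseries=time_series)
nwbfile.add_stimulus(TimeSeries(name="stim", data=np.arange(5.0), unit="n.a.", rate=1.0))

builder = get_manager().build(nwbfile)

# The electrodes table is not a top-level group of the builder; it hides at
# general/extracellular_ephys/electrodes, the case _find_sub_builder handles.
print("electrodes" in builder.groups)  # False
print(has_compound_dtype(builder, "acquisition/ts/data"))  # False: plain floats
print(has_compound_dtype(builder, "stimulus/stim/data"))  # False, but found despite the extra 'presentation' level
print(has_compound_dtype(builder, "intervals/trials/timeseries/data"))  # True on hdmf >= 3.14.6
```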
From cd7670b2f236e291ba23a0ca2bf9ca8c7ac8b008 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Thu, 5 Dec 2024 16:48:41 -0800
Subject: [PATCH 09/13] switched to breadth-first search

---
 .../_configuration_models/_base_dataset_io.py | 32 +++++++++++++++----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
index 896966926..e767f96a4 100644
--- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
+++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
@@ -393,7 +393,7 @@ def get_dataset_builder(builder, location_in_file):
 
 
 def _find_sub_builder(builder: BaseBuilder, name: str) -> BaseBuilder:
-    """Recursively search for a sub-builder by name in a builder object.
+    """Search breadth-first for a sub-builder by name in a builder object.
 
     Parameters
     ----------
@@ -407,10 +407,30 @@ def _find_sub_builder(builder: BaseBuilder, name: str) -> BaseBuilder:
     hdmf.build.builders.BaseBuilder
         The sub-builder with the given name, or None if it could not be found.
     """
-    for sub_builder in builder.groups.values():
+    sub_builders = list(builder.groups.values())
+    return _recursively_search_sub_builders(sub_builders=sub_builders, name=name)
+
+
+def _recursively_search_sub_builders(sub_builders: list[BaseBuilder], name: str) -> BaseBuilder:
+    """Recursively search for a sub-builder by name in a list of sub-builders.
+
+    Parameters
+    ----------
+    sub_builders : list[hdmf.build.builders.BaseBuilder]
+        The list of sub-builders to search for the sub-builder in.
+    name : str
+        The name of the sub-builder to search for.
+
+    Returns
+    -------
+    hdmf.build.builders.BaseBuilder
+        The sub-builder with the given name, or None if it could not be found.
+    """
+    sub_sub_builders = []
+    for sub_builder in sub_builders:
         if sub_builder.name == name:
             return sub_builder
-        output_builder = _find_sub_builder(builder=sub_builder, name=name)
-        if output_builder is not None:
-            return output_builder
-    return None
+        sub_sub_builders.extend(list(sub_builder.groups.values()))
+    if len(sub_sub_builders) == 0:
+        return None
+    return _recursively_search_sub_builders(sub_builders=sub_sub_builders, name=name)

From d296d7f996f64878cbc0f7044be67cbab4029d81 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Fri, 6 Dec 2024 09:58:16 -0800
Subject: [PATCH 10/13] added support for missing top-level categories like
 lab_meta_data

---
 .../nwb_helpers/_configuration_models/_base_dataset_io.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
index e767f96a4..51e6be871 100644
--- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
+++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
@@ -376,10 +376,17 @@ def get_dataset_builder(builder, location_in_file):
     For example, when location_in_file is 'stimulus/AcousticWaveformSeries/data', the builder for that dataset is
     located at 'stimulus/presentation/AcousticWaveformSeries/data'.
     For this reason, we recursively search for the appropriate sub-builder for each name in the location_in_file.
+    Also, the first name in location_in_file is inherently suspect due to the way that the location is determined
+    in _find_location_in_memory_nwbfile(), and may not be present in the builder. For example, when location_in_file is
+    'lab_meta_data/fiber_photometry/fiber_photometry_table/location/data', the builder for that dataset is located at
+    'general/fiber_photometry/fiber_photometry_table/location/data'.
     """
     split_location = iter(location_in_file.split("/"))
     name = next(split_location)
 
+    if _find_sub_builder(builder, name) is None:
+        name = next(split_location)
+
     while name not in builder.datasets:
         builder = _find_sub_builder(builder, name)
         if builder is None:

From 8f2d1d24518b0220481edf335ff722a0879831db Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Fri, 6 Dec 2024 10:18:21 -0800
Subject: [PATCH 11/13] added support for links

---
 .../_configuration_models/_base_dataset_io.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
index 51e6be871..5cffcff32 100644
--- a/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
+++ b/src/neuroconv/tools/nwb_helpers/_configuration_models/_base_dataset_io.py
@@ -11,6 +11,7 @@
 from hdmf import Container
 from hdmf.build.builders import (
     BaseBuilder,
+    LinkBuilder,
 )
 from hdmf.utils import get_data_shape
 from pydantic import (
@@ -387,15 +388,17 @@ def get_dataset_builder(builder, location_in_file):
     if _find_sub_builder(builder, name) is None:
         name = next(split_location)
 
-    while name not in builder.datasets:
+    while name not in builder.datasets and name not in builder.links:
         builder = _find_sub_builder(builder, name)
         if builder is None:
-            raise ValueError(f"Could not find location '{location_in_file}' in builder.")
+            raise ValueError(f"Could not find location '{location_in_file}' in builder ({name} is missing).")
         try:
             name = next(split_location)
         except StopIteration:
-            raise ValueError(f"Could not find location '{location_in_file}' in builder.")
-    builder = builder.datasets[name]
+            raise ValueError(f"Could not find location '{location_in_file}' in builder ({name} is not a dataset).")
+    builder = builder[name]
+    if isinstance(builder, LinkBuilder):
+        builder = builder.builder
     return builder
From 64c00bac9d47480d5623ac155aa8ceec6f5316ea Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Fri, 13 Dec 2024 10:09:30 -0800
Subject: [PATCH 12/13] added builder to the tests

---
 .../test_models/test_dataset_io_configuration_model.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py
index 616c6e9d4..9abb6aa25 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pytest
+from pynwb import get_manager
 from pynwb.testing.mock.file import mock_NWBFile
 
 from neuroconv.tools.nwb_helpers import DatasetIOConfiguration
@@ -89,8 +90,12 @@ def get_data_io_kwargs(self):
     data = np.array(["test", "string", "abc"], dtype=object)
     nwbfile.add_trial_column(name="test", description="test column with object dtype but all strings", data=data)
     neurodata_object = nwbfile.trials.columns[2]
+    manager = get_manager()
+    builder = manager.build(nwbfile)
 
-    dataset_io_configuration = TestDatasetIOConfiguration.from_neurodata_object(neurodata_object, dataset_name="data")
+    dataset_io_configuration = TestDatasetIOConfiguration.from_neurodata_object(
+        neurodata_object, dataset_name="data", builder=builder
+    )
 
     assert dataset_io_configuration.chunk_shape == (3,)
     assert dataset_io_configuration.buffer_shape == (3,)

From 8a772b43c8f3f4fdf8635b07023f865a51cbe243 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Mon, 27 Jan 2025 11:19:15 -0800
Subject: [PATCH 13/13] updated changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b8b887fb8..cf295b18c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@
 
 ## Bug Fixes
 * `run_conversion` no longer triggers append mode or an index error when `nwbfile_path` points to a faulty file [PR #1180](https://github.com/catalystneuro/neuroconv/pull/1180)
+* `DatasetIOConfiguration` now recommends `chunk_shape = (len(candidate_dataset),)` for datasets with compound dtypes,
+as used by hdmf >= 3.14.6.
 
 ## Features
 * Use the latest version of ndx-pose for `DeepLabCutInterface` and `LightningPoseDataInterface` [PR #1128](https://github.com/catalystneuro/neuroconv/pull/1128)
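Finally, a sketch of the end-to-end path covered by the updated test and changelog entry. The `backend` argument name is assumed from neuroconv's public API, and the expected 1-D shapes follow the test's assertions:

```python
# get_default_dataset_io_configurations builds the NWBFile once and threads
# the builder through from_neurodata_object; the object-dtype column below
# should receive a 1-D chunk/buffer shape of (3,), per the test above.
import numpy as np
from pynwb.testing.mock.file import mock_NWBFile

from neuroconv.tools.nwb_helpers import get_default_dataset_io_configurations

nwbfile = mock_NWBFile()
for start_time in (0.0, 1.0, 2.0):
    nwbfile.add_trial(start_time=start_time, stop_time=start_time + 1.0)
data = np.array(["test", "string", "abc"], dtype=object)
nwbfile.add_trial_column(name="test", description="test column with object dtype but all strings", data=data)

for configuration in get_default_dataset_io_configurations(nwbfile=nwbfile, backend="hdf5"):
    print(configuration.location_in_file, configuration.chunk_shape)
```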