From e9c273774b3367c3e6acb7b538e696a60e575d31 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 12 Mar 2024 22:11:58 +0000 Subject: [PATCH 01/13] Add docs on spec language support (#1069) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 5 +++++ docs/source/index.rst | 1 + docs/source/spec_language_support.rst | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+) create mode 100644 docs/source/spec_language_support.rst diff --git a/CHANGELOG.md b/CHANGELOG.md index 1294aee02..cd7b69fcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # HDMF Changelog +## HDMF 3.13.0 (Upcoming) + +### Enhancements +- Added docs page that lists limitations of support for the HDMF specification language. @rly [#1069](https://github.com/hdmf-dev/hdmf/pull/1069) + ## HDMF 3.12.2 (February 9, 2024) ### Bug fixes diff --git a/docs/source/index.rst b/docs/source/index.rst index e6a53d3ab..2fcd4778a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -45,6 +45,7 @@ If you use HDMF in your research, please use the following citation: building_api export validation + spec_language_support .. toctree:: :hidden: diff --git a/docs/source/spec_language_support.rst b/docs/source/spec_language_support.rst new file mode 100644 index 000000000..43a628093 --- /dev/null +++ b/docs/source/spec_language_support.rst @@ -0,0 +1,21 @@ + +.. _spec_language_support: + +=========================================== +Support for the HDMF Specification Language +=========================================== + +The HDMF API provides nearly full support for all features of the `HDMF Specification Language`_ +version 3.0.0, except for the following: + +1. Attributes containing multiple references (see `#833`_) +2. Certain text and integer values for quantity (see `#423`_, `#531`_) +3. Datasets that do not have a data_type_inc/data_type_def and contain either a reference dtype or a compound dtype (see `#737`_) +4. Passing dataset dtype and shape from parent data type to child data type (see `#320`_) + +.. _HDMF Specification Language: https://hdmf-schema-language.readthedocs.io +.. _#833: https://github.com/hdmf-dev/hdmf/issues/833 +.. _#423: https://github.com/hdmf-dev/hdmf/issues/423 +.. _#531: https://github.com/hdmf-dev/hdmf/issues/531 +.. _#737: https://github.com/hdmf-dev/hdmf/issues/737 +.. 
_#320: https://github.com/hdmf-dev/hdmf/issues/320 From f092cbbe5d5f110246c8b7518e118add26aa5203 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Thu, 14 Mar 2024 09:16:22 -0700 Subject: [PATCH 02/13] Warn when adding ragged arrays to DynamicTable without index argument (#1066) * add detection of ragged array inputs to table * add tests for ragged array inputs to table * add warnings for ragged inputs to table * update CHANGELOG.md * check only lists and tuples for raggedness * add flag to turn off ragged data checks * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 1 + src/hdmf/common/table.py | 26 +++++++++++-- src/hdmf/utils.py | 14 +++++++ tests/unit/common/test_table.py | 68 +++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd7b69fcb..8eddf8270 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Enhancements - Added docs page that lists limitations of support for the HDMF specification language. @rly [#1069](https://github.com/hdmf-dev/hdmf/pull/1069) +- Added warning when using `add_row` or `add_column` to add a ragged array to `DynamicTable` without an index parameter. @stephprince [#1066](https://github.com/hdmf-dev/hdmf/pull/1066) ## HDMF 3.12.2 (February 9, 2024) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 5eeedcd86..3b67ff19d 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -15,7 +15,7 @@ from . import register_class, EXP_NAMESPACE from ..container import Container, Data from ..data_utils import DataIO, AbstractDataChunkIterator -from ..utils import docval, getargs, ExtenderMeta, popargs, pystr, AllowPositional, check_type +from ..utils import docval, getargs, ExtenderMeta, popargs, pystr, AllowPositional, check_type, is_ragged from ..term_set import TermSetWrapper @@ -639,12 +639,16 @@ def __len__(self): {'name': 'id', 'type': int, 'doc': 'the ID for the row', 'default': None}, {'name': 'enforce_unique_id', 'type': bool, 'doc': 'enforce that the id in the table must be unique', 'default': False}, + {'name': 'check_ragged', 'type': bool, 'default': True, + 'doc': ('whether or not to check for ragged arrays when adding data to the table. ' + 'Set to False to avoid checking every element if performance issues occur.')}, allow_extra=True) def add_row(self, **kwargs): """ Add a row to the table. If *id* is not provided, it will auto-increment. """ - data, row_id, enforce_unique_id = popargs('data', 'id', 'enforce_unique_id', kwargs) + data, row_id, enforce_unique_id, check_ragged = popargs('data', 'id', 'enforce_unique_id', 'check_ragged', + kwargs) data = data if data is not None else kwargs bad_data = [] @@ -709,6 +713,11 @@ def add_row(self, **kwargs): c.add_vector(data[colname]) else: c.add_row(data[colname]) + if check_ragged and is_ragged(c.data): + warn(("Data has elements with different lengths and therefore cannot be coerced into an " + "N-dimensional array. Use the 'index' argument when creating a column to add rows " + "with different lengths."), + stacklevel=2) def __eq__(self, other): """Compare if the two DynamicTables contain the same data. @@ -748,6 +757,9 @@ def __eq__(self, other): 'doc': ('class to use to represent the column data. 
If table=True, this field is ignored and a ' 'DynamicTableRegion object is used. If enum=True, this field is ignored and a EnumData ' 'object is used.')}, + {'name': 'check_ragged', 'type': bool, 'default': True, + 'doc': ('whether or not to check for ragged arrays when adding data to the table. ' + 'Set to False to avoid checking every element if performance issues occur.')}, allow_extra=True) def add_column(self, **kwargs): # noqa: C901 """ @@ -760,7 +772,7 @@ def add_column(self, **kwargs): # noqa: C901 :raises ValueError: if the column has already been added to the table """ name, data = getargs('name', 'data', kwargs) - index, table, enum, col_cls= popargs('index', 'table', 'enum', 'col_cls', kwargs) + index, table, enum, col_cls, check_ragged = popargs('index', 'table', 'enum', 'col_cls', 'check_ragged', kwargs) if isinstance(index, VectorIndex): warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be " @@ -823,6 +835,14 @@ def add_column(self, **kwargs): # noqa: C901 # once we have created the column create_vector_index = None if ckwargs.get('data', None) is not None: + + # if no index was provided, check that data is not ragged + if index is False and check_ragged and is_ragged(data): + warn(("Data has elements with different lengths and therefore cannot be coerced into an " + "N-dimensional array. Use the 'index' argument when adding a column of data with " + "different lengths."), + stacklevel=2) + # Check that we are asked to create an index if (isinstance(index, bool) or isinstance(index, int)) and index > 0 and len(data) > 0: # Iteratively flatten the data we use for the column based on the depth of the index to generate. diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 57a4bb465..5e0b61539 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -954,6 +954,20 @@ def to_uint_array(arr): raise ValueError('Cannot convert array of dtype %s to uint.' % arr.dtype) +def is_ragged(data): + """ + Test whether a list of lists or array is ragged / jagged + """ + if isinstance(data, (list, tuple)): + lengths = [len(sub_data) if isinstance(sub_data, (list, tuple)) else 1 for sub_data in data] + if len(set(lengths)) > 1: + return True # ragged at this level + + return any(is_ragged(sub_data) for sub_data in data) # check next level + + return False + + class LabelledDict(dict): """A dict wrapper that allows querying by an attribute of the values and running a callable on removed items. diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index 7246a8ba8..d98add060 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -354,6 +354,74 @@ def test_add_column_multi_index(self): ] ) + def test_add_column_without_required_index(self): + """ + Add a column with different element lengths without specifying an index parameter + """ + table = self.with_spec() + table.add_row(foo=5, bar=50.0, baz='lizard') + table.add_row(foo=5, bar=50.0, baz='lizard') + + # testing adding column without a necessary index parameter + lol_data = [[1, 2, 3], [1, 2, 3, 4]] + str_data = [['a', 'b'], ['a', 'b', 'c']] + empty_data = [[1, 2], []] + multi_nested_data = [[[1, 2, 3], [1, 2, 3, 4]], [1, 2]] + tuple_data = ((1, 2, 3), (1, 2, 3, 4)) + + msg = ("Data has elements with different lengths and therefore cannot be coerced into an N-dimensional " + "array. 
Use the 'index' argument when adding a column of data with different lengths.") + with self.assertWarnsWith(UserWarning, msg): + table.add_column(name='col1', description='', data=lol_data,) + with self.assertWarnsWith(UserWarning, msg): + table.add_column(name='col2', description='', data=str_data,) + with self.assertWarnsWith(UserWarning, msg): + table.add_column(name='col3', description='', data=empty_data,) + with self.assertWarnsWith(UserWarning, msg): + table.add_column(name='col4', description='', data=multi_nested_data,) + with self.assertWarnsWith(UserWarning, msg): + table.add_column(name='col5', description='', data=tuple_data,) + + def test_add_column_without_required_index_and_no_ragged_check(self): + """ + Add a column with different element lengths without checking for raggedness + """ + lol_data = [[1, 2, 3], [1, 2, 3, 4]] + table = self.with_spec() + table.add_row(foo=5, bar=50.0, baz='lizard') + table.add_row(foo=5, bar=50.0, baz='lizard') + table.add_column(name='col1', description='', data=lol_data, check_ragged=False) + + def test_add_row_without_required_index(self): + """ + Add rows with different element lengths without specifying an index parameter + """ + + # test adding row of list data with different lengths without index parameter + msg = ("Data has elements with different lengths and therefore cannot be coerced into an N-dimensional " + "array. Use the 'index' argument when creating a column to add rows with different lengths.") + table = self.with_spec() + table.add_column(name='qux', description='qux column') + table.add_row(foo=5, bar=50.0, baz='lizard', qux=[1, 2, 3]) + with self.assertWarnsWith(UserWarning, msg): + table.add_row(foo=5, bar=50.0, baz='lizard', qux=[1, 2, 3 ,4]) + + # test adding row of tuple/str data with different lengths without index parameter + table = self.with_spec() + table.add_column(name='qux', description='qux column') + table.add_row(foo=5, bar=50.0, baz='lizard', qux=('a', 'b')) + with self.assertWarnsWith(UserWarning, msg): + table.add_row(foo=5, bar=50.0, baz='lizard', qux=('a', 'b', 'c')) + + def test_add_row_without_required_index_and_no_ragged_check(self): + """ + Add rows with different element lengths without checking for raggedness + """ + table = self.with_spec() + table.add_column(name='qux', description='qux column') + table.add_row(foo=5, bar=50.0, baz='lizard', qux=[1, 2, 3]) + table.add_row(foo=5, bar=50.0, baz='lizard', qux=[1, 2, 3 ,4], check_ragged=False) + def test_add_column_auto_index_int(self): """ Add a column as a list of lists after we have already added data so that we need to create a single VectorIndex From ab18840f3e7014b19deb896e3e3d96df875eff5a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 22:38:20 +0000 Subject: [PATCH 03/13] [pre-commit.ci] pre-commit autoupdate (#1056) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ad798b8f7..d48d8e48d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.0 + rev: v0.3.2 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate From 437990d1cedc71c34fa20f142a786643677f5f3e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 22:44:48 +0000 Subject: [PATCH 04/13] Bump actions/add-to-project from 0.5.0 to 0.6.0 (#1063) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly --- .github/workflows/project_action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/project_action.yml b/.github/workflows/project_action.yml index bfca0b3f5..5f13d9540 100644 --- a/.github/workflows/project_action.yml +++ b/.github/workflows/project_action.yml @@ -20,7 +20,7 @@ jobs: - name: Add to Developer Board env: TOKEN: ${{ steps.generate_token.outputs.token }} - uses: actions/add-to-project@v0.5.0 + uses: actions/add-to-project@v0.6.0 with: project-url: https://github.com/orgs/hdmf-dev/projects/7 github-token: ${{ env.TOKEN }} @@ -28,7 +28,7 @@ jobs: - name: Add to Community Board env: TOKEN: ${{ steps.generate_token.outputs.token }} - uses: actions/add-to-project@v0.5.0 + uses: actions/add-to-project@v0.6.0 with: project-url: https://github.com/orgs/hdmf-dev/projects/8 github-token: ${{ env.TOKEN }} From bd3e150238987468a9c479b9aea31df5837cadc2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 16:00:47 -0700 Subject: [PATCH 05/13] [pre-commit.ci] pre-commit autoupdate (#1071) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d48d8e48d..786a3e4b7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.2 + rev: v0.3.3 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate From c79d2384196fc2b92d38d44725313c31ffce71a2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Mar 2024 17:34:06 +0000 Subject: [PATCH 06/13] Bump black from 23.10.1 to 24.3.0 (#1075) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index f61962728..1d856e4e7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,7 @@ # compute coverage, and create test environments. note that depending on the version of python installed, different # versions of requirements may be installed due to package incompatibilities. 
# -black==23.10.1 +black==24.3.0 codespell==2.2.6 coverage==7.3.2 pre-commit==3.5.0 From a6f51ff6083914579f1e44e9f47a122c3df950ed Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 20 Mar 2024 12:27:48 -0700 Subject: [PATCH 07/13] Release 3.13 (#1074) * Update CHANGELOG.md * Update release.md * updates * Update .github/PULL_REQUEST_TEMPLATE/release.md Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> * Update pyproject.toml --------- Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --- .github/PULL_REQUEST_TEMPLATE/release.md | 2 +- CHANGELOG.md | 2 +- pyproject.toml | 1 + requirements-opt.txt | 12 ++++++------ src/hdmf/common/hdmf-common-schema | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index 11bd20bfa..7c5ff5ece 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -10,7 +10,7 @@ Prepare for release of HDMF [version] and any other locations as needed - [ ] Update `pyproject.toml` as needed - [ ] Update `README.rst` as needed -- [ ] Update `src/hdmf/common/hdmf-common-schema` submodule as needed. Check the version number and commit SHA manually +- [ ] Update `src/hdmf/common/hdmf-common-schema` submodule as needed. Check the version number and commit SHA manually. Make sure we are using the latest release and not the latest commit on the `main` branch. - [ ] Update changelog (set release date) in `CHANGELOG.md` and any other docs as needed - [ ] Run tests locally including gallery tests, and inspect all warnings and outputs (`pytest && python test_gallery.py`) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8eddf8270..fb7a71e00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF Changelog -## HDMF 3.13.0 (Upcoming) +## HDMF 3.13.0 (March 20, 2024) ### Enhancements - Added docs page that lists limitations of support for the HDMF specification language. @rly [#1069](https://github.com/hdmf-dev/hdmf/pull/1069) diff --git a/pyproject.toml b/pyproject.toml index ee8037be5..b60ae6943 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "License :: OSI Approved :: BSD License", "Development Status :: 5 - Production/Stable", "Operating System :: OS Independent", diff --git a/requirements-opt.txt b/requirements-opt.txt index 644fc80be..11cd23e17 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -1,8 +1,8 @@ # pinned dependencies that are optional. 
used to reproduce an entire development environment to use HDMF -tqdm==4.66.1 -zarr==2.16.1 -linkml-runtime==1.6.0; python_version >= "3.9" -schemasheets==0.1.24; python_version >= "3.9" -oaklib==0.5.20; python_version >= "3.9" -pydantic==1.10.13 # linkml-runtime 1.6.0 and related packages require pydantic<2 +tqdm==4.66.2 +zarr==2.17.1 +linkml-runtime==1.7.3; python_version >= "3.9" +schemasheets==0.2.1; python_version >= "3.9" +oaklib==0.5.31; python_version >= "3.9" +pydantic==2.6.4 # linkml-runtime 1.6.0 and related packages require pydantic<2 pyyaml==6.0.1; python_version >= "3.9" diff --git a/src/hdmf/common/hdmf-common-schema b/src/hdmf/common/hdmf-common-schema index 4d2ddd638..5b4cbb31d 160000 --- a/src/hdmf/common/hdmf-common-schema +++ b/src/hdmf/common/hdmf-common-schema @@ -1 +1 @@ -Subproject commit 4d2ddd6387c4e36f21f41964fe8873c083680b15 +Subproject commit 5b4cbb31dbafcff51ca70bf218f464b186568151 From 67b82621296debf8202a23da5fca0a5bc9d6b9cf Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 20 Mar 2024 13:36:02 -0700 Subject: [PATCH 08/13] Remove outdated comment in requirements-opt.txt (#1076) --- requirements-opt.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-opt.txt b/requirements-opt.txt index 11cd23e17..6b4e102f1 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -4,5 +4,5 @@ zarr==2.17.1 linkml-runtime==1.7.3; python_version >= "3.9" schemasheets==0.2.1; python_version >= "3.9" oaklib==0.5.31; python_version >= "3.9" -pydantic==2.6.4 # linkml-runtime 1.6.0 and related packages require pydantic<2 +pydantic==2.6.4 pyyaml==6.0.1; python_version >= "3.9" From 5c8506216995f995b891da1e6b596ee42b7dd948 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 20 Mar 2024 16:16:43 -0700 Subject: [PATCH 09/13] Unwrap TermSetWrapper in ObjectMapper and not the IO (#1070) * Unwrap TermSetWrapper in ObjectMapper and no the IO * ruff * test * test * test * Update builders.py * Update manager.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update CHANGELOG.md * revert * test * test * test * test * tesT * tesT * tesT * tesT * move --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly --- CHANGELOG.md | 1 + src/hdmf/backends/hdf5/h5tools.py | 5 ---- src/hdmf/build/objectmapper.py | 6 ++++- .../build_tests/mapper_tests/test_build.py | 26 ++++++++++++++++++- 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb7a71e00..f35a06cd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## HDMF 3.13.0 (March 20, 2024) ### Enhancements +- Unwrap `TermSetWrapper` within the builder to support different backends more efficiently. @mavaylon1 [#1070](https://github.com/hdmf-dev/hdmf/pull/1070) - Added docs page that lists limitations of support for the HDMF specification language. @rly [#1069](https://github.com/hdmf-dev/hdmf/pull/1069) - Added warning when using `add_row` or `add_column` to add a ragged array to `DynamicTable` without an index parameter. 
@stephprince [#1066](https://github.com/hdmf-dev/hdmf/pull/1066) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 7a644f0b7..05ce36e13 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -17,7 +17,6 @@ from ...build import (Builder, GroupBuilder, DatasetBuilder, LinkBuilder, BuildManager, RegionBuilder, ReferenceBuilder, TypeMap, ObjectMapper) from ...container import Container -from ...term_set import TermSetWrapper from ...data_utils import AbstractDataChunkIterator from ...spec import RefSpec, DtypeSpec, NamespaceCatalog from ...utils import docval, getargs, popargs, get_data_shape, get_docval, StrDataset @@ -1103,10 +1102,6 @@ def write_dataset(self, **kwargs): # noqa: C901 data = data.data else: options['io_settings'] = {} - if isinstance(data, TermSetWrapper): - # This is for when the wrapped item is a dataset - # (refer to objectmapper.py for wrapped attributes) - data = data.value attributes = builder.attributes options['dtype'] = builder.dtype dset = None diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index b8e50d104..fed678d41 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -752,7 +752,11 @@ def build(self, **kwargs): % (container.__class__.__name__, container.name, repr(source))) try: # use spec_dtype from self.spec when spec_ext does not specify dtype - bldr_data, dtype = self.convert_dtype(spec, container.data, spec_dtype=spec_dtype) + if isinstance(container.data, TermSetWrapper): + data = container.data.value + else: + data = container.data + bldr_data, dtype = self.convert_dtype(spec, data, spec_dtype=spec_dtype) except Exception as ex: msg = 'could not resolve dtype for %s \'%s\'' % (type(container).__name__, container.name) raise Exception(msg) from ex diff --git a/tests/unit/build_tests/mapper_tests/test_build.py b/tests/unit/build_tests/mapper_tests/test_build.py index 8590f29f2..b90ad6f1a 100644 --- a/tests/unit/build_tests/mapper_tests/test_build.py +++ b/tests/unit/build_tests/mapper_tests/test_build.py @@ -1,7 +1,8 @@ from abc import ABCMeta, abstractmethod import numpy as np -from hdmf import Container, Data +from hdmf import Container, Data, TermSet, TermSetWrapper +from hdmf.common import VectorData, get_type_map from hdmf.build import ObjectMapper, BuildManager, TypeMap, GroupBuilder, DatasetBuilder from hdmf.build.warnings import DtypeConversionWarning from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, Spec @@ -10,6 +11,29 @@ from tests.unit.helpers.utils import CORE_NAMESPACE +try: + import linkml_runtime # noqa: F401 + LINKML_INSTALLED = True +except ImportError: + LINKML_INSTALLED = False + + +class TestUnwrapTermSetWrapperBuild(TestCase): + """ + Test the unwrapping of TermSetWrapper on regular datasets within build. + """ + def setUp(self): + if not LINKML_INSTALLED: + self.skipTest("optional LinkML module is not installed") + + def test_unwrap(self): + manager = BuildManager(get_type_map()) + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + build = manager.build(VectorData(name='test_data', + description='description', + data=TermSetWrapper(value=['Homo sapiens'], termset= terms))) + + self.assertEqual(build.data, ['Homo sapiens']) # TODO: test build of extended group/dataset that modifies an attribute dtype (commented out below), shape, value, etc. # by restriction. also check that attributes cannot be deleted or scope expanded. 
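The change above can be exercised directly: a TermSetWrapper validates values against its TermSet when data is written, and with this patch the ObjectMapper unwraps the wrapper during build, so every backend receives the plain value without backend-specific handling. A minimal sketch mirroring the new test, assuming the example_test_term_set.yaml fixture from the test suite and an installed linkml-runtime:

    from hdmf import TermSet, TermSetWrapper
    from hdmf.build import BuildManager
    from hdmf.common import VectorData, get_type_map

    terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')

    # Wrapping validates on write; appending a value outside the term set
    # (e.g. data.append('bad_data')) raises a ValueError.
    data = VectorData(name='test_data', description='description',
                      data=TermSetWrapper(value=['Homo sapiens'], termset=terms))

    # During build, the ObjectMapper unwraps the TermSetWrapper, so the
    # resulting builder holds the plain list rather than the wrapper.
    manager = BuildManager(get_type_map())
    builder = manager.build(data)
    assert builder.data == ['Homo sapiens']

Because unwrapping now happens once in the ObjectMapper instead of in each IO backend, the HDF5-specific TermSetWrapper branch removed from h5tools.py above is no longer needed, and other backends get the same behavior without duplicating that logic.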
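Patch 02 earlier in this series adds the is_ragged helper and the DynamicTable warnings; since its diff runs through several files, here is a condensed sketch of the resulting behavior, based on the tests that patch adds (the table and column names are illustrative):

    from hdmf.common import DynamicTable
    from hdmf.utils import is_ragged

    # is_ragged returns True when nested lists/tuples have unequal lengths
    # at any nesting level, i.e. when the data cannot be coerced into an
    # N-dimensional array.
    assert is_ragged([[1, 2, 3], [1, 2, 3, 4]])
    assert is_ragged([[[1, 2], [1]], [[3, 4], [5, 6]]])  # ragged one level down
    assert not is_ragged([[1, 2], [3, 4]])               # rectangular data

    table = DynamicTable(name='demo', description='ragged data demo')
    table.add_column(name='qux', description='qux column')
    table.add_row(qux=[1, 2, 3])
    table.add_row(qux=[1, 2, 3, 4])                # UserWarning: use 'index'
    table.add_row(qux=[1, 2], check_ragged=False)  # per-element check skipped

A warning (rather than an error) keeps existing code running while pointing users at the index argument, and check_ragged=False skips the per-element scan when writing large data that is already known to be well-formed.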
From 2c03baa50308061ae843d403623bfca5b6a6d84f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Mar 2024 09:48:12 -0700 Subject: [PATCH 10/13] Bump actions/add-to-project from 0.6.0 to 0.6.1 (#1078) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/project_action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/project_action.yml b/.github/workflows/project_action.yml index 5f13d9540..5d141d1d1 100644 --- a/.github/workflows/project_action.yml +++ b/.github/workflows/project_action.yml @@ -20,7 +20,7 @@ jobs: - name: Add to Developer Board env: TOKEN: ${{ steps.generate_token.outputs.token }} - uses: actions/add-to-project@v0.6.0 + uses: actions/add-to-project@v0.6.1 with: project-url: https://github.com/orgs/hdmf-dev/projects/7 github-token: ${{ env.TOKEN }} @@ -28,7 +28,7 @@ jobs: - name: Add to Community Board env: TOKEN: ${{ steps.generate_token.outputs.token }} - uses: actions/add-to-project@v0.6.0 + uses: actions/add-to-project@v0.6.1 with: project-url: https://github.com/orgs/hdmf-dev/projects/8 github-token: ${{ env.TOKEN }} From 000020232cbf6dcfb053c4d57d984445c2eaa0e7 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 25 Mar 2024 10:16:36 -0700 Subject: [PATCH 11/13] Update GitHub release checklist (#1080) * Update release.md - Add a step to manually run all tests before merging - Minor updates to other steps * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update release.md --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Matthew Avaylon --- .github/PULL_REQUEST_TEMPLATE/release.md | 26 +++++++++++++++--------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index 7c5ff5ece..86a7ad57d 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -1,6 +1,7 @@ Prepare for release of HDMF [version] ### Before merging: +- [ ] Make sure all PRs to be included in this release have been merged to `dev`. - [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, `requirements-doc.txt`, `requirements-opt.txt`, and `environment-ros3.yml` to the latest versions, and update dependency ranges in `pyproject.toml` and minimums in `requirements-min.txt` as needed. @@ -10,23 +11,28 @@ Prepare for release of HDMF [version] and any other locations as needed - [ ] Update `pyproject.toml` as needed - [ ] Update `README.rst` as needed -- [ ] Update `src/hdmf/common/hdmf-common-schema` submodule as needed. Check the version number and commit SHA manually. Make sure we are using the latest release and not the latest commit on the `main` branch. +- [ ] Update `src/hdmf/common/hdmf-common-schema` submodule as needed. Check the version number and commit SHA + manually. Make sure we are using the latest release and not the latest commit on the `main` branch. - [ ] Update changelog (set release date) in `CHANGELOG.md` and any other docs as needed - [ ] Run tests locally including gallery tests, and inspect all warnings and outputs - (`pytest && python test_gallery.py`) + (`pytest && python test_gallery.py`). Try to remove all warnings. 
- [ ] Run PyNWB tests locally including gallery and validation tests, and inspect all warnings and outputs - (`cd pynwb; python test.py -v > out.txt 2>&1`) + (`cd pynwb; git checkout dev; git pull; python test.py -v > out.txt 2>&1`) - [ ] Run HDMF-Zarr tests locally including gallery and validation tests, and inspect all warnings and outputs - (`cd hdmf-zarr; pytest && python test_gallery.py`) + (`cd hdmf-zarr; git checkout dev; git pull; pytest && python test_gallery.py`) - [ ] Test docs locally and inspect all warnings and outputs `cd docs; make clean && make html` -- [ ] Push changes to this PR and make sure all PRs to be included in this release have been merged -- [ ] Check that the readthedocs build for this PR succeeds (build latest to pull the new branch, then activate and - build docs for new branch): https://readthedocs.org/projects/hdmf/builds/ +- [ ] After pushing this branch to GitHub, manually trigger the "Run all tests" GitHub Actions workflow on this + branch by going to https://github.com/hdmf-dev/hdmf/actions/workflows/run_all_tests.yml, selecting + "Run workflow" on the right, selecting this branch, and clicking "Run workflow". Make sure all tests pass. +- [ ] Check that the readthedocs build for this PR succeeds (see the PR check) ### After merging: 1. Create release by following steps in `docs/source/make_a_release.rst` or use alias `git pypi-release [tag]` if set up 2. After the CI bot creates the new release (wait ~10 min), update the release notes on the [GitHub releases page](https://github.com/hdmf-dev/hdmf/releases) with the changelog -3. Check that the readthedocs "latest" and "stable" builds run and succeed -4. Update [conda-forge/hdmf-feedstock](https://github.com/conda-forge/hdmf-feedstock) with the latest version number - and SHA256 retrieved from PyPI > HDMF > Download Files > View hashes for the `.tar.gz` file. Re-render as needed +3. Check that the readthedocs "stable" build runs and succeeds +4. Either monitor [conda-forge/hdmf-feedstock](https://github.com/conda-forge/hdmf-feedstock) for the + regro-cf-autotick-bot bot to create a PR updating the version of HDMF to the latest PyPI release, usually within + 24 hours of release, or manually create a PR updating `recipe/meta.yaml` with the latest version number + and SHA256 retrieved from PyPI > HDMF > Download Files > View hashes for the `.tar.gz` file. Re-render and update + dependencies as needed. 
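The patch that follows introduces a TypeConfigurator and a global termset configuration: when a config is loaded, fields listed in it are automatically wrapped in a TermSetWrapper and validated on assignment. A sketch of the intended usage, drawn from the tests that patch adds and assuming its tests/unit/hdmf_config.yaml and example_test_term_set.yaml fixtures plus an installed linkml-runtime:

    from hdmf.common import (VectorData, load_type_config,
                             get_loaded_type_config, unload_type_config)

    # Load the global config, which maps data types to the TermSet that
    # governs a given field (here, the 'description' of VectorData).
    load_type_config(config_path='tests/unit/hdmf_config.yaml')

    # A valid term passes and is wrapped in a TermSetWrapper on the fly;
    # an out-of-set value would raise a ValueError.
    data = VectorData(name='foo', data=[0], description='Homo sapiens')
    assert data.description.value == 'Homo sapiens'

    # Types or fields missing from the config fall through with a warning.
    print(get_loaded_type_config())  # inspect the merged configuration
    unload_type_config()             # toggle validation back off

The configuration is global and shared across all type maps, so callers should unload it when done, as the patch's test fixtures do in tearDown.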
From 244d17a28ed436849b1973a3aaac8522d0ea922b Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 28 Mar 2024 10:08:58 -0700 Subject: [PATCH 12/13] Configuration File for TermSet validations (#1016) * config * rough draft * move * testing * check * new way of thinking draft * support multiple config files * testing * placeholder' * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update * Delete docs/gallery/example_config.yaml * clean up * clean up * clean up * checkpoint * need to clean * partial clean up * warn * yaml changes * revert * except * clean up * warning tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * tests * tests * tests * ruff * update * update * cov * tests * tests/clean * coverage' git push * coverage' git push * final clean ups * final clean ups * Update CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * Update src/hdmf/container.py * Update src/hdmf/container.py * Update src/hdmf/term_set.py * Update src/hdmf/term_set.py * in progress * Update src/hdmf/container.py Co-authored-by: Ryan Ly * Update tests/unit/test_term_set.py Co-authored-by: Ryan Ly * Update tests/unit/test_term_set.py Co-authored-by: Ryan Ly * in progress * in progress * in progress * in progress * clean tests * checkpoint of updates * checkpoint of updates * checkpoint of updates * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * copy * clean up * clean * Update CHANGELOG.md * clean up * clean up * test copy * name * Update CHANGELOG.md * Update requirements-opt.txt * Update requirements-opt.txt * Update container.py Co-authored-by: Ryan Ly * Update container.py Co-authored-by: Ryan Ly * Update __init__.py Co-authored-by: Ryan Ly * Update manager.py Co-authored-by: Ryan Ly * clean * namespace * Update src/hdmf/common/__init__.py * Update src/hdmf/common/__init__.py Co-authored-by: Ryan Ly * Update __init__.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly --- CHANGELOG.md | 5 + requirements-opt.txt | 6 +- src/hdmf/__init__.py | 2 +- src/hdmf/build/manager.py | 18 ++- src/hdmf/common/__init__.py | 25 ++++ src/hdmf/container.py | 82 +++++++++++- src/hdmf/term_set.py | 87 ++++++++++-- tests/unit/common/test_common.py | 14 +- tests/unit/common/test_table.py | 17 ++- tests/unit/hdmf_config.yaml | 9 ++ tests/unit/hdmf_config2.yaml | 18 +++ tests/unit/test_container.py | 15 ++- tests/unit/test_term_set.py | 124 +++++++++++++++++- .../schemasheets/nwb_static_enums.yaml | 58 +++----- 14 files changed, 402 insertions(+), 78 deletions(-) create mode 100644 tests/unit/hdmf_config.yaml create mode 100644 tests/unit/hdmf_config2.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index f35a06cd1..22c21b0e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # HDMF Changelog +## HDMF 3.14.0 (Upcoming) + +### Enhancements +- Added `TermSetConfigurator` to automatically wrap fields with `TermSetWrapper` according to a configuration file. @mavaylon1 [#1016](https://github.com/hdmf-dev/hdmf/pull/1016) + ## HDMF 3.13.0 (March 20, 2024) ### Enhancements diff --git a/requirements-opt.txt b/requirements-opt.txt index 6b4e102f1..53fd11e3a 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -1,8 +1,6 @@ # pinned dependencies that are optional. 
used to reproduce an entire development environment to use HDMF tqdm==4.66.2 zarr==2.17.1 -linkml-runtime==1.7.3; python_version >= "3.9" +linkml-runtime==1.7.4; python_version >= "3.9" schemasheets==0.2.1; python_version >= "3.9" -oaklib==0.5.31; python_version >= "3.9" -pydantic==2.6.4 -pyyaml==6.0.1; python_version >= "3.9" +oaklib==0.5.32; python_version >= "3.9" diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index 2699a28af..6fc72a117 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -3,7 +3,7 @@ from .container import Container, Data, DataRegion, HERDManager from .region import ListSlicer from .utils import docval, getargs -from .term_set import TermSet, TermSetWrapper +from .term_set import TermSet, TermSetWrapper, TypeConfigurator @docval( diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index 03f2856b8..a26de3279 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -5,6 +5,7 @@ from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, BaseBuilder from .classgenerator import ClassGenerator, CustomClassGenerator, MCIClassGenerator from ..container import AbstractContainer, Container, Data +from ..term_set import TypeConfigurator from ..spec import DatasetSpec, GroupSpec, NamespaceCatalog from ..spec.spec import BaseStorageSpec from ..utils import docval, getargs, ExtenderMeta, get_docval @@ -391,18 +392,23 @@ def data_type(self): class TypeMap: - ''' A class to maintain the map between ObjectMappers and AbstractContainer classes - ''' + """ + A class to maintain the map between ObjectMappers and AbstractContainer classes + """ @docval({'name': 'namespaces', 'type': NamespaceCatalog, 'doc': 'the NamespaceCatalog to use', 'default': None}, - {'name': 'mapper_cls', 'type': type, 'doc': 'the ObjectMapper class to use', 'default': None}) + {'name': 'mapper_cls', 'type': type, 'doc': 'the ObjectMapper class to use', 'default': None}, + {'name': 'type_config', 'type': TypeConfigurator, 'doc': 'The TypeConfigurator to use.', + 'default': None}) def __init__(self, **kwargs): - namespaces, mapper_cls = getargs('namespaces', 'mapper_cls', kwargs) + namespaces, mapper_cls, type_config = getargs('namespaces', 'mapper_cls', 'type_config', kwargs) if namespaces is None: namespaces = NamespaceCatalog() if mapper_cls is None: from .objectmapper import ObjectMapper # avoid circular import mapper_cls = ObjectMapper + if type_config is None: + type_config = TypeConfigurator() self.__ns_catalog = namespaces self.__mappers = dict() # already constructed ObjectMapper classes self.__mapper_cls = dict() # the ObjectMapper class to use for each container type @@ -410,6 +416,8 @@ def __init__(self, **kwargs): self.__data_types = dict() self.__default_mapper_cls = mapper_cls self.__class_generator = ClassGenerator() + self.type_config = type_config + self.register_generator(CustomClassGenerator) self.register_generator(MCIClassGenerator) @@ -422,7 +430,7 @@ def container_types(self): return self.__container_types def __copy__(self): - ret = TypeMap(copy(self.__ns_catalog), self.__default_mapper_cls) + ret = TypeMap(copy(self.__ns_catalog), self.__default_mapper_cls, self.type_config) ret.merge(self) return ret diff --git a/src/hdmf/common/__init__.py b/src/hdmf/common/__init__.py index e0782effe..248ca1095 100644 --- a/src/hdmf/common/__init__.py +++ b/src/hdmf/common/__init__.py @@ -20,6 +20,31 @@ # a global type map global __TYPE_MAP +@docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'}, + 
is_method=False) +def load_type_config(**kwargs): + """ + This method will either load the default config or the config provided by the path. + NOTE: This config is global and shared across all type maps. + """ + config_path = kwargs['config_path'] + __TYPE_MAP.type_config.load_type_config(config_path) + +def get_loaded_type_config(): + """ + This method returns the entire config file. + """ + if __TYPE_MAP.type_config.config is None: + msg = "No configuration is loaded." + raise ValueError(msg) + else: + return __TYPE_MAP.type_config.config + +def unload_type_config(): + """ + Unload the configuration file. + """ + return __TYPE_MAP.type_config.unload_type_config() # a function to register a container classes with the global map @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to get the spec for'}, diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 521568d95..f93c06199 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -5,6 +5,7 @@ from typing import Type from uuid import uuid4 from warnings import warn +import os import h5py import numpy as np @@ -13,6 +14,7 @@ from .data_utils import DataIO, append_data, extend_data from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict +from .term_set import TermSet, TermSetWrapper def _set_exp(cls): """Set a class as being experimental""" @@ -34,7 +36,7 @@ class HERDManager: This class manages whether to set/attach an instance of HERD to the subclass. """ - @docval({'name': 'herd', 'type': 'hdmf.common.resources.HERD', + @docval({'name': 'herd', 'type': 'HERD', 'doc': 'The external resources to be used for the container.'},) def link_resources(self, **kwargs): """ @@ -75,7 +77,6 @@ def _setter(cls, field): Make a setter function for creating a :py:func:`property` """ name = field['name'] - if not field.get('settable', True): return None @@ -85,10 +86,82 @@ def setter(self, val): if name in self.fields: msg = "can't set attribute '%s' -- already set" % name raise AttributeError(msg) - self.fields[name] = val + self.fields[name] = self._field_config(arg_name=name, val=val) return setter + @property + def data_type(self): + """ + Return the spec data type associated with this container. + """ + return getattr(self, self._data_type_attr) + + + def _field_config(self, arg_name, val): + """ + This method will be called in the setter. The termset configuration will be used (if loaded) + to check for a defined TermSet associated with the field. If found, the value of the field + will be wrapped with a TermSetWrapper. + + Even though the path field in the configurator can be a list of paths, the config + itself is only one file. When a user loads custom configs, the config is appended/modified. + The modifications are not written to file, avoiding permanent modifications. + """ + # load termset configuration file from global Config + from hdmf.common import get_type_map # circular import + type_map = get_type_map() + configurator = type_map.type_config + + if len(configurator.path)>0: + # The type_map has a config always set; however, when toggled off, the config path is empty. + CUR_DIR = os.path.dirname(os.path.realpath(configurator.path[0])) + termset_config = configurator.config + else: + return val + # check to see that the namespace for the container is in the config + if self.namespace not in type_map.container_types: + msg = "%s not found within loaded configuration." 
% self.namespace + warn(msg) + return val + else: + # check to see that the container type is in the config under the namespace + config_namespace = termset_config['namespaces'][self.namespace] + data_type = self.data_type + + if data_type not in config_namespace['data_types']: + msg = '%s not found within the configuration for %s' % (data_type, self.namespace) + warn(msg) + return val + else: + for attr in config_namespace['data_types'][data_type]: + obj_mapper = type_map.get_map(self) + + # get the spec according to attr name in schema + # Note: this is the name for the field in the config + spec = obj_mapper.get_attr_spec(attr) + + # In the case of dealing with datasets directly or not defined in the spec. + # (Data/VectorData/DynamicTable/etc) + if spec is None: + msg = "Spec not found for %s." % attr + warn(msg) + return val + else: + # If the val has been manually wrapped then skip checking the config for the attr + if isinstance(val, TermSetWrapper): + msg = "Field value already wrapped with TermSetWrapper." + warn(msg) + return val + else: + # From the spec, get the mapped attribute name + mapped_attr_name = obj_mapper.get_attribute(spec) + termset_path = os.path.join(CUR_DIR, + config_namespace['data_types'][data_type][mapped_attr_name]['termset']) + termset = TermSet(term_schema_path=termset_path) + val = TermSetWrapper(value=val, termset=termset) + return val + @classmethod def _getter(cls, field): """ @@ -389,7 +462,7 @@ def set_modified(self, **kwargs): def children(self): return tuple(self.__children) - @docval({'name': 'child', 'type': 'hdmf.container.Container', + @docval({'name': 'child', 'type': 'Container', 'doc': 'the child Container for this Container', 'default': None}) def add_child(self, **kwargs): warn(DeprecationWarning('add_child is deprecated. Set the parent attribute instead.')) @@ -787,7 +860,6 @@ class Data(AbstractContainer): """ A class for representing dataset containers """ - @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'}, {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'}) def __init__(self, **kwargs): diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py index f7169bdfd..1464f505c 100644 --- a/src/hdmf/term_set.py +++ b/src/hdmf/term_set.py @@ -5,6 +5,7 @@ import warnings import numpy as np from .data_utils import append_data, extend_data +from ruamel.yaml import YAML class TermSet: @@ -162,12 +163,12 @@ def __schemasheets_convert(self): This method returns a path to the new schema to be viewed via SchemaView. """ try: - import yaml from linkml_runtime.utils.schema_as_dict import schema_as_dict from schemasheets.schemamaker import SchemaMaker except ImportError: # pragma: no cover msg = "Install schemasheets." raise ValueError(msg) + schema_maker = SchemaMaker() tsv_file_paths = glob.glob(self.schemasheets_folder + "/*.tsv") schema = schema_maker.create_schema(tsv_file_paths) @@ -175,6 +176,7 @@ def __schemasheets_convert(self): schemasheet_schema_path = os.path.join(self.schemasheets_folder, f"{schema_dict['name']}.yaml") with open(schemasheet_schema_path, "w") as f: + yaml=YAML(typ='safe') yaml.dump(schema_dict, f) return schemasheet_schema_path @@ -262,13 +264,6 @@ def __getitem__(self, val): """ return self.__value[val] - # uncomment when DataChunkIterator objects can be wrapped by TermSet - # def __next__(self): - # """ - # Return the next item of a wrapped iterator. 
- # """ - # return self.__value.__next__() - # def __len__(self): return len(self.__value) @@ -304,3 +299,79 @@ def extend(self, arg): else: msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data])) raise ValueError(msg) + +class TypeConfigurator: + """ + This class allows users to toggle on/off a global configuration for defined data types. + When toggled on, every instance of a configuration file supported data type will be validated + according to the corresponding TermSet. + """ + @docval({'name': 'path', 'type': str, 'doc': 'Path to the configuration file.', 'default': None}) + def __init__(self, **kwargs): + self.config = None + if kwargs['path'] is None: + self.path = [] + else: + self.path = [kwargs['path']] + self.load_type_config(config_path=self.path[0]) + + @docval({'name': 'data_type', 'type': str, + 'doc': 'The desired data type within the configuration file.'}, + {'name': 'namespace', 'type': str, + 'doc': 'The namespace for the data type.'}) + def get_config(self, data_type, namespace): + """ + Return the config for that data type in the given namespace. + """ + try: + namespace_config = self.config['namespaces'][namespace] + except KeyError: + msg = 'The namespace %s was not found within the configuration.' % namespace + raise ValueError(msg) + + try: + type_config = namespace_config['data_types'][data_type] + return type_config + except KeyError: + msg = '%s was not found within the configuration for that namespace.' % data_type + raise ValueError(msg) + + @docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'}) + def load_type_config(self,config_path): + """ + Load the configuration file for validation on the fields defined for the objects within the file. + """ + with open(config_path, 'r') as config: + yaml=YAML(typ='safe') + termset_config = yaml.load(config) + if self.config is None: # set the initial config/load after config has been unloaded + self.config = termset_config + if len(self.path)==0: # for loading after an unloaded config + self.path.append(config_path) + else: # append/replace to the existing config + if config_path in self.path: + msg = 'This configuration file path already exists within the configurator.' + raise ValueError(msg) + else: + for namespace in termset_config['namespaces']: + if namespace not in self.config['namespaces']: # append namespace config if not present + self.config['namespaces'][namespace] = termset_config['namespaces'][namespace] + else: # check for any needed overrides within existing namespace configs + for data_type in termset_config['namespaces'][namespace]['data_types']: + # NOTE: these two branches effectively do the same thing, but are split for clarity. + if data_type in self.config['namespaces'][namespace]['data_types']: + replace_config = termset_config['namespaces'][namespace]['data_types'][data_type] + self.config['namespaces'][namespace]['data_types'][data_type] = replace_config + else: # append to config + new_config = termset_config['namespaces'][namespace]['data_types'][data_type] + self.config['namespaces'][namespace]['data_types'][data_type] = new_config + + # append path to self.path + self.path.append(config_path) + + def unload_type_config(self): + """ + Remove validation according to termset configuration file. 
+ """ + self.path = [] + self.config = None diff --git a/tests/unit/common/test_common.py b/tests/unit/common/test_common.py index 76c99d44a..e20614852 100644 --- a/tests/unit/common/test_common.py +++ b/tests/unit/common/test_common.py @@ -1,5 +1,5 @@ from hdmf import Data, Container -from hdmf.common import get_type_map +from hdmf.common import get_type_map, load_type_config, unload_type_config from hdmf.testing import TestCase @@ -11,3 +11,15 @@ def test_base_types(self): self.assertIs(cls, Container) cls = tm.get_dt_container_cls('Data', 'hdmf-common') self.assertIs(cls, Data) + + def test_copy_ts_config(self): + path = 'tests/unit/hdmf_config.yaml' + load_type_config(config_path=path) + tm = get_type_map() + config = {'namespaces': {'hdmf-common': {'version': '3.12.2', + 'data_types': {'VectorData': {'description': {'termset': 'example_test_term_set.yaml'}}, + 'VectorIndex': {'data': '...'}}}}} + + self.assertEqual(tm.type_config.config, config) + self.assertEqual(tm.type_config.path, [path]) + unload_type_config() diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index d98add060..f2d03332f 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -17,8 +17,7 @@ EnumData, DynamicTableRegion, get_manager, - SimpleMultiContainer, -) + SimpleMultiContainer) from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file from hdmf.utils import StrDataset from hdmf.data_utils import DataChunkIterator @@ -32,9 +31,9 @@ try: import linkml_runtime # noqa: F401 - LINKML_INSTALLED = True + REQUIREMENTS_INSTALLED = True except ImportError: - LINKML_INSTALLED = False + REQUIREMENTS_INSTALLED = False class TestDynamicTable(TestCase): @@ -131,7 +130,7 @@ def test_constructor_all_columns_are_iterators(self): # now test that when we supply id's that the error goes away _ = DynamicTable(name="TestTable", description="", columns=[column], id=list(range(3))) - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_add_col_validate(self): terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') col1 = VectorData( @@ -150,7 +149,7 @@ def test_add_col_validate(self): expected_df.index.name = 'id' pd.testing.assert_frame_equal(species.to_dataframe(), expected_df) - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_add_col_validate_bad_data(self): terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') col1 = VectorData( @@ -165,7 +164,7 @@ def test_add_col_validate_bad_data(self): data=TermSetWrapper(value=['bad data'], termset=terms)) - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_add_row_validate(self): terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') col1 = VectorData( @@ -187,7 +186,7 @@ def test_add_row_validate(self): expected_df.index.name = 'id' pd.testing.assert_frame_equal(species.to_dataframe(), expected_df) - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_add_row_validate_bad_data_one_col(self): terms = 
TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') col1 = VectorData( @@ -204,7 +203,7 @@ def test_add_row_validate_bad_data_one_col(self): with self.assertRaises(ValueError): species.add_row(Species_1='bad', Species_2='Ursus arctos horribilis') - @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed") def test_add_row_validate_bad_data_all_col(self): terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') col1 = VectorData( diff --git a/tests/unit/hdmf_config.yaml b/tests/unit/hdmf_config.yaml new file mode 100644 index 000000000..92ec2f321 --- /dev/null +++ b/tests/unit/hdmf_config.yaml @@ -0,0 +1,9 @@ +namespaces: + hdmf-common: + version: 3.12.2 + data_types: + VectorData: + description: + termset: example_test_term_set.yaml + VectorIndex: + data: ... diff --git a/tests/unit/hdmf_config2.yaml b/tests/unit/hdmf_config2.yaml new file mode 100644 index 000000000..0aecacf51 --- /dev/null +++ b/tests/unit/hdmf_config2.yaml @@ -0,0 +1,18 @@ +namespaces: + hdmf-common: + version: 3.12.2 + data_types: + Data: + description: + termset: example_test_term_set.yaml + EnumData: + description: + termset: example_test_term_set.yaml + VectorData: + description: ... + namespace2: + version: 0 + data_types: + MythicData: + description: + termset: example_test_term_set.yaml diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index b5a2d87e8..9ac81ba13 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -58,6 +58,11 @@ def test_new(self): self.assertFalse(child_obj._in_construct_mode) self.assertTrue(child_obj.modified) + def test_get_data_type(self): + obj = Container('obj1') + dt = obj.data_type + self.assertEqual(dt, 'Container') + def test_new_object_id_none(self): """Test that passing object_id=None to __new__ is OK and results in a non-None object ID being assigned. 
""" @@ -519,7 +524,7 @@ class EmptyFields(AbstractContainer): self.assertTupleEqual(EmptyFields.get_fields_conf(), tuple()) props = TestAbstractContainerFieldsConf.find_all_properties(EmptyFields) - expected = ['all_objects', 'children', 'container_source', 'fields', 'modified', + expected = ['all_objects', 'children', 'container_source', 'data_type', 'fields', 'modified', 'name', 'object_id', 'parent', 'read_io'] self.assertListEqual(props, expected) @@ -540,8 +545,8 @@ def __init__(self, **kwargs): self.assertTupleEqual(NamedFields.get_fields_conf(), expected) props = TestAbstractContainerFieldsConf.find_all_properties(NamedFields) - expected = ['all_objects', 'children', 'container_source', 'field1', 'field2', - 'fields', 'modified', 'name', 'object_id', + expected = ['all_objects', 'children', 'container_source', 'data_type', + 'field1', 'field2', 'fields', 'modified', 'name', 'object_id', 'parent', 'read_io'] self.assertListEqual(props, expected) @@ -622,8 +627,8 @@ class NamedFieldsChild(NamedFields): self.assertTupleEqual(NamedFieldsChild.get_fields_conf(), expected) props = TestAbstractContainerFieldsConf.find_all_properties(NamedFieldsChild) - expected = ['all_objects', 'children', 'container_source', 'field1', 'field2', - 'fields', 'modified', 'name', 'object_id', + expected = ['all_objects', 'children', 'container_source', 'data_type', + 'field1', 'field2', 'fields', 'modified', 'name', 'object_id', 'parent', 'read_io'] self.assertListEqual(props, expected) diff --git a/tests/unit/test_term_set.py b/tests/unit/test_term_set.py index b4a469438..99bd6bf59 100644 --- a/tests/unit/test_term_set.py +++ b/tests/unit/test_term_set.py @@ -1,9 +1,12 @@ import os +import numpy as np -from hdmf.term_set import TermSet, TermSetWrapper +from hdmf import Container +from hdmf.term_set import TermSet, TermSetWrapper, TypeConfigurator from hdmf.testing import TestCase, remove_test_file -from hdmf.common import VectorData -import numpy as np +from hdmf.common import (VectorIndex, VectorData, unload_type_config, + get_loaded_type_config, load_type_config) +from hdmf.utils import popargs CUR_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -215,3 +218,118 @@ def test_wrapper_extend_error(self): data_obj = VectorData(name='species', description='...', data=self.wrapped_list) with self.assertRaises(ValueError): data_obj.extend(['bad_data']) + +class TestTypeConfig(TestCase): + def setUp(self): + if not REQUIREMENTS_INSTALLED: + self.skipTest("optional LinkML module is not installed") + + def tearDown(self): + unload_type_config() + + def test_get_loaded_type_config_error(self): + with self.assertRaises(ValueError): + get_loaded_type_config() + + def test_config_path(self): + path = 'tests/unit/hdmf_config.yaml' + tc = TypeConfigurator(path=path) + self.assertEqual(tc.path, [path]) + + def test_get_config(self): + path = 'tests/unit/hdmf_config.yaml' + tc = TypeConfigurator(path=path) + self.assertEqual(tc.get_config('VectorData', 'hdmf-common'), + {'description': {'termset': 'example_test_term_set.yaml'}}) + + def test_get_config_namespace_error(self): + path = 'tests/unit/hdmf_config.yaml' + tc = TypeConfigurator(path=path) + with self.assertRaises(ValueError): + tc.get_config('VectorData', 'hdmf-common11') + + def test_get_config_container_error(self): + path = 'tests/unit/hdmf_config.yaml' + tc = TypeConfigurator(path=path) + with self.assertRaises(ValueError): + tc.get_config('VectorData11', 'hdmf-common') + + def test_already_loaded_path_error(self): + path = 'tests/unit/hdmf_config.yaml' + 
+        tc = TypeConfigurator(path=path)
+        with self.assertRaises(ValueError):
+            tc.load_type_config(config_path=path)
+
+    def test_load_two_unique_configs(self):
+        path = 'tests/unit/hdmf_config.yaml'
+        path2 = 'tests/unit/hdmf_config2.yaml'
+        tc = TypeConfigurator(path=path)
+        tc.load_type_config(config_path=path2)
+        config = {'namespaces': {'hdmf-common': {'version': '3.12.2',
+                  'data_types': {'VectorData': {'description': '...'},
+                                 'VectorIndex': {'data': '...'},
+                                 'Data': {'description': {'termset': 'example_test_term_set.yaml'}},
+                                 'EnumData': {'description': {'termset': 'example_test_term_set.yaml'}}}},
+                  'namespace2': {'version': 0,
+                  'data_types': {'MythicData': {'description': {'termset': 'example_test_term_set.yaml'}}}}}}
+        self.assertEqual(tc.path, [path, path2])
+        self.assertEqual(tc.config, config)
+
+
+class ExtensionContainer(Container):
+    __fields__ = ("description",)
+
+    def __init__(self, **kwargs):
+        description, namespace = popargs('description', 'namespace', kwargs)
+        self.namespace = namespace
+        super().__init__(**kwargs)
+        self.description = description
+
+
+class TestGlobalTypeConfig(TestCase):
+    def setUp(self):
+        if not REQUIREMENTS_INSTALLED:
+            self.skipTest("optional LinkML module is not installed")
+        load_type_config(config_path='tests/unit/hdmf_config.yaml')
+
+    def tearDown(self):
+        unload_type_config()
+
+    def test_load_config(self):
+        config = get_loaded_type_config()
+        self.assertEqual(config,
+                         {'namespaces': {'hdmf-common': {'version': '3.12.2',
+                          'data_types': {'VectorData': {'description': {'termset': 'example_test_term_set.yaml'}},
+                                         'VectorIndex': {'data': '...'}}}}})
+
+    def test_validate_with_config(self):
+        data = VectorData(name='foo', data=[0], description='Homo sapiens')
+        self.assertEqual(data.description.value, 'Homo sapiens')
+
+    def test_namespace_warn(self):
+        with self.assertWarns(Warning):
+            ExtensionContainer(name='foo',
+                               namespace='foo',
+                               description='Homo sapiens')
+
+    def test_container_type_warn(self):
+        with self.assertWarns(Warning):
+            ExtensionContainer(name='foo',
+                               namespace='hdmf-common',
+                               description='Homo sapiens')
+
+    def test_already_wrapped_warn(self):
+        terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
+        with self.assertWarns(Warning):
+            VectorData(name='foo',
+                       data=[0],
+                       description=TermSetWrapper(value='Homo sapiens', termset=terms))
+
+    def test_warn_field_not_in_spec(self):
+        col1 = VectorData(name='col1',
+                          description='Homo sapiens',
+                          data=['1a', '1b', '1c', '2a'])
+        with self.assertWarns(Warning):
+            VectorIndex(name='col1_index',
+                        target=col1,
+                        data=[3, 4])
diff --git a/tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml b/tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml
index 222205959..52af4b8a7 100644
--- a/tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml
+++ b/tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml
@@ -2,8 +2,7 @@ classes:
   BrainSample:
     slot_usage:
       cell_type: {}
-    slots:
-    - cell_type
+    slots: [cell_type]
 default_prefix: TEMP
 default_range: string
 description: this schema demonstrates the use of static enums
@@ -11,42 +10,27 @@ enums:
   NeuronOrGlialCellTypeEnum:
     description: Enumeration to capture various cell types found in the brain.
     permissible_values:
-      ASTROCYTE:
-        description: Characteristic star-shaped glial cells in the brain and spinal
-          cord.
-        meaning: CL:0000127
-      INTERNEURON:
-        description: Neurons whose axons (and dendrites) are limited to a single brain
-          area.
-        meaning: CL:0000099
-      MICROGLIAL_CELL:
-        description: Microglia are the resident immune cells of the brain and constantly
-          patrol the cerebral microenvironment to respond to pathogens and damage.
-        meaning: CL:0000129
-      MOTOR_NEURON:
-        description: Neurons whose cell body is located in the motor cortex, brainstem
-          or the spinal cord, and whose axon (fiber) projects to the spinal cord or
-          outside of the spinal cord to directly or indirectly control effector organs,
-          mainly muscles and glands.
-        meaning: CL:0000100
-      OLIGODENDROCYTE:
-        description: Type of neuroglia whose main functions are to provide support
-          and insulation to axons within the central nervous system (CNS) of jawed
-          vertebrates.
-        meaning: CL:0000128
-      PYRAMIDAL_NEURON:
-        description: Neurons with a pyramidal shaped cell body (soma) and two distinct
-          dendritic trees.
-        meaning: CL:0000598
+      ASTROCYTE: {description: Characteristic star-shaped glial cells in the brain
+          and spinal cord., meaning: 'CL:0000127'}
+      INTERNEURON: {description: Neurons whose axons (and dendrites) are limited to
+          a single brain area., meaning: 'CL:0000099'}
+      MICROGLIAL_CELL: {description: Microglia are the resident immune cells of the
+          brain and constantly patrol the cerebral microenvironment to respond to
+          pathogens and damage., meaning: 'CL:0000129'}
+      MOTOR_NEURON: {description: 'Neurons whose cell body is located in the motor
+          cortex, brainstem or the spinal cord, and whose axon (fiber) projects to
+          the spinal cord or outside of the spinal cord to directly or indirectly
+          control effector organs, mainly muscles and glands.', meaning: 'CL:0000100'}
+      OLIGODENDROCYTE: {description: Type of neuroglia whose main functions are to
+          provide support and insulation to axons within the central nervous system
+          (CNS) of jawed vertebrates., meaning: 'CL:0000128'}
+      PYRAMIDAL_NEURON: {description: Neurons with a pyramidal shaped cell body (soma)
+          and two distinct dendritic trees., meaning: 'CL:0000598'}
 id: https://w3id.org/linkml/examples/nwb_static_enums
-imports:
-- linkml:types
+imports: ['linkml:types']
 name: nwb_static_enums
-prefixes:
-  CL: http://purl.obolibrary.org/obo/CL_
-  TEMP: https://example.org/TEMP/
-  linkml: https://w3id.org/linkml/
+prefixes: {CL: 'http://purl.obolibrary.org/obo/CL_', TEMP: 'https://example.org/TEMP/',
+  linkml: 'https://w3id.org/linkml/'}
 slots:
-  cell_type:
-    required: true
+  cell_type: {required: true}
 title: static enums example

From d85d0cbc36d2e0fdb25e8fbea14d58ba7bf24a40 Mon Sep 17 00:00:00 2001
From: Matthew Avaylon
Date: Thu, 4 Apr 2024 00:57:28 -0700
Subject: [PATCH 13/13] Compound Dataset Support for TermSetWrapper (#1061)

* concept

* ruff

* test

* clean up

* doc

* ruff

* Update CHANGELOG.md

* Update term_set.py

* Update term_set.py

* Update CHANGELOG.md

* Update CHANGELOG.md

* Update CHANGELOG.md

* Update docs/gallery/plot_term_set.py

* Update src/hdmf/term_set.py

Co-authored-by: Ryan Ly

* checkpoint

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* tests

* tests

* test

* Update plot_term_set.py

* Update term_set.py

* Update tests/unit/common/test_table.py

Co-authored-by: Ryan Ly

* Update tests/unit/common/test_table.py

Co-authored-by: Ryan Ly

* data

* data

* method

* tests

* tests

* Update CHANGELOG.md

* Update CHANGELOG.md

* Update data_utils.py

* Update data_utils.py

* Update data_utils.py

* Update test_table.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update data_utils.py
* Update data_utils.py

* test

---------

Co-authored-by: Ryan Ly
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 CHANGELOG.md                    | 27 +++++-----
 docs/gallery/plot_term_set.py   | 14 +++++
 src/hdmf/data_utils.py          |  5 +-
 src/hdmf/term_set.py            | 64 +++++++++++++++++-----
 tests/unit/common/test_table.py | 95 +++++++++++++++++++++++++++++++++
 tests/unit/test_term_set.py     | 11 ++--
 6 files changed, 185 insertions(+), 31 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 22c21b0e8..e72015601 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@

 ### Enhancements
 - Added `TermSetConfigurator` to automatically wrap fields with `TermSetWrapper` according to a configuration file. @mavaylon1 [#1016](https://github.com/hdmf-dev/hdmf/pull/1016)
+- Updated `TermSetWrapper` to support validating a single field within a compound array. @mavaylon1 [#1061](https://github.com/hdmf-dev/hdmf/pull/1061)

 ## HDMF 3.13.0 (March 20, 2024)

@@ -138,8 +139,8 @@ will increase the minor version number to 3.10.0. See the 3.9.1 release notes be
 ## HDMF 3.6.0 (May 12, 2023)

 ### New features and minor improvements
-- Updated `ExternalResources` to have `FileTable` and new methods to query data. the `ResourceTable` has been removed along with methods relating to `Resource`. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850)
-- Updated hdmf-common-schema version to 1.6.0. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850)
+- Updated `ExternalResources` to have `FileTable` and new methods to query data. The `ResourceTable` has been removed along with methods relating to `Resource`. @mavaylon1 [#850](https://github.com/hdmf-dev/hdmf/pull/850)
+- Updated hdmf-common-schema version to 1.6.0. @mavaylon1 [#850](https://github.com/hdmf-dev/hdmf/pull/850)
 - Added testing of HDMF-Zarr on PR and nightly. @rly [#859](https://github.com/hdmf-dev/hdmf/pull/859)
 - Replaced `setup.py` with `pyproject.toml`. @rly [#844](https://github.com/hdmf-dev/hdmf/pull/844)
 - Use `ruff` instead of `flake8`. @rly [#844](https://github.com/hdmf-dev/hdmf/pull/844)
@@ -153,7 +154,7 @@ will increase the minor version number to 3.10.0. See the 3.9.1 release notes be
 [#853](https://github.com/hdmf-dev/hdmf/pull/853)

 ### Documentation and tutorial enhancements:
-- Updated `ExternalResources` how to tutorial to include the new features. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850)
+- Updated `ExternalResources` how to tutorial to include the new features. @mavaylon1 [#850](https://github.com/hdmf-dev/hdmf/pull/850)

 ## HDMF 3.5.6 (April 28, 2023)

@@ -193,13 +194,13 @@ will increase the minor version number to 3.10.0. See the 3.9.1 release notes be

 ### Bug fixes
 - Fixed issue with conda CI. @rly [#823](https://github.com/hdmf-dev/hdmf/pull/823)
-- Fixed issue with deprecated `pkg_resources`. @mavaylon [#822](https://github.com/hdmf-dev/hdmf/pull/822)
-- Fixed `hdmf.common` deprecation warning. @mavaylon [#826]((https://github.com/hdmf-dev/hdmf/pull/826)
+- Fixed issue with deprecated `pkg_resources`. @mavaylon1 [#822](https://github.com/hdmf-dev/hdmf/pull/822)
+- Fixed `hdmf.common` deprecation warning. @mavaylon1 [#826](https://github.com/hdmf-dev/hdmf/pull/826)

 ### Internal improvements
 - A number of typos fixed and Github action running codespell to ensure that no typo sneaks in [#825](https://github.com/hdmf-dev/hdmf/pull/825) was added.
-- Added additional documentation for `__fields__` in `AbstactContainer`. @mavaylon [#827](https://github.com/hdmf-dev/hdmf/pull/827)
+- Added additional documentation for `__fields__` in `AbstractContainer`. @mavaylon1 [#827](https://github.com/hdmf-dev/hdmf/pull/827)
-- Updated warning message for broken links. @mavaylon [#829](https://github.com/hdmf-dev/hdmf/pull/829)
+- Updated warning message for broken links. @mavaylon1 [#829](https://github.com/hdmf-dev/hdmf/pull/829)

 ## HDMF 3.5.1 (January 26, 2023)

@@ -218,9 +219,9 @@ will increase the minor version number to 3.10.0. See the 3.9.1 release notes be
 - Added ``HDMFIO.__del__`` to ensure that I/O objects are being closed on delete. @oruebel [#811](https://github.com/hdmf-dev/hdmf/pull/811)

 ### Minor improvements
-- Added support for reading and writing `ExternalResources` to and from denormalized TSV files. @mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799)
-- Changed the name of `ExternalResources.export_to_sqlite` to `ExternalResources.to_sqlite`. @mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799)
-- Updated the tutorial for `ExternalResources`. @mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799)
+- Added support for reading and writing `ExternalResources` to and from denormalized TSV files. @mavaylon1 [#799](https://github.com/hdmf-dev/hdmf/pull/799)
+- Changed the name of `ExternalResources.export_to_sqlite` to `ExternalResources.to_sqlite`. @mavaylon1 [#799](https://github.com/hdmf-dev/hdmf/pull/799)
+- Updated the tutorial for `ExternalResources`. @mavaylon1 [#799](https://github.com/hdmf-dev/hdmf/pull/799)
 - Added `message` argument for assert methods defined by `hdmf.testing.TestCase` to allow developers to include custom error messages with asserts. @oruebel [#812](https://github.com/hdmf-dev/hdmf/pull/812)
 - Clarify the expected chunk shape behavior for `DataChunkIterator`. @oruebel [#813](https://github.com/hdmf-dev/hdmf/pull/813)

@@ -361,7 +362,7 @@ the fields (i.e., when the constructor sets some fields to fixed values). @rly
 - Plotted results in external resources tutorial. @oruebel (#667)
 - Added support for Python 3.10. @rly (#679)
 - Updated requirements. @rly @TheChymera (#681)
-- Improved testing for `ExternalResources`. @mavaylon (#673)
+- Improved testing for `ExternalResources`. @mavaylon1 (#673)
 - Improved docs for export. @rly (#674)
 - Enhanced data chunk iteration speeds through new ``GenericDataChunkIterator`` class. @CodyCBakerPhD (#672)
 - Enhanced issue template forms on GitHub. @CodyCBakerPHD (#700)
@@ -437,7 +438,7 @@ the fields (i.e., when the constructor sets some fields to fixed values). @rly
 - Allow passing ``index=True`` to ``DynamicTable.to_dataframe()`` to support returning `DynamicTableRegion`
   columns as indices or Pandas DataFrame. @rly (#579)
 - Improve ``DynamicTable`` documentation. @rly (#639)
-- Updated external resources tutorial. @mavaylon (#611)
+- Updated external resources tutorial. @mavaylon1 (#611)

 ### Breaking changes and deprecations
 - Previously, when using ``DynamicTable.__getitem__`` or ``DynamicTable.get`` to access a selection of a
@@ -522,7 +523,7 @@ the fields (i.e., when the constructor sets some fields to fixed values). @rly
   - Add experimental namespace to HDMF common schema. New data types should go in the experimental namespace
     (hdmf-experimental) prior to being added to the core (hdmf-common) namespace. The purpose of this is to
     provide a place to test new data types that may break backward compatibility as they are refined.
     @ajtritt (#545)
-  - `ExternalResources` was changed to support storing both names and URIs for resources. @mavaylon (#517, #548)
+  - `ExternalResources` was changed to support storing both names and URIs for resources. @mavaylon1 (#517, #548)
   - The `VocabData` data type was replaced by `EnumData` to provide more flexible support for data from a set of
     fixed values.
   - Added `AlignedDynamicTable`, which defines a `DynamicTable` that supports storing a collection of sub-tables.
diff --git a/docs/gallery/plot_term_set.py b/docs/gallery/plot_term_set.py
index c1f7c7257..8bf2375aa 100644
--- a/docs/gallery/plot_term_set.py
+++ b/docs/gallery/plot_term_set.py
@@ -67,6 +67,7 @@
 """
 from hdmf.common import DynamicTable, VectorData
 import os
+import numpy as np

 try:
     import linkml_runtime  # noqa: F401
@@ -129,6 +130,19 @@
     data=TermSetWrapper(value=['Homo sapiens'], termset=terms)
 )

+######################################################
+# Validate Compound Data with TermSetWrapper
+# ----------------------------------------------------
+# :py:class:`~hdmf.term_set.TermSetWrapper` can also wrap compound data.
+# The user sets the field within the compound data type that is to be validated
+# against the termset.
+c_data = np.array([('Homo sapiens', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
+data = VectorData(
+    name='species',
+    description='...',
+    data=TermSetWrapper(value=c_data, termset=terms, field='species')
+)
+
 ######################################################
 # Validate Attributes with TermSetWrapper
 # ----------------------------------------------------
diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py
index 2df66106d..23f0b4019 100644
--- a/src/hdmf/data_utils.py
+++ b/src/hdmf/data_utils.py
@@ -20,7 +20,10 @@ def append_data(data, arg):
         data.append(arg)
         return data
     elif isinstance(data, np.ndarray):
-        return np.append(data, np.expand_dims(arg, axis=0), axis=0)
+        if len(data.dtype) > 0:  # data is a structured array
+            return np.append(data, arg)
+        else:  # arg is a scalar or row vector
+            return np.append(data, np.expand_dims(arg, axis=0), axis=0)
     elif isinstance(data, h5py.Dataset):
         shape = list(data.shape)
         shape[0] += 1
diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py
index 1464f505c..0f42819b0 100644
--- a/src/hdmf/term_set.py
+++ b/src/hdmf/term_set.py
@@ -216,19 +216,26 @@ class TermSetWrapper:
         {'name': 'value', 'type': (list, np.ndarray, dict, str, tuple),
          'doc': 'The target item that is wrapped, either data or attribute.'},
+        {'name': 'field', 'type': str, 'default': None,
+         'doc': 'The field within a compound array.'}
     )
     def __init__(self, **kwargs):
         self.__value = kwargs['value']
         self.__termset = kwargs['termset']
+        self.__field = kwargs['field']
         self.__validate()

     def __validate(self):
-        # check if list, tuple, array
-        if isinstance(self.__value, (list, np.ndarray, tuple)): # TODO: Future ticket on DataIO support
-            values = self.__value
-        # create list if none of those -> mostly for attributes
+        if self.__field is not None:
+            values = self.__value[self.__field]
         else:
-            values = [self.__value]
+            # check if list, tuple, array
+            if isinstance(self.__value, (list, np.ndarray, tuple)):
+                values = self.__value
+            # create list if none of those -> mostly for scalar attributes
+            else:
+                values = [self.__value]
+
         # iteratively validate
         bad_values = []
         for term in values:
@@ -243,6 +250,10 @@ def __validate(self):
     def value(self):
         return self.__value

+    @property
+    def field(self):
+        return self.__field
+
     @property
     def termset(self):
         return self.__termset
@@ -273,26 +284,55 @@ def __iter__(self):
         """
         return self.__value.__iter__()

+    def __multi_validation(self, data):
+        """
+        Internal bulk validation used by both append and extend.
+        Appending to a numpy array (np.append) behaves like list extend rather than list append,
+        so a single appended structured array (compound data) can carry multiple items, each of
+        which needs to be validated.
+        """
+        bad_values = []
+        for item in data:
+            if not self.termset.validate(term=item):
+                bad_values.append(item)
+        return bad_values
+
     def append(self, arg):
         """
         This append resolves the wrapper to use the append of the container using the wrapper.
         """
-        if self.termset.validate(term=arg):
-            self.__value = append_data(self.__value, arg)
+        if isinstance(arg, np.ndarray):
+            if self.__field is not None:  # compound array
+                values = arg[self.__field]
+            else:
+                msg = "Array needs to be a structured array with compound dtype. If this does not apply, use extend."
+                raise ValueError(msg)
         else:
-            msg = ('"%s" is not in the term set.' % arg)
+            values = [arg]
+
+        bad_values = self.__multi_validation(values)
+
+        if len(bad_values) != 0:
+            msg = ('"%s" is not in the term set.' % ', '.join([str(value) for value in bad_values]))
             raise ValueError(msg)

+        self.__value = append_data(self.__value, arg)
+
     def extend(self, arg):
         """
         This extend resolves the wrapper to use the extend of the container using the wrapper.
         """
-        bad_data = []
-        for item in arg:
-            if not self.termset.validate(term=item):
-                bad_data.append(item)
+        if isinstance(arg, np.ndarray):
+            if self.__field is not None:  # compound array
+                values = arg[self.__field]
+            else:
+                values = arg
+        else:
+            values = arg
+
+        bad_data = self.__multi_validation(values)

         if len(bad_data) == 0:
             self.__value = extend_data(self.__value, arg)
diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py
index f2d03332f..00b3c14a3 100644
--- a/tests/unit/common/test_table.py
+++ b/tests/unit/common/test_table.py
@@ -220,6 +220,101 @@ def test_add_row_validate_bad_data_all_col(self):
         with self.assertRaises(ValueError):
             species.add_row(Species_1='bad data', Species_2='bad data')

+    def test_compound_data_append(self):
+        c_data = np.array([('Homo sapiens', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
+        c_data2 = np.array([('Mus musculus', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
+        compound_vector_data = VectorData(
+            name='Species_1',
+            description='...',
+            data=c_data
+        )
+        compound_vector_data.append(c_data2)
+
+        np.testing.assert_array_equal(compound_vector_data.data, np.append(c_data, c_data2))
+
+    @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed")
+    def test_array_append_error(self):
+        c_data = np.array(['Homo sapiens'])
+        c_data2 = np.array(['Mus musculus'])
+
+        terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
+        vectordata_termset = VectorData(
+            name='Species_1',
+            description='...',
+            data=TermSetWrapper(value=c_data, termset=terms)
+        )
+
+        with self.assertRaises(ValueError):
+            vectordata_termset.append(c_data2)
+
+    def test_compound_data_extend(self):
+        c_data = np.array([('Homo sapiens', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
+        c_data2 = np.array([('Mus musculus', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
+        compound_vector_data = VectorData(
+            name='Species_1',
+            description='...',
+            data=c_data
+        )
+        compound_vector_data.extend(c_data2)
+
+        np.testing.assert_array_equal(compound_vector_data.data, np.vstack((c_data, c_data2)))
+
+    @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed")
+    def test_add_ref_wrapped_array_append(self):
+        data = np.array(['Homo sapiens'])
+        data2 = 'Mus musculus'
+        terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
+        vector_data = VectorData(
+            name='Species_1',
+            description='...',
+            data=TermSetWrapper(value=data, termset=terms)
+        )
+        vector_data.append(data2)
+
+        np.testing.assert_array_equal(vector_data.data.data, np.append(data, data2))
+
+    @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed")
+    def test_add_ref_wrapped_array_extend(self):
+        data = np.array(['Homo sapiens'])
+        data2 = np.array(['Mus musculus'])
+        terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
+        vector_data = VectorData(
+            name='Species_1',
+            description='...',
+            data=TermSetWrapper(value=data, termset=terms)
+        )
+        vector_data.extend(data2)
+
+        np.testing.assert_array_equal(vector_data.data.data, np.vstack((data, data2)))
+
+    @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed")
+    def test_add_ref_wrapped_compound_data_append(self):
+        c_data = np.array([('Homo sapiens', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
+        c_data2 = np.array([('Mus musculus', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
+        terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
+        compound_vector_data = VectorData(
+            name='Species_1',
+            description='...',
+            data=TermSetWrapper(value=c_data, field='species', termset=terms)
+        )
+        compound_vector_data.append(c_data2)
+
+        np.testing.assert_array_equal(compound_vector_data.data.data, np.append(c_data, c_data2))
+
+    @unittest.skipIf(not REQUIREMENTS_INSTALLED, "optional LinkML module is not installed")
+    def test_add_ref_wrapped_compound_data_extend(self):
+        c_data = np.array([('Homo sapiens', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
+        c_data2 = np.array([('Mus musculus', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
+        terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
+        compound_vector_data = VectorData(
+            name='Species_1',
+            description='...',
+            data=TermSetWrapper(value=c_data, field='species', termset=terms)
+        )
+        compound_vector_data.extend(c_data2)
+
+        np.testing.assert_array_equal(compound_vector_data.data.data, np.vstack((c_data, c_data2)))
+
     def test_constructor_bad_columns(self):
         columns = ['bad_column']
         msg = "'columns' must be a list of dict, VectorData, DynamicTableRegion, or VectorIndex"
diff --git a/tests/unit/test_term_set.py b/tests/unit/test_term_set.py
index 99bd6bf59..1d7721f1b 100644
--- a/tests/unit/test_term_set.py
+++ b/tests/unit/test_term_set.py
@@ -155,21 +155,22 @@ def setUp(self):
         self.wrapped_array = TermSetWrapper(value=np.array(['Homo sapiens']), termset=self.termset)
         self.wrapped_list = TermSetWrapper(value=['Homo sapiens'], termset=self.termset)

+        c_data = np.array([('Homo sapiens', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
+        self.wrapped_comp_array = TermSetWrapper(value=c_data,
+                                                 termset=self.termset,
+                                                 field='species')
+
         self.np_data = VectorData(
             name='Species_1',
             description='...',
             data=self.wrapped_array
         )
-        self.list_data = VectorData(
-            name='Species_1',
-            description='...',
-            data=self.wrapped_list
-        )

     def test_properties(self):
         self.assertEqual(self.wrapped_array.value, ['Homo sapiens'])
         self.assertEqual(self.wrapped_array.termset.view_set, self.termset.view_set)
         self.assertEqual(self.wrapped_array.dtype, 'U12')  # this covers __getattr__
+        self.assertEqual(self.wrapped_comp_array.field, 'species')

     def test_get_item(self):
         self.assertEqual(self.np_data.data[0], 'Homo sapiens')
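
Editor's note: the patch above adds field-aware validation to TermSetWrapper. The standalone
sketch below illustrates that field-selection logic outside of HDMF, for readers who want to
see the mechanism without the optional LinkML dependency. The VALID_SPECIES set and the
validate_field helper are hypothetical stand-ins for a real TermSet and for
TermSetWrapper.__validate; they are not part of the patch series.

import numpy as np

# Hypothetical stand-in for a TermSet; a real TermSet checks terms against
# a LinkML schema such as example_test_term_set.yaml.
VALID_SPECIES = {'Homo sapiens', 'Mus musculus'}

def validate_field(arr, field=None):
    # Mirror the patched logic: when a field is given, validate only that
    # column of the structured (compound) array; otherwise validate every item.
    values = arr[field] if field is not None else arr
    bad_values = [v for v in values if v not in VALID_SPECIES]
    if len(bad_values) != 0:
        raise ValueError('"%s" is not in the term set.' % ', '.join(map(str, bad_values)))

c_data = np.array([('Homo sapiens', 24)], dtype=[('species', 'U50'), ('age', 'i4')])
validate_field(c_data, field='species')  # passes; the 'age' column is ignored

bad = np.array([('Dragon', 7)], dtype=[('species', 'U50'), ('age', 'i4')])
try:
    validate_field(bad, field='species')
except ValueError as err:
    print(err)  # "Dragon" is not in the term set.

Selecting the column first (arr[field]) is what lets a single wrapped compound array reuse the
same bulk-validation path that extend uses for flat arrays.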