diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index ef3daed9c..c96a78551 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -46,25 +46,17 @@ body: - Linux validations: required: true - - type: dropdown - id: executable - attributes: - label: Python Executable - options: - - Conda - - Python - validations: - required: true - type: dropdown id: python_version attributes: label: Python Version options: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" + - "newer" validations: required: true - type: textarea diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index 86a7ad57d..82f987164 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -2,11 +2,8 @@ Prepare for release of HDMF [version] ### Before merging: - [ ] Make sure all PRs to be included in this release have been merged to `dev`. -- [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, - `requirements-doc.txt`, `requirements-opt.txt`, and `environment-ros3.yml` to the latest versions, - and update dependency ranges in `pyproject.toml` and minimums in `requirements-min.txt` as needed. - Run `pip install pur && pur -r requirements-dev.txt -r requirements.txt -r requirements-opt.txt` - and manually update `environment-ros3.yml`. +- [ ] Major and minor releases: Update dependency ranges in `pyproject.toml` and minimums in + `requirements-min.txt` as needed. - [ ] Check legal file dates and information in `Legal.txt`, `license.txt`, `README.rst`, `docs/source/conf.py`, and any other locations as needed - [ ] Update `pyproject.toml` as needed @@ -34,5 +31,5 @@ Prepare for release of HDMF [version] 4. Either monitor [conda-forge/hdmf-feedstock](https://github.com/conda-forge/hdmf-feedstock) for the regro-cf-autotick-bot bot to create a PR updating the version of HDMF to the latest PyPI release, usually within 24 hours of release, or manually create a PR updating `recipe/meta.yaml` with the latest version number - and SHA256 retrieved from PyPI > HDMF > Download Files > View hashes for the `.tar.gz` file. Re-render and update - dependencies as needed. + and SHA256 retrieved from PyPI > HDMF > Download Files > View hashes for the `.tar.gz` file. Re-render and + update the dependencies as needed. diff --git a/.github/workflows/check_sphinx_links.yml b/.github/workflows/check_sphinx_links.yml index 15fc61e30..24422c47c 100644 --- a/.github/workflows/check_sphinx_links.yml +++ b/.github/workflows/check_sphinx_links.yml @@ -21,13 +21,12 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.11' # TODO update to 3.12 when optional reqs (e.g., oaklib) support 3.12 + python-version: '3.12' # TODO: Update to 3.13 when linkml and its deps support 3.13 - name: Install Sphinx dependencies and package run: | python -m pip install --upgrade pip - python -m pip install -r requirements-doc.txt -r requirements-opt.txt - python -m pip install . 
+ python -m pip install ".[all]" - name: Check Sphinx internal and external links run: sphinx-build -W -b linkcheck ./docs/source ./test_build diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 5861ab136..ab0db960a 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.12' + python-version: '3.13' - name: Install build dependencies run: | @@ -28,7 +28,7 @@ jobs: - name: Run tox tests run: | - tox -e py312-upgraded + tox -e py313-upgraded - name: Build wheel and source distribution run: | diff --git a/.github/workflows/project_action.yml b/.github/workflows/project_action.yml index 5d141d1d1..6037bd4ab 100644 --- a/.github/workflows/project_action.yml +++ b/.github/workflows/project_action.yml @@ -20,7 +20,7 @@ jobs: - name: Add to Developer Board env: TOKEN: ${{ steps.generate_token.outputs.token }} - uses: actions/add-to-project@v0.6.1 + uses: actions/add-to-project@v1.0.2 with: project-url: https://github.com/orgs/hdmf-dev/projects/7 github-token: ${{ env.TOKEN }} @@ -28,7 +28,7 @@ jobs: - name: Add to Community Board env: TOKEN: ${{ steps.generate_token.outputs.token }} - uses: actions/add-to-project@v0.6.1 + uses: actions/add-to-project@v1.0.2 with: project-url: https://github.com/orgs/hdmf-dev/projects/8 github-token: ${{ env.TOKEN }} diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index d5f8afc7f..b1d2ddc59 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -25,30 +25,27 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: linux-python3.9 , test-tox-env: pytest-py39-pinned , python-ver: "3.9" , os: ubuntu-latest } - - { name: linux-python3.10 , test-tox-env: pytest-py310-pinned , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.11 , test-tox-env: pytest-py311-pinned , python-ver: "3.11", os: ubuntu-latest } - - { name: linux-python3.11-optional , test-tox-env: pytest-py311-optional-pinned , python-ver: "3.11", os: ubuntu-latest } - - { name: linux-python3.12 , test-tox-env: pytest-py312-pinned , python-ver: "3.12", os: ubuntu-latest } - - { name: linux-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } - - { name: linux-python3.12-prerelease , test-tox-env: pytest-py312-prerelease , python-ver: "3.12", os: ubuntu-latest } - - { name: windows-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: windows-latest } - - { name: windows-python3.9 , test-tox-env: pytest-py39-pinned , python-ver: "3.9" , os: windows-latest } - - { name: windows-python3.10 , test-tox-env: pytest-py310-pinned , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.11 , test-tox-env: pytest-py311-pinned , python-ver: "3.11", os: windows-latest } - - { name: windows-python3.11-optional , test-tox-env: pytest-py311-optional-pinned , python-ver: "3.11", os: windows-latest } - - { name: windows-python3.12 , test-tox-env: pytest-py312-pinned , python-ver: "3.12", os: windows-latest } - - { name: windows-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: windows-latest } - - { name: windows-python3.12-prerelease , test-tox-env: pytest-py312-prerelease , python-ver: "3.12", os: windows-latest } - - { 
name: macos-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: macos-latest } - - { name: macos-python3.9 , test-tox-env: pytest-py39-pinned , python-ver: "3.9" , os: macos-latest } - - { name: macos-python3.10 , test-tox-env: pytest-py310-pinned , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.11 , test-tox-env: pytest-py311-pinned , python-ver: "3.11", os: macos-latest } - - { name: macos-python3.11-optional , test-tox-env: pytest-py311-optional-pinned , python-ver: "3.11", os: macos-latest } - - { name: macos-python3.12 , test-tox-env: pytest-py312-pinned , python-ver: "3.12", os: macos-latest } - - { name: macos-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: macos-latest } - - { name: macos-python3.12-prerelease , test-tox-env: pytest-py312-prerelease , python-ver: "3.12", os: macos-latest } + - { name: linux-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-python3.10-upgraded , test-tox-env: pytest-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } + - { name: linux-python3.11-upgraded , test-tox-env: pytest-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } + - { name: linux-python3.13-upgraded , test-tox-env: pytest-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } + - { name: linux-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: ubuntu-latest } + - { name: linux-python3.13-prerelease-optional , test-tox-env: pytest-py313-prerelease-optional , python-ver: "3.13", os: ubuntu-latest } + - { name: windows-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-python3.10-upgraded , test-tox-env: pytest-py310-upgraded , python-ver: "3.10", os: windows-latest } + - { name: windows-python3.11-upgraded , test-tox-env: pytest-py311-upgraded , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: windows-latest } + - { name: windows-python3.13-upgraded , test-tox-env: pytest-py313-upgraded , python-ver: "3.13", os: windows-latest } + - { name: windows-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: windows-latest } + - { name: windows-python3.13-prerelease-optional , test-tox-env: pytest-py313-prerelease-optional , python-ver: "3.13", os: windows-latest } + - { name: macos-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: macos-13 } + - { name: macos-python3.10-upgraded , test-tox-env: pytest-py310-upgraded , python-ver: "3.10", os: macos-latest } + - { name: macos-python3.11-upgraded , test-tox-env: pytest-py311-upgraded , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: macos-latest } + - { name: macos-python3.13-upgraded , test-tox-env: pytest-py313-upgraded , python-ver: "3.13", os: macos-latest } + - { name: macos-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: macos-latest } + - { name: macos-python3.13-prerelease-optional , test-tox-env: pytest-py313-prerelease-optional , python-ver: "3.13", os: macos-latest } steps: - name: Checkout repo with submodules uses: 
actions/checkout@v4 @@ -97,18 +94,16 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: linux-gallery-python3.11-optional , test-tox-env: gallery-py311-optional-pinned , python-ver: "3.11", os: ubuntu-latest } - - { name: linux-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } - - { name: linux-gallery-python3.12-prerelease , test-tox-env: gallery-py312-prerelease , python-ver: "3.12", os: ubuntu-latest } - - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } - - { name: windows-gallery-python3.11-optional , test-tox-env: gallery-py311-optional-pinned , python-ver: "3.11", os: windows-latest } - - { name: windows-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: windows-latest } - - { name: windows-gallery-python3.12-prerelease, test-tox-env: gallery-py312-prerelease , python-ver: "3.12", os: windows-latest } - - { name: macos-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: macos-latest } - - { name: macos-gallery-python3.11-optional , test-tox-env: gallery-py311-optional-pinned , python-ver: "3.11", os: macos-latest } - - { name: macos-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: macos-latest } - - { name: macos-gallery-python3.12-prerelease , test-tox-env: gallery-py312-prerelease , python-ver: "3.12", os: macos-latest } + # TODO: Update to 3.13 when linkml and its deps support 3.13 + - { name: linux-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-gallery-python3.12-upgraded-optional , test-tox-env: gallery-py312-upgraded-optional , python-ver: "3.12", os: ubuntu-latest } + - { name: linux-gallery-python3.12-prerelease-optional , test-tox-env: gallery-py312-prerelease-optional , python-ver: "3.12", os: ubuntu-latest } + - { name: windows-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-gallery-python3.12-upgraded-optional , test-tox-env: gallery-py312-upgraded-optional , python-ver: "3.12", os: windows-latest } + - { name: windows-gallery-python3.12-prerelease-optional , test-tox-env: gallery-py312-prerelease-optional , python-ver: "3.12", os: windows-latest } + - { name: macos-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: macos-13 } + - { name: macos-gallery-python3.12-upgraded-optional , test-tox-env: gallery-py312-upgraded-optional , python-ver: "3.12", os: macos-latest } + - { name: macos-gallery-python3.12-prerelease-optional , test-tox-env: gallery-py312-prerelease-optional , python-ver: "3.12", os: macos-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -131,73 +126,7 @@ jobs: run: | tox -e ${{ matrix.test-tox-env }} - run-all-tests-on-conda: - name: ${{ matrix.name }} - runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} # needed for conda environment to work - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} - cancel-in-progress: true - strategy: - fail-fast: false - matrix: - include: - - { name: conda-linux-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: 
conda-linux-python3.9 , test-tox-env: pytest-py39-pinned , python-ver: "3.9" , os: ubuntu-latest } - - { name: conda-linux-python3.10 , test-tox-env: pytest-py310-pinned , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.11 , test-tox-env: pytest-py311-pinned , python-ver: "3.11", os: ubuntu-latest } - - { name: conda-linux-python3.11-optional , test-tox-env: pytest-py311-optional-pinned , python-ver: "3.11", os: ubuntu-latest } - - { name: conda-linux-python3.12 , test-tox-env: pytest-py312-pinned , python-ver: "3.12", os: ubuntu-latest } - - { name: conda-linux-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } - - { name: conda-linux-python3.12-prerelease , test-tox-env: pytest-py312-prerelease , python-ver: "3.12", os: ubuntu-latest } - steps: - - name: Checkout repo with submodules - uses: actions/checkout@v4 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required to determine the version - - - name: Set up Conda - uses: conda-incubator/setup-miniconda@v3 - with: - auto-update-conda: true - python-version: ${{ matrix.python-ver }} - channels: conda-forge - mamba-version: "*" - - - name: Install build dependencies - run: | - conda config --set always_yes yes --set changeps1 no - conda info - mamba install -c conda-forge "tox>=4" - - - name: Conda reporting - run: | - conda info - conda config --show-sources - conda list --show-channel-urls - - # NOTE tox installs packages from PyPI not conda-forge... - - name: Run tox tests - run: | - tox -e ${{ matrix.test-tox-env }} - - - name: Build wheel and source distribution - run: | - tox -e build - ls -1 dist - - - name: Test installation from a wheel - run: | - tox -e wheelinstall --installpkg dist/*-none-any.whl - - - name: Test installation from a source distribution - run: | - tox -e wheelinstall --installpkg dist/*.tar.gz - - run-gallery-ros3-tests: + run-ros3-tests: name: ${{ matrix.name }} runs-on: ${{ matrix.os }} defaults: @@ -210,9 +139,9 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest } - - { name: windows-gallery-python3.12-ros3 , python-ver: "3.12", os: windows-latest } - - { name: macos-gallery-python3.12-ros3 , python-ver: "3.12", os: macos-latest } + - { name: linux-python3.13-ros3 , python-ver: "3.13", os: ubuntu-latest } + - { name: windows-python3.13-ros3 , python-ver: "3.13", os: windows-latest } + - { name: macos-python3.13-ros3 , python-ver: "3.13", os: macos-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -229,11 +158,10 @@ jobs: python-version: ${{ matrix.python-ver }} channels: conda-forge auto-activate-base: false - mamba-version: "*" - name: Install run dependencies run: | - pip install -e . + pip install . 
pip list - name: Conda reporting diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index 7a18e5068..ee1f9ff91 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -31,7 +31,7 @@ jobs: - { os: macos-latest , opt_req: false } env: # used by codecov-action OS: ${{ matrix.os }} - PYTHON: '3.11' # TODO update to 3.12 when optional reqs (e.g., oaklib) support 3.12 + PYTHON: '3.12' # TODO: Update to 3.13 when linkml and its deps support 3.13 steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -44,29 +44,85 @@ jobs: with: python-version: ${{ env.PYTHON }} - - name: Install dependencies + - name: Upgrade pip run: | python -m pip install --upgrade pip - python -m pip install -r requirements-dev.txt -r requirements.txt - - name: Install optional dependencies + - name: Install package + if: ${{ ! matrix.opt_req }} + run: | + python -m pip install ".[test]" + + - name: Install package with optional dependencies if: ${{ matrix.opt_req }} - run: python -m pip install -r requirements-opt.txt + run: | + python -m pip install ".[test,tqdm,zarr,termset]" - - name: Install package + - name: Run tests and generate coverage report run: | - python -m pip install -e . # must install in editable mode for coverage to find sources + # coverage is configured in pyproject.toml + # codecov uploader requires xml format python -m pip list + pytest --cov --cov-report=xml --cov-report=term - - name: Run tests and generate coverage report + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v5 + with: + fail_ci_if_error: true + files: ./coverage.xml + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + + run-ros3-coverage: + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} # necessary for conda + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} + cancel-in-progress: true + strategy: + fail-fast: false + matrix: + include: + - { name: linux-python3.13-ros3 , python-ver: "3.13", os: ubuntu-latest } + steps: + - name: Checkout repo with submodules + uses: actions/checkout@v4 + with: + submodules: 'recursive' + fetch-depth: 0 # tags are required to determine the version + + - name: Set up Conda + uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + activate-environment: ros3 + environment-file: environment-ros3.yml + python-version: ${{ matrix.python-ver }} + channels: conda-forge + auto-activate-base: false + + - name: Install run dependencies + run: | + pip install . 
+ pip list + + - name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls + + - name: Run ros3 tests # TODO include gallery tests after they are written run: | - pytest --cov - python -m coverage xml # codecov uploader requires xml format - python -m coverage report -m + pytest --cov --cov-report=xml --cov-report=term tests/unit/test_io_hdf5_streaming.py - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: fail_ci_if_error: true + files: ./coverage.xml env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/run_hdmf_zarr_tests.yml b/.github/workflows/run_hdmf_zarr_tests.yml index ecfdeaeeb..51a01977a 100644 --- a/.github/workflows/run_hdmf_zarr_tests.yml +++ b/.github/workflows/run_hdmf_zarr_tests.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' # use 3.10 until hdmf-zarr updates versioneer.py which breaks on newer python + python-version: '3.13' - name: Update pip run: python -m pip install --upgrade pip @@ -29,11 +29,9 @@ jobs: - name: Clone HDMF-Zarr and install dev branch of HDMF run: | python -m pip list - git clone https://github.com/hdmf-dev/hdmf-zarr.git --recurse-submodules + git clone https://github.com/hdmf-dev/hdmf-zarr.git cd hdmf-zarr - python -m pip install -r requirements-dev.txt # do not install the pinned install requirements - # must install in editable mode for coverage to find sources - python -m pip install -e . # this will install a different version of hdmf from the current one + python -m pip install ".[test]" # this will install a different version of hdmf from the current one cd .. python -m pip uninstall -y hdmf # uninstall the other version of hdmf python -m pip install . # reinstall current branch of hdmf @@ -42,4 +40,4 @@ jobs: - name: Run HDMF-Zarr tests on HDMF-Zarr dev branch run: | cd hdmf-zarr - pytest + pytest -v diff --git a/.github/workflows/run_pynwb_tests.yml b/.github/workflows/run_pynwb_tests.yml index bf3f32343..a159380cd 100644 --- a/.github/workflows/run_pynwb_tests.yml +++ b/.github/workflows/run_pynwb_tests.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.12' + python-version: '3.13' - name: Update pip run: python -m pip install --upgrade pip @@ -32,8 +32,7 @@ jobs: git clone https://github.com/NeurodataWithoutBorders/pynwb.git --recurse-submodules cd pynwb python -m pip install -r requirements-dev.txt # do not install the pinned install requirements - # must install in editable mode for coverage to find sources - python -m pip install -e . # this will install a different version of hdmf from the current one + python -m pip install . # this will install a different version of hdmf from the current one cd .. python -m pip uninstall -y hdmf # uninstall the other version of hdmf python -m pip install . 
# reinstall current branch of hdmf diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 049cec2e5..2ff759029 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -23,13 +23,13 @@ jobs: matrix: include: # NOTE config below with "upload-wheels: true" specifies that wheels should be uploaded as an artifact - - { name: linux-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: linux-python3.12 , test-tox-env: pytest-py312-pinned , python-ver: "3.12", os: ubuntu-latest } - - { name: linux-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: ubuntu-latest , upload-wheels: true } - - { name: windows-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: windows-latest } - - { name: windows-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: windows-latest } - - { name: macos-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: macos-latest } - - { name: macos-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: macos-latest } + - { name: linux-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-python3.13-upgraded , test-tox-env: pytest-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } + - { name: linux-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: ubuntu-latest , upload-wheels: true } + - { name: windows-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: windows-latest } + - { name: macos-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: macos-13 } + - { name: macos-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: macos-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -85,10 +85,11 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: linux-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } - - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } - - { name: windows-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: windows-latest } + # TODO: Update to 3.13 when linkml and its deps support 3.13 + - { name: linux-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-gallery-python3.12-upgraded-optional , test-tox-env: gallery-py312-upgraded-optional , python-ver: "3.12", os: ubuntu-latest } + - { name: windows-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-gallery-python3.12-upgraded-optional , test-tox-env: gallery-py312-upgraded-optional , python-ver: "3.12", os: windows-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -111,69 +112,9 @@ jobs: run: | tox -e ${{ matrix.test-tox-env }} - run-tests-on-conda: - name: ${{ matrix.name }} - runs-on: ubuntu-latest - 
defaults: - run: - shell: bash -l {0} - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} - cancel-in-progress: true - strategy: - fail-fast: false - matrix: - include: - - { name: conda-linux-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: conda-linux-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } - steps: - - name: Checkout repo with submodules - uses: actions/checkout@v4 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required to determine the version - - - name: Set up Conda - uses: conda-incubator/setup-miniconda@v3 - with: - auto-update-conda: true - python-version: ${{ matrix.python-ver }} - channels: conda-forge - mamba-version: "*" - - - name: Install build dependencies - run: | - conda config --set always_yes yes --set changeps1 no - conda info - mamba install -c conda-forge "tox>=4" - - - name: Conda reporting - run: | - conda info - conda config --show-sources - conda list --show-channel-urls - - # NOTE tox installs packages from PyPI not conda-forge... - - name: Run tox tests - run: | - tox -e ${{ matrix.test-tox-env }} - - - name: Build wheel and source distribution - run: | - tox -e build - ls -1 dist - - - name: Test installation from a wheel - run: | - tox -e wheelinstall --installpkg dist/*-none-any.whl - - - name: Test installation from a source distribution - run: | - tox -e wheelinstall --installpkg dist/*.tar.gz - deploy-dev: name: Deploy pre-release from dev - needs: [run-tests, run-gallery-tests, run-tests-on-conda] + needs: [run-tests, run-gallery-tests] if: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest concurrency: @@ -189,7 +130,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.12' + python-version: '3.13' - name: Download wheel and source distributions from artifact uses: actions/download-artifact@v4 @@ -209,7 +150,7 @@ jobs: --token ${{ secrets.BOT_GITHUB_TOKEN }} \ --re-upload - run-gallery-ros3-tests: + run-ros3-tests: name: ${{ matrix.name }} runs-on: ${{ matrix.os }} defaults: @@ -222,7 +163,7 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest } + - { name: linux-python3.13-ros3 , python-ver: "3.13", os: ubuntu-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -239,11 +180,10 @@ jobs: python-version: ${{ matrix.python-ver }} channels: conda-forge auto-activate-base: false - mamba-version: "*" - name: Install run dependencies run: | - pip install -e . + pip install . 
pip list - name: Conda reporting diff --git a/.gitignore b/.gitignore index 8257bc927..e202b3526 100644 --- a/.gitignore +++ b/.gitignore @@ -10,8 +10,11 @@ # Auto-generated apidocs RST files /docs/source/gen_modules/ /docs/source/hdmf*.rst +/docs/source/sg_execution_times.rst /docs/gallery/*.hdf5 /docs/gallery/*.sqlite +/docs/gallery/expanded_example_dynamic_term_set.yaml +/docs/gallery/schemasheets/nwb_static_enums.yaml # Auto-generated files after running tutorials mylab.*.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 786a3e4b7..03680eb2f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # NOTE: run `pre-commit autoupdate` to update hooks to latest version repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: - id: check-yaml - id: end-of-file-fixer @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.3 + rev: v0.9.1 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate @@ -26,7 +26,7 @@ repos: # hooks: # - id: interrogate - repo: https://github.com/codespell-project/codespell - rev: v2.2.6 + rev: v2.3.0 hooks: - id: codespell additional_dependencies: diff --git a/.readthedocs.yaml b/.readthedocs.yaml index a4f1ea037..b752396f4 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -6,9 +6,9 @@ version: 2 build: - os: ubuntu-20.04 + os: ubuntu-24.04 tools: - python: '3.9' + python: '3.12' # TODO: Update to 3.13 when linkml and its deps support 3.13 # Build documentation in the docs/ directory with Sphinx sphinx: @@ -24,10 +24,7 @@ formats: all # Optionally set the version of Python and requirements required to build your docs python: install: - - requirements: requirements-doc.txt - - requirements: requirements-opt.txt - - requirements: requirements.txt - - path: . + - path: .[docs,tqdm,zarr,termset] # path to the package relative to the root # Optionally include all submodules submodules: diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f6214c2a..4a37ae084 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,103 @@ # HDMF Changelog -## HDMF 3.14.0 (Upcoming) +## [Unreleased] + +### Breaking changes +- The following classes have been deprecated and removed: Array, AbstractSortedArray, SortedArray, LinSpace, Query, RegionSlicer, ListSlicer, H5RegionSlicer, DataRegion, RegionBuilder. The following methods have been deprecated and removed: fmt_docval_args, call_docval_func, get_container_cls, add_child, set_dataio (now refactored as set_data_io). We have also removed all early development for region references. @mavaylon1, @rly [#1198](https://github.com/hdmf-dev/hdmf/pull/1198), [#1212](https://github.com/hdmf-dev/hdmf/pull/1212) +- Importing from `hdmf.build.map` is no longer supported. Import from `hdmf.build` instead. @rly [#1221](https://github.com/hdmf-dev/hdmf/pull/1221) +- Python 3.8 has reached end of life. Dropped support for Python 3.8 and added support for Python 3.13. @mavaylon1 [#1209](https://github.com/hdmf-dev/hdmf/pull/1209) +- Support for Zarr is limited to versions < 3. @rly [#1229](https://github.com/hdmf-dev/hdmf/pull/1229) + +### Changed +- Added checks to ensure that group and dataset spec names and default names do not contain slashes. @bendichter [#1219](https://github.com/hdmf-dev/hdmf/pull/1219) +- Updated copyright dates. 
@rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) +- Created optional dependency groups in `pyproject.toml` and updated GitHub Actions workflows to use those instead of requirements files. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) +- Stopped using pinned dependencies in the docs and testing. These are not necessary for library testing, confuse new users and developers, and add maintenance burden. Current dependencies are stable enough that they need not be pinned, and users can report the libraries they use. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) +- Stopped redundant testing using a conda environment. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) +- Adopted changelog format conventions: https://keepachangelog.com/en/1.1.0/. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) + +### Added +- Added script to check Python version support for HDMF dependencies. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) + +## HDMF 3.14.6 (December 20, 2024) + +### Enhancements +- Added support for expandable datasets of references for untyped and compound data types. @stephprince [#1188](https://github.com/hdmf-dev/hdmf/pull/1188) +- Improved html representation of data in `Container` objects. @h-mayorquin [#1100](https://github.com/hdmf-dev/hdmf/pull/1100) +- Added error when using colon for `Container` name. A colon cannot be used as a group name when writing to Zarr on Windows. @stephprince [#1202](https://github.com/hdmf-dev/hdmf/pull/1202) +- Adjusted testing for hdmf-zarr. @rly [#1222](https://github.com/hdmf-dev/hdmf/pull/1222) + +### Bug fixes +- Fixed inaccurate error message when validating reference data types. @stephprince [#1199](https://github.com/hdmf-dev/hdmf/pull/1199) +- Fixed incorrect dtype conversion of a StrDataset. @stephprince [#1205](https://github.com/hdmf-dev/hdmf/pull/1205) + +## HDMF 3.14.5 (October 6, 2024) + +### Enhancements +- Added support for overriding backend configurations of `h5py.Dataset` objects in `Container.set_data_io`. @pauladkisson [#1172](https://github.com/hdmf-dev/hdmf/pull/1172) + +### Bug fixes +- Fixed bug, introduced in 3.14.4, in the writing of string arrays that were read from an HDF5 file. @rly @stephprince + [#1189](https://github.com/hdmf-dev/hdmf/pull/1189) +- Fixed export of scalar datasets with a compound data type. @stephprince [#1185](https://github.com/hdmf-dev/hdmf/pull/1185) +- Fixed mamba-related error in conda-based GitHub Actions. @rly [#1194](https://github.com/hdmf-dev/hdmf/pull/1194) + +## HDMF 3.14.4 (September 4, 2024) + +### Enhancements +- Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) + +- Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166) +- Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165) +- Added support to write multidimensional string arrays. @stephprince [#1173](https://github.com/hdmf-dev/hdmf/pull/1173) +- Added support for appending to a dataset of references. @mavaylon1 [#1135](https://github.com/hdmf-dev/hdmf/pull/1135) + +### Bug fixes +- Fixed issue where scalar datasets with a compound data type were being written as non-scalar datasets. @stephprince [#1176](https://github.com/hdmf-dev/hdmf/pull/1176) +- Fixed H5DataIO not exposing `maxshape` on non-dci dsets. 
@cboulay [#1149](https://github.com/hdmf-dev/hdmf/pull/1149) +- Fixed generation of classes in an extension that contain attributes or datasets storing references to other types defined in the extension. + @rly [#1183](https://github.com/hdmf-dev/hdmf/pull/1183) + +## HDMF 3.14.3 (July 29, 2024) + +### Enhancements +- Added new attribute "dimension_labels" on `DatasetBuilder` which specifies the names of the dimensions used in the +dataset based on the shape of the dataset data and the dimension names in the spec for the data type. This attribute +is available on build (during the write process), but not on read of a dataset from a file. @rly [#1081](https://github.com/hdmf-dev/hdmf/pull/1081) +- Speed up loading namespaces by skipping register_type when already registered. @magland [#1102](https://github.com/hdmf-dev/hdmf/pull/1102) +- Speed up namespace loading: return a shallow copy rather than a deep copy in build_const_args. @magland [#1103](https://github.com/hdmf-dev/hdmf/pull/1103) + +## HDMF 3.14.2 (July 7, 2024) + +### Enhancements +- Warn when unexpected keys are present in specs. @rly [#1134](https://github.com/hdmf-dev/hdmf/pull/1134) +- Support appending to zarr arrays. @mavaylon1 [#1136](https://github.com/hdmf-dev/hdmf/pull/1136) +- Support specifying "value" key in DatasetSpec. @rly [#1143](https://github.com/hdmf-dev/hdmf/pull/1143) +- Add support for numpy 2. @rly [#1139](https://github.com/hdmf-dev/hdmf/pull/1139) + +### Bug fixes +- Fix iterator increment causing an extra +1 added after the end of completion. @CodyCBakerPhD [#1128](https://github.com/hdmf-dev/hdmf/pull/1128) + +## HDMF 3.14.1 (June 6, 2024) + +### Bug fixes +- Excluded unnecessary artifacts from sdist and wheel. @rly [#1119](https://github.com/hdmf-dev/hdmf/pull/1119) +- Fixed issue with resolving attribute specs that have the same name at different levels of a spec hierarchy. + @rly [#1122](https://github.com/hdmf-dev/hdmf/pull/1122) + +## HDMF 3.14.0 (May 20, 2024) ### Enhancements - Updated `_field_config` to take `type_map` as an argument for APIs. @mavaylon1 [#1094](https://github.com/hdmf-dev/hdmf/pull/1094) - Added `TypeConfigurator` to automatically wrap fields with `TermSetWrapper` according to a configuration file. @mavaylon1 [#1016](https://github.com/hdmf-dev/hdmf/pull/1016) - Updated `TermSetWrapper` to support validating a single field within a compound array. @mavaylon1 [#1061](https://github.com/hdmf-dev/hdmf/pull/1061) +- Updated testing to not install in editable mode and not run `coverage` by default. @rly [#1107](https://github.com/hdmf-dev/hdmf/pull/1107) +- Add `post_init_method` parameter when generating classes to perform post-init functionality, i.e., validation. @mavaylon1 [#1089](https://github.com/hdmf-dev/hdmf/pull/1089) +- Exposed `aws_region` to `HDF5IO` and downstream passes to `h5py.File`. @codycbakerphd [#1040](https://github.com/hdmf-dev/hdmf/pull/1040) +- Exposed `progress_bar_class` to the `GenericDataChunkIterator` for more custom control over display of progress while iterating. @codycbakerphd [#1110](https://github.com/hdmf-dev/hdmf/pull/1110) +- Updated loading, unloading, and getting the `TypeConfigurator` to support a `TypeMap` parameter. @mavaylon1 [#1117](https://github.com/hdmf-dev/hdmf/pull/1117) + +### Bug Fixes +- Fixed `TermSetWrapper` warning raised during the setters. @mavaylon1 [#1116](https://github.com/hdmf-dev/hdmf/pull/1116) ### Bug fixes - Fixed issue with `DynamicTable.add_column` not allowing subclasses of `DynamicTableRegion` or `EnumData`. 
@rly [#1091](https://github.com/hdmf-dev/hdmf/pull/1091) @@ -543,7 +635,7 @@ the fields (i.e., when the constructor sets some fields to fixed values). @rly Each sub-table is itself a DynamicTable that is aligned with the main table by row index. Each subtable defines a sub-category in the main table effectively creating a table with sub-headings to organize columns. @oruebel (#551) -- Add tutoral for new `AlignedDynamicTable` type. @oruebel (#571) +- Add tutorial for new `AlignedDynamicTable` type. @oruebel (#571) - Equality check for `DynamicTable` now also checks that the name and description of the table are the same. @rly (#566) ### Internal improvements diff --git a/Legal.txt b/Legal.txt index db343a634..e54bb27ac 100644 --- a/Legal.txt +++ b/Legal.txt @@ -1,4 +1,4 @@ -“hdmf” Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +“hdmf” Copyright (c) 2017-2025, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 9b77b2ac8..000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,5 +0,0 @@ -include license.txt Legal.txt src/hdmf/_due.py -include requirements.txt requirements-dev.txt requirements-doc.txt requirements-min.txt requirements-opt.txt -include test_gallery.py tox.ini -graft tests -global-exclude *.py[cod] diff --git a/README.rst b/README.rst index b56f7efd2..c35f45ccd 100644 --- a/README.rst +++ b/README.rst @@ -94,7 +94,7 @@ Citing HDMF LICENSE ======= -"hdmf" Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +"hdmf" Copyright (c) 2017-2025, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. @@ -110,7 +110,7 @@ You are under no obligation whatsoever to provide any bug fixes, patches, or upg COPYRIGHT ========= -"hdmf" Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +"hdmf" Copyright (c) 2017-2025, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. 
Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit other to do so. diff --git a/docs/gallery/expanded_example_dynamic_term_set.yaml b/docs/gallery/expanded_example_dynamic_term_set.yaml deleted file mode 100644 index a2631696a..000000000 --- a/docs/gallery/expanded_example_dynamic_term_set.yaml +++ /dev/null @@ -1,2073 +0,0 @@ -id: https://w3id.org/linkml/examples/nwb_dynamic_enums -title: dynamic enums example -name: nwb_dynamic_enums -description: this schema demonstrates the use of dynamic enums - -prefixes: - linkml: https://w3id.org/linkml/ - CL: http://purl.obolibrary.org/obo/CL_ - -imports: -- linkml:types - -default_range: string - -# ======================== # -# CLASSES # -# ======================== # -classes: - BrainSample: - slots: - - cell_type - -# ======================== # -# SLOTS # -# ======================== # -slots: - cell_type: - required: true - range: NeuronTypeEnum - -# ======================== # -# ENUMS # -# ======================== # -enums: - NeuronTypeEnum: - reachable_from: - source_ontology: obo:cl - source_nodes: - - CL:0000540 ## neuron - include_self: false - relationship_types: - - rdfs:subClassOf - permissible_values: - CL:0000705: - text: CL:0000705 - description: R6 photoreceptor cell - meaning: CL:0000705 - CL:4023108: - text: CL:4023108 - description: oxytocin-secreting magnocellular cell - meaning: CL:4023108 - CL:0004240: - text: CL:0004240 - description: WF1 amacrine cell - meaning: CL:0004240 - CL:0004242: - text: CL:0004242 - description: WF3-1 amacrine cell - meaning: CL:0004242 - CL:1000380: - text: CL:1000380 - description: type 1 vestibular sensory cell of epithelium of macula of saccule - of membranous labyrinth - meaning: CL:1000380 - CL:4023128: - text: CL:4023128 - description: rostral periventricular region of the third ventricle KNDy neuron - meaning: CL:4023128 - CL:0003020: - text: CL:0003020 - description: retinal ganglion cell C outer - meaning: CL:0003020 - CL:4023094: - text: CL:4023094 - description: tufted pyramidal neuron - meaning: CL:4023094 - CL:4023057: - text: CL:4023057 - description: cerebellar inhibitory GABAergic interneuron - meaning: CL:4023057 - CL:2000049: - text: CL:2000049 - description: primary motor cortex pyramidal cell - meaning: CL:2000049 - CL:0000119: - text: CL:0000119 - description: cerebellar Golgi cell - meaning: CL:0000119 - CL:0004227: - text: CL:0004227 - description: flat bistratified amacrine cell - meaning: CL:0004227 - CL:1000606: - text: CL:1000606 - description: kidney nerve cell - meaning: CL:1000606 - CL:1001582: - text: CL:1001582 - description: lateral ventricle neuron - meaning: CL:1001582 - CL:0000165: - text: CL:0000165 - description: neuroendocrine cell - meaning: CL:0000165 - CL:0000555: - text: CL:0000555 - description: neuronal brush cell - meaning: CL:0000555 - CL:0004231: - text: CL:0004231 - description: recurving diffuse amacrine cell - meaning: CL:0004231 - CL:0000687: - text: CL:0000687 - description: R1 photoreceptor cell - meaning: CL:0000687 - CL:0001031: - text: CL:0001031 - description: cerebellar granule cell - meaning: CL:0001031 - CL:0003026: - text: CL:0003026 - description: retinal ganglion cell D1 - meaning: CL:0003026 - CL:4033035: - text: CL:4033035 - description: giant bipolar cell - meaning: CL:4033035 - CL:4023009: - text: CL:4023009 - 
description: extratelencephalic-projecting glutamatergic cortical neuron - meaning: CL:4023009 - CL:0010022: - text: CL:0010022 - description: cardiac neuron - meaning: CL:0010022 - CL:0000287: - text: CL:0000287 - description: eye photoreceptor cell - meaning: CL:0000287 - CL:0000488: - text: CL:0000488 - description: visible light photoreceptor cell - meaning: CL:0000488 - CL:0003046: - text: CL:0003046 - description: M13 retinal ganglion cell - meaning: CL:0003046 - CL:4023169: - text: CL:4023169 - description: trigeminal neuron - meaning: CL:4023169 - CL:0005007: - text: CL:0005007 - description: Kolmer-Agduhr neuron - meaning: CL:0005007 - CL:0005008: - text: CL:0005008 - description: macular hair cell - meaning: CL:0005008 - CL:4023027: - text: CL:4023027 - description: L5 T-Martinotti sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023027 - CL:4033032: - text: CL:4033032 - description: diffuse bipolar 6 cell - meaning: CL:4033032 - CL:0008021: - text: CL:0008021 - description: anterior lateral line ganglion neuron - meaning: CL:0008021 - CL:4023028: - text: CL:4023028 - description: L5 non-Martinotti sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023028 - CL:4023063: - text: CL:4023063 - description: medial ganglionic eminence derived interneuron - meaning: CL:4023063 - CL:4023032: - text: CL:4023032 - description: ON retinal ganglion cell - meaning: CL:4023032 - CL:0003039: - text: CL:0003039 - description: M8 retinal ganglion cell - meaning: CL:0003039 - CL:0000757: - text: CL:0000757 - description: type 5 cone bipolar cell (sensu Mus) - meaning: CL:0000757 - CL:0000609: - text: CL:0000609 - description: vestibular hair cell - meaning: CL:0000609 - CL:0004219: - text: CL:0004219 - description: A2 amacrine cell - meaning: CL:0004219 - CL:4030028: - text: CL:4030028 - description: glycinergic amacrine cell - meaning: CL:4030028 - CL:0002450: - text: CL:0002450 - description: tether cell - meaning: CL:0002450 - CL:0002374: - text: CL:0002374 - description: ear hair cell - meaning: CL:0002374 - CL:0004124: - text: CL:0004124 - description: retinal ganglion cell C1 - meaning: CL:0004124 - CL:0004115: - text: CL:0004115 - description: retinal ganglion cell B - meaning: CL:0004115 - CL:1000384: - text: CL:1000384 - description: type 2 vestibular sensory cell of epithelium of macula of saccule - of membranous labyrinth - meaning: CL:1000384 - CL:2000037: - text: CL:2000037 - description: posterior lateral line neuromast hair cell - meaning: CL:2000037 - CL:0000673: - text: CL:0000673 - description: Kenyon cell - meaning: CL:0000673 - CL:4023052: - text: CL:4023052 - description: Betz upper motor neuron - meaning: CL:4023052 - CL:0004243: - text: CL:0004243 - description: WF3-2 amacrine cell - meaning: CL:0004243 - CL:1000222: - text: CL:1000222 - description: stomach neuroendocrine cell - meaning: CL:1000222 - CL:0002310: - text: CL:0002310 - description: mammosomatotroph - meaning: CL:0002310 - CL:4023066: - text: CL:4023066 - description: horizontal pyramidal neuron - meaning: CL:4023066 - CL:0000379: - text: CL:0000379 - description: sensory processing neuron - meaning: CL:0000379 - CL:0011006: - text: CL:0011006 - description: Lugaro cell - meaning: CL:0011006 - CL:0004216: - text: CL:0004216 - description: type 5b cone bipolar cell - meaning: CL:0004216 - CL:0004126: - text: CL:0004126 - description: retinal ganglion cell C2 outer - meaning: CL:0004126 - CL:0000108: - text: CL:0000108 - description: cholinergic neuron - meaning: CL:0000108 - CL:0011103: - text: 
CL:0011103 - description: sympathetic neuron - meaning: CL:0011103 - CL:4023107: - text: CL:4023107 - description: reticulospinal neuron - meaning: CL:4023107 - CL:4023002: - text: CL:4023002 - description: dynamic beta motor neuron - meaning: CL:4023002 - CL:4030048: - text: CL:4030048 - description: striosomal D1 medium spiny neuron - meaning: CL:4030048 - CL:4023163: - text: CL:4023163 - description: spherical bushy cell - meaning: CL:4023163 - CL:4023061: - text: CL:4023061 - description: hippocampal CA4 neuron - meaning: CL:4023061 - CL:0000532: - text: CL:0000532 - description: CAP motoneuron - meaning: CL:0000532 - CL:0000526: - text: CL:0000526 - description: afferent neuron - meaning: CL:0000526 - CL:0003003: - text: CL:0003003 - description: G2 retinal ganglion cell - meaning: CL:0003003 - CL:0000530: - text: CL:0000530 - description: primary neuron (sensu Teleostei) - meaning: CL:0000530 - CL:4023045: - text: CL:4023045 - description: medulla-projecting glutamatergic neuron of the primary motor - cortex - meaning: CL:4023045 - CL:3000004: - text: CL:3000004 - description: peripheral sensory neuron - meaning: CL:3000004 - CL:0000544: - text: CL:0000544 - description: slowly adapting mechanoreceptor cell - meaning: CL:0000544 - CL:4030047: - text: CL:4030047 - description: matrix D2 medium spiny neuron - meaning: CL:4030047 - CL:0004220: - text: CL:0004220 - description: flag amacrine cell - meaning: CL:0004220 - CL:4023125: - text: CL:4023125 - description: KNDy neuron - meaning: CL:4023125 - CL:0004228: - text: CL:0004228 - description: broad diffuse amacrine cell - meaning: CL:0004228 - CL:4023122: - text: CL:4023122 - description: oxytocin receptor sst GABAergic cortical interneuron - meaning: CL:4023122 - CL:1000379: - text: CL:1000379 - description: type 1 vestibular sensory cell of epithelium of macula of utricle - of membranous labyrinth - meaning: CL:1000379 - CL:0011111: - text: CL:0011111 - description: gonadotropin-releasing hormone neuron - meaning: CL:0011111 - CL:0003042: - text: CL:0003042 - description: M9-OFF retinal ganglion cell - meaning: CL:0003042 - CL:0003030: - text: CL:0003030 - description: M3 retinal ganglion cell - meaning: CL:0003030 - CL:0003011: - text: CL:0003011 - description: G8 retinal ganglion cell - meaning: CL:0003011 - CL:0000202: - text: CL:0000202 - description: auditory hair cell - meaning: CL:0000202 - CL:0002271: - text: CL:0002271 - description: type EC1 enteroendocrine cell - meaning: CL:0002271 - CL:4023013: - text: CL:4023013 - description: corticothalamic-projecting glutamatergic cortical neuron - meaning: CL:4023013 - CL:4023114: - text: CL:4023114 - description: calyx vestibular afferent neuron - meaning: CL:4023114 - CL:0003045: - text: CL:0003045 - description: M12 retinal ganglion cell - meaning: CL:0003045 - CL:0002487: - text: CL:0002487 - description: cutaneous/subcutaneous mechanoreceptor cell - meaning: CL:0002487 - CL:4030053: - text: CL:4030053 - description: Island of Calleja granule cell - meaning: CL:4030053 - CL:0000490: - text: CL:0000490 - description: photopic photoreceptor cell - meaning: CL:0000490 - CL:2000023: - text: CL:2000023 - description: spinal cord ventral column interneuron - meaning: CL:2000023 - CL:1000381: - text: CL:1000381 - description: type 1 vestibular sensory cell of epithelium of crista of ampulla - of semicircular duct of membranous labyrinth - meaning: CL:1000381 - CL:0003013: - text: CL:0003013 - description: G10 retinal ganglion cell - meaning: CL:0003013 - CL:0000602: - text: CL:0000602 
- description: pressoreceptor cell - meaning: CL:0000602 - CL:4023039: - text: CL:4023039 - description: amygdala excitatory neuron - meaning: CL:4023039 - CL:4030043: - text: CL:4030043 - description: matrix D1 medium spiny neuron - meaning: CL:4030043 - CL:0000105: - text: CL:0000105 - description: pseudounipolar neuron - meaning: CL:0000105 - CL:0004137: - text: CL:0004137 - description: retinal ganglion cell A2 inner - meaning: CL:0004137 - CL:1001436: - text: CL:1001436 - description: hair-tylotrich neuron - meaning: CL:1001436 - CL:1001503: - text: CL:1001503 - description: olfactory bulb tufted cell - meaning: CL:1001503 - CL:0000406: - text: CL:0000406 - description: CNS short range interneuron - meaning: CL:0000406 - CL:2000087: - text: CL:2000087 - description: dentate gyrus of hippocampal formation basket cell - meaning: CL:2000087 - CL:0000534: - text: CL:0000534 - description: primary interneuron (sensu Teleostei) - meaning: CL:0000534 - CL:0000246: - text: CL:0000246 - description: Mauthner neuron - meaning: CL:0000246 - CL:0003027: - text: CL:0003027 - description: retinal ganglion cell D2 - meaning: CL:0003027 - CL:0000752: - text: CL:0000752 - description: cone retinal bipolar cell - meaning: CL:0000752 - CL:0000410: - text: CL:0000410 - description: CNS long range interneuron - meaning: CL:0000410 - CL:0009000: - text: CL:0009000 - description: sensory neuron of spinal nerve - meaning: CL:0009000 - CL:0000754: - text: CL:0000754 - description: type 2 cone bipolar cell (sensu Mus) - meaning: CL:0000754 - CL:0002309: - text: CL:0002309 - description: corticotroph - meaning: CL:0002309 - CL:0010009: - text: CL:0010009 - description: camera-type eye photoreceptor cell - meaning: CL:0010009 - CL:4023069: - text: CL:4023069 - description: medial ganglionic eminence derived GABAergic cortical interneuron - meaning: CL:4023069 - CL:0000102: - text: CL:0000102 - description: polymodal neuron - meaning: CL:0000102 - CL:0000694: - text: CL:0000694 - description: R3 photoreceptor cell - meaning: CL:0000694 - CL:0004183: - text: CL:0004183 - description: retinal ganglion cell B3 - meaning: CL:0004183 - CL:0000693: - text: CL:0000693 - description: neurogliaform cell - meaning: CL:0000693 - CL:0000760: - text: CL:0000760 - description: type 8 cone bipolar cell (sensu Mus) - meaning: CL:0000760 - CL:4023001: - text: CL:4023001 - description: static beta motor neuron - meaning: CL:4023001 - CL:1000424: - text: CL:1000424 - description: chromaffin cell of paraaortic body - meaning: CL:1000424 - CL:0000120: - text: CL:0000120 - description: granule cell - meaning: CL:0000120 - CL:0002312: - text: CL:0002312 - description: somatotroph - meaning: CL:0002312 - CL:0000107: - text: CL:0000107 - description: autonomic neuron - meaning: CL:0000107 - CL:2000047: - text: CL:2000047 - description: brainstem motor neuron - meaning: CL:2000047 - CL:4023080: - text: CL:4023080 - description: stellate L6 intratelencephalic projecting glutamatergic neuron - of the primary motor cortex (Mmus) - meaning: CL:4023080 - CL:0000848: - text: CL:0000848 - description: microvillous olfactory receptor neuron - meaning: CL:0000848 - CL:0004213: - text: CL:0004213 - description: type 3a cone bipolar cell - meaning: CL:0004213 - CL:0000116: - text: CL:0000116 - description: pioneer neuron - meaning: CL:0000116 - CL:4023187: - text: CL:4023187 - description: koniocellular cell - meaning: CL:4023187 - CL:4023116: - text: CL:4023116 - description: type 2 spiral ganglion neuron - meaning: CL:4023116 - CL:0008015: - text: 
CL:0008015 - description: inhibitory motor neuron - meaning: CL:0008015 - CL:0003048: - text: CL:0003048 - description: L cone cell - meaning: CL:0003048 - CL:1000082: - text: CL:1000082 - description: stretch receptor cell - meaning: CL:1000082 - CL:0003031: - text: CL:0003031 - description: M3-ON retinal ganglion cell - meaning: CL:0003031 - CL:1001474: - text: CL:1001474 - description: medium spiny neuron - meaning: CL:1001474 - CL:0000745: - text: CL:0000745 - description: retina horizontal cell - meaning: CL:0000745 - CL:0002515: - text: CL:0002515 - description: interrenal norepinephrine type cell - meaning: CL:0002515 - CL:2000027: - text: CL:2000027 - description: cerebellum basket cell - meaning: CL:2000027 - CL:0004225: - text: CL:0004225 - description: spider amacrine cell - meaning: CL:0004225 - CL:4023031: - text: CL:4023031 - description: L4 sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023031 - CL:0008038: - text: CL:0008038 - description: alpha motor neuron - meaning: CL:0008038 - CL:4033030: - text: CL:4033030 - description: diffuse bipolar 3b cell - meaning: CL:4033030 - CL:0000336: - text: CL:0000336 - description: adrenal medulla chromaffin cell - meaning: CL:0000336 - CL:0000751: - text: CL:0000751 - description: rod bipolar cell - meaning: CL:0000751 - CL:0008037: - text: CL:0008037 - description: gamma motor neuron - meaning: CL:0008037 - CL:0003028: - text: CL:0003028 - description: M1 retinal ganglion cell - meaning: CL:0003028 - CL:0003016: - text: CL:0003016 - description: G11-OFF retinal ganglion cell - meaning: CL:0003016 - CL:0004239: - text: CL:0004239 - description: wavy bistratified amacrine cell - meaning: CL:0004239 - CL:4023168: - text: CL:4023168 - description: somatosensory neuron - meaning: CL:4023168 - CL:4023018: - text: CL:4023018 - description: pvalb GABAergic cortical interneuron - meaning: CL:4023018 - CL:0004138: - text: CL:0004138 - description: retinal ganglion cell A2 - meaning: CL:0004138 - CL:0000750: - text: CL:0000750 - description: OFF-bipolar cell - meaning: CL:0000750 - CL:0000709: - text: CL:0000709 - description: R8 photoreceptor cell - meaning: CL:0000709 - CL:0004214: - text: CL:0004214 - description: type 3b cone bipolar cell - meaning: CL:0004214 - CL:0003047: - text: CL:0003047 - description: M14 retinal ganglion cell - meaning: CL:0003047 - CL:0015000: - text: CL:0015000 - description: cranial motor neuron - meaning: CL:0015000 - CL:0003036: - text: CL:0003036 - description: M7 retinal ganglion cell - meaning: CL:0003036 - CL:0000397: - text: CL:0000397 - description: ganglion interneuron - meaning: CL:0000397 - CL:1001509: - text: CL:1001509 - description: glycinergic neuron - meaning: CL:1001509 - CL:4023038: - text: CL:4023038 - description: L6b glutamatergic cortical neuron - meaning: CL:4023038 - CL:0000112: - text: CL:0000112 - description: columnar neuron - meaning: CL:0000112 - CL:0002517: - text: CL:0002517 - description: interrenal epinephrin secreting cell - meaning: CL:0002517 - CL:1000383: - text: CL:1000383 - description: type 2 vestibular sensory cell of epithelium of macula of utricle - of membranous labyrinth - meaning: CL:1000383 - CL:0004116: - text: CL:0004116 - description: retinal ganglion cell C - meaning: CL:0004116 - CL:4023113: - text: CL:4023113 - description: bouton vestibular afferent neuron - meaning: CL:4023113 - CL:0003034: - text: CL:0003034 - description: M5 retinal ganglion cell - meaning: CL:0003034 - CL:0011005: - text: CL:0011005 - description: GABAergic interneuron - meaning: 
CL:0011005 - CL:0011105: - text: CL:0011105 - description: dopamanergic interplexiform cell - meaning: CL:0011105 - CL:0000749: - text: CL:0000749 - description: ON-bipolar cell - meaning: CL:0000749 - CL:0000498: - text: CL:0000498 - description: inhibitory interneuron - meaning: CL:0000498 - CL:4023071: - text: CL:4023071 - description: L5/6 cck cortical GABAergic interneuron (Mmus) - meaning: CL:4023071 - CL:1000245: - text: CL:1000245 - description: posterior lateral line ganglion neuron - meaning: CL:1000245 - CL:0004139: - text: CL:0004139 - description: retinal ganglion cell A2 outer - meaning: CL:0004139 - CL:0000531: - text: CL:0000531 - description: primary sensory neuron (sensu Teleostei) - meaning: CL:0000531 - CL:0004125: - text: CL:0004125 - description: retinal ganglion cell C2 inner - meaning: CL:0004125 - CL:4023064: - text: CL:4023064 - description: caudal ganglionic eminence derived interneuron - meaning: CL:4023064 - CL:4030049: - text: CL:4030049 - description: striosomal D2 medium spiny neuron - meaning: CL:4030049 - CL:0017002: - text: CL:0017002 - description: prostate neuroendocrine cell - meaning: CL:0017002 - CL:0000756: - text: CL:0000756 - description: type 4 cone bipolar cell (sensu Mus) - meaning: CL:0000756 - CL:0000707: - text: CL:0000707 - description: R7 photoreceptor cell - meaning: CL:0000707 - CL:0000700: - text: CL:0000700 - description: dopaminergic neuron - meaning: CL:0000700 - CL:0003002: - text: CL:0003002 - description: G1 retinal ganglion cell - meaning: CL:0003002 - CL:1000001: - text: CL:1000001 - description: retrotrapezoid nucleus neuron - meaning: CL:1000001 - CL:4023007: - text: CL:4023007 - description: L2/3 bipolar vip GABAergic cortical interneuron (Mmus) - meaning: CL:4023007 - CL:0000528: - text: CL:0000528 - description: nitrergic neuron - meaning: CL:0000528 - CL:0000639: - text: CL:0000639 - description: basophil cell of pars distalis of adenohypophysis - meaning: CL:0000639 - CL:0000849: - text: CL:0000849 - description: crypt olfactory receptor neuron - meaning: CL:0000849 - CL:0011110: - text: CL:0011110 - description: histaminergic neuron - meaning: CL:0011110 - CL:0005025: - text: CL:0005025 - description: visceromotor neuron - meaning: CL:0005025 - CL:0003001: - text: CL:0003001 - description: bistratified retinal ganglion cell - meaning: CL:0003001 - CL:0004241: - text: CL:0004241 - description: WF2 amacrine cell - meaning: CL:0004241 - CL:4023019: - text: CL:4023019 - description: L5/6 cck, vip cortical GABAergic interneuron (Mmus) - meaning: CL:4023019 - CL:4023040: - text: CL:4023040 - description: L2/3-6 intratelencephalic projecting glutamatergic cortical neuron - meaning: CL:4023040 - CL:1001435: - text: CL:1001435 - description: periglomerular cell - meaning: CL:1001435 - CL:4023127: - text: CL:4023127 - description: arcuate nucleus of hypothalamus KNDy neuron - meaning: CL:4023127 - CL:0003007: - text: CL:0003007 - description: G4-OFF retinal ganglion cell - meaning: CL:0003007 - CL:0000101: - text: CL:0000101 - description: sensory neuron - meaning: CL:0000101 - CL:2000097: - text: CL:2000097 - description: midbrain dopaminergic neuron - meaning: CL:2000097 - CL:4023095: - text: CL:4023095 - description: untufted pyramidal neuron - meaning: CL:4023095 - CL:0003004: - text: CL:0003004 - description: G3 retinal ganglion cell - meaning: CL:0003004 - CL:0000527: - text: CL:0000527 - description: efferent neuron - meaning: CL:0000527 - CL:1000382: - text: CL:1000382 - description: type 2 vestibular sensory cell of 
stato-acoustic epithelium - meaning: CL:1000382 - CL:4033019: - text: CL:4033019 - description: ON-blue cone bipolar cell - meaning: CL:4033019 - CL:0000589: - text: CL:0000589 - description: cochlear inner hair cell - meaning: CL:0000589 - CL:4023160: - text: CL:4023160 - description: cartwheel cell - meaning: CL:4023160 - CL:1001437: - text: CL:1001437 - description: hair-down neuron - meaning: CL:1001437 - CL:0011102: - text: CL:0011102 - description: parasympathetic neuron - meaning: CL:0011102 - CL:2000029: - text: CL:2000029 - description: central nervous system neuron - meaning: CL:2000029 - CL:4023115: - text: CL:4023115 - description: type 1 spiral ganglion neuron - meaning: CL:4023115 - CL:0002311: - text: CL:0002311 - description: mammotroph - meaning: CL:0002311 - CL:0003025: - text: CL:0003025 - description: retinal ganglion cell C3 - meaning: CL:0003025 - CL:4030050: - text: CL:4030050 - description: D1/D2-hybrid medium spiny neuron - meaning: CL:4030050 - CL:4023118: - text: CL:4023118 - description: L5/6 non-Martinotti sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023118 - CL:4023110: - text: CL:4023110 - description: amygdala pyramidal neuron - meaning: CL:4023110 - CL:0002273: - text: CL:0002273 - description: type ECL enteroendocrine cell - meaning: CL:0002273 - CL:0003050: - text: CL:0003050 - description: S cone cell - meaning: CL:0003050 - CL:4023121: - text: CL:4023121 - description: sst chodl GABAergic cortical interneuron - meaning: CL:4023121 - CL:4023020: - text: CL:4023020 - description: dynamic gamma motor neuron - meaning: CL:4023020 - CL:0004246: - text: CL:0004246 - description: monostratified cell - meaning: CL:0004246 - CL:0000495: - text: CL:0000495 - description: blue sensitive photoreceptor cell - meaning: CL:0000495 - CL:0000029: - text: CL:0000029 - description: neural crest derived neuron - meaning: CL:0000029 - CL:0004001: - text: CL:0004001 - description: local interneuron - meaning: CL:0004001 - CL:0000551: - text: CL:0000551 - description: unimodal nocireceptor - meaning: CL:0000551 - CL:0003006: - text: CL:0003006 - description: G4-ON retinal ganglion cell - meaning: CL:0003006 - CL:4023011: - text: CL:4023011 - description: lamp5 GABAergic cortical interneuron - meaning: CL:4023011 - CL:4023109: - text: CL:4023109 - description: vasopressin-secreting magnocellular cell - meaning: CL:4023109 - CL:0000121: - text: CL:0000121 - description: Purkinje cell - meaning: CL:0000121 - CL:0000678: - text: CL:0000678 - description: commissural neuron - meaning: CL:0000678 - CL:0004252: - text: CL:0004252 - description: medium field retinal amacrine cell - meaning: CL:0004252 - CL:0000103: - text: CL:0000103 - description: bipolar neuron - meaning: CL:0000103 - CL:4033036: - text: CL:4033036 - description: OFFx cell - meaning: CL:4033036 - CL:4023014: - text: CL:4023014 - description: L5 vip cortical GABAergic interneuron (Mmus) - meaning: CL:4023014 - CL:0008031: - text: CL:0008031 - description: cortical interneuron - meaning: CL:0008031 - CL:0008010: - text: CL:0008010 - description: cranial somatomotor neuron - meaning: CL:0008010 - CL:0000637: - text: CL:0000637 - description: chromophil cell of anterior pituitary gland - meaning: CL:0000637 - CL:0003014: - text: CL:0003014 - description: G11 retinal ganglion cell - meaning: CL:0003014 - CL:4033029: - text: CL:4033029 - description: diffuse bipolar 3a cell - meaning: CL:4033029 - CL:0002611: - text: CL:0002611 - description: neuron of the dorsal spinal cord - meaning: CL:0002611 - 
CL:0010010: - text: CL:0010010 - description: cerebellar stellate cell - meaning: CL:0010010 - CL:1000465: - text: CL:1000465 - description: chromaffin cell of ovary - meaning: CL:1000465 - CL:0000761: - text: CL:0000761 - description: type 9 cone bipolar cell (sensu Mus) - meaning: CL:0000761 - CL:0004226: - text: CL:0004226 - description: monostratified amacrine cell - meaning: CL:0004226 - CL:0004253: - text: CL:0004253 - description: wide field retinal amacrine cell - meaning: CL:0004253 - CL:4023075: - text: CL:4023075 - description: L6 tyrosine hydroxylase sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023075 - CL:4023068: - text: CL:4023068 - description: thalamic excitatory neuron - meaning: CL:4023068 - CL:1000377: - text: CL:1000377 - description: dense-core granulated cell of epithelium of trachea - meaning: CL:1000377 - CL:4023089: - text: CL:4023089 - description: nest basket cell - meaning: CL:4023089 - CL:4023189: - text: CL:4023189 - description: parasol ganglion cell of retina - meaning: CL:4023189 - CL:0000856: - text: CL:0000856 - description: neuromast hair cell - meaning: CL:0000856 - CL:4023025: - text: CL:4023025 - description: long-range projecting sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023025 - CL:0003043: - text: CL:0003043 - description: M10 retinal ganglion cell - meaning: CL:0003043 - CL:4023000: - text: CL:4023000 - description: beta motor neuron - meaning: CL:4023000 - CL:4023048: - text: CL:4023048 - description: L4/5 intratelencephalic projecting glutamatergic neuron of the - primary motor cortex - meaning: CL:4023048 - CL:0000855: - text: CL:0000855 - description: sensory hair cell - meaning: CL:0000855 - CL:4023070: - text: CL:4023070 - description: caudal ganglionic eminence derived GABAergic cortical interneuron - meaning: CL:4023070 - CL:0002070: - text: CL:0002070 - description: type I vestibular sensory cell - meaning: CL:0002070 - CL:2000028: - text: CL:2000028 - description: cerebellum glutamatergic neuron - meaning: CL:2000028 - CL:0000533: - text: CL:0000533 - description: primary motor neuron (sensu Teleostei) - meaning: CL:0000533 - CL:4023083: - text: CL:4023083 - description: chandelier cell - meaning: CL:4023083 - CL:2000034: - text: CL:2000034 - description: anterior lateral line neuromast hair cell - meaning: CL:2000034 - CL:0003015: - text: CL:0003015 - description: G11-ON retinal ganglion cell - meaning: CL:0003015 - CL:0000204: - text: CL:0000204 - description: acceleration receptive cell - meaning: CL:0000204 - CL:4033031: - text: CL:4033031 - description: diffuse bipolar 4 cell - meaning: CL:4033031 - CL:0003024: - text: CL:0003024 - description: retinal ganglion cell C inner - meaning: CL:0003024 - CL:4023074: - text: CL:4023074 - description: mammillary body neuron - meaning: CL:4023074 - CL:2000089: - text: CL:2000089 - description: dentate gyrus granule cell - meaning: CL:2000089 - CL:4033028: - text: CL:4033028 - description: diffuse bipolar 2 cell - meaning: CL:4033028 - CL:0000110: - text: CL:0000110 - description: peptidergic neuron - meaning: CL:0000110 - CL:4033002: - text: CL:4033002 - description: neuroendocrine cell of epithelium of crypt of Lieberkuhn - meaning: CL:4033002 - CL:4033027: - text: CL:4033027 - description: diffuse bipolar 1 cell - meaning: CL:4033027 - CL:3000003: - text: CL:3000003 - description: sympathetic cholinergic neuron - meaning: CL:3000003 - CL:4023158: - text: CL:4023158 - description: octopus cell of the mammalian cochlear nucleus - meaning: CL:4023158 - CL:0000118: - 
text: CL:0000118 - description: basket cell - meaning: CL:0000118 - CL:0004223: - text: CL:0004223 - description: AB diffuse-1 amacrine cell - meaning: CL:0004223 - CL:4030054: - text: CL:4030054 - description: RXFP1-positive interface island D1-medium spiny neuron - meaning: CL:4030054 - CL:0002610: - text: CL:0002610 - description: raphe nuclei neuron - meaning: CL:0002610 - CL:4023026: - text: CL:4023026 - description: direct pathway medium spiny neuron - meaning: CL:4023026 - CL:4023016: - text: CL:4023016 - description: vip GABAergic cortical interneuron - meaning: CL:4023016 - CL:0004237: - text: CL:0004237 - description: fountain amacrine cell - meaning: CL:0004237 - CL:0003035: - text: CL:0003035 - description: M6 retinal ganglion cell - meaning: CL:0003035 - CL:1001611: - text: CL:1001611 - description: cerebellar neuron - meaning: CL:1001611 - CL:0000591: - text: CL:0000591 - description: warmth sensing thermoreceptor cell - meaning: CL:0000591 - CL:0002613: - text: CL:0002613 - description: striatum neuron - meaning: CL:0002613 - CL:0000496: - text: CL:0000496 - description: green sensitive photoreceptor cell - meaning: CL:0000496 - CL:0007011: - text: CL:0007011 - description: enteric neuron - meaning: CL:0007011 - CL:2000056: - text: CL:2000056 - description: Meynert cell - meaning: CL:2000056 - CL:0003040: - text: CL:0003040 - description: M9 retinal ganglion cell - meaning: CL:0003040 - CL:0004250: - text: CL:0004250 - description: bistratified retinal amacrine cell - meaning: CL:0004250 - CL:0003029: - text: CL:0003029 - description: M2 retinal ganglion cell - meaning: CL:0003029 - CL:4023017: - text: CL:4023017 - description: sst GABAergic cortical interneuron - meaning: CL:4023017 - CL:0008028: - text: CL:0008028 - description: visual system neuron - meaning: CL:0008028 - CL:0008039: - text: CL:0008039 - description: lower motor neuron - meaning: CL:0008039 - CL:2000086: - text: CL:2000086 - description: neocortex basket cell - meaning: CL:2000086 - CL:4023023: - text: CL:4023023 - description: L5,6 neurogliaform lamp5 GABAergic cortical interneuron (Mmus) - meaning: CL:4023023 - CL:0000697: - text: CL:0000697 - description: R4 photoreceptor cell - meaning: CL:0000697 - CL:2000088: - text: CL:2000088 - description: Ammon's horn basket cell - meaning: CL:2000088 - CL:0004232: - text: CL:0004232 - description: starburst amacrine cell - meaning: CL:0004232 - CL:4023041: - text: CL:4023041 - description: L5 extratelencephalic projecting glutamatergic cortical neuron - meaning: CL:4023041 - CL:0004121: - text: CL:0004121 - description: retinal ganglion cell B2 - meaning: CL:0004121 - CL:0000748: - text: CL:0000748 - description: retinal bipolar neuron - meaning: CL:0000748 - CL:4023164: - text: CL:4023164 - description: globular bushy cell - meaning: CL:4023164 - CL:0000536: - text: CL:0000536 - description: secondary motor neuron (sensu Teleostei) - meaning: CL:0000536 - CL:1000466: - text: CL:1000466 - description: chromaffin cell of right ovary - meaning: CL:1000466 - CL:0011001: - text: CL:0011001 - description: spinal cord motor neuron - meaning: CL:0011001 - CL:0000755: - text: CL:0000755 - description: type 3 cone bipolar cell (sensu Mus) - meaning: CL:0000755 - CL:0004238: - text: CL:0004238 - description: asymmetric bistratified amacrine cell - meaning: CL:0004238 - CL:0004161: - text: CL:0004161 - description: 510 nm-cone - meaning: CL:0004161 - CL:0000198: - text: CL:0000198 - description: pain receptor cell - meaning: CL:0000198 - CL:0003038: - text: CL:0003038 - 
description: M7-OFF retinal ganglion cell - meaning: CL:0003038 - CL:0003033: - text: CL:0003033 - description: M4 retinal ganglion cell - meaning: CL:0003033 - CL:0012001: - text: CL:0012001 - description: neuron of the forebrain - meaning: CL:0012001 - CL:0011104: - text: CL:0011104 - description: interplexiform cell - meaning: CL:0011104 - CL:0003049: - text: CL:0003049 - description: M cone cell - meaning: CL:0003049 - CL:2000032: - text: CL:2000032 - description: peripheral nervous system neuron - meaning: CL:2000032 - CL:0011100: - text: CL:0011100 - description: galanergic neuron - meaning: CL:0011100 - CL:0008025: - text: CL:0008025 - description: noradrenergic neuron - meaning: CL:0008025 - CL:0000122: - text: CL:0000122 - description: stellate neuron - meaning: CL:0000122 - CL:0003005: - text: CL:0003005 - description: G4 retinal ganglion cell - meaning: CL:0003005 - CL:0000699: - text: CL:0000699 - description: paraganglial type 1 cell - meaning: CL:0000699 - CL:4033050: - text: CL:4033050 - description: catecholaminergic neuron - meaning: CL:4033050 - CL:1001502: - text: CL:1001502 - description: mitral cell - meaning: CL:1001502 - CL:0002069: - text: CL:0002069 - description: type II vestibular sensory cell - meaning: CL:0002069 - CL:4023065: - text: CL:4023065 - description: meis2 expressing cortical GABAergic cell - meaning: CL:4023065 - CL:4023077: - text: CL:4023077 - description: bitufted neuron - meaning: CL:4023077 - CL:0000847: - text: CL:0000847 - description: ciliated olfactory receptor neuron - meaning: CL:0000847 - CL:4023188: - text: CL:4023188 - description: midget ganglion cell of retina - meaning: CL:4023188 - CL:2000090: - text: CL:2000090 - description: dentate gyrus of hippocampal formation stellate cell - meaning: CL:2000090 - CL:0000568: - text: CL:0000568 - description: amine precursor uptake and decarboxylation cell - meaning: CL:0000568 - CL:1000426: - text: CL:1000426 - description: chromaffin cell of adrenal gland - meaning: CL:1000426 - CL:0000100: - text: CL:0000100 - description: motor neuron - meaning: CL:0000100 - CL:0011109: - text: CL:0011109 - description: hypocretin-secreting neuron - meaning: CL:0011109 - CL:4023171: - text: CL:4023171 - description: trigeminal motor neuron - meaning: CL:4023171 - CL:1001434: - text: CL:1001434 - description: olfactory bulb interneuron - meaning: CL:1001434 - CL:0000494: - text: CL:0000494 - description: UV sensitive photoreceptor cell - meaning: CL:0000494 - CL:0004117: - text: CL:0004117 - description: retinal ganglion cell A - meaning: CL:0004117 - CL:0000205: - text: CL:0000205 - description: thermoreceptor cell - meaning: CL:0000205 - CL:0004217: - text: CL:0004217 - description: H1 horizontal cell - meaning: CL:0004217 - CL:0000200: - text: CL:0000200 - description: touch receptor cell - meaning: CL:0000200 - CL:4023111: - text: CL:4023111 - description: cerebral cortex pyramidal neuron - meaning: CL:4023111 - CL:4032001: - text: CL:4032001 - description: reelin GABAergic cortical interneuron - meaning: CL:4032001 - CL:4023076: - text: CL:4023076 - description: Martinotti neuron - meaning: CL:4023076 - CL:0000753: - text: CL:0000753 - description: type 1 cone bipolar cell (sensu Mus) - meaning: CL:0000753 - CL:1001451: - text: CL:1001451 - description: sensory neuron of dorsal root ganglion - meaning: CL:1001451 - CL:4023021: - text: CL:4023021 - description: static gamma motor neuron - meaning: CL:4023021 - CL:0002066: - text: CL:0002066 - description: Feyrter cell - meaning: CL:0002066 - CL:0000598: 
- text: CL:0000598 - description: pyramidal neuron - meaning: CL:0000598 - CL:0000702: - text: CL:0000702 - description: R5 photoreceptor cell - meaning: CL:0000702 - CL:0008049: - text: CL:0008049 - description: Betz cell - meaning: CL:0008049 - CL:0001033: - text: CL:0001033 - description: hippocampal granule cell - meaning: CL:0001033 - CL:0000587: - text: CL:0000587 - description: cold sensing thermoreceptor cell - meaning: CL:0000587 - CL:4023161: - text: CL:4023161 - description: unipolar brush cell - meaning: CL:4023161 - CL:2000031: - text: CL:2000031 - description: lateral line ganglion neuron - meaning: CL:2000031 - CL:4023119: - text: CL:4023119 - description: displaced amacrine cell - meaning: CL:4023119 - CL:1001569: - text: CL:1001569 - description: hippocampal interneuron - meaning: CL:1001569 - CL:4023130: - text: CL:4023130 - description: kisspeptin neuron - meaning: CL:4023130 - CL:4023090: - text: CL:4023090 - description: small basket cell - meaning: CL:4023090 - CL:4023033: - text: CL:4023033 - description: OFF retinal ganglion cell - meaning: CL:4023033 - CL:4023112: - text: CL:4023112 - description: vestibular afferent neuron - meaning: CL:4023112 - CL:0004234: - text: CL:0004234 - description: diffuse multistratified amacrine cell - meaning: CL:0004234 - CL:0002082: - text: CL:0002082 - description: type II cell of adrenal medulla - meaning: CL:0002082 - CL:0010011: - text: CL:0010011 - description: cerebral cortex GABAergic interneuron - meaning: CL:0010011 - CL:4030052: - text: CL:4030052 - description: nucleus accumbens shell and olfactory tubercle D2 medium spiny - neuron - meaning: CL:4030052 - CL:0000604: - text: CL:0000604 - description: retinal rod cell - meaning: CL:0000604 - CL:4030027: - text: CL:4030027 - description: GABAergic amacrine cell - meaning: CL:4030027 - CL:1001561: - text: CL:1001561 - description: vomeronasal sensory neuron - meaning: CL:1001561 - CL:0000210: - text: CL:0000210 - description: photoreceptor cell - meaning: CL:0000210 - CL:4023012: - text: CL:4023012 - description: near-projecting glutamatergic cortical neuron - meaning: CL:4023012 - CL:4023087: - text: CL:4023087 - description: fan Martinotti neuron - meaning: CL:4023087 - CL:0000028: - text: CL:0000028 - description: CNS neuron (sensu Nematoda and Protostomia) - meaning: CL:0000028 - CL:0000006: - text: CL:0000006 - description: neuronal receptor cell - meaning: CL:0000006 - CL:0004247: - text: CL:0004247 - description: bistratified cell - meaning: CL:0004247 - CL:0010012: - text: CL:0010012 - description: cerebral cortex neuron - meaning: CL:0010012 - CL:0004245: - text: CL:0004245 - description: indoleamine-accumulating amacrine cell - meaning: CL:0004245 - CL:0004224: - text: CL:0004224 - description: AB diffuse-2 amacrine cell - meaning: CL:0004224 - CL:0003009: - text: CL:0003009 - description: G6 retinal ganglion cell - meaning: CL:0003009 - CL:0000679: - text: CL:0000679 - description: glutamatergic neuron - meaning: CL:0000679 - CL:0000166: - text: CL:0000166 - description: chromaffin cell - meaning: CL:0000166 - CL:4023088: - text: CL:4023088 - description: large basket cell - meaning: CL:4023088 - CL:4030057: - text: CL:4030057 - description: eccentric medium spiny neuron - meaning: CL:4030057 - CL:4023024: - text: CL:4023024 - description: neurogliaform lamp5 GABAergic cortical interneuron (Mmus) - meaning: CL:4023024 - CL:0005024: - text: CL:0005024 - description: somatomotor neuron - meaning: CL:0005024 - CL:4023049: - text: CL:4023049 - description: L5 
intratelencephalic projecting glutamatergic neuron of the - primary motor cortex - meaning: CL:4023049 - CL:0000573: - text: CL:0000573 - description: retinal cone cell - meaning: CL:0000573 - CL:4023123: - text: CL:4023123 - description: hypothalamus kisspeptin neuron - meaning: CL:4023123 - CL:0000376: - text: CL:0000376 - description: humidity receptor cell - meaning: CL:0000376 - CL:0004235: - text: CL:0004235 - description: AB broad diffuse-1 amacrine cell - meaning: CL:0004235 - CL:0000106: - text: CL:0000106 - description: unipolar neuron - meaning: CL:0000106 - CL:0001032: - text: CL:0001032 - description: cortical granule cell - meaning: CL:0001032 - CL:0000561: - text: CL:0000561 - description: amacrine cell - meaning: CL:0000561 - CL:4023093: - text: CL:4023093 - description: stellate pyramidal neuron - meaning: CL:4023093 - CL:0000247: - text: CL:0000247 - description: Rohon-Beard neuron - meaning: CL:0000247 - CL:0003008: - text: CL:0003008 - description: G5 retinal ganglion cell - meaning: CL:0003008 - CL:0000203: - text: CL:0000203 - description: gravity sensitive cell - meaning: CL:0000203 - CL:0003037: - text: CL:0003037 - description: M7-ON retinal ganglion cell - meaning: CL:0003037 - CL:0004221: - text: CL:0004221 - description: flag A amacrine cell - meaning: CL:0004221 - CL:0000638: - text: CL:0000638 - description: acidophil cell of pars distalis of adenohypophysis - meaning: CL:0000638 - CL:0004229: - text: CL:0004229 - description: A2-like amacrine cell - meaning: CL:0004229 - CL:4023120: - text: CL:4023120 - description: cochlea auditory hair cell - meaning: CL:4023120 - CL:0008032: - text: CL:0008032 - description: rosehip neuron - meaning: CL:0008032 - CL:0008027: - text: CL:0008027 - description: rod bipolar cell (sensu Mus) - meaning: CL:0008027 - CL:0000497: - text: CL:0000497 - description: red sensitive photoreceptor cell - meaning: CL:0000497 - CL:4023062: - text: CL:4023062 - description: dentate gyrus neuron - meaning: CL:4023062 - CL:0002516: - text: CL:0002516 - description: interrenal chromaffin cell - meaning: CL:0002516 - CL:0004119: - text: CL:0004119 - description: retinal ganglion cell B1 - meaning: CL:0004119 - CL:4030039: - text: CL:4030039 - description: von Economo neuron - meaning: CL:4030039 - CL:4023036: - text: CL:4023036 - description: chandelier pvalb GABAergic cortical interneuron - meaning: CL:4023036 - CL:0000117: - text: CL:0000117 - description: CNS neuron (sensu Vertebrata) - meaning: CL:0000117 - CL:4023015: - text: CL:4023015 - description: sncg GABAergic cortical interneuron - meaning: CL:4023015 - CL:4033033: - text: CL:4033033 - description: flat midget bipolar cell - meaning: CL:4033033 - CL:0000626: - text: CL:0000626 - description: olfactory granule cell - meaning: CL:0000626 - CL:0004218: - text: CL:0004218 - description: H2 horizontal cell - meaning: CL:0004218 - CL:0004233: - text: CL:0004233 - description: DAPI-3 amacrine cell - meaning: CL:0004233 - CL:0003021: - text: CL:0003021 - description: retinal ganglion cell C4 - meaning: CL:0003021 - CL:0000489: - text: CL:0000489 - description: scotopic photoreceptor cell - meaning: CL:0000489 - CL:4023159: - text: CL:4023159 - description: double bouquet cell - meaning: CL:4023159 - CL:0002612: - text: CL:0002612 - description: neuron of the ventral spinal cord - meaning: CL:0002612 - CL:0000476: - text: CL:0000476 - description: thyrotroph - meaning: CL:0000476 - CL:4033034: - text: CL:4033034 - description: invaginating midget bipolar cell - meaning: CL:4033034 - 
CL:4023029: - text: CL:4023029 - description: indirect pathway medium spiny neuron - meaning: CL:4023029 - CL:0004236: - text: CL:0004236 - description: AB broad diffuse-2 amacrine cell - meaning: CL:0004236 - CL:0003017: - text: CL:0003017 - description: retinal ganglion cell B3 outer - meaning: CL:0003017 - CL:0000759: - text: CL:0000759 - description: type 7 cone bipolar cell (sensu Mus) - meaning: CL:0000759 - CL:0000740: - text: CL:0000740 - description: retinal ganglion cell - meaning: CL:0000740 - CL:0004120: - text: CL:0004120 - description: retinal ganglion cell A1 - meaning: CL:0004120 - CL:3000002: - text: CL:3000002 - description: sympathetic noradrenergic neuron - meaning: CL:3000002 - CL:0003023: - text: CL:0003023 - description: retinal ganglion cell C6 - meaning: CL:0003023 - CL:0000690: - text: CL:0000690 - description: R2 photoreceptor cell - meaning: CL:0000690 - CL:4023047: - text: CL:4023047 - description: L2/3 intratelencephalic projecting glutamatergic neuron of the - primary motor cortex - meaning: CL:4023047 - CL:4023022: - text: CL:4023022 - description: canopy lamp5 GABAergic cortical interneuron (Mmus) - meaning: CL:4023022 - CL:4023060: - text: CL:4023060 - description: hippocampal CA1-3 neuron - meaning: CL:4023060 - CL:0000758: - text: CL:0000758 - description: type 6 cone bipolar cell (sensu Mus) - meaning: CL:0000758 - CL:0000535: - text: CL:0000535 - description: secondary neuron (sensu Teleostei) - meaning: CL:0000535 - CL:4023055: - text: CL:4023055 - description: corticothalamic VAL/VM projecting glutamatergic neuron of the - primary motor cortex - meaning: CL:4023055 - CL:1000467: - text: CL:1000467 - description: chromaffin cell of left ovary - meaning: CL:1000467 - CL:0011002: - text: CL:0011002 - description: lateral motor column neuron - meaning: CL:0011002 - CL:0004244: - text: CL:0004244 - description: WF4 amacrine cell - meaning: CL:0004244 - CL:1000223: - text: CL:1000223 - description: lung neuroendocrine cell - meaning: CL:1000223 - CL:1000385: - text: CL:1000385 - description: type 2 vestibular sensory cell of epithelium of crista of ampulla - of semicircular duct of membranous labyrinth - meaning: CL:1000385 - CL:0000691: - text: CL:0000691 - description: stellate interneuron - meaning: CL:0000691 - CL:4023008: - text: CL:4023008 - description: intratelencephalic-projecting glutamatergic cortical neuron - meaning: CL:4023008 - CL:4023044: - text: CL:4023044 - description: non-medulla, extratelencephalic-projecting glutamatergic neuron - of the primary motor cortex - meaning: CL:4023044 - CL:0000850: - text: CL:0000850 - description: serotonergic neuron - meaning: CL:0000850 - CL:0000695: - text: CL:0000695 - description: Cajal-Retzius cell - meaning: CL:0000695 - CL:0003051: - text: CL:0003051 - description: UV cone cell - meaning: CL:0003051 - CL:0000402: - text: CL:0000402 - description: CNS interneuron - meaning: CL:0000402 - CL:0005023: - text: CL:0005023 - description: branchiomotor neuron - meaning: CL:0005023 - CL:4023043: - text: CL:4023043 - description: L5/6 near-projecting glutamatergic neuron of the primary motor - cortex - meaning: CL:4023043 - CL:0004162: - text: CL:0004162 - description: 360 nm-cone - meaning: CL:0004162 - CL:0011003: - text: CL:0011003 - description: magnocellular neurosecretory cell - meaning: CL:0011003 - CL:0004230: - text: CL:0004230 - description: diffuse bistratified amacrine cell - meaning: CL:0004230 - CL:1001505: - text: CL:1001505 - description: parvocellular neurosecretory cell - meaning: 
CL:1001505 - CL:0011106: - text: CL:0011106 - description: GABAnergic interplexiform cell - meaning: CL:0011106 - CL:0000437: - text: CL:0000437 - description: gonadtroph - meaning: CL:0000437 - CL:4023010: - text: CL:4023010 - description: alpha7 GABAergic cortical interneuron (Mmus) - meaning: CL:4023010 - CL:4023046: - text: CL:4023046 - description: L6b subplate glutamatergic neuron of the primary motor cortex - meaning: CL:4023046 - CL:0000109: - text: CL:0000109 - description: adrenergic neuron - meaning: CL:0000109 - CL:0011000: - text: CL:0011000 - description: dorsal horn interneuron - meaning: CL:0011000 - CL:0000251: - text: CL:0000251 - description: extramedullary cell - meaning: CL:0000251 - CL:0003044: - text: CL:0003044 - description: M11 retinal ganglion cell - meaning: CL:0003044 - CL:4023053: - text: CL:4023053 - description: spinal interneuron synapsing Betz cell - meaning: CL:4023053 - CL:1000378: - text: CL:1000378 - description: type 1 vestibular sensory cell of stato-acoustic epithelium - meaning: CL:1000378 - CL:4023124: - text: CL:4023124 - description: dentate gyrus kisspeptin neuron - meaning: CL:4023124 - CL:1000427: - text: CL:1000427 - description: adrenal cortex chromaffin cell - meaning: CL:1000427 - CL:0000207: - text: CL:0000207 - description: olfactory receptor cell - meaning: CL:0000207 - CL:4023162: - text: CL:4023162 - description: bushy cell - meaning: CL:4023162 - CL:2000019: - text: CL:2000019 - description: compound eye photoreceptor cell - meaning: CL:2000019 - CL:4023086: - text: CL:4023086 - description: T Martinotti neuron - meaning: CL:4023086 - CL:0003012: - text: CL:0003012 - description: G9 retinal ganglion cell - meaning: CL:0003012 - CL:0002270: - text: CL:0002270 - description: type EC2 enteroendocrine cell - meaning: CL:0002270 - CL:2000024: - text: CL:2000024 - description: spinal cord medial motor column neuron - meaning: CL:2000024 - CL:0003022: - text: CL:0003022 - description: retinal ganglion cell C5 - meaning: CL:0003022 - CL:0000104: - text: CL:0000104 - description: multipolar neuron - meaning: CL:0000104 - CL:4023050: - text: CL:4023050 - description: L6 intratelencephalic projecting glutamatergic neuron of the - primary motor cortex - meaning: CL:4023050 - CL:4023030: - text: CL:4023030 - description: L2/3/5 fan Martinotti sst GABAergic cortical interneuron (Mmus) - meaning: CL:4023030 - CL:0000741: - text: CL:0000741 - description: spinal accessory motor neuron - meaning: CL:0000741 - CL:4033010: - text: CL:4033010 - description: neuroendocrine cell of epithelium of lobar bronchus - meaning: CL:4033010 - CL:1000425: - text: CL:1000425 - description: chromaffin cell of paraganglion - meaning: CL:1000425 - CL:4030051: - text: CL:4030051 - description: nucleus accumbens shell and olfactory tubercle D1 medium spiny - neuron - meaning: CL:4030051 - CL:0000567: - text: CL:0000567 - description: polymodal nocireceptor - meaning: CL:0000567 - CL:0004215: - text: CL:0004215 - description: type 5a cone bipolar cell - meaning: CL:0004215 - CL:0003032: - text: CL:0003032 - description: M3-OFF retinal ganglion cell - meaning: CL:0003032 - CL:4023079: - text: CL:4023079 - description: midbrain-derived inhibitory neuron - meaning: CL:4023079 - CL:0000099: - text: CL:0000099 - description: interneuron - meaning: CL:0000099 - CL:0000253: - text: CL:0000253 - description: eurydendroid cell - meaning: CL:0000253 - CL:0008013: - text: CL:0008013 - description: cranial visceromotor neuron - meaning: CL:0008013 - CL:0005000: - text: CL:0005000 - 
description: spinal cord interneuron - meaning: CL:0005000 - CL:0004222: - text: CL:0004222 - description: flag B amacrine cell - meaning: CL:0004222 - CL:0000617: - text: CL:0000617 - description: GABAergic neuron - meaning: CL:0000617 - CL:0003010: - text: CL:0003010 - description: G7 retinal ganglion cell - meaning: CL:0003010 - CL:0000577: - text: CL:0000577 - description: type EC enteroendocrine cell - meaning: CL:0000577 - CL:0003018: - text: CL:0003018 - description: retinal ganglion cell B3 inner - meaning: CL:0003018 - CL:0002083: - text: CL:0002083 - description: type I cell of adrenal medulla - meaning: CL:0002083 - CL:4023081: - text: CL:4023081 - description: inverted L6 intratelencephalic projecting glutamatergic neuron - of the primary motor cortex (Mmus) - meaning: CL:4023081 - CL:0004251: - text: CL:0004251 - description: narrow field retinal amacrine cell - meaning: CL:0004251 - CL:4023092: - text: CL:4023092 - description: inverted pyramidal neuron - meaning: CL:4023092 - CL:0002608: - text: CL:0002608 - description: hippocampal neuron - meaning: CL:0002608 - CL:0008048: - text: CL:0008048 - description: upper motor neuron - meaning: CL:0008048 - CL:0011113: - text: CL:0011113 - description: spiral ganglion neuron - meaning: CL:0011113 - CL:0000601: - text: CL:0000601 - description: cochlear outer hair cell - meaning: CL:0000601 - CL:0003041: - text: CL:0003041 - description: M9-ON retinal ganglion cell - meaning: CL:0003041 - CL:4023042: - text: CL:4023042 - description: L6 corticothalamic-projecting glutamatergic cortical neuron - meaning: CL:4023042 - CL:0000199: - text: CL:0000199 - description: mechanoreceptor cell - meaning: CL:0000199 - CL:1001571: - text: CL:1001571 - description: hippocampal pyramidal neuron - meaning: CL:1001571 - CL:2000048: - text: CL:2000048 - description: anterior horn motor neuron - meaning: CL:2000048 - CL:4023170: - text: CL:4023170 - description: trigeminal sensory neuron - meaning: CL:4023170 - CL:0002614: - text: CL:0002614 - description: neuron of the substantia nigra - meaning: CL:0002614 diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index 36e84b357..c8090f30f 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -100,6 +100,11 @@ import warnings warnings.filterwarnings("ignore", category=UserWarning, message="HERD is experimental*") +try: + import linkml_runtime # noqa: F401 +except ImportError as e: + raise ImportError("Please install linkml-runtime to run this example: pip install linkml-runtime") from e + try: dir_path = os.path.dirname(os.path.abspath(__file__)) yaml_file = os.path.join(dir_path, 'example_term_set.yaml') diff --git a/docs/gallery/plot_term_set.py b/docs/gallery/plot_term_set.py index 8bf2375aa..50945889a 100644 --- a/docs/gallery/plot_term_set.py +++ b/docs/gallery/plot_term_set.py @@ -65,6 +65,8 @@ For more information how to properly format the schema to support LinkML Dynamic Enumerations, please refer to https://linkml.io/linkml/schemas/enums.html#dynamic-enums. 
""" +# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_termset.png' + from hdmf.common import DynamicTable, VectorData import os import numpy as np diff --git a/docs/gallery/schemasheets/nwb_static_enums.yaml b/docs/gallery/schemasheets/nwb_static_enums.yaml deleted file mode 100644 index 222205959..000000000 --- a/docs/gallery/schemasheets/nwb_static_enums.yaml +++ /dev/null @@ -1,52 +0,0 @@ -classes: - BrainSample: - slot_usage: - cell_type: {} - slots: - - cell_type -default_prefix: TEMP -default_range: string -description: this schema demonstrates the use of static enums -enums: - NeuronOrGlialCellTypeEnum: - description: Enumeration to capture various cell types found in the brain. - permissible_values: - ASTROCYTE: - description: Characteristic star-shaped glial cells in the brain and spinal - cord. - meaning: CL:0000127 - INTERNEURON: - description: Neurons whose axons (and dendrites) are limited to a single brain - area. - meaning: CL:0000099 - MICROGLIAL_CELL: - description: Microglia are the resident immune cells of the brain and constantly - patrol the cerebral microenvironment to respond to pathogens and damage. - meaning: CL:0000129 - MOTOR_NEURON: - description: Neurons whose cell body is located in the motor cortex, brainstem - or the spinal cord, and whose axon (fiber) projects to the spinal cord or - outside of the spinal cord to directly or indirectly control effector organs, - mainly muscles and glands. - meaning: CL:0000100 - OLIGODENDROCYTE: - description: Type of neuroglia whose main functions are to provide support - and insulation to axons within the central nervous system (CNS) of jawed - vertebrates. - meaning: CL:0000128 - PYRAMIDAL_NEURON: - description: Neurons with a pyramidal shaped cell body (soma) and two distinct - dendritic trees. - meaning: CL:0000598 -id: https://w3id.org/linkml/examples/nwb_static_enums -imports: -- linkml:types -name: nwb_static_enums -prefixes: - CL: http://purl.obolibrary.org/obo/CL_ - TEMP: https://example.org/TEMP/ - linkml: https://w3id.org/linkml/ -slots: - cell_type: - required: true -title: static enums example diff --git a/docs/source/conf.py b/docs/source/conf.py index caff737e7..4898074d2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -76,7 +76,7 @@ "matplotlib": ("https://matplotlib.org/stable/", None), "h5py": ("https://docs.h5py.org/en/latest/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "zarr": ("https://zarr.readthedocs.io/en/stable/", None), + "zarr": ("https://zarr.readthedocs.io/en/v2.18.4/", None), # TODO - update when hdmf-zarr supports Zarr 3.0 } # these links cannot be checked in github actions @@ -87,12 +87,13 @@ nitpicky = True nitpick_ignore = [('py:class', 'Intracomm'), - ('py:class', 'h5py.RegionReference'), ('py:class', 'h5py._hl.dataset.Dataset'), ('py:class', 'function'), ('py:class', 'unittest.case.TestCase'), ] +suppress_warnings = ["config.cache"] + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -108,7 +109,7 @@ # General information about the project. 
project = "HDMF" -copyright = "2017-2024, Hierarchical Data Modeling Framework" +copyright = "2017-2025, Hierarchical Data Modeling Framework" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -161,16 +162,12 @@ # html_theme = 'default' # html_theme = "sphinxdoc" html_theme = "sphinx_rtd_theme" -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None diff --git a/docs/source/figures/gallery_thumbnail_termset.png b/docs/source/figures/gallery_thumbnail_termset.png new file mode 100644 index 000000000..29a0db903 Binary files /dev/null and b/docs/source/figures/gallery_thumbnail_termset.png differ diff --git a/docs/source/figures/gallery_thumbnails.pptx b/docs/source/figures/gallery_thumbnails.pptx index ac3da484d..5ede1c4b5 100644 Binary files a/docs/source/figures/gallery_thumbnails.pptx and b/docs/source/figures/gallery_thumbnails.pptx differ diff --git a/docs/source/index.rst b/docs/source/index.rst index 2fcd4778a..842bacc98 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -62,7 +62,6 @@ If you use HDMF in your research, please use the following citation: :caption: For Maintainers make_a_release - update_requirements .. toctree:: :hidden: diff --git a/docs/source/install_developers.rst b/docs/source/install_developers.rst index d043a351a..72da40332 100644 --- a/docs/source/install_developers.rst +++ b/docs/source/install_developers.rst @@ -52,11 +52,11 @@ Option 2: Using conda The `conda package and environment management system`_ is an alternate way of managing virtual environments. First, install Anaconda_ to install the ``conda`` tool. Then create and -activate a new virtual environment called ``"hdmf-env"`` with Python 3.12 installed. +activate a new virtual environment called ``"hdmf-env"`` with Python 3.13 installed. .. code:: bash - conda create --name hdmf-env python=3.12 + conda create --name hdmf-env python=3.13 conda activate hdmf-env Similar to a virtual environment created with ``venv``, a conda environment @@ -73,7 +73,7 @@ environment by using the ``conda remove --name hdmf-venv --all`` command. For advanced users, we recommend using Mambaforge_, a faster version of the conda package manager that includes conda-forge as a default channel. -.. _Anaconda: https://www.anaconda.com/products/distribution +.. _Anaconda: https://www.anaconda.com/download .. _Mambaforge: https://github.com/conda-forge/miniforge Install from GitHub @@ -88,8 +88,7 @@ package requirements using the pip_ Python package manager, and install HDMF in git clone --recurse-submodules https://github.com/hdmf-dev/hdmf.git cd hdmf - pip install -r requirements.txt -r requirements-dev.txt -r requirements-doc.txt -r requirements-opt.txt - pip install -e . + pip install -e ".[all]" .. note:: diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 8102651ff..f4d701c07 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -4,7 +4,7 @@ Installing HDMF --------------- -HDMF requires having Python 3.8, 3.9, 3.10, 3.11, or 3.12 installed. 
If you don't have Python installed and want the simplest way to +HDMF requires having Python 3.9-3.13 installed. If you don't have Python installed and want the simplest way to get started, we recommend you install and use the `Anaconda Distribution`_. It includes Python, NumPy, and many other commonly used packages for scientific computing and data science. @@ -29,4 +29,4 @@ You can also install HDMF using ``conda`` by running the following command in a conda install -c conda-forge hdmf -.. _Anaconda Distribution: https://www.anaconda.com/products/distribution +.. _Anaconda Distribution: https://www.anaconda.com/download diff --git a/docs/source/make_a_release.rst b/docs/source/make_a_release.rst index d2da593bd..57dd26a2e 100644 --- a/docs/source/make_a_release.rst +++ b/docs/source/make_a_release.rst @@ -20,8 +20,7 @@ Prerequisites * You have a `GPG signing key`_. -* Dependency versions in ``requirements.txt``, ``requirements-dev.txt``, ``requirements-opt.txt``, - ``requirements-doc.txt``, and ``requirements-min.txt`` are up-to-date. +* Dependency versions are up-to-date. * Legal information and copyright dates in ``Legal.txt``, ``license.txt``, ``README.rst``, ``docs/source/conf.py``, and any other files are up-to-date. @@ -177,7 +176,7 @@ Publish release on conda-forge: Step-by-step Conda-forge maintains a bot called "regro-cf-autotick-bot" that regularly monitors PyPI for new releases of packages that are also on conda-forge. When a new release is detected, usually within 24 hours of publishing on PyPI, the bot will create a Pull Request with the correct modifications to the version and sha256 values - in ``meta.yaml``. If the requirements in ``setup.py`` have been changed, then you need to modify the + in ``meta.yaml``. If the requirements in ``pyproject.toml`` have been changed, then you need to modify the requirements/run section in ``meta.yaml`` manually to reflect these changes. Once tests pass, merge the PR, and a new release will be published on Anaconda cloud. This is the easiest way to update the package version on conda-forge. @@ -242,7 +241,7 @@ In order to release a new version on conda-forge manually, follow the steps belo $ sha=$(openssl sha256 /tmp/hdmf-$release.tar.gz | awk '{print $2}') $ sed -i -- "3s/.*/{$ set sha256 = \"$sha\" %}/" recipe/meta.yaml - If the requirements in ``setup.py`` have been changed, then modify the requirements/run list in + If the requirements in ``pyproject.toml`` have been changed, then modify the requirements/run list in the ``meta.yaml`` file to reflect these changes. 
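(Editorial illustration, not part of the patch.) The make_a_release.rst hunk above retrieves the release SHA256 with `openssl sha256` before patching recipe/meta.yaml. As a minimal sketch of the same checksum step done in Python, assuming a locally downloaded sdist whose path and version are placeholders, the digest can be computed with the standard-library hashlib module:

# Illustrative sketch only (not part of this diff): compute the sdist SHA256
# in Python instead of the `openssl sha256` command shown above.
import hashlib

def sha256_of_file(path: str, chunk_size: int = 1 << 20) -> str:
    """Return the hex SHA256 digest of the file at `path`, read in chunks."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical tarball path; substitute the sdist downloaded from PyPI for the release.
print(sha256_of_file("/tmp/hdmf-X.Y.Z.tar.gz"))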
diff --git a/docs/source/overview_software_architecture.rst b/docs/source/overview_software_architecture.rst
index 973a01b2f..d63c953fe 100644
--- a/docs/source/overview_software_architecture.rst
+++ b/docs/source/overview_software_architecture.rst
@@ -68,7 +68,7 @@ Builder
   * :py:class:`~hdmf.build.builders.GroupBuilder` - represents a collection of objects
   * :py:class:`~hdmf.build.builders.DatasetBuilder` - represents data
   * :py:class:`~hdmf.build.builders.LinkBuilder` - represents soft-links
-  * :py:class:`~hdmf.build.builders.RegionBuilder` - represents a slice into data (Subclass of :py:class:`~hdmf.build.builders.DatasetBuilder`)
+  * :py:class:`~hdmf.build.builders.ReferenceBuilder` - represents a reference to another group or dataset
 
 * **Main Module:** :py:class:`hdmf.build.builders`
 
diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst
index 30501769e..f3a6c7457 100644
--- a/docs/source/software_process.rst
+++ b/docs/source/software_process.rst
@@ -45,48 +45,44 @@ pyproject.toml_ contains a list of package dependencies and their version ranges
 running HDMF. As a library, upper bound version constraints create more harm than good in the long term (see this
 `blog post`_) so we avoid setting upper bounds on requirements.
 
-If some of the packages are outdated, see :ref:`update_requirements_files`.
+When setting lower bounds, make sure to specify the lower bounds in both pyproject.toml_ and
+requirements-min.txt_. The latter is used in automated testing to ensure that the package runs
+correctly using the minimum versions of dependencies.
+
+Minimum requirements should be updated manually if a new feature or bug fix is added in a dependency that is required
+for proper running of HDMF. Minimum requirements should also be updated if a user requests that HDMF be installable
+with an older version of a dependency, all tests pass using the older version, and there is no valid reason for the
+minimum version to be as high as it is.
 
 .. _pyproject.toml: https://github.com/hdmf-dev/hdmf/blob/dev/pyproject.toml
 .. _blog post: https://iscinumpy.dev/post/bound-version-constraints/
+.. _requirements-min.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-min.txt
 
 --------------------
 Testing Requirements
 --------------------
 
-There are several kinds of requirements files used for testing PyNWB.
-
-The first one is requirements-min.txt_, which lists the package dependencies and their minimum versions for
-installing HDMF.
+pyproject.toml_ contains the optional dependency group "test" with testing requirements.
 
-The second one is requirements.txt_, which lists the pinned (concrete) dependencies to reproduce
-an entire development environment to use HDMF.
+See tox.ini_ and the GitHub Actions workflows for how different testing environments are
+defined using the optional dependency groups.
 
-The third one is requirements-dev.txt_, which list the pinned (concrete) dependencies to reproduce
-an entire development environment to use HDMF, run HDMF tests, check code style, compute coverage, and create test
-environments.
+environment-ros3.yml_ lists the dependencies used to test ROS3 streaming in HDMF which
+can only be done in a Conda environment.
 
-The fourth one is requirements-opt.txt_, which lists the pinned (concrete) optional dependencies to use all
-available features in HDMF.
-
-The final one is environment-ros3.yml_, which lists the dependencies used to
-test ROS3 streaming in HDMF.
-
-..
_requirements-min.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-min.txt -.. _requirements.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements.txt -.. _requirements-dev.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-dev.txt -.. _requirements-opt.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-opt.txt +.. _tox.ini: https://github.com/hdmf-dev/hdmf/blob/dev/tox.ini .. _environment-ros3.yml: https://github.com/hdmf-dev/hdmf/blob/dev/environment-ros3.yml -------------------------- Documentation Requirements -------------------------- -requirements-doc.txt_ lists the dependencies to generate the documentation for HDMF. -Both this file and `requirements.txt` are used by ReadTheDocs_ to initialize the local environment for Sphinx to run. +pyproject.toml_ contains the optional dependency group "docs" with documentation requirements. +This dependency group is used by ReadTheDocs_ to initialize the local environment for Sphinx to run +(see .readthedocs.yaml_). -.. _requirements-doc.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-doc.txt .. _ReadTheDocs: https://readthedocs.org/projects/hdmf/ +.. _.readthedocs.yaml: https://github.com/hdmf-dev/hdmf/blob/dev/.readthedocs.yaml ------------------------- Versioning and Releasing diff --git a/docs/source/update_requirements.rst b/docs/source/update_requirements.rst deleted file mode 100644 index 65b4b99d4..000000000 --- a/docs/source/update_requirements.rst +++ /dev/null @@ -1,78 +0,0 @@ - -.. _update_requirements_files: - -================================ -How to Update Requirements Files -================================ - -The different requirements files introduced in :ref:`software_process` section are the following: - -* requirements.txt_ -* requirements-dev.txt_ -* requirements-doc.txt_ -* requirements-min.txt_ -* requirements-opt.txt_ - -.. _requirements.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements.txt -.. _requirements-dev.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-dev.txt -.. _requirements-doc.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-doc.txt -.. _requirements-min.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-min.txt -.. _requirements-opt.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-opt.txt - -requirements.txt -================ - -`requirements.txt` of the project can be created or updated and then captured using -the following script: - -.. code:: - - mkvirtualenv hdmf-requirements - - cd hdmf - pip install . - pip check # check for package conflicts - pip freeze > requirements.txt - - deactivate - rmvirtualenv hdmf-requirements - - -requirements-(dev|doc|opt).txt -============================== - -Any of these requirements files can be updated using -the following scripts: - -.. code:: - - cd hdmf - - # Set the requirements file to update - target_requirements=requirements-dev.txt - - mkvirtualenv hdmf-requirements - - # Install updated requirements - pip install -U -r $target_requirements - - # If relevant, you could pip install new requirements now - # pip install -U - - # Check for any conflicts in installed packages - pip check - - # Update list of pinned requirements - pip freeze > $target_requirements - - deactivate - rmvirtualenv hdmf-requirements - - -requirements-min.txt -==================== - -Minimum requirements should be updated manually if a new feature or bug fix is added in a dependency that is required -for proper running of HDMF. 
Minimum requirements should also be updated if a user requests that HDMF be installable -with an older version of a dependency, all tests pass using the older version, and there is no valid reason for the -minimum version to be as high as it is. diff --git a/environment-ros3.yml b/environment-ros3.yml index 458b899ba..6b4f6c472 100644 --- a/environment-ros3.yml +++ b/environment-ros3.yml @@ -1,15 +1,14 @@ -# pinned dependencies to reproduce an entire development environment to use PyNWB with ROS3 support +# environment file used to test HDMF with ROS3 support name: ros3 channels: - conda-forge - defaults dependencies: - - python==3.12 - - h5py==3.10.0 - - matplotlib==3.8.0 - - numpy==1.26.0 - - pandas==2.1.2 - - python-dateutil==2.8.2 - - pytest==7.4.3 - - pytest-cov==4.1.0 - - setuptools + - python==3.13 + - h5py==3.12.1 + - matplotlib==3.9.2 + - numpy==2.2.1 + - pandas==2.2.3 + - python-dateutil==2.9.0.post0 + - pytest==8.3.4 + - pytest-cov==6.0.0 diff --git a/license.txt b/license.txt index f7964f329..c43f1f876 100644 --- a/license.txt +++ b/license.txt @@ -1,4 +1,4 @@ -“hdmf” Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +“hdmf” Copyright (c) 2017-2025, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/pyproject.toml b/pyproject.toml index b60ae6943..5308543d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,15 +13,15 @@ authors = [ ] description = "A hierarchical data modeling framework for modern science data standards" readme = "README.rst" -requires-python = ">=3.8" +requires-python = ">=3.9" license = {text = "BSD-3-Clause"} classifiers = [ "Programming Language :: Python", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "License :: OSI Approved :: BSD License", "Development Status :: 5 - Production/Stable", "Operating System :: OS Independent", @@ -30,23 +30,47 @@ classifiers = [ "Topic :: Scientific/Engineering :: Medical Science Apps.", ] dependencies = [ - "h5py>=2.10", - "jsonschema>=2.6.0", - "numpy>=1.18", - "pandas>=1.0.5", + "h5py>=3.1.0", + "jsonschema>=3.2.0", + 'numpy>=1.19.3', + "pandas>=1.2.0", "ruamel.yaml>=0.16", - "scipy>=1.4", - "importlib-resources; python_version < '3.9'", # TODO: remove when minimum python version is 3.9 + "scipy>=1.7", ] dynamic = ["version"] [project.optional-dependencies] -zarr = ["zarr>=2.12.0"] tqdm = ["tqdm>=4.41.0"] -termset = ["linkml-runtime>=1.5.5; python_version >= '3.9'", - "schemasheets>=0.1.23; python_version >= '3.9'", - "oaklib>=0.5.12; python_version >= '3.9'", - "pyyaml>=6.0.1; python_version >= '3.9'"] +zarr = ["zarr>=2.12.0,<3"] +termset = [ + "linkml-runtime>=1.5.5", + "schemasheets>=0.1.23", + "oaklib>=0.5.12", + "pyyaml>=6.0.1", +] + +# development dependencies +test = [ + "codespell", + "pre-commit", + "pytest", + "pytest-cov", + "python-dateutil", + "ruff", + "tox", +] + +# documentation dependencies +docs = [ + "matplotlib", + "sphinx>=4", # improved 
support for docutils>=0.17 + "sphinx_rtd_theme>=1", # <1 does not work with docutils>=0.17 + "sphinx-gallery", + "sphinx-copybutton", +] + +# all possible dependencies +all = ["hdmf[tqdm,zarr,termset,test,docs]"] [project.urls] "Homepage" = "https://github.com/hdmf-dev/hdmf" @@ -64,10 +88,23 @@ source = "vcs" version-file = "src/hdmf/_version.py" [tool.hatch.build.targets.sdist] -exclude = [".git_archival.txt"] +exclude = [ + ".git*", + ".codecov.yml", + ".readthedocs.yaml", + ".mailmap", + ".pre-commit-config.yaml", +] [tool.hatch.build.targets.wheel] packages = ["src/hdmf"] +exclude = [ + ".git*", + ".codecov.yml", + ".readthedocs.yaml", + ".mailmap", + ".pre-commit-config.yaml", +] # [tool.mypy] # no_incremental = true # needed b/c mypy and ruamel.yaml do not play nice. https://github.com/python/mypy/issues/12664 @@ -77,26 +114,25 @@ packages = ["src/hdmf"] # verbose = 1 [tool.pytest.ini_options] -addopts = "--cov --cov-report html" norecursedirs = "tests/unit/helpers" [tool.codespell] skip = "htmlcov,.git,.mypy_cache,.pytest_cache,.coverage,*.pdf,*.svg,venvs,.tox,hdmf-common-schema,./docs/_build/*,*.ipynb" -ignore-words-list = "datas" +ignore-words-list = "datas,assertIn" [tool.coverage.run] branch = true -source = ["src/"] -omit = [ - "src/hdmf/_due.py", - "src/hdmf/testing/*", -] +source = ["hdmf"] [tool.coverage.report] exclude_lines = [ "pragma: no cover", "@abstract" ] +omit = [ + "*/hdmf/_due.py", + "*/hdmf/testing/*", +] # [tool.black] # line-length = 120 @@ -105,7 +141,7 @@ exclude_lines = [ # force-exclude = "src/hdmf/common/hdmf-common-schema|docs/gallery" [tool.ruff] -select = ["E", "F", "T100", "T201", "T203"] +lint.select = ["E", "F", "T100", "T201", "T203"] exclude = [ ".git", ".tox", @@ -117,14 +153,15 @@ exclude = [ "src/hdmf/_due.py", "docs/source/tutorials/", "docs/_build/", + "scripts/" ] line-length = 120 -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "docs/gallery/*" = ["E402", "T201"] "src/*/__init__.py" = ["F401"] "setup.py" = ["T201"] "test_gallery.py" = ["T201"] -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 17 diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 1d856e4e7..000000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,13 +0,0 @@ -# pinned dependencies to reproduce an entire development environment to use HDMF, run HDMF tests, check code style, -# compute coverage, and create test environments. note that depending on the version of python installed, different -# versions of requirements may be installed due to package incompatibilities. 
-# -black==24.3.0 -codespell==2.2.6 -coverage==7.3.2 -pre-commit==3.5.0 -pytest==7.4.3 -pytest-cov==4.1.0 -python-dateutil==2.8.2 -ruff==0.1.3 -tox==4.11.3 diff --git a/requirements-doc.txt b/requirements-doc.txt deleted file mode 100644 index 32a790cf8..000000000 --- a/requirements-doc.txt +++ /dev/null @@ -1,6 +0,0 @@ -# dependencies to generate the documentation for HDMF -matplotlib -sphinx>=4 # improved support for docutils>=0.17 -sphinx_rtd_theme>=1 # <1 does not work with docutils>=0.17 -sphinx-gallery -sphinx-copybutton diff --git a/requirements-min.txt b/requirements-min.txt index a437fc588..a9fbeb93e 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,15 +1,10 @@ # minimum versions of package dependencies for installing HDMF -h5py==2.10 # support for selection of datasets with list of indices added in 2.10 -importlib-resources==5.12.0; python_version < "3.9" # TODO: remove when when minimum python version is 3.9 +# NOTE: these should match the minimum bound for dependencies in pyproject.toml +h5py==3.1.0 jsonschema==3.2.0 -numpy==1.18 -pandas==1.0.5 # when this is changed to >=1.5.0, see TODO items referenced in #762 -ruamel.yaml==0.16 -scipy==1.4 -# this file is currently used to test only python~=3.8 so these dependencies are not needed -# linkml-runtime==1.5.5; python_version >= "3.9" -# schemasheets==0.1.23; python_version >= "3.9" -# oaklib==0.5.12; python_version >= "3.9" -# pyyaml==6.0.1; python_version >= "3.9" +numpy==1.19.3 +pandas==1.2.0 +ruamel.yaml==0.16.0 +scipy==1.7.0 tqdm==4.41.0 zarr==2.12.0 diff --git a/requirements-opt.txt b/requirements-opt.txt deleted file mode 100644 index 53fd11e3a..000000000 --- a/requirements-opt.txt +++ /dev/null @@ -1,6 +0,0 @@ -# pinned dependencies that are optional. used to reproduce an entire development environment to use HDMF -tqdm==4.66.2 -zarr==2.17.1 -linkml-runtime==1.7.4; python_version >= "3.9" -schemasheets==0.2.1; python_version >= "3.9" -oaklib==0.5.32; python_version >= "3.9" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 5182d5c2e..000000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -# pinned dependencies to reproduce an entire development environment to use HDMF -h5py==3.10.0 -importlib-resources==6.1.0; python_version < "3.9" # TODO: remove when minimum python version is 3.9 -jsonschema==4.19.1 -numpy==1.26.1 -pandas==2.1.2 -ruamel.yaml==0.18.2 -scipy==1.11.3 diff --git a/scripts/check_py_support.py b/scripts/check_py_support.py new file mode 100644 index 000000000..5c48dac3c --- /dev/null +++ b/scripts/check_py_support.py @@ -0,0 +1,205 @@ +""" +Python Version Support Checker + +This script analyzes Python package dependencies listed in pyproject.toml to check their +compatibility with a specified Python version (default: 3.13). It examines both regular +and optional dependencies, checking their trove classifiers for explicit version support. 
+ +The script provides: +- Grouped output of supported and unsupported packages +- Latest supported Python version for packages without explicit support +- Error reporting for packages that cannot be checked +- Summary statistics of compatibility status + +Usage: + # Run this command from the root of the repo + python scripts/check_py_support.py + +Requirements: + - Python 3.11+ + - packaging + - colorama + +Input: + - pyproject.toml file in the current directory + +Output format: + - Supported packages (green) with their versions + - Unsupported packages (red) with their versions and latest supported Python version + - Packages with errors (yellow) + - Summary statistics + +Note: + The absence of explicit version support in trove classifiers doesn't necessarily + indicate incompatibility, just that the package hasn't declared support. +""" + +import tomllib +import importlib.metadata +from pathlib import Path +from packaging.requirements import Requirement +from colorama import init, Fore, Style +from typing import NamedTuple +import re + +# Initialize colorama +init() + +# Global configuration +PYTHON_VERSION = "3.13" + +class PackageSupport(NamedTuple): + name: str + spec: str + version: str | None + latest_python: str | None + error: str | None + +def parse_dependencies(pyproject_path: Path) -> list[str]: + """Parse dependencies from pyproject.toml, including optional dependencies.""" + with pyproject_path.open("rb") as f: + pyproject = tomllib.load(f) + + # Get main dependencies + dependencies = pyproject.get("project", {}).get("dependencies", []) + + # Get optional dependencies and flatten them + optional_deps = pyproject.get("project", {}).get("optional-dependencies", {}) + for group_deps in optional_deps.values(): + dependencies.extend(group_deps) + + return dependencies + +def get_package_name(dependency_spec: str) -> str: + """Extract package name from dependency specification.""" + return Requirement(dependency_spec).name + +def get_latest_python_version(classifiers: list[str]) -> str | None: + """Extract the latest supported Python version from classifiers.""" + python_versions = [] + pattern = r"Programming Language :: Python :: (\d+\.\d+)" + + for classifier in classifiers: + match = re.match(pattern, classifier) + if match: + version = match.group(1) + try: + major, minor = map(int, version.split('.')) + python_versions.append((major, minor)) + except ValueError: + continue + + if not python_versions: + return None + + # Sort by major and minor version + latest = sorted(python_versions, key=lambda x: (x[0], x[1]), reverse=True)[0] + return f"{latest[0]}.{latest[1]}" + +def check_python_version_support(package_name: str) -> dict[str, str | bool | None]: + """Check if installed package supports Python 3.13.""" + try: + dist = importlib.metadata.distribution(package_name) + classifiers = dist.metadata.get_all('Classifier') + version_classifier = f"Programming Language :: Python :: {PYTHON_VERSION}" + + return { + 'installed_version': dist.version, + 'has_support': version_classifier in classifiers, + 'latest_python': get_latest_python_version(classifiers), + 'error': None + } + except importlib.metadata.PackageNotFoundError: + return { + 'installed_version': None, + 'has_support': False, + 'latest_python': None, + 'error': 'Package not installed' + } + except Exception as e: + return { + 'installed_version': None, + 'has_support': False, + 'latest_python': None, + 'error': str(e) + } + +def print_section_header(title: str, count: int) -> None: + """Print a formatted section 
header with count.""" + print(f"\n{Fore.CYAN}{title} ({count} packages){Style.RESET_ALL}") + print(f"{Fore.BLUE}{'-' * 100}{Style.RESET_ALL}") + print(f"{Fore.YELLOW}{'Package':<25} {'Specification':<30} {'Version':<20} {'Latest Python'}{Style.RESET_ALL}") + print(f"{Fore.BLUE}{'-' * 100}{Style.RESET_ALL}") + +def main() -> None: + pyproject_path = Path("pyproject.toml") + + if not pyproject_path.exists(): + print(f"{Fore.RED}Error: pyproject.toml not found{Style.RESET_ALL}") + return + + try: + dependencies = parse_dependencies(pyproject_path) + except Exception as e: + print(f"{Fore.RED}Error parsing pyproject.toml: {e}{Style.RESET_ALL}") + return + + # Check each dependency + supported: list[PackageSupport] = [] + unsupported: list[PackageSupport] = [] + errors: list[PackageSupport] = [] + + for dep in dependencies: + package_name = get_package_name(dep) + result = check_python_version_support(package_name) + + package_info = PackageSupport( + name=package_name, + spec=dep, + version=result['installed_version'], + latest_python=result['latest_python'], + error=result['error'] + ) + + if result['error']: + errors.append(package_info) + elif result['has_support']: + supported.append(package_info) + else: + unsupported.append(package_info) + + # Print results + print(f"\n{Fore.CYAN}Python {PYTHON_VERSION} Explicit Support Check Results{Style.RESET_ALL}") + print(f"{Fore.BLUE}{'=' * 100}{Style.RESET_ALL}") + + # Print supported packages + if supported: + print_section_header("Supported Packages", len(supported)) + for pkg in supported: + print(f"{Fore.GREEN}{pkg.name:<25} {pkg.spec:<30} {pkg.version:<20} {PYTHON_VERSION}{Style.RESET_ALL}") + + # Print unsupported packages + if unsupported: + print_section_header("Unsupported Packages", len(unsupported)) + for pkg in unsupported: + latest = f"→ {pkg.latest_python}" if pkg.latest_python else "unknown" + print(f"{Fore.RED}{pkg.name:<25} {pkg.spec:<30} {pkg.version:<20} {latest}{Style.RESET_ALL}") + + # Print packages with errors + if errors: + print_section_header("Packages with Errors", len(errors)) + for pkg in errors: + print(f"{Fore.YELLOW}{pkg.name:<25} {pkg.spec:<30} {pkg.error:<20} N/A{Style.RESET_ALL}") + + # Print summary + print(f"\n{Fore.CYAN}Summary:{Style.RESET_ALL}") + print(f"{Fore.BLUE}{'-' * 100}{Style.RESET_ALL}") + total = len(supported) + len(unsupported) + len(errors) + print(f"{Fore.GREEN}Supported: {len(supported):3d} ({len(supported)/total*100:.1f}%){Style.RESET_ALL}") + print(f"{Fore.RED}Unsupported: {len(unsupported):3d} ({len(unsupported)/total*100:.1f}%){Style.RESET_ALL}") + if errors: + print(f"{Fore.YELLOW}Errors: {len(errors):3d} ({len(errors)/total*100:.1f}%){Style.RESET_ALL}") + print(f"{Fore.CYAN}Total: {total:3d}{Style.RESET_ALL}") + +if __name__ == "__main__": + main() diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index 6fc72a117..10305d37b 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -1,32 +1,10 @@ from . 
import query -from .backends.hdf5.h5_utils import H5Dataset, H5RegionSlicer -from .container import Container, Data, DataRegion, HERDManager -from .region import ListSlicer +from .backends.hdf5.h5_utils import H5Dataset +from .container import Container, Data, HERDManager from .utils import docval, getargs from .term_set import TermSet, TermSetWrapper, TypeConfigurator -@docval( - {"name": "dataset", "type": None, "doc": "the HDF5 dataset to slice"}, - {"name": "region", "type": None, "doc": "the region reference to use to slice"}, - is_method=False, -) -def get_region_slicer(**kwargs): - import warnings # noqa: E402 - - warnings.warn( - "get_region_slicer is deprecated and will be removed in HDMF 3.0.", - DeprecationWarning, - ) - - dataset, region = getargs("dataset", "region", kwargs) - if isinstance(dataset, (list, tuple, Data)): - return ListSlicer(dataset, region) - elif isinstance(dataset, H5Dataset): - return H5RegionSlicer(dataset, region) - return None - - try: # see https://effigies.gitlab.io/posts/python-packaging-2023/ from ._version import __version__ diff --git a/src/hdmf/array.py b/src/hdmf/array.py deleted file mode 100644 index a684572e4..000000000 --- a/src/hdmf/array.py +++ /dev/null @@ -1,197 +0,0 @@ -from abc import abstractmethod, ABCMeta - -import numpy as np - - -class Array: - - def __init__(self, data): - self.__data = data - if hasattr(data, 'dtype'): - self.dtype = data.dtype - else: - tmp = data - while isinstance(tmp, (list, tuple)): - tmp = tmp[0] - self.dtype = type(tmp) - - @property - def data(self): - return self.__data - - def __len__(self): - return len(self.__data) - - def get_data(self): - return self.__data - - def __getidx__(self, arg): - return self.__data[arg] - - def __sliceiter(self, arg): - return (x for x in range(*arg.indices(len(self)))) - - def __getitem__(self, arg): - if isinstance(arg, list): - idx = list() - for i in arg: - if isinstance(i, slice): - idx.extend(x for x in self.__sliceiter(i)) - else: - idx.append(i) - return np.fromiter((self.__getidx__(x) for x in idx), dtype=self.dtype) - elif isinstance(arg, slice): - return np.fromiter((self.__getidx__(x) for x in self.__sliceiter(arg)), dtype=self.dtype) - elif isinstance(arg, tuple): - return (self.__getidx__(arg[0]), self.__getidx__(arg[1])) - else: - return self.__getidx__(arg) - - -class AbstractSortedArray(Array, metaclass=ABCMeta): - ''' - An abstract class for representing sorted array - ''' - - @abstractmethod - def find_point(self, val): - pass - - def get_data(self): - return self - - def __lower(self, other): - ins = self.find_point(other) - return ins - - def __upper(self, other): - ins = self.__lower(other) - while self[ins] == other: - ins += 1 - return ins - - def __lt__(self, other): - ins = self.__lower(other) - return slice(0, ins) - - def __le__(self, other): - ins = self.__upper(other) - return slice(0, ins) - - def __gt__(self, other): - ins = self.__upper(other) - return slice(ins, len(self)) - - def __ge__(self, other): - ins = self.__lower(other) - return slice(ins, len(self)) - - @staticmethod - def __sort(a): - if isinstance(a, tuple): - return a[0] - else: - return a - - def __eq__(self, other): - if isinstance(other, list): - ret = list() - for i in other: - eq = self == i - ret.append(eq) - ret = sorted(ret, key=self.__sort) - tmp = list() - for i in range(1, len(ret)): - a, b = ret[i - 1], ret[i] - if isinstance(a, tuple): - if isinstance(b, tuple): - if a[1] >= b[0]: - b[0] = a[0] - else: - tmp.append(slice(*a)) - else: - if b > a[1]: - 
tmp.append(slice(*a)) - elif b == a[1]: - a[1] == b + 1 - else: - ret[i] = a - else: - if isinstance(b, tuple): - if a < b[0]: - tmp.append(a) - else: - if b - a == 1: - ret[i] = (a, b) - else: - tmp.append(a) - if isinstance(ret[-1], tuple): - tmp.append(slice(*ret[-1])) - else: - tmp.append(ret[-1]) - ret = tmp - return ret - elif isinstance(other, tuple): - ge = self >= other[0] - ge = ge.start - lt = self < other[1] - lt = lt.stop - if ge == lt: - return ge - else: - return slice(ge, lt) - else: - lower = self.__lower(other) - upper = self.__upper(other) - d = upper - lower - if d == 1: - return lower - elif d == 0: - return None - else: - return slice(lower, upper) - - def __ne__(self, other): - eq = self == other - if isinstance(eq, tuple): - return [slice(0, eq[0]), slice(eq[1], len(self))] - else: - return [slice(0, eq), slice(eq + 1, len(self))] - - -class SortedArray(AbstractSortedArray): - ''' - A class for wrapping sorted arrays. This class overrides - <,>,<=,>=,==, and != to leverage the sorted content for - efficiency. - ''' - - def __init__(self, array): - super().__init__(array) - - def find_point(self, val): - return np.searchsorted(self.data, val) - - -class LinSpace(SortedArray): - - def __init__(self, start, stop, step): - self.start = start - self.stop = stop - self.step = step - self.dtype = float if any(isinstance(s, float) for s in (start, stop, step)) else int - self.__len = int((stop - start) / step) - - def __len__(self): - return self.__len - - def find_point(self, val): - nsteps = (val - self.start) / self.step - fl = int(nsteps) - if fl == nsteps: - return int(fl) - else: - return int(fl + 1) - - def __getidx__(self, arg): - return self.start + self.step * arg diff --git a/src/hdmf/backends/hdf5/__init__.py b/src/hdmf/backends/hdf5/__init__.py index 6abfc8c85..8f76d7bcc 100644 --- a/src/hdmf/backends/hdf5/__init__.py +++ b/src/hdmf/backends/hdf5/__init__.py @@ -1,3 +1,3 @@ from . 
import h5_utils, h5tools -from .h5_utils import H5RegionSlicer, H5DataIO +from .h5_utils import H5DataIO from .h5tools import HDF5IO, H5SpecWriter, H5SpecReader diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py index 8654e2b4b..878ebf089 100644 --- a/src/hdmf/backends/hdf5/h5_utils.py +++ b/src/hdmf/backends/hdf5/h5_utils.py @@ -8,7 +8,7 @@ from collections.abc import Iterable from copy import copy -from h5py import Group, Dataset, RegionReference, Reference, special_dtype +from h5py import Group, Dataset, Reference, special_dtype from h5py import filters as h5py_filters import json import numpy as np @@ -16,12 +16,10 @@ import os import logging -from ...array import Array -from ...data_utils import DataIO, AbstractDataChunkIterator +from ...data_utils import DataIO, AbstractDataChunkIterator, append_data from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver -from ...region import RegionSlicer from ...spec import SpecWriter, SpecReader -from ...utils import docval, getargs, popargs, get_docval +from ...utils import docval, getargs, popargs, get_docval, get_data_shape class HDF5IODataChunkIteratorQueue(deque): @@ -85,7 +83,7 @@ def append(self, dataset, data): class H5Dataset(HDMFDataset): - @docval({'name': 'dataset', 'type': (Dataset, Array), 'doc': 'the HDF5 file lazily evaluate'}, + @docval({'name': 'dataset', 'type': Dataset, 'doc': 'the HDF5 file lazily evaluate'}, {'name': 'io', 'type': 'hdmf.backends.hdf5.h5tools.HDF5IO', 'doc': 'the IO object that was used to read the underlying dataset'}) def __init__(self, **kwargs): @@ -96,10 +94,6 @@ def __init__(self, **kwargs): def io(self): return self.__io - @property - def regionref(self): - return self.dataset.regionref - @property def ref(self): return self.dataset.ref @@ -108,6 +102,20 @@ def ref(self): def shape(self): return self.dataset.shape + def append(self, arg): + # Get Builder + builder = self.io.manager.get_builder(arg) + if builder is None: + raise ValueError( + "The container being appended to the dataset has not yet been built. " + "Please write the container to the file, then open the modified file, and " + "append the read container to the dataset." 
+ ) + + # Get HDF5 Reference + ref = self.io._create_ref(builder) + append_data(self.dataset, ref) + class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta): """ @@ -175,7 +183,7 @@ def get_object(self, h5obj): class AbstractH5TableDataset(DatasetOfReferences): - @docval({'name': 'dataset', 'type': (Dataset, Array), 'doc': 'the HDF5 file lazily evaluate'}, + @docval({'name': 'dataset', 'type': Dataset, 'doc': 'the HDF5 file lazily evaluate'}, {'name': 'io', 'type': 'hdmf.backends.hdf5.h5tools.HDF5IO', 'doc': 'the IO object that was used to read the underlying dataset'}, {'name': 'types', 'type': (list, tuple), @@ -185,9 +193,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.__refgetters = dict() for i, t in enumerate(types): - if t is RegionReference: - self.__refgetters[i] = self.__get_regref - elif t is Reference: + if t is Reference: self.__refgetters[i] = self._get_ref elif t is str: # we need this for when we read compound data types @@ -209,8 +215,6 @@ def __init__(self, **kwargs): t = sub.metadata['ref'] if t is Reference: tmp.append('object') - elif t is RegionReference: - tmp.append('region') else: tmp.append(sub.type.__name__) self.__dtype = tmp @@ -243,10 +247,6 @@ def _get_utf(self, string): """ return string.decode('utf-8') if isinstance(string, bytes) else string - def __get_regref(self, ref): - obj = self._get_ref(ref) - return obj[ref] - def resolve(self, manager): return self[0:len(self)] @@ -269,18 +269,6 @@ def dtype(self): return 'object' -class AbstractH5RegionDataset(AbstractH5ReferenceDataset): - - def __getitem__(self, arg): - obj = super().__getitem__(arg) - ref = self.dataset[arg] - return obj[ref] - - @property - def dtype(self): - return 'region' - - class ContainerH5TableDataset(ContainerResolverMixin, AbstractH5TableDataset): """ A reference-resolving dataset for resolving references inside tables @@ -325,28 +313,6 @@ def get_inverse_class(cls): return ContainerH5ReferenceDataset -class ContainerH5RegionDataset(ContainerResolverMixin, AbstractH5RegionDataset): - """ - A reference-resolving dataset for resolving region references that returns - resolved references as Containers - """ - - @classmethod - def get_inverse_class(cls): - return BuilderH5RegionDataset - - -class BuilderH5RegionDataset(BuilderResolverMixin, AbstractH5RegionDataset): - """ - A reference-resolving dataset for resolving region references that returns - resolved references as Builders - """ - - @classmethod - def get_inverse_class(cls): - return ContainerH5RegionDataset - - class H5SpecWriter(SpecWriter): __str_type = special_dtype(vlen=str) @@ -406,28 +372,6 @@ def read_namespace(self, ns_path): return ret -class H5RegionSlicer(RegionSlicer): - - @docval({'name': 'dataset', 'type': (Dataset, H5Dataset), 'doc': 'the HDF5 dataset to slice'}, - {'name': 'region', 'type': RegionReference, 'doc': 'the region reference to use to slice'}) - def __init__(self, **kwargs): - self.__dataset = getargs('dataset', kwargs) - self.__regref = getargs('region', kwargs) - self.__len = self.__dataset.regionref.selection(self.__regref)[0] - self.__region = None - - def __read_region(self): - if self.__region is None: - self.__region = self.__dataset[self.__regref] - - def __getitem__(self, idx): - self.__read_region() - return self.__region[idx] - - def __len__(self): - return self.__len - - class H5DataIO(DataIO): """ Wrap data arrays for write via HDF5IO to customize I/O behavior, such as compression and chunking @@ -501,7 +445,7 @@ def __init__(self, **kwargs): # Check 
for possible collision with other parameters if not isinstance(getargs('data', kwargs), Dataset) and self.__link_data: self.__link_data = False - warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=2) + warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=3) # Call the super constructor and consume the data parameter super().__init__(**kwargs) # Construct the dict with the io args, ignoring all options that were set to None @@ -525,7 +469,7 @@ def __init__(self, **kwargs): self.__iosettings.pop('compression', None) if 'compression_opts' in self.__iosettings: warnings.warn('Compression disabled by compression=False setting. ' + - 'compression_opts parameter will, therefore, be ignored.', stacklevel=2) + 'compression_opts parameter will, therefore, be ignored.', stacklevel=3) self.__iosettings.pop('compression_opts', None) # Validate the compression options used self._check_compression_options() @@ -540,16 +484,37 @@ def __init__(self, **kwargs): if isinstance(self.data, Dataset): for k in self.__iosettings.keys(): warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k, - stacklevel=2) + stacklevel=3) self.__dataset = None @property def dataset(self): + """Get the cached h5py.Dataset.""" return self.__dataset @dataset.setter def dataset(self, val): + """Cache the h5py.Dataset written with the stored IO settings. + + This attribute can be used to cache a written, empty dataset and fill it in later. + This allows users to access the handle to the dataset *without* having to close + and reopen a file. + + For example:: + + dataio = H5DataIO(shape=(5,), dtype=int) + foo = Foo('foo1', dataio, "I am foo1", 17, 3.14) + bucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[bucket]) + + io = HDF5IO(self.path, manager=self.manager, mode='w') + # write the object to disk, including initializing an empty int dataset with shape (5,) + io.write(foofile) + + foo.my_data.dataset[:] = [0, 1, 2, 3, 4] + io.close() + """ if self.__dataset is not None: raise ValueError("Cannot overwrite H5DataIO.dataset") self.__dataset = val @@ -597,7 +562,7 @@ def _check_compression_options(self): if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]: warnings.warn(str(self.__iosettings['compression']) + " compression may not be available " "on all installations of HDF5. 
Use of gzip is recommended to ensure portability of " - "the generated HDF5 files.", stacklevel=3) + "the generated HDF5 files.", stacklevel=4) @staticmethod def filter_available(filter, allow_plugin_filters): @@ -637,3 +602,14 @@ def valid(self): if isinstance(self.data, Dataset) and not self.data.id.valid: return False return super().valid + + @property + def maxshape(self): + if 'maxshape' in self.io_settings: + return self.io_settings['maxshape'] + elif hasattr(self.data, 'maxshape'): + return self.data.maxshape + elif hasattr(self, "shape"): + return self.shape + else: + return get_data_shape(self.data) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 05ce36e13..d30cef06c 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -7,19 +7,20 @@ import numpy as np import h5py -from h5py import File, Group, Dataset, special_dtype, SoftLink, ExternalLink, Reference, RegionReference, check_dtype +from h5py import File, Group, Dataset, special_dtype, SoftLink, ExternalLink, Reference, check_dtype -from .h5_utils import (BuilderH5ReferenceDataset, BuilderH5RegionDataset, BuilderH5TableDataset, H5DataIO, +from .h5_utils import (BuilderH5ReferenceDataset, BuilderH5TableDataset, H5DataIO, H5SpecReader, H5SpecWriter, HDF5IODataChunkIteratorQueue) from ..io import HDMFIO from ..errors import UnsupportedOperation from ..warnings import BrokenLinkWarning -from ...build import (Builder, GroupBuilder, DatasetBuilder, LinkBuilder, BuildManager, RegionBuilder, +from ...build import (Builder, GroupBuilder, DatasetBuilder, LinkBuilder, BuildManager, ReferenceBuilder, TypeMap, ObjectMapper) from ...container import Container from ...data_utils import AbstractDataChunkIterator from ...spec import RefSpec, DtypeSpec, NamespaceCatalog -from ...utils import docval, getargs, popargs, get_data_shape, get_docval, StrDataset +from ...utils import (docval, getargs, popargs, get_data_shape, get_docval, StrDataset, + get_basic_array_info, generate_array_html_repr) from ..utils import NamespaceToBuilderHelper, WriteStatusTracker ROOT_NAME = 'root' @@ -27,7 +28,6 @@ H5_TEXT = special_dtype(vlen=str) H5_BINARY = special_dtype(vlen=bytes) H5_REF = special_dtype(ref=Reference) -H5_REGREF = special_dtype(ref=RegionReference) RDCC_NBYTES = 32*2**20 # set raw data chunk cache size = 32 MiB @@ -62,15 +62,21 @@ def can_read(path): {'name': 'file', 'type': [File, "S3File", "RemFile"], 'doc': 'a pre-existing h5py.File, S3File, or RemFile object', 'default': None}, {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, + { + 'name': 'aws_region', + 'type': str, + 'doc': 'If driver is ros3, then specify the aws region of the url.', + 'default': None + }, {'name': 'herd_path', 'type': str, 'doc': 'The path to read/write the HERD file', 'default': None},) def __init__(self, **kwargs): """Open an HDF5 file for IO. 
""" self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) - path, manager, mode, comm, file_obj, driver, herd_path = popargs('path', 'manager', 'mode', + path, manager, mode, comm, file_obj, driver, aws_region, herd_path = popargs('path', 'manager', 'mode', 'comm', 'file', 'driver', - 'herd_path', + 'aws_region', 'herd_path', kwargs) self.__open_links = [] # keep track of other files opened from links in this file @@ -91,6 +97,7 @@ def __init__(self, **kwargs): elif isinstance(manager, TypeMap): manager = BuildManager(manager) self.__driver = driver + self.__aws_region = aws_region self.__comm = comm self.__mode = mode self.__file = file_obj @@ -116,6 +123,10 @@ def _file(self): def driver(self): return self.__driver + @property + def aws_region(self): + return self.__aws_region + @classmethod def __check_path_file_obj(cls, path, file_obj): if isinstance(path, Path): @@ -133,13 +144,17 @@ def __check_path_file_obj(cls, path, file_obj): return path @classmethod - def __resolve_file_obj(cls, path, file_obj, driver): + def __resolve_file_obj(cls, path, file_obj, driver, aws_region=None): + """Helper function to return a File when loading or getting namespaces from a file.""" path = cls.__check_path_file_obj(path, file_obj) if file_obj is None: file_kwargs = dict() if driver is not None: file_kwargs.update(driver=driver) + + if aws_region is not None: + file_kwargs.update(aws_region=bytes(aws_region, "ascii")) file_obj = File(path, 'r', **file_kwargs) return file_obj @@ -150,6 +165,8 @@ def __resolve_file_obj(cls, path, file_obj, driver): {'name': 'namespaces', 'type': list, 'doc': 'the namespaces to load', 'default': None}, {'name': 'file', 'type': File, 'doc': 'a pre-existing h5py.File object', 'default': None}, {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, + {'name': 'aws_region', 'type': str, 'doc': 'If driver is ros3, then specify the aws region of the url.', + 'default': None}, returns=("dict mapping the names of the loaded namespaces to a dict mapping included namespace names and " "the included data types"), rtype=dict) @@ -162,10 +179,10 @@ def load_namespaces(cls, **kwargs): :raises ValueError: if both `path` and `file` are supplied but `path` is not the same as the path of `file`. 
""" - namespace_catalog, path, namespaces, file_obj, driver = popargs( - 'namespace_catalog', 'path', 'namespaces', 'file', 'driver', kwargs) + namespace_catalog, path, namespaces, file_obj, driver, aws_region = popargs( + 'namespace_catalog', 'path', 'namespaces', 'file', 'driver', 'aws_region', kwargs) - open_file_obj = cls.__resolve_file_obj(path, file_obj, driver) + open_file_obj = cls.__resolve_file_obj(path, file_obj, driver, aws_region=aws_region) if file_obj is None: # need to close the file object that we just opened with open_file_obj: return cls.__load_namespaces(namespace_catalog, namespaces, open_file_obj) @@ -214,6 +231,8 @@ def __check_specloc(cls, file_obj): @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, {'name': 'file', 'type': File, 'doc': 'a pre-existing h5py.File object', 'default': None}, {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, + {'name': 'aws_region', 'type': str, 'doc': 'If driver is ros3, then specify the aws region of the url.', + 'default': None}, returns="dict mapping names to versions of the namespaces in the file", rtype=dict) def get_namespaces(cls, **kwargs): """Get the names and versions of the cached namespaces from a file. @@ -227,9 +246,9 @@ def get_namespaces(cls, **kwargs): :raises ValueError: if both `path` and `file` are supplied but `path` is not the same as the path of `file`. """ - path, file_obj, driver = popargs('path', 'file', 'driver', kwargs) + path, file_obj, driver, aws_region = popargs('path', 'file', 'driver', 'aws_region', kwargs) - open_file_obj = cls.__resolve_file_obj(path, file_obj, driver) + open_file_obj = cls.__resolve_file_obj(path, file_obj, driver, aws_region=aws_region) if file_obj is None: # need to close the file object that we just opened with open_file_obj: return cls.__get_namespaces(open_file_obj) @@ -325,7 +344,7 @@ def copy_file(self, **kwargs): warnings.warn("The copy_file class method is no longer supported and may be removed in a future version of " "HDMF. 
Please use the export method or h5py.File.copy method instead.", category=DeprecationWarning, - stacklevel=2) + stacklevel=3) source_filename, dest_filename, expand_external, expand_refs, expand_soft = getargs('source_filename', 'dest_filename', @@ -673,12 +692,15 @@ def __read_dataset(self, h5obj, name=None): target = h5obj.file[scalar] target_builder = self.__read_dataset(target) self.__set_built(target.file.filename, target.id, target_builder) - if isinstance(scalar, RegionReference): - d = RegionBuilder(scalar, target_builder) - else: - d = ReferenceBuilder(target_builder) + d = ReferenceBuilder(target_builder) kwargs['data'] = d kwargs['dtype'] = d.dtype + elif h5obj.dtype.kind == 'V': # scalar compound data type + kwargs['data'] = np.array(scalar, dtype=h5obj.dtype) + cpd_dt = h5obj.dtype + ref_cols = [check_dtype(ref=cpd_dt[i]) or check_dtype(vlen=cpd_dt[i]) for i in range(len(cpd_dt))] + d = BuilderH5TableDataset(h5obj, self, ref_cols) + kwargs['dtype'] = HDF5IO.__compound_dtype_to_list(h5obj.dtype, d.dtype) else: kwargs["data"] = scalar else: @@ -687,9 +709,6 @@ def __read_dataset(self, h5obj, name=None): elem1 = h5obj[tuple([0] * (h5obj.ndim - 1) + [0])] if isinstance(elem1, (str, bytes)): d = self._check_str_dtype(h5obj) - elif isinstance(elem1, RegionReference): # read list of references - d = BuilderH5RegionDataset(h5obj, self) - kwargs['dtype'] = d.dtype elif isinstance(elem1, Reference): d = BuilderH5ReferenceDataset(h5obj, self) kwargs['dtype'] = d.dtype @@ -709,7 +728,7 @@ def __read_dataset(self, h5obj, name=None): def _check_str_dtype(self, h5obj): dtype = h5obj.dtype if dtype.kind == 'O': - if dtype.metadata.get('vlen') == str and H5PY_3: + if dtype.metadata.get('vlen') is str and H5PY_3: return StrDataset(h5obj, None) return h5obj @@ -725,9 +744,7 @@ def __read_attrs(self, h5obj): for k, v in h5obj.attrs.items(): if k == SPEC_LOC_ATTR: # ignore cached spec continue - if isinstance(v, RegionReference): - raise ValueError("cannot read region reference attributes yet") - elif isinstance(v, Reference): + if isinstance(v, Reference): ret[k] = self.__read_ref(h5obj.file[v]) else: ret[k] = v @@ -756,6 +773,9 @@ def open(self): if self.driver is not None: kwargs.update(driver=self.driver) + if self.driver == "ros3" and self.aws_region is not None: + kwargs.update(aws_region=bytes(self.aws_region, "ascii")) + self.__file = File(self.source, open_flag, **kwargs) def close(self, close_links=True): @@ -888,10 +908,7 @@ def get_type(cls, data): "utf-8": H5_TEXT, "ascii": H5_BINARY, "bytes": H5_BINARY, - "ref": H5_REF, - "reference": H5_REF, "object": H5_REF, - "region": H5_REGREF, "isodatetime": H5_TEXT, "datetime": H5_TEXT, } @@ -1205,31 +1222,16 @@ def _filler(): return # If the compound data type contains only regular data (i.e., no references) then we can write it as usual + elif len(np.shape(data)) == 0: + dset = self.__scalar_fill__(parent, name, data, options) else: dset = self.__list_fill__(parent, name, data, options) - # Write a dataset containing references, i.e., a region or object reference. + # Write a dataset containing references, i.e., object reference. 
# NOTE: we can ignore options['io_settings'] for scalar data elif self.__is_ref(options['dtype']): _dtype = self.__dtypes.get(options['dtype']) - # Write a scalar data region reference dataset - if isinstance(data, RegionBuilder): - dset = parent.require_dataset(name, shape=(), dtype=_dtype) - self.__set_written(builder) - self.logger.debug("Queueing reference resolution and set attribute on dataset '%s' containing a " - "region reference. attributes: %s" - % (name, list(attributes.keys()))) - - @self.__queue_ref - def _filler(): - self.logger.debug("Resolving region reference and setting attribute on dataset '%s' " - "containing attributes: %s" - % (name, list(attributes.keys()))) - ref = self.__get_ref(data.builder, data.region) - dset = parent[name] - dset[()] = ref - self.set_attributes(dset, attributes) # Write a scalar object reference dataset - elif isinstance(data, ReferenceBuilder): + if isinstance(data, ReferenceBuilder): dset = parent.require_dataset(name, dtype=_dtype, shape=()) self.__set_written(builder) self.logger.debug("Queueing reference resolution and set attribute on dataset '%s' containing an " @@ -1247,44 +1249,24 @@ def _filler(): self.set_attributes(dset, attributes) # Write an array dataset of references else: - # Write a array of region references - if options['dtype'] == 'region': - dset = parent.require_dataset(name, dtype=_dtype, shape=(len(data),), **options['io_settings']) - self.__set_written(builder) - self.logger.debug("Queueing reference resolution and set attribute on dataset '%s' containing " - "region references. attributes: %s" - % (name, list(attributes.keys()))) - - @self.__queue_ref - def _filler(): - self.logger.debug("Resolving region references and setting attribute on dataset '%s' " - "containing attributes: %s" - % (name, list(attributes.keys()))) - refs = list() - for item in data: - refs.append(self.__get_ref(item.builder, item.region)) - dset = parent[name] - dset[()] = refs - self.set_attributes(dset, attributes) # Write array of object references - else: - dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, **options['io_settings']) - self.__set_written(builder) - self.logger.debug("Queueing reference resolution and set attribute on dataset '%s' containing " - "object references. attributes: %s" - % (name, list(attributes.keys()))) + dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, **options['io_settings']) + self.__set_written(builder) + self.logger.debug("Queueing reference resolution and set attribute on dataset '%s' containing " + "object references. 
attributes: %s" + % (name, list(attributes.keys()))) - @self.__queue_ref - def _filler(): - self.logger.debug("Resolving object references and setting attribute on dataset '%s' " - "containing attributes: %s" - % (name, list(attributes.keys()))) - refs = list() - for item in data: - refs.append(self.__get_ref(item)) - dset = parent[name] - dset[()] = refs - self.set_attributes(dset, attributes) + @self.__queue_ref + def _filler(): + self.logger.debug("Resolving object references and setting attribute on dataset '%s' " + "containing attributes: %s" + % (name, list(attributes.keys()))) + refs = list() + for item in data: + refs.append(self.__get_ref(item)) + dset = parent[name] + dset[()] = refs + self.set_attributes(dset, attributes) return # write a "regular" dataset else: @@ -1447,7 +1429,7 @@ def __list_fill__(cls, parent, name, data, options=None): data_shape = io_settings.pop('shape') elif hasattr(data, 'shape'): data_shape = data.shape - elif isinstance(dtype, np.dtype): + elif isinstance(dtype, np.dtype) and len(dtype) > 1: # check if compound dtype data_shape = (len(data),) else: data_shape = get_data_shape(data) @@ -1472,11 +1454,9 @@ def __list_fill__(cls, parent, name, data, options=None): @docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference', 'default': None}, - {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object', - 'default': None}, returns='the reference', rtype=Reference) def __get_ref(self, **kwargs): - container, region = getargs('container', 'region', kwargs) + container = getargs('container', kwargs) if container is None: return None if isinstance(container, Builder): @@ -1492,16 +1472,15 @@ def __get_ref(self, **kwargs): self.logger.debug("Getting reference for %s '%s'" % (container.__class__.__name__, container.name)) builder = self.manager.build(container) path = self.__get_path(builder) + self.logger.debug("Getting reference at path '%s'" % path) - if isinstance(container, RegionBuilder): - region = container.region - if region is not None: - dset = self.__file[path] - if not isinstance(dset, Dataset): - raise ValueError('cannot create region reference without Dataset') - return self.__file[path].regionref[region] - else: - return self.__file[path].ref + return self.__file[path].ref + + @docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference', + 'default': None}, + returns='the reference', rtype=Reference) + def _create_ref(self, **kwargs): + return self.__get_ref(**kwargs) def __is_ref(self, dtype): if isinstance(dtype, DtypeSpec): @@ -1511,7 +1490,7 @@ def __is_ref(self, dtype): if isinstance(dtype, dict): # may be dict from reading a compound dataset return self.__is_ref(dtype['dtype']) if isinstance(dtype, str): - return dtype == DatasetBuilder.OBJECT_REF_TYPE or dtype == DatasetBuilder.REGION_REF_TYPE + return dtype == DatasetBuilder.OBJECT_REF_TYPE return False def __queue_ref(self, func): @@ -1530,17 +1509,6 @@ def __queue_ref(self, func): # dependency self.__ref_queue.append(func) - def __rec_get_ref(self, ref_list): - ret = list() - for elem in ref_list: - if isinstance(elem, (list, tuple)): - ret.append(self.__rec_get_ref(elem)) - elif isinstance(elem, (Builder, Container)): - ret.append(self.__get_ref(elem)) - else: - ret.append(elem) - return ret - @property def mode(self): """ @@ -1564,3 +1532,35 @@ def set_dataio(cls, **kwargs): data = H5DataIO(data) """ return H5DataIO.__init__(**kwargs) + + 
@staticmethod + def generate_dataset_html(dataset): + """Generates an html representation for a dataset for the HDF5IO class""" + + array_info_dict = get_basic_array_info(dataset) + if isinstance(dataset, h5py.Dataset): + dataset_type = "HDF5 dataset" + # get info from hdf5 dataset + compressed_size = dataset.id.get_storage_size() + if hasattr(dataset, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0 + uncompressed_size = dataset.nbytes + else: + uncompressed_size = dataset.size * dataset.dtype.itemsize + compression_ratio = uncompressed_size / compressed_size if compressed_size != 0 else "undefined" + + hdf5_info_dict = { + "Chunk shape": dataset.chunks, + "Compression": dataset.compression, + "Compression opts": dataset.compression_opts, + "Compression ratio": compression_ratio, + } + array_info_dict.update(hdf5_info_dict) + + elif isinstance(dataset, np.ndarray): + dataset_type = "NumPy array" + else: + dataset_type = dataset.__class__.__name__ + + repr_html = generate_array_html_repr(array_info_dict, dataset, dataset_type) + + return repr_html diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 35023066f..86fd25b26 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -5,7 +5,7 @@ from ..build import BuildManager, GroupBuilder from ..container import Container, HERDManager from .errors import UnsupportedOperation -from ..utils import docval, getargs, popargs +from ..utils import docval, getargs, popargs, get_basic_array_info, generate_array_html_repr from warnings import warn @@ -188,6 +188,14 @@ def close(self): ''' Close this HDMFIO object to further reading/writing''' pass + @staticmethod + def generate_dataset_html(dataset): + """Generates an html representation for a dataset""" + array_info_dict = get_basic_array_info(dataset) + repr_html = generate_array_html_repr(array_info_dict, dataset) + + return repr_html + def __enter__(self): return self diff --git a/src/hdmf/build/__init__.py b/src/hdmf/build/__init__.py index ea5d21152..87e0ac57e 100644 --- a/src/hdmf/build/__init__.py +++ b/src/hdmf/build/__init__.py @@ -1,4 +1,4 @@ -from .builders import Builder, DatasetBuilder, GroupBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder +from .builders import Builder, DatasetBuilder, GroupBuilder, LinkBuilder, ReferenceBuilder from .classgenerator import CustomClassGenerator, MCIClassGenerator from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError, ConstructError) diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index 73c683bbd..2d90c24e3 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -6,7 +6,6 @@ from datetime import datetime, date import numpy as np -from h5py import RegionReference from ..utils import docval, getargs, get_docval @@ -320,16 +319,19 @@ def values(self): class DatasetBuilder(BaseBuilder): OBJECT_REF_TYPE = 'object' - REGION_REF_TYPE = 'region' @docval({'name': 'name', 'type': str, 'doc': 'The name of the dataset.'}, {'name': 'data', - 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', 'RegionBuilder', Iterable, datetime, date), + 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', Iterable, datetime, date), 'doc': 'The data in this dataset.', 'default': None}, {'name': 'dtype', 'type': (type, np.dtype, str, list), 'doc': 'The datatype of this dataset.', 'default': None}, {'name': 'attributes', 'type': dict, 'doc': 'A dictionary of attributes to create in this dataset.', 
'default': dict()}, + {'name': 'dimension_labels', 'type': tuple, + 'doc': ('A list of labels for each dimension of this dataset from the spec. Currently this is ' + 'supplied only on build.'), + 'default': None}, {'name': 'maxshape', 'type': (int, tuple), 'doc': 'The shape of this dataset. Use None for scalars.', 'default': None}, {'name': 'chunks', 'type': bool, 'doc': 'Whether or not to chunk this dataset.', 'default': False}, @@ -337,11 +339,14 @@ class DatasetBuilder(BaseBuilder): {'name': 'source', 'type': str, 'doc': 'The source of the data in this builder.', 'default': None}) def __init__(self, **kwargs): """ Create a Builder object for a dataset """ - name, data, dtype, attributes, maxshape, chunks, parent, source = getargs( - 'name', 'data', 'dtype', 'attributes', 'maxshape', 'chunks', 'parent', 'source', kwargs) + name, data, dtype, attributes, dimension_labels, maxshape, chunks, parent, source = getargs( + 'name', 'data', 'dtype', 'attributes', 'dimension_labels', 'maxshape', 'chunks', 'parent', 'source', + kwargs + ) super().__init__(name, attributes, parent, source) self['data'] = data self['attributes'] = _copy.copy(attributes) + self.__dimension_labels = dimension_labels self.__chunks = chunks self.__maxshape = maxshape if isinstance(data, BaseBuilder): @@ -361,6 +366,11 @@ def data(self, val): raise AttributeError("Cannot overwrite data.") self['data'] = val + @property + def dimension_labels(self): + """Labels for each dimension of this dataset from the spec.""" + return self.__dimension_labels + @property def chunks(self): """Whether or not this dataset is chunked.""" @@ -417,20 +427,3 @@ def __init__(self, **kwargs): def builder(self): """The target builder object.""" return self['builder'] - - -class RegionBuilder(ReferenceBuilder): - - @docval({'name': 'region', 'type': (slice, tuple, list, RegionReference), - 'doc': 'The region, i.e. 
slice or indices, into the target dataset.'}, - {'name': 'builder', 'type': DatasetBuilder, 'doc': 'The dataset this region reference applies to.'}) - def __init__(self, **kwargs): - """Create a builder object for a region reference.""" - region, builder = getargs('region', 'builder', kwargs) - super().__init__(builder) - self['region'] = region - - @property - def region(self): - """The selected region of the target dataset.""" - return self['region'] diff --git a/src/hdmf/build/classgenerator.py b/src/hdmf/build/classgenerator.py index d2e7d4fc0..3b7d7c96e 100644 --- a/src/hdmf/build/classgenerator.py +++ b/src/hdmf/build/classgenerator.py @@ -1,9 +1,10 @@ from copy import deepcopy from datetime import datetime, date +from collections.abc import Callable import numpy as np -from ..container import Container, Data, DataRegion, MultiContainerInterface +from ..container import Container, Data, MultiContainerInterface from ..spec import AttributeSpec, LinkSpec, RefSpec, GroupSpec from ..spec.spec import BaseStorageSpec, ZERO_OR_MANY, ONE_OR_MANY from ..utils import docval, getargs, ExtenderMeta, get_docval, popargs, AllowPositional @@ -35,6 +36,8 @@ def register_generator(self, **kwargs): {'name': 'spec', 'type': BaseStorageSpec, 'doc': ''}, {'name': 'parent_cls', 'type': type, 'doc': ''}, {'name': 'attr_names', 'type': dict, 'doc': ''}, + {'name': 'post_init_method', 'type': Callable, 'default': None, + 'doc': 'The function used as a post_init method to validate the class generation.'}, {'name': 'type_map', 'type': 'hdmf.build.manager.TypeMap', 'doc': ''}, returns='the class for the given namespace and data_type', rtype=type) def generate_class(self, **kwargs): @@ -42,8 +45,10 @@ def generate_class(self, **kwargs): If no class has been associated with the ``data_type`` from ``namespace``, a class will be dynamically created and returned. """ - data_type, spec, parent_cls, attr_names, type_map = getargs('data_type', 'spec', 'parent_cls', 'attr_names', - 'type_map', kwargs) + data_type, spec, parent_cls, attr_names, type_map, post_init_method = getargs('data_type', 'spec', + 'parent_cls', 'attr_names', + 'type_map', + 'post_init_method', kwargs) not_inherited_fields = dict() for k, field_spec in attr_names.items(): @@ -82,6 +87,8 @@ def generate_class(self, **kwargs): + str(e) + " Please define that type before defining '%s'." 
% name) cls = ExtenderMeta(data_type, tuple(bases), classdict) + cls.post_init_method = post_init_method + return cls @@ -188,7 +195,7 @@ def _ischild(cls, dtype): if isinstance(dtype, tuple): for sub in dtype: ret = ret or cls._ischild(sub) - elif isinstance(dtype, type) and issubclass(dtype, (Container, Data, DataRegion)): + elif isinstance(dtype, type) and issubclass(dtype, (Container, Data)): ret = True return ret @@ -316,8 +323,19 @@ def set_init(cls, classdict, bases, docval_args, not_inherited_fields, name): elif attr_name not in attrs_not_to_set: attrs_to_set.append(attr_name) - @docval(*docval_args, allow_positional=AllowPositional.WARNING) + # We want to use the skip_post_init of the current class and not the parent class + for item in docval_args: + if item['name'] == 'skip_post_init': + docval_args.remove(item) + + @docval(*docval_args, + {'name': 'skip_post_init', 'type': bool, 'default': False, + 'doc': 'bool to skip post_init'}, + allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): + skip_post_init = popargs('skip_post_init', kwargs) + + original_kwargs = dict(kwargs) if name is not None: # force container name to be the fixed name in the spec kwargs.update(name=name) @@ -343,6 +361,9 @@ def __init__(self, **kwargs): for f in fixed_value_attrs_to_set: self.fields[f] = getattr(not_inherited_fields[f], 'value') + if self.post_init_method is not None and not skip_post_init: + self.post_init_method(**original_kwargs) + classdict['__init__'] = __init__ @@ -417,6 +438,7 @@ def set_init(cls, classdict, bases, docval_args, not_inherited_fields, name): def __init__(self, **kwargs): # store the values passed to init for each MCI attribute so that they can be added # after calling __init__ + original_kwargs = dict(kwargs) new_kwargs = list() for field_clsconf in classdict['__clsconf__']: attr_name = field_clsconf['attr'] @@ -437,6 +459,7 @@ def __init__(self, **kwargs): kwargs[attr_name] = list() # call the parent class init without the MCI attribute + kwargs['skip_post_init'] = True previous_init(self, **kwargs) # call the add method for each MCI attribute @@ -444,5 +467,8 @@ def __init__(self, **kwargs): add_method = getattr(self, new_kwarg['add_method_name']) add_method(new_kwarg['value']) + if self.post_init_method is not None: + self.post_init_method(**original_kwargs) + # override __init__ classdict['__init__'] = __init__ diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index a26de3279..bc586013c 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -1,12 +1,13 @@ import logging from collections import OrderedDict, deque from copy import copy +from collections.abc import Callable from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, BaseBuilder from .classgenerator import ClassGenerator, CustomClassGenerator, MCIClassGenerator from ..container import AbstractContainer, Container, Data from ..term_set import TypeConfigurator -from ..spec import DatasetSpec, GroupSpec, NamespaceCatalog +from ..spec import DatasetSpec, GroupSpec, NamespaceCatalog, RefSpec from ..spec.spec import BaseStorageSpec from ..utils import docval, getargs, ExtenderMeta, get_docval @@ -479,6 +480,7 @@ def load_namespaces(self, **kwargs): load_namespaces here has the advantage of being able to keep track of type dependencies across namespaces. 
''' deps = self.__ns_catalog.load_namespaces(**kwargs) + # register container types for each dependent type in each dependent namespace for new_ns, ns_deps in deps.items(): for src_ns, types in ns_deps.items(): for dt in types: @@ -488,21 +490,10 @@ def load_namespaces(self, **kwargs): self.register_container_type(new_ns, dt, container_cls) return deps - @docval({"name": "namespace", "type": str, "doc": "the namespace containing the data_type"}, - {"name": "data_type", "type": str, "doc": "the data type to create a AbstractContainer class for"}, - {"name": "autogen", "type": bool, "doc": "autogenerate class if one does not exist", "default": True}, - returns='the class for the given namespace and data_type', rtype=type) - def get_container_cls(self, **kwargs): - """Get the container class from data type specification. - If no class has been associated with the ``data_type`` from ``namespace``, a class will be dynamically - created and returned. - """ - # NOTE: this internally used function get_container_cls will be removed in favor of get_dt_container_cls - namespace, data_type, autogen = getargs('namespace', 'data_type', 'autogen', kwargs) - return self.get_dt_container_cls(data_type, namespace, autogen) - @docval({"name": "data_type", "type": str, "doc": "the data type to create a AbstractContainer class for"}, {"name": "namespace", "type": str, "doc": "the namespace containing the data_type", "default": None}, + {'name': 'post_init_method', 'type': Callable, 'default': None, + 'doc': 'The function used as a post_init method to validate the class generation.'}, {"name": "autogen", "type": bool, "doc": "autogenerate class if one does not exist", "default": True}, returns='the class for the given namespace and data_type', rtype=type) def get_dt_container_cls(self, **kwargs): @@ -510,10 +501,11 @@ def get_dt_container_cls(self, **kwargs): If no class has been associated with the ``data_type`` from ``namespace``, a class will be dynamically created and returned. - Replaces get_container_cls but namespace is optional. If namespace is unknown, it will be looked up from + Namespace is optional. If namespace is unknown, it will be looked up from all namespaces. 
""" - namespace, data_type, autogen = getargs('namespace', 'data_type', 'autogen', kwargs) + namespace, data_type, post_init_method, autogen = getargs('namespace', 'data_type', + 'post_init_method','autogen', kwargs) # namespace is unknown, so look it up if namespace is None: @@ -524,20 +516,28 @@ def get_dt_container_cls(self, **kwargs): namespace = ns_key break if namespace is None: - raise ValueError("Namespace could not be resolved.") + raise ValueError(f"Namespace could not be resolved for data type '{data_type}'.") cls = self.__get_container_cls(namespace, data_type) + if cls is None and autogen: # dynamically generate a class spec = self.__ns_catalog.get_spec(namespace, data_type) self.__check_dependent_types(spec, namespace) parent_cls = self.__get_parent_cls(namespace, data_type, spec) attr_names = self.__default_mapper_cls.get_attr_names(spec) - cls = self.__class_generator.generate_class(data_type, spec, parent_cls, attr_names, self) + cls = self.__class_generator.generate_class(data_type=data_type, + spec=spec, + parent_cls=parent_cls, + attr_names=attr_names, + post_init_method=post_init_method, + type_map=self) self.register_container_type(namespace, data_type, cls) return cls def __check_dependent_types(self, spec, namespace): """Ensure that classes for all types used by this type exist in this namespace and generate them if not. + + `spec` should be a GroupSpec or DatasetSpec in the `namespace` """ def __check_dependent_types_helper(spec, namespace): if isinstance(spec, (GroupSpec, DatasetSpec)): @@ -553,6 +553,16 @@ def __check_dependent_types_helper(spec, namespace): if spec.data_type_inc is not None: self.get_dt_container_cls(spec.data_type_inc, namespace) + + # handle attributes that have a reference dtype + for attr_spec in spec.attributes: + if isinstance(attr_spec.dtype, RefSpec): + self.get_dt_container_cls(attr_spec.dtype.target_type, namespace) + # handle datasets that have a reference dtype + if isinstance(spec, DatasetSpec): + if isinstance(spec.dtype, RefSpec): + self.get_dt_container_cls(spec.dtype.target_type, namespace) + # recurse into nested types if isinstance(spec, GroupSpec): for child_spec in (spec.groups + spec.datasets + spec.links): __check_dependent_types_helper(child_spec, namespace) diff --git a/src/hdmf/build/map.py b/src/hdmf/build/map.py deleted file mode 100644 index 5267609f5..000000000 --- a/src/hdmf/build/map.py +++ /dev/null @@ -1,7 +0,0 @@ -# this prevents breaking of code that imports these classes directly from map.py -from .manager import Proxy, BuildManager, TypeSource, TypeMap # noqa: F401 -from .objectmapper import ObjectMapper # noqa: F401 - -import warnings -warnings.warn('Classes in map.py should be imported from hdmf.build. 
Importing from hdmf.build.map will be removed ' - 'in HDMF 3.0.', DeprecationWarning, stacklevel=2) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index fed678d41..176de322c 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -6,18 +6,22 @@ import numpy as np -from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, RegionBuilder, BaseBuilder +from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, BaseBuilder from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError, ConstructError) from .manager import Proxy, BuildManager -from .warnings import MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning -from ..container import AbstractContainer, Data, DataRegion + +from .warnings import (MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning, + IncorrectDatasetShapeBuildWarning) +from hdmf.backends.hdf5.h5_utils import H5DataIO + +from ..container import AbstractContainer, Data from ..term_set import TermSetWrapper from ..data_utils import DataIO, AbstractDataChunkIterator from ..query import ReferenceResolver from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec from ..spec.spec import BaseStorageSpec -from ..utils import docval, getargs, ExtenderMeta, get_docval +from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape, StrDataset _const_arg = '__constructor_arg' @@ -208,7 +212,10 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 if (isinstance(value, np.ndarray) or (hasattr(value, 'astype') and hasattr(value, 'dtype'))): if spec_dtype_type is _unicode: - ret = value.astype('U') + if isinstance(value, StrDataset): + ret = value + else: + ret = value.astype('U') ret_dtype = "utf8" elif spec_dtype_type is _ascii: ret = value.astype('S') @@ -299,7 +306,7 @@ def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901 cls.__check_convert_numeric(value.dtype.type) if np.issubdtype(value.dtype, np.str_): ret_dtype = 'utf8' - elif np.issubdtype(value.dtype, np.string_): + elif np.issubdtype(value.dtype, np.bytes_): ret_dtype = 'ascii' elif np.issubdtype(value.dtype, np.dtype('O')): # Only variable-length strings should ever appear as generic objects. @@ -597,11 +604,20 @@ def __get_data_type(cls, spec): def __convert_string(self, value, spec): """Convert string types to the specified dtype.""" + def __apply_string_type(value, string_type): + # NOTE: if a user passes a h5py.Dataset that is not wrapped with a hdmf.utils.StrDataset, + # then this conversion may not be correct. Users should unpack their string h5py.Datasets + # into a numpy array (or wrap them in StrDataset) before passing them to a container object. 
+ if hasattr(value, '__iter__') and not isinstance(value, (str, bytes)): + return [__apply_string_type(item, string_type) for item in value] + else: + return string_type(value) + ret = value if isinstance(spec, AttributeSpec): if 'text' in spec.dtype: if spec.shape is not None or spec.dims is not None: - ret = list(map(str, value)) + ret = __apply_string_type(value, str) else: ret = str(value) elif isinstance(spec, DatasetSpec): @@ -617,7 +633,7 @@ def string_type(x): return x.isoformat() # method works for both date and datetime if string_type is not None: if spec.shape is not None or spec.dims is not None: - ret = list(map(string_type, value)) + ret = __apply_string_type(value, string_type) else: ret = string_type(value) # copy over any I/O parameters if they were specified @@ -721,19 +737,34 @@ def build(self, **kwargs): if not isinstance(container, Data): msg = "'container' must be of type Data with DatasetSpec" raise ValueError(msg) - spec_dtype, spec_shape, spec = self.__check_dset_spec(self.spec, spec_ext) + spec_dtype, spec_shape, spec_dims, spec = self.__check_dset_spec(self.spec, spec_ext) + dimension_labels = self.__get_dimension_labels_from_spec(container.data, spec_shape, spec_dims) if isinstance(spec_dtype, RefSpec): self.logger.debug("Building %s '%s' as a dataset of references (source: %s)" % (container.__class__.__name__, container.name, repr(source))) # create dataset builder with data=None as a placeholder. fill in with refs later - builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype=spec_dtype.reftype) + builder = DatasetBuilder( + name, + data=None, + parent=parent, + source=source, + dtype=spec_dtype.reftype, + dimension_labels=dimension_labels, + ) manager.queue_ref(self.__set_dataset_to_refs(builder, spec_dtype, spec_shape, container, manager)) elif isinstance(spec_dtype, list): # a compound dataset self.logger.debug("Building %s '%s' as a dataset of compound dtypes (source: %s)" % (container.__class__.__name__, container.name, repr(source))) # create dataset builder with data=None, dtype=None as a placeholder. fill in with refs later - builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype=spec_dtype) + builder = DatasetBuilder( + name, + data=None, + parent=parent, + source=source, + dtype=spec_dtype, + dimension_labels=dimension_labels, + ) manager.queue_ref(self.__set_compound_dataset_to_refs(builder, spec, spec_dtype, container, manager)) else: @@ -744,7 +775,14 @@ def build(self, **kwargs): % (container.__class__.__name__, container.name, repr(source))) # an unspecified dtype and we were given references # create dataset builder with data=None as a placeholder. 
fill in with refs later - builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype='object') + builder = DatasetBuilder( + name, + data=None, + parent=parent, + source=source, + dtype="object", + dimension_labels=dimension_labels, + ) manager.queue_ref(self.__set_untyped_dataset_to_refs(builder, container, manager)) else: # a dataset that has no references, pass the conversion off to the convert_dtype method @@ -760,7 +798,14 @@ def build(self, **kwargs): except Exception as ex: msg = 'could not resolve dtype for %s \'%s\'' % (type(container).__name__, container.name) raise Exception(msg) from ex - builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=dtype) + builder = DatasetBuilder( + name, + data=bldr_data, + parent=parent, + source=source, + dtype=dtype, + dimension_labels=dimension_labels, + ) # Add attributes from the specification extension to the list of attributes all_attrs = self.__spec.attributes + getattr(spec_ext, 'attributes', tuple()) @@ -779,14 +824,67 @@ def __check_dset_spec(self, orig, ext): """ dtype = orig.dtype shape = orig.shape + dims = orig.dims spec = orig if ext is not None: if ext.dtype is not None: dtype = ext.dtype if ext.shape is not None: shape = ext.shape + dims = ext.dims spec = ext - return dtype, shape, spec + return dtype, shape, dims, spec + + def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple: + if spec_shape is None or spec_dims is None: + return None + data_shape = get_data_shape(data) + # if shape is a list of allowed shapes, find the index of the shape that matches the data + if isinstance(spec_shape[0], list): + match_shape_inds = list() + for i, s in enumerate(spec_shape): + # skip this shape if it has a different number of dimensions from the data + if len(s) != len(data_shape): + continue + # check each dimension. None means any length is allowed + match = True + for j, d in enumerate(data_shape): + if s[j] is not None and s[j] != d: + match = False + break + if match: + match_shape_inds.append(i) + # use the most specific match -- the one with the fewest Nones + if match_shape_inds: + if len(match_shape_inds) == 1: + return tuple(spec_dims[match_shape_inds[0]]) + else: + count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds] + index_min_count = count_nones.index(min(count_nones)) + best_match_ind = match_shape_inds[index_min_count] + return tuple(spec_dims[best_match_ind]) + else: + # no matches found + msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path + warnings.warn(msg, IncorrectDatasetShapeBuildWarning) + return None + else: + if len(data_shape) != len(spec_shape): + msg = "Shape of data does not match shape in spec '%s'" % self.spec.path + warnings.warn(msg, IncorrectDatasetShapeBuildWarning) + return None + # check each dimension. 
None means any length is allowed + match = True + for j, d in enumerate(data_shape): + if spec_shape[j] is not None and spec_shape[j] != d: + match = False + break + if not match: + msg = "Shape of data does not match shape in spec '%s'" % self.spec.path + warnings.warn(msg, IncorrectDatasetShapeBuildWarning) + return None + # shape is a single list of allowed dimension lengths + return tuple(spec_dims) def __is_reftype(self, data): if (isinstance(data, AbstractDataChunkIterator) or @@ -839,6 +937,9 @@ def _filler(): for j, subt in refs: tmp[j] = self.__get_ref_builder(builder, subt.dtype, None, row[j], build_manager) bldr_data.append(tuple(tmp)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. + bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) builder.data = bldr_data return _filler @@ -857,43 +958,31 @@ def _filler(): else: target_builder = self.__get_target_builder(d, build_manager, builder) bldr_data.append(ReferenceBuilder(target_builder)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. + bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) builder.data = bldr_data return _filler def __get_ref_builder(self, builder, dtype, shape, container, build_manager): - bldr_data = None - if dtype.is_region(): - if shape is None: - if not isinstance(container, DataRegion): - msg = "'container' must be of type DataRegion if spec represents region reference" - raise ValueError(msg) - self.logger.debug("Setting %s '%s' data to region reference builder" - % (builder.__class__.__name__, builder.name)) - target_builder = self.__get_target_builder(container.data, build_manager, builder) - bldr_data = RegionBuilder(container.region, target_builder) - else: - self.logger.debug("Setting %s '%s' data to list of region reference builders" - % (builder.__class__.__name__, builder.name)) - bldr_data = list() - for d in container.data: - target_builder = self.__get_target_builder(d.target, build_manager, builder) - bldr_data.append(RegionBuilder(d.slice, target_builder)) + self.logger.debug("Setting object reference dataset on %s '%s' data" + % (builder.__class__.__name__, builder.name)) + if isinstance(container, Data): + self.logger.debug("Setting %s '%s' data to list of reference builders" + % (builder.__class__.__name__, builder.name)) + bldr_data = list() + for d in container.data: + target_builder = self.__get_target_builder(d, build_manager, builder) + bldr_data.append(ReferenceBuilder(target_builder)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. 
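For context, a minimal sketch (illustrative only, not part of this diff) of the appendable-references pattern this change supports: the container data is wrapped in H5DataIO with a growable maxshape, and those io parameters are what get copied onto the rebuilt reference data above.

from hdmf.backends.hdf5.h5_utils import H5DataIO
from hdmf.common import VectorData

col_a = VectorData(name="a", description="a column that will be referenced", data=[1, 2, 3])
wrapped_refs = H5DataIO(data=[col_a], maxshape=(None,))  # growable dataset of object references
print(wrapped_refs.get_io_params())                      # e.g. {'maxshape': (None,)}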
+ bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) else: - self.logger.debug("Setting object reference dataset on %s '%s' data" + self.logger.debug("Setting %s '%s' data to reference builder" % (builder.__class__.__name__, builder.name)) - if isinstance(container, Data): - self.logger.debug("Setting %s '%s' data to list of reference builders" - % (builder.__class__.__name__, builder.name)) - bldr_data = list() - for d in container.data: - target_builder = self.__get_target_builder(d, build_manager, builder) - bldr_data.append(ReferenceBuilder(target_builder)) - else: - self.logger.debug("Setting %s '%s' data to reference builder" - % (builder.__class__.__name__, builder.name)) - target_builder = self.__get_target_builder(container, build_manager, builder) - bldr_data = ReferenceBuilder(target_builder) + target_builder = self.__get_target_builder(container, build_manager, builder) + bldr_data = ReferenceBuilder(target_builder) return bldr_data def __get_target_builder(self, container, build_manager, builder): @@ -1125,8 +1214,6 @@ def __get_subspec_values(self, builder, spec, manager): continue if isinstance(attr_val, (GroupBuilder, DatasetBuilder)): ret[attr_spec] = manager.construct(attr_val) - elif isinstance(attr_val, RegionBuilder): # pragma: no cover - raise ValueError("RegionReferences as attributes is not yet supported") elif isinstance(attr_val, ReferenceBuilder): ret[attr_spec] = manager.construct(attr_val.builder) else: @@ -1164,7 +1251,7 @@ def __get_subspec_values(self, builder, spec, manager): if not isinstance(builder, DatasetBuilder): # pragma: no cover raise ValueError("__get_subspec_values - must pass DatasetBuilder with DatasetSpec") if (spec.shape is None and getattr(builder.data, 'shape', None) == (1,) and - type(builder.data[0]) != np.void): + type(builder.data[0]) is not np.void): # if a scalar dataset is expected and a 1-element non-compound dataset is given, then read the dataset builder['data'] = builder.data[0] # use dictionary reference instead of .data to bypass error ret[spec] = self.__check_ref_resolver(builder.data) diff --git a/src/hdmf/build/warnings.py b/src/hdmf/build/warnings.py index 3d5f02126..6a6ea6986 100644 --- a/src/hdmf/build/warnings.py +++ b/src/hdmf/build/warnings.py @@ -15,6 +15,13 @@ class IncorrectQuantityBuildWarning(BuildWarning): pass +class IncorrectDatasetShapeBuildWarning(BuildWarning): + """ + Raised when a dataset has a shape that is not allowed by the spec. + """ + pass + + class MissingRequiredBuildWarning(BuildWarning): """ Raised when a required field is missing. diff --git a/src/hdmf/common/__init__.py b/src/hdmf/common/__init__.py index 248ca1095..6b36e29cd 100644 --- a/src/hdmf/common/__init__.py +++ b/src/hdmf/common/__init__.py @@ -3,6 +3,7 @@ ''' import os.path from copy import deepcopy +from collections.abc import Callable CORE_NAMESPACE = 'hdmf-common' EXP_NAMESPACE = 'hdmf-experimental' @@ -21,6 +22,7 @@ global __TYPE_MAP @docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'}, + {'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None}, is_method=False) def load_type_config(**kwargs): """ @@ -28,23 +30,33 @@ def load_type_config(**kwargs): NOTE: This config is global and shared across all type maps. 
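A minimal usage sketch of the new type_map argument (illustrative only, not part of this diff; the config path is hypothetical):

from hdmf.common import get_type_map, load_type_config, unload_type_config

tm = get_type_map()
load_type_config("my_type_config.yaml", type_map=tm)  # load into an explicit TypeMap
# ... build containers that consult the loaded config ...
unload_type_config(type_map=tm)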
""" config_path = kwargs['config_path'] - __TYPE_MAP.type_config.load_type_config(config_path) + type_map = kwargs['type_map'] or get_type_map() -def get_loaded_type_config(): + type_map.type_config.load_type_config(config_path) + +@docval({'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None}, + is_method=False) +def get_loaded_type_config(**kwargs): """ This method returns the entire config file. """ - if __TYPE_MAP.type_config.config is None: + type_map = kwargs['type_map'] or get_type_map() + + if type_map.type_config.config is None: msg = "No configuration is loaded." raise ValueError(msg) else: - return __TYPE_MAP.type_config.config + return type_map.type_config.config -def unload_type_config(): +@docval({'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None}, + is_method=False) +def unload_type_config(**kwargs): """ Unload the configuration file. """ - return __TYPE_MAP.type_config.unload_type_config() + type_map = kwargs['type_map'] or get_type_map() + + return type_map.type_config.unload_type_config() # a function to register a container classes with the global map @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to get the spec for'}, @@ -96,11 +108,7 @@ def _dec(cls): def __get_resources(): - try: - from importlib.resources import files - except ImportError: - # TODO: Remove when python 3.9 becomes the new minimum - from importlib_resources import files + from importlib.resources import files __location_of_this_file = files(__name__) __core_ns_file_name = 'namespace.yaml' @@ -136,12 +144,14 @@ def available_namespaces(): @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to get the Container class for'}, {'name': 'namespace', 'type': str, 'doc': 'the namespace the data_type is defined in'}, + {'name': 'post_init_method', 'type': Callable, 'default': None, + 'doc': 'The function used as a post_init method to validate the class generation.'}, is_method=False) def get_class(**kwargs): """Get the class object of the Container subclass corresponding to a given neurdata_type. """ - data_type, namespace = getargs('data_type', 'namespace', kwargs) - return __TYPE_MAP.get_dt_container_cls(data_type, namespace) + data_type, namespace, post_init_method = getargs('data_type', 'namespace', 'post_init_method', kwargs) + return __TYPE_MAP.get_dt_container_cls(data_type, namespace, post_init_method) @docval({'name': 'extensions', 'type': (str, TypeMap, list), diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index fdca4bb81..1fc731ef5 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -628,7 +628,7 @@ def add_ref(self, **kwargs): if entity_uri is not None: entity_uri = entity.entity_uri msg = 'This entity already exists. 
Ignoring new entity uri' - warn(msg, stacklevel=2) + warn(msg, stacklevel=3) ################# # Validate Object diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 59acbcd96..2b03c2c76 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -235,7 +235,7 @@ def __eq__(self, other): if isinstance(search_ids, int): search_ids = [search_ids] # Find all matching locations - return np.in1d(self.data, search_ids).nonzero()[0] + return np.isin(self.data, search_ids).nonzero()[0] def _validate_new_data(self, data): # NOTE this may not cover all the many AbstractDataChunkIterator edge cases @@ -717,7 +717,7 @@ def add_row(self, **kwargs): warn(("Data has elements with different lengths and therefore cannot be coerced into an " "N-dimensional array. Use the 'index' argument when creating a column to add rows " "with different lengths."), - stacklevel=2) + stacklevel=3) def __eq__(self, other): """Compare if the two DynamicTables contain the same data. @@ -775,8 +775,8 @@ def add_column(self, **kwargs): # noqa: C901 index, table, enum, col_cls, check_ragged = popargs('index', 'table', 'enum', 'col_cls', 'check_ragged', kwargs) if isinstance(index, VectorIndex): - warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be " - "deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=2) + msg = "Passing a VectorIndex may lead to unexpected behavior. This functionality is not supported." + raise ValueError(msg) if name in self.__colids: # column has already been added msg = "column '%s' already exists in %s '%s'" % (name, self.__class__.__name__, self.name) @@ -793,7 +793,7 @@ def add_column(self, **kwargs): # noqa: C901 "Please ensure the new column complies with the spec. " "This will raise an error in a future version of HDMF." % (name, self.__class__.__name__, spec_table)) - warn(msg, stacklevel=2) + warn(msg, stacklevel=3) index_bool = index or not isinstance(index, bool) spec_index = self.__uninit_cols[name].get('index', False) @@ -803,7 +803,7 @@ def add_column(self, **kwargs): # noqa: C901 "Please ensure the new column complies with the spec. " "This will raise an error in a future version of HDMF." % (name, self.__class__.__name__, spec_index)) - warn(msg, stacklevel=2) + warn(msg, stacklevel=3) spec_col_cls = self.__uninit_cols[name].get('class', VectorData) if col_cls != spec_col_cls: @@ -843,7 +843,7 @@ def add_column(self, **kwargs): # noqa: C901 warn(("Data has elements with different lengths and therefore cannot be coerced into an " "N-dimensional array. 
Use the 'index' argument when adding a column of data with " "different lengths."), - stacklevel=2) + stacklevel=3) # Check that we are asked to create an index if (isinstance(index, bool) or isinstance(index, int)) and index > 0 and len(data) > 0: diff --git a/src/hdmf/container.py b/src/hdmf/container.py index ca2c5252b..ce4e8b821 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -1,8 +1,7 @@ import types -from abc import abstractmethod from collections import OrderedDict from copy import deepcopy -from typing import Type +from typing import Type, Optional from uuid import uuid4 from warnings import warn import os @@ -11,8 +10,9 @@ import numpy as np import pandas as pd -from .data_utils import DataIO, append_data, extend_data -from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict +from .data_utils import DataIO, append_data, extend_data, AbstractDataChunkIterator +from .utils import (docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict, + get_basic_array_info, generate_array_html_repr) from .term_set import TermSet, TermSetWrapper @@ -112,12 +112,6 @@ def _field_config(self, arg_name, val, type_map): itself is only one file. When a user loads custom configs, the config is appended/modified. The modifications are not written to file, avoiding permanent modifications. """ - # If the val has been manually wrapped then skip checking the config for the attr - if isinstance(val, TermSetWrapper): - msg = "Field value already wrapped with TermSetWrapper." - warn(msg) - return val - configurator = type_map.type_config if len(configurator.path)>0: @@ -127,6 +121,12 @@ def _field_config(self, arg_name, val, type_map): else: return val + # If the val has been manually wrapped then skip checking the config for the attr + if isinstance(val, TermSetWrapper): + msg = "Field value already wrapped with TermSetWrapper." + warn(msg) + return val + # check to see that the namespace for the container is in the config if self.namespace not in termset_config['namespaces']: msg = "%s not found within loaded configuration." % self.namespace @@ -302,8 +302,8 @@ def __new__(cls, *args, **kwargs): @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'}) def __init__(self, **kwargs): name = getargs('name', kwargs) - if '/' in name: - raise ValueError("name '" + name + "' cannot contain '/'") + if ('/' in name or ':' in name) and not self._in_construct_mode: + raise ValueError(f"name '{name}' cannot contain a '/' or ':'") self.__name = name self.__field_values = dict() self.__read_io = None @@ -466,21 +466,6 @@ def set_modified(self, **kwargs): def children(self): return tuple(self.__children) - @docval({'name': 'child', 'type': 'Container', - 'doc': 'the child Container for this Container', 'default': None}) - def add_child(self, **kwargs): - warn(DeprecationWarning('add_child is deprecated. 
Set the parent attribute instead.')) - child = getargs('child', kwargs) - if child is not None: - # if child.parent is a Container, then the mismatch between child.parent and parent - # is used to make a soft/external link from the parent to a child elsewhere - # if child.parent is not a Container, it is either None or a Proxy and should be set to self - if not isinstance(child.parent, AbstractContainer): - # actually add the child to the parent in parent setter - child.parent = self - else: - warn('Cannot add None as child to a container %s' % self.name) - @classmethod def type_hierarchy(cls): return cls.__mro__ @@ -629,12 +614,8 @@ def __repr__(self): template += "\nFields:\n" for k in sorted(self.fields): # sorted to enable tests v = self.fields[k] - # if isinstance(v, DataIO) or not hasattr(v, '__len__') or len(v) > 0: if hasattr(v, '__len__'): - if isinstance(v, (np.ndarray, list, tuple)): - if len(v) > 0: - template += " {}: {}\n".format(k, self.__smart_str(v, 1)) - elif v: + if isinstance(v, (np.ndarray, list, tuple)) or v: template += " {}: {}\n".format(k, self.__smart_str(v, 1)) else: template += " {}: {}\n".format(k, v) @@ -711,8 +692,6 @@ def _generate_html_repr(self, fields, level=0, access_code="", is_field=False): for index, item in enumerate(fields): access_code += f'[{index}]' html_repr += self._generate_field_html(index, item, level, access_code) - elif isinstance(fields, np.ndarray): - html_repr += self._generate_array_html(fields, level) else: pass @@ -728,18 +707,26 @@ def _generate_field_html(self, key, value, level, access_code): return f'
{key}: {value}
' - if hasattr(value, "generate_html_repr"): - html_content = value.generate_html_repr(level + 1, access_code) + # Detects array-like objects that conform to the Array Interface specification + # (e.g., NumPy arrays, HDF5 datasets, DataIO objects). Objects must have both + # 'shape' and 'dtype' attributes. Iterators are excluded as they lack 'shape'. + # This approach keeps the implementation generic without coupling to specific backends methods + is_array_data = hasattr(value, "shape") and hasattr(value, "dtype") + if is_array_data: + html_content = self._generate_array_html(value, level + 1) + elif hasattr(value, "generate_html_repr"): + html_content = value.generate_html_repr(level + 1, access_code) elif hasattr(value, '__repr_html__'): html_content = value.__repr_html__() - - elif hasattr(value, "fields"): + elif hasattr(value, "fields"): # Note that h5py.Dataset has a fields attribute so there is an implicit order html_content = self._generate_html_repr(value.fields, level + 1, access_code, is_field=True) elif isinstance(value, (list, dict, np.ndarray)): html_content = self._generate_html_repr(value, level + 1, access_code, is_field=False) else: html_content = f'{value}' + + html_repr = ( f'
{key}' @@ -749,10 +736,33 @@ def _generate_field_html(self, key, value, level, access_code): return html_repr + def _generate_array_html(self, array, level): - """Generates HTML for a NumPy array.""" - str_ = str(array).replace("\n", "
") - return f'
{str_}
' + """Generates HTML for array data (e.g., NumPy arrays, HDF5 datasets, Zarr datasets and DataIO objects).""" + + is_numpy_array = isinstance(array, np.ndarray) + read_io = self.get_read_io() + it_was_read_with_io = read_io is not None + is_data_io = isinstance(array, DataIO) + + if is_numpy_array: + array_info_dict = get_basic_array_info(array) + repr_html = generate_array_html_repr(array_info_dict, array, "NumPy array") + elif is_data_io: + array_info_dict = get_basic_array_info(array.data) + repr_html = generate_array_html_repr(array_info_dict, array.data, "DataIO") + elif it_was_read_with_io: + # The backend handles the representation here. Two special cases worth noting: + # 1. Array-type attributes (e.g., start_frame in ImageSeries) remain NumPy arrays + # even when their parent container has an IO + # 2. Data may have been modified after being read from storage + repr_html = read_io.generate_dataset_html(array) + else: # Not sure which object could get here + object_class = array.__class__.__name__ + array_info_dict = get_basic_array_info(array.data) + repr_html = generate_array_html_repr(array_info_dict, array.data, object_class) + + return f'
{repr_html}
' @staticmethod def __smart_str(v, num_indent): @@ -830,7 +840,14 @@ def __smart_str_dict(d, num_indent): out += '\n' + indent + right_br return out - def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kwargs: dict = None, **kwargs): + def set_data_io( + self, + dataset_name: str, + data_io_class: Type[DataIO], + data_io_kwargs: dict = None, + data_chunk_iterator_class: Optional[Type[AbstractDataChunkIterator]] = None, + data_chunk_iterator_kwargs: dict = None, **kwargs + ): """ Apply DataIO object to a dataset field of the Container. @@ -842,9 +859,18 @@ def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kw Class to use for DataIO, e.g. H5DataIO or ZarrDataIO data_io_kwargs: dict keyword arguments passed to the constructor of the DataIO class. + data_chunk_iterator_class: Type[AbstractDataChunkIterator] + Class to use for DataChunkIterator. If None, no DataChunkIterator is used. + data_chunk_iterator_kwargs: dict + keyword arguments passed to the constructor of the DataChunkIterator class. **kwargs: DEPRECATED. Use data_io_kwargs instead. kwargs are passed to the constructor of the DataIO class. + + Notes + ----- + If data_chunk_iterator_class is not None, the data is wrapped in the DataChunkIterator before being wrapped in + the DataIO. This allows for rewriting the backend configuration of hdf5 datasets. """ if kwargs or (data_io_kwargs is None): warn( @@ -855,8 +881,11 @@ def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kw ) data_io_kwargs = kwargs data = self.fields.get(dataset_name) + data_chunk_iterator_kwargs = data_chunk_iterator_kwargs or dict() if data is None: raise ValueError(f"{dataset_name} is None and cannot be wrapped in a DataIO class") + if data_chunk_iterator_class is not None: + data = data_chunk_iterator_class(data=data, **data_chunk_iterator_kwargs) self.fields[dataset_name] = data_io_class(data=data, **data_io_kwargs) @@ -886,21 +915,13 @@ def shape(self): """ return get_data_shape(self.__data) - @docval({'name': 'dataio', 'type': DataIO, 'doc': 'the DataIO to apply to the data held by this Data'}) - def set_dataio(self, **kwargs): - """ - Apply DataIO object to the data held by this Data object - """ - warn( - "Data.set_dataio() is deprecated. Please use Data.set_data_io() instead.", - DeprecationWarning, - stacklevel=2, - ) - dataio = getargs('dataio', kwargs) - dataio.data = self.__data - self.__data = dataio - - def set_data_io(self, data_io_class: Type[DataIO], data_io_kwargs: dict) -> None: + def set_data_io( + self, + data_io_class: Type[DataIO], + data_io_kwargs: dict, + data_chunk_iterator_class: Optional[Type[AbstractDataChunkIterator]] = None, + data_chunk_iterator_kwargs: dict = None, + ) -> None: """ Apply DataIO object to the data held by this Data object. @@ -910,8 +931,21 @@ def set_data_io(self, data_io_class: Type[DataIO], data_io_kwargs: dict) -> None The DataIO to apply to the data held by this Data. data_io_kwargs: dict The keyword arguments to pass to the DataIO. + data_chunk_iterator_class: Type[AbstractDataChunkIterator] + The DataChunkIterator to use for the DataIO. If None, no DataChunkIterator is used. + data_chunk_iterator_kwargs: dict + The keyword arguments to pass to the DataChunkIterator. + + Notes + ----- + If data_chunk_iterator_class is not None, the data is wrapped in the DataChunkIterator before being wrapped in + the DataIO. This allows for rewriting the backend configuration of hdf5 datasets. 
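A minimal usage sketch of the new iterator options on Data.set_data_io (illustrative only, not part of this diff), assuming an HDF5 backend with H5DataIO and the standard DataChunkIterator:

from hdmf.backends.hdf5.h5_utils import H5DataIO
from hdmf.common import VectorData
from hdmf.data_utils import DataChunkIterator

col = VectorData(name="x", description="example column", data=list(range(1000)))
# wrap the column data in a DataChunkIterator, then in H5DataIO, e.g. to re-chunk/compress on write
col.set_data_io(
    H5DataIO,
    data_io_kwargs=dict(compression="gzip"),
    data_chunk_iterator_class=DataChunkIterator,
    data_chunk_iterator_kwargs=dict(buffer_size=100),
)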
""" - self.__data = data_io_class(data=self.__data, **data_io_kwargs) + data_chunk_iterator_kwargs = data_chunk_iterator_kwargs or dict() + data = self.__data + if data_chunk_iterator_class is not None: + data = data_chunk_iterator_class(data=data, **data_chunk_iterator_kwargs) + self.__data = data_io_class(data=data, **data_io_kwargs) @docval({'name': 'func', 'type': types.FunctionType, 'doc': 'a function to transform *data*'}) def transform(self, **kwargs): @@ -976,25 +1010,6 @@ def _validate_new_data_element(self, arg): pass -class DataRegion(Data): - - @property - @abstractmethod - def data(self): - ''' - The target data that this region applies to - ''' - pass - - @property - @abstractmethod - def region(self): - ''' - The region that indexes into data e.g. slice or list of indices - ''' - pass - - class MultiContainerInterface(Container): """Class that dynamically defines methods to support a Container holding multiple Containers of the same type. @@ -1142,7 +1157,9 @@ def _func(self, **kwargs): # still need to mark self as modified self.set_modified() if tmp.name in d: - msg = "'%s' already exists in %s '%s'" % (tmp.name, cls.__name__, self.name) + msg = (f"Cannot add {tmp.__class__} '{tmp.name}' at 0x{id(tmp)} to dict attribute '{attr_name}' in " + f"{cls} '{self.name}'. {d[tmp.name].__class__} '{tmp.name}' at 0x{id(d[tmp.name])} " + f"already exists in '{attr_name}' and has the same name.") raise ValueError(msg) d[tmp.name] = tmp return container diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index 23f0b4019..91400da84 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -1,19 +1,25 @@ import copy import math from abc import ABCMeta, abstractmethod -from collections.abc import Iterable +from collections.abc import Iterable, Callable from warnings import warn -from typing import Tuple, Callable +from typing import Tuple from itertools import product, chain +try: + from zarr import Array as ZarrArray + ZARR_INSTALLED = True +except ImportError: + ZARR_INSTALLED = False + import h5py import numpy as np from .utils import docval, getargs, popargs, docval_macro, get_data_shape - def append_data(data, arg): - if isinstance(data, (list, DataIO)): + from hdmf.backends.hdf5.h5_utils import HDMFDataset + if isinstance(data, (list, DataIO, HDMFDataset)): data.append(arg) return data elif type(data).__name__ == 'TermSetWrapper': # circular import @@ -30,6 +36,9 @@ def append_data(data, arg): data.resize(shape) data[-1] = arg return data + elif ZARR_INSTALLED and isinstance(data, ZarrArray): + data.append([arg], axis=0) + return data else: msg = "Data cannot append to object of type '%s'" % type(data) raise ValueError(msg) @@ -179,9 +188,15 @@ class GenericDataChunkIterator(AbstractDataChunkIterator): doc="Display a progress bar with iteration rate and estimated completion time.", default=False, ), + dict( + name="progress_bar_class", + type=Callable, + doc="The progress bar class to use. Defaults to tqdm.tqdm if the TQDM package is installed.", + default=None, + ), dict( name="progress_bar_options", - type=None, + type=dict, doc="Dictionary of keyword arguments to be passed directly to tqdm.", default=None, ), @@ -199,8 +214,23 @@ def __init__(self, **kwargs): HDF5 recommends chunk size in the range of 2 to 16 MB for optimal cloud performance. 
https://youtu.be/rcS5vt-mKok?t=621 """ - buffer_gb, buffer_shape, chunk_mb, chunk_shape, self.display_progress, progress_bar_options = getargs( - "buffer_gb", "buffer_shape", "chunk_mb", "chunk_shape", "display_progress", "progress_bar_options", kwargs + ( + buffer_gb, + buffer_shape, + chunk_mb, + chunk_shape, + self.display_progress, + progress_bar_class, + progress_bar_options, + ) = getargs( + "buffer_gb", + "buffer_shape", + "chunk_mb", + "chunk_shape", + "display_progress", + "progress_bar_class", + "progress_bar_options", + kwargs, ) self.progress_bar_options = progress_bar_options or dict() @@ -277,11 +307,13 @@ def __init__(self, **kwargs): try: from tqdm import tqdm + progress_bar_class = progress_bar_class or tqdm + if "total" in self.progress_bar_options: warn("Option 'total' in 'progress_bar_options' is not allowed to be over-written! Ignoring.") self.progress_bar_options.pop("total") - self.progress_bar = tqdm(total=self.num_buffers, **self.progress_bar_options) + self.progress_bar = progress_bar_class(total=self.num_buffers, **self.progress_bar_options) except ImportError: warn( "You must install tqdm to use the progress bar feature (pip install tqdm)! " @@ -363,14 +395,18 @@ def __next__(self): :returns: DataChunk object with the data and selection of the current buffer. :rtype: DataChunk """ - if self.display_progress: - self.progress_bar.update(n=1) try: buffer_selection = next(self.buffer_selection_generator) + + # Only update after successful iteration + if self.display_progress: + self.progress_bar.update(n=1) + return DataChunk(data=self._get_data(selection=buffer_selection), selection=buffer_selection) except StopIteration: + # Allow text to be written to new lines after completion if self.display_progress: - self.progress_bar.write("\n") # Allows text to be written to new lines after completion + self.progress_bar.write("\n") raise StopIteration def __reduce__(self) -> Tuple[Callable, Iterable]: @@ -915,7 +951,7 @@ class ShapeValidatorResult: {'name': 'message', 'type': str, 'doc': 'Message describing the result of the shape validation', 'default': None}, {'name': 'ignored', 'type': tuple, - 'doc': 'Axes that have been ignored in the validaton process', 'default': tuple(), 'shape': (None,)}, + 'doc': 'Axes that have been ignored in the validation process', 'default': tuple(), 'shape': (None,)}, {'name': 'unmatched', 'type': tuple, 'doc': 'List of axes that did not match during shape validation', 'default': tuple(), 'shape': (None,)}, {'name': 'error', 'type': str, 'doc': 'Error that may have occurred. 
One of ERROR_TYPE', 'default': None}, diff --git a/src/hdmf/query.py b/src/hdmf/query.py index 835b295c5..abe2a93a7 100644 --- a/src/hdmf/query.py +++ b/src/hdmf/query.py @@ -2,143 +2,24 @@ import numpy as np -from .array import Array from .utils import ExtenderMeta, docval_macro, docval, getargs -class Query(metaclass=ExtenderMeta): - __operations__ = ( - '__lt__', - '__gt__', - '__le__', - '__ge__', - '__eq__', - '__ne__', - ) - - @classmethod - def __build_operation(cls, op): - def __func(self, arg): - return cls(self, op, arg) - - @ExtenderMeta.pre_init - def __make_operators(cls, name, bases, classdict): - if not isinstance(cls.__operations__, tuple): - raise TypeError("'__operations__' must be of type tuple") - # add any new operations - if len(bases) and 'Query' in globals() and issubclass(bases[-1], Query) \ - and bases[-1].__operations__ is not cls.__operations__: - new_operations = list(cls.__operations__) - new_operations[0:0] = bases[-1].__operations__ - cls.__operations__ = tuple(new_operations) - for op in cls.__operations__: - if not hasattr(cls, op): - setattr(cls, op, cls.__build_operation(op)) - - def __init__(self, obj, op, arg): - self.obj = obj - self.op = op - self.arg = arg - self.collapsed = None - self.expanded = None - - @docval({'name': 'expand', 'type': bool, 'help': 'whether or not to expand result', 'default': True}) - def evaluate(self, **kwargs): - expand = getargs('expand', kwargs) - if expand: - if self.expanded is None: - self.expanded = self.__evalhelper() - return self.expanded - else: - if self.collapsed is None: - self.collapsed = self.__collapse(self.__evalhelper()) - return self.collapsed - - def __evalhelper(self): - obj = self.obj - arg = self.arg - if isinstance(obj, Query): - obj = obj.evaluate() - elif isinstance(obj, HDMFDataset): - obj = obj.dataset - if isinstance(arg, Query): - arg = self.arg.evaluate() - return getattr(obj, self.op)(self.arg) - - def __collapse(self, result): - if isinstance(result, slice): - return (result.start, result.stop) - elif isinstance(result, list): - ret = list() - for idx in result: - if isinstance(idx, slice) and (idx.step is None or idx.step == 1): - ret.append((idx.start, idx.stop)) - else: - ret.append(idx) - return ret - else: - return result - - def __and__(self, other): - return NotImplemented - - def __or__(self, other): - return NotImplemented - - def __xor__(self, other): - return NotImplemented - - def __contains__(self, other): - return NotImplemented - - @docval_macro('array_data') class HDMFDataset(metaclass=ExtenderMeta): - __operations__ = ( - '__lt__', - '__gt__', - '__le__', - '__ge__', - '__eq__', - '__ne__', - ) - - @classmethod - def __build_operation(cls, op): - def __func(self, arg): - return Query(self, op, arg) - - setattr(__func, '__name__', op) - return __func - - @ExtenderMeta.pre_init - def __make_operators(cls, name, bases, classdict): - if not isinstance(cls.__operations__, tuple): - raise TypeError("'__operations__' must be of type tuple") - # add any new operations - if len(bases) and 'Query' in globals() and issubclass(bases[-1], Query) \ - and bases[-1].__operations__ is not cls.__operations__: - new_operations = list(cls.__operations__) - new_operations[0:0] = bases[-1].__operations__ - cls.__operations__ = tuple(new_operations) - for op in cls.__operations__: - setattr(cls, op, cls.__build_operation(op)) - def __evaluate_key(self, key): if isinstance(key, tuple) and len(key) == 0: return key if isinstance(key, (tuple, list, np.ndarray)): return list(map(self.__evaluate_key, 
key)) else: - if isinstance(key, Query): - return key.evaluate() return key def __getitem__(self, key): idx = self.__evaluate_key(key) return self.dataset[idx] - @docval({'name': 'dataset', 'type': ('array_data', Array), 'doc': 'the HDF5 file lazily evaluate'}) + @docval({'name': 'dataset', 'type': 'array_data', 'doc': 'the HDF5 file lazily evaluate'}) def __init__(self, **kwargs): super().__init__() self.__dataset = getargs('dataset', kwargs) @@ -163,6 +44,12 @@ def __next__(self): def next(self): return self.dataset.next() + def append(self, arg): + """ + Override this method to support appending to backend-specific datasets + """ + pass # pragma: no cover + class ReferenceResolver(metaclass=ABCMeta): """ diff --git a/src/hdmf/region.py b/src/hdmf/region.py deleted file mode 100644 index 9feeba401..000000000 --- a/src/hdmf/region.py +++ /dev/null @@ -1,91 +0,0 @@ -from abc import ABCMeta, abstractmethod -from operator import itemgetter - -from .container import Data, DataRegion -from .utils import docval, getargs - - -class RegionSlicer(DataRegion, metaclass=ABCMeta): - ''' - A abstract base class to control getting using a region - - Subclasses must implement `__getitem__` and `__len__` - ''' - - @docval({'name': 'target', 'type': None, 'doc': 'the target to slice'}, - {'name': 'slice', 'type': None, 'doc': 'the region to slice'}) - def __init__(self, **kwargs): - self.__target = getargs('target', kwargs) - self.__slice = getargs('slice', kwargs) - - @property - def data(self): - """The target data. Same as self.target""" - return self.target - - @property - def region(self): - """The selected region. Same as self.slice""" - return self.slice - - @property - def target(self): - """The target data""" - return self.__target - - @property - def slice(self): - """The selected slice""" - return self.__slice - - @property - @abstractmethod - def __getitem__(self, idx): - """Must be implemented by subclasses""" - pass - - @property - @abstractmethod - def __len__(self): - """Must be implemented by subclasses""" - pass - - -class ListSlicer(RegionSlicer): - """Implementation of RegionSlicer for slicing Lists and Data""" - - @docval({'name': 'dataset', 'type': (list, tuple, Data), 'doc': 'the dataset to slice'}, - {'name': 'region', 'type': (list, tuple, slice), 'doc': 'the region reference to use to slice'}) - def __init__(self, **kwargs): - self.__dataset, self.__region = getargs('dataset', 'region', kwargs) - super().__init__(self.__dataset, self.__region) - if isinstance(self.__region, slice): - self.__getter = itemgetter(self.__region) - self.__len = len(range(*self.__region.indices(len(self.__dataset)))) - else: - self.__getter = itemgetter(*self.__region) - self.__len = len(self.__region) - - def __read_region(self): - """ - Internal helper function used to define self._read - """ - if not hasattr(self, '_read'): - self._read = self.__getter(self.__dataset) - del self.__getter - - def __getitem__(self, idx): - """ - Get data values from selected data - """ - self.__read_region() - getter = None - if isinstance(idx, (list, tuple)): - getter = itemgetter(*idx) - else: - getter = itemgetter(idx) - return getter(self._read) - - def __len__(self): - """Number of values in the slice/region""" - return self.__len diff --git a/src/hdmf/spec/namespace.py b/src/hdmf/spec/namespace.py index a2ae0bd37..57232bd25 100644 --- a/src/hdmf/spec/namespace.py +++ b/src/hdmf/spec/namespace.py @@ -50,13 +50,13 @@ def __init__(self, **kwargs): self['full_name'] = full_name if version == 
str(SpecNamespace.UNVERSIONED): # the unversioned version may be written to file as a string and read from file as a string - warn("Loaded namespace '%s' is unversioned. Please notify the extension author." % name, stacklevel=2) + warn(f"Loaded namespace '{name}' is unversioned. Please notify the extension author.") version = SpecNamespace.UNVERSIONED if version is None: # version is required on write -- see YAMLSpecWriter.write_namespace -- but can be None on read in order to # be able to read older files with extensions that are missing the version key. - warn(("Loaded namespace '%s' is missing the required key 'version'. Version will be set to '%s'. " - "Please notify the extension author.") % (name, SpecNamespace.UNVERSIONED), stacklevel=2) + warn(f"Loaded namespace '{name}' is missing the required key 'version'. Version will be set to " + f"'{SpecNamespace.UNVERSIONED}'. Please notify the extension author.") version = SpecNamespace.UNVERSIONED self['version'] = version if date is not None: @@ -466,15 +466,19 @@ def __load_namespace(self, namespace, reader, resolve=True): return included_types def __register_type(self, ndt, inc_ns, catalog, registered_types): - spec = inc_ns.get_spec(ndt) - spec_file = inc_ns.catalog.get_spec_source_file(ndt) - self.__register_dependent_types(spec, inc_ns, catalog, registered_types) - if isinstance(spec, DatasetSpec): - built_spec = self.dataset_spec_cls.build_spec(spec) + if ndt in registered_types: + # already registered + pass else: - built_spec = self.group_spec_cls.build_spec(spec) - registered_types.add(ndt) - catalog.register_spec(built_spec, spec_file) + spec = inc_ns.get_spec(ndt) + spec_file = inc_ns.catalog.get_spec_source_file(ndt) + self.__register_dependent_types(spec, inc_ns, catalog, registered_types) + if isinstance(spec, DatasetSpec): + built_spec = self.dataset_spec_cls.build_spec(spec) + else: + built_spec = self.group_spec_cls.build_spec(spec) + registered_types.add(ndt) + catalog.register_spec(built_spec, spec_file) def __register_dependent_types(self, spec, inc_ns, catalog, registered_types): """Ensure that classes for all types used by this type are registered @@ -529,7 +533,7 @@ def load_namespaces(self, **kwargs): if ns['version'] != self.__namespaces.get(ns['name'])['version']: # warn if the cached namespace differs from the already loaded namespace warn("Ignoring cached namespace '%s' version %s because version %s is already loaded." 
- % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']), stacklevel=2) + % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version'])) else: to_load.append(ns) # now load specs into namespace diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index 585fc6494..bbd97b592 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -1,7 +1,6 @@ import re from abc import ABCMeta from collections import OrderedDict -from copy import deepcopy from warnings import warn from ..utils import docval, getargs, popargs, get_docval @@ -39,7 +38,6 @@ class DtypeHelper: 'uint32': ["uint32", "uint"], 'uint64': ["uint64"], 'object': ['object'], - 'region': ['region'], 'numeric': ['numeric'], 'isodatetime': ["isodatetime", "datetime", "date"] } @@ -84,7 +82,7 @@ class ConstructableDict(dict, metaclass=ABCMeta): def build_const_args(cls, spec_dict): ''' Build constructor arguments for this ConstructableDict class from a dictionary ''' # main use cases are when spec_dict is a ConstructableDict or a spec dict read from a file - return deepcopy(spec_dict) + return spec_dict.copy() @classmethod def build_spec(cls, spec_dict): @@ -93,9 +91,13 @@ def build_spec(cls, spec_dict): vargs = cls.build_const_args(spec_dict) kwargs = dict() # iterate through the Spec docval and construct kwargs based on matching values in spec_dict + unused_vargs = list(vargs) for x in get_docval(cls.__init__): if x['name'] in vargs: kwargs[x['name']] = vargs.get(x['name']) + unused_vargs.remove(x['name']) + if unused_vargs: + warn(f'Unexpected keys {unused_vargs} in spec {spec_dict}') return cls(**kwargs) @@ -171,12 +173,13 @@ def path(self): _ref_args = [ {'name': _target_type_key, 'type': str, 'doc': 'the target type GroupSpec or DatasetSpec'}, - {'name': 'reftype', 'type': str, 'doc': 'the type of references this is i.e. region or object'}, + {'name': 'reftype', 'type': str, + 'doc': 'the type of reference this is. only "object" is supported currently.'}, ] class RefSpec(ConstructableDict): - __allowable_types = ('object', 'region') + __allowable_types = ('object', ) @docval(*_ref_args) def __init__(self, **kwargs): @@ -197,10 +200,6 @@ def reftype(self): '''The type of reference''' return self['reftype'] - @docval(rtype=bool, returns='True if this RefSpec specifies a region reference, False otherwise') - def is_region(self): - return self['reftype'] == 'region' - _attr_args = [ {'name': 'name', 'type': str, 'doc': 'The name of this attribute'}, @@ -311,14 +310,20 @@ class BaseStorageSpec(Spec): def __init__(self, **kwargs): name, doc, quantity, attributes, linkable, data_type_def, data_type_inc = \ getargs('name', 'doc', 'quantity', 'attributes', 'linkable', 'data_type_def', 'data_type_inc', kwargs) + if name is not None and "/" in name: + raise ValueError(f"Name '{name}' is invalid. Names of Groups and Datasets cannot contain '/'") if name is None and data_type_def is None and data_type_inc is None: raise ValueError("Cannot create Group or Dataset spec with no name " "without specifying '%s' and/or '%s'." % (self.def_key(), self.inc_key())) super().__init__(doc, name=name) default_name = getargs('default_name', kwargs) if default_name: + if "/" in default_name: + raise ValueError( + f"Default name '{default_name}' is invalid. 
Names of Groups and Datasets cannot contain '/'" + ) if name is not None: - warn("found 'default_name' with 'name' - ignoring 'default_name'", stacklevel=2) + warn("found 'default_name' with 'name' - ignoring 'default_name'") else: self['default_name'] = default_name self.__attributes = dict() @@ -385,7 +390,7 @@ def resolve_spec(self, **kwargs): self.set_attribute(attribute) self.__resolved = True - @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to check'}) + @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to check'}) def is_inherited_spec(self, **kwargs): ''' Return True if this spec was inherited from the parent type, False otherwise. @@ -393,13 +398,11 @@ def is_inherited_spec(self, **kwargs): Returns False if the spec is not found. ''' spec = getargs('spec', kwargs) - if isinstance(spec, Spec): - spec = spec.name - if spec in self.__attributes: - return self.is_inherited_attribute(spec) + if spec.parent is self and spec.name in self.__attributes: + return self.is_inherited_attribute(spec.name) return False - @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to check'}) + @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to check'}) def is_overridden_spec(self, **kwargs): ''' Return True if this spec overrides a specification from the parent type, False otherwise. @@ -407,10 +410,8 @@ def is_overridden_spec(self, **kwargs): Returns False if the spec is not found. ''' spec = getargs('spec', kwargs) - if isinstance(spec, Spec): - spec = spec.name - if spec in self.__attributes: - return self.is_overridden_attribute(spec) + if spec.parent is self and spec.name in self.__attributes: + return self.is_overridden_attribute(spec.name) return False @docval({'name': 'name', 'type': str, 'doc': 'the name of the attribute to check'}) @@ -648,6 +649,7 @@ def build_const_args(cls, spec_dict): {'name': 'linkable', 'type': bool, 'doc': 'whether or not this group can be linked', 'default': True}, {'name': 'quantity', 'type': (str, int), 'doc': 'the required number of allowed instance', 'default': 1}, {'name': 'default_value', 'type': None, 'doc': 'a default value for this dataset', 'default': None}, + {'name': 'value', 'type': None, 'doc': 'a fixed value for this dataset', 'default': None}, {'name': 'data_type_def', 'type': str, 'doc': 'the data type this specification represents', 'default': None}, {'name': 'data_type_inc', 'type': (str, 'DatasetSpec'), 'doc': 'the data type this specification extends', 'default': None}, @@ -662,7 +664,8 @@ class DatasetSpec(BaseStorageSpec): @docval(*_dataset_args) def __init__(self, **kwargs): - doc, shape, dims, dtype, default_value = popargs('doc', 'shape', 'dims', 'dtype', 'default_value', kwargs) + doc, shape, dims, dtype = popargs('doc', 'shape', 'dims', 'dtype', kwargs) + default_value, value = popargs('default_value', 'value', kwargs) if shape is not None: self['shape'] = shape if dims is not None: @@ -685,6 +688,8 @@ def __init__(self, **kwargs): super().__init__(doc, **kwargs) if default_value is not None: self['default_value'] = default_value + if value is not None: + self['value'] = value if self.name is not None: valid_quant_vals = [1, 'zero_or_one', ZERO_OR_ONE] if self.quantity not in valid_quant_vals: @@ -762,6 +767,11 @@ def default_value(self): '''The default value of the dataset or None if not specified''' return self.get('default_value', None) + @property + def value(self): + '''The fixed value of the dataset or None if not specified''' + return self.get('value', None) 
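A minimal sketch of the new fixed-value key on DatasetSpec (illustrative only, not part of this diff):

from hdmf.spec import DatasetSpec

unit_spec = DatasetSpec(
    doc="unit of measurement, fixed for this type",
    name="unit",
    dtype="text",
    value="seconds",        # fixed value, as opposed to the overridable default_value
)
print(unit_spec.value)      # -> 'seconds'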
+ @classmethod def dtype_spec_cls(cls): ''' The class to use when constructing DtypeSpec objects @@ -1011,85 +1021,92 @@ def is_overridden_link(self, **kwargs): raise ValueError("Link '%s' not found in spec" % name) return name in self.__overridden_links - @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to check'}) + @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to check'}) def is_inherited_spec(self, **kwargs): ''' Returns 'True' if specification was inherited from a parent type ''' spec = getargs('spec', kwargs) - if isinstance(spec, Spec): - name = spec.name - if name is None and hasattr(spec, 'data_type_def'): - name = spec.data_type_def - if name is None: # NOTE: this will return the target type for LinkSpecs - name = spec.data_type_inc - if name is None: # pragma: no cover - # this should not be possible - raise ValueError('received Spec with wildcard name but no data_type_inc or data_type_def') - spec = name + spec_name = spec.name + if spec_name is None and hasattr(spec, 'data_type_def'): + spec_name = spec.data_type_def + if spec_name is None: # NOTE: this will return the target type for LinkSpecs + spec_name = spec.data_type_inc + if spec_name is None: # pragma: no cover + # this should not be possible + raise ValueError('received Spec with wildcard name but no data_type_inc or data_type_def') # if the spec has a name, it will be found in __links/__groups/__datasets before __data_types/__target_types - if spec in self.__links: - return self.is_inherited_link(spec) - elif spec in self.__groups: - return self.is_inherited_group(spec) - elif spec in self.__datasets: - return self.is_inherited_dataset(spec) - elif spec in self.__data_types: + if spec_name in self.__links: + return self.is_inherited_link(spec_name) + elif spec_name in self.__groups: + return self.is_inherited_group(spec_name) + elif spec_name in self.__datasets: + return self.is_inherited_dataset(spec_name) + elif spec_name in self.__data_types: # NOTE: the same data type can be both an unnamed data type and an unnamed target type - return self.is_inherited_type(spec) - elif spec in self.__target_types: - return self.is_inherited_target_type(spec) + return self.is_inherited_type(spec_name) + elif spec_name in self.__target_types: + return self.is_inherited_target_type(spec_name) else: + # attribute spec if super().is_inherited_spec(spec): return True else: - for s in self.__datasets: - if self.is_inherited_dataset(s): - if self.__datasets[s].get_attribute(spec) is not None: - return True - for s in self.__groups: - if self.is_inherited_group(s): - if self.__groups[s].get_attribute(spec) is not None: - return True + parent_name = spec.parent.name + if parent_name is None: + parent_name = spec.parent.data_type + if isinstance(spec.parent, DatasetSpec): + if parent_name in self.__datasets: + if self.is_inherited_dataset(parent_name): + if self.__datasets[parent_name].get_attribute(spec_name) is not None: + return True + else: + if parent_name in self.__groups: + if self.is_inherited_group(parent_name): + if self.__groups[parent_name].get_attribute(spec_name) is not None: + return True return False - @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to check'}) + @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to check'}) def is_overridden_spec(self, **kwargs): # noqa: C901 ''' Returns 'True' if specification overrides a specification from the parent type ''' spec = getargs('spec', kwargs) - if isinstance(spec, Spec): - name = spec.name - if 
name is None: - if isinstance(spec, LinkSpec): # unnamed LinkSpec cannot be overridden - return False - if spec.is_many(): # this is a wildcard spec, so it cannot be overridden - return False - name = spec.data_type_def - if name is None: # NOTE: this will return the target type for LinkSpecs - name = spec.data_type_inc - if name is None: # pragma: no cover - # this should not happen - raise ValueError('received Spec with wildcard name but no data_type_inc or data_type_def') - spec = name + spec_name = spec.name + if spec_name is None: + if isinstance(spec, LinkSpec): # unnamed LinkSpec cannot be overridden + return False + if spec.is_many(): # this is a wildcard spec, so it cannot be overridden + return False + spec_name = spec.data_type_def + if spec_name is None: # NOTE: this will return the target type for LinkSpecs + spec_name = spec.data_type_inc + if spec_name is None: # pragma: no cover + # this should not happen + raise ValueError('received Spec with wildcard name but no data_type_inc or data_type_def') # if the spec has a name, it will be found in __links/__groups/__datasets before __data_types/__target_types - if spec in self.__links: - return self.is_overridden_link(spec) - elif spec in self.__groups: - return self.is_overridden_group(spec) - elif spec in self.__datasets: - return self.is_overridden_dataset(spec) - elif spec in self.__data_types: - return self.is_overridden_type(spec) + if spec_name in self.__links: + return self.is_overridden_link(spec_name) + elif spec_name in self.__groups: + return self.is_overridden_group(spec_name) + elif spec_name in self.__datasets: + return self.is_overridden_dataset(spec_name) + elif spec_name in self.__data_types: + return self.is_overridden_type(spec_name) else: if super().is_overridden_spec(spec): # check if overridden attribute return True else: - for s in self.__datasets: - if self.is_overridden_dataset(s): - if self.__datasets[s].is_overridden_spec(spec): - return True - for s in self.__groups: - if self.is_overridden_group(s): - if self.__groups[s].is_overridden_spec(spec): - return True + parent_name = spec.parent.name + if parent_name is None: + parent_name = spec.parent.data_type + if isinstance(spec.parent, DatasetSpec): + if parent_name in self.__datasets: + if self.is_overridden_dataset(parent_name): + if self.__datasets[parent_name].is_overridden_spec(spec): + return True + else: + if parent_name in self.__groups: + if self.is_overridden_group(parent_name): + if self.__groups[parent_name].is_overridden_spec(spec): + return True return False @docval({'name': 'spec', 'type': (BaseStorageSpec, str), 'doc': 'the specification to check'}) diff --git a/src/hdmf/testing/testcase.py b/src/hdmf/testing/testcase.py index 798df6fe4..1be4bcecd 100644 --- a/src/hdmf/testing/testcase.py +++ b/src/hdmf/testing/testcase.py @@ -174,7 +174,7 @@ def _assert_array_equal(self, :param message: custom additional message to show when assertions as part of this assert are failing """ array_data_types = tuple([i for i in get_docval_macro('array_data') - if (i != list and i != tuple and i != AbstractDataChunkIterator)]) + if (i is not list and i is not tuple and i is not AbstractDataChunkIterator)]) # We construct array_data_types this way to avoid explicit dependency on h5py, Zarr and other # I/O backends. Only list and tuple do not support [()] slicing, and AbstractDataChunkIterator # should never occur here. 
The effective value of array_data_types is then: diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 5e0b61539..c21382a2a 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -382,8 +382,6 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, for key in extras.keys(): type_errors.append("unrecognized argument: '%s'" % key) else: - # TODO: Extras get stripped out if function arguments are composed with fmt_docval_args. - # allow_extra needs to be tracked on a function so that fmt_docval_args doesn't strip them out for key in extras.keys(): ret[key] = extras[key] return {'args': ret, 'future_warnings': future_warnings, 'type_errors': type_errors, 'value_errors': value_errors, @@ -414,95 +412,6 @@ def get_docval(func, *args): return tuple() -# def docval_wrap(func, is_method=True): -# if is_method: -# @docval(*get_docval(func)) -# def method(self, **kwargs): -# -# return call_docval_args(func, kwargs) -# return method -# else: -# @docval(*get_docval(func)) -# def static_method(**kwargs): -# return call_docval_args(func, kwargs) -# return method - - -def fmt_docval_args(func, kwargs): - ''' Separate positional and keyword arguments - - Useful for methods that wrap other methods - ''' - warnings.warn("fmt_docval_args will be deprecated in a future version of HDMF. Instead of using fmt_docval_args, " - "call the function directly with the kwargs. Please note that fmt_docval_args " - "removes all arguments not accepted by the function's docval, so if you are passing kwargs that " - "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True " - "is set), then you will need to pop the extra arguments out of kwargs before calling the function.", - PendingDeprecationWarning, stacklevel=2) - func_docval = getattr(func, docval_attr_name, None) - ret_args = list() - ret_kwargs = dict() - kwargs_copy = _copy.copy(kwargs) - if func_docval: - for arg in func_docval[__docval_args_loc]: - val = kwargs_copy.pop(arg['name'], None) - if 'default' in arg: - if val is not None: - ret_kwargs[arg['name']] = val - else: - ret_args.append(val) - if func_docval['allow_extra']: - ret_kwargs.update(kwargs_copy) - else: - raise ValueError('no docval found on %s' % str(func)) - return ret_args, ret_kwargs - - -# def _remove_extra_args(func, kwargs): -# """Return a dict of only the keyword arguments that are accepted by the function's docval. -# -# If the docval specifies allow_extra=True, then the original kwargs are returned. -# """ -# # NOTE: this has the same functionality as the to-be-deprecated fmt_docval_args except that -# # kwargs are kept as kwargs instead of parsed into args and kwargs -# func_docval = getattr(func, docval_attr_name, None) -# if func_docval: -# if func_docval['allow_extra']: -# # if extra args are allowed, return all args -# return kwargs -# else: -# # save only the arguments listed in the function's docval (skip any others present in kwargs) -# ret_kwargs = dict() -# for arg in func_docval[__docval_args_loc]: -# val = kwargs.get(arg['name'], None) -# if val is not None: # do not return arguments that are not present or have value None -# ret_kwargs[arg['name']] = val -# return ret_kwargs -# else: -# raise ValueError('No docval found on %s' % str(func)) - - -def call_docval_func(func, kwargs): - """Call the function with only the keyword arguments that are accepted by the function's docval. - - Extra keyword arguments are not passed to the function unless the function's docval has allow_extra=True. 
- """ - warnings.warn("call_docval_func will be deprecated in a future version of HDMF. Instead of using call_docval_func, " - "call the function directly with the kwargs. Please note that call_docval_func " - "removes all arguments not accepted by the function's docval, so if you are passing kwargs that " - "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True " - "is set), then you will need to pop the extra arguments out of kwargs before calling the function.", - PendingDeprecationWarning, stacklevel=2) - with warnings.catch_warnings(record=True): - # catch and ignore only PendingDeprecationWarnings from fmt_docval_args so that two - # PendingDeprecationWarnings saying the same thing are not raised - warnings.simplefilter("ignore", UserWarning) - warnings.simplefilter("always", PendingDeprecationWarning) - fargs, fkwargs = fmt_docval_args(func, kwargs) - - return func(*fargs, **fkwargs) - - def __resolve_type(t): if t is None: return t @@ -967,6 +876,62 @@ def is_ragged(data): return False +def get_basic_array_info(array): + def convert_bytes_to_str(bytes_size): + suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'] + i = 0 + while bytes_size >= 1024 and i < len(suffixes)-1: + bytes_size /= 1024. + i += 1 + return f"{bytes_size:.2f} {suffixes[i]}" + + if hasattr(array, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0 + array_size_in_bytes = array.nbytes + else: + array_size_in_bytes = array.size * array.dtype.itemsize + array_size_repr = convert_bytes_to_str(array_size_in_bytes) + basic_array_info_dict = {"Data type": array.dtype, "Shape": array.shape, "Array size": array_size_repr} + + return basic_array_info_dict + +def generate_array_html_repr(array_info_dict, array, dataset_type=None): + def html_table(item_dicts) -> str: + """ + Generates an html table from a dictionary + """ + report = '' + report += "" + for k, v in item_dicts.items(): + report += ( + f"" + f'' + f'' + f"" + ) + report += "" + report += "
{k}{v}
" + return report + + array_info_html = html_table(array_info_dict) + repr_html = dataset_type + "
" + array_info_html if dataset_type is not None else array_info_html + + # Array like might lack nbytes (h5py < 3.0) or size (DataIO object) + if hasattr(array, "nbytes"): + array_size_bytes = array.nbytes + else: + if hasattr(array, "size"): + array_size = array.size + else: + import math + array_size = math.prod(array.shape) + array_size_bytes = array_size * array.dtype.itemsize + + # Heuristic for displaying data + array_is_small = array_size_bytes < 1024 * 0.1 # 10 % a kilobyte to display the array + if array_is_small: + repr_html += "
" + str(np.asarray(array)) + + return repr_html class LabelledDict(dict): """A dict wrapper that allows querying by an attribute of the values and running a callable on removed items. @@ -1140,7 +1105,7 @@ def update(self, other): @docval_macro('array_data') class StrDataset(h5py.Dataset): - """Wrapper to decode strings on reading the dataset""" + """Wrapper to decode strings on reading the dataset. Use only for h5py 3+.""" def __init__(self, dset, encoding, errors='strict'): self.dset = dset if encoding is None: diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index bdfc15f8f..d7ec78eaa 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -8,7 +8,7 @@ from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType from .errors import ExpectedArrayError, IncorrectQuantityError -from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder +from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder from ..build.builders import BaseBuilder from ..spec import Spec, AttributeSpec, GroupSpec, DatasetSpec, RefSpec, LinkSpec from ..spec import SpecNamespace @@ -124,9 +124,6 @@ def get_type(data, builder_dtype=None): # Bytes data elif isinstance(data, bytes): return 'ascii', get_string_format(data) - # RegionBuilder data - elif isinstance(data, RegionBuilder): - return 'region', None # ReferenceBuilder data elif isinstance(data, ReferenceBuilder): return 'object', None @@ -134,7 +131,7 @@ def get_type(data, builder_dtype=None): elif isinstance(data, ReferenceResolver): return data.dtype, None # Numpy nd-array data - elif isinstance(data, np.ndarray): + elif isinstance(data, np.ndarray) and len(data.dtype) <= 1: if data.size > 0: return get_type(data[0], builder_dtype) else: @@ -151,7 +148,10 @@ def get_type(data, builder_dtype=None): dtypes = [] string_formats = [] for i in range(len(builder_dtype)): - dtype, string_format = get_type(data[0][i]) + if len(np.shape(data)) == 0: + dtype, string_format = get_type(data[()][i]) + else: + dtype, string_format = get_type(data[0][i]) dtypes.append(dtype) string_formats.append(string_format) return dtypes, string_formats @@ -164,7 +164,7 @@ def get_type(data, builder_dtype=None): # Empty array else: # Empty string array - if data.dtype.metadata["vlen"] == str: + if data.dtype.metadata["vlen"] is str: return "utf", None # Undetermined variable length data type. else: # pragma: no cover @@ -433,12 +433,18 @@ def validate(self, **kwargs): try: dtype, string_format = get_type(data, builder.dtype) if not check_type(self.spec.dtype, dtype, string_format): - ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype, + if isinstance(self.spec.dtype, RefSpec): + expected = f'{self.spec.dtype.reftype} reference' + else: + expected = self.spec.dtype + ret.append(DtypeError(self.get_spec_loc(self.spec), expected, dtype, location=self.get_builder_loc(builder))) except EmptyArrayError: # do not validate dtype of empty array. 
HDMF does not yet set dtype when writing a list/tuple pass - if isinstance(builder.dtype, list): + if isinstance(builder.dtype, list) and len(np.shape(builder.data)) == 0: + shape = () # scalar compound dataset + elif isinstance(builder.dtype, list): shape = (len(builder.data), ) # only 1D datasets with compound types are supported else: shape = get_data_shape(data) diff --git a/test_gallery.py b/test_gallery.py index c3128b8fd..b2f0a9047 100644 --- a/test_gallery.py +++ b/test_gallery.py @@ -67,8 +67,9 @@ def run_gallery_tests(): ) _import_from_file(script) except (ImportError, ValueError) as e: - if "linkml" in str(e): - pass # this is OK because linkml is not always installed + if "Please install linkml-runtime to run this example" in str(e): + # this is OK because linkml is not always installed + print(f"Skipping {script} because linkml-runtime is not installed") else: raise e except Exception: diff --git a/tests/unit/build_tests/mapper_tests/test_build.py b/tests/unit/build_tests/mapper_tests/test_build.py index b90ad6f1a..28cc9518e 100644 --- a/tests/unit/build_tests/mapper_tests/test_build.py +++ b/tests/unit/build_tests/mapper_tests/test_build.py @@ -4,7 +4,7 @@ from hdmf import Container, Data, TermSet, TermSetWrapper from hdmf.common import VectorData, get_type_map from hdmf.build import ObjectMapper, BuildManager, TypeMap, GroupBuilder, DatasetBuilder -from hdmf.build.warnings import DtypeConversionWarning +from hdmf.build.warnings import DtypeConversionWarning, IncorrectDatasetShapeBuildWarning from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, Spec from hdmf.testing import TestCase from hdmf.utils import docval, getargs @@ -650,3 +650,287 @@ def test_build_incorrect_dtype(self): msg = "could not resolve dtype for BarData 'my_bar'" with self.assertRaisesWith(Exception, msg): self.manager.build(bar_data_holder_inst, source='test.h5') + + +class BuildDatasetShapeMixin(TestCase, metaclass=ABCMeta): + + def setUp(self): + self.set_up_specs() + spec_catalog = SpecCatalog() + spec_catalog.register_spec(self.bar_data_spec, 'test.yaml') + spec_catalog.register_spec(self.bar_data_holder_spec, 'test.yaml') + namespace = SpecNamespace( + doc='a test namespace', + name=CORE_NAMESPACE, + schema=[{'source': 'test.yaml'}], + version='0.1.0', + catalog=spec_catalog + ) + namespace_catalog = NamespaceCatalog() + namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) + type_map = TypeMap(namespace_catalog) + type_map.register_container_type(CORE_NAMESPACE, 'BarData', BarData) + type_map.register_container_type(CORE_NAMESPACE, 'BarDataHolder', BarDataHolder) + type_map.register_map(BarData, ExtBarDataMapper) + type_map.register_map(BarDataHolder, ObjectMapper) + self.manager = BuildManager(type_map) + + def set_up_specs(self): + shape, dims = self.get_base_shape_dims() + self.bar_data_spec = DatasetSpec( + doc='A test dataset specification with a data type', + data_type_def='BarData', + dtype='int', + shape=shape, + dims=dims, + ) + self.bar_data_holder_spec = GroupSpec( + doc='A container of multiple extended BarData objects', + data_type_def='BarDataHolder', + datasets=[self.get_dataset_inc_spec()], + ) + + @abstractmethod + def get_base_shape_dims(self): + pass + + @abstractmethod + def get_dataset_inc_spec(self): + pass + + +class TestBuildDatasetOneOptionBadShapeUnspecified1(BuildDatasetShapeMixin): + """Test dataset spec shape = 2D any length, data = 1D. 
Should raise warning and set dimension_labels to None.""" + + def get_base_shape_dims(self): + return [None, None], ['a', 'b'] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[1, 2, 3], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + msg = "Shape of data does not match shape in spec 'BarData'" + with self.assertWarnsWith(IncorrectDatasetShapeBuildWarning, msg): + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels is None + + +class TestBuildDatasetOneOptionBadShapeUnspecified2(BuildDatasetShapeMixin): + """Test dataset spec shape = (any, 2), data = (3, 1). Should raise warning and set dimension_labels to None.""" + + def get_base_shape_dims(self): + return [None, 2], ['a', 'b'] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1], [2], [3]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + msg = "Shape of data does not match shape in spec 'BarData'" + with self.assertWarnsWith(IncorrectDatasetShapeBuildWarning, msg): + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels is None + + +class TestBuildDatasetTwoOptionsBadShapeUnspecified(BuildDatasetShapeMixin): + """Test dataset spec shape = (any, 2) or (any, 3), data = (3, 1). + Should raise warning and set dimension_labels to None. + """ + + def get_base_shape_dims(self): + return [[None, 2], [None, 3]], [['a', 'b1'], ['a', 'b2']] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1], [2], [3]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + msg = "Shape of data does not match any allowed shapes in spec 'BarData'" + with self.assertWarnsWith(IncorrectDatasetShapeBuildWarning, msg): + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels is None + + +class TestBuildDatasetDimensionLabelsUnspecified(BuildDatasetShapeMixin): + + def get_base_shape_dims(self): + return None, None + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. 
+ """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels is None + + +class TestBuildDatasetDimensionLabelsOneOption(BuildDatasetShapeMixin): + + def get_base_shape_dims(self): + return [None, None], ['a', 'b'] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels == ('a', 'b') + + +class TestBuildDatasetDimensionLabelsTwoOptionsOneMatch(BuildDatasetShapeMixin): + + def get_base_shape_dims(self): + return [[None], [None, None]], [['a'], ['a', 'b']] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels == ('a', 'b') + + +class TestBuildDatasetDimensionLabelsTwoOptionsTwoMatches(BuildDatasetShapeMixin): + + def get_base_shape_dims(self): + return [[None, None], [None, 3]], [['a', 'b1'], ['a', 'b2']] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels == ('a', 'b2') + + +class TestBuildDatasetDimensionLabelsOneOptionRefined(BuildDatasetShapeMixin): + + def get_base_shape_dims(self): + return [None, None], ['a', 'b1'] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + shape=[None, 3], + dims=['a', 'b2'], + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. 
+ """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels == ('a', 'b2') diff --git a/tests/unit/build_tests/test_builder.py b/tests/unit/build_tests/test_builder.py index a35dc64ac..62ebd0675 100644 --- a/tests/unit/build_tests/test_builder.py +++ b/tests/unit/build_tests/test_builder.py @@ -1,4 +1,4 @@ -from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder +from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder from hdmf.testing import TestCase @@ -392,12 +392,3 @@ def test_constructor(self): db = DatasetBuilder('db1', [1, 2, 3]) rb = ReferenceBuilder(db) self.assertIs(rb.builder, db) - - -class TestRegionBuilder(TestCase): - - def test_constructor(self): - db = DatasetBuilder('db1', [1, 2, 3]) - rb = RegionBuilder(slice(1, 3), db) - self.assertEqual(rb.region, slice(1, 3)) - self.assertIs(rb.builder, db) diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index 0c117820b..42a55b470 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -2,11 +2,14 @@ import os import shutil import tempfile +from warnings import warn from hdmf.build import TypeMap, CustomClassGenerator from hdmf.build.classgenerator import ClassGenerator, MCIClassGenerator from hdmf.container import Container, Data, MultiContainerInterface, AbstractContainer -from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, LinkSpec +from hdmf.spec import ( + GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, LinkSpec, RefSpec +) from hdmf.testing import TestCase from hdmf.utils import get_docval, docval @@ -82,6 +85,79 @@ def test_no_generators(self): self.assertTrue(hasattr(cls, '__init__')) +class TestPostInitGetClass(TestCase): + def setUp(self): + def post_init_method(self, **kwargs): + attr1 = kwargs['attr1'] + if attr1<10: + msg = "attr1 should be >=10" + warn(msg) + self.post_init=post_init_method + + def test_post_init(self): + spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Baz', + attributes=[ + AttributeSpec(name='attr1', doc='a int attribute', dtype='int') + ] + ) + + spec_catalog = SpecCatalog() + spec_catalog.register_spec(spec, 'test.yaml') + namespace = SpecNamespace( + doc='a test namespace', + name=CORE_NAMESPACE, + schema=[{'source': 'test.yaml'}], + version='0.1.0', + catalog=spec_catalog + ) + namespace_catalog = NamespaceCatalog() + namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) + type_map = TypeMap(namespace_catalog) + + cls = type_map.get_dt_container_cls('Baz', CORE_NAMESPACE, self.post_init) + + with self.assertWarns(Warning): + cls(name='instance', attr1=9) + + def test_multi_container_post_init(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='a dataset', + dtype='int', + name='data', + attributes=[AttributeSpec(name='attr2', doc='an integer attribute', dtype='int')] + ) + ], + attributes=[AttributeSpec(name='attr1', doc='a string attribute', dtype='text')]) 
+ + multi_spec = GroupSpec(doc='A test extension that contains a multi', + data_type_def='Multi', + groups=[GroupSpec(data_type_inc=bar_spec, doc='test multi', quantity='*')], + attributes=[AttributeSpec(name='attr1', doc='a float attribute', dtype='float')]) + + spec_catalog = SpecCatalog() + spec_catalog.register_spec(bar_spec, 'test.yaml') + spec_catalog.register_spec(multi_spec, 'test.yaml') + namespace = SpecNamespace( + doc='a test namespace', + name=CORE_NAMESPACE, + schema=[{'source': 'test.yaml'}], + version='0.1.0', + catalog=spec_catalog + ) + namespace_catalog = NamespaceCatalog() + namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) + type_map = TypeMap(namespace_catalog) + Multi = type_map.get_dt_container_cls('Multi', CORE_NAMESPACE, self.post_init) + + with self.assertWarns(Warning): + Multi(name='instance', attr1=9.1) + class TestDynamicContainer(TestCase): def setUp(self): @@ -106,16 +182,19 @@ def test_dynamic_container_creation(self): baz_spec = GroupSpec('A test extension with no Container class', data_type_def='Baz', data_type_inc=self.bar_spec, attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), - AttributeSpec('attr4', 'another float attribute', 'float')]) + AttributeSpec('attr4', 'another float attribute', 'float'), + AttributeSpec('attr_array', 'an array attribute', 'text', shape=(None,)),]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) - expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4'} + expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'skip_post_init'} received_args = set() + for x in get_docval(cls.__init__): if x['name'] != 'foo': received_args.add(x['name']) with self.subTest(name=x['name']): - self.assertNotIn('default', x) + if x['name'] != 'skip_post_init': + self.assertNotIn('default', x) self.assertSetEqual(expected_args, received_args) self.assertEqual(cls.__name__, 'Baz') self.assertTrue(issubclass(cls, Bar)) @@ -135,7 +214,7 @@ def test_dynamic_container_creation_defaults(self): AttributeSpec('attr4', 'another float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) - expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'foo'} + expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'foo', 'skip_post_init'} received_args = set(map(lambda x: x['name'], get_docval(cls.__init__))) self.assertSetEqual(expected_args, received_args) self.assertEqual(cls.__name__, 'Baz') @@ -285,13 +364,14 @@ def __init__(self, **kwargs): AttributeSpec('attr4', 'another float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) - expected_args = {'name', 'data', 'attr2', 'attr3', 'attr4'} + expected_args = {'name', 'data', 'attr2', 'attr3', 'attr4', 'skip_post_init'} received_args = set() for x in get_docval(cls.__init__): if x['name'] != 'foo': received_args.add(x['name']) with self.subTest(name=x['name']): - self.assertNotIn('default', x) + if x['name'] != 'skip_post_init': + self.assertNotIn('default', x) self.assertSetEqual(expected_args, received_args) self.assertTrue(issubclass(cls, FixedAttrBar)) inst = cls(name="My Baz", data=[1, 2, 3, 4], attr2=1000, attr3=98.6, attr4=1.0) @@ -445,7 +525,7 @@ def setUp(self): def test_init_docval(self): cls = self.type_map.get_dt_container_cls('Baz', 
CORE_NAMESPACE) # generate the class - expected_args = {'name'} # 'attr1' should not be included + expected_args = {'name', 'skip_post_init'} # 'attr1' should not be included received_args = set() for x in get_docval(cls.__init__): received_args.add(x['name']) @@ -518,6 +598,8 @@ def test_gen_parent_class(self): {'name': 'my_baz1', 'doc': 'A composition inside with a fixed name', 'type': baz1_cls}, {'name': 'my_baz2', 'doc': 'A composition inside with a fixed name', 'type': baz2_cls}, {'name': 'my_baz1_link', 'doc': 'A composition inside without a fixed name', 'type': baz1_cls}, + {'name': 'skip_post_init', 'type': bool, 'default': False, + 'doc': 'bool to skip post_init'} )) def test_init_fields(self): @@ -654,9 +736,18 @@ def _build_separate_namespaces(self): GroupSpec(data_type_inc='Bar', doc='a bar', quantity='?') ] ) + moo_spec = DatasetSpec( + doc='A test dataset that is a 1D array of object references of Baz', + data_type_def='Moo', + shape=(None,), + dtype=RefSpec( + reftype='object', + target_type='Baz' + ) + ) create_load_namespace_yaml( namespace_name='ndx-test', - specs=[baz_spec], + specs=[baz_spec, moo_spec], output_dir=self.test_dir, incl_types={ CORE_NAMESPACE: ['Bar'], @@ -748,6 +839,171 @@ def test_get_class_include_from_separate_ns_4(self): self._check_classes(baz_cls, bar_cls, bar_cls2, qux_cls, qux_cls2) +class TestGetClassObjectReferences(TestCase): + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + if os.path.exists(self.test_dir): # start clean + self.tearDown() + os.mkdir(self.test_dir) + self.type_map = TypeMap() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_get_class_include_dataset_of_references(self): + """Test that get_class resolves datasets of object references.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + moo_spec = DatasetSpec( + doc='A test dataset that is a 1D array of object references of Qux', + data_type_def='Moo', + shape=(None,), + dtype=RefSpec( + reftype='object', + target_type='Qux' + ), + ) + + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, moo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Moo', 'ndx-test') + # now, Moo and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 2 + assert "Moo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + + def test_get_class_include_attribute_object_reference(self): + """Test that get_class resolves data types with an attribute that is an object reference.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + woo_spec = DatasetSpec( + doc='A test dataset that has a scalar object reference to a Qux', + data_type_def='Woo', + attributes=[ + AttributeSpec( + name='attr1', + doc='a string attribute', + dtype=RefSpec(reftype='object', target_type='Qux') + ), + ] + ) + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, woo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Woo', 'ndx-test') + # now, Woo and Qux should be resolved + assert 
len(self.type_map.get_container_classes('ndx-test')) == 2 + assert "Woo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + + def test_get_class_include_nested_object_reference(self): + """Test that get_class resolves nested datasets that are object references.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + spam_spec = DatasetSpec( + doc='A test extension', + data_type_def='Spam', + shape=(None,), + dtype=RefSpec( + reftype='object', + target_type='Qux' + ), + ) + goo_spec = GroupSpec( + doc='A test dataset that has a nested dataset (Spam) that has a scalar object reference to a Qux', + data_type_def='Goo', + datasets=[ + DatasetSpec( + doc='a dataset', + data_type_inc='Spam', + ), + ], + ) + + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, spam_spec, goo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Goo', 'ndx-test') + # now, Goo, Spam, and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 3 + assert "Goo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Spam" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + + def test_get_class_include_nested_attribute_object_reference(self): + """Test that get_class resolves nested datasets that have an attribute that is an object reference.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + bam_spec = DatasetSpec( + doc='A test extension', + data_type_def='Bam', + attributes=[ + AttributeSpec( + name='attr1', + doc='a string attribute', + dtype=RefSpec(reftype='object', target_type='Qux') + ), + ], + ) + boo_spec = GroupSpec( + doc='A test dataset that has a nested dataset (Spam) that has a scalar object reference to a Qux', + data_type_def='Boo', + datasets=[ + DatasetSpec( + doc='a dataset', + data_type_inc='Bam', + ), + ], + ) + + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, bam_spec, boo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Boo', 'ndx-test') + # now, Boo, Bam, and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 3 + assert "Boo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Bam" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + class EmptyBar(Container): pass diff --git a/tests/unit/build_tests/test_convert_dtype.py b/tests/unit/build_tests/test_convert_dtype.py index 8f9e49239..8f30386d8 100644 --- a/tests/unit/build_tests/test_convert_dtype.py +++ b/tests/unit/build_tests/test_convert_dtype.py @@ -1,12 +1,17 @@ from datetime import datetime, date import numpy as np +import h5py +import unittest + from hdmf.backends.hdf5 import H5DataIO from hdmf.build import ObjectMapper from hdmf.data_utils import DataChunkIterator from hdmf.spec import DatasetSpec, RefSpec, DtypeSpec from 
hdmf.testing import TestCase +from hdmf.utils import StrDataset +H5PY_3 = h5py.__version__.startswith('3') class TestConvertDtype(TestCase): @@ -321,6 +326,19 @@ def test_text_spec(self): self.assertIs(ret, value) self.assertEqual(ret_dtype, 'utf8') + @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+") + def test_text_spec_str_dataset(self): + text_spec_types = ['text', 'utf', 'utf8', 'utf-8'] + for spec_type in text_spec_types: + with self.subTest(spec_type=spec_type): + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + spec = DatasetSpec('an example dataset', spec_type, name='data') + + value = StrDataset(f.create_dataset('data', data=['a', 'b', 'c']), None) + ret, ret_dtype = ObjectMapper.convert_dtype(spec, value) # no conversion + self.assertIs(ret, value) + self.assertEqual(ret_dtype, 'utf8') + def test_ascii_spec(self): ascii_spec_types = ['ascii', 'bytes'] for spec_type in ascii_spec_types: diff --git a/tests/unit/build_tests/test_io_manager.py b/tests/unit/build_tests/test_io_manager.py index 01421e218..a3be47cf7 100644 --- a/tests/unit/build_tests/test_io_manager.py +++ b/tests/unit/build_tests/test_io_manager.py @@ -341,7 +341,7 @@ def test_get_dt_container_cls(self): self.assertIs(ret, Foo) def test_get_dt_container_cls_no_namespace(self): - with self.assertRaisesWith(ValueError, "Namespace could not be resolved."): + with self.assertRaisesWith(ValueError, "Namespace could not be resolved for data type 'Unknown'."): self.type_map.get_dt_container_cls(data_type="Unknown") diff --git a/tests/unit/build_tests/test_io_map.py b/tests/unit/build_tests/test_io_map.py index 63f397682..730530a5a 100644 --- a/tests/unit/build_tests/test_io_map.py +++ b/tests/unit/build_tests/test_io_map.py @@ -1,4 +1,4 @@ -from hdmf.utils import docval, getargs +from hdmf.utils import StrDataset, docval, getargs from hdmf import Container, Data from hdmf.backends.hdf5 import H5DataIO from hdmf.build import (GroupBuilder, DatasetBuilder, ObjectMapper, BuildManager, TypeMap, LinkBuilder, @@ -7,11 +7,15 @@ from hdmf.spec import (GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, RefSpec, LinkSpec) from hdmf.testing import TestCase +import h5py from abc import ABCMeta, abstractmethod import unittest +import numpy as np from tests.unit.helpers.utils import CORE_NAMESPACE, create_test_type_map +H5PY_3 = h5py.__version__.startswith('3') + class Bar(Container): @@ -20,24 +24,27 @@ class Bar(Container): {'name': 'attr1', 'type': str, 'doc': 'an attribute'}, {'name': 'attr2', 'type': int, 'doc': 'another attribute'}, {'name': 'attr3', 'type': float, 'doc': 'a third attribute', 'default': 3.14}, + {'name': 'attr_array', 'type': 'array_data', 'doc': 'another attribute', 'default': (1, 2, 3)}, {'name': 'foo', 'type': 'Foo', 'doc': 'a group', 'default': None}) def __init__(self, **kwargs): - name, data, attr1, attr2, attr3, foo = getargs('name', 'data', 'attr1', 'attr2', 'attr3', 'foo', kwargs) + name, data, attr1, attr2, attr3, attr_array, foo = getargs('name', 'data', 'attr1', 'attr2', 'attr3', + 'attr_array', 'foo', kwargs) super().__init__(name=name) self.__data = data self.__attr1 = attr1 self.__attr2 = attr2 self.__attr3 = attr3 + self.__attr_array = attr_array self.__foo = foo if self.__foo is not None and self.__foo.parent is None: self.__foo.parent = self def __eq__(self, other): - attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') + attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'attr_array', 'foo') return all(getattr(self, 
a) == getattr(other, a) for a in attrs) def __str__(self): - attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') + attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'attr_array', 'foo') return ','.join('%s=%s' % (a, getattr(self, a)) for a in attrs) @property @@ -60,6 +67,10 @@ def attr2(self): def attr3(self): return self.__attr3 + @property + def attr_array(self): + return self.__attr_array + @property def foo(self): return self.__foo @@ -333,12 +344,15 @@ def test_build_1d(self): datasets=[DatasetSpec('an example dataset', 'text', name='data', shape=(None,), attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) + attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), + AttributeSpec('attr_array', 'an example array attribute', 'text', + shape=(None,))]) type_map = self.customSetUp(bar_spec) type_map.register_map(Bar, BarMapper) - bar_inst = Bar('my_bar', ['a', 'b', 'c', 'd'], 'value1', 10) + bar_inst = Bar('my_bar', ['a', 'b', 'c', 'd'], 'value1', 10, attr_array=['a', 'b', 'c', 'd']) builder = type_map.build(bar_inst) - self.assertEqual(builder.get('data').data, ['a', 'b', 'c', 'd']) + np.testing.assert_array_equal(builder.get('data').data, np.array(['a', 'b', 'c', 'd'])) + np.testing.assert_array_equal(builder.get('attr_array'), np.array(['a', 'b', 'c', 'd'])) def test_build_scalar(self): bar_spec = GroupSpec('A test group specification with a data type', @@ -353,6 +367,228 @@ def test_build_scalar(self): builder = type_map.build(bar_inst) self.assertEqual(builder.get('data').data, "['a', 'b', 'c', 'd']") + def test_build_2d_lol(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_lol_2d = [['aa', 'bb'], ['cc', 'dd']] + bar_inst = Bar('my_bar', str_lol_2d, 'value1', 10, attr_array=str_lol_2d) + builder = type_map.build(bar_inst) + self.assertEqual(builder.get('data').data, str_lol_2d) + self.assertEqual(builder.get('attr_array'), str_lol_2d) + + def test_build_2d_ndarray(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_array_2d = np.array([['aa', 'bb'], ['cc', 'dd']]) + bar_inst = Bar('my_bar', str_array_2d, 'value1', 10, attr_array=str_array_2d) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, str_array_2d) + np.testing.assert_array_equal(builder.get('attr_array'), str_array_2d) + + def test_build_3d_lol(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + 
dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_lol_3d = [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]] + bar_inst = Bar('my_bar', str_lol_3d, 'value1', 10, attr_array=str_lol_3d) + builder = type_map.build(bar_inst) + self.assertEqual(builder.get('data').data, str_lol_3d) + self.assertEqual(builder.get('attr_array'), str_lol_3d) + + def test_build_3d_ndarray(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_array_3d = np.array([[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]]) + bar_inst = Bar('my_bar', str_array_3d, 'value1', 10, attr_array=str_array_3d) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, str_array_3d) + np.testing.assert_array_equal(builder.get('attr_array'), str_array_3d) + + @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+") + def test_build_1d_h5py_3_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, ), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, ))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_1d = np.array( + ['aa', 'bb', 'cc', 'dd'], + dtype=h5py.special_dtype(vlen=str) + ) + # wrap the dataset in a StrDataset to mimic how HDF5IO would read this dataset with h5py 3+ + dataset = StrDataset(f.create_dataset('data', data=str_array_1d), None) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + + @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+") + def test_build_3d_h5py_3_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + 
with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_3d = np.array( + [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]], + dtype=h5py.special_dtype(vlen=str) + ) + # wrap the dataset in a StrDataset to mimic how HDF5IO would read this dataset with h5py 3+ + dataset = StrDataset(f.create_dataset('data', data=str_array_3d), None) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + + @unittest.skipIf(H5PY_3, "Create dataset differently for h5py < 3") + def test_build_1d_h5py_2_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, ), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, ))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_1d = np.array( + ['aa', 'bb', 'cc', 'dd'], + dtype=h5py.special_dtype(vlen=str) + ) + dataset = f.create_dataset('data', data=str_array_1d) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + + @unittest.skipIf(H5PY_3, "Create dataset differently for h5py < 3") + def test_build_3d_h5py_2_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_3d = np.array( + [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]], + dtype=h5py.special_dtype(vlen=str) + ) + dataset = f.create_dataset('data', data=str_array_3d) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + def test_build_dataio(self): bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index 00b3c14a3..15a0c9e91 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -429,9 +429,7 @@ def test_add_column_vectorindex(self): table.add_column(name='qux', description='qux column') ind = VectorIndex(name='quux', data=list(), target=table['qux']) - msg = ("Passing a VectorIndex in for index may lead to unexpected 
behavior. This functionality will be " - "deprecated in a future version of HDMF.") - with self.assertWarnsWith(FutureWarning, msg): + with self.assertRaises(ValueError): table.add_column(name='bad', description='bad column', index=ind) def test_add_column_multi_index(self): @@ -2852,6 +2850,57 @@ def test_dtr_references(self): pd.testing.assert_frame_equal(ret, expected) +class TestDataIOReferences(H5RoundTripMixin, TestCase): + + def setUpContainer(self): + """Test roundtrip of a table with an expandable column of references.""" + group1 = Container('group1') + group2 = Container('group2') + + table = DynamicTable( + name='table', + description='test table' + ) + table.add_column( + name='x', + description='test column of ints' + ) + table.add_column( + name='y', + description='test column of reference' + ) + table.add_row(id=101, x=1, y=group1) + table.add_row(id=102, x=2, y=group2) + table.id.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True}) + table.x.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True}) + table.y.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True}) + + multi_container = SimpleMultiContainer(name='multi') + multi_container.add_container(group1) + multi_container.add_container(group2) + multi_container.add_container(table) + + return multi_container + + def test_append(self, cache_spec=False): + """Write the container to an HDF5 file, read the container from the file, and append to it.""" + + # write file + with HDF5IO(self.filename, manager=get_manager(), mode='w') as write_io: + write_io.write(self.container, cache_spec=cache_spec) + + # read container from file + self.reader = HDF5IO(self.filename, manager=get_manager(), mode='a') + read_container = self.reader.read() + self.assertContainerEqual(read_container, self.container, ignore_name=True) + self.assertContainerEqual(read_container['table']['y'][-1], read_container['group2']) + + # append row + group1 = read_container['group1'] + read_container['table'].add_row(id=103, x=3, y=group1) + + self.assertContainerEqual(read_container['table']['y'][-1], group1) + class TestVectorIndexDtype(TestCase): def set_up_array_index(self): diff --git a/tests/unit/spec_tests/test_attribute_spec.py b/tests/unit/spec_tests/test_attribute_spec.py index 15102e728..bac8e12a3 100644 --- a/tests/unit/spec_tests/test_attribute_spec.py +++ b/tests/unit/spec_tests/test_attribute_spec.py @@ -91,3 +91,15 @@ def test_build_spec_no_doc(self): msg = "AttributeSpec.__init__: missing argument 'doc'" with self.assertRaisesWith(TypeError, msg): AttributeSpec.build_spec(spec_dict) + + def test_build_warn_extra_args(self): + spec_dict = { + 'name': 'attribute1', + 'doc': 'test attribute', + 'dtype': 'int', + 'quantity': '?', + } + msg = ("Unexpected keys ['quantity'] in spec {'name': 'attribute1', 'doc': 'test attribute', " + "'dtype': 'int', 'quantity': '?'}") + with self.assertWarnsWith(UserWarning, msg): + AttributeSpec.build_spec(spec_dict) diff --git a/tests/unit/spec_tests/test_dataset_spec.py b/tests/unit/spec_tests/test_dataset_spec.py index 0309aced4..60025fd7e 100644 --- a/tests/unit/spec_tests/test_dataset_spec.py +++ b/tests/unit/spec_tests/test_dataset_spec.py @@ -245,3 +245,33 @@ def test_data_type_property_value(self): group = GroupSpec('A group', name='group', data_type_inc=data_type_inc, data_type_def=data_type_def) self.assertEqual(group.data_type, data_type) + + def test_constructor_value(self): + spec = DatasetSpec(doc='my first dataset', dtype='int', name='dataset1', value=42) + assert spec.value == 42 + 
+ def test_build_warn_extra_args(self): + spec_dict = { + 'name': 'dataset1', + 'doc': 'test dataset', + 'dtype': 'int', + 'required': True, + } + msg = ("Unexpected keys ['required'] in spec {'name': 'dataset1', 'doc': 'test dataset', " + "'dtype': 'int', 'required': True}") + with self.assertWarnsWith(UserWarning, msg): + DatasetSpec.build_spec(spec_dict) + + def test_constructor_validates_name(self): + with self.assertRaisesWith( + ValueError, + "Name 'one/two' is invalid. Names of Groups and Datasets cannot contain '/'", + ): + DatasetSpec(doc='my first dataset', dtype='int', name='one/two') + + def test_constructor_validates_default_name(self): + with self.assertRaisesWith( + ValueError, + "Default name 'one/two' is invalid. Names of Groups and Datasets cannot contain '/'", + ): + DatasetSpec(doc='my first dataset', dtype='int', default_name='one/two', data_type_def='test') diff --git a/tests/unit/spec_tests/test_group_spec.py b/tests/unit/spec_tests/test_group_spec.py index 9c117fa1f..31c00cfbb 100644 --- a/tests/unit/spec_tests/test_group_spec.py +++ b/tests/unit/spec_tests/test_group_spec.py @@ -314,6 +314,16 @@ def test_get_namespace_spec(self): expected = AttributeSpec('namespace', 'the namespace for the data type of this object', 'text', required=False) self.assertDictEqual(GroupSpec.get_namespace_spec(), expected) + def test_build_warn_extra_args(self): + spec_dict = { + 'name': 'group1', + 'doc': 'test group', + 'required': True, + } + msg = "Unexpected keys ['required'] in spec {'name': 'group1', 'doc': 'test group', 'required': True}" + with self.assertWarnsWith(UserWarning, msg): + GroupSpec.build_spec(spec_dict) + class TestNotAllowedConfig(TestCase): @@ -365,26 +375,22 @@ def test_resolved(self): self.assertTrue(self.inc_group_spec.resolved) def test_is_inherited_spec(self): - self.assertFalse(self.def_group_spec.is_inherited_spec('attribute1')) - self.assertFalse(self.def_group_spec.is_inherited_spec('attribute2')) - self.assertTrue(self.inc_group_spec.is_inherited_spec( - AttributeSpec('attribute1', 'my first attribute', 'text') - )) - self.assertTrue(self.inc_group_spec.is_inherited_spec('attribute1')) - self.assertTrue(self.inc_group_spec.is_inherited_spec('attribute2')) - self.assertFalse(self.inc_group_spec.is_inherited_spec('attribute3')) - self.assertFalse(self.inc_group_spec.is_inherited_spec('attribute4')) + self.assertFalse(self.def_group_spec.is_inherited_spec(self.def_group_spec.attributes[0])) + self.assertFalse(self.def_group_spec.is_inherited_spec(self.def_group_spec.attributes[1])) + + attr_spec_map = {attr.name: attr for attr in self.inc_group_spec.attributes} + self.assertTrue(self.inc_group_spec.is_inherited_spec(attr_spec_map["attribute1"])) + self.assertTrue(self.inc_group_spec.is_inherited_spec(attr_spec_map["attribute2"])) + self.assertFalse(self.inc_group_spec.is_inherited_spec(attr_spec_map["attribute3"])) def test_is_overridden_spec(self): - self.assertFalse(self.def_group_spec.is_overridden_spec('attribute1')) - self.assertFalse(self.def_group_spec.is_overridden_spec('attribute2')) - self.assertFalse(self.inc_group_spec.is_overridden_spec( - AttributeSpec('attribute1', 'my first attribute', 'text') - )) - self.assertFalse(self.inc_group_spec.is_overridden_spec('attribute1')) - self.assertTrue(self.inc_group_spec.is_overridden_spec('attribute2')) - self.assertFalse(self.inc_group_spec.is_overridden_spec('attribute3')) - self.assertFalse(self.inc_group_spec.is_overridden_spec('attribute4')) + 
self.assertFalse(self.def_group_spec.is_overridden_spec(self.def_group_spec.attributes[0])) + self.assertFalse(self.def_group_spec.is_overridden_spec(self.def_group_spec.attributes[0])) + + attr_spec_map = {attr.name: attr for attr in self.inc_group_spec.attributes} + self.assertFalse(self.inc_group_spec.is_overridden_spec(attr_spec_map["attribute1"])) + self.assertTrue(self.inc_group_spec.is_overridden_spec(attr_spec_map["attribute2"])) + self.assertFalse(self.inc_group_spec.is_overridden_spec(attr_spec_map["attribute3"])) def test_is_inherited_attribute(self): self.assertFalse(self.def_group_spec.is_inherited_attribute('attribute1')) @@ -405,6 +411,95 @@ def test_is_overridden_attribute(self): self.inc_group_spec.is_overridden_attribute('attribute4') +class TestResolveGroupSameAttributeName(TestCase): + # https://github.com/hdmf-dev/hdmf/issues/1121 + + def test_is_inherited_two_different_datasets(self): + self.def_group_spec = GroupSpec( + doc='A test group', + data_type_def='MyGroup', + datasets=[ + DatasetSpec( + name='dset1', + doc="dset1", + dtype='int', + attributes=[AttributeSpec('attr1', 'MyGroup.dset1.attr1', 'text')] + ), + ] + ) + self.inc_group_spec = GroupSpec( + doc='A test subgroup', + data_type_def='SubGroup', + data_type_inc='MyGroup', + datasets=[ + DatasetSpec( + name='dset2', + doc="dset2", + dtype='int', + attributes=[AttributeSpec('attr1', 'SubGroup.dset2.attr1', 'text')] + ), + ] + ) + self.inc_group_spec.resolve_spec(self.def_group_spec) + + self.assertFalse(self.def_group_spec.is_inherited_spec(self.def_group_spec.datasets[0].attributes[0])) + + dset_spec_map = {dset.name: dset for dset in self.inc_group_spec.datasets} + self.assertFalse(self.inc_group_spec.is_inherited_spec(dset_spec_map["dset2"].attributes[0])) + self.assertTrue(self.inc_group_spec.is_inherited_spec(dset_spec_map["dset1"].attributes[0])) + + def test_is_inherited_different_groups_and_datasets(self): + self.def_group_spec = GroupSpec( + doc='A test group', + data_type_def='MyGroup', + attributes=[AttributeSpec('attr1', 'MyGroup.attr1', 'text')], # <-- added from above + datasets=[ + DatasetSpec( + name='dset1', + doc="dset1", + dtype='int', + attributes=[AttributeSpec('attr1', 'MyGroup.dset1.attr1', 'text')] + ), + ] + ) + self.inc_group_spec = GroupSpec( + doc='A test subgroup', + data_type_def='SubGroup', + data_type_inc='MyGroup', + attributes=[AttributeSpec('attr1', 'SubGroup.attr1', 'text')], # <-- added from above + datasets=[ + DatasetSpec( + name='dset2', + doc="dset2", + dtype='int', + attributes=[AttributeSpec('attr1', 'SubGroup.dset2.attr1', 'text')] + ), + ] + ) + self.inc_group_spec.resolve_spec(self.def_group_spec) + + self.assertFalse(self.def_group_spec.is_inherited_spec(self.def_group_spec.datasets[0].attributes[0])) + + dset_spec_map = {dset.name: dset for dset in self.inc_group_spec.datasets} + self.assertFalse(self.inc_group_spec.is_inherited_spec(dset_spec_map["dset2"].attributes[0])) + self.assertTrue(self.inc_group_spec.is_inherited_spec(dset_spec_map["dset1"].attributes[0])) + self.assertTrue(self.inc_group_spec.is_inherited_spec(self.inc_group_spec.attributes[0])) + + self.inc_group_spec2 = GroupSpec( + doc='A test subsubgroup', + data_type_def='SubSubGroup', + data_type_inc='SubGroup', + ) + self.inc_group_spec2.resolve_spec(self.inc_group_spec) + + dset_spec_map = {dset.name: dset for dset in self.inc_group_spec2.datasets} + self.assertTrue(self.inc_group_spec2.is_inherited_spec(dset_spec_map["dset1"].attributes[0])) + 
self.assertTrue(self.inc_group_spec2.is_inherited_spec(dset_spec_map["dset2"].attributes[0])) + self.assertTrue(self.inc_group_spec2.is_inherited_spec(self.inc_group_spec2.attributes[0])) + + + + class GroupSpecWithLinksTest(TestCase): def test_constructor(self): diff --git a/tests/unit/spec_tests/test_link_spec.py b/tests/unit/spec_tests/test_link_spec.py index e6c680b7c..38e10886b 100644 --- a/tests/unit/spec_tests/test_link_spec.py +++ b/tests/unit/spec_tests/test_link_spec.py @@ -67,3 +67,15 @@ def test_required_is_many(self): ) self.assertEqual(spec.required, req) self.assertEqual(spec.is_many(), many) + + def test_build_warn_extra_args(self): + spec_dict = { + 'name': 'link1', + 'doc': 'test link', + 'target_type': 'TestType', + 'required': True, + } + msg = ("Unexpected keys ['required'] in spec {'name': 'link1', 'doc': 'test link', " + "'target_type': 'TestType', 'required': True}") + with self.assertWarnsWith(UserWarning, msg): + LinkSpec.build_spec(spec_dict) diff --git a/tests/unit/spec_tests/test_ref_spec.py b/tests/unit/spec_tests/test_ref_spec.py index bb1c0efb8..3277673d1 100644 --- a/tests/unit/spec_tests/test_ref_spec.py +++ b/tests/unit/spec_tests/test_ref_spec.py @@ -15,9 +15,3 @@ def test_constructor(self): def test_wrong_reference_type(self): with self.assertRaises(ValueError): RefSpec('TimeSeries', 'unknownreftype') - - def test_isregion(self): - spec = RefSpec('TimeSeries', 'object') - self.assertFalse(spec.is_region()) - spec = RefSpec('Data', 'region') - self.assertTrue(spec.is_region()) diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 9ac81ba13..2abe6349b 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -8,6 +8,7 @@ from hdmf.utils import docval from hdmf.common import DynamicTable, VectorData, DynamicTableRegion from hdmf.backends.hdf5.h5tools import HDF5IO +from hdmf.backends.io import HDMFIO class Subcontainer(Container): @@ -179,6 +180,17 @@ def test_set_parent_overwrite_proxy(self): def test_slash_restriction(self): self.assertRaises(ValueError, Container, 'bad/name') + # check no error raised in construct mode + child_obj = Container.__new__(Container, in_construct_mode=True) + child_obj.__init__('bad/name') + + def test_colon_restriction(self): + self.assertRaises(ValueError, Container, 'bad:name') + + # check no error raised in construct mode + child_obj = Container.__new__(Container, in_construct_mode=True) + child_obj.__init__('bad:name') + def test_set_modified_parent(self): """Test that set modified properly sets parent modified """ @@ -201,18 +213,6 @@ def test_all_children(self): obj = species.all_objects self.assertEqual(sorted(list(obj.keys())), sorted([species.object_id, species.id.object_id, col1.object_id])) - def test_add_child(self): - """Test that add child creates deprecation warning and also properly sets child's parent and modified - """ - parent_obj = Container('obj1') - child_obj = Container('obj2') - parent_obj.set_modified(False) - with self.assertWarnsWith(DeprecationWarning, 'add_child is deprecated. 
Set the parent attribute instead.'): - parent_obj.add_child(child_obj) - self.assertIs(child_obj.parent, parent_obj) - self.assertTrue(parent_obj.modified) - self.assertIs(parent_obj.children[0], child_obj) - def test_parent_set_link_warning(self): col1 = VectorData( name='col1', @@ -423,6 +423,23 @@ def __init__(self, **kwargs): self.data = kwargs['data'] self.str = kwargs['str'] + class ContainerWithData(Container): + + __fields__ = ( + "data", + "str" + ) + + @docval( + {'name': "data", "doc": 'data', 'type': 'array_data', "default": None}, + {'name': "str", "doc": 'str', 'type': str, "default": None}, + + ) + def __init__(self, **kwargs): + super().__init__('test name') + self.data = kwargs['data'] + self.str = kwargs['str'] + def test_repr_html_(self): child_obj1 = Container('test child 1') obj1 = self.ContainerWithChildAndData(child=child_obj1, data=[1, 2, 3], str="hello") @@ -455,6 +472,82 @@ def test_repr_html_(self): 'class="field-value">hello' ) + def test_repr_html_array(self): + obj = self.ContainerWithData(data=np.array([1, 2, 3, 4], dtype=np.int64), str="hello") + expected_html_table = ( + 'class="container-fields">NumPy array
Data typeint64
Shape' + '(4,)
Array size32.00 bytes

[1 2 3 4]' + ) + self.assertIn(expected_html_table, obj._repr_html_()) + + def test_repr_html_array_large_arrays_not_displayed(self): + obj = self.ContainerWithData(data=np.arange(200, dtype=np.int64), str="hello") + expected_html_table = ( + 'class="container-fields">NumPy array
Data typeint64
Shape' + '(200,)
Array size1.56 KiB
' + ) + self.assertIn(expected_html_table, obj._repr_html_()) + + def test_repr_html_hdf5_dataset(self): + with HDF5IO('array_data.h5', mode='w') as io: + dataset = io._file.create_dataset(name='my_dataset', data=np.array([1, 2, 3, 4], dtype=np.int64)) + obj = self.ContainerWithData(data=dataset, str="hello") + obj.read_io = io + + expected_html_table = ( + 'class="container-fields">HDF5 dataset
Data typeint64
' + 'Shape(4,)
Array size' + '32.00 bytes
Chunk shape' + 'None
CompressionNone
Compression optsNone
Compression ratio1.0

[1 2 3 4]' + ) + + self.assertIn(expected_html_table, obj._repr_html_()) + + os.remove('array_data.h5') + + def test_repr_html_hdmf_io(self): + with HDF5IO('array_data.h5', mode='w') as io: + dataset = io._file.create_dataset(name='my_dataset', data=np.array([1, 2, 3, 4], dtype=np.int64)) + obj = self.ContainerWithData(data=dataset, str="hello") + + class OtherIO(HDMFIO): + + @staticmethod + def can_read(path): + pass + + def read_builder(self): + pass + + def write_builder(self, **kwargs): + pass + + def open(self): + pass + + def close(self): + pass + + obj.read_io = OtherIO() + + expected_html_table = ( + 'class="container-fields">
Data typeint64
' + 'Shape(4,)
Array size' + '32.00 bytes

[1 2 3 4]' + ) + + self.assertIn(expected_html_table, obj._repr_html_()) + + os.remove('array_data.h5') class TestData(TestCase): diff --git a/tests/unit/test_io_hdf5.py b/tests/unit/test_io_hdf5.py index 0dae1fbbe..29b7f2d7f 100644 --- a/tests/unit/test_io_hdf5.py +++ b/tests/unit/test_io_hdf5.py @@ -121,10 +121,10 @@ def __assert_helper(self, a, b): # if strings, convert before comparing if b_array: if b_sub.dtype.char in ('S', 'U'): - a_sub = [np.string_(s) for s in a_sub] + a_sub = [np.bytes_(s) for s in a_sub] else: if a_sub.dtype.char in ('S', 'U'): - b_sub = [np.string_(s) for s in b_sub] + b_sub = [np.bytes_(s) for s in b_sub] equal = np.array_equal(a_sub, b_sub) else: equal = a_sub == b_sub diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 5a4fd5a32..6c679bb49 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -21,10 +21,10 @@ from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, OrphanContainerBuildError, LinkBuilder from hdmf.container import Container from hdmf import Data, docval -from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError +from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError, append_data from hdmf.spec.catalog import SpecCatalog from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace -from hdmf.spec.spec import GroupSpec +from hdmf.spec.spec import GroupSpec, DtypeSpec from hdmf.testing import TestCase, remove_test_file from hdmf.common.resources import HERD from hdmf.term_set import TermSet, TermSetWrapper @@ -144,6 +144,16 @@ def test_write_dataset_string(self): read_a = read_a.decode('utf-8') self.assertEqual(read_a, a) + def test_write_dataset_scalar_compound(self): + cmpd_dtype = np.dtype([('x', np.int32), ('y', np.float64)]) + a = np.array((1, 0.1), dtype=cmpd_dtype) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, + dtype=[DtypeSpec('x', doc='x', dtype='int32'), + DtypeSpec('y', doc='y', dtype='float64')])) + dset = self.f['test_dataset'] + self.assertTupleEqual(dset.shape, ()) + self.assertEqual(dset[()].tolist(), a.tolist()) + ########################################## # write_dataset tests: TermSetWrapper ########################################## @@ -164,6 +174,31 @@ def test_write_dataset_list(self): dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a)) + def test_write_dataset_lol_strings(self): + a = [['aa', 'bb'], ['cc', 'dd']] + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) + dset = self.f['test_dataset'] + decoded_dset = [[item.decode('utf-8') if isinstance(item, bytes) else item for item in sublist] + for sublist in dset[:]] + self.assertTrue(decoded_dset == a) + + def test_write_dataset_list_compound_datatype(self): + a = np.array([(1, 2, 0.5), (3, 4, 0.5)], dtype=[('x', 'int'), ('y', 'int'), ('z', 'float')]) + dset_builder = DatasetBuilder( + name='test_dataset', + data=a.tolist(), + attributes={}, + dtype=[ + DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='int'), + DtypeSpec('z', doc='z', dtype='float'), + ], + ) + self.io.write_dataset(self.f, dset_builder) + dset = self.f['test_dataset'] + for field in a.dtype.names: + self.assertTrue(np.all(dset[field][:] == a[field])) + def test_write_dataset_list_compress_gzip(self): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression='gzip', @@ -572,6 +607,12 @@ def 
test_pass_through_of_chunk_shape_generic_data_chunk_iterator(self): ############################################# # H5DataIO general ############################################# + def test_pass_through_of_maxshape_on_h5dataset(self): + k = 10 + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(k), attributes={})) + dset = H5DataIO(self.f['test_dataset']) + self.assertEqual(dset.maxshape, (k,)) + def test_warning_on_non_gzip_compression(self): # Make sure no warning is issued when using gzip with warnings.catch_warnings(record=True) as w: @@ -762,6 +803,17 @@ def test_read_str(self): self.assertEqual(str(bldr['test_dataset'].data), '') + def test_read_scalar_compound(self): + cmpd_dtype = np.dtype([('x', np.int32), ('y', np.float64)]) + a = np.array((1, 0.1), dtype=cmpd_dtype) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, + dtype=[DtypeSpec('x', doc='x', dtype='int32'), + DtypeSpec('y', doc='y', dtype='float64')])) + self.io.close() + with HDF5IO(self.path, 'r') as io: + bldr = io.read_builder() + np.testing.assert_array_equal(bldr['test_dataset'].data[()], a) + class TestRoundTrip(TestCase): @@ -1795,7 +1847,7 @@ def test_link(self): self.assertTrue(self.foo2.my_data.valid) # test valid self.assertEqual(len(self.foo2.my_data), 5) # test len self.assertEqual(self.foo2.my_data.shape, (5,)) # test getattr with shape - self.assertTrue(np.array_equal(np.array(self.foo2.my_data), [1, 2, 3, 4, 5])) # test array conversion + np.testing.assert_array_equal(self.foo2.my_data, [1, 2, 3, 4, 5]) # test array conversion # test loop through iterable match = [1, 2, 3, 4, 5] @@ -2958,6 +3010,92 @@ def test_append_data(self): self.assertEqual(f['foofile_data'].file.filename, self.paths[1]) self.assertIsInstance(f.attrs['foo_ref_attr'], h5py.Reference) + def test_append_dataset_of_references(self): + """Test that exporting a written container with a dataset of references works.""" + bazs = [] + num_bazs = 1 + for i in range(num_bazs): + bazs.append(Baz(name='baz%d' % i)) + array_bazs=np.array(bazs) + wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,)) + baz_data = BazData(name='baz_data1', data=wrapped_bazs) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io: + read_bucket1 = append_io.read() + new_baz = Baz(name='new') + read_bucket1.add_baz(new_baz) + append_io.write(read_bucket1) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + DoR = read_bucket1.baz_data.data + DoR.append(read_bucket1.bazs['new']) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket1 = read_io.read() + self.assertEqual(len(read_bucket1.baz_data.data), 2) + self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"]) + + def test_append_dataset_of_references_compound(self): + """Test that exporting a written container with a dataset of references of compound data type works.""" + bazs = [] + baz_pairs = [] + num_bazs = 10 + for i in range(num_bazs): + b = Baz(name='baz%d' % i) + bazs.append(b) + baz_pairs.append((i, b)) + baz_cpd_data = BazCpdData(name='baz_cpd_data1', data=H5DataIO(baz_pairs, maxshape=(None,))) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), 
mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io: + read_bucket1 = append_io.read() + new_baz = Baz(name='new') + read_bucket1.add_baz(new_baz) + append_io.write(read_bucket1) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + cpd_DoR = read_bucket1.baz_cpd_data.data + builder = ref_io.manager.get_builder(read_bucket1.bazs['new']) + ref = ref_io._create_ref(builder) + append_data(cpd_DoR.dataset, (11, ref)) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket2 = read_io.read() + + self.assertEqual(read_bucket2.baz_cpd_data.data[-1][0], 11) + self.assertIs(read_bucket2.baz_cpd_data.data[-1][1], read_bucket2.bazs['new']) + + + def test_append_dataset_of_references_orphaned_target(self): + bazs = [] + num_bazs = 1 + for i in range(num_bazs): + bazs.append(Baz(name='baz%d' % i)) + array_bazs=np.array(bazs) + wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,)) + baz_data = BazData(name='baz_data1', data=wrapped_bazs) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + new_baz = Baz(name='new') + read_bucket1.add_baz(new_baz) + DoR = read_bucket1.baz_data.data + with self.assertRaises(ValueError): + DoR.append(read_bucket1.bazs['new']) + def test_append_external_link_data(self): """Test that exporting a written container after adding a link with link_data=True creates external links.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) @@ -3666,6 +3804,14 @@ def test_dataio_shape_then_data(self): with self.assertRaisesRegex(ValueError, "Setting data when dtype and shape are not None is not supported"): dataio.data = list() + def test_dataio_maxshape(self): + dataio = H5DataIO(data=np.arange(10), maxshape=(None,)) + self.assertEqual(dataio.maxshape, (None,)) + + def test_dataio_maxshape_from_data(self): + dataio = H5DataIO(data=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + self.assertEqual(dataio.maxshape, (10,)) + def test_hdf5io_can_read(): assert not HDF5IO.can_read("not_a_file") @@ -3690,6 +3836,11 @@ def __init__(self, **kwargs): self.data2 = kwargs["data2"] self.obj = ContainerWithData("name", [1, 2, 3, 4, 5], None) + self.file_path = get_temp_filepath() + + def tearDown(self): + if os.path.exists(self.file_path): + os.remove(self.file_path) def test_set_data_io(self): self.obj.set_data_io("data1", H5DataIO, data_io_kwargs=dict(chunks=True)) @@ -3712,6 +3863,31 @@ def test_set_data_io_old_api(self): self.assertIsInstance(self.obj.data1, H5DataIO) self.assertTrue(self.obj.data1.io_settings["chunks"]) + def test_set_data_io_h5py_dataset(self): + file = File(self.file_path, 'w') + data = file.create_dataset('data', data=[1, 2, 3, 4, 5], chunks=(3,)) + class ContainerWithData(Container): + __fields__ = ('data',) + + @docval( + {"name": "name", "doc": "name", "type": str}, + {'name': 'data', 'doc': 'field1 doc', 'type': h5py.Dataset}, + ) + def __init__(self, **kwargs): + super().__init__(name=kwargs["name"]) + self.data = kwargs["data"] + + container = ContainerWithData("name", data) + container.set_data_io( + "data", + H5DataIO, + data_io_kwargs=dict(chunks=(2,)), + data_chunk_iterator_class=DataChunkIterator, + ) + + self.assertIsInstance(container.data, 
H5DataIO) + self.assertEqual(container.data.io_settings["chunks"], (2,)) + file.close() class TestDataSetDataIO(TestCase): @@ -3720,8 +3896,30 @@ class MyData(Data): pass self.data = MyData("my_data", [1, 2, 3]) + self.file_path = get_temp_filepath() + + def tearDown(self): + if os.path.exists(self.file_path): + os.remove(self.file_path) def test_set_data_io(self): self.data.set_data_io(H5DataIO, dict(chunks=True)) assert isinstance(self.data.data, H5DataIO) assert self.data.data.io_settings["chunks"] + + def test_set_data_io_h5py_dataset(self): + file = File(self.file_path, 'w') + data = file.create_dataset('data', data=[1, 2, 3, 4, 5], chunks=(3,)) + class MyData(Data): + pass + + my_data = MyData("my_data", data) + my_data.set_data_io( + H5DataIO, + data_io_kwargs=dict(chunks=(2,)), + data_chunk_iterator_class=DataChunkIterator, + ) + + self.assertIsInstance(my_data.data, H5DataIO) + self.assertEqual(my_data.data.io_settings["chunks"], (2,)) + file.close() diff --git a/tests/unit/test_io_hdf5_streaming.py b/tests/unit/test_io_hdf5_streaming.py index d1c9d1ab3..d82c9c5c3 100644 --- a/tests/unit/test_io_hdf5_streaming.py +++ b/tests/unit/test_io_hdf5_streaming.py @@ -2,7 +2,9 @@ import os import urllib.request import h5py +import warnings +from hdmf.backends.hdf5.h5tools import HDF5IO from hdmf.build import TypeMap, BuildManager from hdmf.common import get_hdf5io, get_type_map from hdmf.spec import GroupSpec, DatasetSpec, SpecNamespace, NamespaceBuilder, NamespaceCatalog @@ -10,6 +12,7 @@ from hdmf.utils import docval, get_docval + class TestRos3(TestCase): """Test reading an HDMF file using HDF5 ROS3 streaming. @@ -77,6 +80,8 @@ def setUp(self): self.manager = BuildManager(type_map) + warnings.filterwarnings(action="ignore", message="Ignoring cached namespace .*") + def tearDown(self): if os.path.exists(self.ns_filename): os.remove(self.ns_filename) @@ -89,6 +94,57 @@ def test_basic_read(self): with get_hdf5io(s3_path, "r", manager=self.manager, driver="ros3") as io: io.read() + def test_basic_read_with_aws_region(self): + s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + + with get_hdf5io(s3_path, "r", manager=self.manager, driver="ros3", aws_region="us-east-2") as io: + io.read() + + def test_basic_read_s3_with_aws_region(self): + # NOTE: if an s3 path is used with ros3 driver, aws_region must be specified + s3_path = "s3://dandiarchive/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + + with get_hdf5io(s3_path, "r", manager=self.manager, driver="ros3", aws_region="us-east-2") as io: + io.read() + assert io.aws_region == "us-east-2" + + def test_get_namespaces(self): + s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + + namespaces = HDF5IO.get_namespaces(s3_path, driver="ros3") + self.assertEqual(namespaces, {'core': '2.3.0', 'hdmf-common': '1.5.0', 'hdmf-experimental': '0.1.0'}) + + def test_get_namespaces_with_aws_region(self): + s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + + namespaces = HDF5IO.get_namespaces(s3_path, driver="ros3", aws_region="us-east-2") + self.assertEqual(namespaces, {'core': '2.3.0', 'hdmf-common': '1.5.0', 'hdmf-experimental': '0.1.0'}) + + def test_get_namespaces_s3_with_aws_region(self): + s3_path = "s3://dandiarchive/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + + namespaces = HDF5IO.get_namespaces(s3_path, driver="ros3", aws_region="us-east-2") + self.assertEqual(namespaces, {'core': 
'2.3.0', 'hdmf-common': '1.5.0', 'hdmf-experimental': '0.1.0'}) + + def test_load_namespaces(self): + s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + + HDF5IO.load_namespaces(self.manager.namespace_catalog, path=s3_path, driver="ros3") + assert set(self.manager.namespace_catalog.namespaces) == set(["core", "hdmf-common", "hdmf-experimental"]) + + def test_load_namespaces_with_aws_region(self): + s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + + HDF5IO.load_namespaces(self.manager.namespace_catalog, path=s3_path, driver="ros3", aws_region="us-east-2") + assert set(self.manager.namespace_catalog.namespaces) == set(["core", "hdmf-common", "hdmf-experimental"]) + + def test_load_namespaces_s3_with_aws_region(self): + s3_path = "s3://dandiarchive/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + + HDF5IO.load_namespaces(self.manager.namespace_catalog, path=s3_path, driver="ros3", aws_region="us-east-2") + assert set(self.manager.namespace_catalog.namespaces) == set(["core", "hdmf-common", "hdmf-experimental"]) + + # Util functions and classes to enable loading of the NWB namespace -- see pynwb/src/pynwb/spec.py diff --git a/tests/unit/test_multicontainerinterface.py b/tests/unit/test_multicontainerinterface.py index c705d0a6e..6da81c2cc 100644 --- a/tests/unit/test_multicontainerinterface.py +++ b/tests/unit/test_multicontainerinterface.py @@ -198,7 +198,10 @@ def test_add_single_dup(self): """Test that adding a container to the attribute dict correctly adds the container.""" obj1 = Container('obj1') foo = Foo(obj1) - msg = "'obj1' already exists in Foo 'Foo'" + msg = (f"Cannot add 'obj1' at 0x{id(obj1)} to dict attribute " + "'containers' in 'Foo'. 
" + f" 'obj1' at 0x{id(obj1)} already exists in 'containers' " + "and has the same name.") with self.assertRaisesWith(ValueError, msg): foo.add_container(obj1) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py deleted file mode 100644 index b2ff267a7..000000000 --- a/tests/unit/test_query.py +++ /dev/null @@ -1,161 +0,0 @@ -import os -from abc import ABCMeta, abstractmethod - -import numpy as np -from h5py import File -from hdmf.array import SortedArray, LinSpace -from hdmf.query import HDMFDataset, Query -from hdmf.testing import TestCase - - -class AbstractQueryMixin(metaclass=ABCMeta): - - @abstractmethod - def getDataset(self): - raise NotImplementedError('Cannot run test unless getDataset is implemented') - - def setUp(self): - self.dset = self.getDataset() - self.wrapper = HDMFDataset(self.dset) - - def test_get_dataset(self): - array = self.wrapper.dataset - self.assertIsInstance(array, SortedArray) - - def test___gt__(self): - ''' - Test wrapper greater than magic method - ''' - q = self.wrapper > 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [False, False, False, False, False, - False, True, True, True, True] - expected = slice(6, 10) - self.assertEqual(result, expected) - - def test___ge__(self): - ''' - Test wrapper greater than or equal magic method - ''' - q = self.wrapper >= 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [False, False, False, False, False, - True, True, True, True, True] - expected = slice(5, 10) - self.assertEqual(result, expected) - - def test___lt__(self): - ''' - Test wrapper less than magic method - ''' - q = self.wrapper < 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [True, True, True, True, True, - False, False, False, False, False] - expected = slice(0, 5) - self.assertEqual(result, expected) - - def test___le__(self): - ''' - Test wrapper less than or equal magic method - ''' - q = self.wrapper <= 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [True, True, True, True, True, - True, False, False, False, False] - expected = slice(0, 6) - self.assertEqual(result, expected) - - def test___eq__(self): - ''' - Test wrapper equals magic method - ''' - q = self.wrapper == 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [False, False, False, False, False, - True, False, False, False, False] - expected = 5 - self.assertTrue(np.array_equal(result, expected)) - - def test___ne__(self): - ''' - Test wrapper not equal magic method - ''' - q = self.wrapper != 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [True, True, True, True, True, - False, True, True, True, True] - expected = [slice(0, 5), slice(6, 10)] - self.assertTrue(np.array_equal(result, expected)) - - def test___getitem__(self): - ''' - Test wrapper getitem using slice - ''' - result = self.wrapper[0:5] - expected = [0, 1, 2, 3, 4] - self.assertTrue(np.array_equal(result, expected)) - - def test___getitem__query(self): - ''' - Test wrapper getitem using query - ''' - q = self.wrapper < 5 - result = self.wrapper[q] - expected = [0, 1, 2, 3, 4] - self.assertTrue(np.array_equal(result, expected)) - - -class SortedQueryTest(AbstractQueryMixin, TestCase): - - path = 'SortedQueryTest.h5' - - def getDataset(self): - self.f = File(self.path, 'w') - self.input = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - self.d = self.f.create_dataset('dset', data=self.input) - return SortedArray(self.d) - - def tearDown(self): - self.f.close() - if 
os.path.exists(self.path): - os.remove(self.path) - - -class LinspaceQueryTest(AbstractQueryMixin, TestCase): - - path = 'LinspaceQueryTest.h5' - - def getDataset(self): - return LinSpace(0, 10, 1) - - -class CompoundQueryTest(TestCase): - - def getM(self): - return SortedArray(np.arange(10, 20, 1)) - - def getN(self): - return SortedArray(np.arange(10.0, 20.0, 0.5)) - - def setUp(self): - self.m = HDMFDataset(self.getM()) - self.n = HDMFDataset(self.getN()) - - # TODO: test not completed - # def test_map(self): - # q = self.m == (12, 16) # IN operation - # q.evaluate() # [2,3,4,5] - # q.evaluate(False) # RangeResult(2,6) - # r = self.m[q] # noqa: F841 - # r = self.m[q.evaluate()] # noqa: F841 - # r = self.m[q.evaluate(False)] # noqa: F841 - - def tearDown(self): - pass diff --git a/tests/unit/utils_test/test_core_DataIO.py b/tests/unit/utils_test/test_core_DataIO.py index 778dd2617..80518a316 100644 --- a/tests/unit/utils_test/test_core_DataIO.py +++ b/tests/unit/utils_test/test_core_DataIO.py @@ -1,7 +1,6 @@ from copy import copy, deepcopy import numpy as np -from hdmf.container import Data from hdmf.data_utils import DataIO from hdmf.testing import TestCase @@ -29,26 +28,13 @@ def test_dataio_slice_delegation(self): dset = DataIO(indata) self.assertTrue(np.all(dset[1:3, 5:8] == indata[1:3, 5:8])) - def test_set_dataio(self): - """ - Test that Data.set_dataio works as intended - """ - dataio = DataIO() - data = np.arange(30).reshape(5, 2, 3) - container = Data('wrapped_data', data) - container.set_dataio(dataio) - self.assertIs(dataio.data, data) - self.assertIs(dataio, container.data) - - def test_set_dataio_data_already_set(self): + def test_set_data_io_data_already_set(self): """ Test that Data.set_dataio works as intended """ dataio = DataIO(data=np.arange(30).reshape(5, 2, 3)) - data = np.arange(30).reshape(5, 2, 3) - container = Data('wrapped_data', data) with self.assertRaisesWith(ValueError, "cannot overwrite 'data' on DataIO"): - container.set_dataio(dataio) + dataio.data=[1,2,3,4] def test_dataio_options(self): """ diff --git a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py index debac9cab..cb1a727a4 100644 --- a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py +++ b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py @@ -4,7 +4,7 @@ from pathlib import Path from tempfile import mkdtemp from shutil import rmtree -from typing import Tuple, Iterable, Callable +from typing import Tuple, Iterable, Callable, Union from sys import version_info import h5py @@ -408,6 +408,32 @@ def test_progress_bar(self): first_line = file.read() self.assertIn(member=desc, container=first_line) + @unittest.skipIf(not TQDM_INSTALLED, "optional tqdm module is not installed") + def test_progress_bar_class(self): + + class MyCustomProgressBar(tqdm.tqdm): + def update(self, n: int = 1) -> Union[bool, None]: + displayed = super().update(n) + print(f"Custom injection on step {n}") # noqa: T201 + + return displayed + + out_text_file = self.test_dir / "test_progress_bar_class.txt" + desc = "Testing progress bar..." 
+ with open(file=out_text_file, mode="w") as file: + iterator = self.TestNumpyArrayDataChunkIterator( + array=self.test_array, + display_progress=True, + progress_bar_class=MyCustomProgressBar, + progress_bar_options=dict(file=file, desc=desc), + ) + j = 0 + for buffer in iterator: + j += 1 # dummy operation; must be silent for proper updating of bar + with open(file=out_text_file, mode="r") as file: + first_line = file.read() + self.assertIn(member=desc, container=first_line) + @unittest.skipIf(not TQDM_INSTALLED, "optional tqdm module is installed") def test_progress_bar_no_options(self): dci = self.TestNumpyArrayDataChunkIterator(array=self.test_array, display_progress=True) diff --git a/tests/unit/utils_test/test_data_utils.py b/tests/unit/utils_test/test_data_utils.py new file mode 100644 index 000000000..b5a5e50e7 --- /dev/null +++ b/tests/unit/utils_test/test_data_utils.py @@ -0,0 +1,37 @@ +from hdmf.data_utils import append_data +from hdmf.testing import TestCase + +import numpy as np +from numpy.testing import assert_array_equal + +try: + import zarr + ZARR_INSTALLED = True +except ImportError: + ZARR_INSTALLED = False + + +class MyIterable: + def __init__(self, data): + self.data = data + + +class TestAppendData(TestCase): + def test_append_exception(self): + data = MyIterable([1, 2, 3, 4, 5]) + with self.assertRaises(ValueError): + append_data(data, 4) + + +class TestZarrAppendData(TestCase): + + def setUp(self): + if not ZARR_INSTALLED: + self.skipTest("optional Zarr package is not installed") + + + def test_append_data_zarr(self): + zarr_array = zarr.array([1,2,3]) + new = append_data(zarr_array, 4) + + assert_array_equal(new[:], np.array([1,2,3,4])) diff --git a/tests/unit/utils_test/test_docval.py b/tests/unit/utils_test/test_docval.py index 154a5c4b0..bed5cd134 100644 --- a/tests/unit/utils_test/test_docval.py +++ b/tests/unit/utils_test/test_docval.py @@ -1,7 +1,7 @@ import numpy as np from hdmf.testing import TestCase -from hdmf.utils import (docval, fmt_docval_args, get_docval, getargs, popargs, AllowPositional, get_docval_macro, - docval_macro, popargs_to_dict, call_docval_func) +from hdmf.utils import (docval, get_docval, getargs, popargs, AllowPositional, get_docval_macro, + docval_macro, popargs_to_dict) class MyTestClass(object): @@ -137,80 +137,6 @@ def method1(self, **kwargs): with self.assertRaises(ValueError): method1(self, arg1=[[1, 1, 1]]) - fmt_docval_warning_msg = ( - "fmt_docval_args will be deprecated in a future version of HDMF. Instead of using fmt_docval_args, " - "call the function directly with the kwargs. Please note that fmt_docval_args " - "removes all arguments not accepted by the function's docval, so if you are passing kwargs that " - "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True " - "is set), then you will need to pop the extra arguments out of kwargs before calling the function." 
- ) - - def test_fmt_docval_args(self): - """ Test that fmt_docval_args parses the args and strips extra args """ - test_kwargs = { - 'arg1': 'a string', - 'arg2': 1, - 'arg3': True, - 'hello': 'abc', - 'list': ['abc', 1, 2, 3] - } - with self.assertWarnsWith(PendingDeprecationWarning, self.fmt_docval_warning_msg): - rec_args, rec_kwargs = fmt_docval_args(self.test_obj.basic_add2_kw, test_kwargs) - exp_args = ['a string', 1] - self.assertListEqual(rec_args, exp_args) - exp_kwargs = {'arg3': True} - self.assertDictEqual(rec_kwargs, exp_kwargs) - - def test_fmt_docval_args_no_docval(self): - """ Test that fmt_docval_args raises an error when run on function without docval """ - def method1(self, **kwargs): - pass - - with self.assertRaisesRegex(ValueError, r"no docval found on .*method1.*"): - with self.assertWarnsWith(PendingDeprecationWarning, self.fmt_docval_warning_msg): - fmt_docval_args(method1, {}) - - def test_fmt_docval_args_allow_extra(self): - """ Test that fmt_docval_args works """ - test_kwargs = { - 'arg1': 'a string', - 'arg2': 1, - 'arg3': True, - 'hello': 'abc', - 'list': ['abc', 1, 2, 3] - } - with self.assertWarnsWith(PendingDeprecationWarning, self.fmt_docval_warning_msg): - rec_args, rec_kwargs = fmt_docval_args(self.test_obj.basic_add2_kw_allow_extra, test_kwargs) - exp_args = ['a string', 1] - self.assertListEqual(rec_args, exp_args) - exp_kwargs = {'arg3': True, 'hello': 'abc', 'list': ['abc', 1, 2, 3]} - self.assertDictEqual(rec_kwargs, exp_kwargs) - - def test_call_docval_func(self): - """Test that call_docval_func strips extra args and calls the function.""" - test_kwargs = { - 'arg1': 'a string', - 'arg2': 1, - 'arg3': True, - 'hello': 'abc', - 'list': ['abc', 1, 2, 3] - } - msg = ( - "call_docval_func will be deprecated in a future version of HDMF. Instead of using call_docval_func, " - "call the function directly with the kwargs. Please note that call_docval_func " - "removes all arguments not accepted by the function's docval, so if you are passing kwargs that " - "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True " - "is set), then you will need to pop the extra arguments out of kwargs before calling the function." 
- ) - with self.assertWarnsWith(PendingDeprecationWarning, msg): - ret_kwargs = call_docval_func(self.test_obj.basic_add2_kw, test_kwargs) - exp_kwargs = { - 'arg1': 'a string', - 'arg2': 1, - 'arg3': True - } - self.assertDictEqual(ret_kwargs, exp_kwargs) - def test_docval_add(self): """Test that docval works with a single positional argument @@ -736,8 +662,12 @@ def method(self, **kwargs): self.assertEqual(method(self, np.uint(1)), np.uint(1)) self.assertEqual(method(self, np.uint(2)), np.uint(2)) + # the string rep of uint changes from numpy 1 to 2 ("1" to "np.uint64(1)"), so do not hardcode the string + uint_str1 = np.uint(1).__repr__() + uint_str2 = np.uint(2).__repr__() + msg = ("TestDocValidator.test_enum_uint..method: " - "forbidden value for 'arg1' (got 3, expected (1, 2))") + "forbidden value for 'arg1' (got 3, expected (%s, %s))" % (uint_str1, uint_str2)) with self.assertRaisesWith(ValueError, msg): method(self, np.uint(3)) @@ -767,8 +697,11 @@ def method(self, **kwargs): self.assertEqual(method(self, 'true'), 'true') self.assertEqual(method(self, np.uint(1)), np.uint(1)) + # the string rep of uint changes from numpy 1 to 2 ("1" to "np.uint64(1)"), so do not hardcode the string + uint_str = np.uint(1).__repr__() + msg = ("TestDocValidator.test_enum_bool_mixed..method: " - "forbidden value for 'arg1' (got 0, expected (True, 1, 1.0, 'true', 1))") + "forbidden value for 'arg1' (got 0, expected (True, 1, 1.0, 'true', %s))" % uint_str) with self.assertRaisesWith(ValueError, msg): method(self, 0) diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index 95ff5d98e..64667b3e0 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -501,6 +501,60 @@ def test_np_bool_for_bool(self): results = self.vmap.validate(bar_builder) self.assertEqual(len(results), 0) + def test_scalar_compound_dtype(self): + """Test that validator allows scalar compound dtype data where a compound dtype is specified.""" + spec_catalog = SpecCatalog() + dtype = [DtypeSpec('x', doc='x', dtype='int'), DtypeSpec('y', doc='y', dtype='float')] + spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec('an example dataset', dtype, name='data',)], + attributes=[AttributeSpec('attr1', 'an example attribute', 'text',)]) + spec_catalog.register_spec(spec, 'test2.yaml') + self.namespace = SpecNamespace( + 'a test namespace', CORE_NAMESPACE, [{'source': 'test2.yaml'}], version='0.1.0', catalog=spec_catalog) + self.vmap = ValidatorMap(self.namespace) + + value = np.array((1, 2.2), dtype=[('x', 'int'), ('y', 'float')]) + bar_builder = GroupBuilder('my_bar', + attributes={'data_type': 'Bar', 'attr1': 'test'}, + datasets=[DatasetBuilder(name='data', + data=value, + dtype=[DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='float'),],),]) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) + +class TestReferenceValidation(ValidatorTestBase): + def getSpecs(self): + qux_spec = DatasetSpec( + doc='a simple scalar dataset', + data_type_def='Qux', + dtype='int', + shape=None + ) + bar_spec = GroupSpec('A test group specification with a reference dataset', + data_type_def='Bar', + datasets=[DatasetSpec('an example dataset', + dtype=RefSpec('Qux', reftype='object'), + name='data', + shape=(None, ))], + attributes=[AttributeSpec('attr1', + 'an example attribute', + dtype=RefSpec('Qux', reftype='object'), + shape=(None, ))]) + return 
(qux_spec, bar_spec)
+
+    def test_invalid_reference(self):
+        """Test that validator does not allow another data type where a reference is specified."""
+        value = np.array([1.0, 2.0, 3.0])
+        bar_builder = GroupBuilder('my_bar',
+                                   attributes={'data_type': 'Bar', 'attr1': value},
+                                   datasets=[DatasetBuilder('data', value)])
+        results = self.vmap.validate(bar_builder)
+        result_strings = set([str(s) for s in results])
+        expected_errors = {"Bar/attr1 (my_bar.attr1): incorrect type - expected 'object reference', got 'float64'",
+                           "Bar/data (my_bar/data): incorrect type - expected 'object reference', got 'float64'"}
+        self.assertEqual(result_strings, expected_errors)
 
 class Test1DArrayValidation(TestCase):
diff --git a/tox.ini b/tox.ini
index aeb743c45..775cb7592 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,56 +4,55 @@
 # and then run "tox -e [envname]" from this directory.
 
 [tox]
-requires = pip >= 22.0
+requires = pip >= 24.3.1
 
 [testenv]
 download = True
-usedevelop = True
 setenv =
     PYTHONDONTWRITEBYTECODE = 1
-    VIRTUALENV_PIP = 23.3.1
 recreate =
-    pinned, minimum, upgraded, prerelease: False
+    minimum, upgraded, prerelease: False
     build, wheelinstall: True  # good practice to recreate the environment
 skip_install =
-    pinned, minimum, upgraded, prerelease, wheelinstall: False
+    minimum, upgraded, prerelease, wheelinstall: False
     build: True  # no need to install anything when building
 install_command =
     # when using [testenv:wheelinstall] and --installpkg, the wheel and its dependencies
     # are installed, instead of the package in the current directory
-    pinned, minimum, wheelinstall: python -I -m pip install {opts} {packages}
-    upgraded: python -I -m pip install -U {opts} {packages}
-    prerelease: python -I -m pip install -U --pre {opts} {packages}
+    minimum, wheelinstall: python -I -m pip install {opts} {packages}
+    upgraded: python -I -m pip install -U {opts} {packages}
+    prerelease: python -I -m pip install -U --pre {opts} {packages}
 deps =
-    # use pinned, minimum, or neither (use dependencies in pyproject.toml)
-    pytest, gallery: -rrequirements-dev.txt
-    gallery: -rrequirements-doc.txt
-    optional: -rrequirements-opt.txt
-    pinned: -rrequirements.txt
-    minimum: -rrequirements-min.txt
+    # which requirements files to use (default: none)
+    minimum: -r requirements-min.txt
+extras =
+    # which optional dependency set(s) to use (default: none)
+    pytest: test
+    gallery: doc
+    optional: tqdm,zarr,termset
 commands =
+    # commands to run for every environment
     python --version  # print python version for debugging
     python -m pip check  # check for conflicting packages
     python -m pip list  # list installed packages for debugging
+
+    # commands to run for select environments
     pytest: pytest -v
     gallery: python test_gallery.py
     build: python -m pip install -U build
     build: python -m build
     wheelinstall: python -c "import hdmf; import hdmf.common"
 
-# list of pre-defined environments. (Technically environments not listed here
-# like build-py312 can also be used.)
-[testenv:pytest-py312-upgraded]
-[testenv:pytest-py312-prerelease]
-[testenv:pytest-py311-optional-pinned]  # some optional reqs not compatible with py312 yet
-[testenv:pytest-py{38,39,310,311,312}-pinned]
-[testenv:pytest-py38-minimum]
+# list of pre-defined environments
+[testenv:pytest-py{39,310,311,312,313}-upgraded]
+[testenv:pytest-py313-upgraded-optional]
+[testenv:pytest-py313-prerelease-optional]
+[testenv:pytest-py39-minimum]
 
-[testenv:gallery-py312-upgraded]
-[testenv:gallery-py312-prerelease]
-[testenv:gallery-py311-optional-pinned]
-[testenv:gallery-py{38,39,310,311,312}-pinned]
-[testenv:gallery-py38-minimum]
+# TODO: Update to 3.13 when linkml and its deps support 3.13
+[testenv:gallery-py312-upgraded-optional]
+[testenv:gallery-py312-prerelease-optional]
+[testenv:gallery-py39-minimum]
 
 [testenv:build]  # using tox for this so that we can have a clean build environment
 [testenv:wheelinstall]  # use with `--installpkg dist/*-none-any.whl`
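
For reference, the new ros3 streaming tests in tests/unit/test_io_hdf5_streaming.py above all exercise one calling pattern. The sketch below restates that pattern in isolation; the DANDI URL and the us-east-2 region are the same values used in the tests, and reading without an explicit BuildManager is an assumption here (the tests pass manager=self.manager built from their locally generated namespace files).

from hdmf.backends.hdf5.h5tools import HDF5IO
from hdmf.common import get_hdf5io

# With the ros3 driver, an s3:// URL requires aws_region; an https:// S3 URL does not.
s3_path = "s3://dandiarchive/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991"

# Inspect the cached namespaces without reading the rest of the file.
namespaces = HDF5IO.get_namespaces(s3_path, driver="ros3", aws_region="us-east-2")

# Read the file; pass manager=... as the tests do if the file's data types
# are not covered by the default type map.
with get_hdf5io(s3_path, "r", driver="ros3", aws_region="us-east-2") as io:
    container = io.read()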