diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index ef3daed9c..c96a78551 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -46,25 +46,17 @@ body: - Linux validations: required: true - - type: dropdown - id: executable - attributes: - label: Python Executable - options: - - Conda - - Python - validations: - required: true - type: dropdown id: python_version attributes: label: Python Version options: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" + - "newer" validations: required: true - type: textarea diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index 86a7ad57d..82f987164 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -2,11 +2,8 @@ Prepare for release of HDMF [version] ### Before merging: - [ ] Make sure all PRs to be included in this release have been merged to `dev`. -- [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, - `requirements-doc.txt`, `requirements-opt.txt`, and `environment-ros3.yml` to the latest versions, - and update dependency ranges in `pyproject.toml` and minimums in `requirements-min.txt` as needed. - Run `pip install pur && pur -r requirements-dev.txt -r requirements.txt -r requirements-opt.txt` - and manually update `environment-ros3.yml`. +- [ ] Major and minor releases: Update dependency ranges in `pyproject.toml` and minimums in + `requirements-min.txt` as needed. - [ ] Check legal file dates and information in `Legal.txt`, `license.txt`, `README.rst`, `docs/source/conf.py`, and any other locations as needed - [ ] Update `pyproject.toml` as needed @@ -34,5 +31,5 @@ Prepare for release of HDMF [version] 4. Either monitor [conda-forge/hdmf-feedstock](https://github.com/conda-forge/hdmf-feedstock) for the regro-cf-autotick-bot bot to create a PR updating the version of HDMF to the latest PyPI release, usually within 24 hours of release, or manually create a PR updating `recipe/meta.yaml` with the latest version number - and SHA256 retrieved from PyPI > HDMF > Download Files > View hashes for the `.tar.gz` file. Re-render and update - dependencies as needed. + and SHA256 retrieved from PyPI > HDMF > Download Files > View hashes for the `.tar.gz` file. Re-render and + update the dependencies as needed. diff --git a/.github/workflows/check_sphinx_links.yml b/.github/workflows/check_sphinx_links.yml index 15fc61e30..24422c47c 100644 --- a/.github/workflows/check_sphinx_links.yml +++ b/.github/workflows/check_sphinx_links.yml @@ -21,13 +21,12 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.11' # TODO update to 3.12 when optional reqs (e.g., oaklib) support 3.12 + python-version: '3.12' # TODO: Update to 3.13 when linkml and its deps support 3.13 - name: Install Sphinx dependencies and package run: | python -m pip install --upgrade pip - python -m pip install -r requirements-doc.txt -r requirements-opt.txt - python -m pip install . 
+ python -m pip install ".[all]" - name: Check Sphinx internal and external links run: sphinx-build -W -b linkcheck ./docs/source ./test_build diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 5861ab136..ab0db960a 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.12' + python-version: '3.13' - name: Install build dependencies run: | @@ -28,7 +28,7 @@ jobs: - name: Run tox tests run: | - tox -e py312-upgraded + tox -e py313-upgraded - name: Build wheel and source distribution run: | diff --git a/.github/workflows/project_action.yml b/.github/workflows/project_action.yml index 0f0d8f3ce..6037bd4ab 100644 --- a/.github/workflows/project_action.yml +++ b/.github/workflows/project_action.yml @@ -20,7 +20,7 @@ jobs: - name: Add to Developer Board env: TOKEN: ${{ steps.generate_token.outputs.token }} - uses: actions/add-to-project@v1.0.1 + uses: actions/add-to-project@v1.0.2 with: project-url: https://github.com/orgs/hdmf-dev/projects/7 github-token: ${{ env.TOKEN }} @@ -28,7 +28,7 @@ jobs: - name: Add to Community Board env: TOKEN: ${{ steps.generate_token.outputs.token }} - uses: actions/add-to-project@v1.0.1 + uses: actions/add-to-project@v1.0.2 with: project-url: https://github.com/orgs/hdmf-dev/projects/8 github-token: ${{ env.TOKEN }} diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 8df190d55..b1d2ddc59 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -25,30 +25,27 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: linux-python3.9 , test-tox-env: pytest-py39-pinned , python-ver: "3.9" , os: ubuntu-latest } - - { name: linux-python3.10 , test-tox-env: pytest-py310-pinned , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.11 , test-tox-env: pytest-py311-pinned , python-ver: "3.11", os: ubuntu-latest } - - { name: linux-python3.11-optional , test-tox-env: pytest-py311-optional-pinned , python-ver: "3.11", os: ubuntu-latest } - - { name: linux-python3.12 , test-tox-env: pytest-py312-pinned , python-ver: "3.12", os: ubuntu-latest } - - { name: linux-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } - - { name: linux-python3.12-prerelease , test-tox-env: pytest-py312-prerelease , python-ver: "3.12", os: ubuntu-latest } - - { name: windows-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: windows-latest } - - { name: windows-python3.9 , test-tox-env: pytest-py39-pinned , python-ver: "3.9" , os: windows-latest } - - { name: windows-python3.10 , test-tox-env: pytest-py310-pinned , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.11 , test-tox-env: pytest-py311-pinned , python-ver: "3.11", os: windows-latest } - - { name: windows-python3.11-optional , test-tox-env: pytest-py311-optional-pinned , python-ver: "3.11", os: windows-latest } - - { name: windows-python3.12 , test-tox-env: pytest-py312-pinned , python-ver: "3.12", os: windows-latest } - - { name: windows-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: windows-latest } - - { name: windows-python3.12-prerelease , test-tox-env: pytest-py312-prerelease , python-ver: "3.12", os: windows-latest } - - { 
name: macos-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: macos-13 } - - { name: macos-python3.9 , test-tox-env: pytest-py39-pinned , python-ver: "3.9" , os: macos-13 } - - { name: macos-python3.10 , test-tox-env: pytest-py310-pinned , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.11 , test-tox-env: pytest-py311-pinned , python-ver: "3.11", os: macos-latest } - - { name: macos-python3.11-optional , test-tox-env: pytest-py311-optional-pinned , python-ver: "3.11", os: macos-latest } - - { name: macos-python3.12 , test-tox-env: pytest-py312-pinned , python-ver: "3.12", os: macos-latest } - - { name: macos-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: macos-latest } - - { name: macos-python3.12-prerelease , test-tox-env: pytest-py312-prerelease , python-ver: "3.12", os: macos-latest } + - { name: linux-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-python3.10-upgraded , test-tox-env: pytest-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } + - { name: linux-python3.11-upgraded , test-tox-env: pytest-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } + - { name: linux-python3.13-upgraded , test-tox-env: pytest-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } + - { name: linux-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: ubuntu-latest } + - { name: linux-python3.13-prerelease-optional , test-tox-env: pytest-py313-prerelease-optional , python-ver: "3.13", os: ubuntu-latest } + - { name: windows-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-python3.10-upgraded , test-tox-env: pytest-py310-upgraded , python-ver: "3.10", os: windows-latest } + - { name: windows-python3.11-upgraded , test-tox-env: pytest-py311-upgraded , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: windows-latest } + - { name: windows-python3.13-upgraded , test-tox-env: pytest-py313-upgraded , python-ver: "3.13", os: windows-latest } + - { name: windows-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: windows-latest } + - { name: windows-python3.13-prerelease-optional , test-tox-env: pytest-py313-prerelease-optional , python-ver: "3.13", os: windows-latest } + - { name: macos-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: macos-13 } + - { name: macos-python3.10-upgraded , test-tox-env: pytest-py310-upgraded , python-ver: "3.10", os: macos-latest } + - { name: macos-python3.11-upgraded , test-tox-env: pytest-py311-upgraded , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: macos-latest } + - { name: macos-python3.13-upgraded , test-tox-env: pytest-py313-upgraded , python-ver: "3.13", os: macos-latest } + - { name: macos-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: macos-latest } + - { name: macos-python3.13-prerelease-optional , test-tox-env: pytest-py313-prerelease-optional , python-ver: "3.13", os: macos-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ 
-97,18 +94,16 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: linux-gallery-python3.11-optional , test-tox-env: gallery-py311-optional-pinned , python-ver: "3.11", os: ubuntu-latest } - - { name: linux-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } - - { name: linux-gallery-python3.12-prerelease , test-tox-env: gallery-py312-prerelease , python-ver: "3.12", os: ubuntu-latest } - - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } - - { name: windows-gallery-python3.11-optional , test-tox-env: gallery-py311-optional-pinned , python-ver: "3.11", os: windows-latest } - - { name: windows-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: windows-latest } - - { name: windows-gallery-python3.12-prerelease, test-tox-env: gallery-py312-prerelease , python-ver: "3.12", os: windows-latest } - - { name: macos-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: macos-13 } - - { name: macos-gallery-python3.11-optional , test-tox-env: gallery-py311-optional-pinned , python-ver: "3.11", os: macos-latest } - - { name: macos-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: macos-latest } - - { name: macos-gallery-python3.12-prerelease , test-tox-env: gallery-py312-prerelease , python-ver: "3.12", os: macos-latest } + # TODO: Update to 3.13 when linkml and its deps support 3.13 + - { name: linux-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-gallery-python3.12-upgraded-optional , test-tox-env: gallery-py312-upgraded-optional , python-ver: "3.12", os: ubuntu-latest } + - { name: linux-gallery-python3.12-prerelease-optional , test-tox-env: gallery-py312-prerelease-optional , python-ver: "3.12", os: ubuntu-latest } + - { name: windows-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-gallery-python3.12-upgraded-optional , test-tox-env: gallery-py312-upgraded-optional , python-ver: "3.12", os: windows-latest } + - { name: windows-gallery-python3.12-prerelease-optional , test-tox-env: gallery-py312-prerelease-optional , python-ver: "3.12", os: windows-latest } + - { name: macos-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: macos-13 } + - { name: macos-gallery-python3.12-upgraded-optional , test-tox-env: gallery-py312-upgraded-optional , python-ver: "3.12", os: macos-latest } + - { name: macos-gallery-python3.12-prerelease-optional , test-tox-env: gallery-py312-prerelease-optional , python-ver: "3.12", os: macos-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -131,72 +126,6 @@ jobs: run: | tox -e ${{ matrix.test-tox-env }} - run-all-tests-on-conda: - name: ${{ matrix.name }} - runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} # needed for conda environment to work - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} - cancel-in-progress: true - strategy: - fail-fast: false - matrix: - include: - - { name: conda-linux-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: conda-linux-python3.9 , test-tox-env: 
pytest-py39-pinned , python-ver: "3.9" , os: ubuntu-latest } - - { name: conda-linux-python3.10 , test-tox-env: pytest-py310-pinned , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.11 , test-tox-env: pytest-py311-pinned , python-ver: "3.11", os: ubuntu-latest } - - { name: conda-linux-python3.11-optional , test-tox-env: pytest-py311-optional-pinned , python-ver: "3.11", os: ubuntu-latest } - - { name: conda-linux-python3.12 , test-tox-env: pytest-py312-pinned , python-ver: "3.12", os: ubuntu-latest } - - { name: conda-linux-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } - - { name: conda-linux-python3.12-prerelease , test-tox-env: pytest-py312-prerelease , python-ver: "3.12", os: ubuntu-latest } - steps: - - name: Checkout repo with submodules - uses: actions/checkout@v4 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required to determine the version - - - name: Set up Conda - uses: conda-incubator/setup-miniconda@v3 - with: - auto-update-conda: true - python-version: ${{ matrix.python-ver }} - channels: conda-forge - mamba-version: "*" - - - name: Install build dependencies - run: | - conda config --set always_yes yes --set changeps1 no - conda info - mamba install -c conda-forge "tox>=4" - - - name: Conda reporting - run: | - conda info - conda config --show-sources - conda list --show-channel-urls - - # NOTE tox installs packages from PyPI not conda-forge... - - name: Run tox tests - run: | - tox -e ${{ matrix.test-tox-env }} - - - name: Build wheel and source distribution - run: | - tox -e build - ls -1 dist - - - name: Test installation from a wheel - run: | - tox -e wheelinstall --installpkg dist/*-none-any.whl - - - name: Test installation from a source distribution - run: | - tox -e wheelinstall --installpkg dist/*.tar.gz - run-ros3-tests: name: ${{ matrix.name }} runs-on: ${{ matrix.os }} @@ -210,9 +139,9 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest } - - { name: windows-python3.12-ros3 , python-ver: "3.12", os: windows-latest } - - { name: macos-python3.12-ros3 , python-ver: "3.12", os: macos-latest } + - { name: linux-python3.13-ros3 , python-ver: "3.13", os: ubuntu-latest } + - { name: windows-python3.13-ros3 , python-ver: "3.13", os: windows-latest } + - { name: macos-python3.13-ros3 , python-ver: "3.13", os: macos-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -229,7 +158,6 @@ jobs: python-version: ${{ matrix.python-ver }} channels: conda-forge auto-activate-base: false - mamba-version: "*" - name: Install run dependencies run: | diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index bd2eeb921..330bb7aba 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -31,7 +31,7 @@ jobs: - { os: macos-latest , opt_req: false } env: # used by codecov-action OS: ${{ matrix.os }} - PYTHON: '3.11' # TODO update to 3.12 when optional reqs (e.g., oaklib) support 3.12 + PYTHON: '3.12' # TODO: Update to 3.13 when linkml and its deps support 3.13 steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -44,30 +44,32 @@ jobs: with: python-version: ${{ env.PYTHON }} - - name: Install dependencies + - name: Upgrade pip run: | python -m pip install --upgrade pip - python -m pip install -r requirements-dev.txt -r requirements.txt - - - name: Install optional dependencies - if: ${{ matrix.opt_req }} - run: python 
-m pip install -r requirements-opt.txt - name: Install package + if: ${{ ! matrix.opt_req }} run: | - python -m pip install . - python -m pip list + python -m pip install ".[test]" + + - name: Install package with optional dependencies + if: ${{ matrix.opt_req }} + run: | + python -m pip install ".[test,tqdm,sparse,zarr,termset]" - name: Run tests and generate coverage report run: | # coverage is configured in pyproject.toml - pytest --cov --cov-report=xml --cov-report=term # codecov uploader requires xml format + # codecov uploader requires xml format + python -m pip list + pytest --cov --cov-report=xml --cov-report=term - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: fail_ci_if_error: true - file: ./coverage.xml + files: ./coverage.xml env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} @@ -84,7 +86,7 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest } + - { name: linux-python3.13-ros3 , python-ver: "3.13", os: ubuntu-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -101,7 +103,6 @@ jobs: python-version: ${{ matrix.python-ver }} channels: conda-forge auto-activate-base: false - mamba-version: "*" - name: Install run dependencies run: | @@ -119,9 +120,9 @@ jobs: pytest --cov --cov-report=xml --cov-report=term tests/unit/test_io_hdf5_streaming.py - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: fail_ci_if_error: true - file: ./coverage.xml + files: ./coverage.xml env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/run_hdmf_zarr_tests.yml b/.github/workflows/run_hdmf_zarr_tests.yml index 5e76711af..51a01977a 100644 --- a/.github/workflows/run_hdmf_zarr_tests.yml +++ b/.github/workflows/run_hdmf_zarr_tests.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' # use 3.10 until hdmf-zarr updates versioneer.py which breaks on newer python + python-version: '3.13' - name: Update pip run: python -m pip install --upgrade pip @@ -29,10 +29,9 @@ jobs: - name: Clone HDMF-Zarr and install dev branch of HDMF run: | python -m pip list - git clone https://github.com/hdmf-dev/hdmf-zarr.git --recurse-submodules + git clone https://github.com/hdmf-dev/hdmf-zarr.git cd hdmf-zarr - python -m pip install -r requirements-dev.txt # do not install the pinned install requirements - python -m pip install . # this will install a different version of hdmf from the current one + python -m pip install ".[test]" # this will install a different version of hdmf from the current one cd .. python -m pip uninstall -y hdmf # uninstall the other version of hdmf python -m pip install . 
# reinstall current branch of hdmf @@ -41,4 +40,4 @@ jobs: - name: Run HDMF-Zarr tests on HDMF-Zarr dev branch run: | cd hdmf-zarr - pytest + pytest -v diff --git a/.github/workflows/run_pynwb_tests.yml b/.github/workflows/run_pynwb_tests.yml index 1a714ed9f..a159380cd 100644 --- a/.github/workflows/run_pynwb_tests.yml +++ b/.github/workflows/run_pynwb_tests.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.12' + python-version: '3.13' - name: Update pip run: python -m pip install --upgrade pip diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 5e0b3bff2..2ff759029 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -23,13 +23,13 @@ jobs: matrix: include: # NOTE config below with "upload-wheels: true" specifies that wheels should be uploaded as an artifact - - { name: linux-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: linux-python3.12 , test-tox-env: pytest-py312-pinned , python-ver: "3.12", os: ubuntu-latest } - - { name: linux-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: ubuntu-latest , upload-wheels: true } - - { name: windows-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: windows-latest } - - { name: windows-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: windows-latest } - - { name: macos-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: macos-13 } - - { name: macos-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: macos-latest } + - { name: linux-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-python3.13-upgraded , test-tox-env: pytest-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } + - { name: linux-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: ubuntu-latest , upload-wheels: true } + - { name: windows-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: windows-latest } + - { name: macos-python3.9-minimum , test-tox-env: pytest-py39-minimum , python-ver: "3.9" , os: macos-13 } + - { name: macos-python3.13-upgraded-optional , test-tox-env: pytest-py313-upgraded-optional , python-ver: "3.13", os: macos-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -85,10 +85,11 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: linux-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } - - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } - - { name: windows-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: windows-latest } + # TODO: Update to 3.13 when linkml and its deps support 3.13 + - { name: linux-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-gallery-python3.12-upgraded-optional , test-tox-env: gallery-py312-upgraded-optional , python-ver: "3.12", 
os: ubuntu-latest } + - { name: windows-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-gallery-python3.12-upgraded-optional , test-tox-env: gallery-py312-upgraded-optional , python-ver: "3.12", os: windows-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -111,69 +112,9 @@ jobs: run: | tox -e ${{ matrix.test-tox-env }} - run-tests-on-conda: - name: ${{ matrix.name }} - runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} - cancel-in-progress: true - strategy: - fail-fast: false - matrix: - include: - - { name: conda-linux-python3.8-minimum , test-tox-env: pytest-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: conda-linux-python3.12-upgraded , test-tox-env: pytest-py312-upgraded , python-ver: "3.12", os: ubuntu-latest } - steps: - - name: Checkout repo with submodules - uses: actions/checkout@v4 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required to determine the version - - - name: Set up Conda - uses: conda-incubator/setup-miniconda@v3 - with: - auto-update-conda: true - python-version: ${{ matrix.python-ver }} - channels: conda-forge - mamba-version: "*" - - - name: Install build dependencies - run: | - conda config --set always_yes yes --set changeps1 no - conda info - mamba install -c conda-forge "tox>=4" - - - name: Conda reporting - run: | - conda info - conda config --show-sources - conda list --show-channel-urls - - # NOTE tox installs packages from PyPI not conda-forge... - - name: Run tox tests - run: | - tox -e ${{ matrix.test-tox-env }} - - - name: Build wheel and source distribution - run: | - tox -e build - ls -1 dist - - - name: Test installation from a wheel - run: | - tox -e wheelinstall --installpkg dist/*-none-any.whl - - - name: Test installation from a source distribution - run: | - tox -e wheelinstall --installpkg dist/*.tar.gz - deploy-dev: name: Deploy pre-release from dev - needs: [run-tests, run-gallery-tests, run-tests-on-conda] + needs: [run-tests, run-gallery-tests] if: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest concurrency: @@ -189,7 +130,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.12' + python-version: '3.13' - name: Download wheel and source distributions from artifact uses: actions/download-artifact@v4 @@ -222,7 +163,7 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest } + - { name: linux-python3.13-ros3 , python-ver: "3.13", os: ubuntu-latest } steps: - name: Checkout repo with submodules uses: actions/checkout@v4 @@ -239,7 +180,6 @@ jobs: python-version: ${{ matrix.python-ver }} channels: conda-forge auto-activate-base: false - mamba-version: "*" - name: Install run dependencies run: | diff --git a/.gitignore b/.gitignore index d75abc985..e202b3526 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ # Auto-generated apidocs RST files /docs/source/gen_modules/ /docs/source/hdmf*.rst +/docs/source/sg_execution_times.rst /docs/gallery/*.hdf5 /docs/gallery/*.sqlite /docs/gallery/expanded_example_dynamic_term_set.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 221182985..80e876a58 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # NOTE: run `pre-commit autoupdate` to update hooks to latest version repos: - repo: 
https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: check-yaml - id: end-of-file-fixer @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.3 + rev: v0.9.2 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate diff --git a/.readthedocs.yaml b/.readthedocs.yaml index a4f1ea037..f17c323b1 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -6,9 +6,9 @@ version: 2 build: - os: ubuntu-20.04 + os: ubuntu-24.04 tools: - python: '3.9' + python: '3.12' # TODO: Update to 3.13 when linkml and its deps support 3.13 # Build documentation in the docs/ directory with Sphinx sphinx: @@ -24,10 +24,7 @@ formats: all # Optionally set the version of Python and requirements required to build your docs python: install: - - requirements: requirements-doc.txt - - requirements: requirements-opt.txt - - requirements: requirements.txt - - path: . + - path: .[docs,tqdm,sparse,zarr,termset] # path to the package relative to the root # Optionally include all submodules submodules: diff --git a/CHANGELOG.md b/CHANGELOG.md index e71271bcb..1ce49f9c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,53 @@ # HDMF Changelog -## HDMF 4.0.0 (Upcoming) + +## HDMF 4.0.0 (January 22, 2025) + +### Breaking changes +- The following classes have been deprecated and removed: Array, AbstractSortedArray, SortedArray, LinSpace, Query, RegionSlicer, ListSlicer, H5RegionSlicer, DataRegion, RegionBuilder. The following methods have been deprecated and removed: fmt_docval_args, call_docval_func, get_container_cls, add_child, set_dataio (now refactored as set_data_io). We have also removed all early development for region references. @mavaylon1, @rly [#1198](https://github.com/hdmf-dev/hdmf/pull/1198), [#1212](https://github.com/hdmf-dev/hdmf/pull/1212) +- Importing from `hdmf.build.map` is no longer supported. Import from `hdmf.build` instead. @rly [#1221](https://github.com/hdmf-dev/hdmf/pull/1221) +- Python 3.8 has reached end of life. Dropped support for Python 3.8 and added support for Python 3.13. @mavaylon1 [#1209](https://github.com/hdmf-dev/hdmf/pull/1209) +- Support for Zarr is limited to versions < 3. @rly [#1229](https://github.com/hdmf-dev/hdmf/pull/1229) +- Scipy is no longer a required dependency. Users using the `CSRMatrix` data type should install `scipy` separately or with `pip install "hdmf[sparse]"`. @rly [#1140](https://github.com/hdmf-dev/hdmf/pull/1140) + +### Changed +- Added checks to ensure that group and dataset spec names and default names do not contain slashes. @bendichter [#1219](https://github.com/hdmf-dev/hdmf/pull/1219) +- Updated copyright dates. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) +- Created optional dependency groups in `pyproject.toml` and updated GitHub Actions workflows to use those instead of requirements files. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) +- Stopped using pinned dependencies in the docs and testing. These are not necessary for library testing, confuse new users and developers, and add maintenance burden. Current dependencies are stable enough that they need not be pinned and users can report the libraries they use. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) +- Stopped redundant testing using a conda environment. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) +- Adopted changelog format conventions: https://keepachangelog.com/en/1.1.0/ .
@rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) + +### Added +- Added script to check Python version support for HDMF dependencies. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) + +### Fixed +- Fixed issue with `DynamicTable.add_column` not allowing subclasses of `DynamicTableRegion` or `EnumData`. @rly [#1091](https://github.com/hdmf-dev/hdmf/pull/1091) + +## HDMF 3.14.6 (December 20, 2024) + ### Enhancements -- Added support for datasets to be expandable by default for the HDF5 backend. @mavaylon1 [#1158](https://github.com/hdmf-dev/hdmf/pull/1158) +- Added support for expandable datasets of references for untyped and compound data types. @stephprince [#1188](https://github.com/hdmf-dev/hdmf/pull/1188) +- Improved html representation of data in `Container` objects. @h-mayorquin [#1100](https://github.com/hdmf-dev/hdmf/pull/1100) +- Added error when using colon for `Container` name. A colon cannot be used as a group name when writing to Zarr on Windows. @stephprince [#1202](https://github.com/hdmf-dev/hdmf/pull/1202) +- Adjusted testing for hdmf-zarr. @rly [#1222](https://github.com/hdmf-dev/hdmf/pull/1222) + +### Bug fixes +- Fixed inaccurate error message when validating reference data types. @stephprince [#1199](https://github.com/hdmf-dev/hdmf/pull/1199) +- Fixed incorrect dtype conversion of a StrDataset. @stephprince [#1205](https://github.com/hdmf-dev/hdmf/pull/1205) + +## HDMF 3.14.5 (October 6, 2024) + +### Enhancements +- Added support for overriding backend configurations of `h5py.Dataset` objects in `Container.set_data_io`. @pauladkisson [#1172](https://github.com/hdmf-dev/hdmf/pull/1172) + +### Bug fixes +- Fixed bug in writing of string arrays to an HDF5 file that were read from an HDF5 file that was introduced in 3.14.4. @rly @stephprince + [#1189](https://github.com/hdmf-dev/hdmf/pull/1189) +- Fixed export of scalar datasets with a compound data type. @stephprince [#1185](https://github.com/hdmf-dev/hdmf/pull/1185) +- Fixed mamba-related error in conda-based GitHub Actions. @rly [#1194](https://github.com/hdmf-dev/hdmf/pull/1194) -## HDMF 3.14.4 (August 22, 2024) +## HDMF 3.14.4 (September 4, 2024) ### Enhancements - Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) diff --git a/Legal.txt b/Legal.txt index db343a634..e54bb27ac 100644 --- a/Legal.txt +++ b/Legal.txt @@ -1,4 +1,4 @@ -“hdmf” Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +“hdmf” Copyright (c) 2017-2025, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. diff --git a/README.rst b/README.rst index b56f7efd2..c35f45ccd 100644 --- a/README.rst +++ b/README.rst @@ -94,7 +94,7 @@ Citing HDMF LICENSE ======= -"hdmf" Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 
+"hdmf" Copyright (c) 2017-2025, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. @@ -110,7 +110,7 @@ You are under no obligation whatsoever to provide any bug fixes, patches, or upg COPYRIGHT ========= -"hdmf" Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +"hdmf" Copyright (c) 2017-2025, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit other to do so. diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index 36e84b357..c8090f30f 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -100,6 +100,11 @@ import warnings warnings.filterwarnings("ignore", category=UserWarning, message="HERD is experimental*") +try: + import linkml_runtime # noqa: F401 +except ImportError as e: + raise ImportError("Please install linkml-runtime to run this example: pip install linkml-runtime") from e + try: dir_path = os.path.dirname(os.path.abspath(__file__)) yaml_file = os.path.join(dir_path, 'example_term_set.yaml') diff --git a/docs/gallery/plot_term_set.py b/docs/gallery/plot_term_set.py index 8bf2375aa..50945889a 100644 --- a/docs/gallery/plot_term_set.py +++ b/docs/gallery/plot_term_set.py @@ -65,6 +65,8 @@ For more information how to properly format the schema to support LinkML Dynamic Enumerations, please refer to https://linkml.io/linkml/schemas/enums.html#dynamic-enums. 
""" +# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_termset.png' + from hdmf.common import DynamicTable, VectorData import os import numpy as np diff --git a/docs/source/conf.py b/docs/source/conf.py index 9781933f5..4898074d2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -76,7 +76,7 @@ "matplotlib": ("https://matplotlib.org/stable/", None), "h5py": ("https://docs.h5py.org/en/latest/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "zarr": ("https://zarr.readthedocs.io/en/stable/", None), + "zarr": ("https://zarr.readthedocs.io/en/v2.18.4/", None), # TODO - update when hdmf-zarr supports Zarr 3.0 } # these links cannot be checked in github actions @@ -87,7 +87,6 @@ nitpicky = True nitpick_ignore = [('py:class', 'Intracomm'), - ('py:class', 'h5py.RegionReference'), ('py:class', 'h5py._hl.dataset.Dataset'), ('py:class', 'function'), ('py:class', 'unittest.case.TestCase'), @@ -110,7 +109,7 @@ # General information about the project. project = "HDMF" -copyright = "2017-2024, Hierarchical Data Modeling Framework" +copyright = "2017-2025, Hierarchical Data Modeling Framework" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -163,16 +162,12 @@ # html_theme = 'default' # html_theme = "sphinxdoc" html_theme = "sphinx_rtd_theme" -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None diff --git a/docs/source/figures/gallery_thumbnail_termset.png b/docs/source/figures/gallery_thumbnail_termset.png new file mode 100644 index 000000000..29a0db903 Binary files /dev/null and b/docs/source/figures/gallery_thumbnail_termset.png differ diff --git a/docs/source/figures/gallery_thumbnails.pptx b/docs/source/figures/gallery_thumbnails.pptx index ac3da484d..5ede1c4b5 100644 Binary files a/docs/source/figures/gallery_thumbnails.pptx and b/docs/source/figures/gallery_thumbnails.pptx differ diff --git a/docs/source/index.rst b/docs/source/index.rst index 2fcd4778a..842bacc98 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -62,7 +62,6 @@ If you use HDMF in your research, please use the following citation: :caption: For Maintainers make_a_release - update_requirements .. toctree:: :hidden: diff --git a/docs/source/install_developers.rst b/docs/source/install_developers.rst index 04e351c41..72da40332 100644 --- a/docs/source/install_developers.rst +++ b/docs/source/install_developers.rst @@ -52,11 +52,11 @@ Option 2: Using conda The `conda package and environment management system`_ is an alternate way of managing virtual environments. First, install Anaconda_ to install the ``conda`` tool. Then create and -activate a new virtual environment called ``"hdmf-env"`` with Python 3.12 installed. +activate a new virtual environment called ``"hdmf-env"`` with Python 3.13 installed. .. 
code:: bash - conda create --name hdmf-env python=3.12 + conda create --name hdmf-env python=3.13 conda activate hdmf-env Similar to a virtual environment created with ``venv``, a conda environment @@ -88,8 +88,7 @@ package requirements using the pip_ Python package manager, and install HDMF in git clone --recurse-submodules https://github.com/hdmf-dev/hdmf.git cd hdmf - pip install -r requirements.txt -r requirements-dev.txt -r requirements-doc.txt -r requirements-opt.txt - pip install -e . + pip install -e ".[all]" .. note:: diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 49fbe07b2..f4d701c07 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -4,7 +4,7 @@ Installing HDMF --------------- -HDMF requires having Python 3.8, 3.9, 3.10, 3.11, or 3.12 installed. If you don't have Python installed and want the simplest way to +HDMF requires having Python 3.9-3.13 installed. If you don't have Python installed and want the simplest way to get started, we recommend you install and use the `Anaconda Distribution`_. It includes Python, NumPy, and many other commonly used packages for scientific computing and data science. diff --git a/docs/source/make_a_release.rst b/docs/source/make_a_release.rst index d2da593bd..57dd26a2e 100644 --- a/docs/source/make_a_release.rst +++ b/docs/source/make_a_release.rst @@ -20,8 +20,7 @@ Prerequisites * You have a `GPG signing key`_. -* Dependency versions in ``requirements.txt``, ``requirements-dev.txt``, ``requirements-opt.txt``, - ``requirements-doc.txt``, and ``requirements-min.txt`` are up-to-date. +* Dependency versions are up-to-date. * Legal information and copyright dates in ``Legal.txt``, ``license.txt``, ``README.rst``, ``docs/source/conf.py``, and any other files are up-to-date. @@ -177,7 +176,7 @@ Publish release on conda-forge: Step-by-step Conda-forge maintains a bot called "regro-cf-autotick-bot" that regularly monitors PyPI for new releases of packages that are also on conda-forge. When a new release is detected, usually within 24 hours of publishing on PyPI, the bot will create a Pull Request with the correct modifications to the version and sha256 values - in ``meta.yaml``. If the requirements in ``setup.py`` have been changed, then you need to modify the + in ``meta.yaml``. If the requirements in ``pyproject.toml`` have been changed, then you need to modify the requirements/run section in ``meta.yaml`` manually to reflect these changes. Once tests pass, merge the PR, and a new release will be published on Anaconda cloud. This is the easiest way to update the package version on conda-forge. @@ -242,7 +241,7 @@ In order to release a new version on conda-forge manually, follow the steps belo $ sha=$(openssl sha256 /tmp/hdmf-$release.tar.gz | awk '{print $2}') $ sed -i -- "3s/.*/{$ set sha256 = \"$sha\" %}/" recipe/meta.yaml - If the requirements in ``setup.py`` have been changed, then modify the requirements/run list in + If the requirements in ``pyproject.toml`` have been changed, then modify the requirements/run list in the ``meta.yaml`` file to reflect these changes. 
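A hedged illustration of the SHA256 step referenced in ``docs/source/make_a_release.rst`` above: the digest pasted into ``recipe/meta.yaml`` can also be computed in Python instead of with ``openssl sha256``. This is only a sketch; the tarball path below is a placeholder, not a real release artifact.

.. code:: python

    # Sketch: compute the SHA256 digest of a downloaded release tarball in chunks.
    # Mirrors the `openssl sha256 /tmp/hdmf-$release.tar.gz` step in the release docs;
    # the path used here is hypothetical.
    import hashlib
    from pathlib import Path

    def sha256_of(path: str) -> str:
        """Return the hex SHA256 digest of a file, reading it in 8 KiB chunks."""
        digest = hashlib.sha256()
        with Path(path).open("rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                digest.update(chunk)
        return digest.hexdigest()

    print(sha256_of("/tmp/hdmf-4.0.0.tar.gz"))  # value goes into the sha256 field of recipe/meta.yaml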
diff --git a/docs/source/overview_software_architecture.rst b/docs/source/overview_software_architecture.rst index 973a01b2f..d63c953fe 100644 --- a/docs/source/overview_software_architecture.rst +++ b/docs/source/overview_software_architecture.rst @@ -68,7 +68,7 @@ Builder * :py:class:`~hdmf.build.builders.GroupBuilder` - represents a collection of objects * :py:class:`~hdmf.build.builders.DatasetBuilder` - represents data * :py:class:`~hdmf.build.builders.LinkBuilder` - represents soft-links - * :py:class:`~hdmf.build.builders.RegionBuilder` - represents a slice into data (Subclass of :py:class:`~hdmf.build.builders.DatasetBuilder`) + * :py:class:`~hdmf.build.builders.ReferenceBuilder` - represents a reference to another group or dataset * **Main Module:** :py:class:`hdmf.build.builders` diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst index 30501769e..f3a6c7457 100644 --- a/docs/source/software_process.rst +++ b/docs/source/software_process.rst @@ -45,48 +45,44 @@ pyproject.toml_ contains a list of package dependencies and their version ranges running HDMF. As a library, upper bound version constraints create more harm than good in the long term (see this `blog post`_) so we avoid setting upper bounds on requirements. -If some of the packages are outdated, see :ref:`update_requirements_files`. +When setting lower bounds, make sure to specify the lower bounds in both pyproject.toml_ and +requirements-min.txt_. The latter is used in automated testing to ensure that the package runs +correctly using the minimum versions of dependencies. + +Minimum requirements should be updated manually if a new feature or bug fix is added in a dependency that is required +for proper running of HDMF. Minimum requirements should also be updated if a user requests that HDMF be installable +with an older version of a dependency, all tests pass using the older version, and there is no valid reason for the +minimum version to be as high as it is. .. _pyproject.toml: https://github.com/hdmf-dev/hdmf/blob/dev/pyproject.toml .. _blog post: https://iscinumpy.dev/post/bound-version-constraints/ +.. _requirements-min.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-min.txt -------------------- Testing Requirements -------------------- -There are several kinds of requirements files used for testing PyNWB. - -The first one is requirements-min.txt_, which lists the package dependencies and their minimum versions for -installing HDMF. +pyproject.toml_ contains the optional dependency group "test" with testing requirements. -The second one is requirements.txt_, which lists the pinned (concrete) dependencies to reproduce -an entire development environment to use HDMF. +See tox.ini_ and the GitHub Actions workflows for how different testing environments are +defined using the optional dependency groups. -The third one is requirements-dev.txt_, which list the pinned (concrete) dependencies to reproduce -an entire development environment to use HDMF, run HDMF tests, check code style, compute coverage, and create test -environments. +environment-ros3.yml_ lists the dependencies used to test ROS3 streaming in HDMF which +can only be done in a Conda environment. -The fourth one is requirements-opt.txt_, which lists the pinned (concrete) optional dependencies to use all -available features in HDMF. - -The final one is environment-ros3.yml_, which lists the dependencies used to -test ROS3 streaming in HDMF. - -.. 
_requirements-min.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-min.txt -.. _requirements.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements.txt -.. _requirements-dev.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-dev.txt -.. _requirements-opt.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-opt.txt +.. _tox.ini: https://github.com/hdmf-dev/hdmf/blob/dev/tox.ini .. _environment-ros3.yml: https://github.com/hdmf-dev/hdmf/blob/dev/environment-ros3.yml -------------------------- Documentation Requirements -------------------------- -requirements-doc.txt_ lists the dependencies to generate the documentation for HDMF. -Both this file and `requirements.txt` are used by ReadTheDocs_ to initialize the local environment for Sphinx to run. +pyproject.toml_ contains the optional dependency group "docs" with documentation requirements. +This dependency group is used by ReadTheDocs_ to initialize the local environment for Sphinx to run +(see .readthedocs.yaml_). -.. _requirements-doc.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-doc.txt .. _ReadTheDocs: https://readthedocs.org/projects/hdmf/ +.. _.readthedocs.yaml: https://github.com/hdmf-dev/hdmf/blob/dev/.readthedocs.yaml ------------------------- Versioning and Releasing diff --git a/docs/source/update_requirements.rst b/docs/source/update_requirements.rst deleted file mode 100644 index 65b4b99d4..000000000 --- a/docs/source/update_requirements.rst +++ /dev/null @@ -1,78 +0,0 @@ - -.. _update_requirements_files: - -================================ -How to Update Requirements Files -================================ - -The different requirements files introduced in :ref:`software_process` section are the following: - -* requirements.txt_ -* requirements-dev.txt_ -* requirements-doc.txt_ -* requirements-min.txt_ -* requirements-opt.txt_ - -.. _requirements.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements.txt -.. _requirements-dev.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-dev.txt -.. _requirements-doc.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-doc.txt -.. _requirements-min.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-min.txt -.. _requirements-opt.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-opt.txt - -requirements.txt -================ - -`requirements.txt` of the project can be created or updated and then captured using -the following script: - -.. code:: - - mkvirtualenv hdmf-requirements - - cd hdmf - pip install . - pip check # check for package conflicts - pip freeze > requirements.txt - - deactivate - rmvirtualenv hdmf-requirements - - -requirements-(dev|doc|opt).txt -============================== - -Any of these requirements files can be updated using -the following scripts: - -.. code:: - - cd hdmf - - # Set the requirements file to update - target_requirements=requirements-dev.txt - - mkvirtualenv hdmf-requirements - - # Install updated requirements - pip install -U -r $target_requirements - - # If relevant, you could pip install new requirements now - # pip install -U - - # Check for any conflicts in installed packages - pip check - - # Update list of pinned requirements - pip freeze > $target_requirements - - deactivate - rmvirtualenv hdmf-requirements - - -requirements-min.txt -==================== - -Minimum requirements should be updated manually if a new feature or bug fix is added in a dependency that is required -for proper running of HDMF. 
Minimum requirements should also be updated if a user requests that HDMF be installable -with an older version of a dependency, all tests pass using the older version, and there is no valid reason for the -minimum version to be as high as it is. diff --git a/environment-ros3.yml b/environment-ros3.yml index 34c37cc01..6b4f6c472 100644 --- a/environment-ros3.yml +++ b/environment-ros3.yml @@ -1,15 +1,14 @@ -# pinned dependencies to reproduce an entire development environment to use PyNWB with ROS3 support +# environment file used to test HDMF with ROS3 support name: ros3 channels: - conda-forge - defaults dependencies: - - python==3.12 - - h5py==3.11.0 - - matplotlib==3.8.4 - - numpy==2.0.0 - - pandas==2.2.2 - - python-dateutil==2.8.2 - - pytest==8.1.2 # regression introduced in pytest 8.2.*, will be fixed in 8.3.0 - - pytest-cov==5.0.0 - - setuptools + - python==3.13 + - h5py==3.12.1 + - matplotlib==3.9.2 + - numpy==2.2.1 + - pandas==2.2.3 + - python-dateutil==2.9.0.post0 + - pytest==8.3.4 + - pytest-cov==6.0.0 diff --git a/license.txt b/license.txt index f7964f329..c43f1f876 100644 --- a/license.txt +++ b/license.txt @@ -1,4 +1,4 @@ -“hdmf” Copyright (c) 2017-2024, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +“hdmf” Copyright (c) 2017-2025, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/pyproject.toml b/pyproject.toml index 86e52a137..5a58b6cef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,15 +13,15 @@ authors = [ ] description = "A hierarchical data modeling framework for modern science data standards" readme = "README.rst" -requires-python = ">=3.8" +requires-python = ">=3.9" license = {text = "BSD-3-Clause"} classifiers = [ "Programming Language :: Python", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "License :: OSI Approved :: BSD License", "Development Status :: 5 - Production/Stable", "Operating System :: OS Independent", @@ -30,22 +30,47 @@ classifiers = [ "Topic :: Scientific/Engineering :: Medical Science Apps.", ] dependencies = [ - "h5py>=2.10", - "jsonschema>=2.6.0", - 'numpy>=1.18', - "pandas>=1.0.5", + "h5py>=3.1.0", + "jsonschema>=3.2.0", + 'numpy>=1.19.3', + "pandas>=1.2.0", "ruamel.yaml>=0.16", - "scipy>=1.4", - "importlib-resources; python_version < '3.9'", # TODO: remove when minimum python version is 3.9 ] dynamic = ["version"] [project.optional-dependencies] tqdm = ["tqdm>=4.41.0"] -termset = ["linkml-runtime>=1.5.5; python_version >= '3.9'", - "schemasheets>=0.1.23; python_version >= '3.9'", - "oaklib>=0.5.12; python_version >= '3.9'", - "pyyaml>=6.0.1; python_version >= '3.9'"] +zarr = ["zarr>=2.12.0,<3"] +sparse = ["scipy>=1.7"] +termset = [ + "linkml-runtime>=1.5.5", + "schemasheets>=0.1.23", + "oaklib>=0.5.12", + "pyyaml>=6.0.1", +] + +# development dependencies +test = [ + "codespell", + "pre-commit", + "pytest", + "pytest-cov", + "python-dateutil", + "ruff", + "tox", +] + +# documentation dependencies +docs = 
[ + "matplotlib", + "sphinx>=4", # improved support for docutils>=0.17 + "sphinx_rtd_theme>=1", # <1 does not work with docutils>=0.17 + "sphinx-gallery", + "sphinx-copybutton", +] + +# all possible dependencies +all = ["hdmf[tqdm,zarr,sparse,termset,test,docs]"] [project.urls] "Homepage" = "https://github.com/hdmf-dev/hdmf" @@ -128,6 +153,7 @@ exclude = [ "src/hdmf/_due.py", "docs/source/tutorials/", "docs/_build/", + "scripts/" ] line-length = 120 diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 95cf0797e..000000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,14 +0,0 @@ -# pinned dependencies to reproduce an entire development environment to use HDMF, run HDMF tests, check code style, -# compute coverage, and create test environments. note that depending on the version of python installed, different -# versions of requirements may be installed due to package incompatibilities. -# -black==24.4.2 -codespell==2.3.0 -coverage==7.5.4 -pre-commit==3.7.1; python_version >= "3.9" -pre-commit==3.5.0; python_version < "3.9" -pytest==8.1.2 # regression introduced in pytest 8.2.*, will be fixed in 8.3.0 -pytest-cov==5.0.0 -python-dateutil==2.8.2 -ruff==0.5.0 -tox==4.15.1 diff --git a/requirements-doc.txt b/requirements-doc.txt deleted file mode 100644 index 32a790cf8..000000000 --- a/requirements-doc.txt +++ /dev/null @@ -1,6 +0,0 @@ -# dependencies to generate the documentation for HDMF -matplotlib -sphinx>=4 # improved support for docutils>=0.17 -sphinx_rtd_theme>=1 # <1 does not work with docutils>=0.17 -sphinx-gallery -sphinx-copybutton diff --git a/requirements-min.txt b/requirements-min.txt index a437fc588..a9fbeb93e 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,15 +1,10 @@ # minimum versions of package dependencies for installing HDMF -h5py==2.10 # support for selection of datasets with list of indices added in 2.10 -importlib-resources==5.12.0; python_version < "3.9" # TODO: remove when when minimum python version is 3.9 +# NOTE: these should match the minimum bound for dependencies in pyproject.toml +h5py==3.1.0 jsonschema==3.2.0 -numpy==1.18 -pandas==1.0.5 # when this is changed to >=1.5.0, see TODO items referenced in #762 -ruamel.yaml==0.16 -scipy==1.4 -# this file is currently used to test only python~=3.8 so these dependencies are not needed -# linkml-runtime==1.5.5; python_version >= "3.9" -# schemasheets==0.1.23; python_version >= "3.9" -# oaklib==0.5.12; python_version >= "3.9" -# pyyaml==6.0.1; python_version >= "3.9" +numpy==1.19.3 +pandas==1.2.0 +ruamel.yaml==0.16.0 +scipy==1.7.0 tqdm==4.41.0 zarr==2.12.0 diff --git a/requirements-opt.txt b/requirements-opt.txt deleted file mode 100644 index 4831d1949..000000000 --- a/requirements-opt.txt +++ /dev/null @@ -1,6 +0,0 @@ -# pinned dependencies that are optional. 
used to reproduce an entire development environment to use HDMF -tqdm==4.66.4 -zarr==2.18.2 -linkml-runtime==1.7.7; python_version >= "3.9" -schemasheets==0.2.1; python_version >= "3.9" -oaklib==0.6.10; python_version >= "3.9" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 30a596ada..000000000 --- a/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -# pinned dependencies to reproduce an entire development environment to use HDMF -h5py==3.11.0 -importlib-resources==6.1.0; python_version < "3.9" # TODO: remove when minimum python version is 3.9 -jsonschema==4.22.0 -numpy==1.26.4 # TODO: numpy 2.0.0 is supported by hdmf but incompatible with pandas and scipy -pandas==2.2.2; python_version >= "3.9" -pandas==2.1.2; python_version < "3.8" # TODO: remove when minimum python version is 3.9 -ruamel.yaml==0.18.2 -scipy==1.14.0; python_version >= "3.10" -scipy==1.11.3; python_version < "3.10" diff --git a/scripts/check_py_support.py b/scripts/check_py_support.py new file mode 100644 index 000000000..5c48dac3c --- /dev/null +++ b/scripts/check_py_support.py @@ -0,0 +1,205 @@ +""" +Python Version Support Checker + +This script analyzes Python package dependencies listed in pyproject.toml to check their +compatibility with a specified Python version (default: 3.13). It examines both regular +and optional dependencies, checking their trove classifiers for explicit version support. + +The script provides: +- Grouped output of supported and unsupported packages +- Latest supported Python version for packages without explicit support +- Error reporting for packages that cannot be checked +- Summary statistics of compatibility status + +Usage: + # Run this command from the root of the repo + python scripts/check_py_support.py + +Requirements: + - Python 3.11+ + - packaging + - colorama + +Input: + - pyproject.toml file in the current directory + +Output format: + - Supported packages (green) with their versions + - Unsupported packages (red) with their versions and latest supported Python version + - Packages with errors (yellow) + - Summary statistics + +Note: + The absence of explicit version support in trove classifiers doesn't necessarily + indicate incompatibility, just that the package hasn't declared support. 
+""" + +import tomllib +import importlib.metadata +from pathlib import Path +from packaging.requirements import Requirement +from colorama import init, Fore, Style +from typing import NamedTuple +import re + +# Initialize colorama +init() + +# Global configuration +PYTHON_VERSION = "3.13" + +class PackageSupport(NamedTuple): + name: str + spec: str + version: str | None + latest_python: str | None + error: str | None + +def parse_dependencies(pyproject_path: Path) -> list[str]: + """Parse dependencies from pyproject.toml, including optional dependencies.""" + with pyproject_path.open("rb") as f: + pyproject = tomllib.load(f) + + # Get main dependencies + dependencies = pyproject.get("project", {}).get("dependencies", []) + + # Get optional dependencies and flatten them + optional_deps = pyproject.get("project", {}).get("optional-dependencies", {}) + for group_deps in optional_deps.values(): + dependencies.extend(group_deps) + + return dependencies + +def get_package_name(dependency_spec: str) -> str: + """Extract package name from dependency specification.""" + return Requirement(dependency_spec).name + +def get_latest_python_version(classifiers: list[str]) -> str | None: + """Extract the latest supported Python version from classifiers.""" + python_versions = [] + pattern = r"Programming Language :: Python :: (\d+\.\d+)" + + for classifier in classifiers: + match = re.match(pattern, classifier) + if match: + version = match.group(1) + try: + major, minor = map(int, version.split('.')) + python_versions.append((major, minor)) + except ValueError: + continue + + if not python_versions: + return None + + # Sort by major and minor version + latest = sorted(python_versions, key=lambda x: (x[0], x[1]), reverse=True)[0] + return f"{latest[0]}.{latest[1]}" + +def check_python_version_support(package_name: str) -> dict[str, str | bool | None]: + """Check if installed package supports Python 3.13.""" + try: + dist = importlib.metadata.distribution(package_name) + classifiers = dist.metadata.get_all('Classifier') + version_classifier = f"Programming Language :: Python :: {PYTHON_VERSION}" + + return { + 'installed_version': dist.version, + 'has_support': version_classifier in classifiers, + 'latest_python': get_latest_python_version(classifiers), + 'error': None + } + except importlib.metadata.PackageNotFoundError: + return { + 'installed_version': None, + 'has_support': False, + 'latest_python': None, + 'error': 'Package not installed' + } + except Exception as e: + return { + 'installed_version': None, + 'has_support': False, + 'latest_python': None, + 'error': str(e) + } + +def print_section_header(title: str, count: int) -> None: + """Print a formatted section header with count.""" + print(f"\n{Fore.CYAN}{title} ({count} packages){Style.RESET_ALL}") + print(f"{Fore.BLUE}{'-' * 100}{Style.RESET_ALL}") + print(f"{Fore.YELLOW}{'Package':<25} {'Specification':<30} {'Version':<20} {'Latest Python'}{Style.RESET_ALL}") + print(f"{Fore.BLUE}{'-' * 100}{Style.RESET_ALL}") + +def main() -> None: + pyproject_path = Path("pyproject.toml") + + if not pyproject_path.exists(): + print(f"{Fore.RED}Error: pyproject.toml not found{Style.RESET_ALL}") + return + + try: + dependencies = parse_dependencies(pyproject_path) + except Exception as e: + print(f"{Fore.RED}Error parsing pyproject.toml: {e}{Style.RESET_ALL}") + return + + # Check each dependency + supported: list[PackageSupport] = [] + unsupported: list[PackageSupport] = [] + errors: list[PackageSupport] = [] + + for dep in dependencies: + package_name = 
get_package_name(dep) + result = check_python_version_support(package_name) + + package_info = PackageSupport( + name=package_name, + spec=dep, + version=result['installed_version'], + latest_python=result['latest_python'], + error=result['error'] + ) + + if result['error']: + errors.append(package_info) + elif result['has_support']: + supported.append(package_info) + else: + unsupported.append(package_info) + + # Print results + print(f"\n{Fore.CYAN}Python {PYTHON_VERSION} Explicit Support Check Results{Style.RESET_ALL}") + print(f"{Fore.BLUE}{'=' * 100}{Style.RESET_ALL}") + + # Print supported packages + if supported: + print_section_header("Supported Packages", len(supported)) + for pkg in supported: + print(f"{Fore.GREEN}{pkg.name:<25} {pkg.spec:<30} {pkg.version:<20} {PYTHON_VERSION}{Style.RESET_ALL}") + + # Print unsupported packages + if unsupported: + print_section_header("Unsupported Packages", len(unsupported)) + for pkg in unsupported: + latest = f"→ {pkg.latest_python}" if pkg.latest_python else "unknown" + print(f"{Fore.RED}{pkg.name:<25} {pkg.spec:<30} {pkg.version:<20} {latest}{Style.RESET_ALL}") + + # Print packages with errors + if errors: + print_section_header("Packages with Errors", len(errors)) + for pkg in errors: + print(f"{Fore.YELLOW}{pkg.name:<25} {pkg.spec:<30} {pkg.error:<20} N/A{Style.RESET_ALL}") + + # Print summary + print(f"\n{Fore.CYAN}Summary:{Style.RESET_ALL}") + print(f"{Fore.BLUE}{'-' * 100}{Style.RESET_ALL}") + total = len(supported) + len(unsupported) + len(errors) + print(f"{Fore.GREEN}Supported: {len(supported):3d} ({len(supported)/total*100:.1f}%){Style.RESET_ALL}") + print(f"{Fore.RED}Unsupported: {len(unsupported):3d} ({len(unsupported)/total*100:.1f}%){Style.RESET_ALL}") + if errors: + print(f"{Fore.YELLOW}Errors: {len(errors):3d} ({len(errors)/total*100:.1f}%){Style.RESET_ALL}") + print(f"{Fore.CYAN}Total: {total:3d}{Style.RESET_ALL}") + +if __name__ == "__main__": + main() diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index 6fc72a117..10305d37b 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -1,32 +1,10 @@ from . 
import query -from .backends.hdf5.h5_utils import H5Dataset, H5RegionSlicer -from .container import Container, Data, DataRegion, HERDManager -from .region import ListSlicer +from .backends.hdf5.h5_utils import H5Dataset +from .container import Container, Data, HERDManager from .utils import docval, getargs from .term_set import TermSet, TermSetWrapper, TypeConfigurator -@docval( - {"name": "dataset", "type": None, "doc": "the HDF5 dataset to slice"}, - {"name": "region", "type": None, "doc": "the region reference to use to slice"}, - is_method=False, -) -def get_region_slicer(**kwargs): - import warnings # noqa: E402 - - warnings.warn( - "get_region_slicer is deprecated and will be removed in HDMF 3.0.", - DeprecationWarning, - ) - - dataset, region = getargs("dataset", "region", kwargs) - if isinstance(dataset, (list, tuple, Data)): - return ListSlicer(dataset, region) - elif isinstance(dataset, H5Dataset): - return H5RegionSlicer(dataset, region) - return None - - try: # see https://effigies.gitlab.io/posts/python-packaging-2023/ from ._version import __version__ diff --git a/src/hdmf/array.py b/src/hdmf/array.py deleted file mode 100644 index a684572e4..000000000 --- a/src/hdmf/array.py +++ /dev/null @@ -1,197 +0,0 @@ -from abc import abstractmethod, ABCMeta - -import numpy as np - - -class Array: - - def __init__(self, data): - self.__data = data - if hasattr(data, 'dtype'): - self.dtype = data.dtype - else: - tmp = data - while isinstance(tmp, (list, tuple)): - tmp = tmp[0] - self.dtype = type(tmp) - - @property - def data(self): - return self.__data - - def __len__(self): - return len(self.__data) - - def get_data(self): - return self.__data - - def __getidx__(self, arg): - return self.__data[arg] - - def __sliceiter(self, arg): - return (x for x in range(*arg.indices(len(self)))) - - def __getitem__(self, arg): - if isinstance(arg, list): - idx = list() - for i in arg: - if isinstance(i, slice): - idx.extend(x for x in self.__sliceiter(i)) - else: - idx.append(i) - return np.fromiter((self.__getidx__(x) for x in idx), dtype=self.dtype) - elif isinstance(arg, slice): - return np.fromiter((self.__getidx__(x) for x in self.__sliceiter(arg)), dtype=self.dtype) - elif isinstance(arg, tuple): - return (self.__getidx__(arg[0]), self.__getidx__(arg[1])) - else: - return self.__getidx__(arg) - - -class AbstractSortedArray(Array, metaclass=ABCMeta): - ''' - An abstract class for representing sorted array - ''' - - @abstractmethod - def find_point(self, val): - pass - - def get_data(self): - return self - - def __lower(self, other): - ins = self.find_point(other) - return ins - - def __upper(self, other): - ins = self.__lower(other) - while self[ins] == other: - ins += 1 - return ins - - def __lt__(self, other): - ins = self.__lower(other) - return slice(0, ins) - - def __le__(self, other): - ins = self.__upper(other) - return slice(0, ins) - - def __gt__(self, other): - ins = self.__upper(other) - return slice(ins, len(self)) - - def __ge__(self, other): - ins = self.__lower(other) - return slice(ins, len(self)) - - @staticmethod - def __sort(a): - if isinstance(a, tuple): - return a[0] - else: - return a - - def __eq__(self, other): - if isinstance(other, list): - ret = list() - for i in other: - eq = self == i - ret.append(eq) - ret = sorted(ret, key=self.__sort) - tmp = list() - for i in range(1, len(ret)): - a, b = ret[i - 1], ret[i] - if isinstance(a, tuple): - if isinstance(b, tuple): - if a[1] >= b[0]: - b[0] = a[0] - else: - tmp.append(slice(*a)) - else: - if b > a[1]: - 
tmp.append(slice(*a)) - elif b == a[1]: - a[1] == b + 1 - else: - ret[i] = a - else: - if isinstance(b, tuple): - if a < b[0]: - tmp.append(a) - else: - if b - a == 1: - ret[i] = (a, b) - else: - tmp.append(a) - if isinstance(ret[-1], tuple): - tmp.append(slice(*ret[-1])) - else: - tmp.append(ret[-1]) - ret = tmp - return ret - elif isinstance(other, tuple): - ge = self >= other[0] - ge = ge.start - lt = self < other[1] - lt = lt.stop - if ge == lt: - return ge - else: - return slice(ge, lt) - else: - lower = self.__lower(other) - upper = self.__upper(other) - d = upper - lower - if d == 1: - return lower - elif d == 0: - return None - else: - return slice(lower, upper) - - def __ne__(self, other): - eq = self == other - if isinstance(eq, tuple): - return [slice(0, eq[0]), slice(eq[1], len(self))] - else: - return [slice(0, eq), slice(eq + 1, len(self))] - - -class SortedArray(AbstractSortedArray): - ''' - A class for wrapping sorted arrays. This class overrides - <,>,<=,>=,==, and != to leverage the sorted content for - efficiency. - ''' - - def __init__(self, array): - super().__init__(array) - - def find_point(self, val): - return np.searchsorted(self.data, val) - - -class LinSpace(SortedArray): - - def __init__(self, start, stop, step): - self.start = start - self.stop = stop - self.step = step - self.dtype = float if any(isinstance(s, float) for s in (start, stop, step)) else int - self.__len = int((stop - start) / step) - - def __len__(self): - return self.__len - - def find_point(self, val): - nsteps = (val - self.start) / self.step - fl = int(nsteps) - if fl == nsteps: - return int(fl) - else: - return int(fl + 1) - - def __getidx__(self, arg): - return self.start + self.step * arg diff --git a/src/hdmf/backends/hdf5/__init__.py b/src/hdmf/backends/hdf5/__init__.py index 6abfc8c85..8f76d7bcc 100644 --- a/src/hdmf/backends/hdf5/__init__.py +++ b/src/hdmf/backends/hdf5/__init__.py @@ -1,3 +1,3 @@ from . 
import h5_utils, h5tools -from .h5_utils import H5RegionSlicer, H5DataIO +from .h5_utils import H5DataIO from .h5tools import HDF5IO, H5SpecWriter, H5SpecReader diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py index 2d7187721..878ebf089 100644 --- a/src/hdmf/backends/hdf5/h5_utils.py +++ b/src/hdmf/backends/hdf5/h5_utils.py @@ -8,7 +8,7 @@ from collections.abc import Iterable from copy import copy -from h5py import Group, Dataset, RegionReference, Reference, special_dtype +from h5py import Group, Dataset, Reference, special_dtype from h5py import filters as h5py_filters import json import numpy as np @@ -16,10 +16,8 @@ import os import logging -from ...array import Array from ...data_utils import DataIO, AbstractDataChunkIterator, append_data from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver -from ...region import RegionSlicer from ...spec import SpecWriter, SpecReader from ...utils import docval, getargs, popargs, get_docval, get_data_shape @@ -85,7 +83,7 @@ def append(self, dataset, data): class H5Dataset(HDMFDataset): - @docval({'name': 'dataset', 'type': (Dataset, Array), 'doc': 'the HDF5 file lazily evaluate'}, + @docval({'name': 'dataset', 'type': Dataset, 'doc': 'the HDF5 file lazily evaluate'}, {'name': 'io', 'type': 'hdmf.backends.hdf5.h5tools.HDF5IO', 'doc': 'the IO object that was used to read the underlying dataset'}) def __init__(self, **kwargs): @@ -96,10 +94,6 @@ def __init__(self, **kwargs): def io(self): return self.__io - @property - def regionref(self): - return self.dataset.regionref - @property def ref(self): return self.dataset.ref @@ -189,7 +183,7 @@ def get_object(self, h5obj): class AbstractH5TableDataset(DatasetOfReferences): - @docval({'name': 'dataset', 'type': (Dataset, Array), 'doc': 'the HDF5 file lazily evaluate'}, + @docval({'name': 'dataset', 'type': Dataset, 'doc': 'the HDF5 file lazily evaluate'}, {'name': 'io', 'type': 'hdmf.backends.hdf5.h5tools.HDF5IO', 'doc': 'the IO object that was used to read the underlying dataset'}, {'name': 'types', 'type': (list, tuple), @@ -199,9 +193,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.__refgetters = dict() for i, t in enumerate(types): - if t is RegionReference: - self.__refgetters[i] = self.__get_regref - elif t is Reference: + if t is Reference: self.__refgetters[i] = self._get_ref elif t is str: # we need this for when we read compound data types @@ -223,8 +215,6 @@ def __init__(self, **kwargs): t = sub.metadata['ref'] if t is Reference: tmp.append('object') - elif t is RegionReference: - tmp.append('region') else: tmp.append(sub.type.__name__) self.__dtype = tmp @@ -257,10 +247,6 @@ def _get_utf(self, string): """ return string.decode('utf-8') if isinstance(string, bytes) else string - def __get_regref(self, ref): - obj = self._get_ref(ref) - return obj[ref] - def resolve(self, manager): return self[0:len(self)] @@ -283,18 +269,6 @@ def dtype(self): return 'object' -class AbstractH5RegionDataset(AbstractH5ReferenceDataset): - - def __getitem__(self, arg): - obj = super().__getitem__(arg) - ref = self.dataset[arg] - return obj[ref] - - @property - def dtype(self): - return 'region' - - class ContainerH5TableDataset(ContainerResolverMixin, AbstractH5TableDataset): """ A reference-resolving dataset for resolving references inside tables @@ -339,28 +313,6 @@ def get_inverse_class(cls): return ContainerH5ReferenceDataset -class ContainerH5RegionDataset(ContainerResolverMixin, AbstractH5RegionDataset): - """ - A 
reference-resolving dataset for resolving region references that returns - resolved references as Containers - """ - - @classmethod - def get_inverse_class(cls): - return BuilderH5RegionDataset - - -class BuilderH5RegionDataset(BuilderResolverMixin, AbstractH5RegionDataset): - """ - A reference-resolving dataset for resolving region references that returns - resolved references as Builders - """ - - @classmethod - def get_inverse_class(cls): - return ContainerH5RegionDataset - - class H5SpecWriter(SpecWriter): __str_type = special_dtype(vlen=str) @@ -420,28 +372,6 @@ def read_namespace(self, ns_path): return ret -class H5RegionSlicer(RegionSlicer): - - @docval({'name': 'dataset', 'type': (Dataset, H5Dataset), 'doc': 'the HDF5 dataset to slice'}, - {'name': 'region', 'type': RegionReference, 'doc': 'the region reference to use to slice'}) - def __init__(self, **kwargs): - self.__dataset = getargs('dataset', kwargs) - self.__regref = getargs('region', kwargs) - self.__len = self.__dataset.regionref.selection(self.__regref)[0] - self.__region = None - - def __read_region(self): - if self.__region is None: - self.__region = self.__dataset[self.__regref] - - def __getitem__(self, idx): - self.__read_region() - return self.__region[idx] - - def __len__(self): - return self.__len - - class H5DataIO(DataIO): """ Wrap data arrays for write via HDF5IO to customize I/O behavior, such as compression and chunking diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 2afb35b9c..cf8219e51 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -7,19 +7,20 @@ import numpy as np import h5py -from h5py import File, Group, Dataset, special_dtype, SoftLink, ExternalLink, Reference, RegionReference, check_dtype +from h5py import File, Group, Dataset, special_dtype, SoftLink, ExternalLink, Reference, check_dtype -from .h5_utils import (BuilderH5ReferenceDataset, BuilderH5RegionDataset, BuilderH5TableDataset, H5DataIO, +from .h5_utils import (BuilderH5ReferenceDataset, BuilderH5TableDataset, H5DataIO, H5SpecReader, H5SpecWriter, HDF5IODataChunkIteratorQueue) from ..io import HDMFIO from ..errors import UnsupportedOperation from ..warnings import BrokenLinkWarning -from ...build import (Builder, GroupBuilder, DatasetBuilder, LinkBuilder, BuildManager, RegionBuilder, +from ...build import (Builder, GroupBuilder, DatasetBuilder, LinkBuilder, BuildManager, ReferenceBuilder, TypeMap, ObjectMapper) from ...container import Container from ...data_utils import AbstractDataChunkIterator from ...spec import RefSpec, DtypeSpec, NamespaceCatalog -from ...utils import docval, getargs, popargs, get_data_shape, get_docval, StrDataset +from ...utils import (docval, getargs, popargs, get_data_shape, get_docval, StrDataset, + get_basic_array_info, generate_array_html_repr) from ..utils import NamespaceToBuilderHelper, WriteStatusTracker ROOT_NAME = 'root' @@ -27,7 +28,6 @@ H5_TEXT = special_dtype(vlen=str) H5_BINARY = special_dtype(vlen=bytes) H5_REF = special_dtype(ref=Reference) -H5_REGREF = special_dtype(ref=RegionReference) RDCC_NBYTES = 32*2**20 # set raw data chunk cache size = 32 MiB @@ -694,14 +694,15 @@ def __read_dataset(self, h5obj, name=None): target = h5obj.file[scalar] target_builder = self.__read_dataset(target) self.__set_built(target.file.filename, target.id, target_builder) - if isinstance(scalar, RegionReference): - d = RegionBuilder(scalar, target_builder) - else: - d = ReferenceBuilder(target_builder) + d = ReferenceBuilder(target_builder) 
kwargs['data'] = d kwargs['dtype'] = d.dtype elif h5obj.dtype.kind == 'V': # scalar compound data type kwargs['data'] = np.array(scalar, dtype=h5obj.dtype) + cpd_dt = h5obj.dtype + ref_cols = [check_dtype(ref=cpd_dt[i]) or check_dtype(vlen=cpd_dt[i]) for i in range(len(cpd_dt))] + d = BuilderH5TableDataset(h5obj, self, ref_cols) + kwargs['dtype'] = HDF5IO.__compound_dtype_to_list(h5obj.dtype, d.dtype) else: kwargs["data"] = scalar else: @@ -710,9 +711,6 @@ def __read_dataset(self, h5obj, name=None): elem1 = h5obj[tuple([0] * (h5obj.ndim - 1) + [0])] if isinstance(elem1, (str, bytes)): d = self._check_str_dtype(h5obj) - elif isinstance(elem1, RegionReference): # read list of references - d = BuilderH5RegionDataset(h5obj, self) - kwargs['dtype'] = d.dtype elif isinstance(elem1, Reference): d = BuilderH5ReferenceDataset(h5obj, self) kwargs['dtype'] = d.dtype @@ -748,9 +746,7 @@ def __read_attrs(self, h5obj): for k, v in h5obj.attrs.items(): if k == SPEC_LOC_ATTR: # ignore cached spec continue - if isinstance(v, RegionReference): - raise ValueError("cannot read region reference attributes yet") - elif isinstance(v, Reference): + if isinstance(v, Reference): ret[k] = self.__read_ref(h5obj.file[v]) else: ret[k] = v @@ -919,10 +915,7 @@ def get_type(cls, data): "utf-8": H5_TEXT, "ascii": H5_BINARY, "bytes": H5_BINARY, - "ref": H5_REF, - "reference": H5_REF, "object": H5_REF, - "region": H5_REGREF, "isodatetime": H5_TEXT, "datetime": H5_TEXT, } @@ -1245,29 +1238,13 @@ def _filler(): dset = self.__scalar_fill__(parent, name, data, options) else: dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options) - # Write a dataset containing references, i.e., a region or object reference. + # Write a dataset containing references, i.e., object reference. + # NOTE: we can ignore options['io_settings'] for scalar data elif self.__is_ref(options['dtype']): _dtype = self.__dtypes.get(options['dtype']) - # Write a scalar data region reference dataset - if isinstance(data, RegionBuilder): - dset = parent.require_dataset(name, shape=(), dtype=_dtype) - self.__set_written(builder) - self.logger.debug("Queueing reference resolution and set attribute on dataset '%s' containing a " - "region reference. attributes: %s" - % (name, list(attributes.keys()))) - - @self.__queue_ref - def _filler(): - self.logger.debug("Resolving region reference and setting attribute on dataset '%s' " - "containing attributes: %s" - % (name, list(attributes.keys()))) - ref = self.__get_ref(data.builder, data.region) - dset = parent[name] - dset[()] = ref - self.set_attributes(dset, attributes) # Write a scalar object reference dataset - elif isinstance(data, ReferenceBuilder): + if isinstance(data, ReferenceBuilder): dset = parent.require_dataset(name, dtype=_dtype, shape=()) self.__set_written(builder) self.logger.debug("Queueing reference resolution and set attribute on dataset '%s' containing an " @@ -1285,44 +1262,24 @@ def _filler(): self.set_attributes(dset, attributes) # Write an array dataset of references else: - # Write a array of region references - if options['dtype'] == 'region': - dset = parent.require_dataset(name, dtype=_dtype, shape=(len(data),), **options['io_settings']) - self.__set_written(builder) - self.logger.debug("Queueing reference resolution and set attribute on dataset '%s' containing " - "region references. 
attributes: %s" - % (name, list(attributes.keys()))) - - @self.__queue_ref - def _filler(): - self.logger.debug("Resolving region references and setting attribute on dataset '%s' " - "containing attributes: %s" - % (name, list(attributes.keys()))) - refs = list() - for item in data: - refs.append(self.__get_ref(item.builder, item.region)) - dset = parent[name] - dset[()] = refs - self.set_attributes(dset, attributes) # Write array of object references - else: - dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, **options['io_settings']) - self.__set_written(builder) - self.logger.debug("Queueing reference resolution and set attribute on dataset '%s' containing " - "object references. attributes: %s" - % (name, list(attributes.keys()))) + dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, **options['io_settings']) + self.__set_written(builder) + self.logger.debug("Queueing reference resolution and set attribute on dataset '%s' containing " + "object references. attributes: %s" + % (name, list(attributes.keys()))) - @self.__queue_ref - def _filler(): - self.logger.debug("Resolving object references and setting attribute on dataset '%s' " - "containing attributes: %s" - % (name, list(attributes.keys()))) - refs = list() - for item in data: - refs.append(self.__get_ref(item)) - dset = parent[name] - dset[()] = refs - self.set_attributes(dset, attributes) + @self.__queue_ref + def _filler(): + self.logger.debug("Resolving object references and setting attribute on dataset '%s' " + "containing attributes: %s" + % (name, list(attributes.keys()))) + refs = list() + for item in data: + refs.append(self.__get_ref(item)) + dset = parent[name] + dset[()] = refs + self.set_attributes(dset, attributes) return # write a "regular" dataset else: @@ -1515,11 +1472,9 @@ def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, optio @docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference', 'default': None}, - {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object', - 'default': None}, returns='the reference', rtype=Reference) def __get_ref(self, **kwargs): - container, region = getargs('container', 'region', kwargs) + container = getargs('container', kwargs) if container is None: return None if isinstance(container, Builder): @@ -1537,20 +1492,10 @@ def __get_ref(self, **kwargs): path = self.__get_path(builder) self.logger.debug("Getting reference at path '%s'" % path) - if isinstance(container, RegionBuilder): - region = container.region - if region is not None: - dset = self.__file[path] - if not isinstance(dset, Dataset): - raise ValueError('cannot create region reference without Dataset') - return self.__file[path].regionref[region] - else: - return self.__file[path].ref + return self.__file[path].ref @docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference', 'default': None}, - {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object', - 'default': None}, returns='the reference', rtype=Reference) def _create_ref(self, **kwargs): return self.__get_ref(**kwargs) @@ -1563,7 +1508,7 @@ def __is_ref(self, dtype): if isinstance(dtype, dict): # may be dict from reading a compound dataset return self.__is_ref(dtype['dtype']) if isinstance(dtype, str): - return dtype == DatasetBuilder.OBJECT_REF_TYPE or dtype == DatasetBuilder.REGION_REF_TYPE + return dtype == 
DatasetBuilder.OBJECT_REF_TYPE return False def __queue_ref(self, func): @@ -1582,17 +1527,6 @@ def __queue_ref(self, func): # dependency self.__ref_queue.append(func) - def __rec_get_ref(self, ref_list): - ret = list() - for elem in ref_list: - if isinstance(elem, (list, tuple)): - ret.append(self.__rec_get_ref(elem)) - elif isinstance(elem, (Builder, Container)): - ret.append(self.__get_ref(elem)) - else: - ret.append(elem) - return ret - @property def mode(self): """ @@ -1616,3 +1550,35 @@ def set_dataio(cls, **kwargs): data = H5DataIO(data) """ return H5DataIO.__init__(**kwargs) + + @staticmethod + def generate_dataset_html(dataset): + """Generates an html representation for a dataset for the HDF5IO class""" + + array_info_dict = get_basic_array_info(dataset) + if isinstance(dataset, h5py.Dataset): + dataset_type = "HDF5 dataset" + # get info from hdf5 dataset + compressed_size = dataset.id.get_storage_size() + if hasattr(dataset, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0 + uncompressed_size = dataset.nbytes + else: + uncompressed_size = dataset.size * dataset.dtype.itemsize + compression_ratio = uncompressed_size / compressed_size if compressed_size != 0 else "undefined" + + hdf5_info_dict = { + "Chunk shape": dataset.chunks, + "Compression": dataset.compression, + "Compression opts": dataset.compression_opts, + "Compression ratio": compression_ratio, + } + array_info_dict.update(hdf5_info_dict) + + elif isinstance(dataset, np.ndarray): + dataset_type = "NumPy array" + else: + dataset_type = dataset.__class__.__name__ + + repr_html = generate_array_html_repr(array_info_dict, dataset, dataset_type) + + return repr_html diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 35023066f..86fd25b26 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -5,7 +5,7 @@ from ..build import BuildManager, GroupBuilder from ..container import Container, HERDManager from .errors import UnsupportedOperation -from ..utils import docval, getargs, popargs +from ..utils import docval, getargs, popargs, get_basic_array_info, generate_array_html_repr from warnings import warn @@ -188,6 +188,14 @@ def close(self): ''' Close this HDMFIO object to further reading/writing''' pass + @staticmethod + def generate_dataset_html(dataset): + """Generates an html representation for a dataset""" + array_info_dict = get_basic_array_info(dataset) + repr_html = generate_array_html_repr(array_info_dict, dataset) + + return repr_html + def __enter__(self): return self diff --git a/src/hdmf/build/__init__.py b/src/hdmf/build/__init__.py index ea5d21152..87e0ac57e 100644 --- a/src/hdmf/build/__init__.py +++ b/src/hdmf/build/__init__.py @@ -1,4 +1,4 @@ -from .builders import Builder, DatasetBuilder, GroupBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder +from .builders import Builder, DatasetBuilder, GroupBuilder, LinkBuilder, ReferenceBuilder from .classgenerator import CustomClassGenerator, MCIClassGenerator from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError, ConstructError) diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index 6ed453166..ba211df9f 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -6,7 +6,6 @@ from datetime import datetime, date import numpy as np -from h5py import RegionReference from ..utils import docval, getargs, get_docval @@ -320,11 +319,10 @@ def values(self): class DatasetBuilder(BaseBuilder): OBJECT_REF_TYPE = 'object' - 
REGION_REF_TYPE = 'region' @docval({'name': 'name', 'type': str, 'doc': 'The name of the dataset.'}, {'name': 'data', - 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', 'RegionBuilder', Iterable, datetime, date), + 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', Iterable, datetime, date), 'doc': 'The data in this dataset.', 'default': None}, {'name': 'dtype', 'type': (type, np.dtype, str, list), 'doc': 'The datatype of this dataset.', 'default': None}, @@ -437,20 +435,3 @@ def __init__(self, **kwargs): def builder(self): """The target builder object.""" return self['builder'] - - -class RegionBuilder(ReferenceBuilder): - - @docval({'name': 'region', 'type': (slice, tuple, list, RegionReference), - 'doc': 'The region, i.e. slice or indices, into the target dataset.'}, - {'name': 'builder', 'type': DatasetBuilder, 'doc': 'The dataset this region reference applies to.'}) - def __init__(self, **kwargs): - """Create a builder object for a region reference.""" - region, builder = getargs('region', 'builder', kwargs) - super().__init__(builder) - self['region'] = region - - @property - def region(self): - """The selected region of the target dataset.""" - return self['region'] diff --git a/src/hdmf/build/classgenerator.py b/src/hdmf/build/classgenerator.py index a3336b98e..3b7d7c96e 100644 --- a/src/hdmf/build/classgenerator.py +++ b/src/hdmf/build/classgenerator.py @@ -4,7 +4,7 @@ import numpy as np -from ..container import Container, Data, DataRegion, MultiContainerInterface +from ..container import Container, Data, MultiContainerInterface from ..spec import AttributeSpec, LinkSpec, RefSpec, GroupSpec from ..spec.spec import BaseStorageSpec, ZERO_OR_MANY, ONE_OR_MANY from ..utils import docval, getargs, ExtenderMeta, get_docval, popargs, AllowPositional @@ -195,7 +195,7 @@ def _ischild(cls, dtype): if isinstance(dtype, tuple): for sub in dtype: ret = ret or cls._ischild(sub) - elif isinstance(dtype, type) and issubclass(dtype, (Container, Data, DataRegion)): + elif isinstance(dtype, type) and issubclass(dtype, (Container, Data)): ret = True return ret diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index 967c34010..bc586013c 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -490,20 +490,6 @@ def load_namespaces(self, **kwargs): self.register_container_type(new_ns, dt, container_cls) return deps - @docval({"name": "namespace", "type": str, "doc": "the namespace containing the data_type"}, - {"name": "data_type", "type": str, "doc": "the data type to create a AbstractContainer class for"}, - {"name": "autogen", "type": bool, "doc": "autogenerate class if one does not exist", "default": True}, - returns='the class for the given namespace and data_type', rtype=type) - def get_container_cls(self, **kwargs): - """Get the container class from data type specification. - If no class has been associated with the ``data_type`` from ``namespace``, a class will be dynamically - created and returned. 
- """ - # NOTE: this internally used function get_container_cls will be removed in favor of get_dt_container_cls - # Deprecated: Will be removed by HDMF 4.0 - namespace, data_type, autogen = getargs('namespace', 'data_type', 'autogen', kwargs) - return self.get_dt_container_cls(data_type, namespace, autogen) - @docval({"name": "data_type", "type": str, "doc": "the data type to create a AbstractContainer class for"}, {"name": "namespace", "type": str, "doc": "the namespace containing the data_type", "default": None}, {'name': 'post_init_method', 'type': Callable, 'default': None, @@ -515,7 +501,7 @@ def get_dt_container_cls(self, **kwargs): If no class has been associated with the ``data_type`` from ``namespace``, a class will be dynamically created and returned. - Replaces get_container_cls but namespace is optional. If namespace is unknown, it will be looked up from + Namespace is optional. If namespace is unknown, it will be looked up from all namespaces. """ namespace, data_type, post_init_method, autogen = getargs('namespace', 'data_type', diff --git a/src/hdmf/build/map.py b/src/hdmf/build/map.py deleted file mode 100644 index 5267609f5..000000000 --- a/src/hdmf/build/map.py +++ /dev/null @@ -1,7 +0,0 @@ -# this prevents breaking of code that imports these classes directly from map.py -from .manager import Proxy, BuildManager, TypeSource, TypeMap # noqa: F401 -from .objectmapper import ObjectMapper # noqa: F401 - -import warnings -warnings.warn('Classes in map.py should be imported from hdmf.build. Importing from hdmf.build.map will be removed ' - 'in HDMF 3.0.', DeprecationWarning, stacklevel=2) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index fc25efc16..0126d7dd3 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -6,7 +6,7 @@ import numpy as np -from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, RegionBuilder, BaseBuilder +from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, BaseBuilder from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError, ConstructError) from .manager import Proxy, BuildManager @@ -15,13 +15,13 @@ IncorrectDatasetShapeBuildWarning) from hdmf.backends.hdf5.h5_utils import H5DataIO -from ..container import AbstractContainer, Data, DataRegion +from ..container import AbstractContainer, Data from ..term_set import TermSetWrapper from ..data_utils import DataIO, AbstractDataChunkIterator from ..query import ReferenceResolver from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec from ..spec.spec import BaseStorageSpec -from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape +from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape, StrDataset _const_arg = '__constructor_arg' @@ -212,7 +212,10 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 if (isinstance(value, np.ndarray) or (hasattr(value, 'astype') and hasattr(value, 'dtype'))): if spec_dtype_type is _unicode: - ret = value.astype('U') + if isinstance(value, StrDataset): + ret = value + else: + ret = value.astype('U') ret_dtype = "utf8" elif spec_dtype_type is _ascii: ret = value.astype('S') @@ -603,7 +606,10 @@ def __get_data_type(cls, spec): def __convert_string(self, value, spec): """Convert string types to the specified dtype.""" def __apply_string_type(value, string_type): - if isinstance(value, (list, tuple, 
np.ndarray, DataIO)): + # NOTE: if a user passes a h5py.Dataset that is not wrapped with a hdmf.utils.StrDataset, + # then this conversion may not be correct. Users should unpack their string h5py.Datasets + # into a numpy array (or wrap them in StrDataset) before passing them to a container object. + if hasattr(value, '__iter__') and not isinstance(value, (str, bytes)): return [__apply_string_type(item, string_type) for item in value] else: return string_type(value) @@ -957,6 +963,9 @@ def _filler(): for j, subt in refs: tmp[j] = self.__get_ref_builder(builder, subt.dtype, None, row[j], build_manager) bldr_data.append(tuple(tmp)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. + bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) builder.data = bldr_data return _filler @@ -975,46 +984,31 @@ def _filler(): else: target_builder = self.__get_target_builder(d, build_manager, builder) bldr_data.append(ReferenceBuilder(target_builder)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. + bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) builder.data = bldr_data return _filler def __get_ref_builder(self, builder, dtype, shape, container, build_manager): - bldr_data = None - if dtype.is_region(): - if shape is None: - if not isinstance(container, DataRegion): - msg = "'container' must be of type DataRegion if spec represents region reference" - raise ValueError(msg) - self.logger.debug("Setting %s '%s' data to region reference builder" - % (builder.__class__.__name__, builder.name)) - target_builder = self.__get_target_builder(container.data, build_manager, builder) - bldr_data = RegionBuilder(container.region, target_builder) - else: - self.logger.debug("Setting %s '%s' data to list of region reference builders" - % (builder.__class__.__name__, builder.name)) - bldr_data = list() - for d in container.data: - target_builder = self.__get_target_builder(d.target, build_manager, builder) - bldr_data.append(RegionBuilder(d.slice, target_builder)) + self.logger.debug("Setting object reference dataset on %s '%s' data" + % (builder.__class__.__name__, builder.name)) + if isinstance(container, Data): + self.logger.debug("Setting %s '%s' data to list of reference builders" + % (builder.__class__.__name__, builder.name)) + bldr_data = list() + for d in container.data: + target_builder = self.__get_target_builder(d, build_manager, builder) + bldr_data.append(ReferenceBuilder(target_builder)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. + bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) else: - self.logger.debug("Setting object reference dataset on %s '%s' data" + self.logger.debug("Setting %s '%s' data to reference builder" % (builder.__class__.__name__, builder.name)) - if isinstance(container, Data): - self.logger.debug("Setting %s '%s' data to list of reference builders" - % (builder.__class__.__name__, builder.name)) - bldr_data = list() - for d in container.data: - target_builder = self.__get_target_builder(d, build_manager, builder) - bldr_data.append(ReferenceBuilder(target_builder)) - if isinstance(container.data, H5DataIO): - # This is here to support appending a dataset of references. 
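For illustration, a minimal sketch of the append pattern this H5DataIO passthrough supports; the table and column names and the index values here are hypothetical, and the key idea is wrapping a reference column's data in H5DataIO with maxshape=(None,) so the written dataset of object references stays expandable:

    from hdmf.backends.hdf5 import H5DataIO
    from hdmf.common import DynamicTable, DynamicTableRegion

    targets = DynamicTable(name="targets", description="table being referenced")
    targets.add_row()
    targets.add_row()

    # Because the region data is wrapped in H5DataIO, its I/O settings (via get_io_params(),
    # e.g. maxshape) are carried over to the builder of the reference dataset, keeping the
    # on-disk dataset of object references appendable after a round trip.
    region = DynamicTableRegion(
        name="region",
        description="rows of 'targets'",
        table=targets,
        data=H5DataIO(data=[0, 1], maxshape=(None,)),
    )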
- bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) - else: - self.logger.debug("Setting %s '%s' data to reference builder" - % (builder.__class__.__name__, builder.name)) - target_builder = self.__get_target_builder(container, build_manager, builder) - bldr_data = ReferenceBuilder(target_builder) + target_builder = self.__get_target_builder(container, build_manager, builder) + bldr_data = ReferenceBuilder(target_builder) return bldr_data def __get_target_builder(self, container, build_manager, builder): @@ -1258,8 +1252,6 @@ def __get_subspec_values(self, builder, spec, manager): continue if isinstance(attr_val, (GroupBuilder, DatasetBuilder)): ret[attr_spec] = manager.construct(attr_val) - elif isinstance(attr_val, RegionBuilder): # pragma: no cover - raise ValueError("RegionReferences as attributes is not yet supported") elif isinstance(attr_val, ReferenceBuilder): ret[attr_spec] = manager.construct(attr_val.builder) else: diff --git a/src/hdmf/common/__init__.py b/src/hdmf/common/__init__.py index 5c9d9a3b7..6b36e29cd 100644 --- a/src/hdmf/common/__init__.py +++ b/src/hdmf/common/__init__.py @@ -108,11 +108,7 @@ def _dec(cls): def __get_resources(): - try: - from importlib.resources import files - except ImportError: - # TODO: Remove when python 3.9 becomes the new minimum - from importlib_resources import files + from importlib.resources import files __location_of_this_file = files(__name__) __core_ns_file_name = 'namespace.yaml' diff --git a/src/hdmf/common/io/table.py b/src/hdmf/common/io/table.py index 50395ba24..379553c07 100644 --- a/src/hdmf/common/io/table.py +++ b/src/hdmf/common/io/table.py @@ -78,12 +78,11 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i required=field_spec.required ) dtype = cls._get_type(field_spec, type_map) + column_conf['class'] = dtype if issubclass(dtype, DynamicTableRegion): # the spec does not know which table this DTR points to # the user must specify the table attribute on the DTR after it is generated column_conf['table'] = True - else: - column_conf['class'] = dtype index_counter = 0 index_name = attr_name diff --git a/src/hdmf/common/sparse.py b/src/hdmf/common/sparse.py index db38d12e8..0dd7d9654 100644 --- a/src/hdmf/common/sparse.py +++ b/src/hdmf/common/sparse.py @@ -1,4 +1,11 @@ -import scipy.sparse as sps +try: + from scipy.sparse import csr_matrix + SCIPY_INSTALLED = True +except ImportError: + SCIPY_INSTALLED = False + class csr_matrix: # dummy class to prevent import errors + pass + from . import register_class from ..container import Container from ..utils import docval, popargs, to_uint_array, get_data_shape, AllowPositional @@ -7,7 +14,7 @@ @register_class('CSRMatrix') class CSRMatrix(Container): - @docval({'name': 'data', 'type': (sps.csr_matrix, 'array_data'), + @docval({'name': 'data', 'type': (csr_matrix, 'array_data'), 'doc': 'the data to use for this CSRMatrix or CSR data array.' 'If passing CSR data array, *indices*, *indptr*, and *shape* must also be provided'}, {'name': 'indices', 'type': 'array_data', 'doc': 'CSR index array', 'default': None}, @@ -16,13 +23,17 @@ class CSRMatrix(Container): {'name': 'name', 'type': str, 'doc': 'the name to use for this when storing', 'default': 'csr_matrix'}, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): + if not SCIPY_INSTALLED: + raise ImportError( + "scipy must be installed to use CSRMatrix. Please install scipy using `pip install scipy`." 
+ ) data, indices, indptr, shape = popargs('data', 'indices', 'indptr', 'shape', kwargs) super().__init__(**kwargs) - if not isinstance(data, sps.csr_matrix): + if not isinstance(data, csr_matrix): temp_shape = get_data_shape(data) temp_ndim = len(temp_shape) if temp_ndim == 2: - data = sps.csr_matrix(data) + data = csr_matrix(data) elif temp_ndim == 1: if any(_ is None for _ in (indptr, indices, shape)): raise ValueError("Must specify 'indptr', 'indices', and 'shape' arguments when passing data array.") @@ -31,9 +42,10 @@ def __init__(self, **kwargs): shape = self.__check_arr(shape, 'shape') if len(shape) != 2: raise ValueError("'shape' argument must specify two and only two dimensions.") - data = sps.csr_matrix((data, indices, indptr), shape=shape) + data = csr_matrix((data, indices, indptr), shape=shape) else: raise ValueError("'data' argument cannot be ndarray of dimensionality > 2.") + # self.__data is a scipy.sparse.csr_matrix self.__data = data @staticmethod diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index b4530c7b7..2f6401672 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -521,7 +521,7 @@ def _init_class_columns(self): description=col['description'], index=col.get('index', False), table=col.get('table', False), - col_cls=col.get('class', VectorData), + col_cls=col.get('class'), # Pass through extra kwargs for add_column that subclasses may have added **{k: col[k] for k in col.keys() if k not in DynamicTable.__reserved_colspec_keys}) @@ -564,10 +564,13 @@ def _set_dtr_targets(self, target_tables: dict): if not column_conf.get('table', False): raise ValueError("Column '%s' must be a DynamicTableRegion to have a target table." % colname) - self.add_column(name=column_conf['name'], - description=column_conf['description'], - index=column_conf.get('index', False), - table=True) + self.add_column( + name=column_conf['name'], + description=column_conf['description'], + index=column_conf.get('index', False), + table=True, + col_cls=column_conf.get('class'), + ) if isinstance(self[colname], VectorIndex): col = self[colname].target else: @@ -681,7 +684,7 @@ def add_row(self, **kwargs): index=col.get('index', False), table=col.get('table', False), enum=col.get('enum', False), - col_cls=col.get('class', VectorData), + col_cls=col.get('class'), # Pass through extra keyword arguments for add_column that # subclasses may have added **{k: col[k] for k in col.keys() @@ -753,7 +756,7 @@ def __eq__(self, other): 'default': False}, {'name': 'enum', 'type': (bool, 'array_data'), 'default': False, 'doc': ('whether or not this column contains data from a fixed set of elements')}, - {'name': 'col_cls', 'type': type, 'default': VectorData, + {'name': 'col_cls', 'type': type, 'default': None, 'doc': ('class to use to represent the column data. If table=True, this field is ignored and a ' 'DynamicTableRegion object is used. If enum=True, this field is ignored and a EnumData ' 'object is used.')}, @@ -775,8 +778,8 @@ def add_column(self, **kwargs): # noqa: C901 index, table, enum, col_cls, check_ragged = popargs('index', 'table', 'enum', 'col_cls', 'check_ragged', kwargs) if isinstance(index, VectorIndex): - warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be " - "deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=3) + msg = "Passing a VectorIndex may lead to unexpected behavior. This functionality is not supported." 
+ raise ValueError(msg) if name in self.__colids: # column has already been added msg = "column '%s' already exists in %s '%s'" % (name, self.__class__.__name__, self.name) @@ -805,29 +808,39 @@ def add_column(self, **kwargs): # noqa: C901 % (name, self.__class__.__name__, spec_index)) warn(msg, stacklevel=3) - spec_col_cls = self.__uninit_cols[name].get('class', VectorData) - if col_cls != spec_col_cls: - msg = ("Column '%s' is predefined in %s with class=%s which does not match the entered " - "col_cls argument. The predefined class spec will be ignored. " - "Please ensure the new column complies with the spec. " - "This will raise an error in a future version of HDMF." - % (name, self.__class__.__name__, spec_col_cls)) - warn(msg, stacklevel=2) - ckwargs = dict(kwargs) # Add table if it's been specified if table and enum: raise ValueError("column '%s' cannot be both a table region " "and come from an enumerable set of elements" % name) + # Update col_cls if table is specified if table is not False: - col_cls = DynamicTableRegion + if col_cls is None: + col_cls = DynamicTableRegion if isinstance(table, DynamicTable): ckwargs['table'] = table + # Update col_cls if enum is specified if enum is not False: - col_cls = EnumData + if col_cls is None: + col_cls = EnumData if isinstance(enum, (list, tuple, np.ndarray, VectorData)): ckwargs['elements'] = enum + # Update col_cls to the default VectorData if col_cls is None + if col_cls is None: + col_cls = VectorData + + if name in self.__uninit_cols: # column is a predefined optional column from the spec + # check the given values against the predefined optional column spec. if they do not match, raise a warning + # and ignore the given arguments. users should not be able to override these values + spec_col_cls = self.__uninit_cols[name].get('class') + if spec_col_cls is not None and col_cls != spec_col_cls: + msg = ("Column '%s' is predefined in %s with class=%s which does not match the entered " + "col_cls argument. The predefined class spec will be ignored. " + "Please ensure the new column complies with the spec. " + "This will raise an error in a future version of HDMF." 
+ % (name, self.__class__.__name__, spec_col_cls)) + warn(msg, stacklevel=2) # If the user provided a list of lists that needs to be indexed, then we now need to flatten the data # We can only create the index actual VectorIndex once we have the VectorData column so we compute @@ -873,7 +886,7 @@ def add_column(self, **kwargs): # noqa: C901 if col in self.__uninit_cols: self.__uninit_cols.pop(col) - if col_cls is EnumData: + if issubclass(col_cls, EnumData): columns.append(col.elements) col.elements.parent = self diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 88a083599..ce4e8b821 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -1,8 +1,7 @@ import types -from abc import abstractmethod from collections import OrderedDict from copy import deepcopy -from typing import Type +from typing import Type, Optional from uuid import uuid4 from warnings import warn import os @@ -11,8 +10,9 @@ import numpy as np import pandas as pd -from .data_utils import DataIO, append_data, extend_data -from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict +from .data_utils import DataIO, append_data, extend_data, AbstractDataChunkIterator +from .utils import (docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict, + get_basic_array_info, generate_array_html_repr) from .term_set import TermSet, TermSetWrapper @@ -302,8 +302,8 @@ def __new__(cls, *args, **kwargs): @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'}) def __init__(self, **kwargs): name = getargs('name', kwargs) - if '/' in name: - raise ValueError("name '" + name + "' cannot contain '/'") + if ('/' in name or ':' in name) and not self._in_construct_mode: + raise ValueError(f"name '{name}' cannot contain a '/' or ':'") self.__name = name self.__field_values = dict() self.__read_io = None @@ -466,21 +466,6 @@ def set_modified(self, **kwargs): def children(self): return tuple(self.__children) - @docval({'name': 'child', 'type': 'Container', - 'doc': 'the child Container for this Container', 'default': None}) - def add_child(self, **kwargs): - warn(DeprecationWarning('add_child is deprecated. Set the parent attribute instead.')) - child = getargs('child', kwargs) - if child is not None: - # if child.parent is a Container, then the mismatch between child.parent and parent - # is used to make a soft/external link from the parent to a child elsewhere - # if child.parent is not a Container, it is either None or a Proxy and should be set to self - if not isinstance(child.parent, AbstractContainer): - # actually add the child to the parent in parent setter - child.parent = self - else: - warn('Cannot add None as child to a container %s' % self.name) - @classmethod def type_hierarchy(cls): return cls.__mro__ @@ -707,8 +692,6 @@ def _generate_html_repr(self, fields, level=0, access_code="", is_field=False): for index, item in enumerate(fields): access_code += f'[{index}]' html_repr += self._generate_field_html(index, item, level, access_code) - elif isinstance(fields, np.ndarray): - html_repr += self._generate_array_html(fields, level) else: pass @@ -724,18 +707,26 @@ def _generate_field_html(self, key, value, level, access_code): return f'
<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key" title="{access_code}">{key}: </span><span class="field-value">{value}</span></div>
' - if hasattr(value, "generate_html_repr"): - html_content = value.generate_html_repr(level + 1, access_code) + # Detects array-like objects that conform to the Array Interface specification + # (e.g., NumPy arrays, HDF5 datasets, DataIO objects). Objects must have both + # 'shape' and 'dtype' attributes. Iterators are excluded as they lack 'shape'. + # This approach keeps the implementation generic without coupling to specific backends methods + is_array_data = hasattr(value, "shape") and hasattr(value, "dtype") + if is_array_data: + html_content = self._generate_array_html(value, level + 1) + elif hasattr(value, "generate_html_repr"): + html_content = value.generate_html_repr(level + 1, access_code) elif hasattr(value, '__repr_html__'): html_content = value.__repr_html__() - - elif hasattr(value, "fields"): + elif hasattr(value, "fields"): # Note that h5py.Dataset has a fields attribute so there is an implicit order html_content = self._generate_html_repr(value.fields, level + 1, access_code, is_field=True) elif isinstance(value, (list, dict, np.ndarray)): html_content = self._generate_html_repr(value, level + 1, access_code, is_field=False) else: html_content = f'{value}' + + html_repr = ( f'
{key}' @@ -745,10 +736,33 @@ return html_repr + def _generate_array_html(self, array, level): - """Generates HTML for a NumPy array.""" - str_ = str(array).replace("\n", "</br>") - return f'<div style="margin-left: {level * 20}px;" class="container-fields">{str_}</div>
' + """Generates HTML for array data (e.g., NumPy arrays, HDF5 datasets, Zarr datasets and DataIO objects).""" + + is_numpy_array = isinstance(array, np.ndarray) + read_io = self.get_read_io() + it_was_read_with_io = read_io is not None + is_data_io = isinstance(array, DataIO) + + if is_numpy_array: + array_info_dict = get_basic_array_info(array) + repr_html = generate_array_html_repr(array_info_dict, array, "NumPy array") + elif is_data_io: + array_info_dict = get_basic_array_info(array.data) + repr_html = generate_array_html_repr(array_info_dict, array.data, "DataIO") + elif it_was_read_with_io: + # The backend handles the representation here. Two special cases worth noting: + # 1. Array-type attributes (e.g., start_frame in ImageSeries) remain NumPy arrays + # even when their parent container has an IO + # 2. Data may have been modified after being read from storage + repr_html = read_io.generate_dataset_html(array) + else: # Not sure which object could get here + object_class = array.__class__.__name__ + array_info_dict = get_basic_array_info(array.data) + repr_html = generate_array_html_repr(array_info_dict, array.data, object_class) + + return f'
<div style="margin-left: {level * 20}px;" class="container-fields">{repr_html}</div>
' @staticmethod def __smart_str(v, num_indent): @@ -826,7 +840,14 @@ def __smart_str_dict(d, num_indent): out += '\n' + indent + right_br return out - def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kwargs: dict = None, **kwargs): + def set_data_io( + self, + dataset_name: str, + data_io_class: Type[DataIO], + data_io_kwargs: dict = None, + data_chunk_iterator_class: Optional[Type[AbstractDataChunkIterator]] = None, + data_chunk_iterator_kwargs: dict = None, **kwargs + ): """ Apply DataIO object to a dataset field of the Container. @@ -838,9 +859,18 @@ def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kw Class to use for DataIO, e.g. H5DataIO or ZarrDataIO data_io_kwargs: dict keyword arguments passed to the constructor of the DataIO class. + data_chunk_iterator_class: Type[AbstractDataChunkIterator] + Class to use for DataChunkIterator. If None, no DataChunkIterator is used. + data_chunk_iterator_kwargs: dict + keyword arguments passed to the constructor of the DataChunkIterator class. **kwargs: DEPRECATED. Use data_io_kwargs instead. kwargs are passed to the constructor of the DataIO class. + + Notes + ----- + If data_chunk_iterator_class is not None, the data is wrapped in the DataChunkIterator before being wrapped in + the DataIO. This allows for rewriting the backend configuration of hdf5 datasets. """ if kwargs or (data_io_kwargs is None): warn( @@ -851,8 +881,11 @@ def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kw ) data_io_kwargs = kwargs data = self.fields.get(dataset_name) + data_chunk_iterator_kwargs = data_chunk_iterator_kwargs or dict() if data is None: raise ValueError(f"{dataset_name} is None and cannot be wrapped in a DataIO class") + if data_chunk_iterator_class is not None: + data = data_chunk_iterator_class(data=data, **data_chunk_iterator_kwargs) self.fields[dataset_name] = data_io_class(data=data, **data_io_kwargs) @@ -882,21 +915,13 @@ def shape(self): """ return get_data_shape(self.__data) - @docval({'name': 'dataio', 'type': DataIO, 'doc': 'the DataIO to apply to the data held by this Data'}) - def set_dataio(self, **kwargs): - """ - Apply DataIO object to the data held by this Data object - """ - warn( - "Data.set_dataio() is deprecated. Please use Data.set_data_io() instead.", - DeprecationWarning, - stacklevel=3, - ) - dataio = getargs('dataio', kwargs) - dataio.data = self.__data - self.__data = dataio - - def set_data_io(self, data_io_class: Type[DataIO], data_io_kwargs: dict) -> None: + def set_data_io( + self, + data_io_class: Type[DataIO], + data_io_kwargs: dict, + data_chunk_iterator_class: Optional[Type[AbstractDataChunkIterator]] = None, + data_chunk_iterator_kwargs: dict = None, + ) -> None: """ Apply DataIO object to the data held by this Data object. @@ -906,8 +931,21 @@ def set_data_io(self, data_io_class: Type[DataIO], data_io_kwargs: dict) -> None The DataIO to apply to the data held by this Data. data_io_kwargs: dict The keyword arguments to pass to the DataIO. + data_chunk_iterator_class: Type[AbstractDataChunkIterator] + The DataChunkIterator to use for the DataIO. If None, no DataChunkIterator is used. + data_chunk_iterator_kwargs: dict + The keyword arguments to pass to the DataChunkIterator. + + Notes + ----- + If data_chunk_iterator_class is not None, the data is wrapped in the DataChunkIterator before being wrapped in + the DataIO. This allows for rewriting the backend configuration of hdf5 datasets. 
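For illustration, a minimal usage sketch of set_data_io with the new data_chunk_iterator_class argument; the column name, data, and I/O settings here are hypothetical:

    from hdmf.backends.hdf5 import H5DataIO
    from hdmf.common import VectorData
    from hdmf.data_utils import DataChunkIterator

    col = VectorData(name="values", description="example column", data=[1, 2, 3, 4])
    # The data is wrapped first in the DataChunkIterator and then in H5DataIO, so HDF5IO
    # writes the dataset in buffered chunks with gzip compression.
    col.set_data_io(
        data_io_class=H5DataIO,
        data_io_kwargs=dict(compression="gzip"),
        data_chunk_iterator_class=DataChunkIterator,
        data_chunk_iterator_kwargs=dict(buffer_size=2),
    )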
""" - self.__data = data_io_class(data=self.__data, **data_io_kwargs) + data_chunk_iterator_kwargs = data_chunk_iterator_kwargs or dict() + data = self.__data + if data_chunk_iterator_class is not None: + data = data_chunk_iterator_class(data=data, **data_chunk_iterator_kwargs) + self.__data = data_io_class(data=data, **data_io_kwargs) @docval({'name': 'func', 'type': types.FunctionType, 'doc': 'a function to transform *data*'}) def transform(self, **kwargs): @@ -972,25 +1010,6 @@ def _validate_new_data_element(self, arg): pass -class DataRegion(Data): - - @property - @abstractmethod - def data(self): - ''' - The target data that this region applies to - ''' - pass - - @property - @abstractmethod - def region(self): - ''' - The region that indexes into data e.g. slice or list of indices - ''' - pass - - class MultiContainerInterface(Container): """Class that dynamically defines methods to support a Container holding multiple Containers of the same type. diff --git a/src/hdmf/query.py b/src/hdmf/query.py index 9693b0b1c..abe2a93a7 100644 --- a/src/hdmf/query.py +++ b/src/hdmf/query.py @@ -2,143 +2,24 @@ import numpy as np -from .array import Array from .utils import ExtenderMeta, docval_macro, docval, getargs -class Query(metaclass=ExtenderMeta): - __operations__ = ( - '__lt__', - '__gt__', - '__le__', - '__ge__', - '__eq__', - '__ne__', - ) - - @classmethod - def __build_operation(cls, op): - def __func(self, arg): - return cls(self, op, arg) - - @ExtenderMeta.pre_init - def __make_operators(cls, name, bases, classdict): - if not isinstance(cls.__operations__, tuple): - raise TypeError("'__operations__' must be of type tuple") - # add any new operations - if len(bases) and 'Query' in globals() and issubclass(bases[-1], Query) \ - and bases[-1].__operations__ is not cls.__operations__: - new_operations = list(cls.__operations__) - new_operations[0:0] = bases[-1].__operations__ - cls.__operations__ = tuple(new_operations) - for op in cls.__operations__: - if not hasattr(cls, op): - setattr(cls, op, cls.__build_operation(op)) - - def __init__(self, obj, op, arg): - self.obj = obj - self.op = op - self.arg = arg - self.collapsed = None - self.expanded = None - - @docval({'name': 'expand', 'type': bool, 'help': 'whether or not to expand result', 'default': True}) - def evaluate(self, **kwargs): - expand = getargs('expand', kwargs) - if expand: - if self.expanded is None: - self.expanded = self.__evalhelper() - return self.expanded - else: - if self.collapsed is None: - self.collapsed = self.__collapse(self.__evalhelper()) - return self.collapsed - - def __evalhelper(self): - obj = self.obj - arg = self.arg - if isinstance(obj, Query): - obj = obj.evaluate() - elif isinstance(obj, HDMFDataset): - obj = obj.dataset - if isinstance(arg, Query): - arg = self.arg.evaluate() - return getattr(obj, self.op)(self.arg) - - def __collapse(self, result): - if isinstance(result, slice): - return (result.start, result.stop) - elif isinstance(result, list): - ret = list() - for idx in result: - if isinstance(idx, slice) and (idx.step is None or idx.step == 1): - ret.append((idx.start, idx.stop)) - else: - ret.append(idx) - return ret - else: - return result - - def __and__(self, other): - return NotImplemented - - def __or__(self, other): - return NotImplemented - - def __xor__(self, other): - return NotImplemented - - def __contains__(self, other): - return NotImplemented - - @docval_macro('array_data') class HDMFDataset(metaclass=ExtenderMeta): - __operations__ = ( - '__lt__', - '__gt__', - '__le__', - 
'__ge__', - '__eq__', - '__ne__', - ) - - @classmethod - def __build_operation(cls, op): - def __func(self, arg): - return Query(self, op, arg) - - setattr(__func, '__name__', op) - return __func - - @ExtenderMeta.pre_init - def __make_operators(cls, name, bases, classdict): - if not isinstance(cls.__operations__, tuple): - raise TypeError("'__operations__' must be of type tuple") - # add any new operations - if len(bases) and 'Query' in globals() and issubclass(bases[-1], Query) \ - and bases[-1].__operations__ is not cls.__operations__: - new_operations = list(cls.__operations__) - new_operations[0:0] = bases[-1].__operations__ - cls.__operations__ = tuple(new_operations) - for op in cls.__operations__: - setattr(cls, op, cls.__build_operation(op)) - def __evaluate_key(self, key): if isinstance(key, tuple) and len(key) == 0: return key if isinstance(key, (tuple, list, np.ndarray)): return list(map(self.__evaluate_key, key)) else: - if isinstance(key, Query): - return key.evaluate() return key def __getitem__(self, key): idx = self.__evaluate_key(key) return self.dataset[idx] - @docval({'name': 'dataset', 'type': ('array_data', Array), 'doc': 'the HDF5 file lazily evaluate'}) + @docval({'name': 'dataset', 'type': 'array_data', 'doc': 'the HDF5 file lazily evaluate'}) def __init__(self, **kwargs): super().__init__() self.__dataset = getargs('dataset', kwargs) diff --git a/src/hdmf/region.py b/src/hdmf/region.py deleted file mode 100644 index 9feeba401..000000000 --- a/src/hdmf/region.py +++ /dev/null @@ -1,91 +0,0 @@ -from abc import ABCMeta, abstractmethod -from operator import itemgetter - -from .container import Data, DataRegion -from .utils import docval, getargs - - -class RegionSlicer(DataRegion, metaclass=ABCMeta): - ''' - A abstract base class to control getting using a region - - Subclasses must implement `__getitem__` and `__len__` - ''' - - @docval({'name': 'target', 'type': None, 'doc': 'the target to slice'}, - {'name': 'slice', 'type': None, 'doc': 'the region to slice'}) - def __init__(self, **kwargs): - self.__target = getargs('target', kwargs) - self.__slice = getargs('slice', kwargs) - - @property - def data(self): - """The target data. Same as self.target""" - return self.target - - @property - def region(self): - """The selected region. 
Same as self.slice""" - return self.slice - - @property - def target(self): - """The target data""" - return self.__target - - @property - def slice(self): - """The selected slice""" - return self.__slice - - @property - @abstractmethod - def __getitem__(self, idx): - """Must be implemented by subclasses""" - pass - - @property - @abstractmethod - def __len__(self): - """Must be implemented by subclasses""" - pass - - -class ListSlicer(RegionSlicer): - """Implementation of RegionSlicer for slicing Lists and Data""" - - @docval({'name': 'dataset', 'type': (list, tuple, Data), 'doc': 'the dataset to slice'}, - {'name': 'region', 'type': (list, tuple, slice), 'doc': 'the region reference to use to slice'}) - def __init__(self, **kwargs): - self.__dataset, self.__region = getargs('dataset', 'region', kwargs) - super().__init__(self.__dataset, self.__region) - if isinstance(self.__region, slice): - self.__getter = itemgetter(self.__region) - self.__len = len(range(*self.__region.indices(len(self.__dataset)))) - else: - self.__getter = itemgetter(*self.__region) - self.__len = len(self.__region) - - def __read_region(self): - """ - Internal helper function used to define self._read - """ - if not hasattr(self, '_read'): - self._read = self.__getter(self.__dataset) - del self.__getter - - def __getitem__(self, idx): - """ - Get data values from selected data - """ - self.__read_region() - getter = None - if isinstance(idx, (list, tuple)): - getter = itemgetter(*idx) - else: - getter = itemgetter(idx) - return getter(self._read) - - def __len__(self): - """Number of values in the slice/region""" - return self.__len diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index e10d5e43e..bbd97b592 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -38,7 +38,6 @@ class DtypeHelper: 'uint32': ["uint32", "uint"], 'uint64': ["uint64"], 'object': ['object'], - 'region': ['region'], 'numeric': ['numeric'], 'isodatetime': ["isodatetime", "datetime", "date"] } @@ -174,12 +173,13 @@ def path(self): _ref_args = [ {'name': _target_type_key, 'type': str, 'doc': 'the target type GroupSpec or DatasetSpec'}, - {'name': 'reftype', 'type': str, 'doc': 'the type of references this is i.e. region or object'}, + {'name': 'reftype', 'type': str, + 'doc': 'the type of reference this is. only "object" is supported currently.'}, ] class RefSpec(ConstructableDict): - __allowable_types = ('object', 'region') + __allowable_types = ('object', ) @docval(*_ref_args) def __init__(self, **kwargs): @@ -200,10 +200,6 @@ def reftype(self): '''The type of reference''' return self['reftype'] - @docval(rtype=bool, returns='True if this RefSpec specifies a region reference, False otherwise') - def is_region(self): - return self['reftype'] == 'region' - _attr_args = [ {'name': 'name', 'type': str, 'doc': 'The name of this attribute'}, @@ -314,12 +310,18 @@ class BaseStorageSpec(Spec): def __init__(self, **kwargs): name, doc, quantity, attributes, linkable, data_type_def, data_type_inc = \ getargs('name', 'doc', 'quantity', 'attributes', 'linkable', 'data_type_def', 'data_type_inc', kwargs) + if name is not None and "/" in name: + raise ValueError(f"Name '{name}' is invalid. Names of Groups and Datasets cannot contain '/'") if name is None and data_type_def is None and data_type_inc is None: raise ValueError("Cannot create Group or Dataset spec with no name " "without specifying '%s' and/or '%s'." 
% (self.def_key(), self.inc_key())) super().__init__(doc, name=name) default_name = getargs('default_name', kwargs) if default_name: + if "/" in default_name: + raise ValueError( + f"Default name '{default_name}' is invalid. Names of Groups and Datasets cannot contain '/'" + ) if name is not None: warn("found 'default_name' with 'name' - ignoring 'default_name'") else: diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 5e0b61539..c21382a2a 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -382,8 +382,6 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, for key in extras.keys(): type_errors.append("unrecognized argument: '%s'" % key) else: - # TODO: Extras get stripped out if function arguments are composed with fmt_docval_args. - # allow_extra needs to be tracked on a function so that fmt_docval_args doesn't strip them out for key in extras.keys(): ret[key] = extras[key] return {'args': ret, 'future_warnings': future_warnings, 'type_errors': type_errors, 'value_errors': value_errors, @@ -414,95 +412,6 @@ def get_docval(func, *args): return tuple() -# def docval_wrap(func, is_method=True): -# if is_method: -# @docval(*get_docval(func)) -# def method(self, **kwargs): -# -# return call_docval_args(func, kwargs) -# return method -# else: -# @docval(*get_docval(func)) -# def static_method(**kwargs): -# return call_docval_args(func, kwargs) -# return method - - -def fmt_docval_args(func, kwargs): - ''' Separate positional and keyword arguments - - Useful for methods that wrap other methods - ''' - warnings.warn("fmt_docval_args will be deprecated in a future version of HDMF. Instead of using fmt_docval_args, " - "call the function directly with the kwargs. Please note that fmt_docval_args " - "removes all arguments not accepted by the function's docval, so if you are passing kwargs that " - "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True " - "is set), then you will need to pop the extra arguments out of kwargs before calling the function.", - PendingDeprecationWarning, stacklevel=2) - func_docval = getattr(func, docval_attr_name, None) - ret_args = list() - ret_kwargs = dict() - kwargs_copy = _copy.copy(kwargs) - if func_docval: - for arg in func_docval[__docval_args_loc]: - val = kwargs_copy.pop(arg['name'], None) - if 'default' in arg: - if val is not None: - ret_kwargs[arg['name']] = val - else: - ret_args.append(val) - if func_docval['allow_extra']: - ret_kwargs.update(kwargs_copy) - else: - raise ValueError('no docval found on %s' % str(func)) - return ret_args, ret_kwargs - - -# def _remove_extra_args(func, kwargs): -# """Return a dict of only the keyword arguments that are accepted by the function's docval. -# -# If the docval specifies allow_extra=True, then the original kwargs are returned. 
-# """ -# # NOTE: this has the same functionality as the to-be-deprecated fmt_docval_args except that -# # kwargs are kept as kwargs instead of parsed into args and kwargs -# func_docval = getattr(func, docval_attr_name, None) -# if func_docval: -# if func_docval['allow_extra']: -# # if extra args are allowed, return all args -# return kwargs -# else: -# # save only the arguments listed in the function's docval (skip any others present in kwargs) -# ret_kwargs = dict() -# for arg in func_docval[__docval_args_loc]: -# val = kwargs.get(arg['name'], None) -# if val is not None: # do not return arguments that are not present or have value None -# ret_kwargs[arg['name']] = val -# return ret_kwargs -# else: -# raise ValueError('No docval found on %s' % str(func)) - - -def call_docval_func(func, kwargs): - """Call the function with only the keyword arguments that are accepted by the function's docval. - - Extra keyword arguments are not passed to the function unless the function's docval has allow_extra=True. - """ - warnings.warn("call_docval_func will be deprecated in a future version of HDMF. Instead of using call_docval_func, " - "call the function directly with the kwargs. Please note that call_docval_func " - "removes all arguments not accepted by the function's docval, so if you are passing kwargs that " - "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True " - "is set), then you will need to pop the extra arguments out of kwargs before calling the function.", - PendingDeprecationWarning, stacklevel=2) - with warnings.catch_warnings(record=True): - # catch and ignore only PendingDeprecationWarnings from fmt_docval_args so that two - # PendingDeprecationWarnings saying the same thing are not raised - warnings.simplefilter("ignore", UserWarning) - warnings.simplefilter("always", PendingDeprecationWarning) - fargs, fkwargs = fmt_docval_args(func, kwargs) - - return func(*fargs, **fkwargs) - - def __resolve_type(t): if t is None: return t @@ -967,6 +876,62 @@ def is_ragged(data): return False +def get_basic_array_info(array): + def convert_bytes_to_str(bytes_size): + suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'] + i = 0 + while bytes_size >= 1024 and i < len(suffixes)-1: + bytes_size /= 1024. + i += 1 + return f"{bytes_size:.2f} {suffixes[i]}" + + if hasattr(array, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0 + array_size_in_bytes = array.nbytes + else: + array_size_in_bytes = array.size * array.dtype.itemsize + array_size_repr = convert_bytes_to_str(array_size_in_bytes) + basic_array_info_dict = {"Data type": array.dtype, "Shape": array.shape, "Array size": array_size_repr} + + return basic_array_info_dict + +def generate_array_html_repr(array_info_dict, array, dataset_type=None): + def html_table(item_dicts) -> str: + """ + Generates an html table from a dictionary + """ + report = '' + report += "" + for k, v in item_dicts.items(): + report += ( + f"" + f'' + f'' + f"" + ) + report += "" + report += "
{k}{v}
" + return report + + array_info_html = html_table(array_info_dict) + repr_html = dataset_type + "
" + array_info_html if dataset_type is not None else array_info_html + + # Array like might lack nbytes (h5py < 3.0) or size (DataIO object) + if hasattr(array, "nbytes"): + array_size_bytes = array.nbytes + else: + if hasattr(array, "size"): + array_size = array.size + else: + import math + array_size = math.prod(array.shape) + array_size_bytes = array_size * array.dtype.itemsize + + # Heuristic for displaying data + array_is_small = array_size_bytes < 1024 * 0.1 # 10 % a kilobyte to display the array + if array_is_small: + repr_html += "
" + str(np.asarray(array)) + + return repr_html class LabelledDict(dict): """A dict wrapper that allows querying by an attribute of the values and running a callable on removed items. @@ -1140,7 +1105,7 @@ def update(self, other): @docval_macro('array_data') class StrDataset(h5py.Dataset): - """Wrapper to decode strings on reading the dataset""" + """Wrapper to decode strings on reading the dataset. Use only for h5py 3+.""" def __init__(self, dset, encoding, errors='strict'): self.dset = dset if encoding is None: diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index 2668da1ec..d7ec78eaa 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -8,7 +8,7 @@ from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType from .errors import ExpectedArrayError, IncorrectQuantityError -from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder +from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder from ..build.builders import BaseBuilder from ..spec import Spec, AttributeSpec, GroupSpec, DatasetSpec, RefSpec, LinkSpec from ..spec import SpecNamespace @@ -124,9 +124,6 @@ def get_type(data, builder_dtype=None): # Bytes data elif isinstance(data, bytes): return 'ascii', get_string_format(data) - # RegionBuilder data - elif isinstance(data, RegionBuilder): - return 'region', None # ReferenceBuilder data elif isinstance(data, ReferenceBuilder): return 'object', None @@ -147,7 +144,7 @@ def get_type(data, builder_dtype=None): # Case for h5py.Dataset and other I/O specific array types else: # Compound dtype - if builder_dtype and len(builder_dtype) > 1: + if builder_dtype and isinstance(builder_dtype, list): dtypes = [] string_formats = [] for i in range(len(builder_dtype)): @@ -436,12 +433,16 @@ def validate(self, **kwargs): try: dtype, string_format = get_type(data, builder.dtype) if not check_type(self.spec.dtype, dtype, string_format): - ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype, + if isinstance(self.spec.dtype, RefSpec): + expected = f'{self.spec.dtype.reftype} reference' + else: + expected = self.spec.dtype + ret.append(DtypeError(self.get_spec_loc(self.spec), expected, dtype, location=self.get_builder_loc(builder))) except EmptyArrayError: # do not validate dtype of empty array. 
HDMF does not yet set dtype when writing a list/tuple pass - if builder.dtype is not None and len(builder.dtype) > 1 and len(np.shape(builder.data)) == 0: + if isinstance(builder.dtype, list) and len(np.shape(builder.data)) == 0: shape = () # scalar compound dataset elif isinstance(builder.dtype, list): shape = (len(builder.data), ) # only 1D datasets with compound types are supported diff --git a/test_gallery.py b/test_gallery.py index c3128b8fd..b2f0a9047 100644 --- a/test_gallery.py +++ b/test_gallery.py @@ -67,8 +67,9 @@ def run_gallery_tests(): ) _import_from_file(script) except (ImportError, ValueError) as e: - if "linkml" in str(e): - pass # this is OK because linkml is not always installed + if "Please install linkml-runtime to run this example" in str(e): + # this is OK because linkml is not always installed + print(f"Skipping {script} because linkml-runtime is not installed") else: raise e except Exception: diff --git a/tests/unit/build_tests/test_builder.py b/tests/unit/build_tests/test_builder.py index a35dc64ac..62ebd0675 100644 --- a/tests/unit/build_tests/test_builder.py +++ b/tests/unit/build_tests/test_builder.py @@ -1,4 +1,4 @@ -from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder +from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder from hdmf.testing import TestCase @@ -392,12 +392,3 @@ def test_constructor(self): db = DatasetBuilder('db1', [1, 2, 3]) rb = ReferenceBuilder(db) self.assertIs(rb.builder, db) - - -class TestRegionBuilder(TestCase): - - def test_constructor(self): - db = DatasetBuilder('db1', [1, 2, 3]) - rb = RegionBuilder(slice(1, 3), db) - self.assertEqual(rb.region, slice(1, 3)) - self.assertIs(rb.builder, db) diff --git a/tests/unit/build_tests/test_convert_dtype.py b/tests/unit/build_tests/test_convert_dtype.py index 8f9e49239..8f30386d8 100644 --- a/tests/unit/build_tests/test_convert_dtype.py +++ b/tests/unit/build_tests/test_convert_dtype.py @@ -1,12 +1,17 @@ from datetime import datetime, date import numpy as np +import h5py +import unittest + from hdmf.backends.hdf5 import H5DataIO from hdmf.build import ObjectMapper from hdmf.data_utils import DataChunkIterator from hdmf.spec import DatasetSpec, RefSpec, DtypeSpec from hdmf.testing import TestCase +from hdmf.utils import StrDataset +H5PY_3 = h5py.__version__.startswith('3') class TestConvertDtype(TestCase): @@ -321,6 +326,19 @@ def test_text_spec(self): self.assertIs(ret, value) self.assertEqual(ret_dtype, 'utf8') + @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+") + def test_text_spec_str_dataset(self): + text_spec_types = ['text', 'utf', 'utf8', 'utf-8'] + for spec_type in text_spec_types: + with self.subTest(spec_type=spec_type): + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + spec = DatasetSpec('an example dataset', spec_type, name='data') + + value = StrDataset(f.create_dataset('data', data=['a', 'b', 'c']), None) + ret, ret_dtype = ObjectMapper.convert_dtype(spec, value) # no conversion + self.assertIs(ret, value) + self.assertEqual(ret_dtype, 'utf8') + def test_ascii_spec(self): ascii_spec_types = ['ascii', 'bytes'] for spec_type in ascii_spec_types: diff --git a/tests/unit/build_tests/test_io_map.py b/tests/unit/build_tests/test_io_map.py index e095ef318..730530a5a 100644 --- a/tests/unit/build_tests/test_io_map.py +++ b/tests/unit/build_tests/test_io_map.py @@ -1,4 +1,4 @@ -from hdmf.utils import docval, getargs +from hdmf.utils import StrDataset, docval, getargs from 
hdmf import Container, Data from hdmf.backends.hdf5 import H5DataIO from hdmf.build import (GroupBuilder, DatasetBuilder, ObjectMapper, BuildManager, TypeMap, LinkBuilder, @@ -7,12 +7,15 @@ from hdmf.spec import (GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, RefSpec, LinkSpec) from hdmf.testing import TestCase +import h5py from abc import ABCMeta, abstractmethod import unittest import numpy as np from tests.unit.helpers.utils import CORE_NAMESPACE, create_test_type_map +H5PY_3 = h5py.__version__.startswith('3') + class Bar(Container): @@ -460,6 +463,132 @@ def test_build_3d_ndarray(self): np.testing.assert_array_equal(builder.get('data').data, str_array_3d) np.testing.assert_array_equal(builder.get('attr_array'), str_array_3d) + @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+") + def test_build_1d_h5py_3_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, ), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, ))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_1d = np.array( + ['aa', 'bb', 'cc', 'dd'], + dtype=h5py.special_dtype(vlen=str) + ) + # wrap the dataset in a StrDataset to mimic how HDF5IO would read this dataset with h5py 3+ + dataset = StrDataset(f.create_dataset('data', data=str_array_1d), None) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + + @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+") + def test_build_3d_h5py_3_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_3d = np.array( + [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]], + dtype=h5py.special_dtype(vlen=str) + ) + # wrap the dataset in a StrDataset to mimic how HDF5IO would read this dataset with h5py 3+ + dataset = StrDataset(f.create_dataset('data', data=str_array_3d), None) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + + @unittest.skipIf(H5PY_3, "Create dataset differently for h5py < 3") + def test_build_1d_h5py_2_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + 
data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, ), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, ))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_1d = np.array( + ['aa', 'bb', 'cc', 'dd'], + dtype=h5py.special_dtype(vlen=str) + ) + dataset = f.create_dataset('data', data=str_array_1d) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + + @unittest.skipIf(H5PY_3, "Create dataset differently for h5py < 3") + def test_build_3d_h5py_2_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_3d = np.array( + [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]], + dtype=h5py.special_dtype(vlen=str) + ) + dataset = f.create_dataset('data', data=str_array_3d) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + def test_build_dataio(self): bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', diff --git a/tests/unit/common/test_generate_table.py b/tests/unit/common/test_generate_table.py index 7f7d7da40..71c15aad0 100644 --- a/tests/unit/common/test_generate_table.py +++ b/tests/unit/common/test_generate_table.py @@ -16,6 +16,13 @@ class TestDynamicDynamicTable(TestCase): def setUp(self): + + self.dtr_spec = DatasetSpec( + data_type_def='CustomDTR', + data_type_inc='DynamicTableRegion', + doc='a test DynamicTableRegion column', # this is overridden where it is used + ) + self.dt_spec = GroupSpec( 'A test extension that contains a dynamic table', data_type_def='TestTable', @@ -99,7 +106,13 @@ def setUp(self): doc='a test column', dtype='float', quantity='?', - ) + ), + DatasetSpec( + data_type_inc='CustomDTR', + name='optional_custom_dtr_col', + doc='a test DynamicTableRegion column', + quantity='?' 
+ ), ] ) @@ -107,6 +120,7 @@ def setUp(self): writer = YAMLSpecWriter(outdir='.') self.spec_catalog = SpecCatalog() + self.spec_catalog.register_spec(self.dtr_spec, 'test.yaml') self.spec_catalog.register_spec(self.dt_spec, 'test.yaml') self.spec_catalog.register_spec(self.dt_spec2, 'test.yaml') self.namespace = SpecNamespace( @@ -124,7 +138,7 @@ def setUp(self): self.test_dir = tempfile.mkdtemp() spec_fpath = os.path.join(self.test_dir, 'test.yaml') namespace_fpath = os.path.join(self.test_dir, 'test-namespace.yaml') - writer.write_spec(dict(groups=[self.dt_spec, self.dt_spec2]), spec_fpath) + writer.write_spec(dict(datasets=[self.dtr_spec], groups=[self.dt_spec, self.dt_spec2]), spec_fpath) writer.write_namespace(self.namespace, namespace_fpath) self.namespace_catalog = NamespaceCatalog() hdmf_typemap = get_type_map() @@ -133,6 +147,7 @@ def setUp(self): self.type_map.load_namespaces(namespace_fpath) self.manager = BuildManager(self.type_map) + self.CustomDTR = self.type_map.get_dt_container_cls('CustomDTR', CORE_NAMESPACE) self.TestTable = self.type_map.get_dt_container_cls('TestTable', CORE_NAMESPACE) self.TestDTRTable = self.type_map.get_dt_container_cls('TestDTRTable', CORE_NAMESPACE) @@ -228,6 +243,22 @@ def test_dynamic_table_region_non_dtr_target(self): self.TestDTRTable(name='test_dtr_table', description='my table', target_tables={'optional_col3': test_table}) + def test_custom_dtr_class(self): + test_table = self.TestTable(name='test_table', description='my test table') + test_table.add_row(my_col=3.0, indexed_col=[1.0, 3.0], optional_col2=.5) + test_table.add_row(my_col=4.0, indexed_col=[2.0, 4.0], optional_col2=.5) + + test_dtr_table = self.TestDTRTable(name='test_dtr_table', description='my table', + target_tables={'optional_custom_dtr_col': test_table}) + + self.assertIsInstance(test_dtr_table['optional_custom_dtr_col'], self.CustomDTR) + self.assertEqual(test_dtr_table['optional_custom_dtr_col'].description, "a test DynamicTableRegion column") + self.assertIs(test_dtr_table['optional_custom_dtr_col'].table, test_table) + + test_dtr_table.add_row(ref_col=0, indexed_ref_col=[0, 1], optional_custom_dtr_col=0) + test_dtr_table.add_row(ref_col=0, indexed_ref_col=[0, 1], optional_custom_dtr_col=1) + self.assertEqual(test_dtr_table['optional_custom_dtr_col'].data, [0, 1]) + def test_attribute(self): test_table = self.TestTable(name='test_table', description='my test table') assert test_table.my_col is not None @@ -266,3 +297,17 @@ def test_roundtrip(self): for err in errors: raise Exception(err) self.reader.close() + + def test_add_custom_dtr_column(self): + test_table = self.TestTable(name='test_table', description='my test table') + test_table.add_column( + name='custom_dtr_column', + description='this is a custom DynamicTableRegion column', + col_cls=self.CustomDTR, + ) + self.assertIsInstance(test_table['custom_dtr_column'], self.CustomDTR) + self.assertEqual(test_table['custom_dtr_column'].description, 'this is a custom DynamicTableRegion column') + + test_table.add_row(my_col=3.0, indexed_col=[1.0, 3.0], custom_dtr_column=0) + test_table.add_row(my_col=4.0, indexed_col=[2.0, 4.0], custom_dtr_column=1) + self.assertEqual(test_table['custom_dtr_column'].data, [0, 1]) diff --git a/tests/unit/common/test_sparse.py b/tests/unit/common/test_sparse.py index 7d94231f4..720f1f473 100644 --- a/tests/unit/common/test_sparse.py +++ b/tests/unit/common/test_sparse.py @@ -1,10 +1,25 @@ from hdmf.common import CSRMatrix from hdmf.testing import TestCase, H5RoundTripMixin - -import 
scipy.sparse as sps import numpy as np +import unittest + +try: + import scipy.sparse as sps + SCIPY_INSTALLED = True +except ImportError: + SCIPY_INSTALLED = False + + +@unittest.skipIf(SCIPY_INSTALLED, "scipy is installed") +class TestCSRMatrixNoScipy(TestCase): + def test_import_error(self): + data = np.array([[1, 0, 2], [0, 0, 3], [4, 5, 6]]) + with self.assertRaises(ImportError): + CSRMatrix(data=data) + +@unittest.skipIf(not SCIPY_INSTALLED, "scipy is not installed") class TestCSRMatrix(TestCase): def test_from_sparse_matrix(self): @@ -153,7 +168,7 @@ def test_array_bad_dim(self): with self.assertRaisesWith(ValueError, msg): CSRMatrix(data=data, indices=indices, indptr=indptr, shape=shape) - +@unittest.skipIf(not SCIPY_INSTALLED, "scipy is not installed") class TestCSRMatrixRoundTrip(H5RoundTripMixin, TestCase): def setUpContainer(self): @@ -164,6 +179,7 @@ def setUpContainer(self): return CSRMatrix(data=data, indices=indices, indptr=indptr, shape=shape) +@unittest.skipIf(not SCIPY_INSTALLED, "scipy is not installed") class TestCSRMatrixRoundTripFromLists(H5RoundTripMixin, TestCase): """Test that CSRMatrix works with lists as well""" diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index 00b3c14a3..15a0c9e91 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -429,9 +429,7 @@ def test_add_column_vectorindex(self): table.add_column(name='qux', description='qux column') ind = VectorIndex(name='quux', data=list(), target=table['qux']) - msg = ("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be " - "deprecated in a future version of HDMF.") - with self.assertWarnsWith(FutureWarning, msg): + with self.assertRaises(ValueError): table.add_column(name='bad', description='bad column', index=ind) def test_add_column_multi_index(self): @@ -2852,6 +2850,57 @@ def test_dtr_references(self): pd.testing.assert_frame_equal(ret, expected) +class TestDataIOReferences(H5RoundTripMixin, TestCase): + + def setUpContainer(self): + """Test roundtrip of a table with an expandable column of references.""" + group1 = Container('group1') + group2 = Container('group2') + + table = DynamicTable( + name='table', + description='test table' + ) + table.add_column( + name='x', + description='test column of ints' + ) + table.add_column( + name='y', + description='test column of reference' + ) + table.add_row(id=101, x=1, y=group1) + table.add_row(id=102, x=2, y=group2) + table.id.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True}) + table.x.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True}) + table.y.set_data_io(H5DataIO, {'maxshape': (None,), 'chunks': True}) + + multi_container = SimpleMultiContainer(name='multi') + multi_container.add_container(group1) + multi_container.add_container(group2) + multi_container.add_container(table) + + return multi_container + + def test_append(self, cache_spec=False): + """Write the container to an HDF5 file, read the container from the file, and append to it.""" + + # write file + with HDF5IO(self.filename, manager=get_manager(), mode='w') as write_io: + write_io.write(self.container, cache_spec=cache_spec) + + # read container from file + self.reader = HDF5IO(self.filename, manager=get_manager(), mode='a') + read_container = self.reader.read() + self.assertContainerEqual(read_container, self.container, ignore_name=True) + self.assertContainerEqual(read_container['table']['y'][-1], read_container['group2']) + + # append row + group1 = 
read_container['group1'] + read_container['table'].add_row(id=103, x=3, y=group1) + + self.assertContainerEqual(read_container['table']['y'][-1], group1) + class TestVectorIndexDtype(TestCase): def set_up_array_index(self): diff --git a/tests/unit/spec_tests/test_dataset_spec.py b/tests/unit/spec_tests/test_dataset_spec.py index c9db14635..60025fd7e 100644 --- a/tests/unit/spec_tests/test_dataset_spec.py +++ b/tests/unit/spec_tests/test_dataset_spec.py @@ -261,3 +261,17 @@ def test_build_warn_extra_args(self): "'dtype': 'int', 'required': True}") with self.assertWarnsWith(UserWarning, msg): DatasetSpec.build_spec(spec_dict) + + def test_constructor_validates_name(self): + with self.assertRaisesWith( + ValueError, + "Name 'one/two' is invalid. Names of Groups and Datasets cannot contain '/'", + ): + DatasetSpec(doc='my first dataset', dtype='int', name='one/two') + + def test_constructor_validates_default_name(self): + with self.assertRaisesWith( + ValueError, + "Default name 'one/two' is invalid. Names of Groups and Datasets cannot contain '/'", + ): + DatasetSpec(doc='my first dataset', dtype='int', default_name='one/two', data_type_def='test') diff --git a/tests/unit/spec_tests/test_ref_spec.py b/tests/unit/spec_tests/test_ref_spec.py index bb1c0efb8..3277673d1 100644 --- a/tests/unit/spec_tests/test_ref_spec.py +++ b/tests/unit/spec_tests/test_ref_spec.py @@ -15,9 +15,3 @@ def test_constructor(self): def test_wrong_reference_type(self): with self.assertRaises(ValueError): RefSpec('TimeSeries', 'unknownreftype') - - def test_isregion(self): - spec = RefSpec('TimeSeries', 'object') - self.assertFalse(spec.is_region()) - spec = RefSpec('Data', 'region') - self.assertTrue(spec.is_region()) diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 9ac81ba13..2abe6349b 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -8,6 +8,7 @@ from hdmf.utils import docval from hdmf.common import DynamicTable, VectorData, DynamicTableRegion from hdmf.backends.hdf5.h5tools import HDF5IO +from hdmf.backends.io import HDMFIO class Subcontainer(Container): @@ -179,6 +180,17 @@ def test_set_parent_overwrite_proxy(self): def test_slash_restriction(self): self.assertRaises(ValueError, Container, 'bad/name') + # check no error raised in construct mode + child_obj = Container.__new__(Container, in_construct_mode=True) + child_obj.__init__('bad/name') + + def test_colon_restriction(self): + self.assertRaises(ValueError, Container, 'bad:name') + + # check no error raised in construct mode + child_obj = Container.__new__(Container, in_construct_mode=True) + child_obj.__init__('bad:name') + def test_set_modified_parent(self): """Test that set modified properly sets parent modified """ @@ -201,18 +213,6 @@ def test_all_children(self): obj = species.all_objects self.assertEqual(sorted(list(obj.keys())), sorted([species.object_id, species.id.object_id, col1.object_id])) - def test_add_child(self): - """Test that add child creates deprecation warning and also properly sets child's parent and modified - """ - parent_obj = Container('obj1') - child_obj = Container('obj2') - parent_obj.set_modified(False) - with self.assertWarnsWith(DeprecationWarning, 'add_child is deprecated. 
Set the parent attribute instead.'): - parent_obj.add_child(child_obj) - self.assertIs(child_obj.parent, parent_obj) - self.assertTrue(parent_obj.modified) - self.assertIs(parent_obj.children[0], child_obj) - def test_parent_set_link_warning(self): col1 = VectorData( name='col1', @@ -423,6 +423,23 @@ def __init__(self, **kwargs): self.data = kwargs['data'] self.str = kwargs['str'] + class ContainerWithData(Container): + + __fields__ = ( + "data", + "str" + ) + + @docval( + {'name': "data", "doc": 'data', 'type': 'array_data', "default": None}, + {'name': "str", "doc": 'str', 'type': str, "default": None}, + + ) + def __init__(self, **kwargs): + super().__init__('test name') + self.data = kwargs['data'] + self.str = kwargs['str'] + def test_repr_html_(self): child_obj1 = Container('test child 1') obj1 = self.ContainerWithChildAndData(child=child_obj1, data=[1, 2, 3], str="hello") @@ -455,6 +472,82 @@ def test_repr_html_(self): 'class="field-value">hello' ) + def test_repr_html_array(self): + obj = self.ContainerWithData(data=np.array([1, 2, 3, 4], dtype=np.int64), str="hello") + expected_html_table = ( + 'class="container-fields">NumPy array
Data typeint64
Shape' + '(4,)
Array size32.00 bytes

[1 2 3 4]' + ) + self.assertIn(expected_html_table, obj._repr_html_()) + + def test_repr_html_array_large_arrays_not_displayed(self): + obj = self.ContainerWithData(data=np.arange(200, dtype=np.int64), str="hello") + expected_html_table = ( + 'class="container-fields">NumPy array
Data typeint64
Shape' + '(200,)
Array size1.56 KiB
' + ) + self.assertIn(expected_html_table, obj._repr_html_()) + + def test_repr_html_hdf5_dataset(self): + with HDF5IO('array_data.h5', mode='w') as io: + dataset = io._file.create_dataset(name='my_dataset', data=np.array([1, 2, 3, 4], dtype=np.int64)) + obj = self.ContainerWithData(data=dataset, str="hello") + obj.read_io = io + + expected_html_table = ( + 'class="container-fields">HDF5 dataset
Data typeint64
' + 'Shape(4,)
Array size' + '32.00 bytes
Chunk shape' + 'None
CompressionNone
Compression optsNone
Compression ratio1.0

[1 2 3 4]' + ) + + self.assertIn(expected_html_table, obj._repr_html_()) + + os.remove('array_data.h5') + + def test_repr_html_hdmf_io(self): + with HDF5IO('array_data.h5', mode='w') as io: + dataset = io._file.create_dataset(name='my_dataset', data=np.array([1, 2, 3, 4], dtype=np.int64)) + obj = self.ContainerWithData(data=dataset, str="hello") + + class OtherIO(HDMFIO): + + @staticmethod + def can_read(path): + pass + + def read_builder(self): + pass + + def write_builder(self, **kwargs): + pass + + def open(self): + pass + + def close(self): + pass + + obj.read_io = OtherIO() + + expected_html_table = ( + 'class="container-fields">
Data typeint64
' + 'Shape(4,)
Array size' + '32.00 bytes

[1 2 3 4]' + ) + + self.assertIn(expected_html_table, obj._repr_html_()) + + os.remove('array_data.h5') class TestData(TestCase): diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index cd2c483f7..b56dc5026 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -24,7 +24,7 @@ from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, OrphanContainerBuildError, LinkBuilder from hdmf.container import Container from hdmf import Data, docval -from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError +from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError, append_data from hdmf.spec.catalog import SpecCatalog from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace from hdmf.spec.spec import GroupSpec, DtypeSpec @@ -1871,7 +1871,7 @@ def test_link(self): self.assertTrue(self.foo2.my_data.valid) # test valid self.assertEqual(len(self.foo2.my_data), 5) # test len self.assertEqual(self.foo2.my_data.shape, (5,)) # test getattr with shape - self.assertTrue(np.array_equal(np.array(self.foo2.my_data), [1, 2, 3, 4, 5])) # test array conversion + np.testing.assert_array_equal(self.foo2.my_data, [1, 2, 3, 4, 5]) # test array conversion # test loop through iterable match = [1, 2, 3, 4, 5] @@ -3064,6 +3064,41 @@ def test_append_dataset_of_references(self): self.assertEqual(len(read_bucket1.baz_data.data), 2) self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"]) + def test_append_dataset_of_references_compound(self): + """Test that exporting a written container with a dataset of references of compound data type works.""" + bazs = [] + baz_pairs = [] + num_bazs = 10 + for i in range(num_bazs): + b = Baz(name='baz%d' % i) + bazs.append(b) + baz_pairs.append((i, b)) + baz_cpd_data = BazCpdData(name='baz_cpd_data1', data=H5DataIO(baz_pairs, maxshape=(None,))) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io: + read_bucket1 = append_io.read() + new_baz = Baz(name='new') + read_bucket1.add_baz(new_baz) + append_io.write(read_bucket1) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + cpd_DoR = read_bucket1.baz_cpd_data.data + builder = ref_io.manager.get_builder(read_bucket1.bazs['new']) + ref = ref_io._create_ref(builder) + append_data(cpd_DoR.dataset, (11, ref)) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket2 = read_io.read() + + self.assertEqual(read_bucket2.baz_cpd_data.data[-1][0], 11) + self.assertIs(read_bucket2.baz_cpd_data.data[-1][1], read_bucket2.bazs['new']) + + def test_append_dataset_of_references_orphaned_target(self): bazs = [] num_bazs = 1 @@ -3825,6 +3860,11 @@ def __init__(self, **kwargs): self.data2 = kwargs["data2"] self.obj = ContainerWithData("name", [1, 2, 3, 4, 5], None) + self.file_path = get_temp_filepath() + + def tearDown(self): + if os.path.exists(self.file_path): + os.remove(self.file_path) def test_set_data_io(self): self.obj.set_data_io("data1", H5DataIO, data_io_kwargs=dict(chunks=True)) @@ -3847,6 +3887,31 @@ def test_set_data_io_old_api(self): self.assertIsInstance(self.obj.data1, H5DataIO) self.assertTrue(self.obj.data1.io_settings["chunks"]) + def 
test_set_data_io_h5py_dataset(self): + file = File(self.file_path, 'w') + data = file.create_dataset('data', data=[1, 2, 3, 4, 5], chunks=(3,)) + class ContainerWithData(Container): + __fields__ = ('data',) + + @docval( + {"name": "name", "doc": "name", "type": str}, + {'name': 'data', 'doc': 'field1 doc', 'type': h5py.Dataset}, + ) + def __init__(self, **kwargs): + super().__init__(name=kwargs["name"]) + self.data = kwargs["data"] + + container = ContainerWithData("name", data) + container.set_data_io( + "data", + H5DataIO, + data_io_kwargs=dict(chunks=(2,)), + data_chunk_iterator_class=DataChunkIterator, + ) + + self.assertIsInstance(container.data, H5DataIO) + self.assertEqual(container.data.io_settings["chunks"], (2,)) + file.close() class TestDataSetDataIO(TestCase): @@ -3855,6 +3920,11 @@ class MyData(Data): pass self.data = MyData("my_data", [1, 2, 3]) + self.file_path = get_temp_filepath() + + def tearDown(self): + if os.path.exists(self.file_path): + os.remove(self.file_path) def test_set_data_io(self): self.data.set_data_io(H5DataIO, dict(chunks=True)) @@ -3914,3 +3984,4 @@ def test_expand_set_shape(self): [7, 8, 9]]) npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected) self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3)) + diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py deleted file mode 100644 index b2ff267a7..000000000 --- a/tests/unit/test_query.py +++ /dev/null @@ -1,161 +0,0 @@ -import os -from abc import ABCMeta, abstractmethod - -import numpy as np -from h5py import File -from hdmf.array import SortedArray, LinSpace -from hdmf.query import HDMFDataset, Query -from hdmf.testing import TestCase - - -class AbstractQueryMixin(metaclass=ABCMeta): - - @abstractmethod - def getDataset(self): - raise NotImplementedError('Cannot run test unless getDataset is implemented') - - def setUp(self): - self.dset = self.getDataset() - self.wrapper = HDMFDataset(self.dset) - - def test_get_dataset(self): - array = self.wrapper.dataset - self.assertIsInstance(array, SortedArray) - - def test___gt__(self): - ''' - Test wrapper greater than magic method - ''' - q = self.wrapper > 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [False, False, False, False, False, - False, True, True, True, True] - expected = slice(6, 10) - self.assertEqual(result, expected) - - def test___ge__(self): - ''' - Test wrapper greater than or equal magic method - ''' - q = self.wrapper >= 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [False, False, False, False, False, - True, True, True, True, True] - expected = slice(5, 10) - self.assertEqual(result, expected) - - def test___lt__(self): - ''' - Test wrapper less than magic method - ''' - q = self.wrapper < 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [True, True, True, True, True, - False, False, False, False, False] - expected = slice(0, 5) - self.assertEqual(result, expected) - - def test___le__(self): - ''' - Test wrapper less than or equal magic method - ''' - q = self.wrapper <= 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [True, True, True, True, True, - True, False, False, False, False] - expected = slice(0, 6) - self.assertEqual(result, expected) - - def test___eq__(self): - ''' - Test wrapper equals magic method - ''' - q = self.wrapper == 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [False, False, False, False, False, - True, False, False, False, False] - expected = 5 - 
self.assertTrue(np.array_equal(result, expected)) - - def test___ne__(self): - ''' - Test wrapper not equal magic method - ''' - q = self.wrapper != 5 - self.assertIsInstance(q, Query) - result = q.evaluate() - expected = [True, True, True, True, True, - False, True, True, True, True] - expected = [slice(0, 5), slice(6, 10)] - self.assertTrue(np.array_equal(result, expected)) - - def test___getitem__(self): - ''' - Test wrapper getitem using slice - ''' - result = self.wrapper[0:5] - expected = [0, 1, 2, 3, 4] - self.assertTrue(np.array_equal(result, expected)) - - def test___getitem__query(self): - ''' - Test wrapper getitem using query - ''' - q = self.wrapper < 5 - result = self.wrapper[q] - expected = [0, 1, 2, 3, 4] - self.assertTrue(np.array_equal(result, expected)) - - -class SortedQueryTest(AbstractQueryMixin, TestCase): - - path = 'SortedQueryTest.h5' - - def getDataset(self): - self.f = File(self.path, 'w') - self.input = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - self.d = self.f.create_dataset('dset', data=self.input) - return SortedArray(self.d) - - def tearDown(self): - self.f.close() - if os.path.exists(self.path): - os.remove(self.path) - - -class LinspaceQueryTest(AbstractQueryMixin, TestCase): - - path = 'LinspaceQueryTest.h5' - - def getDataset(self): - return LinSpace(0, 10, 1) - - -class CompoundQueryTest(TestCase): - - def getM(self): - return SortedArray(np.arange(10, 20, 1)) - - def getN(self): - return SortedArray(np.arange(10.0, 20.0, 0.5)) - - def setUp(self): - self.m = HDMFDataset(self.getM()) - self.n = HDMFDataset(self.getN()) - - # TODO: test not completed - # def test_map(self): - # q = self.m == (12, 16) # IN operation - # q.evaluate() # [2,3,4,5] - # q.evaluate(False) # RangeResult(2,6) - # r = self.m[q] # noqa: F841 - # r = self.m[q.evaluate()] # noqa: F841 - # r = self.m[q.evaluate(False)] # noqa: F841 - - def tearDown(self): - pass diff --git a/tests/unit/utils_test/test_core_DataIO.py b/tests/unit/utils_test/test_core_DataIO.py index 4c2ffac15..80518a316 100644 --- a/tests/unit/utils_test/test_core_DataIO.py +++ b/tests/unit/utils_test/test_core_DataIO.py @@ -1,10 +1,8 @@ from copy import copy, deepcopy import numpy as np -from hdmf.container import Data from hdmf.data_utils import DataIO from hdmf.testing import TestCase -import warnings class DataIOTests(TestCase): @@ -30,34 +28,13 @@ def test_dataio_slice_delegation(self): dset = DataIO(indata) self.assertTrue(np.all(dset[1:3, 5:8] == indata[1:3, 5:8])) - def test_set_dataio(self): - """ - Test that Data.set_dataio works as intended - """ - dataio = DataIO() - data = np.arange(30).reshape(5, 2, 3) - container = Data('wrapped_data', data) - msg = "Data.set_dataio() is deprecated. Please use Data.set_data_io() instead." - with self.assertWarnsWith(DeprecationWarning, msg): - container.set_dataio(dataio) - self.assertIs(dataio.data, data) - self.assertIs(dataio, container.data) - - def test_set_dataio_data_already_set(self): + def test_set_data_io_data_already_set(self): """ Test that Data.set_dataio works as intended """ dataio = DataIO(data=np.arange(30).reshape(5, 2, 3)) - data = np.arange(30).reshape(5, 2, 3) - container = Data('wrapped_data', data) with self.assertRaisesWith(ValueError, "cannot overwrite 'data' on DataIO"): - with warnings.catch_warnings(record=True): - warnings.filterwarnings( - action='ignore', - category=DeprecationWarning, - message="Data.set_dataio() is deprecated. 
Please use Data.set_data_io() instead.", - ) - container.set_dataio(dataio) + dataio.data=[1,2,3,4] def test_dataio_options(self): """ diff --git a/tests/unit/utils_test/test_docval.py b/tests/unit/utils_test/test_docval.py index c766dcf46..bed5cd134 100644 --- a/tests/unit/utils_test/test_docval.py +++ b/tests/unit/utils_test/test_docval.py @@ -1,7 +1,7 @@ import numpy as np from hdmf.testing import TestCase -from hdmf.utils import (docval, fmt_docval_args, get_docval, getargs, popargs, AllowPositional, get_docval_macro, - docval_macro, popargs_to_dict, call_docval_func) +from hdmf.utils import (docval, get_docval, getargs, popargs, AllowPositional, get_docval_macro, + docval_macro, popargs_to_dict) class MyTestClass(object): @@ -137,80 +137,6 @@ def method1(self, **kwargs): with self.assertRaises(ValueError): method1(self, arg1=[[1, 1, 1]]) - fmt_docval_warning_msg = ( - "fmt_docval_args will be deprecated in a future version of HDMF. Instead of using fmt_docval_args, " - "call the function directly with the kwargs. Please note that fmt_docval_args " - "removes all arguments not accepted by the function's docval, so if you are passing kwargs that " - "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True " - "is set), then you will need to pop the extra arguments out of kwargs before calling the function." - ) - - def test_fmt_docval_args(self): - """ Test that fmt_docval_args parses the args and strips extra args """ - test_kwargs = { - 'arg1': 'a string', - 'arg2': 1, - 'arg3': True, - 'hello': 'abc', - 'list': ['abc', 1, 2, 3] - } - with self.assertWarnsWith(PendingDeprecationWarning, self.fmt_docval_warning_msg): - rec_args, rec_kwargs = fmt_docval_args(self.test_obj.basic_add2_kw, test_kwargs) - exp_args = ['a string', 1] - self.assertListEqual(rec_args, exp_args) - exp_kwargs = {'arg3': True} - self.assertDictEqual(rec_kwargs, exp_kwargs) - - def test_fmt_docval_args_no_docval(self): - """ Test that fmt_docval_args raises an error when run on function without docval """ - def method1(self, **kwargs): - pass - - with self.assertRaisesRegex(ValueError, r"no docval found on .*method1.*"): - with self.assertWarnsWith(PendingDeprecationWarning, self.fmt_docval_warning_msg): - fmt_docval_args(method1, {}) - - def test_fmt_docval_args_allow_extra(self): - """ Test that fmt_docval_args works """ - test_kwargs = { - 'arg1': 'a string', - 'arg2': 1, - 'arg3': True, - 'hello': 'abc', - 'list': ['abc', 1, 2, 3] - } - with self.assertWarnsWith(PendingDeprecationWarning, self.fmt_docval_warning_msg): - rec_args, rec_kwargs = fmt_docval_args(self.test_obj.basic_add2_kw_allow_extra, test_kwargs) - exp_args = ['a string', 1] - self.assertListEqual(rec_args, exp_args) - exp_kwargs = {'arg3': True, 'hello': 'abc', 'list': ['abc', 1, 2, 3]} - self.assertDictEqual(rec_kwargs, exp_kwargs) - - def test_call_docval_func(self): - """Test that call_docval_func strips extra args and calls the function.""" - test_kwargs = { - 'arg1': 'a string', - 'arg2': 1, - 'arg3': True, - 'hello': 'abc', - 'list': ['abc', 1, 2, 3] - } - msg = ( - "call_docval_func will be deprecated in a future version of HDMF. Instead of using call_docval_func, " - "call the function directly with the kwargs. 
Please note that call_docval_func " - "removes all arguments not accepted by the function's docval, so if you are passing kwargs that " - "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True " - "is set), then you will need to pop the extra arguments out of kwargs before calling the function." - ) - with self.assertWarnsWith(PendingDeprecationWarning, msg): - ret_kwargs = call_docval_func(self.test_obj.basic_add2_kw, test_kwargs) - exp_kwargs = { - 'arg1': 'a string', - 'arg2': 1, - 'arg3': True - } - self.assertDictEqual(ret_kwargs, exp_kwargs) - def test_docval_add(self): """Test that docval works with a single positional argument diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index dd79cfce5..64667b3e0 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -524,6 +524,38 @@ def test_scalar_compound_dtype(self): results = self.vmap.validate(bar_builder) self.assertEqual(len(results), 0) +class TestReferenceValidation(ValidatorTestBase): + def getSpecs(self): + qux_spec = DatasetSpec( + doc='a simple scalar dataset', + data_type_def='Qux', + dtype='int', + shape=None + ) + bar_spec = GroupSpec('A test group specification with a reference dataset', + data_type_def='Bar', + datasets=[DatasetSpec('an example dataset', + dtype=RefSpec('Qux', reftype='object'), + name='data', + shape=(None, ))], + attributes=[AttributeSpec('attr1', + 'an example attribute', + dtype=RefSpec('Qux', reftype='object'), + shape=(None, ))]) + return (qux_spec, bar_spec) + + def test_invalid_reference(self): + """Test that validator does not allow another data type where a reference is specified.""" + value = np.array([1.0, 2.0, 3.0]) + bar_builder = GroupBuilder('my_bar', + attributes={'data_type': 'Bar', 'attr1': value}, + datasets=[DatasetBuilder('data', value)]) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {"Bar/attr1 (my_bar.attr1): incorrect type - expected 'object reference', got 'float64'", + "Bar/data (my_bar/data): incorrect type - expected 'object reference', got 'float64'"} + self.assertEqual(result_strings, expected_errors) + class Test1DArrayValidation(TestCase): def set_up_spec(self, dtype): diff --git a/tox.ini b/tox.ini index 75b011aa0..4caa68a4b 100644 --- a/tox.ini +++ b/tox.ini @@ -4,55 +4,55 @@ # and then run "tox -e [envname]" from this directory. 
[tox] -requires = pip >= 22.0 +requires = pip >= 24.3.1 [testenv] download = True setenv = PYTHONDONTWRITEBYTECODE = 1 - VIRTUALENV_PIP = 23.3.1 recreate = - pinned, minimum, upgraded, prerelease: False + minimum, upgraded, prerelease: False build, wheelinstall: True # good practice to recreate the environment skip_install = - pinned, minimum, upgraded, prerelease, wheelinstall: False + minimum, upgraded, prerelease, wheelinstall: False build: True # no need to install anything when building install_command = # when using [testenv:wheelinstall] and --installpkg, the wheel and its dependencies # are installed, instead of the package in the current directory - pinned, minimum, wheelinstall: python -I -m pip install {opts} {packages} - upgraded: python -I -m pip install -U {opts} {packages} - prerelease: python -I -m pip install -U --pre {opts} {packages} + minimum, wheelinstall: python -I -m pip install {opts} {packages} + upgraded: python -I -m pip install -U {opts} {packages} + prerelease: python -I -m pip install -U --pre {opts} {packages} deps = - # use pinned, minimum, or neither (use dependencies in pyproject.toml) - pytest, gallery: -rrequirements-dev.txt - gallery: -rrequirements-doc.txt - optional: -rrequirements-opt.txt - pinned: -rrequirements.txt - minimum: -rrequirements-min.txt + # which requirements files to use (default: none) + minimum: -r requirements-min.txt +extras = + # which optional dependency set(s) to use (default: none) + pytest: test + gallery: doc + optional: tqdm,sparse,zarr,termset commands = + # commands to run for every environment python --version # print python version for debugging python -m pip check # check for conflicting packages python -m pip list # list installed packages for debugging + + # commands to run for select environments pytest: pytest -v gallery: python test_gallery.py build: python -m pip install -U build build: python -m build wheelinstall: python -c "import hdmf; import hdmf.common" -# list of pre-defined environments. (Technically environments not listed here -# like build-py312 can also be used.) -[testenv:pytest-py312-upgraded] -[testenv:pytest-py312-prerelease] -[testenv:pytest-py311-optional-pinned] # some optional reqs not compatible with py312 yet -[testenv:pytest-py{38,39,310,311,312}-pinned] -[testenv:pytest-py38-minimum] +# list of pre-defined environments +[testenv:pytest-py{39,310,311,312,313}-upgraded] +[testenv:pytest-py313-upgraded-optional] +[testenv:pytest-py313-prerelease-optional] +[testenv:pytest-py39-minimum] -[testenv:gallery-py312-upgraded] -[testenv:gallery-py312-prerelease] -[testenv:gallery-py311-optional-pinned] -[testenv:gallery-py{38,39,310,311,312}-pinned] -[testenv:gallery-py38-minimum] +# TODO: Update to 3.13 when linkml and its deps support 3.13 +[testenv:gallery-py312-upgraded-optional] +[testenv:gallery-py312-prerelease-optional] +[testenv:gallery-py39-minimum] [testenv:build] # using tox for this so that we can have a clean build environment [testenv:wheelinstall] # use with `--installpkg dist/*-none-any.whl`
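The removal of fmt_docval_args and call_docval_func from src/hdmf/utils.py (and of their tests in tests/unit/utils_test/test_docval.py) follows the guidance in their deprecation warnings quoted above: call the docval-decorated function directly with the kwargs, popping any keys its docval does not accept. A minimal sketch of that migration; the function and argument names here are illustrative, not taken from the codebase:

    from hdmf.utils import docval, getargs

    @docval({'name': 'arg1', 'type': str, 'doc': 'a string argument'},
            {'name': 'arg2', 'type': int, 'doc': 'an integer argument', 'default': 0})
    def example_func(**kwargs):
        arg1, arg2 = getargs('arg1', 'arg2', kwargs)
        return arg1, arg2

    call_kwargs = {'arg1': 'a string', 'arg2': 1, 'extra': 'not accepted by the docval'}

    # Before: fargs, fkwargs = fmt_docval_args(example_func, call_kwargs); example_func(*fargs, **fkwargs)
    # Now: remove extra keys yourself (this docval does not set allow_extra=True) and call directly.
    call_kwargs.pop('extra')
    result = example_func(**call_kwargs)  # -> ('a string', 1)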