diff --git a/.circleci/config.yml b/.circleci/config.yml index 3c2dd0913..a45ac70bc 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,76 +1,46 @@ -# Python CircleCI 2.1 configuration file -# -# Check https://circleci.com/docs/2.1/language-python/ for more details -# version: 2.1 -orbs: - codecov: codecov/codecov@1.0.5 -jobs: - test_py38: +jobs: + set_up_conda: + machine: + image: ubuntu-2004:202201-02 working_directory: /home/circleci/src/CuBIDS - docker: - - image: continuumio/miniconda3 steps: - - checkout + - checkout: + path: /home/circleci/src/CuBIDS - run: - name: Generate environment - command: | - conda create -n py38_env python=3.8 pip -yq - conda install -c conda-forge -y datalad - - # Add nodejs and the validator - conda install nodejs - npm install -g yarn && \ - npm install -g bids-validator + name: install miniconda + command: | + export MINICONDA=/tmp/miniconda + export PATH="$MINICONDA/bin:$PATH" + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh + bash /tmp/miniconda.sh -b -f -p $MINICONDA + conda config --set always_yes yes + conda update conda + conda info -a - source activate py38_env - pip install -e .[tests] - - run: - name: Run tests - command: | - source activate py38_env - py.test --cov-append --cov-config=cubids/tests/coverage.cfg --cov-report=xml --cov=cubids cubids - mkdir /tmp/src/coverage - mv /home/circleci/src/CuBIDS/.coverage /tmp/src/coverage/.coverage.py38 + - persist_to_workspace: + root: /tmp + paths: + - miniconda - test_py39: + run_pytests: + machine: + image: ubuntu-2004:202201-02 working_directory: /home/circleci/src/CuBIDS - docker: - - image: continuumio/miniconda3 steps: - - checkout - - run: - name: Generate environment - command: | - conda create -n py39_env python=3.9 pip -yq - conda install -c conda-forge -y datalad - - # Add nodejs and the validator - conda install nodejs - npm install -g yarn && \ - npm install -g bids-validator + - checkout: + path: /home/circleci/src/CuBIDS - source activate py39_env - pip install -e .[tests] - - run: - name: Run tests - command: | - source activate py39_env - py.test --cov-append --cov-config=cubids/tests/coverage.cfg --cov-report=xml --cov=cubids cubids - mkdir /tmp/src/coverage - mv /home/circleci/src/CuBIDS/.coverage /tmp/src/coverage/.coverage.py39 + - attach_workspace: + at: /tmp - test_py310: - working_directory: /home/circleci/src/CuBIDS - docker: - - image: continuumio/miniconda3 - steps: - - checkout - run: - name: Generate environment - command: | - conda create -n py310_env python=3.10 pip -yq + name: Test CuBIDS (Python = ${{ matrix.python_version }}) + command: | + export PATH=/tmp/miniconda/bin:$PATH + conda create -n cubids python=${{ matrix.python_version }} pip + source activate cubids conda install -c conda-forge -y datalad # Add nodejs and the validator @@ -78,76 +48,72 @@ jobs: npm install -g yarn && \ npm install -g bids-validator - source activate py310_env + # Install CuBIDS pip install -e .[tests] - - run: - name: Run tests - command: | - source activate py310_env - py.test --cov-append --cov-config=cubids/tests/coverage.cfg --cov-report=xml --cov=cubids cubids - mkdir /tmp/src/coverage - mv /home/circleci/src/CuBIDS/.coverage /tmp/src/coverage/.coverage.py310 - test_py311: - working_directory: /home/circleci/src/CuBIDS + # Run tests + pytest cubids + + deployable: docker: - - image: continuumio/miniconda3 + - image: busybox:latest steps: - - checkout - - run: - name: Generate environment - command: | - conda create 
-n py311_env python=3.11 pip -yq - conda install -c conda-forge -y datalad - - # Add nodejs and the validator - conda install nodejs - npm install -g yarn && \ - npm install -g bids-validator - - source activate py311_env - pip install -e .[tests] - - run: - name: Run tests - command: | - source activate py311_env - py.test --cov-append --cov-config=cubids/tests/coverage.cfg --cov-report=xml --cov=cubids cubids - mkdir /tmp/src/coverage - mv /home/circleci/src/CuBIDS/.coverage /tmp/src/coverage/.coverage.py311 + - run: echo Deploying! - merge_coverage: + deploy_pypi: + machine: + image: ubuntu-2004:202201-02 working_directory: /home/circleci/src/CuBIDS - docker: - - image: continuumio/miniconda3 steps: - - attach_workspace: - at: /tmp - - checkout + - checkout: + path: /home/circleci/src/CuBIDS - run: - name: Merge coverage files - command: | - apt-get update - apt-get install -yqq curl - source activate py37_env - cd /tmp/src/coverage/ - coverage combine - coverage xml - - store_artifacts: - path: /tmp/src/coverage - - codecov/upload: - file: /tmp/src/coverage/coverage.xml + name: Update build tools + command: pip install --upgrade build twine + - run: + name: Build CuBIDS + command: python -m build + - run: + name: Upload package to PyPI + command: python -m twine upload -u __token__ -p ${PYPI_TOKEN} dist/cubids* workflows: - version: 2.1 - run_tests: + version: 2 + build_test_deploy: jobs: - - test_py38 - - test_py39 - - test_py310 - - test_py311 - - merge_coverage: + - set_up_conda: + filters: + tags: + only: /.*/ + + - run_pytests: + # Define the matrix for Python versions + matrix: + python_version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" + requires: + - set_up_conda + filters: + tags: + only: /.*/ + + - deployable: + requires: + - run_pytests + filters: + branches: + only: main + tags: + only: /.*/ + + - deploy_pypi: requires: - - test_py38 - - test_py39 - - test_py310 - - test_py311 + - deployable + filters: + branches: + ignore: /.*/ + tags: + only: /.*/ diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..d57120b43 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,32 @@ +--- +name: Bug report +about: Something not working as described? Missing/incorrect documentation? This is the place. +title: '' +labels: 'bug' +assignees: '' + +--- +## Summary + + +## Additional details + +- CuBIDS version: +- Datalad version: + +### What were you trying to do? + +### What did you expect to happen? + +### What actually happened? + +## Reproducing the bug + + diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..574590db2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: Usage question + url: https://neurostars.org/tags/c/software-support/234/cubids + about: Please ask questions about using CuBIDS on NeuroStars. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 000000000..f3d8ef67b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,16 @@ +--- +name: Feature request +about: Got an idea for a new feature, or changing an existing one? This is the place. 
+title: '' +labels: 'enhancement' +assignees: '' + +--- +## Summary + + +## Additional details + + +## Next steps + diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..2b5fd1113 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,18 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "github-actions" # See documentation for possible values + directory: "/" # Location of package manifests + labels: ["maintenance", "ignore-for-release"] + assignees: ["tsalo"] + schedule: + interval: "weekly" + - package-ecosystem: pip + directory: "/" + labels: ["maintenance", "ignore-for-release"] + schedule: + interval: weekly diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..b3b3b454c --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,9 @@ +Closes . + +## Changes proposed in this pull request + +- + +## Documentation that should be reviewed + +- diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 000000000..0c8fee8e8 --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,20 @@ +changelog: + exclude: + labels: + - ignore-for-release + categories: + - title: 🛠 Breaking Changes + labels: + - breaking-change + - title: 🎉 Exciting New Features + labels: + - enhancement + - title: 👎 Deprecations + labels: + - deprecation + - title: 🐛 Bug Fixes + labels: + - bug + - title: Other Changes + labels: + - "*" diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 42b2b94a0..abfb8efa3 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -9,11 +9,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Set up environment - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # no need for the history fetch-depth: 1 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.9' - name: Install dependencies diff --git a/AUTHORS.rst b/AUTHORS.rst index 81d4937b1..eb67a8e3e 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -4,14 +4,18 @@ Credits Development Lead ---------------- -* Sydney Covitz +* Taylor Salo Contributors ------------ -* Matt Cieslak - -* Tinashe Tapera +* Matt Cieslak Principal Investigator ---------------------- -* Theodore Satterthwaite +* Theodore Satterthwaite + +Previous Contributors +--------------------- +* Sydney Covitz (previous development lead) + +* Tinashe Tapera diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index e4f95ef1a..65a6e2ec6 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -4,8 +4,8 @@ Contributing ============ -Contributions are welcome, and they are greatly appreciated! Every little bit -helps, and credit will always be given. +Contributions are welcome, and they are greatly appreciated! +Every little bit helps, and credit will always be given. You can contribute in many ways: @@ -26,21 +26,20 @@ If you are reporting a bug, please include: Fix Bugs ~~~~~~~~ -Look through the GitHub issues for bugs. Anything tagged with "bug" and "help -wanted" is open to whoever wants to implement it. +Look through the GitHub issues for bugs. 
+Anything tagged with "bug" and "help wanted" is open to whoever wants to implement it. Implement Features ~~~~~~~~~~~~~~~~~~ -Look through the GitHub issues for features. Anything tagged with "enhancement" -and "help wanted" is open to whoever wants to implement it. +Look through the GitHub issues for features. +Anything tagged with "enhancement" and "help wanted" is open to whoever wants to implement it. Write Documentation ~~~~~~~~~~~~~~~~~~~ -CuBIDS could always use more documentation, whether as part of the -official CuBIDS docs, in docstrings, or even on the web in blog posts, -articles, and such. +CuBIDS could always use more documentation, whether as part of the official CuBIDS docs, +in docstrings, or even on the web in blog posts, articles, and such. Submit Feedback ~~~~~~~~~~~~~~~ @@ -59,60 +58,62 @@ Get Started! Ready to contribute? Here's how to set up `cubids` for local development. -1. Fork the `cubids` repo on GitHub. -2. Clone your fork locally:: +1. Fork the `cubids` repo on GitHub. +2. Clone your fork locally:: $ git clone git@github.com:your_name_here/cubids.git -3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: +3. Install your local copy into a virtualenv. + Assuming you have virtualenvwrapper installed, + this is how you set up your fork for local development:: $ mkvirtualenv cubids $ cd cubids/ $ python setup.py develop -4. Create a branch for local development:: +4. Create a branch for local development:: $ git checkout -b name-of-your-bugfix-or-feature Now you can make your changes locally. -5. When you're done making changes, check that your changes pass flake8 and the - tests, including testing other Python versions with tox:: +5. When you're done making changes, check that your changes pass flake8 and the + tests, including testing other Python versions with tox:: $ flake8 cubids tests $ python setup.py test or pytest $ tox - To get flake8 and tox, just pip install them into your virtualenv. + To get flake8 and tox, just pip install them into your virtualenv. -6. Commit your changes and push your branch to GitHub:: +6. Commit your changes and push your branch to GitHub:: $ git add . $ git commit -m "Your detailed description of your changes." $ git push origin name-of-your-bugfix-or-feature -7. Submit a pull request through the GitHub website. +7. Submit a pull request through the GitHub website. Pull Request Guidelines ----------------------- Before you submit a pull request, check that it meets these guidelines: -1. The pull request should include tests. -2. If the pull request adds functionality, the docs should be updated. Put - your new functionality into a function with a docstring, and add the - feature to the list in README.rst. -3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. Check - https://circleci.com/gh/PennLINC/CuBIDS - and make sure that the tests pass for all supported Python versions. +1. The pull request should include tests. +2. If the pull request adds functionality, the docs should be updated. Put + your new functionality into a function with a docstring, and add the + feature to the list in README.rst. +3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. + Check https://circleci.com/gh/PennLINC/CuBIDS + and make sure that the tests pass for all supported Python versions. 
Tips ---- To run a subset of tests:: -$ cd PATH/TO/LOCAL/CuBIDS/CLONE -$ py.test -sv --pdb tests + $ cd PATH/TO/LOCAL/CuBIDS/CLONE + $ py.test -sv --pdb tests Deploying @@ -122,8 +123,8 @@ A reminder for the maintainers on how to deploy. Make sure all your changes are committed (including an entry in HISTORY.rst). Then run:: -$ bump2version patch # possible: major / minor / patch -$ git push -$ git push --tags + $ bump2version patch # possible: major / minor / patch + $ git push + $ git push --tags CircleCI will then deploy to PyPI if tests pass. diff --git a/HISTORY.rst b/HISTORY.rst index 0d1123c89..03ec01d9f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,29 @@ History ======= +1.0.2 (2023-09-07) +------------------ + +* Add image orientation by @scovitz in https://github.com/PennLINC/CuBIDS/pull/205 +* review feedback milestone: adding code/CuBIDS option and converting CSVs to TSVs by @scovitz in https://github.com/PennLINC/CuBIDS/pull/217 +* Reviewer feedback incorporated into docs and pybids layout update by @scovitz in https://github.com/PennLINC/CuBIDS/pull/227 +* Data dictionaries by @scovitz in https://github.com/PennLINC/CuBIDS/pull/230 +* No index metadata by @scovitz in https://github.com/PennLINC/CuBIDS/pull/231 +* updated _update_json to no longer use pybids by @scovitz in https://github.com/PennLINC/CuBIDS/pull/232 +* Minor tune ups: codespell'ing (fixes + tox + CI (github actions)), remove of unintended to be committed 2 files by @yarikoptic in https://github.com/PennLINC/CuBIDS/pull/239 +* ENH: Make "NumVolumes" an integer for 3D images by @cookpa in https://github.com/PennLINC/CuBIDS/pull/211 +* adding note about fmap renamekeygroups by @megardn in https://github.com/PennLINC/CuBIDS/pull/140 +* Update usage.rst by @megardn in https://github.com/PennLINC/CuBIDS/pull/138 +* printing erroneous jsons and only rounding float parameters by @scovitz in https://github.com/PennLINC/CuBIDS/pull/257 + +New Contributors +````````````````` +* @yarikoptic made their first contribution in https://github.com/PennLINC/CuBIDS/pull/239 +* @cookpa made their first contribution in https://github.com/PennLINC/CuBIDS/pull/211 +* @megardn made their first contribution in https://github.com/PennLINC/CuBIDS/pull/140 + +**Full Changelog**: https://github.com/PennLINC/CuBIDS/compare/v1.0.1...1.0.2 + 0.1.0 (2020-10-07) ------------------ diff --git a/README.rst b/README.rst index 36a2d0167..db0440b24 100644 --- a/README.rst +++ b/README.rst @@ -18,16 +18,18 @@ About Curation of BIDS, or ``CuBIDS``, is a workflow and software package designed to facilitate reproducible curation of neuroimaging `BIDS `_ datasets. -CuBIDS breaks down BIDS dataset curation into four main components and addresses each one using -various command line programs complete with version control capabilities. These components are not necessarily linear but all are critical -in the process of preparing BIDS data for successful preprocessing and analysis pipeline runs. +CuBIDS breaks down BIDS dataset curation into four main components and addresses each one using +various command line programs complete with version control capabilities. +These components are not necessarily linear but all are critical +in the process of preparing BIDS data for successful preprocessing and analysis pipeline runs. 1. CuBIDS facilitates the validation of BIDS data. - 2. CuBIDS visualizes and summarizes the heterogeneity in a BIDS dataset. + 2. CuBIDS visualizes and summarizes the heterogeneity in a BIDS dataset. 3. 
CuBIDS helps users test pipelines on the entire parameter space of a BIDS dataset. 4. CuBIDS allows users to perform metadata-based quality control on their BIDS data. .. image:: https://github.com/PennLINC/CuBIDS/raw/main/docs/_static/cubids_workflow.png :width: 600 -For full documentation, please visit our `ReadTheDocs `_ \ No newline at end of file +For full documentation, please visit our +`ReadTheDocs `_ \ No newline at end of file diff --git a/cubids/cli.py b/cubids/cli.py index 40c2eca84..6fde0885e 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -1,8 +1,10 @@ """Console script for cubids.""" + import argparse import logging import os import warnings +from functools import partial from pathlib import Path from cubids import workflows @@ -14,14 +16,31 @@ logging.getLogger("datalad").setLevel(logging.ERROR) +def _path_exists(path, parser): + """Ensure a given path exists.""" + if path is None or not Path(path).exists(): + raise parser.error(f"Path does not exist: <{path}>.") + return Path(path).absolute() + + +def _is_file(path, parser): + """Ensure a given path exists and it is a file.""" + path = _path_exists(path, parser) + if not path.is_file(): + raise parser.error(f"Path should point to a file (or symlink of file): <{path}>.") + return path + + def _parse_validate(): parser = argparse.ArgumentParser( description="cubids-validate: Wrapper around the official BIDS Validator", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " @@ -61,16 +80,6 @@ def _parse_validate(): help="Disregard NIfTI header content during validation", required=False, ) - parser.add_argument( - "--ignore_subject_consistency", - action="store_true", - default=True, - help=( - "Skip checking that any given file for one " - "subject is present for all other subjects" - ), - required=False, - ) parser.add_argument( "--sequential-subjects", action="store", @@ -103,12 +112,19 @@ def _parse_bids_sidecar_merge(): description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) - parser.add_argument("from_json", type=Path, action="store", help="Source json file.") + IsFile = partial(_is_file, parser=parser) + + parser.add_argument( + "from_json", + type=IsFile, + action="store", + help="Source json file.", + ) parser.add_argument( "to_json", - type=Path, + type=IsFile, action="store", - help=("destination json. This file will have data from `from_json` copied into it."), + help="destination json. This file will have data from `from_json` copied into it.", ) return parser @@ -130,9 +146,11 @@ def _parse_group(): description="cubids-group: find key and parameter groups in BIDS", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " @@ -166,7 +184,14 @@ def _parse_group(): help=("Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( - "--config", action="store", type=Path, help="path to a config file for grouping" + "--config", + action="store", + type=PathExists, + default=None, + help=( + "Path to a config file for grouping. " + "If not provided, then the default config file from CuBIDS will be used." 
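The CLI changes above route argparse's ``type=`` through validators bound with ``functools.partial`` (``_path_exists``, ``_is_file``) so that bad paths fail at parse time. A minimal standalone sketch of that pattern, with a toy parser whose argument names are illustrative rather than the actual CuBIDS CLI definitions::

    import argparse
    from functools import partial
    from pathlib import Path


    def _path_exists(path, parser):
        """Ensure a given path exists (same logic as the helper added above)."""
        if path is None or not Path(path).exists():
            raise parser.error(f"Path does not exist: <{path}>.")
        return Path(path).absolute()


    if __name__ == "__main__":
        parser = argparse.ArgumentParser(description="toy parser")
        PathExists = partial(_path_exists, parser=parser)
        # argparse calls PathExists on the raw string, so a bad path fails at
        # parse time with a parser error instead of deep inside a workflow.
        parser.add_argument("bids_dir", type=PathExists)
        print(parser.parse_args().bids_dir)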
+ ), ) return parser @@ -187,9 +212,12 @@ def _parse_apply(): description=("cubids-apply: apply the changes specified in a tsv to a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + IsFile = partial(_is_file, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " @@ -198,7 +226,7 @@ def _parse_apply(): ) parser.add_argument( "edited_summary_tsv", - type=Path, + type=IsFile, action="store", help=( "path to the _summary.tsv that has been edited " @@ -210,7 +238,7 @@ def _parse_apply(): ) parser.add_argument( "files_tsv", - type=Path, + type=IsFile, action="store", help=( "path to the _files.tsv that has been edited " @@ -236,6 +264,7 @@ def _parse_apply(): parser.add_argument( "--use-datalad", action="store_true", + default=False, help="ensure that there are no untracked changes before finding groups", ) parser.add_argument( @@ -251,7 +280,14 @@ def _parse_apply(): help=("Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( - "--config", action="store", type=Path, help="path to a config file for grouping" + "--config", + action="store", + type=IsFile, + default=None, + help=( + "Path to a config file for grouping. " + "If not provided, then the default config file from CuBIDS will be used." + ), ) return parser @@ -273,16 +309,22 @@ def _parse_datalad_save(): description=("cubids-datalad-save: perform a DataLad save on a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) - parser.add_argument("-m", action="store", help="message for this commit") + parser.add_argument( + "-m", + action="store", + help="message for this commit", + ) parser.add_argument( "--container", action="store", @@ -309,9 +351,11 @@ def _parse_undo(): description="cubids-undo: revert most recent commit", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " @@ -346,41 +390,56 @@ def _parse_copy_exemplars(): ), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + IsFile = partial(_is_file, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", - help="path to the root of a BIDS dataset. " - "It should contain sub-X directories and " - "dataset_description.json.", + help=( + "path to the root of a BIDS dataset. " + "It should contain sub-X directories and " + "dataset_description.json." + ), ) parser.add_argument( "exemplars_dir", - type=Path, + type=PathExists, action="store", - help="absolute path to the root of a BIDS dataset " - "containing one subject from each Acquisition Group. " - "It should contain sub-X directories and " - "dataset_description.json.", + help=( + "absolute path to the root of a BIDS dataset " + "containing one subject from each Acquisition Group. " + "It should contain sub-X directories and " + "dataset_description.json." 
+ ), ) parser.add_argument( "exemplars_tsv", - type=Path, + type=IsFile, action="store", - help="absolute path to the .tsv file that lists one " - "subject from each Acquisition Group " - "(*_AcqGrouping.tsv from the cubids-group output)", + help=( + "absolute path to the .tsv file that lists one " + "subject from each Acquisition Group " + "(*_AcqGrouping.tsv from the cubids-group output)" + ), ) parser.add_argument( - "--use-datalad", action="store_true", help="check exemplar dataset into DataLad" + "--use-datalad", + action="store_true", + default=False, + help="check exemplar dataset into DataLad", ) parser.add_argument( "--min-group-size", action="store", default=1, - help="minimum number of subjects an Acquisition Group " - "must have in order to be included in the exemplar " - "dataset ", + type=int, + help=( + "minimum number of subjects an Acquisition Group " + "must have in order to be included in the exemplar " + "dataset " + ), required=False, ) # parser.add_argument('--include-groups', @@ -418,9 +477,11 @@ def _parse_add_nifti_info(): ), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "absolute path to the root of a BIDS dataset. " @@ -431,11 +492,13 @@ def _parse_add_nifti_info(): parser.add_argument( "--use-datalad", action="store_true", + default=False, help="ensure that there are no untracked changes before finding groups", ) parser.add_argument( "--force-unlock", action="store_true", + default=False, help="unlock dataset before adding nifti info ", ) parser.add_argument( @@ -463,9 +526,12 @@ def _parse_purge(): description="cubids-purge: purge associations from the dataset", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + IsFile = partial(_is_file, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "path to the root of a BIDS dataset. " @@ -475,13 +541,14 @@ def _parse_purge(): ) parser.add_argument( "scans", - type=Path, + type=IsFile, action="store", help="path to the txt file of scans whose associations should be purged.", ) parser.add_argument( "--use-datalad", action="store_true", + default=False, help="ensure that there are no untracked changes before finding groups", ) parser.add_argument( @@ -508,9 +575,11 @@ def _parse_remove_metadata_fields(): description="cubids-remove-metadata-fields: delete fields from metadata", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " @@ -534,6 +603,7 @@ def _parse_remove_metadata_fields(): def _enter_remove_metadata_fields(argv=None): + """Set entrypoint for "cubids-remove-metadata-fields" CLI.""" warnings.warn( "cubids-remove-metadata-fields is deprecated and will be removed in the future. 
" "Please use cubids remove-metadata-fields.", @@ -546,13 +616,16 @@ def _enter_remove_metadata_fields(argv=None): def _parse_print_metadata_fields(): + """Create the parser for the "cubids print-metadata-fields" command.""" parser = argparse.ArgumentParser( description="cubids-print-metadata-fields: print all unique metadata fields", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " diff --git a/cubids/config.py b/cubids/config.py index 59fd5a157..5d4948336 100644 --- a/cubids/config.py +++ b/cubids/config.py @@ -1,6 +1,4 @@ -""" -Functions for configuring CuBIDS -""" +"""Functions for configuring CuBIDS.""" from pathlib import Path @@ -9,8 +7,7 @@ def load_config(config_file): - """Loads a YAML file containing a configuration for param groups.""" - + """Load a YAML file containing a configuration for param groups.""" if config_file is None: config_file = Path(pkgrf("cubids", "data/config.yml")) diff --git a/cubids/constants.py b/cubids/constants.py index 21c8982cd..ec24b6691 100644 --- a/cubids/constants.py +++ b/cubids/constants.py @@ -1,7 +1,14 @@ """Constants for CuBIDS.""" + +# Names of identifier variables. +# Used to place KeyGroup and ParamGroup at the beginning of a dataframe, +# but both are hardcoded in the relevant function. ID_VARS = set(["KeyGroup", "ParamGroup", "FilePath"]) +# Entities that should not be used to group parameter sets NON_KEY_ENTITIES = set(["subject", "session", "extension"]) -# Multi-dimensional keys SliceTiming +# Multi-dimensional keys SliceTiming XXX: what is this line about? +# List of metadata fields and parameters (calculated by CuBIDS) +# Not sure what this specific list is used for. IMAGING_PARAMS = set( [ "ParallelReductionFactorInPlane", diff --git a/cubids/cubids.py b/cubids/cubids.py index 6fdba68b7..9195163aa 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1,4 +1,5 @@ """Main module.""" + import csv import json import os @@ -31,7 +32,58 @@ class CuBIDS(object): """The main CuBIDS class. - TODO: Complete docstring. + Parameters + ---------- + data_root : :obj:`str` + Path to the root of the BIDS dataset. + use_datalad : :obj:`bool`, optional + If True, use datalad to track changes to the BIDS dataset. + Default is False. + acq_group_level : :obj:`str`, optional + The level at which to group scans. Default is "subject". + grouping_config : :obj:`str`, optional + Path to the grouping config file. + Default is None, in which case the default config in CuBIDS is used. + force_unlock : :obj:`bool`, optional + If True, force unlock all files in the BIDS dataset. + Default is False. + + Attributes + ---------- + path : :obj:`str` + Path to the root of the BIDS dataset. + _layout : :obj:`bids.layout.BIDSLayout` + The BIDSLayout object. + keys_files : :obj:`dict` + A dictionary of key groups and the files that belong to them. + fieldmaps_cached : :obj:`bool` + If True, the fieldmaps have been cached. + datalad_ready : :obj:`bool` + If True, the datalad dataset has been initialized. + datalad_handle : :obj:`datalad.api.Dataset` + The datalad dataset handle. + old_filenames : :obj:`list` + A list of old filenames. + new_filenames : :obj:`list` + A list of new filenames. + IF_rename_paths : :obj:`list` + A list of IntendedFor paths that have been renamed. + grouping_config : :obj:`dict` + The grouping config dictionary. 
+ acq_group_level : :obj:`str` + The level at which to group scans. + scans_txt : :obj:`str` + Path to the .txt file that lists the scans + you want to be deleted from the dataset, along + with their associations. + force_unlock : :obj:`bool` + If True, force unlock all files in the BIDS dataset. + cubids_code_dir : :obj:`bool` + If True, the CuBIDS code directory exists. + data_dict : :obj:`dict` + A data dictionary for TSV outputs. + use_datalad : :obj:`bool` + If True, use datalad to track changes to the BIDS dataset. """ def __init__( @@ -68,7 +120,7 @@ def __init__( def layout(self): """Return the BIDSLayout object. - TODO: Complete docstring. + If the BIDSLayout object has not been created, create it. """ if self._layout is None: # print("SETTING LAYOUT OBJECT") @@ -79,7 +131,12 @@ def layout(self): def reset_bids_layout(self, validate=False): """Reset the BIDS layout. - TODO: Complete docstring. + This sets the ``_layout`` attribute to a new :obj:`bids.layout.BIDSLayout` object. + + Parameters + ---------- + validate : :obj:`bool`, optional + If True, validate the BIDS dataset. Default is False. """ # create BIDS Layout Indexer class @@ -99,7 +156,16 @@ def reset_bids_layout(self, validate=False): def create_cubids_code_dir(self): """Create CuBIDS code directory. - TODO: Complete docstring. + This creates the CuBIDS code directory at self.path/code/CuBIDS. + + Returns + ------- + :obj:`str` + Path to the CuBIDS code directory. + + Notes + ----- + Why not use ``os.makedirs``? """ # check if BIDS_ROOT/code/CuBIDS exists if not self.cubids_code_dir: @@ -109,7 +175,12 @@ def create_cubids_code_dir(self): return self.cubids_code_dir def init_datalad(self): - """Initialize a datalad Dataset at self.path.""" + """Initialize a datalad Dataset at self.path. + + This creates a datalad dataset at self.path and sets the + ``datalad_ready`` attribute to True. + It also sets the ``datalad_handle`` attribute to the datalad.Dataset object. + """ self.datalad_ready = True self.datalad_handle = dlapi.Dataset(self.path) @@ -138,7 +209,18 @@ def datalad_save(self, message=None): raise Exception("Failed to save in DataLad") def is_datalad_clean(self): - """If True, no changes are detected in the datalad dataset.""" + """If True, no changes are detected in the datalad dataset. + + Returns + ------- + :obj:`bool` + True if the datalad dataset is clean, False otherwise. + + Raises + ------ + Exception + If datalad has not been initialized. + """ if not self.datalad_ready: raise Exception("Datalad not initialized, can't determine status") statuses = set([status["state"] for status in self.datalad_handle.status()]) @@ -148,9 +230,14 @@ def datalad_undo_last_commit(self): """Revert the most recent commit, remove it from history. Uses git reset --hard to revert to the previous commit. + + Raises + ------ + Exception + If there are untracked changes in the datalad dataset. """ if not self.is_datalad_clean(): - raise Exception("Untracked changes present. " "Run clear_untracked_changes first") + raise Exception("Untracked changes present. Run clear_untracked_changes first") reset_proc = subprocess.run(["git", "reset", "--hard", "HEAD~1"], cwd=self.path) reset_proc.check_returncode() @@ -166,12 +253,14 @@ def add_nifti_info(self): # ignore all dot directories if "/." 
in str(path): continue + if str(path).endswith(".nii") or str(path).endswith(".nii.gz"): try: img = nb.load(str(path)) except Exception: print("Empty Nifti File: ", str(path)) continue + # get important info from niftis obliquity = np.any(nb.affines.obliquity(img.affine) > 1e-4) voxel_sizes = img.header.get_zooms() @@ -208,11 +297,13 @@ def add_nifti_info(self): orient = nb.orientations.aff2axcodes(img.affine) joined = "".join(orient) + "+" data["ImageOrientation"] = joined + with open(sidecar, "w") as file: json.dump(data, file, indent=4) if self.use_datalad: self.datalad_save(message="Added nifti info to sidecars") + self.reset_bids_layout() def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=True): @@ -224,10 +315,14 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T Parameters ---------- - summary_tsv - files_tsv - new_prefix + summary_tsv : :obj:`str` + Path to the edited summary tsv file. + files_tsv : :obj:`str` + Path to the edited files tsv file. + new_prefix : :obj:`str` + Path prefix to the new tsv files. raise_on_error : :obj:`bool` + If True, raise an error if the MergeInto column contains invalid merges. """ # reset lists of old and new filenames self.old_filenames = [] @@ -319,16 +414,13 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T full_cmd = "\n".join(merge_commands + move_ops) if full_cmd: - # write full_cmd to a .sh file - # Open file for writing - fileObject = open(new_prefix + "_full_cmd.sh", "w") - fileObject.write("#!/bin/bash\n") - fileObject.write(full_cmd) - # Close the file - fileObject.close() - renames = new_prefix + "_full_cmd.sh" + # write full_cmd to a .sh file + with open(renames, "w") as fo: + fo.write("#!/bin/bash\n") + fo.write(full_cmd) + if self.use_datalad: # first check if IntendedFor renames need to be saved if not self.is_datalad_clean(): @@ -366,19 +458,22 @@ def change_filename(self, filepath, entities): Parameters ---------- - filepath : str - Path prefix to a file in the affected key group change - entities : dictionary - A pybids dictionary of entities parsed from the new key - group name. + filepath : :obj:`str` + Path prefix to a file in the affected key group change. + entities : :obj:`dict` + A pybids dictionary of entities parsed from the new key group name. + + Notes + ----- + This is the function I need to spend the most time on, since it has entities hardcoded. """ exts = Path(filepath).suffixes - old_ext = "" - for ext in exts: - old_ext += ext + old_ext = "".join(exts) suffix = entities["suffix"] entity_file_keys = [] + + # Entities that may be in the filename? file_keys = ["task", "acquisition", "direction", "reconstruction", "run"] for key in file_keys: @@ -390,6 +485,7 @@ def change_filename(self, filepath, entities): sub_ses = sub + "_" + ses if "run" in list(entities.keys()) and "run-0" in filepath: + # XXX: This adds an extra leading zero to run. entities["run"] = "0" + str(entities["run"]) filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys]) @@ -401,28 +497,31 @@ def change_filename(self, filepath, entities): if len(filename) > 0: filename = sub_ses + "_" + filename + "_" + suffix + old_ext else: - filename = sub_ses + filename + "_" + suffix + old_ext + raise ValueError(f"Could not construct new filename for {filepath}") # CHECK TO SEE IF DATATYPE CHANGED + # datatype may be overridden/changed if the original file is located in the wrong folder. 
dtypes = ["anat", "func", "perf", "fmap", "dwi"] - old = "" + dtype_orig = "" for dtype in dtypes: if dtype in filepath: - old = dtype + dtype_orig = dtype if "datatype" in entities.keys(): - dtype = entities["datatype"] - if entities["datatype"] != old: + dtype_new = entities["datatype"] + if entities["datatype"] != dtype_orig: print("WARNING: DATATYPE CHANGE DETECETD") else: - dtype = old - new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype + "/" + filename + dtype_new = dtype_orig + + # Construct the new filename + new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype_new + "/" + filename - # add the scan path + new path to the lists of old, new filenames + # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) self.new_filenames.append(new_path) - # NOW NEED TO RENAME ASSOCIATIONS + # NOW NEED TO RENAME ASSOCIATED FILES # bids_file = self.layout.get_file(filepath) bids_file = filepath # associations = bids_file.get_associations() @@ -439,34 +538,35 @@ def change_filename(self, filepath, entities): self.new_filenames.append(new_ext_path) # MAKE SURE THESE AREN'T COVERED BY get_associations!!! + # Update DWI-specific files if "/dwi/" in filepath: # add the bval and bvec if there - if ( - Path(img_to_new_ext(filepath, ".bval")).exists() - and img_to_new_ext(filepath, ".bval") not in self.old_filenames - ): - self.old_filenames.append(img_to_new_ext(filepath, ".bval")) - self.new_filenames.append(img_to_new_ext(new_path, ".bval")) - - if ( - Path(img_to_new_ext(filepath, ".bvec")).exists() - and img_to_new_ext(filepath, ".bvec") not in self.old_filenames - ): - self.old_filenames.append(img_to_new_ext(filepath, ".bvec")) - self.new_filenames.append(img_to_new_ext(new_path, ".bvec")) - + bval_old = img_to_new_ext(filepath, ".bval") + bval_new = img_to_new_ext(new_path, ".bval") + if Path(bval_old).exists() and bval_old not in self.old_filenames: + self.old_filenames.append(bval_old) + self.new_filenames.append(bval_new) + + bvec_old = img_to_new_ext(filepath, ".bvec") + bvec_new = img_to_new_ext(new_path, ".bvec") + if Path(bvec_old).exists() and bvec_old not in self.old_filenames: + self.old_filenames.append(bvec_old) + self.new_filenames.append(bvec_new) + + # Update func-specific files # now rename _events and _physio files! 
old_suffix = parse_file_entities(filepath)["suffix"] scan_end = "_" + old_suffix + old_ext if "_task-" in filepath: old_events = filepath.replace(scan_end, "_events.tsv") - old_ejson = filepath.replace(scan_end, "_events.json") if Path(old_events).exists(): self.old_filenames.append(old_events) new_scan_end = "_" + suffix + old_ext new_events = new_path.replace(new_scan_end, "_events.tsv") self.new_filenames.append(new_events) + + old_ejson = filepath.replace(scan_end, "_events.json") if Path(old_ejson).exists(): self.old_filenames.append(old_ejson) new_scan_end = "_" + suffix + old_ext @@ -480,37 +580,63 @@ def change_filename(self, filepath, entities): new_physio = new_path.replace(new_scan_end, "_physio.tsv.gz") self.new_filenames.append(new_physio) + # Update ASL-specific files + if "/perf/" in filepath: + old_context = filepath.replace(scan_end, "_aslcontext.tsv") + if Path(old_context).exists(): + self.old_filenames.append(old_context) + new_scan_end = "_" + suffix + old_ext + new_context = new_path.replace(new_scan_end, "_aslcontext.tsv") + self.new_filenames.append(new_context) + + old_m0scan = filepath.replace(scan_end, "_m0scan.nii.gz") + if Path(old_m0scan).exists(): + self.old_filenames.append(old_m0scan) + new_scan_end = "_" + suffix + old_ext + new_m0scan = new_path.replace(new_scan_end, "_m0scan.nii.gz") + self.new_filenames.append(new_m0scan) + + old_mjson = filepath.replace(scan_end, "_m0scan.json") + if Path(old_mjson).exists(): + self.old_filenames.append(old_mjson) + new_scan_end = "_" + suffix + old_ext + new_mjson = new_path.replace(new_scan_end, "_m0scan.json") + self.new_filenames.append(new_mjson) + + old_labeling = filepath.replace(scan_end, "_asllabeling.jpg") + if Path(old_labeling).exists(): + self.old_filenames.append(old_labeling) + new_scan_end = "_" + suffix + old_ext + new_labeling = new_path.replace(new_scan_end, "_asllabeling.jpg") + self.new_filenames.append(new_labeling) + # RENAME INTENDED FORS! ses_path = self.path + "/" + sub + "/" + ses - for path in Path(ses_path).rglob("fmap/*.json"): - self.IF_rename_paths.append(str(path)) - # json_file = self.layout.get_file(str(path)) + files_with_if = [] + files_with_if += Path(ses_path).rglob("fmap/*.json") + files_with_if += Path(ses_path).rglob("perf/*_m0scan.json") + for path_with_if in files_with_if: + filename_with_if = str(path_with_if) + self.IF_rename_paths.append(filename_with_if) + # json_file = self.layout.get_file(filename_with_if) # data = json_file.get_dict() - data = get_sidecar_metadata(str(path)) + data = get_sidecar_metadata(filename_with_if) if data == "Erroneous sidecar": - print("Error parsing sidecar: ", str(path)) + print("Error parsing sidecar: ", filename_with_if) continue if "IntendedFor" in data.keys(): - # check if IntendedFor field is a str or list - if isinstance(data["IntendedFor"], str): - if data["IntendedFor"] == _get_intended_for_reference(filepath): - # replace old filename with new one (overwrite string) - data["IntendedFor"] = _get_intended_for_reference(new_path) - - # update the json with the new data dictionary - _update_json(str(path), data) - - if isinstance(data["IntendedFor"], list): - for item in data["IntendedFor"]: - if item in _get_intended_for_reference(filepath): - # remove old filename - data["IntendedFor"].remove(item) - # add new filename - data["IntendedFor"].append(_get_intended_for_reference(new_path)) - - # update the json with the new data dictionary - _update_json(str(path), data) + # Coerce IntendedFor to a list. 
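The comment above introduces a normalization step: IntendedFor is coerced to a list once and then edited in place, replacing the earlier string-versus-list branching. A rough standalone sketch of that pattern, using a hypothetical ``rename_intended_for`` helper and a stand-in ``listify`` (the import is not shown in this hunk)::

    import json


    def listify(value):
        """Stand-in for the list coercion used in the hunk (hypothetical helper)."""
        return value if isinstance(value, list) else [value]


    def rename_intended_for(sidecar_path, old_ref, new_ref):
        """Swap one IntendedFor entry for another in a fieldmap JSON sidecar."""
        with open(sidecar_path) as f:
            data = json.load(f)

        intended = listify(data.get("IntendedFor", []))
        if old_ref in intended:
            intended.remove(old_ref)
            intended.append(new_ref)
        data["IntendedFor"] = intended

        with open(sidecar_path, "w") as f:
            json.dump(data, f, indent=4)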
+ data["IntendedFor"] = listify(data["IntendedFor"]) + for item in data["IntendedFor"]: + if item in _get_intended_for_reference(filepath): + # remove old filename + data["IntendedFor"].remove(item) + # add new filename + data["IntendedFor"].append(_get_intended_for_reference(new_path)) + + # update the json with the new data dictionary + _update_json(filename_with_if, data) # save IntendedFor purges so that you can datalad run the # remove association file commands on a clean dataset @@ -529,16 +655,18 @@ def copy_exemplars(self, exemplars_dir, exemplars_tsv, min_group_size): Parameters ---------- - exemplars_dir : str + exemplars_dir : :obj:`str` path to the directory that will contain one subject from each Acqusition Group (*_AcqGrouping.tsv) example path: /Users/Covitz/tsvs/CCNP_Acq_Groups/ - exemplars_tsv : str + exemplars_tsv : :obj:`str` path to the .tsv file that lists one subject from each Acqusition Group (*_AcqGrouping.tsv from the cubids-group output) example path: /Users/Covitz/tsvs/CCNP_Acq_Grouping.tsv - min_group_size + min_group_size : :obj:`int` + Minimum number of subjects in an acq group for it to be included + in the exemplar dataset. """ # create the exemplar ds if self.use_datalad: @@ -558,11 +686,11 @@ def copy_exemplars(self, exemplars_dir, exemplars_tsv, min_group_size): subs = pd.read_table(exemplars_tsv) # if min group size flag set, drop acq groups with less than min - if int(min_group_size) > 1: + if min_group_size > 1: for row in range(len(subs)): acq_group = subs.loc[row, "AcqGroup"] size = int(subs["AcqGroup"].value_counts()[acq_group]) - if size < int(min_group_size): + if size < min_group_size: subs = subs.drop([row]) # get one sub from each acq group @@ -612,7 +740,13 @@ def purge(self, scans_txt): self._purge_associations(scans) def _purge_associations(self, scans): - """Purge field map JSONs' IntendedFor references.""" + """Purge field map JSONs' IntendedFor references. + + Parameters + ---------- + scans : :obj:`list` of :obj:`str` + List of file paths to remove from field map JSONs. 
+ """ # truncate all paths to intendedfor reference format # sub, ses, modality only (no self.path) if_scans = [] @@ -629,20 +763,14 @@ def _purge_associations(self, scans): # remove scan references in the IntendedFor if "IntendedFor" in data.keys(): - # check if IntendedFor field value is a list or a string - if isinstance(data["IntendedFor"], str): - if data["IntendedFor"] in if_scans: - data["IntendedFor"] = [] - # update the json with the new data dictionary - _update_json(str(path), data) + data["IntendedFor"] = listify(data["IntendedFor"]) - if isinstance(data["IntendedFor"], list): - for item in data["IntendedFor"]: - if item in if_scans: - data["IntendedFor"].remove(item) + for item in data["IntendedFor"]: + if item in if_scans: + data["IntendedFor"].remove(item) - # update the json with the new data dictionary - _update_json(str(path), data) + # update the json with the new data dictionary + _update_json(str(path), data) # save IntendedFor purges so that you can datalad run the # remove association file commands on a clean dataset @@ -675,6 +803,7 @@ def _purge_associations(self, scans): to_remove.append(img_to_new_ext(str(path), ".bval")) if Path(img_to_new_ext(str(path), ".bvec")).exists(): to_remove.append(img_to_new_ext(str(path), ".bvec")) + if "/func/" in str(path): # add tsvs tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") @@ -683,6 +812,7 @@ def _purge_associations(self, scans): # add tsv json (if exists) if Path(tsv.replace(".tsv", ".json")).exists(): to_remove.append(tsv.replace(".tsv", ".json")) + to_remove += scans # create rm commands for all files that need to be purged @@ -699,11 +829,10 @@ def _purge_associations(self, scans): path_prefix = str(Path(self.path).parent) - fileObject = open(path_prefix + "/" + "_full_cmd.sh", "w") - fileObject.write("#!/bin/bash\n") - fileObject.write(full_cmd) - # Close the file - fileObject.close() + with open(path_prefix + "/" + "_full_cmd.sh", "w") as fo: + fo.write("#!/bin/bash\n") + fo.write(full_cmd) + if self.scans_txt: cmt = f"Purged scans listed in {self.scans_txt} from dataset" else: @@ -725,13 +854,18 @@ def _purge_associations(self, scans): print("Not running any association removals") def get_nifti_associations(self, nifti): - """Get nifti associations.""" + """Get nifti associations. + + This uses globbing to find files with the same path, entities, and suffix as the NIfTI, + but with a different extension. 
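The new docstring describes the association lookup as a glob over files that share the image's stem but carry a different extension. A small sketch of that approach, with an illustrative helper name and no claim to match the CuBIDS implementation exactly::

    from pathlib import Path


    def nifti_associations(bids_root, nifti_path):
        """Collect sidecar-style files that share a NIfTI's stem (illustrative helper)."""
        stem = Path(nifti_path).name.split(".")[0]
        matches = []
        for path in Path(bids_root).rglob(f"sub-*/**/{stem}.*"):
            # keep .json, .bval, .bvec, etc.; skip the image itself (.nii / .nii.gz)
            if ".nii" not in path.suffixes:
                matches.append(str(path))
        return matches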
+ """ # get all assocation files of a nifti image no_ext_file = str(nifti).split("/")[-1].split(".")[0] associations = [] - for path in Path(self.path).rglob("sub-*/**/*.*"): - if no_ext_file in str(path) and ".nii.gz" not in str(path): + for path in Path(self.path).rglob(f"sub-*/**/{no_ext_file}.*"): + if ".nii.gz" not in str(path): associations.append(str(path)) + return associations def _cache_fieldmaps(self): @@ -809,6 +943,7 @@ def get_param_groups_from_key_group(self, key_group): for mod in modalities: if mod in filepath: modality = mod.replace("/", "").replace("/", "") + if modality == "": print("Unusual Modality Detected") modality = "other" @@ -1125,20 +1260,20 @@ def get_tsvs(self, path_prefix): summary_dict = self.get_data_dictionary(summary) # Save data dictionaires as JSONs - with open(path_prefix + "_files.json", "w") as outfile: + with open(f"{path_prefix}_files.json", "w") as outfile: json.dump(files_dict, outfile, indent=4) - with open(path_prefix + "_summary.json", "w") as outfile: + with open(f"{path_prefix}_summary.json", "w") as outfile: json.dump(summary_dict, outfile, indent=4) - big_df.to_csv(path_prefix + "_files.tsv", sep="\t", index=False) + big_df.to_csv(f"{path_prefix}_files.tsv", sep="\t", index=False) - summary.to_csv(path_prefix + "_summary.tsv", sep="\t", index=False) + summary.to_csv(f"{path_prefix}_summary.tsv", sep="\t", index=False) # Calculate the acq groups - group_by_acquisition_sets(path_prefix + "_files.tsv", path_prefix, self.acq_group_level) + group_by_acquisition_sets(f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) - print("CuBIDS detected " + str(len(summary)) + " Parameter Groups.") + print(f"CuBIDS detected {len(summary)} Parameter Groups.") def get_key_groups(self): """Identify the key groups for the bids dataset.""" @@ -1215,6 +1350,7 @@ def remove_metadata_fields(self, fields_to_remove): if ".git" not in str(json_file): with open(json_file, "r") as jsonr: metadata = json.load(jsonr) + offending_keys = remove_fields.intersection(metadata.keys()) # Quit if there are none in there if not offending_keys: @@ -1294,21 +1430,21 @@ def _get_param_groups( Parameters ---------- - files : list + files : :obj:`list` of :obj:`str` List of file names - fieldmap_lookup : defaultdict + fieldmap_lookup : :obj:`dict` mapping of filename strings relative to the bids root (e.g. "sub-X/ses-Y/func/sub-X_ses-Y_task-rest_bold.nii.gz") - grouping_config : dict + grouping_config : :obj:`dict` configuration for defining parameter groups Returns ------- - labeled_files : pd.DataFrame + labeled_files : :obj:`pandas.DataFrame` A data frame with one row per file where the ParamGroup column indicates which group each scan is a part of. - param_groups_with_counts : pd.DataFrame - A data frame with param group summaries + param_groups_with_counts : :obj:`pandas.DataFrame` + A data frame with param group summaries. 
""" if not files: print("WARNING: no files for", key_group_name) @@ -1393,7 +1529,7 @@ def _get_param_groups( # get the subset of columns to drop duplicates by check_cols = [] for col in list(df.columns): - if "Cluster_" + col not in list(df.columns) and col != "FilePath": + if f"Cluster_{col}" not in list(df.columns) and col != "FilePath": check_cols.append(col) # Find the unique ParamGroups and assign ID numbers in "ParamGroup"\ @@ -1446,13 +1582,14 @@ def _get_param_groups( def round_params(param_group_df, config, modality): - """Round parameters.""" + """Round columns' values in DataFrame according to requested precision.""" to_format = config["sidecar_params"][modality] to_format.update(config["derived_params"][modality]) for column_name, column_fmt in to_format.items(): if column_name not in param_group_df: continue + if "precision" in column_fmt: if isinstance(param_group_df[column_name], float): param_group_df[column_name] = param_group_df[column_name].round( @@ -1465,7 +1602,7 @@ def round_params(param_group_df, config, modality): def get_sidecar_metadata(json_file): """Get all metadata values in a file's sidecar. - Transform json dictionary to python dictionary. + Transform json dictionary to Python dictionary. """ try: with open(json_file) as json_file: @@ -1477,13 +1614,51 @@ def get_sidecar_metadata(json_file): def format_params(param_group_df, config, modality): - """Run AgglomerativeClustering on param groups and add columns to dataframe.""" + """Run AgglomerativeClustering on param groups and add columns to dataframe. + + Parameters + ---------- + param_group_df : :obj:`pandas.DataFrame` + A data frame with one row per file where the ParamGroup column + indicates which group each scan is a part of. + config : :obj:`dict` + Configuration for defining parameter groups. + This dictionary has two keys: ``'sidecar_params'`` and ``'derived_params'``. + modality : :obj:`str` + Modality of the scan. + This is used to select the correct configuration from the config dict. + + Returns + ------- + param_group_df : :obj:`pandas.DataFrame` + An updated version of the input data frame, + with a new column added for each element in the modality's + ``'sidecar_params'`` and ``'derived_params'`` dictionaries. + The new columns will have the name ``'Cluster_' + column_name``, + and will contain the cluster labels for each parameter group. + + Notes + ----- + ``'sidecar_params'`` is a dictionary of dictionaries, where keys are modalities. + The modality-wise dictionary's keys are names of BIDS fields to directly include + in the Parameter Groupings, + and the values describe the parameters by which those BIDS' fields are compared. + For example, + {"RepetitionTime": {"tolerance": 0.000001, "precision": 6, "suggest_variant_rename": True} + means that the RepetitionTime field should be compared across files and flagged as a + variant if it differs from others by 0.000001 or more. + + ``'derived_params'`` is a dictionary of dictionaries, where keys are modalities. + The modality-wise dictionary's keys are names of BIDS fields to derive from the + NIfTI header and include in the Parameter Groupings. 
+ """ to_format = config["sidecar_params"][modality] to_format.update(config["derived_params"][modality]) for column_name, column_fmt in to_format.items(): if column_name not in param_group_df: continue + if "tolerance" in column_fmt and len(param_group_df) > 1: array = param_group_df[column_name].to_numpy().reshape(-1, 1) @@ -1495,12 +1670,13 @@ def format_params(param_group_df, config, modality): clustering = AgglomerativeClustering( n_clusters=None, distance_threshold=tolerance, linkage="complete" ).fit(array) + for i in range(len(array)): if array[i, 0] == -999: array[i, 0] = np.nan # now add clustering_labels as a column - param_group_df["Cluster_" + column_name] = clustering.labels_ + param_group_df[f"Cluster_{column_name}"] = clustering.labels_ return param_group_df @@ -1510,6 +1686,12 @@ def _order_columns(df): This ensures that KeyGroup and ParamGroup are the first two columns, FilePath is the last, and the others are sorted alphabetically. + + Notes + ----- + This is the only place where the constant ID_VARS is used, + and the strings in that constant are hardcoded here, + so we might not need that constant at all. """ cols = set(df.columns.to_list()) non_id_cols = cols - ID_VARS @@ -1523,12 +1705,18 @@ def _order_columns(df): def img_to_new_ext(img_path, new_ext): - """Convert img to new extension.""" + """Convert img to new extension. + + Notes + ----- + The hardcoded suffix associated with each extension may not be comprehensive. + BIDS has been extended a lot in recent years. + """ # handle .tsv edge case if new_ext == ".tsv": # take out suffix return img_path.rpartition("_")[0] + "_events" + new_ext - if new_ext == ".tsv.gz": + elif new_ext == ".tsv.gz": return img_path.rpartition("_")[0] + "_physio" + new_ext else: return img_path.replace(".nii.gz", "").replace(".nii", "") + new_ext diff --git a/cubids/data/config.yml b/cubids/data/config.yml index ec7eb8ef7..eb442399f 100644 --- a/cubids/data/config.yml +++ b/cubids/data/config.yml @@ -1,28 +1,38 @@ # These are non-BIDS fields that can be added by CuBIDS derived_params: anat: + # Number of voxels in first dimension Dim1Size: suggest_variant_rename: yes + # Number of voxels in second dimension Dim2Size: suggest_variant_rename: yes + # Number of voxels in third dimension Dim3Size: suggest_variant_rename: yes + # Number of slice time values NSliceTimes: suggest_variant_rename: yes + # Number of volumes NumVolumes: suggest_variant_rename: yes + # Boolean indicating oblique acquisition Obliquity: suggest_variant_rename: yes + # String describing image orientation (e.g., LAS+) ImageOrientation: suggest_variant_rename: yes + # Size of voxels in first dimension, in mm VoxelSizeDim1: tolerance: 0.001 precision: 3 suggest_variant_rename: yes + # Size of voxels in second dimension, in mm VoxelSizeDim2: tolerance: 0.001 precision: 3 suggest_variant_rename: yes + # Size of voxels in third dimension, in mm VoxelSizeDim3: tolerance: 0.001 precision: 3 @@ -333,6 +343,18 @@ sidecar_params: precision: 6 suggest_variant_rename: yes perf: + ArterialSpinLabelingType: + suggest_variant_rename: yes + BackgroundSuppression: + suggest_variant_rename: yes + BackgroundSuppressionNumberPulses: + suggest_variant_rename: yes + BolusCutOffFlag: + suggest_variant_rename: yes + BolusCutOffTechnique: + suggest_variant_rename: yes + CASLType: + suggest_variant_rename: yes EchoTime: tolerance: 0.001 precision: 3 @@ -343,6 +365,17 @@ sidecar_params: suggest_variant_rename: yes FlipAngle: suggest_variant_rename: yes + LabelingDistance: + tolerance: 0.1 + 
suggest_variant_rename: yes + LabelingEfficiency: + tolerance: 0.001 + precision: 3 + suggest_variant_rename: yes + LookLocker: + suggest_variant_rename: yes + M0Type: + suggest_variant_rename: yes MultibandAccelerationFactor: suggest_variant_rename: yes NumberOfVolumesDiscardedByScanner: @@ -355,6 +388,10 @@ sidecar_params: suggest_variant_rename: yes PartialFourier: suggest_variant_rename: yes + PASLType: + suggest_variant_rename: yes + PCASLType: + suggest_variant_rename: yes PhaseEncodingDirection: suggest_variant_rename: yes RepetitionTime: @@ -367,6 +404,8 @@ sidecar_params: tolerance: 0.001 precision: 3 suggest_variant_rename: yes + VascularCrushing: + suggest_variant_rename: yes VolumeTiming: tolerance: 0.000001 precision: 6 @@ -409,4 +448,4 @@ sidecar_params: VolumeTiming: tolerance: 0.000001 precision: 6 - suggest_variant_rename: yes \ No newline at end of file + suggest_variant_rename: yes diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index b542e691c..5bd3c9579 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -1,4 +1,5 @@ """Tools for merging metadata.""" + import json from collections import defaultdict from copy import deepcopy @@ -15,7 +16,24 @@ def check_merging_operations(action_tsv, raise_on_error=False): """Check that the merges in an action tsv are possible. - To be mergeable the + Parameters + ---------- + action_tsv : :obj:`str` + Path to the action tsv file. + raise_on_error : :obj:`bool`, optional + Whether to raise an exception if there are errors. + + Returns + ------- + ok_merges : :obj:`list` + List of tuples of ok merges. + deletions : :obj:`list` + List of tuples of deletions. + + Raises + ------ + :obj:`Exception` + If there are errors and ``raise_on_error`` is ``True``. """ actions = pd.read_table(action_tsv) ok_merges = [] @@ -45,8 +63,10 @@ def _check_sdc_cols(meta1, meta2): print("going to delete ", dest_param_key) deletions.append(dest_param_key) continue + if not source_row.shape[0] == 1: raise Exception("Could not identify a unique source group") + source_metadata = source_row.iloc[0].to_dict() merge_id = (source_param_key, dest_param_key) # Check for compatible fieldmaps @@ -59,6 +79,7 @@ def _check_sdc_cols(meta1, meta2): ): overwrite_merges.append(merge_id) continue + # add to the list of ok merges if there are no conflicts ok_merges.append(merge_id) @@ -89,7 +110,9 @@ def _check_sdc_cols(meta1, meta2): if overwrite_merges or sdc_incompatible: if raise_on_error: raise Exception(error_message) + print(error_message) + return ok_merges, deletions @@ -99,6 +122,25 @@ def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False): Here, "safe" means that no non-NaN values in `dest_meta` are overwritten by the merge. If any overwrites occur an empty dictionary is returned. + + Parameters + ---------- + source_meta : :obj:`dict` + The metadata to merge from. + dest_meta_orig : :obj:`dict` + The metadata to merge into. + raise_on_error : :obj:`bool`, optional + Whether to raise an exception if there are errors. + + Returns + ------- + :obj:`dict` + The merged metadata. + + Raises + ------ + :obj:`Exception` + If there are errors and ``raise_on_error`` is ``True``. 
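As a stand-alone sketch of the "safe merge" rule this docstring describes (the real function additionally checks ``NSliceTimes`` and only iterates over ``DIRECT_IMAGING_PARAMS``), with a hypothetical helper name and made-up sidecar fragments::

    import math

    def merge_without_overwrite_sketch(source_meta, dest_meta):
        """Simplified illustration: fill gaps, never overwrite, bail on conflict."""
        merged = dict(dest_meta)
        for key, source_value in source_meta.items():
            dest_value = merged.get(key, float("nan"))
            dest_is_nan = isinstance(dest_value, float) and math.isnan(dest_value)
            if key not in merged or dest_is_nan:
                merged[key] = source_value  # safe: no non-NaN value is lost
            elif dest_value != source_value:
                return {}  # conflicting non-NaN value: refuse to merge
        return merged

    merge_without_overwrite_sketch({"EchoTime": 0.03}, {"FlipAngle": 80})
    # -> {'FlipAngle': 80, 'EchoTime': 0.03}
    merge_without_overwrite_sketch({"EchoTime": 0.03}, {"EchoTime": 0.05})
    # -> {}
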
""" # copy the original json params dest_meta = deepcopy(dest_meta_orig) @@ -111,6 +153,7 @@ def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False): % (source_meta.get("NSliceTimes"), source_meta.get("NSliceTimes")) ) return {} + for parameter in DIRECT_IMAGING_PARAMS: source_value = source_meta.get(parameter, nan) dest_value = dest_meta.get(parameter, nan) @@ -131,6 +174,7 @@ def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False): return {} dest_meta[parameter] = source_value + return dest_meta @@ -155,7 +199,24 @@ def print_merges(merge_list): def merge_json_into_json(from_file, to_file, raise_on_error=False): - """Merge imaging metadata into JSON.""" + """Merge imaging metadata into JSON. + + Parameters + ---------- + from_file : :obj:`str` + Path to the JSON file to merge from. + to_file : :obj:`str` + Path to the JSON file to merge into. + raise_on_error : :obj:`bool`, optional + Whether to raise an exception if there are errors. + Defaults to ``False``. + + Returns + ------- + :obj:`int` + Exit code. + Either 255 if there was an error or 0 if there was not. + """ print(f"Merging imaging metadata from {from_file} to {to_file}") with open(from_file, "r") as fromf: source_metadata = json.load(fromf) @@ -165,7 +226,9 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False): orig_dest_metadata = deepcopy(dest_metadata) merged_metadata = merge_without_overwrite( - source_metadata, dest_metadata, raise_on_error=raise_on_error + source_metadata, + dest_metadata, + raise_on_error=raise_on_error, ) if not merged_metadata: @@ -185,12 +248,12 @@ def get_acq_dictionary(): Parameters ---------- - df: Pandas DataFrame - Pre export TSV that will be converted to a json dictionary + df : :obj:`pandas.DataFrame` + Pre export TSV that will be converted to a json dictionary. Returns ------- - acq_dict: dictionary + acq_dict : :obj:`dict` Python dictionary in BIDS data dictionary format """ acq_dict = {} @@ -204,7 +267,24 @@ def get_acq_dictionary(): def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): - """Find unique sets of Key/Param groups across subjects.""" + """Find unique sets of Key/Param groups across subjects. + + This writes out the following files: + - _AcqGrouping.tsv: A tsv with the mapping of subject/session to + acquisition group. + - _AcqGrouping.json: A data dictionary for the AcqGrouping.tsv. + - _AcqGroupInfo.txt: A text file with the summary of acquisition. + - _AcqGroupInfo.json: A data dictionary for the AcqGroupInfo.txt. + + Parameters + ---------- + files_tsv : :obj:`str` + Path to the files tsv. + output_prefix : :obj:`str` + Prefix for output files. + acq_group_level : {"subject", "session"} + Level at which to group acquisitions. 
+ """ from bids import config from bids.layout import parse_file_entities diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 5a0c3aac1..15bfc35f0 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -1,4 +1,5 @@ """Tests for `cubids` package.""" + import json import os import subprocess diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index 6f786e46f..22263f9ba 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -1,4 +1,5 @@ """Utility functions for CuBIDS' tests.""" + import hashlib import json import os diff --git a/cubids/utils.py b/cubids/utils.py index 56704d8c6..09c02e349 100644 --- a/cubids/utils.py +++ b/cubids/utils.py @@ -1,10 +1,27 @@ """Miscellaneous utility functions for CuBIDS.""" + import re from pathlib import Path def _get_container_type(image_name): - """Get and return the container type.""" + """Get and return the container type. + + Parameters + ---------- + image_name : :obj:`str` + The name of the container image. + + Returns + ------- + :obj:`str` + The container type, either "docker" or "singularity". + + Raises + ------ + :obj:`Exception` + If the container type cannot be determined. + """ # If it's a file on disk, it must be a singularity image if Path(image_name).exists(): return "singularity" diff --git a/cubids/validator.py b/cubids/validator.py index 40a130b8c..01dad11c8 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -1,4 +1,5 @@ """Methods for validating BIDS datasets.""" + import glob import json import logging @@ -11,15 +12,14 @@ logger = logging.getLogger("cubids-cli") -def build_validator_call(path, ignore_headers=False, ignore_subject=True): +def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" # build docker call - command = ["bids-validator", "--verbose", "--json"] + # CuBIDS automatically ignores subject consistency. + command = ["bids-validator", "--verbose", "--json", "--ignoreSubjectConsistency"] if ignore_headers: command.append("--ignoreNiftiHeaders") - if ignore_subject: - command.append("--ignoreSubjectConsistency") command.append(path) @@ -39,7 +39,7 @@ def build_subject_paths(bids_dir): subjects = glob.glob(bids_dir) if len(subjects) < 1: - raise ValueError("Couldn't find any subjects " "in the specified directory:\n" + bids_dir) + raise ValueError("Couldn't find any subjects in the specified directory:\n" + bids_dir) subjects_dict = {} @@ -55,7 +55,18 @@ def build_subject_paths(bids_dir): def run_validator(call): - """Run the validator with subprocess.""" + """Run the validator with subprocess. + + Parameters + ---------- + call : :obj:`list` + List of strings to pass to subprocess.run(). + + Returns + ------- + :obj:`subprocess.CompletedProcess` + The result of the subprocess call. + """ # if verbose: # logger.info("Running the validator with call:") # logger.info('\"' + ' '.join(call) + '\"') @@ -69,15 +80,30 @@ def parse_validator_output(output): Parameters ---------- - path : string + output : :obj:`str` Path to JSON file of BIDS validator output Returns ------- - Pandas DataFrame + df : :obj:`pandas.DataFrame` + Dataframe of validator output. """ def get_nested(dct, *keys): + """Get a nested value from a dictionary. + + Parameters + ---------- + dct : :obj:`dict` + Dictionary to get value from. + keys : :obj:`list` + List of keys to get value from. + + Returns + ------- + :obj:`dict` + The nested value. 
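Inside ``parse_validator_output``, the ``get_nested`` helper walks dictionaries shaped like the hypothetical issue entry below (the same shape ``parse_issue`` consumes)::

    entry = {"file": {"relativePath": "sub-01/anat/sub-01_T1w.nii.gz"}}

    get_nested(entry, "file", "relativePath")
    # -> 'sub-01/anat/sub-01_T1w.nii.gz'
    # A missing key is handled by the try/except in the helper
    # (presumably returning a default) rather than crashing the parser.
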
+ """ for key in keys: try: dct = dct[key] @@ -90,11 +116,23 @@ def get_nested(dct, *keys): issues = data["issues"] def parse_issue(issue_dict): + """Parse a single issue from the validator output. + + Parameters + ---------- + issue_dict : :obj:`dict` + Dictionary of issue. + + Returns + ------- + return_dict : :obj:`dict` + Dictionary of parsed issue. + """ return_dict = {} return_dict["files"] = [ get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "") ] - return_dict["type"] = issue_dict.get("key" "") + return_dict["type"] = issue_dict.get("key", "") return_dict["severity"] = issue_dict.get("severity", "") return_dict["description"] = issue_dict.get("reason", "") return_dict["code"] = issue_dict.get("code", "") @@ -118,7 +156,13 @@ def parse_issue(issue_dict): def get_val_dictionary(): - """Get value dictionary.""" + """Get value dictionary. + + Returns + ------- + val_dict : dict + Dictionary of values. + """ val_dict = {} val_dict["files"] = {"Description": "File with warning orerror"} val_dict["type"] = {"Description": "BIDS validation warning or error"} diff --git a/cubids/workflows.py b/cubids/workflows.py index eea9bfa50..7984216b6 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -1,4 +1,5 @@ """First order workflows in CuBIDS.""" + import json import logging import os @@ -37,19 +38,23 @@ def validate( sequential, sequential_subjects, ignore_nifti_headers, - ignore_subject_consistency, ): """Run the bids validator. Parameters ---------- - bids_dir - output_prefix - container - sequential - sequential_subjects - ignore_nifti_headers - ignore_subject_consistency + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + output_prefix : :obj:`pathlib.Path` + Output filename prefix. + container : :obj:`str` + Container in which to run the workflow. + sequential : :obj:`bool` + Run the validator sequentially. + sequential_subjects : :obj:`list` of :obj:`str` + Filter the sequential run to only include the listed subjects. + ignore_nifti_headers : :obj:`bool` + Ignore NIfTI headers when validating. """ # check status of output_prefix, absolute or relative? 
abs_path_output = True @@ -57,10 +62,10 @@ def validate( # not an absolute path --> put in code/CuBIDS dir abs_path_output = False # check if code/CuBIDS dir exists - if not Path(str(bids_dir) + "/code/CuBIDS").is_dir(): + if not (bids_dir / "code" / "CuBIDS").is_dir(): # if not, create it - subprocess.run(["mkdir", str(bids_dir) + "/code"]) - subprocess.run(["mkdir", str(bids_dir) + "/code/CuBIDS/"]) + subprocess.run(["mkdir", str(bids_dir / "code")]) + subprocess.run(["mkdir", str(bids_dir / "code" / "CuBIDS")]) # Run directly from python using subprocess if container is None: @@ -69,7 +74,6 @@ def validate( call = build_validator_call( str(bids_dir), ignore_nifti_headers, - ignore_subject_consistency, ) ret = run_validator(call) @@ -148,8 +152,7 @@ def validate( # run the validator nifti_head = ignore_nifti_headers - subj_consist = ignore_subject_consistency - call = build_validator_call(tmpdirname, nifti_head, subj_consist) + call = build_validator_call(tmpdirname, nifti_head) ret = run_validator(call) # parse output if ret.returncode != 0: @@ -228,9 +231,6 @@ def validate( if ignore_nifti_headers: cmd.append("--ignore_nifti_headers") - if ignore_subject_consistency: - cmd.append("--ignore_subject_consistency") - elif container_type == "singularity": cmd = [ "singularity", @@ -250,9 +250,6 @@ def validate( if ignore_nifti_headers: cmd.append("--ignore_nifti_headers") - if ignore_subject_consistency: - cmd.append("--ignore_subject_consistency") - if sequential: cmd.append("--sequential") @@ -272,11 +269,16 @@ def group(bids_dir, container, acq_group_level, config, output_prefix): Parameters ---------- - bids_dir - container - acq_group_level - config - output_prefix + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + acq_group_level : {"subject", "session"} + Level at which acquisition groups are created. + config : :obj:`pathlib.Path` + Path to the grouping config file. + output_prefix : :obj:`pathlib.Path` + Output filename prefix. """ # Run directly from python using if container is None: @@ -367,16 +369,26 @@ def apply( Parameters ---------- - bids_dir - use_datalad - acq_group_level - config - edited_summary_tsv - edited_tsv_prefix - files_tsv - new_tsv_prefix - output_prefix - container + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + use_datalad : :obj:`bool` + Use datalad to track changes. + acq_group_level : {"subject", "session"} + Level at which acquisition groups are created. + config : :obj:`pathlib.Path` + Path to the grouping config file. + edited_summary_tsv : :obj:`pathlib.Path` + Path to the edited summary tsv. + edited_tsv_prefix : :obj:`pathlib.Path` + Path to the edited tsv prefix. + files_tsv : :obj:`pathlib.Path` + Path to the files tsv. + new_tsv_prefix : :obj:`pathlib.Path` + Path to the new tsv prefix. + output_prefix : :obj:`pathlib.Path` + Output filename prefix. + container : :obj:`str` + Container in which to run the workflow. """ # Run directly from python using if container is None: @@ -481,18 +493,17 @@ def apply( sys.exit(proc.returncode) -def datalad_save( - bids_dir, - container, - m, -): +def datalad_save(bids_dir, container, m): """Perform datalad save. Parameters ---------- - bids_dir - container - m + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + m : :obj:`str` + Commit message. 
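A hypothetical call to this workflow; the path and commit message are placeholders::

    from pathlib import Path
    from cubids.workflows import datalad_save

    datalad_save(
        bids_dir=Path("/data/my_bids_dataset"),  # placeholder BIDS directory
        container=None,                          # run directly, not in Docker/Singularity
        m="Apply CuBIDS metadata merges",        # commit message for the DataLad save
    )
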
""" # Run directly from python using if container is None: @@ -542,8 +553,10 @@ def undo(bids_dir, container): Parameters ---------- - bids_dir - container + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. """ # Run directly from python using if container is None: @@ -597,13 +610,20 @@ def copy_exemplars( Parameters ---------- - bids_dir - container - use_datalad - exemplars_dir - exemplars_tsv - min_group_size - force_unlock + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + use_datalad : :obj:`bool` + Use datalad to track changes. + exemplars_dir : :obj:`pathlib.Path` + Path to the directory where the exemplars will be saved. + exemplars_tsv : :obj:`pathlib.Path` + Path to the tsv file with the exemplars. + min_group_size : :obj:`int` + Minimum number of subjects in a group to be considered for exemplar. + force_unlock : :obj:`bool` + Force unlock the dataset. """ # Run directly from python using if container is None: @@ -650,8 +670,10 @@ def copy_exemplars( if force_unlock: cmd.append("--force-unlock") + if min_group_size: cmd.append("--min-group-size") + elif container_type == "singularity": cmd = [ "singularity", @@ -685,10 +707,14 @@ def add_nifti_info(bids_dir, container, use_datalad, force_unlock): Parameters ---------- - bids_dir - container - use_datalad - force_unlock + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + use_datalad : :obj:`bool` + Use datalad to track changes. + force_unlock : :obj:`bool` + Force unlock the dataset. """ # Run directly from python using if container is None: @@ -749,10 +775,14 @@ def purge(bids_dir, container, use_datalad, scans): Parameters ---------- - bids_dir - container - use_datalad - scans + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + use_datalad : :obj:`bool` + Use datalad to track changes. + scans : :obj:`pathlib.Path` + Path to the scans tsv. """ # Run directly from python using if container is None: @@ -811,9 +841,12 @@ def remove_metadata_fields(bids_dir, container, fields): Parameters ---------- - bids_dir - container - fields + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + fields : :obj:`list` of :obj:`str` + List of fields to remove. """ # Run directly from python if container is None: @@ -859,8 +892,10 @@ def print_metadata_fields(bids_dir, container): Parameters ---------- - bids_dir - container + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. """ # Run directly from python if container is None: diff --git a/docs/about.rst b/docs/about.rst index b07bb8b97..6dee284b0 100644 --- a/docs/about.rst +++ b/docs/about.rst @@ -1,5 +1,5 @@ =================== -Background +Background =================== Motivation @@ -19,7 +19,7 @@ their computational time and resources** effectively. ``CuBIDS`` is designed to facilitate the curation of large, neuroimaging data so that users can infer useful information from descriptive and accurate BIDS labels -before running pipelines *en masse*. ``CuBIDS`` accomplishes this by summarizing +before running pipelines *en masse*. 
``CuBIDS`` accomplishes this by summarizing BIDS data using :ref:`keygroup`, :ref:`paramgroup`, and :ref:`acquisitiongroup` categorizations in your data (we'll explain what these are in more detail in the next section). @@ -40,7 +40,7 @@ Definitions * A set of scans whose filenames share all `BIDS filename key-value pairs `_, excluding subject and session * Derived from the BIDS Filename - * Example structure: ``acquisition-*_datatype-*_run-*_task-*_suffix`` + * Example structure: ``acquisition-*_datatype-*_run-*_task-*_suffix`` .. topic:: Parameter (Param) Group @@ -53,15 +53,15 @@ Definitions * The Param Group that contains the most scans in its Key Group .. topic:: Variant Group - + * Any Param Group that is non-dominant .. topic:: Rename Key Group - * Auto-generated, recommended new Key Group name for Variant Groups - * Based on the metadata parameters that cause scans in Variant Groups to vary from those in their respective Dominant Groups + * Auto-generated, recommended new Key Group name for Variant Groups + * Based on the metadata parameters that cause scans in Variant Groups to vary from those in their respective Dominant Groups -.. topic:: Acquisition Group +.. topic:: Acquisition Group * A collection of sessions across participants that contains the exact same set of Key and Param Groups @@ -85,4 +85,4 @@ In the next section, we'll discuss these definitions in more detail and demonstr .. [#f1] See the `BIDS Specification `_. .. [#f2] See this list of amazing `BIDS apps `_. -.. [#f3] See `DataLad `_. \ No newline at end of file +.. [#f3] See `DataLad `_. diff --git a/docs/authors.rst b/docs/authors.rst deleted file mode 100644 index e122f914a..000000000 --- a/docs/authors.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../AUTHORS.rst diff --git a/docs/conf.py b/docs/conf.py index 218cd3531..6b1bae97c 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,60 +16,61 @@ # directory, add these directories to sys.path here. If the directory is # relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. -# import os import sys -sys.path.insert(0, os.path.abspath('..')) -from sphinx import __version__ as sphinxversion + +sys.path.insert(0, os.path.abspath("..")) + import cubids -from packaging import version as pver # Avoid distutils.LooseVersion which is deprecated # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.append(os.path.abspath('sphinxext')) -sys.path.insert(0, os.path.abspath('../wrapper')) +sys.path.append(os.path.abspath("sphinxext")) +sys.path.insert(0, os.path.abspath("../wrapper")) # -- General configuration --------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # -needs_sphinx = '1.5.3' +needs_sphinx = "1.5.3" # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinxarg.ext', # argparse extension - 'sphinx.ext.viewcode' + "nbsphinx", + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinxarg.ext", # argparse extension + "sphinx.ext.viewcode", + "sphinx_gallery.load_style", ] # Mock modules in autodoc: autodoc_mock_imports = [ - 'numpy', - 'nitime', - 'matplotlib', + "numpy", + "nitime", + "matplotlib", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'CuBIDS' +project = "CuBIDS" copyright = "2020, PennLINC" author = "PennLINC" @@ -87,15 +88,15 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = 'en' +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -106,8 +107,10 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' -html_theme_path = ["_themes", ] +html_theme = "sphinx_rtd_theme" +html_theme_path = [ + "_themes", +] # Theme options are theme-specific and customize the look and feel of a # theme further. For a list of options available for each theme, see the @@ -118,13 +121,13 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # -- Options for HTMLHelp output --------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'cubidsdoc' +htmlhelp_basename = "cubidsdoc" # -- Options for LaTeX output ------------------------------------------ @@ -133,15 +136,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -151,9 +151,7 @@ # (source start file, target name, title, author, documentclass # [howto, manual, or own class]). latex_documents = [ - (master_doc, 'cubids.tex', - 'CuBIDS Documentation', - 'PennLINC', 'manual'), + (master_doc, "cubids.tex", "CuBIDS Documentation", "PennLINC", "manual"), ] @@ -161,11 +159,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
-man_pages = [ - (master_doc, 'cubids', - 'CuBIDS Documentation', - [author], 1) -] +man_pages = [(master_doc, "cubids", "CuBIDS Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------- @@ -174,14 +168,16 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'cubids', - 'CuBIDS Documentation', - author, - 'cubids', - 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "cubids", + "CuBIDS Documentation", + author, + "cubids", + "One line description of project.", + "Miscellaneous", + ), ] # -- Fix automodule config add_module_names = False - diff --git a/docs/contributing.rst b/docs/contributing.rst deleted file mode 100644 index e582053ea..000000000 --- a/docs/contributing.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../CONTRIBUTING.rst diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 000000000..a9ed3a6ae --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,14 @@ +Thumbnails gallery +================== + +.. nbgallery:: + notebooks/Fieldmaps + notebooks/FirstProofofConcept + notebooks/HTML_param_groups + notebooks/JSON_PoC_read_write + notebooks/Key_and_Param_Groups + notebooks/keyparamgrouptest + notebooks/metadata_image_param + notebooks/PofC_Key_Values2 + notebooks/rename_files_work + notebooks/workwithtestdata diff --git a/docs/history.rst b/docs/history.rst deleted file mode 100644 index 250649964..000000000 --- a/docs/history.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../HISTORY.rst diff --git a/docs/index.rst b/docs/index.rst index d2daba54f..a492452e6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,6 +12,7 @@ Contents usage installation example - contributing - authors - history + examples + ../CONTRIBUTING + ../AUTHORS + ../HISTORY diff --git a/notebooks/Fieldmaps.ipynb b/docs/notebooks/Fieldmaps.ipynb similarity index 94% rename from notebooks/Fieldmaps.ipynb rename to docs/notebooks/Fieldmaps.ipynb index 0c14d5289..13ab18290 100644 --- a/notebooks/Fieldmaps.ipynb +++ b/docs/notebooks/Fieldmaps.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Fieldmaps OK?\n", + "# Check If Field Maps Are Defined For a Dataset\n", "\n", "This notebook shows how we check if fieldmaps are defined for the data set. There are two approaches:\n", "\n", @@ -29,19 +29,19 @@ "metadata": {}, "outputs": [], "source": [ - "# USE THIS BEFORE TESTING! 
\n", - "import sys \n", + "# USE THIS BEFORE TESTING!\n", + "import sys\n", "sys.path.append(\"..\")\n", - "from pathlib import Path \n", + "from pathlib import Path\n", "import shutil\n", "import os\n", "\n", - "from pkg_resources import resource_filename as pkgrf \n", + "from pkg_resources import resource_filename as pkgrf\n", "\n", "# returns string path to testdata\n", "TEST_DATA = pkgrf(\"cubids\", \"testdata\")\n", "\n", - "# should give you the full path \n", + "# should give you the full path\n", "tmp_path = Path().resolve()\n", "#print(tmp_path)\n", "\n", @@ -100,10 +100,10 @@ "import json\n", "\n", "def read_intendedfor(path):\n", - " \n", + "\n", " with open(str(path), 'r') as infile:\n", " data = json.load(infile)\n", - " \n", + "\n", " return data.get('IntendedFor')" ] }, @@ -143,9 +143,9 @@ "mapping = {}\n", "\n", "for fm in fmaps:\n", - " \n", + "\n", " intfor = read_intendedfor(fm)\n", - " \n", + "\n", " mapping[str(fm)] = intfor" ] }, @@ -184,21 +184,21 @@ "all_files = [str(x) for x in pathlib.Path(data_root).rglob(\"*.nii*\")]\n", "\n", "for k, v in mapping.items():\n", - " \n", + "\n", " if not v:\n", - " \n", + "\n", " print(\"{}: This fieldmap is not intended for any files!\".format(k))\n", - " \n", + "\n", " continue\n", - " \n", + "\n", " for fi in v:\n", - " \n", + "\n", " if any([fi in x for x in all_files]):\n", - " \n", + "\n", " print(\"{}: This fieldmap has a file\".format(k))\n", - " \n", + "\n", " else:\n", - " \n", + "\n", " print(\"{}: The file this fieldmap is intended for doesn't exist\".format(k))" ] }, diff --git a/notebooks/FirstProofofConcept.ipynb b/docs/notebooks/FirstProofofConcept.ipynb similarity index 100% rename from notebooks/FirstProofofConcept.ipynb rename to docs/notebooks/FirstProofofConcept.ipynb diff --git a/notebooks/HTML_param_groups.ipynb b/docs/notebooks/HTML_param_groups.ipynb similarity index 94% rename from notebooks/HTML_param_groups.ipynb rename to docs/notebooks/HTML_param_groups.ipynb index f04a82b10..e9854a736 100644 --- a/notebooks/HTML_param_groups.ipynb +++ b/docs/notebooks/HTML_param_groups.ipynb @@ -1,12 +1,19 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# HTML Param Groups" + ] + }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "import sys \n", + "import sys\n", "sys.path.append(\"..\")" ] }, @@ -27,20 +34,20 @@ } ], "source": [ - "# USE THIS BEFORE TESTING! \n", + "# USE THIS BEFORE TESTING!\n", "\n", - "from pathlib import Path \n", + "from pathlib import Path\n", "import shutil\n", "import os\n", "#import cubids\n", - "from bids.layout import parse_file_entities \n", + "from bids.layout import parse_file_entities\n", "from cubids import CuBIDS\n", - "from pkg_resources import resource_filename as pkgrf \n", + "from pkg_resources import resource_filename as pkgrf\n", "\n", "# returns string path to testdata\n", "TEST_DATA = pkgrf(\"cubids\", \"testdata\")\n", "\n", - "# should give you the full path \n", + "# should give you the full path\n", "tmp_path = Path().resolve()\n", "#print(tmp_path)\n", "\n", @@ -235,7 +242,7 @@ "\n", "\n", "\n", - "# ISSUE! Grouping by char! \n", + "# ISSUE! 
Grouping by char!\n", "\n", "#param_group = cubids_obj.get_param_groups(key_group)\n", "# print(key_group)\n", @@ -254,14 +261,14 @@ "\n", "def file_to_entities(filename):\n", " entities = parse_file_entities(str(filename))\n", - " return entities \n", + " return entities\n", "\n", - "def file_to_key_group(filename): \n", + "def file_to_key_group(filename):\n", " entities = parse_file_entities(str(filename))\n", " keys = entities_to_key_group(entities)\n", " return keys\n", "\n", - "def key_group_to_entities(key_group): \n", + "def key_group_to_entities(key_group):\n", " return dict([group.split(\"-\") for group in key_group.split(\"_\")])\n", "\n", "def get_file_params(files):\n", @@ -276,14 +283,14 @@ " Returns:\n", " --------\n", "\n", - " files_params : dictionary \n", + " files_params : dictionary\n", " A dictionary of filename, param_dict pairs\n", "\n", " For each file in `files`, find critical parameters for metadata. Then find\n", " unique sets of these critical parameters.\n", " \"\"\"\n", "\n", - " # # DICTIONARY OF FILENAME, DICT_PARAMS \n", + " # # DICTIONARY OF FILENAME, DICT_PARAMS\n", " files_params = {}\n", " for path in files:\n", " metadata = self.layout.get_metadata(path)\n", @@ -301,11 +308,11 @@ " SliceNum, time in enumerate(SliceTime)})\n", " del example_data['SliceTiming']\n", "\n", - " # ADD TO THE DICTIONARY \n", - " files_params[path] = example_data \n", + " # ADD TO THE DICTIONARY\n", + " files_params[path] = example_data\n", "\n", " return files_params\n", - " \n", + "\n", "\n", "#def get_param_groups(key_group, path):\n", "# key_entities = key_group_to_entities(key_group)\n", @@ -319,8 +326,8 @@ "filename = \"/Users/Covitz/CuBIDS/cubids/testdata/complete/sub-01/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_magnitude1.json\"\n", "path = \"/Users/Covitz/CuBIDS/cubids/testdata/complete/\"\n", "ret_entities = file_to_entities(filename)\n", - "print(ret_entities) \n", - "key_group = entities_to_key_group(ret_entities) \n", + "print(ret_entities)\n", + "key_group = entities_to_key_group(ret_entities)\n", "print(key_group)\n", "\n", "entities = key_group_to_entities(key_group)\n", @@ -360,8 +367,8 @@ "source": [ "\n", "\n", - "# IMPORT SET TRACE \n", - "# assert 0, debug \n", + "# IMPORT SET TRACE\n", + "# assert 0, debug\n", "\n", "\n", "\n", @@ -408,15 +415,15 @@ "metadata": {}, "outputs": [], "source": [ - "import pathlib \n", + "import pathlib\n", "\n", "# @Params\n", - "# - path: a string containing the path to the bids directory inside which we want to change files \n", + "# - path: a string containing the path to the bids directory inside which we want to change files\n", "# @Returns\n", - "# - HTML report of acquisitions and their parameter groups \n", + "# - HTML report of acquisitions and their parameter groups\n", "\n", "\n", - "# WHERE DO WE FIND THE ACQUISITION TYPE? 
\n", + "# WHERE DO WE FIND THE ACQUISITION TYPE?\n", "\n", "\n", "\n", @@ -434,23 +441,23 @@ "\n", "def html_groups(bids_dir):\n", " # get key groups using cubids.get_key_groups\n", - " # use key_group_to_entities to get entities \n", - " # get param groups for each entity \n", - " \n", - " \n", - " # initialize dictionary of acquisition types \n", + " # use key_group_to_entities to get entities\n", + " # get param groups for each entity\n", + "\n", + "\n", + " # initialize dictionary of acquisition types\n", " d_acts = {}\n", " for path in pathlib.Path(path_to_dir).iterdir():\n", " if path.is_file():\n", " ext = path.suffix\n", - " # check if the file is a .json file \n", + " # check if the file is a .json file\n", " if ext == \".json\":\n", " # parse keys\n", " d_keys = parse_file_entities(path)\n", - " \n", - " \n", - " \n", - " # create html file \n" + "\n", + "\n", + "\n", + " # create html file\n" ] }, { diff --git a/notebooks/JSON_PoC_read_write.ipynb b/docs/notebooks/JSON_PoC_read_write.ipynb similarity index 98% rename from notebooks/JSON_PoC_read_write.ipynb rename to docs/notebooks/JSON_PoC_read_write.ipynb index 6e57b6c4b..db1526811 100644 --- a/notebooks/JSON_PoC_read_write.ipynb +++ b/docs/notebooks/JSON_PoC_read_write.ipynb @@ -4,6 +4,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "# JSON Proof of Concept\n", + "\n", "In this proof of concept we will read & write JSON files in Jupyter notebook. \n", "\n", "1. display the data in the sidecar \n", @@ -17,11 +19,11 @@ "metadata": {}, "outputs": [], "source": [ - "#import json module to be able to read & write json files \n", + "#import json module to be able to read & write json files\n", "import json\n", "import pandas as pd\n", "from pandas.io.json import json_normalize\n", - "from glob import glob \n", + "from glob import glob\n", "from pathlib import Path" ] }, @@ -136,19 +138,19 @@ } ], "source": [ - "#testing the code with a single json file. \n", + "#testing the code with a single json file.\n", "\n", "file_test = open('/Users/bjaber/Projects/CuBIDS-use_cases/cubids/testdata/complete/sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.json')\n", "sample_data = json.load(file_test)\n", "sample_data.keys()\n", "sample_data.get('SliceTiming')\n", - "SliceTime = sample_data.get('SliceTiming') #the way you can snatch things out of a dictionary \n", + "SliceTime = sample_data.get('SliceTiming') #the way you can snatch things out of a dictionary\n", "#if dict doesn't have the key it will return none vs. error\n", "\n", - "if SliceTime: \n", + "if SliceTime:\n", " sample_data.update({\"SliceTime%03d\"%SliceNum : time for SliceNum, time in enumerate(SliceTime)})\n", " del sample_data['SliceTiming']\n", - " \n", + "\n", "array_data = pd.DataFrame.from_dict(sample_data, orient='index')\n", "array_data" ] @@ -198,7 +200,7 @@ "metadata": {}, "outputs": [], "source": [ - "#Here we change the value for AcquisionNumber from 1 to 2. 
\n", + "#Here we change the value for AcquisionNumber from 1 to 2.\n", "#json_data[\"AcquisitionNumber\"] = 2" ] }, @@ -248,7 +250,7 @@ "metadata": {}, "outputs": [], "source": [ - "#Uncomment below to view the python object as a JSON string \n", + "#Uncomment below to view the python object as a JSON string\n", "#json_string" ] }, @@ -258,9 +260,9 @@ "metadata": {}, "outputs": [], "source": [ - "#notes from Matt \n", + "#notes from Matt\n", "\n", - "# have a function that does the reading and creates 1 row then you have to loop and the dataframe grows through concatanation \n", + "# have a function that does the reading and creates 1 row then you have to loop and the dataframe grows through concatanation\n", "# pandas.concat" ] }, @@ -337,7 +339,7 @@ } ], "source": [ - "for path in Path('/Users/bjaber/Projects/CuBIDS/cubids/testdata/complete').rglob('*.json'): \n", + "for path in Path('/Users/bjaber/Projects/CuBIDS/cubids/testdata/complete').rglob('*.json'):\n", " #print(path)\n", "\n", " counter=0\n", @@ -350,18 +352,18 @@ " file_tree = open(s_path)\n", " example_data = json.load(file_tree)\n", " SliceTime = example_data.get('SliceTiming') #the way you can snatch things out of a dictionary #if dict doesn't have the key it will return none vs. error\n", - " if SliceTime: \n", + " if SliceTime:\n", " example_data.update({\"SliceTime%03d\"%SliceNum : time for SliceNum, time in enumerate(SliceTime)})\n", " del example_data['SliceTiming']\n", " print(example_data)\n", - " #data = pd.DataFrame.from_dict(example_data, orient='index') \n", + " #data = pd.DataFrame.from_dict(example_data, orient='index')\n", " #data\n", " counter += 1\n", - " \n", "\n", - "#NOTE: error when trying to put the data into a pandas dataframe. \n", - "# print(example_data) was used to make sure that inputs that are an array such as in the field SliceTiming are being separated into indenpendent values of SliceTime00x that should feed into the dataframe. 
\n", - "# it is doing that across all json files that are being loaded from the directory " + "\n", + "#NOTE: error when trying to put the data into a pandas dataframe.\n", + "# print(example_data) was used to make sure that inputs that are an array such as in the field SliceTiming are being separated into indenpendent values of SliceTime00x that should feed into the dataframe.\n", + "# it is doing that across all json files that are being loaded from the directory" ] }, { diff --git a/notebooks/Key_and_Param_Groups.ipynb b/docs/notebooks/Key_and_Param_Groups.ipynb similarity index 95% rename from notebooks/Key_and_Param_Groups.ipynb rename to docs/notebooks/Key_and_Param_Groups.ipynb index 94c49359e..3970ce6df 100644 --- a/notebooks/Key_and_Param_Groups.ipynb +++ b/docs/notebooks/Key_and_Param_Groups.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Key and Param Groups" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -90,7 +97,7 @@ "outputs": [], "source": [ "\n", - "# UNDOING RENAMING OF FILES \n", + "# UNDOING RENAMING OF FILES\n", "\n", "files_and_dirs = Path(\"/Users/scovitz/CuBIDS/cubids/testdata/complete/\").rglob('*')\n", "for path in files_and_dirs:\n", @@ -169,7 +176,7 @@ " output = bod.get_file_params(key_group)\n", " print(len(output))\n", " #print(output)\n", - " \n", + "\n", "\n", " #print(output)\n", " #output2 = bod.get_param_groups(key_group)\n", @@ -197,16 +204,16 @@ "#print(output[1])\n", "#print(output[0])\n", "# filenames = list(output.keys())\n", - "# first_params = output[filenames[0]] \n", - "# #for path in filenames: \n", + "# first_params = output[filenames[0]]\n", + "# #for path in filenames:\n", "# #print(path + \"\\n\")\n", "# #print(first_params)\n", "\n", "\n", "\n", "# GET ALL FILENAMES ASSOCIATED WITH A KEY GROUP\n", - "# USE GLOB TO FIND ALL INSTANCES \n", - "# IF THEY MATCH AND DICTS MATCH, DO THE REPLACEMENT \n", + "# USE GLOB TO FIND ALL INSTANCES\n", + "# IF THEY MATCH AND DICTS MATCH, DO THE REPLACEMENT\n", "\n", "\n", "# for i in range(len(files)):\n", @@ -220,8 +227,7 @@ "# print(filenames[0])\n", "\n", "# param_groups = bod.get_param_groups(key_group)\n", - "# print(len(param_groups))\n", - " " + "# print(len(param_groups))\n" ] }, { @@ -258,10 +264,10 @@ "source": [ "# TESTING SOMOE SHIT WOOOOOOOOOOOOO\n", "\n", - "dict_0 = {'EchoTime': 0.03, 'TotalReadoutTime': 0.0362102, 'RepetitionTime': 2.5, 'DwellTime': 3.1e-06, 'PartialFourier': 1, 'FlipAngle': 80, 'EffectiveEchoSpacing': 0.000510002, 'PhaseEncodingDirection': 'j-', 'SliceTime000': 1.2, 'SliceTime001': 0, 'SliceTime002': 1.3, 'SliceTime003': 0.1, 'SliceTime004': 1.4, 'SliceTime005': 0.1, 'SliceTime006': 1.4, 'SliceTime007': 0.2, 'SliceTime008': 1.5, 'SliceTime009': 0.3, 'SliceTime010': 1.6, 'SliceTime011': 0.3, 'SliceTime012': 1.6, 'SliceTime013': 0.4, 'SliceTime014': 1.7, 'SliceTime015': 0.5, 'SliceTime016': 1.8, 'SliceTime017': 0.5, 'SliceTime018': 1.8, 'SliceTime019': 0.6, 'SliceTime020': 1.9, 'SliceTime021': 0.7, 'SliceTime022': 2.0, 'SliceTime023': 0.7, 'SliceTime024': 2.0, 'SliceTime025': 0.8, 'SliceTime026': 2.1, 'SliceTime027': 0.9, 'SliceTime028': 2.2, 'SliceTime029': 0.9, 'SliceTime030': 2.2, 'SliceTime031': 1.0, 'SliceTime032': 2.3, 'SliceTime033': 1.0, 'SliceTime034': 2.4, 'SliceTime035': 1.1, 'SliceTime036': 2.4, 'SliceTime037': 1.2} \n", + "dict_0 = {'EchoTime': 0.03, 'TotalReadoutTime': 0.0362102, 'RepetitionTime': 2.5, 'DwellTime': 3.1e-06, 'PartialFourier': 1, 'FlipAngle': 80, 'EffectiveEchoSpacing': 
0.000510002, 'PhaseEncodingDirection': 'j-', 'SliceTime000': 1.2, 'SliceTime001': 0, 'SliceTime002': 1.3, 'SliceTime003': 0.1, 'SliceTime004': 1.4, 'SliceTime005': 0.1, 'SliceTime006': 1.4, 'SliceTime007': 0.2, 'SliceTime008': 1.5, 'SliceTime009': 0.3, 'SliceTime010': 1.6, 'SliceTime011': 0.3, 'SliceTime012': 1.6, 'SliceTime013': 0.4, 'SliceTime014': 1.7, 'SliceTime015': 0.5, 'SliceTime016': 1.8, 'SliceTime017': 0.5, 'SliceTime018': 1.8, 'SliceTime019': 0.6, 'SliceTime020': 1.9, 'SliceTime021': 0.7, 'SliceTime022': 2.0, 'SliceTime023': 0.7, 'SliceTime024': 2.0, 'SliceTime025': 0.8, 'SliceTime026': 2.1, 'SliceTime027': 0.9, 'SliceTime028': 2.2, 'SliceTime029': 0.9, 'SliceTime030': 2.2, 'SliceTime031': 1.0, 'SliceTime032': 2.3, 'SliceTime033': 1.0, 'SliceTime034': 2.4, 'SliceTime035': 1.1, 'SliceTime036': 2.4, 'SliceTime037': 1.2}\n", "dict_1 = {'EchoTime': 0.03, 'TotalReadoutTime': 0.0362102, 'RepetitionTime': 2.5, 'DwellTime': 3.1e-06, 'PartialFourier': 1, 'FlipAngle': 80, 'EffectiveEchoSpacing': 0.000510002, 'PhaseEncodingDirection': 'j-', 'SliceTime000': 1.2, 'SliceTime001': 0, 'SliceTime002': 1.3, 'SliceTime003': 0.1, 'SliceTime004': 1.4, 'SliceTime005': 0.1, 'SliceTime006': 1.4, 'SliceTime007': 0.2, 'SliceTime008': 1.5, 'SliceTime009': 0.3, 'SliceTime010': 1.6, 'SliceTime011': 0.3, 'SliceTime012': 1.6, 'SliceTime013': 0.4, 'SliceTime014': 1.7, 'SliceTime015': 0.5, 'SliceTime016': 1.8, 'SliceTime017': 0.5, 'SliceTime018': 1.8, 'SliceTime019': 0.6, 'SliceTime020': 1.9, 'SliceTime021': 0.7, 'SliceTime022': 2.0, 'SliceTime023': 0.7, 'SliceTime024': 2.0, 'SliceTime025': 0.8, 'SliceTime026': 2.1, 'SliceTime027': 0.9, 'SliceTime028': 2.2, 'SliceTime029': 0.9, 'SliceTime030': 2.2, 'SliceTime031': 1.0, 'SliceTime032': 2.3, 'SliceTime033': 1.0, 'SliceTime034': 2.4, 'SliceTime035': 1.1, 'SliceTime036': 2.4, 'SliceTime037': 1.2}\n", "\n", - "if dict_0 == split_params: \n", + "if dict_0 == split_params:\n", " print(\"YAY\")\n", "else:\n", " print(\"STUPID\")" diff --git a/notebooks/PofC_Key_Values2.ipynb b/docs/notebooks/PofC_Key_Values2.ipynb similarity index 96% rename from notebooks/PofC_Key_Values2.ipynb rename to docs/notebooks/PofC_Key_Values2.ipynb index 0e17595f2..a647a2ef0 100644 --- a/notebooks/PofC_Key_Values2.ipynb +++ b/docs/notebooks/PofC_Key_Values2.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Key Values Proof of Concept" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -88,7 +95,7 @@ } ], "source": [ - "# use pybids to extract BIDS entities from single subject \n", + "# use pybids to extract BIDS entities from single subject\n", "\n", "path = all_files[0]\n", "dict1= parse_file_entities(path)\n", @@ -134,8 +141,8 @@ "\n", "for file in all_files:\n", "#for each file in the list, parse the information into a dictionary and add it to the list we just initialized\n", - " result = parse_file_entities(file) \n", - " \n", + " result = parse_file_entities(file)\n", + "\n", " entities.append(result)\n", " #entities.add(string_result)\n", "print(entities)" @@ -156,23 +163,23 @@ ], "source": [ "\n", - "# loop through files to create a bigger dictionary of discrete keys, adding each value to a list \n", + "# loop through files to create a bigger dictionary of discrete keys, adding each value to a list\n", "dictionary = {}\n", "# initialize a new dictionary\n", "for e in entities:\n", - "# for each dictionary in the list we created above \n", + "# for each dictionary in the list we created above\n", " for k,v in e.items():\n", - 
" #for each set of key-value pairs in each dictionary \n", + " #for each set of key-value pairs in each dictionary\n", " #print(k,v)\n", " if k not in dictionary.keys():\n", " #if the key is not in the larger dictionary keys, set the value as value, but in a list\n", " dictionary[k]=[v]\n", - " else: \n", - " #if the key is in the dictionary, add the new value to the existing value list \n", + " else:\n", + " #if the key is in the dictionary, add the new value to the existing value list\n", " dictionary[k].append(v)\n", - " \n", - " \n", - "print(dictionary) " + "\n", + "\n", + "print(dictionary)" ] }, { @@ -191,25 +198,25 @@ } ], "source": [ - "#create one dictionary value per key in original dictionary \n", - "# loop through dictionary values and create dictionaries for instances of each list \n", - "l_dicts = [] \n", + "#create one dictionary value per key in original dictionary\n", + "# loop through dictionary values and create dictionaries for instances of each list\n", + "l_dicts = []\n", "for key in dictionary.keys():\n", "# for each list that is the value of the big dictionary:\n", " #print (key)\n", - " counts = {} #initialize a new dictionary for # of instances \n", + " counts = {} #initialize a new dictionary for # of instances\n", " l_labels = dictionary[key]\n", " #print(l_labels)\n", " for item in l_labels:\n", - " #for each item in those lists \n", + " #for each item in those lists\n", " if item not in counts.keys():\n", " #if the item is not in the new dictionary, set it to 1\n", - " counts[item]= 1 \n", + " counts[item]= 1\n", " else:\n", " #if it already exists, add 1\n", " counts[item]+= 1\n", " l_dicts.append(counts)\n", - "#list of dictionaries where KEYS: BIDS entities values and VALUES: instances of that key \n", + "#list of dictionaries where KEYS: BIDS entities values and VALUES: instances of that key\n", "print(l_dicts)\n", "\n" ] @@ -231,14 +238,13 @@ "#make a new dictionary with KEYS: BIDS entities (ie: subject, session, etc) and VALUES: dictionaries of ID's and instances\n", "\n", "new_dictionary = {}\n", - "counter = 0 \n", + "counter = 0\n", "for key in dictionary.keys():\n", - " #assign values from l_dicts to each key \n", + " #assign values from l_dicts to each key\n", " new_dictionary[key] = l_dicts[counter]\n", " counter += 1\n", "\n", - "print(new_dictionary)\n", - " " + "print(new_dictionary)\n" ] }, { @@ -247,7 +253,7 @@ "metadata": {}, "outputs": [], "source": [ - "#initialize new list for tuples \n", + "#initialize new list for tuples\n", "l_tups= []\n", "for key in new_dictionary:\n", " #list out all keys\n", diff --git a/notebooks/Tests/datatype-anat_reconstruction-refaced_suffix-T1w.csv b/docs/notebooks/Tests/datatype-anat_reconstruction-refaced_suffix-T1w.csv similarity index 100% rename from notebooks/Tests/datatype-anat_reconstruction-refaced_suffix-T1w.csv rename to docs/notebooks/Tests/datatype-anat_reconstruction-refaced_suffix-T1w.csv diff --git a/notebooks/Tests/datatype-func_run-1_suffix-bold_task-rest.csv b/docs/notebooks/Tests/datatype-func_run-1_suffix-bold_task-rest.csv similarity index 100% rename from notebooks/Tests/datatype-func_run-1_suffix-bold_task-rest.csv rename to docs/notebooks/Tests/datatype-func_run-1_suffix-bold_task-rest.csv diff --git a/notebooks/Tests/datatype-func_run-2_suffix-bold_task-rest.csv b/docs/notebooks/Tests/datatype-func_run-2_suffix-bold_task-rest.csv similarity index 100% rename from notebooks/Tests/datatype-func_run-2_suffix-bold_task-rest.csv rename to 
docs/notebooks/Tests/datatype-func_run-2_suffix-bold_task-rest.csv diff --git a/notebooks/keyparamgrouptest.ipynb b/docs/notebooks/keyparamgrouptest.ipynb similarity index 99% rename from notebooks/keyparamgrouptest.ipynb rename to docs/notebooks/keyparamgrouptest.ipynb index a260cd146..150eb3df6 100644 --- a/notebooks/keyparamgrouptest.ipynb +++ b/docs/notebooks/keyparamgrouptest.ipynb @@ -1,11 +1,11 @@ { "cells": [ { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "# Key and Param Group Test" + ] }, { "cell_type": "code", @@ -1645,7 +1645,7 @@ "source": [ "from cubids.cubids import *\n", "files = [\n", - " '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz', \n", + " '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz',\n", " '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARRP384BVX/ses-HBNsiteRU/dwi/sub-NDARRP384BVX_ses-HBNsiteRU_acq-64dir_dwi.nii.gz']\n", "\n", "dfs = []\n", @@ -1662,7 +1662,7 @@ " print(fieldmap_lookup[path])\n", " fieldmap_types = sorted([fmap.entities['fmap'] for fmap in fieldmap_lookup[path]])\n", " for fmap_num, fmap_type in enumerate(fieldmap_types):\n", - " example_data['fieldmap_type%02d' % fmap_num] = fmap_type \n", + " example_data['fieldmap_type%02d' % fmap_num] = fmap_type\n", "\n", " # Expand slice timing to multiple columns\n", " SliceTime = example_data.get('SliceTiming')\n", diff --git a/notebooks/metadata_image_param.ipynb b/docs/notebooks/metadata_image_param.ipynb similarity index 96% rename from notebooks/metadata_image_param.ipynb rename to docs/notebooks/metadata_image_param.ipynb index b8732d970..f7fe247e1 100644 --- a/notebooks/metadata_image_param.ipynb +++ b/docs/notebooks/metadata_image_param.ipynb @@ -4,6 +4,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "# Metadata Image Parameter Proof of Concept\n", + "\n", "In this proof of concept we will read & write JSON files in Jupyter notebook. \n", "\n", "1. display the data in the sidecar \n", @@ -17,11 +19,11 @@ "metadata": {}, "outputs": [], "source": [ - "#import json module to be able to read & write json files \n", + "#import json module to be able to read & write json files\n", "import json\n", "import pandas as pd\n", "from pandas.io.json import json_normalize\n", - "from glob import glob \n", + "from glob import glob\n", "from pathlib import Path" ] }, @@ -136,19 +138,19 @@ } ], "source": [ - "#testing the code with a single json file. \n", + "#testing the code with a single json file.\n", "\n", "file_test = open('/Users/bjaber/Projects/CuBIDS-use_cases/cubids/testdata/complete/sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.json')\n", "sample_data = json.load(file_test)\n", "sample_data.keys()\n", "sample_data.get('SliceTiming')\n", - "SliceTime = sample_data.get('SliceTiming') #the way you can snatch things out of a dictionary \n", + "SliceTime = sample_data.get('SliceTiming') #the way you can snatch things out of a dictionary\n", "#if dict doesn't have the key it will return none vs. 
error\n", "\n", - "if SliceTime: \n", + "if SliceTime:\n", " sample_data.update({\"SliceTime%03d\"%SliceNum : time for SliceNum, time in enumerate(SliceTime)})\n", " del sample_data['SliceTiming']\n", - " \n", + "\n", "array_data = pd.DataFrame.from_dict(sample_data, orient='index', columns = ['1'])\n", "array_data" ] @@ -198,7 +200,7 @@ "metadata": {}, "outputs": [], "source": [ - "#Here we change the value for AcquisionNumber from 1 to 2. \n", + "#Here we change the value for AcquisionNumber from 1 to 2.\n", "#json_data[\"AcquisitionNumber\"] = 2" ] }, @@ -248,7 +250,7 @@ "metadata": {}, "outputs": [], "source": [ - "#Uncomment below to view the python object as a JSON string \n", + "#Uncomment below to view the python object as a JSON string\n", "#json_string" ] }, @@ -258,9 +260,9 @@ "metadata": {}, "outputs": [], "source": [ - "#notes from Matt \n", + "#notes from Matt\n", "\n", - "# have a function that does the reading and creates 1 row then you have to loop and the dataframe grows through concatanation \n", + "# have a function that does the reading and creates 1 row then you have to loop and the dataframe grows through concatanation\n", "# pandas.concat" ] }, @@ -451,44 +453,44 @@ " file_tree = open(s_path)\n", " example_data = json.load(file_tree)\n", " wanted_keys = example_data.keys() & IMAGING_PARAMS\n", - " example_data = {key: example_data[key] for key in wanted_keys} \n", + " example_data = {key: example_data[key] for key in wanted_keys}\n", " SliceTime = example_data.get('SliceTiming') #the way you can snatch things out of a dictionary #if dict doesn't have the key it will return none vs. error\n", - " if SliceTime: \n", + " if SliceTime:\n", " example_data.update({\"SliceTime%03d\"%SliceNum : [time] for SliceNum, time in enumerate(SliceTime)})\n", " del example_data['SliceTiming']\n", " #if ShimSetting:\n", - " \n", + "\n", " dfs.append(example_data)\n", - " \n", + "\n", "df = pd.DataFrame(dfs)\n", "#df.drop_duplicates()\n", "df.head()\n", "\n", "\n", "\n", - "#create dataframe of unique rows \n", - "#bids entities filter in the cubids class to filter through the files \n", - "#loop over , get metadata, and put into the dataframe \n", + "#create dataframe of unique rows\n", + "#bids entities filter in the cubids class to filter through the files\n", + "#loop over , get metadata, and put into the dataframe\n", "\n", "\n", "\n", " #print(example_data)\n", "\n", "\n", - " \n", + "\n", "#for file in example_data:\n", " #data = pd.DataFrame.from_dict(example_data, orient='index') # read data frame from json file\n", " #dfs.append(data) # append the data frame to the list\n", " #temp = pd.concat(dfs, ignore_index=True) # concatenate all the data frames in the list.\n", "\n", - " #data = pd.DataFrame.from_dict(example_data, orient='index') \n", + " #data = pd.DataFrame.from_dict(example_data, orient='index')\n", " #data\n", " #counter += 1\n", - " \n", "\n", - "#NOTE: error when trying to put the data into a pandas dataframe. This error happens regardless of the way SliceTiming is setup. \n", - "# print(example_data) was used to make sure that inputs that are an array such as in the field SliceTiming are being separated into indenpendent values of SliceTime00x that should feed into the dataframe. \n", - "# it is doing that across all json files that are being loaded from the directory " + "\n", + "#NOTE: error when trying to put the data into a pandas dataframe. 
This error happens regardless of the way SliceTiming is setup.\n", + "# print(example_data) was used to make sure that inputs that are an array such as in the field SliceTiming are being separated into indenpendent values of SliceTime00x that should feed into the dataframe.\n", + "# it is doing that across all json files that are being loaded from the directory" ] }, { diff --git a/notebooks/rename_files_work.ipynb b/docs/notebooks/rename_files_work.ipynb similarity index 98% rename from notebooks/rename_files_work.ipynb rename to docs/notebooks/rename_files_work.ipynb index eaff64144..68b2df57b 100644 --- a/notebooks/rename_files_work.ipynb +++ b/docs/notebooks/rename_files_work.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Rename Files" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -14,7 +21,7 @@ } ], "source": [ - "# TEST BED \n", + "# TEST BED\n", "\n", "test = \"happy.py\"\n", "new = test.replace(\".py\", \".json\")\n", @@ -27,7 +34,7 @@ "metadata": {}, "outputs": [], "source": [ - "import sys \n", + "import sys\n", "sys.path.append(\"..\")" ] }, @@ -48,18 +55,18 @@ } ], "source": [ - "# USE THIS BEFORE TESTING! \n", + "# USE THIS BEFORE TESTING!\n", "\n", - "from pathlib import Path \n", + "from pathlib import Path\n", "import shutil\n", "import os\n", "\n", - "from pkg_resources import resource_filename as pkgrf \n", + "from pkg_resources import resource_filename as pkgrf\n", "\n", "# returns string path to testdata\n", "TEST_DATA = pkgrf(\"cubids\", \"testdata\")\n", "\n", - "# should give you the full path \n", + "# should give you the full path\n", "tmp_path = Path().resolve()\n", "#print(tmp_path)\n", "\n", @@ -237,7 +244,7 @@ "metadata": {}, "outputs": [], "source": [ - "import glob \n", + "import glob\n", "import os\n", "\n", "\n", @@ -246,13 +253,13 @@ "for path in Path(\"/Users/Covitz/CuBIDS/data/sub-1832999514/\").iterdir():\n", " if path.is_file():\n", " print(path.stem)\n", - " old_name = path.stem \n", + " old_name = path.stem\n", " old_ext = path.suffix\n", " directory = path.parent\n", " #print(type(directory))\n", " new_name = \"A_\" + old_name + old_ext\n", " path.rename(Path(directory, new_name))\n", - " \n" + "\n" ] }, { @@ -262,28 +269,28 @@ "outputs": [], "source": [ "# @Params\n", - "# - path: a string containing the path to the directory inside which we want to change files \n", + "# - path: a string containing the path to the directory inside which we want to change files\n", "# - pattern: the substring of the file we would like to replace\n", "# - replacement: the substring that will replace \"pattern\"\n", "# @Returns\n", - "# - None \n", + "# - None\n", "def rename_files_old(files, pattern, replacement):\n", " # what are \"pattern\" and \"replacement\"\n", - " # if you want to do a string replace, \n", - " # you need the sub string that needs to be added to the file \n", + " # if you want to do a string replace,\n", + " # you need the sub string that needs to be added to the file\n", " # and the portion you want cut\n", - " # but before you do the replace, shouldn't you run isValid() on the new filename? 
\n", + " # but before you do the replace, shouldn't you run isValid() on the new filename?\n", " new_files = []\n", " for file in files:\n", " new_filename = file.replace(pattern, replacement)\n", " os.rename(file, new_filename)\n", - " \n", + "\n", " #if isvalid(test_filename) == True:\n", " # new_file = test_filename\n", - " #else: \n", - " # exception will be raised inside the function isValid \n", + " #else:\n", + " # exception will be raised inside the function isValid\n", " # print(\"Invalid Filename\")\n", - " return new_files \n", + " return new_files\n", "\n" ] }, @@ -293,18 +300,18 @@ "metadata": {}, "outputs": [], "source": [ - "import pathlib \n", + "import pathlib\n", "\n", "# @Params\n", - "# - path: a string containing the path to the directory inside which we want to change files \n", + "# - path: a string containing the path to the directory inside which we want to change files\n", "# - pattern: the substring of the file we would like to replace\n", "# - replacement: the substring that will replace \"pattern\"\n", "# @Returns\n", - "# - None \n", + "# - None\n", "def rename_files_1(path_to_dir, pattern, replacement):\n", " for path in pathlib.Path(path_to_dir).iterdir():\n", " if path.is_file():\n", - " old_name = path.stem \n", + " old_name = path.stem\n", " old_ext = path.suffix\n", " directory = path.parent\n", " new_name = old_name.replace(pattern, replacement) + old_ext\n", @@ -317,14 +324,14 @@ "metadata": {}, "outputs": [], "source": [ - "import pathlib \n", + "import pathlib\n", "\n", "# @Params\n", - "# - path: a string containing the path to the bids directory inside which we want to change files \n", + "# - path: a string containing the path to the bids directory inside which we want to change files\n", "# - pattern: the substring of the file we would like to replace\n", "# - replacement: the substring that will replace \"pattern\"\n", "# @Returns\n", - "# - None \n", + "# - None\n", "def rename_files(bids_dir, pattern, replacement):\n", " files_and_dirs = Path(bids_dir).rglob('*')\n", " for path in files_and_dirs:\n", @@ -2283,11 +2290,11 @@ } ], "source": [ - "import glob \n", - "import pathlib \n", + "import glob\n", + "import pathlib\n", "\n", - "# testing out our function rename_files \n", - "# changes all filenames in all_files containing substrings \"PNC2\" to \"PNC20\" \n", + "# testing out our function rename_files\n", + "# changes all filenames in all_files containing substrings \"PNC2\" to \"PNC20\"\n", "\n", "#root_dir = \"/Users/Covitz/CuBIDS/data/sub-1832999514/ses-PNC2/func/\"\n", "\n", @@ -2316,21 +2323,21 @@ "metadata": {}, "outputs": [], "source": [ - "# PROCESS NOTES \n", + "# PROCESS NOTES\n", "\n", - "# in BIDS, want to replace everything up to the BIDS root \n", - "# don't want to replace all filenames up to the BIDS root \n", + "# in BIDS, want to replace everything up to the BIDS root\n", + "# don't want to replace all filenames up to the BIDS root\n", "\n", "# could have a rename subject function and a rename session function\n", - "# also have a rename files function \n", + "# also have a rename files function\n", "\n", - "# wants a single function that lets you replace any part of the string \n", + "# wants a single function that lets you replace any part of the string\n", "\n", "# pathlib.rglob - like \"find\" in the command line\n", "# bids_dir.rglob\n", - "# pybids.parsentities - if not valid BIDS, will get error from parse entities \n", + "# pybids.parsentities - if not valid BIDS, will get error from parse entities\n", "\n", - "# 
replace directory names and filenames " + "# replace directory names and filenames" ] }, { diff --git a/notebooks/workwithtestdata.ipynb b/docs/notebooks/workwithtestdata.ipynb similarity index 99% rename from notebooks/workwithtestdata.ipynb rename to docs/notebooks/workwithtestdata.ipynb index cbb6ce775..1270bcc97 100644 --- a/notebooks/workwithtestdata.ipynb +++ b/docs/notebooks/workwithtestdata.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Work with Test Data" + ] + }, { "cell_type": "code", "execution_count": 4, @@ -28,7 +35,7 @@ " data_dir = test_data(Path(newdir))\n", " return data_dir\n", "\n", - "# copy the data \n", + "# copy the data\n", "data_root = copy_testing_data(\"test1\")" ] }, @@ -566,7 +573,7 @@ "source": [ "from cubids.cubids import *\n", "files = [\n", - " '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz', \n", + " '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz',\n", " '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARRP384BVX/ses-HBNsiteRU/dwi/sub-NDARRP384BVX_ses-HBNsiteRU_acq-64dir_dwi.nii.gz']\n", "\n", "dfs = []\n", @@ -583,7 +590,7 @@ " print(fieldmap_lookup[path])\n", " fieldmap_types = sorted([fmap.entities['fmap'] for fmap in fieldmap_lookup[path]])\n", " for fmap_num, fmap_type in enumerate(fieldmap_types):\n", - " example_data['fieldmap_type%02d' % fmap_num] = fmap_type \n", + " example_data['fieldmap_type%02d' % fmap_num] = fmap_type\n", "\n", " # Expand slice timing to multiple columns\n", " SliceTime = example_data.get('SliceTiming')\n", diff --git a/docs/usage.rst b/docs/usage.rst index e9277781d..ff8fa367d 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -255,4 +255,4 @@ In the next section, we'll introduce ``DataLad`` and walk through a real example .. rubric:: Footnotes -.. [#f1] PNC: `The Philadelphia Developmental Cohort `_. \ No newline at end of file +.. [#f1] PNC: `The Philadelphia Developmental Cohort `_. diff --git a/pyproject.toml b/pyproject.toml index 0c25e9bf6..5d9abf29f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,16 +21,13 @@ classifiers = [ license = {file = "LICENSE"} requires-python = ">=3.8" dependencies = [ + "datalad>=0.13.5,!=0.17.3,!=0.17.0,!=0.16.1", + "numpy<=1.26.0", + "pandas<=2.2.0", + "pybids<=0.16.4", "pyyaml", - "pybids", - "pandas", + "scikit-learn<=1.4.0", "tqdm", - "numpy", - "scikit-learn", - "datalad>=0.13.5,!=0.17.3,!=0.17.0,!=0.16.1", - "wrapt<2,>=1.10", - "Sphinx", - "jinja2 < 3.1", ] dynamic = ["version"] @@ -44,11 +41,12 @@ Paper = "https://doi.org/10.1016/j.neuroimage.2022.119609" doc = [ "nbsphinx", "packaging", + "recommonmark", "sphinx >= 2.2", "sphinx-argparse", - "sphinx_rtd_theme", + "sphinx_gallery", "sphinx_markdown_tables", - "recommonmark", + "sphinx_rtd_theme", ] tests = [ "codespell",
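The pyproject.toml hunk above replaces the loose runtime requirements with upper-bounded pins for numpy, pandas, pybids, and scikit-learn while keeping the datalad exclusions. One way to check whether an existing environment already satisfies pins like these is sketched below; it is purely illustrative, not part of CuBIDS, and assumes the third-party packaging library is available.

from importlib.metadata import PackageNotFoundError, version

from packaging.specifiers import SpecifierSet

# Mirrors the pinned runtime dependencies in the pyproject.toml hunk above.
PINS = {
    "datalad": ">=0.13.5,!=0.17.3,!=0.17.0,!=0.16.1",
    "numpy": "<=1.26.0",
    "pandas": "<=2.2.0",
    "pybids": "<=0.16.4",
    "scikit-learn": "<=1.4.0",
}

for name, spec in PINS.items():
    try:
        installed = version(name)
    except PackageNotFoundError:
        print(f"{name}: not installed")
        continue
    ok = installed in SpecifierSet(spec)
    print(f"{name} {installed}: {'OK' if ok else 'outside ' + spec}")

SpecifierSet treats each comma-separated clause as an AND, so the datalad entry enforces the minimum version and all three exclusions in a single check.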