From 95ffd96cc9608d9182c115f6230ecd9a43374804 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Mon, 16 Nov 2020 00:27:32 -0500 Subject: [PATCH] [ENH] Add singularity and docker to CI tests (#30) --- .circleci/config.yml | 154 +++++++- .gitignore | 1 + Dockerfile | 57 ++- bond/bond.py | 204 +++++++++- bond/cli.py | 96 ++++- neurodebian.gpg | 71 ++++ notebooks/workwithtestdata.ipynb | 633 +++++++++++++++++++++++++++++++ setup.cfg | 20 +- setup.py | 6 +- tests/test_bond.py | 68 +++- 10 files changed, 1238 insertions(+), 72 deletions(-) create mode 100644 neurodebian.gpg create mode 100644 notebooks/workwithtestdata.ipynb diff --git a/.circleci/config.yml b/.circleci/config.yml index b4f7a74cd..ee849d5ef 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -86,6 +86,118 @@ jobs: paths: - miniconda + get_singularity: + machine: + image: ubuntu-1604:202004-01 + steps: + - restore_cache: + keys: + - singularity-v3-{{ .Branch }}-{{ .Revision }} + + - run: + name: Download Singularity + command: | + if [ ! -f /usr/local/bin/singularity ] + then + sudo apt-get update && sudo apt-get -y install build-essential \ + libssl-dev \ + uuid-dev \ + libgpgme11-dev \ + squashfs-tools \ + libseccomp-dev \ + pkg-config \ + wget + export GOPATH=$HOME/go + mkdir -p $GOPATH + go get -u github.com/golang/dep/cmd/dep + export VERSION=3.6.4 + wget https://github.com/sylabs/singularity/releases/download/v${VERSION}/singularity-${VERSION}.tar.gz + tar -xzf singularity-${VERSION}.tar.gz + cd ./singularity + ./mconfig && \ + make -j2 -C ./builddir + cd $HOME + tar cfz singularity.tar.gz project/singularity/ + tar cfz go.tar.gz go/ + fi + + - save_cache: + key: singularity-v3-{{ .Branch }}-{{ .Revision }}-{{ epoch }} + paths: + - /home/circleci/singularity.tar.gz + - /home/circleci/go.tar.gz + + + install_and_test_singularity: + machine: + image: ubuntu-1604:202004-01 + working_directory: /home/circleci/src/BOnD + steps: + - checkout: + path: /home/circleci/src/BOnD + + - attach_workspace: + at: /tmp + + - restore_cache: + keys: + - docker-v1-{{ .Branch }}-{{ .Revision }} + + - restore_cache: + keys: + - singularity-v3-{{ .Branch }}-{{ .Revision }} + - run: + name: Load Docker image layer cache + no_output_timeout: 30m + command: | + docker info + set +o pipefail + if [ -f /tmp/cache/docker.tar.gz ]; then + sudo apt update && sudo apt -y install pigz + pigz -d --stdout /tmp/cache/docker.tar.gz | docker load + docker images + fi + + - run: + name: Install BOnD + command: | + export PATH=/tmp/miniconda/bin:$PATH + source activate bond + pip install . 
+ + - run: + name: Install Singularity + command: | + export GOPATH=$HOME/go + cd /home/circleci + tar xfz go.tar.gz + tar xfz singularity.tar.gz + cd project/singularity/builddir + sudo make install + + - run: + name: Build singularity image + no_output_timeout: 30m + command: | + singularity build \ + /home/circleci/bond-latest.sif \ + docker-daemon://pennlinc/bond:latest + + - run: + name: Test singularity + command: | + git config --global user.email "circleci@citesting.com" + git config --global user.name "CircleCI Test" + export PATH=/tmp/miniconda/bin:$PATH + source activate bond + mkdir -p /tmp/bids /tmp/group_testing + cp -r /home/circleci/src/BOnD/bond/testdata/complete /tmp/bids/singularity + bond-group \ + /tmp/bids/singularity \ + /tmp/group_testing/direct \ + --container /home/circleci/bond-latest.sif + + install_and_test: machine: @@ -119,14 +231,21 @@ jobs: command: | export PATH=/tmp/miniconda/bin:$PATH source activate bond - pip install .[all] + pip install . - run: - name: Run PyTest + name: Test Docker integration command: | + git config --global user.email "circleci@citesting.com" + git config --global user.name "CircleCI Test" export PATH=/tmp/miniconda/bin:$PATH source activate bond - py.test -sv tests + mkdir -p /tmp/bids /tmp/group_testing + cp -r /home/circleci/src/BOnD/bond/testdata/complete /tmp/bids/docker + bond-group \ + /tmp/bids/docker \ + /tmp/group_testing/docker \ + --container pennlinc/bond:latest build_docs: docker: @@ -146,7 +265,7 @@ jobs: name: Check Python version and upgrade pip command: | python --version - sudo python -m pip install -U pip + sudo python -m pip install -U pip sudo pip install flake8 - run: @@ -157,7 +276,7 @@ jobs: - run: name: Install BOnD. command: sudo python -m pip install ".[doc]" --no-cache-dir --progress-bar off - + - run: name: Build documentation no_output_timeout: 45m @@ -352,6 +471,30 @@ workflows: tags: only: /.*/ + - get_singularity: + requires: + - build + filters: + branches: + ignore: + - /tests?\/.*/ + - /docker\/.*/ + tags: + only: /.*/ + + - install_and_test_singularity: + requires: + - build + - setup_conda + - get_singularity + filters: + branches: + ignore: + - /tests?\/.*/ + - /docker\/.*/ + tags: + only: /.*/ + - build_docs: filters: branches: @@ -365,6 +508,7 @@ workflows: requires: - build_docs - install_and_test + - install_and_test_singularity filters: branches: only: master diff --git a/.gitignore b/.gitignore index bbe607506..974b70a2b 100644 --- a/.gitignore +++ b/.gitignore @@ -135,3 +135,4 @@ data/* .DS_Store notebooks/testdata .vscode/settings.json +notebooks/test1 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 116c7626b..2b0ef0b1c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,44 +1,35 @@ -FROM python:3 - -# install conda -RUN curl -sSLO https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \ - bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /usr/local/miniconda && \ - rm Miniconda3-4.5.12-Linux-x86_64.sh - -ENV PATH=/usr/local/miniconda/bin:$PATH - -# activate conda environment -RUN echo "source activate base" > ~/.bashrc - -RUN which conda +FROM ubuntu:bionic-20200921 # get the validator branch skip_session_check RUN apt-get update && \ - apt-get install -y git + apt-get install -y --no-install-recommends \ + curl ca-certificates && \ + apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -# USE CONDA FOR INSTALLING NPM -RUN conda install nodejs +# Installing Neurodebian and nodejs packages +COPY neurodebian.gpg /usr/local/etc/neurodebian.gpg 
+RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - && \
+    curl -sSL "http://neuro.debian.net/lists/$( lsb_release -c | cut -f2 ).us-ca.full" >> /etc/apt/sources.list.d/neurodebian.sources.list && \
+    apt-key add /usr/local/etc/neurodebian.gpg && \
+    (apt-key adv --refresh-keys --keyserver hkp://ha.pool.sks-keyservers.net 0xA5D32F012649A5A9 || true)
 
-RUN npm --version
+# Install datalad, nodejs and python3 from the repositories configured above
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    datalad nodejs python3 python3-pip python3-setuptools && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
-RUN npm install -g yarn
-RUN mkdir -p /home/validator && \
+RUN npm install -g yarn && \
+    mkdir -p /home/validator && \
     cd /home/validator && \
-    git clone -b skip_session_checks --single-branch https://github.com/bids-standard/bids-validator.git
-
-
-RUN ls /home/validator/bids-validator
-RUN cd /home/validator/bids-validator && \
+    git clone -b skip_session_checks \
+        --single-branch https://github.com/bids-standard/bids-validator.git && \
+    cd /home/validator/bids-validator && \
     yarn && \
-    npm install -g bids-validator
-
-RUN which bids-validator
-
-# prepare env
-COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt
+    cd bids-validator && npm install -g
 
-COPY . .
+COPY . /src/BOnD
+RUN pip3 install --no-cache-dir "/src/BOnD"
 
-ENTRYPOINT [ "bids-validator"]
+ENTRYPOINT [ "/bin/bash"]
diff --git a/bond/bond.py b/bond/bond.py
index c40cf56bf..cc79dd44f 100644
--- a/bond/bond.py
+++ b/bond/bond.py
@@ -7,6 +7,7 @@
 from bids.utils import listify
 import numpy as np
 import pandas as pd
+import datalad.api as dlapi
 from tqdm import tqdm
 
 bids.config.set_option('extension_initial_dot', True)
@@ -26,13 +27,206 @@ class BOnD(object):
 
-    def __init__(self, data_root):
+    def __init__(self, data_root, use_datalad=False):
         self.path = data_root
         self.layout = bids.BIDSLayout(self.path, validate=False)
         # dictionary of KEYS: keys groups, VALUES: list of files
         self.keys_files = {}
         self.fieldmaps_cached = False
+        self.datalad_ready = False
+        self.datalad_handle = None
+
+        # Initialize datalad if requested
+        if use_datalad:
+            self.init_datalad()
+
+    def init_datalad(self, save=False, message=None):
+        """Initializes a datalad Dataset at self.path.
+
+        Parameters:
+        -----------
+
+        save: bool
+            Run datalad save to add any untracked files
+        message: str or None
+            Message to attach to the datalad save commit
+        """
+        self.datalad_ready = True
+        self.datalad_handle = dlapi.create(self.path,
+                                           cfg_proc='text2git',
+                                           force=True,
+                                           annex=True)
+        if save:
+            self.datalad_handle.save(message="Saved by BOnD")
+        if not save and not self.is_datalad_clean():
+            raise Exception("Unsaved changes in %s" % self.path)
+
+    def datalad_save(self, message=None):
+        if message is None:
+            message = "BOnD Save"
+        statuses = self.datalad_handle.save(message=message)
+        saved_status = set([status['status'] for status in statuses])
+        if not saved_status == set(["ok"]):
+            raise Exception("Failed to save in DataLad")
+
+    def is_datalad_clean(self):
+        """If True, no changes are detected in the datalad dataset."""
+        if not self.datalad_ready:
+            raise Exception(
+                "Datalad not initialized, can't determine status")
+        statuses = set([status['state'] for status in
+                        self.datalad_handle.status()])
+        return statuses == set(["clean"])
+
+    def merge_params(self, merge_df, files_df):
+        key_param_merge = {}
+        for i in range(len(merge_df)):
+            key_group = merge_df.iloc[i]['KeyGroup']
+            param_group = merge_df.iloc[i]['ParamGroup']
+            merge_into = merge_df.iloc[i]['MergeInto']
+            key_param_merge[(key_group, param_group)] = merge_into
+        pairs_to_change = list(key_param_merge.keys())
+
+        # locate files that need to change param groups/be deleted
+        for row in range(len(files_df)):
+
+            key = files_df.iloc[row]['KeyGroup']
+            param = files_df.iloc[row]['ParamGroup']
+
+            if (key, param) in pairs_to_change:
+                if key_param_merge[(key, param)] == 0:
+                    file_path = files_df.iloc[row]['FilePath']
+                    file_to_rem = Path(file_path)
+                    file_to_rem.unlink()
+            # else:
+                # need to merge the param groups
+                # NEED TO COPY THE METADATA FROM
+                # "MergeInto" --> "ParamGroup"
+                # self.change_metadata
+
+    def change_key_groups(self, og_csv_dir, new_csv_dir):
+        files_df = pd.read_csv(og_csv_dir + 'files.csv')
+        summary_df = pd.read_csv(og_csv_dir + 'summary.csv')
+
+        # TODO: IMPLEMENT merge_params (above)
+        # merge_df = summary_df[summary_df.MergeInto.notnull()]
+        # self.merge_params(merge_df, files_df)
+
+        change_keys_df = summary_df[summary_df.RenameKeyGroup.notnull()]
+
+        # dictionary
+        # KEYS = (orig key group, param num)
+        # VALUES = new key group
+        key_groups = {}
+
+        for i in range(len(change_keys_df)):
+            new_key = change_keys_df.iloc[i]['RenameKeyGroup']
+            old_key = change_keys_df.iloc[i]['KeyGroup']
+            param_group = change_keys_df.iloc[i]['ParamGroup']
+
+            # add to dictionary
+            key_groups[(old_key, param_group)] = new_key
+
+        # orig key/param tuples that will have new key group
+        pairs_to_change = key_groups.keys()
+
+        for row in range(len(files_df)):
+
+            key_group = files_df.iloc[row]['KeyGroup']
+            param_group = files_df.iloc[row]['ParamGroup']
+
+            if (key_group, param_group) in pairs_to_change:
+
+                file_path = files_df.iloc[row]['FilePath']
+                orig_key = files_df.iloc[row]['KeyGroup']
+                param_num = files_df.iloc[row]['ParamGroup']
+
+                new_key = key_groups[(orig_key, param_num)]
+
+                new_entities = _key_group_to_entities(new_key)
+
+                # change each filename according to new key group
+                self.change_filename(file_path, new_entities)
+
+        # TODO: THROW AN EXCEPTION IF NEW_KEY NOT VALID!
+        # OR IF KEY CAN'T BE PARSED AS A DICT?
+
+        self.layout = bids.BIDSLayout(self.path, validate=False)
+        self.get_CSVs(new_csv_dir)
+
+    def change_filename(self, filepath, entities):
+        # TODO: NEED TO RGLOB self.path??????
+ path = Path(filepath) + exts = path.suffixes + old_ext = "" + for ext in exts: + old_ext += ext + + # check if need to change the modality (one directory level up) + l_keys = list(entities.keys()) + + if "datatype" in l_keys: + # create path string a and add new modality + modality = entities['datatype'] + l_keys.remove('datatype') + else: + large = str(path.parent) + small = str(path.parents[1]) + '/' + modality = large.replace(small, '') + + # detect the subject/session string and keep it together + # front_stem is the string of subject/session paris + # these two entities don't change with the key group + front_stem = "" + cntr = 0 + for char in path.stem: + if char == "_" and cntr == 1: + cntr = 2 + break + if char == "_" and cntr == 0: + cntr += 1 + if cntr != 2: + front_stem = front_stem + char + + parent = str(path.parents[1]) + new_path_front = parent + '/' + modality + '/' + front_stem + + # remove fmap (not part of filename string) + if "fmap" in l_keys: + l_keys.remove("fmap") + + # now need to create the key/value string from the keys! + new_filename = "_".join(["{}-{}".format(key, entities[key]) + for key in l_keys]) + + # shorten "acquisition" in the filename + new_filename = new_filename.replace("acquisition", "acq") + + # shorten "reconstruction" in the filename + new_filename = new_filename.replace("reconstruction", "rec") + + # REMOVE "suffix-" + new_filename = new_filename.replace("suffix-", "") + + new_path = new_path_front + "_" + new_filename + old_ext + + path.rename(Path(new_path)) + + # now also rename json file + bidsfile = self.layout.get_file(filepath, scope='all') + + bidsjson_file = bidsfile.get_associations() + if bidsjson_file: + json_file = [x for x in bidsjson_file if 'json' in x.filename] + else: + print("NO JSON FILES FOUND IN ASSOCIATIONS") + if len(json_file) == 1: + json_file = json_file[0] + new_json_path = new_path_front + "_" + new_filename + ".json" + (Path(json_file.path)).rename(Path(new_json_path)) + else: + print("FOUND IRREGULAR NUMBER OF JSONS") def fieldmaps_ok(self): pass @@ -138,11 +332,11 @@ def get_CSVs(self, path_prefix): ----------- - None """ - big_df = self.get_param_groups_dataframes()[0] - summary = self.get_param_groups_dataframes()[1] - big_df.to_csv(path_prefix + "files.csv", index=False) - summary.to_csv(path_prefix + "summary.csv", index=False) + self._cache_fieldmaps() + big_df, summary = self.get_param_groups_dataframes() + big_df.to_csv(path_prefix + "_files.csv", index=False) + summary.to_csv(path_prefix + "_summary.csv", index=False) def get_file_params(self, key_group): key_entities = _key_group_to_entities(key_group) diff --git a/bond/cli.py b/bond/cli.py index 3d6b06336..51d0ee4dd 100644 --- a/bond/cli.py +++ b/bond/cli.py @@ -1,13 +1,18 @@ """Console script for bond.""" import argparse +import subprocess +from pathlib import Path +import os import sys +import re import logging +from bond import BOnD from .docker_run import (check_docker, check_image, build_validator_call, run, parse_validator) - logging.basicConfig(level=logging.INFO) logger = logging.getLogger('bond-cli') +GIT_CONFIG = os.path.join(os.path.expanduser("~"), '.gitconfig') def run_validator(bidsdir, output_path=None): @@ -42,17 +47,84 @@ def run_validator(bidsdir, output_path=None): return parsed -def main(): - """Console script for bond.""" - parser = argparse.ArgumentParser() - parser.add_argument('_', nargs='*') - args = parser.parse_args() +def bond_validate(): + pass + + +def bond_group(): + parser = argparse.ArgumentParser( + 
description="bond-group: find key and parameter groups in BIDS", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('bids_dir', + type=Path, + action='store', + help='the root of a BIDS dataset. It should contain ' + 'sub-X directories and dataset_description.json') + parser.add_argument('output_prefix', + type=Path, + action='store', + help='file prefix to which a _summary.csv, _files.csv ' + 'and _group.csv are written.') + parser.add_argument('--container', + action='store', + help='Docker image tag or Singularity image file.') + parser.add_argument('--use-datalad', + action='store_true', + help='ensure that there are no untracked changes ' + 'before finding groups') + opts = parser.parse_args() + + # Run directly from python using + if opts.container is None: + bod = BOnD(data_root=str(opts.bids_dir), + use_datalad=opts.use_datalad) + if opts.use_datalad and not bod.is_datalad_clean(): + raise Exception("Untracked change in " + str(opts.bids_dir)) + bod.get_CSVs(str(opts.output_prefix)) + sys.exit(0) + + # Run it through a container + container_type = _get_container_type(opts.container) + bids_dir_link = str(opts.bids_dir.absolute()) + ":/bids" + output_dir_link = str(opts.output_prefix.parent.absolute()) + ":/csv:rw" + linked_output_prefix = "/csv/" + opts.output_prefix.name + if container_type == 'docker': + cmd = ['docker', 'run', '--rm', '-v', bids_dir_link, + '-v', GIT_CONFIG+":/root/.gitconfig", + '-v', output_dir_link, '--entrypoint', 'bond-group', + opts.container, '/bids', linked_output_prefix] + elif container_type == 'singularity': + cmd = ['singularity', 'exec', '--cleanenv', + '-B', bids_dir_link, + '-B', output_dir_link, opts.container, 'bond-group', + '/bids', linked_output_prefix] + if opts.use_datalad: + cmd.append("--use-datalad") + print("RUNNING: " + ' '.join(cmd)) + proc = subprocess.run(cmd) + sys.exit(proc.returncode) + + +def bond_apply(): + pass + + +def bond_undo(): + pass + + +def param_group_merge(): + pass + + +def _get_container_type(image_name): - print("Arguments: " + str(args._)) - print("Replace this message by putting your code into " - "bond.cli.main") - return 0 + # If it's a file on disk, it must be a singularity image + if Path(image_name).exists(): + return "singularity" + # It needs to match a docker tag pattern to be docker + if re.match(r"(?:.+\/)?([^:]+)(?::.+)?", image_name): + return "docker" -if __name__ == "__main__": - sys.exit(main()) # pragma: no cover + raise Exception("Unable to determine the container type of " + image_name) diff --git a/neurodebian.gpg b/neurodebian.gpg new file mode 100644 index 000000000..c546d45d2 --- /dev/null +++ b/neurodebian.gpg @@ -0,0 +1,71 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- +Version: GnuPG v1 + +mQGiBEQ7TOgRBADvaRsIZ3VZ6Qy7PlDpdMm97m0OfvouOj/HhjOM4M3ECbGn4cYh +vN1gK586s3sUsUcNQ8LuWvNsYhxYsVTZymCReJMEDxod0U6/z/oIbpWv5svF3kpl +ogA66Ju/6cZx62RiCSOkskI6A3Waj6xHyEo8AGOPfzbMoOOQ1TS1u9s2FwCgxziL +wADvKYlDZnWM03QtqIJVD8UEAOks9Q2OqFoqKarj6xTRdOYIBVEp2jhozZUZmLmz +pKL9E4NKGfixqxdVimFcRUGM5h7R2w7ORqXjCzpiPmgdv3jJLWDnmHLmMYRYQc8p +5nqo8mxuO3zJugxBemWoacBDd1MJaH7nK20Hsk9L/jvU/qLxPJotMStTnwO+EpsK +HlihA/9ZpvzR1QWNUd9nSuNR3byJhaXvxqQltsM7tLqAT4qAOJIcMjxr+qESdEbx +NHM5M1Y21ZynrsQw+Fb1WHXNbP79vzOxHoZR0+OXe8uUpkri2d9iOocre3NUdpOO +JHtl6cGGTFILt8tSuOVxMT/+nlo038JQB2jARe4B85O0tkPIPbQybmV1cm8uZGVi +aWFuLm5ldCBhcmNoaXZlIDxtaWNoYWVsLmhhbmtlQGdtYWlsLmNvbT6IRgQQEQgA +BgUCTVHJKwAKCRCNEUVjdcAkyOvzAJ0abJz+f2a6VZG1c9T8NHMTYh1atwCgt0EE +3ZZd/2in64jSzu0miqhXbOKISgQQEQIACgUCSotRlwMFAXgACgkQ93+NsjFEvg8n 
+JgCfWcdJbILBtpLZCocvOzlLPqJ0Fn0AoI4EpJRxoUnrtzBGUC1MqecU7WsDiGAE +ExECACAFAkqLUWcCGwMGCwkIBwMCBBUCCAMEFgIDAQIeAQIXgAAKCRCl0y8BJkml +qVklAJ4h2V6MdQkSAThF5c2Gkq6eSoIQYQCeM0DWyB9Bl+tTPSTYXwwZi2uoif20 +QmFwc3kuZ3NlLnVuaS1tYWdkZWJ1cmcuZGUgRGViaWFuIEFyY2hpdmUgPG1pY2hh +ZWwuaGFua2VAZ21haWwuY29tPohGBBARAgAGBQJEO03FAAoJEPd/jbIxRL4PU18A +n3tn7i4qdlMi8kHbYWFoabsKc9beAJ9sl/leZNCYNMGhz+u6BQgyeLKw94heBBMR +AgAeBQJEO0zoAhsDBgsJCAcDAgMVAgMDFgIBAh4BAheAAAoJEKXTLwEmSaWpVdoA +n27DvtZizNEbhz3wRUPQMiQjtqdvAJ9rS9YdPe5h5o5gHx3mw3BSkOttdYheBBMR +AgAeBQJEO0zoAhsDBgsJCAcDAgMVAgMDFgIBAh4BAheAAAoJEKXTLwEmSaWpVdoA +oLhwWL+E+2I9lrUf4Lf26quOK9vLAKC9ZpIF2tUirFFkBWnQvu13/TA0SokCHAQQ +AQIABgUCTSNBgQAKCRDAc9Iof/uem4NpEACQ8jxmaCaS/qk/Y4GiwLA5bvKosG3B +iARZ2v5UWqCZQ1tS56yKse/lCIzXQqU9BnYW6wOI2rvFf9meLfd8h96peG6oKscs +fbclLDIf68bBvGBQaD0VYFi/Fk/rxmTQBOCQ3AJZs8O5rIM4gPGE0QGvSZ1h7VRw +3Uyeg4jKXLIeJn2xEmOJgt3auAR2FyKbzHaX9JCoByJZ/eU23akNl9hgt7ePlpXo +74KNYC58auuMUhCq3BQDB+II4ERYMcmFp1N5ZG05Cl6jcaRRHDXz+Ax6DWprRI1+ +RH/Yyae6LmKpeJNwd+vM14aawnNO9h8IAQ+aJ3oYZdRhGyybbin3giJ10hmWveg/ +Pey91Nh9vBCHdDkdPU0s9zE7z/PHT0c5ccZRukxfZfkrlWQ5iqu3V064ku5f4PBy +8UPSkETcjYgDnrdnwqIAO+oVg/SFlfsOzftnwUrvwIcZlXAgtP6MEEAs/38e/JIN +g4VrpdAy7HMGEUsh6Ah6lvGQr+zBnG44XwKfl7e0uCYkrAzUJRGM5vx9iXvFMcMu +jv9EBNNBOU8/Y6MBDzGZhgaoeI27nrUvaveJXjAiDKAQWBLjtQjINZ8I9uaSGOul +8kpbFavE4eS3+KhISrSHe4DuAa3dk9zI+FiPvXY1ZyfQBtNpR+gYFY6VxMbHhY1U +lSLHO2eUIQLdYbRITmV1cm9EZWJpYW4gQXJjaGl2ZSBLZXkgPHBrZy1leHBwc3kt +bWFpbnRhaW5lcnNAbGlzdHMuYWxpb3RoLmRlYmlhbi5vcmc+iEYEEBEIAAYFAk1R +yQYACgkQjRFFY3XAJMgEWwCggx4Gqlcrt76TSMlbU94cESo55AEAoJ3asQEMpe8t +QUX+5aikw3z1AUoCiEoEEBECAAoFAkqf/3cDBQF4AAoJEPd/jbIxRL4PxyMAoKUI +RPWlHCj/+HSFfwhos68wcSwmAKChuC00qutDro+AOo+uuq6YoHXj+ohgBBMRAgAg +BQJKn/8bAhsDBgsJCAcDAgQVAggDBBYCAwECHgECF4AACgkQpdMvASZJpalDggCe +KF9KOgOPdQbFnKXl8KtHory4EEwAnA7jxgorE6kk2QHEXFSF8LzOOH4GiGMEExEC +ACMCGwMGCwkIBwMCBBUCCAMEFgIDAQIeAQIXgAUCSp//RgIZAQAKCRCl0y8BJkml +qekFAKCRyt4+FoCzmBbRUUP3Cr8PzH++IgCgkno4vdjsWdyAey8e0KpITTXMFrmJ +AhwEEAECAAYFAk0jQYEACgkQwHPSKH/7npsFfw/+P8B8hpM3+T1fgboBa4R32deu +n8m6b8vZMXwuo/awQtMpzjem8JGXSUQm8iiX4hDtjq6ZoPrlN8T4jNmviBt/F5jI +Jji/PYmhq+Zn9s++mfx+aF4IJrcHJWFkg/6kJzn4oSdl/YlvKf4VRCcQNtj4xV87 +GsdamnzU17XapLVMbSaVKh+6Af7ZLDerEH+iAq733HsYaTK+1xKmN7EFVXgS7bZ1 +9C4LTzc97bVHSywpT9yIrg9QQs/1kshfVIHDKyhjF6IwzSVbeGAIL3Oqo5zOMkWv +7JlEIkkhTyl+FETxNMTMYjAk+Uei3kRodneq3YBF2uFYSEzrXQgHAyn37geiaMYj +h8wu6a85nG1NS0SdxiZDIePmbvD9vWxFZUWYJ/h9ifsLivWcVXlvHoQ0emd+n2ai +FhAck2xsuyHgnGIZMHww5IkQdu/TMqvbcR6d8Xulh+C4Tq7ppy+oTLADSBKII++p +JQioYydRD529EUJgVlhyH27X6YAk3FuRD3zYZRYS2QECiKXvS665o3JRJ0ZSqNgv +YOom8M0zz6bI9grnUoivMI4o7ISpE4ZwffEd37HVzmraaUHDXRhkulFSf1ImtXoj +V9nNSM5p/+9eP7OioTZhSote6Vj6Ja1SZeRkXZK7BwqPbdO0VsYOb7G//ZiOlqs+ +paRr92G/pwBfj5Dq8EK5Ag0ERDtM9RAIAN0EJqBPvLN0tEin/y4Fe0R4n+E+zNXg +bBsq4WidwyUFy3h/6u86FYvegXwUqVS2OsEs5MwPcCVJOfaEthF7I89QJnP9Nfx7 +V5I9yFB53o9ii38BN7X+9gSjpfwXOvf/wIDfggxX8/wRFel37GRB7TiiABRArBez +s5x+zTXvT++WPhElySj0uY8bjVR6tso+d65K0UesvAa7PPWeRS+3nhqABSFLuTTT +MMbnVXCGesBrYHlFVXClAYrSIOX8Ub/UnuEYs9+hIV7U4jKzRF9WJhIC1cXHPmOh +vleAf/I9h/0KahD7HLYud40pNBo5tW8jSfp2/Q8TIE0xxshd51/xy4MAAwUH+wWn +zsYVk981OKUEXul8JPyPxbw05fOd6gF4MJ3YodO+6dfoyIl3bewk+11KXZQALKaO +1xmkAEO1RqizPeetoadBVkQBp5xPudsVElUTOX0pTYhkUd3iBilsCYKK1/KQ9KzD +I+O/lRsm6L9lc6rV0IgPU00P4BAwR+x8Rw7TJFbuS0miR3lP1NSguz+/kpjxzmGP +LyHJ+LVDYFkk6t0jPXhqFdUY6McUTBDEvavTGlVO062l9APTmmSMVFDsPN/rBes2 +rYhuuT+lDp+gcaS1UoaYCIm9kKOteQBnowX9V74Z+HKEYLtwILaSnNe6/fNSTvyj +g0z+R+sPCY4nHewbVC+ISQQYEQIACQUCRDtM9QIbDAAKCRCl0y8BJkmlqbecAJ9B 
+UdSKVg9H+fQNyP5sbOjj4RDtdACfXHrRHa2+XjJP0dhpvJ8IfvYnQsU= +=fAJZ +-----END PGP PUBLIC KEY BLOCK----- diff --git a/notebooks/workwithtestdata.ipynb b/notebooks/workwithtestdata.ipynb new file mode 100644 index 000000000..43f367c11 --- /dev/null +++ b/notebooks/workwithtestdata.ipynb @@ -0,0 +1,633 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import os\n", + "import os.path as op\n", + "from pkg_resources import resource_filename as pkgrf\n", + "import shutil\n", + "import bond\n", + "TEST_DATA = pkgrf(\"bond\", \"testdata\")\n", + "\n", + "def test_data(tmp_path):\n", + " data_root = tmp_path / \"testdata\"\n", + " shutil.copytree(TEST_DATA, str(data_root))\n", + " assert len(list(data_root.rglob(\"*\"))) > 5\n", + " return data_root\n", + "\n", + "workdir = os.getcwd()\n", + "\n", + "def copy_testing_data(dirname):\n", + " newdir = op.join(workdir, dirname)\n", + " os.makedirs(newdir)\n", + " data_dir = test_data(Path(newdir))\n", + " return data_dir\n", + "\n", + "# copy the data \n", + "data_root = copy_testing_data(\"test1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "!rm -rf test1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test the key / param groups\n", + "\n", + "This test copies the data and makes sure we get the correct number of key and parameter groups out of it\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 6/6 [00:00<00:00, 268.30it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from bond import BOnD\n", + "\n", + "bod = BOnD(str(first_test / \"complete\"))\n", + "bod._cache_fieldmaps()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['acquisition-HASC55AP_datatype-dwi_suffix-dwi', 'acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1', 'acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2', 'acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff', 'datatype-anat_suffix-T1w', 'datatype-fmap_direction-PA_fmap-epi_suffix-epi', 'datatype-func_suffix-bold_task-rest']\n" + ] + } + ], + "source": [ + "key_groups = bod.get_key_groups()\n", + "print(key_groups)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 6/6 [00:00<00:00, 267.86it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ibod = BOnD(str(first_test / \"inconsistent\"))\n", + "misfits = ibod._cache_fieldmaps()\n", + "len(misfits)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "ikey_groups = ibod.get_key_groups()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "ikey_groups == key_groups" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Working with datalad\n", + "\n", + "Here we try to initialize a datalad repo on the test data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[INFO] Creating a new annex repo at /Users/mcieslak/projects/BOnD/notebooks/test1/testdata/inconsistent \n" + ] + }, + { + "ename": "RuntimeError", + "evalue": "Cannot run the event loop while another loop is running", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdatalad\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapi\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mdlapi\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdlapi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfirst_test\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34m\"inconsistent\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mforce\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/interface/utils.py\u001b[0m in \u001b[0;36meval_func\u001b[0;34m(wrapped, instance, args, kwargs)\u001b[0m\n\u001b[1;32m 493\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 494\u001b[0m \u001b[0mlgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Returning return_func from eval_func for %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwrapped_class\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 495\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mreturn_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgenerator_func\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 496\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 497\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0meval_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/interface/utils.py\u001b[0m in \u001b[0;36mreturn_func\u001b[0;34m(wrapped_, instance_, args_, kwargs_)\u001b[0m\n\u001b[1;32m 481\u001b[0m \u001b[0;31m# unwind generator if there is one, this actually runs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 482\u001b[0m \u001b[0;31m# any processing\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 483\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 484\u001b[0m \u001b[0;31m# render summaries\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 485\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mresult_xfm\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mresult_renderer\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'tailored'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'default'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/interface/utils.py\u001b[0m in \u001b[0;36mgenerator_func\u001b[0;34m(*_args, **_kwargs)\u001b[0m\n\u001b[1;32m 400\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 401\u001b[0m \u001b[0;31m# process main results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 402\u001b[0;31m for r in _process_results(\n\u001b[0m\u001b[1;32m 403\u001b[0m \u001b[0;31m# execution\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 404\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0m_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0m_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/interface/utils.py\u001b[0m in \u001b[0;36m_process_results\u001b[0;34m(results, cmd_class, on_failure, action_summary, incomplete_results, result_renderer, result_log_level, allkwargs)\u001b[0m\n\u001b[1;32m 560\u001b[0m \u001b[0mrender_n_repetitions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstdout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misatty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"inf\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 562\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 563\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m'action'\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 564\u001b[0m \u001b[0;31m# XXX Yarik has to no clue on how to track the origin of the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/core/local/create.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(path, initopts, force, description, dataset, no_annex, annex, fake_dates, cfg_proc)\u001b[0m\n\u001b[1;32m 393\u001b[0m \u001b[0;31m# always come with annex when created from scratch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 394\u001b[0m 
\u001b[0mlgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Creating a new annex repo at %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtbds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 395\u001b[0;31m tbrepo = AnnexRepo(\n\u001b[0m\u001b[1;32m 396\u001b[0m \u001b[0mtbds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 397\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/support/repo.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;31m# we have no such instance yet or the existing one is invalidated,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;31m# so we instantiate:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0minstance\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mnew_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mnew_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_unique_instances\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mid_\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minstance\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/support/annexrepo.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, path, url, runner, backend, always_commit, create, create_sanity_checks, init, batch_size, version, description, git_opts, annex_opts, annex_init_opts, repo, fake_dates)\u001b[0m\n\u001b[1;32m 274\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 275\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdo_init\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 276\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_init\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mversion\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdescription\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdescription\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[0;31m# TODO: RM DIRECT eventually, but should remain while we have is_direct_mode\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/support/annexrepo.py\u001b[0m in \u001b[0;36m_init\u001b[0;34m(self, version, description)\u001b[0m\n\u001b[1;32m 1275\u001b[0m 
\u001b[0mwhere\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'local'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1276\u001b[0m reload=False)\n\u001b[0;32m-> 1277\u001b[0;31m self._run_annex_command(\n\u001b[0m\u001b[1;32m 1278\u001b[0m \u001b[0;34m'init'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1279\u001b[0m \u001b[0mrunner\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"gitwitless\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/support/annexrepo.py\u001b[0m in \u001b[0;36m_run_annex_command\u001b[0;34m(self, annex_cmd, git_options, annex_options, backend, jobs, files, merge_annex_branches, runner, protocol, **kwargs)\u001b[0m\n\u001b[1;32m 1098\u001b[0m \u001b[0;31m# TODO: RF to use --batch where possible instead of splitting\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1099\u001b[0m \u001b[0;31m# into multiple invocations\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1100\u001b[0;31m return run_gitcommand_on_file_list_chunks(\n\u001b[0m\u001b[1;32m 1101\u001b[0m \u001b[0mrun_func\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1102\u001b[0m \u001b[0mcmd_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/cmd.py\u001b[0m in \u001b[0;36mrun_gitcommand_on_file_list_chunks\u001b[0;34m(func, cmd, files, *args, **kwargs)\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmd\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'--'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mfile_chunk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 144\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 145\u001b[0m \u001b[0;31m# if it was a WitlessRunner.run -- we would get dicts.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0;31m# If old Runner -- stdout, stderr strings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/cmd.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, cmd, protocol, stdin, cwd, env, **kwargs)\u001b[0m\n\u001b[1;32m 478\u001b[0m 
\u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_event_loop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevent_loop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 479\u001b[0m \u001b[0;31m# include the subprocess manager in the asyncio event loop\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 480\u001b[0;31m results = event_loop.run_until_complete(\n\u001b[0m\u001b[1;32m 481\u001b[0m run_async_cmd(\n\u001b[1;32m 482\u001b[0m \u001b[0mevent_loop\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/asyncio/base_events.py\u001b[0m in \u001b[0;36mrun_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 590\u001b[0m \"\"\"\n\u001b[1;32m 591\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_closed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 592\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_running\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 593\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 594\u001b[0m \u001b[0mnew_task\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfutures\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misfuture\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfuture\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/asyncio/base_events.py\u001b[0m in \u001b[0;36m_check_running\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'This event loop is already running'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 553\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mevents\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_running_loop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 554\u001b[0;31m raise RuntimeError(\n\u001b[0m\u001b[1;32m 555\u001b[0m 'Cannot run the event loop while another loop is running')\n\u001b[1;32m 556\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: Cannot run the event loop while another loop is running" + ] + } + ], + "source": [ + "import datalad.api as dlapi\n", + "\n", + "dl = dlapi.create(path=first_test / \"inconsistent\", force=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "files_df, summary_df = bod.get_param_groups_dataframes()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "%qtconsole" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
key_groupParamGroupCount
0acquisition-64dir_datatype-dwi_suffix-dwi13
1acquisition-HCP_datatype-anat_suffix-T1w13
2acquisition-HCP_datatype-anat_suffix-T2w11
3acquisition-dwi_datatype-fmap_direction-AP_suf...13
4acquisition-dwi_datatype-fmap_direction-PA_suf...13
5acquisition-fMRI_datatype-fmap_direction-AP_su...01
6acquisition-fMRI_datatype-fmap_direction-AP_su...11
7acquisition-fMRI_datatype-fmap_direction-PA_su...01
8acquisition-fMRI_datatype-fmap_direction-PA_su...11
9datatype-func_run-1_suffix-bold_task-peer12
10datatype-func_run-1_suffix-bold_task-rest12
11datatype-func_run-2_suffix-bold_task-peer12
12datatype-func_run-2_suffix-bold_task-rest11
13datatype-func_run-3_suffix-bold_task-peer12
14datatype-func_suffix-bold_task-movieDM11
15datatype-func_suffix-bold_task-movieTP12
\n", + "
" + ], + "text/plain": [ + " key_group ParamGroup Count\n", + "0 acquisition-64dir_datatype-dwi_suffix-dwi 1 3\n", + "1 acquisition-HCP_datatype-anat_suffix-T1w 1 3\n", + "2 acquisition-HCP_datatype-anat_suffix-T2w 1 1\n", + "3 acquisition-dwi_datatype-fmap_direction-AP_suf... 1 3\n", + "4 acquisition-dwi_datatype-fmap_direction-PA_suf... 1 3\n", + "5 acquisition-fMRI_datatype-fmap_direction-AP_su... 0 1\n", + "6 acquisition-fMRI_datatype-fmap_direction-AP_su... 1 1\n", + "7 acquisition-fMRI_datatype-fmap_direction-PA_su... 0 1\n", + "8 acquisition-fMRI_datatype-fmap_direction-PA_su... 1 1\n", + "9 datatype-func_run-1_suffix-bold_task-peer 1 2\n", + "10 datatype-func_run-1_suffix-bold_task-rest 1 2\n", + "11 datatype-func_run-2_suffix-bold_task-peer 1 2\n", + "12 datatype-func_run-2_suffix-bold_task-rest 1 1\n", + "13 datatype-func_run-3_suffix-bold_task-peer 1 2\n", + "14 datatype-func_suffix-bold_task-movieDM 1 1\n", + "15 datatype-func_suffix-bold_task-movieTP 1 2" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary_df[[\"key_group\", \"ParamGroup\", \"Count\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "param_group_cols = list(set(df.columns.to_list()) - set([\"FilePath\"]))\n", + "uniques = df.drop_duplicates(param_group_cols, ignore_index=True)\n", + "print(uniques.shape)\n", + "counts = df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')\n", + "print(counts.shape)\n", + "\n", + "params_and_counts = pd.merge(uniques, counts)\n", + "print(params_and_counts.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "no_paths[[\"key_group\", \"ParamGroup\"]].groupby([\"key_group\", \"ParamGroup\"]).count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "keyparam_df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fname = 'sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bod.get_key_groups()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "self = bod\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from bond.bond import *\n", + "suffix = '(phase1|phasediff|epi|fieldmap)'\n", + "fmap_files = self.layout.get(suffix=suffix, regex_search=True,\n", + " extension=['.nii.gz', '.nii'])\n", + "\n", + "files_to_fmaps = defaultdict(list)\n", + "\n", + "print(\"\\n\".join([f.path for f in fmap_files]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "for fmap_file in tqdm(fmap_files):\n", + " intentions = 
listify(fmap_file.get_metadata().get(\"IntendedFor\"))\n", + " subject_prefix = \"sub-%s/\" % fmap_file.entities['subject']\n", + " for intended_for in intentions:\n", + " subject_relative_path = subject_prefix + intended_for\n", + " files_to_fmaps[subject_relative_path].append(fmap_file)\n", + "\"\"\"\n", + "fmap_file = fmap_files[0]\n", + "intentions = listify(fmap_file.get_metadata().get(\"IntendedFor\"))\n", + "print(\"intentions:\", intentions)\n", + "subject_prefix = \"sub-%s/\" % fmap_file.entities['subject']\n", + "print(subject_prefix)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "suffix = '(phase1|phasediff|epi|fieldmap)'\n", + "fmap_files = self.layout.get(suffix=suffix, regex_search=True,\n", + " extension=['.nii.gz', '.nii'])\n", + "\n", + "files_to_fmaps = defaultdict(list)\n", + "for fmap_file in tqdm(fmap_files):\n", + " intentions = listify(fmap_file.get_metadata().get(\"IntendedFor\"))\n", + " subject_prefix = \"sub-%s\" % fmap_file.entities['subject']\n", + " for intended_for in intentions:\n", + " full_path = Path(self.path) / subject_prefix / intended_for\n", + " files_to_fmaps[str(full_path)].append(fmap_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for data_file, fmap_files in bod.fieldmap_lookup.items():\n", + " print(data_file[44:])\n", + " for fmap_file in fmap_files:\n", + " print(\" \", fmap_file.path[44:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "files_to_fmaps.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from bond.bond import *\n", + "files = [\n", + " '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz', \n", + " '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARRP384BVX/ses-HBNsiteRU/dwi/sub-NDARRP384BVX_ses-HBNsiteRU_acq-64dir_dwi.nii.gz']\n", + "\n", + "dfs = []\n", + "fieldmap_lookup = bod.fieldmap_lookup\n", + "key_group_name = \"test\"\n", + "# path needs to be relative to the root with no leading prefix\n", + "for path in files:\n", + " metadata = bod.layout.get_metadata(path)\n", + " wanted_keys = metadata.keys() & IMAGING_PARAMS\n", + " example_data = {key: metadata[key] for key in wanted_keys}\n", + " example_data[\"key_group\"] = key_group_name\n", + "\n", + " # Get the fieldmaps out and add their types\n", + " print(fieldmap_lookup[path])\n", + " fieldmap_types = sorted([fmap.entities['fmap'] for fmap in fieldmap_lookup[path]])\n", + " for fmap_num, fmap_type in enumerate(fieldmap_types):\n", + " example_data['fieldmap_type%02d' % fmap_num] = fmap_type \n", + "\n", + " # Expand slice timing to multiple columns\n", + " SliceTime = example_data.get('SliceTiming')\n", + " if SliceTime:\n", + " # round each slice time to one place after the decimal\n", + " for i in range(len(SliceTime)):\n", + " SliceTime[i] = round(SliceTime[i], 1)\n", + " example_data.update(\n", + " {\"SliceTime%03d\" % SliceNum: time for\n", + " SliceNum, time in enumerate(SliceTime)})\n", + " del example_data['SliceTiming']\n", + "\n", + " dfs.append(example_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example_data" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/setup.cfg b/setup.cfg index 850607fb2..27052bd56 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,18 +20,20 @@ exclude = *build/ per-file-ignores = # imported but unused __init__.py: F401 -putty-ignore = +putty-ignore = */__init__.py : +F401 /^\s*\.\. _.*?: http/ : +E501 [options] python_requires = >=3.7 -install_requires = +install_requires = pybids pandas tqdm numpy -test_requires = + datalad>=0.13.5 + wrapt<2,>=1.10 +test_requires = pytest==4.6.5 pytest-runner==5.1 pip==19.2.3 @@ -49,19 +51,19 @@ test_requires = [options.extras_require] datalad = datalad -doc = +doc = nbsphinx packaging sphinx >= 1.8 sphinx-argparse -docs = +docs = %(doc)s -tests = +tests = coverage codecov pytest pytest-env -all = +all = %(datalad)s %(doc)s %(tests)s @@ -70,8 +72,8 @@ all = norecursedirs = .git addopts = -svx --doctest-modules doctest_optionflags = ALLOW_UNICODE NORMALIZE_WHITESPACE ELLIPSIS -env = +env = PYTHONHASHSEED=0 -filterwarnings = +filterwarnings = ignore::DeprecationWarning junit_family = xunit2 diff --git a/setup.py b/setup.py index 3a8eab650..8cde8c60f 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,11 @@ description="BIDS On Disk Editor", entry_points={ 'console_scripts': [ - 'bond=bond.cli:main', + 'bond-group=bond.cli:bond_group', + 'bond-apply=bond.cli:bond_apply', + 'bond-revert=bond.cli:bond_revert', + 'bids-sidecar-merge=bond.cli:param_group_merge', + 'bond-validate=bond.cli:bond_validate' ], }, license="GNU General Public License v3", diff --git a/tests/test_bond.py b/tests/test_bond.py index f946ae115..53e814d2b 100644 --- a/tests/test_bond.py +++ b/tests/test_bond.py @@ -2,16 +2,12 @@ """Tests for `bond` package.""" import sys +import shutil +import json +from pkg_resources import resource_filename as pkgrf sys.path.append("..") -import os import pytest -from pkg_resources import resource_filename as pkgrf -import shutil from bond import BOnD -import os.path as op -from copy import deepcopy -import base64 -from glob import glob TEST_DATA = pkgrf("bond", "testdata") @@ -101,6 +97,64 @@ def test_csv_creation(tmp_path): assert isummary_df.shape[0] == 11 +def test_change_key_groups(tmp_path): + data_root = get_data(tmp_path) + + my_bond = BOnD(data_root) + my_bond._cache_fieldmaps() + my_bond.get_CSVs(str(tmp_path / "og_csv_dir")) + + +def _edit_a_json(json_file): + """Open a json file, write somthing to it and save it to the same name.""" + with open(json_file, "r") as metadatar: + metadata = json.load(metadatar) + + metadata["THIS_IS_A_TEST"] = True + with open(json_file, "w") as metadataw: + json.dump(metadata, metadataw) + + +def test_datalad_integration(tmp_path): + """Test that datalad works for basic file modification operations. + """ + data_root = get_data(tmp_path) + + # Test that an uninitialized BOnD raises exceptions + uninit_bond = BOnD(data_root / "complete", use_datalad=False) + + # Ensure an exception is raised if trying to use datalad without + # initializing + with pytest.raises(Exception): + uninit_bond.is_datalad_clean() + + # initialize the datalad repository and try again + uninit_bond.init_datalad(save=True) + assert uninit_bond.is_datalad_clean() + + # Now, the datalad repository is initialized and saved. 
+ # Make sure if we make a new BOnD object it recognizes that + # the datalad status is OK + complete_bod = BOnD(data_root / "complete", use_datalad=True) + + assert complete_bod.datalad_ready + assert complete_bod.is_datalad_clean() + + # Edit a file and make sure that it's been detected by datalad + _edit_a_json(str(data_root / "complete" / "sub-03" / "ses-phdiff" / "func" + / "sub-03_ses-phdiff_task-rest_bold.json")) + assert not uninit_bond.is_datalad_clean() + assert not complete_bod.is_datalad_clean() + + # Make sure you can't initialize a BOnD object on a dirty directory + with pytest.raises(Exception): + BOnD(data_root / "complete", use_datalad=True) + + # Test BOnD.datalad_save() + uninit_bond.datalad_save(message="TEST SAVE!") + + + """ def test_fill_metadata(tmp_path): data_root = tmp_path / "testdata"