From 95ffd96cc9608d9182c115f6230ecd9a43374804 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Mon, 16 Nov 2020 00:27:32 -0500 Subject: [PATCH] [ENH] Add singularity and docker to CI tests (#30) --- .circleci/config.yml | 154 +++++++- .gitignore | 1 + Dockerfile | 57 ++- bond/bond.py | 204 +++++++++- bond/cli.py | 96 ++++- neurodebian.gpg | 71 ++++ notebooks/workwithtestdata.ipynb | 633 +++++++++++++++++++++++++++++++ setup.cfg | 20 +- setup.py | 6 +- tests/test_bond.py | 68 +++- 10 files changed, 1238 insertions(+), 72 deletions(-) create mode 100644 neurodebian.gpg create mode 100644 notebooks/workwithtestdata.ipynb diff --git a/.circleci/config.yml b/.circleci/config.yml index b4f7a74cd..ee849d5ef 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -86,6 +86,118 @@ jobs: paths: - miniconda + get_singularity: + machine: + image: ubuntu-1604:202004-01 + steps: + - restore_cache: + keys: + - singularity-v3-{{ .Branch }}-{{ .Revision }} + + - run: + name: Download Singularity + command: | + if [ ! -f /usr/local/bin/singularity ] + then + sudo apt-get update && sudo apt-get -y install build-essential \ + libssl-dev \ + uuid-dev \ + libgpgme11-dev \ + squashfs-tools \ + libseccomp-dev \ + pkg-config \ + wget + export GOPATH=$HOME/go + mkdir -p $GOPATH + go get -u github.com/golang/dep/cmd/dep + export VERSION=3.6.4 + wget https://github.com/sylabs/singularity/releases/download/v${VERSION}/singularity-${VERSION}.tar.gz + tar -xzf singularity-${VERSION}.tar.gz + cd ./singularity + ./mconfig && \ + make -j2 -C ./builddir + cd $HOME + tar cfz singularity.tar.gz project/singularity/ + tar cfz go.tar.gz go/ + fi + + - save_cache: + key: singularity-v3-{{ .Branch }}-{{ .Revision }}-{{ epoch }} + paths: + - /home/circleci/singularity.tar.gz + - /home/circleci/go.tar.gz + + + install_and_test_singularity: + machine: + image: ubuntu-1604:202004-01 + working_directory: /home/circleci/src/BOnD + steps: + - checkout: + path: /home/circleci/src/BOnD + + - attach_workspace: + at: /tmp + + - restore_cache: + keys: + - docker-v1-{{ .Branch }}-{{ .Revision }} + + - restore_cache: + keys: + - singularity-v3-{{ .Branch }}-{{ .Revision }} + - run: + name: Load Docker image layer cache + no_output_timeout: 30m + command: | + docker info + set +o pipefail + if [ -f /tmp/cache/docker.tar.gz ]; then + sudo apt update && sudo apt -y install pigz + pigz -d --stdout /tmp/cache/docker.tar.gz | docker load + docker images + fi + + - run: + name: Install BOnD + command: | + export PATH=/tmp/miniconda/bin:$PATH + source activate bond + pip install . 
+ + - run: + name: Install Singularity + command: | + export GOPATH=$HOME/go + cd /home/circleci + tar xfz go.tar.gz + tar xfz singularity.tar.gz + cd project/singularity/builddir + sudo make install + + - run: + name: Build singularity image + no_output_timeout: 30m + command: | + singularity build \ + /home/circleci/bond-latest.sif \ + docker-daemon://pennlinc/bond:latest + + - run: + name: Test singularity + command: | + git config --global user.email "circleci@citesting.com" + git config --global user.name "CircleCI Test" + export PATH=/tmp/miniconda/bin:$PATH + source activate bond + mkdir -p /tmp/bids /tmp/group_testing + cp -r /home/circleci/src/BOnD/bond/testdata/complete /tmp/bids/singularity + bond-group \ + /tmp/bids/singularity \ + /tmp/group_testing/direct \ + --container /home/circleci/bond-latest.sif + + install_and_test: machine: @@ -119,14 +231,21 @@ jobs: command: | export PATH=/tmp/miniconda/bin:$PATH source activate bond - pip install .[all] + pip install . - run: - name: Run PyTest + name: Test Docker integration command: | + git config --global user.email "circleci@citesting.com" + git config --global user.name "CircleCI Test" export PATH=/tmp/miniconda/bin:$PATH source activate bond - py.test -sv tests + mkdir -p /tmp/bids /tmp/group_testing + cp -r /home/circleci/src/BOnD/bond/testdata/complete /tmp/bids/docker + bond-group \ + /tmp/bids/docker \ + /tmp/group_testing/docker \ + --container pennlinc/bond:latest build_docs: docker: @@ -146,7 +265,7 @@ jobs: name: Check Python version and upgrade pip command: | python --version - sudo python -m pip install -U pip + sudo python -m pip install -U pip sudo pip install flake8 - run: @@ -157,7 +276,7 @@ jobs: - run: name: Install BOnD. command: sudo python -m pip install ".[doc]" --no-cache-dir --progress-bar off - + - run: name: Build documentation no_output_timeout: 45m @@ -352,6 +471,30 @@ workflows: tags: only: /.*/ + - get_singularity: + requires: + - build + filters: + branches: + ignore: + - /tests?\/.*/ + - /docker\/.*/ + tags: + only: /.*/ + + - install_and_test_singularity: + requires: + - build + - setup_conda + - get_singularity + filters: + branches: + ignore: + - /tests?\/.*/ + - /docker\/.*/ + tags: + only: /.*/ + - build_docs: filters: branches: @@ -365,6 +508,7 @@ workflows: requires: - build_docs - install_and_test + - install_and_test_singularity filters: branches: only: master diff --git a/.gitignore b/.gitignore index bbe607506..974b70a2b 100644 --- a/.gitignore +++ b/.gitignore @@ -135,3 +135,4 @@ data/* .DS_Store notebooks/testdata .vscode/settings.json +notebooks/test1 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 116c7626b..2b0ef0b1c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,44 +1,35 @@ -FROM python:3 - -# install conda -RUN curl -sSLO https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \ - bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /usr/local/miniconda && \ - rm Miniconda3-4.5.12-Linux-x86_64.sh - -ENV PATH=/usr/local/miniconda/bin:$PATH - -# activate conda environment -RUN echo "source activate base" > ~/.bashrc - -RUN which conda +FROM ubuntu:bionic-20200921 # get the validator branch skip_session_check RUN apt-get update && \ - apt-get install -y git + apt-get install -y --no-install-recommends \ + curl ca-certificates && \ + apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -# USE CONDA FOR INSTALLING NPM -RUN conda install nodejs +# Installing Neurodebian and nodejs packages +COPY neurodebian.gpg /usr/local/etc/neurodebian.gpg 
+RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - && \
+    curl -sSL "http://neuro.debian.net/lists/$( lsb_release -c | cut -f2 ).us-ca.full" >> /etc/apt/sources.list.d/neurodebian.sources.list && \
+    apt-key add /usr/local/etc/neurodebian.gpg && \
+    (apt-key adv --refresh-keys --keyserver hkp://ha.pool.sks-keyservers.net 0xA5D32F012649A5A9 || true)
 
-RUN npm --version
+# Install datalad, nodejs and python3 from the repositories configured above
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    datalad nodejs python3 python3-pip python3-setuptools && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
-RUN npm install -g yarn
-RUN mkdir -p /home/validator && \
+RUN npm install -g yarn && \
+    mkdir -p /home/validator && \
     cd /home/validator && \
-    git clone -b skip_session_checks --single-branch https://github.com/bids-standard/bids-validator.git
-
-
-RUN ls /home/validator/bids-validator
-RUN cd /home/validator/bids-validator && \
+    git clone -b skip_session_checks \
+        --single-branch https://github.com/bids-standard/bids-validator.git && \
+    cd /home/validator/bids-validator && \
     yarn && \
-    npm install -g bids-validator
-
-RUN which bids-validator
-
-# prepare env
-COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt
+    cd bids-validator && npm install -g
 
-COPY . .
+COPY . /src/BOnD
+RUN pip3 install --no-cache-dir "/src/BOnD"
 
-ENTRYPOINT [ "bids-validator"]
+ENTRYPOINT [ "/bin/bash"]
diff --git a/bond/bond.py b/bond/bond.py
index c40cf56bf..cc79dd44f 100644
--- a/bond/bond.py
+++ b/bond/bond.py
@@ -7,6 +7,7 @@
 from bids.utils import listify
 import numpy as np
 import pandas as pd
+import datalad.api as dlapi
 from tqdm import tqdm
 
 bids.config.set_option('extension_initial_dot', True)
@@ -26,13 +27,206 @@ class BOnD(object):
 
-    def __init__(self, data_root):
+    def __init__(self, data_root, use_datalad=False):
         self.path = data_root
         self.layout = bids.BIDSLayout(self.path, validate=False)
         # dictionary of KEYS: keys groups, VALUES: list of files
         self.keys_files = {}
         self.fieldmaps_cached = False
+        self.datalad_ready = False
+        self.datalad_handle = None
+
+        # Initialize datalad if requested
+        if use_datalad:
+            self.init_datalad()
+
+    def init_datalad(self, save=False, message=None):
+        """Initializes a datalad Dataset at self.path.
+
+        Parameters:
+        -----------
+
+        save: bool
+            Run datalad save to add any untracked files
+        message: str or None
+            Message to attach to the datalad save commit
+        """
+        self.datalad_ready = True
+        self.datalad_handle = dlapi.create(self.path,
+                                           cfg_proc='text2git',
+                                           force=True,
+                                           annex=True)
+        if save:
+            self.datalad_handle.save(message="Saved by BOnD")
+        if not save and not self.is_datalad_clean():
+            raise Exception("Unsaved changes in %s" % self.path)
+
+    def datalad_save(self, message=None):
+        if message is None:
+            message = "BOnD Save"
+        statuses = self.datalad_handle.save(message=message)
+        saved_status = set([status['status'] for status in statuses])
+        if not saved_status == set(["ok"]):
+            raise Exception("Failed to save in DataLad")
+
+    def is_datalad_clean(self):
+        """If True, no changes are detected in the datalad dataset."""
+        if not self.datalad_ready:
+            raise Exception(
+                "Datalad not initialized, can't determine status")
+        statuses = set([status['state'] for status in
+                        self.datalad_handle.status()])
+        return statuses == set(["clean"])
+
+    def merge_params(self, merge_df, files_df):
+        key_param_merge = {}
+        for i in range(len(merge_df)):
+            key_group = merge_df.iloc[i]['KeyGroup']
+            param_group = merge_df.iloc[i]['ParamGroup']
+            merge_into = merge_df.iloc[i]['MergeInto']
+            key_param_merge[(key_group, param_group)] = merge_into
+        pairs_to_change = list(key_param_merge.keys())
+
+        # locate files that need to change param groups/be deleted
+        for row in range(len(files_df)):
+
+            key = files_df.iloc[row]['KeyGroup']
+            param = files_df.iloc[row]['ParamGroup']
+
+            if (key, param) in pairs_to_change:
+                if key_param_merge[(key, param)] == 0:
+                    file_path = files_df.iloc[row]['FilePath']
+                    file_to_rem = Path(file_path)
+                    file_to_rem.unlink()
+            # else:
+                # need to merge the param groups
+                # NEED TO COPY THE METADATA FROM
+                # "MergeInto" --> "ParamGroup"
+                # self.change_metadata
+
+    def change_key_groups(self, og_csv_dir, new_csv_dir):
+        files_df = pd.read_csv(og_csv_dir + 'files.csv')
+        summary_df = pd.read_csv(og_csv_dir + 'summary.csv')
+
+        # TODO: IMPLEMENT merge_params (above)
+        # merge_df = summary_df[summary_df.MergeInto.notnull()]
+        # self.merge_params(merge_df, files_df)
+
+        change_keys_df = summary_df[summary_df.RenameKeyGroup.notnull()]
+
+        # dictionary
+        # KEYS = (orig key group, param num)
+        # VALUES = new key group
+        key_groups = {}
+
+        for i in range(len(change_keys_df)):
+            new_key = change_keys_df.iloc[i]['RenameKeyGroup']
+            old_key = change_keys_df.iloc[i]['KeyGroup']
+            param_group = change_keys_df.iloc[i]['ParamGroup']
+
+            # add to dictionary
+            key_groups[(old_key, param_group)] = new_key
+
+        # orig key/param tuples that will have new key group
+        pairs_to_change = key_groups.keys()
+
+        for row in range(len(files_df)):
+
+            key_group = files_df.iloc[row]['KeyGroup']
+            param_group = files_df.iloc[row]['ParamGroup']
+
+            if (key_group, param_group) in pairs_to_change:
+
+                file_path = files_df.iloc[row]['FilePath']
+                orig_key = files_df.iloc[row]['KeyGroup']
+                param_num = files_df.iloc[row]['ParamGroup']
+
+                new_key = key_groups[(orig_key, param_num)]
+
+                new_entities = _key_group_to_entities(new_key)
+
+                # change each filename according to new key group
+                self.change_filename(file_path, new_entities)
+
+        # TODO: THROW AN EXCEPTION IF NEW_KEY NOT VALID!
+        # OR IF KEY CAN'T BE PARSED AS A DICT?
+
+        self.layout = bids.BIDSLayout(self.path, validate=False)
+        self.get_CSVs(new_csv_dir)
+
+    def change_filename(self, filepath, entities):
+        # TODO: NEED TO RGLOB self.path??????
+ path = Path(filepath) + exts = path.suffixes + old_ext = "" + for ext in exts: + old_ext += ext + + # check if need to change the modality (one directory level up) + l_keys = list(entities.keys()) + + if "datatype" in l_keys: + # create path string a and add new modality + modality = entities['datatype'] + l_keys.remove('datatype') + else: + large = str(path.parent) + small = str(path.parents[1]) + '/' + modality = large.replace(small, '') + + # detect the subject/session string and keep it together + # front_stem is the string of subject/session paris + # these two entities don't change with the key group + front_stem = "" + cntr = 0 + for char in path.stem: + if char == "_" and cntr == 1: + cntr = 2 + break + if char == "_" and cntr == 0: + cntr += 1 + if cntr != 2: + front_stem = front_stem + char + + parent = str(path.parents[1]) + new_path_front = parent + '/' + modality + '/' + front_stem + + # remove fmap (not part of filename string) + if "fmap" in l_keys: + l_keys.remove("fmap") + + # now need to create the key/value string from the keys! + new_filename = "_".join(["{}-{}".format(key, entities[key]) + for key in l_keys]) + + # shorten "acquisition" in the filename + new_filename = new_filename.replace("acquisition", "acq") + + # shorten "reconstruction" in the filename + new_filename = new_filename.replace("reconstruction", "rec") + + # REMOVE "suffix-" + new_filename = new_filename.replace("suffix-", "") + + new_path = new_path_front + "_" + new_filename + old_ext + + path.rename(Path(new_path)) + + # now also rename json file + bidsfile = self.layout.get_file(filepath, scope='all') + + bidsjson_file = bidsfile.get_associations() + if bidsjson_file: + json_file = [x for x in bidsjson_file if 'json' in x.filename] + else: + print("NO JSON FILES FOUND IN ASSOCIATIONS") + if len(json_file) == 1: + json_file = json_file[0] + new_json_path = new_path_front + "_" + new_filename + ".json" + (Path(json_file.path)).rename(Path(new_json_path)) + else: + print("FOUND IRREGULAR NUMBER OF JSONS") def fieldmaps_ok(self): pass @@ -138,11 +332,11 @@ def get_CSVs(self, path_prefix): ----------- - None """ - big_df = self.get_param_groups_dataframes()[0] - summary = self.get_param_groups_dataframes()[1] - big_df.to_csv(path_prefix + "files.csv", index=False) - summary.to_csv(path_prefix + "summary.csv", index=False) + self._cache_fieldmaps() + big_df, summary = self.get_param_groups_dataframes() + big_df.to_csv(path_prefix + "_files.csv", index=False) + summary.to_csv(path_prefix + "_summary.csv", index=False) def get_file_params(self, key_group): key_entities = _key_group_to_entities(key_group) diff --git a/bond/cli.py b/bond/cli.py index 3d6b06336..51d0ee4dd 100644 --- a/bond/cli.py +++ b/bond/cli.py @@ -1,13 +1,18 @@ """Console script for bond.""" import argparse +import subprocess +from pathlib import Path +import os import sys +import re import logging +from bond import BOnD from .docker_run import (check_docker, check_image, build_validator_call, run, parse_validator) - logging.basicConfig(level=logging.INFO) logger = logging.getLogger('bond-cli') +GIT_CONFIG = os.path.join(os.path.expanduser("~"), '.gitconfig') def run_validator(bidsdir, output_path=None): @@ -42,17 +47,84 @@ def run_validator(bidsdir, output_path=None): return parsed -def main(): - """Console script for bond.""" - parser = argparse.ArgumentParser() - parser.add_argument('_', nargs='*') - args = parser.parse_args() +def bond_validate(): + pass + + +def bond_group(): + parser = argparse.ArgumentParser( + 
description="bond-group: find key and parameter groups in BIDS", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('bids_dir', + type=Path, + action='store', + help='the root of a BIDS dataset. It should contain ' + 'sub-X directories and dataset_description.json') + parser.add_argument('output_prefix', + type=Path, + action='store', + help='file prefix to which a _summary.csv, _files.csv ' + 'and _group.csv are written.') + parser.add_argument('--container', + action='store', + help='Docker image tag or Singularity image file.') + parser.add_argument('--use-datalad', + action='store_true', + help='ensure that there are no untracked changes ' + 'before finding groups') + opts = parser.parse_args() + + # Run directly from python using + if opts.container is None: + bod = BOnD(data_root=str(opts.bids_dir), + use_datalad=opts.use_datalad) + if opts.use_datalad and not bod.is_datalad_clean(): + raise Exception("Untracked change in " + str(opts.bids_dir)) + bod.get_CSVs(str(opts.output_prefix)) + sys.exit(0) + + # Run it through a container + container_type = _get_container_type(opts.container) + bids_dir_link = str(opts.bids_dir.absolute()) + ":/bids" + output_dir_link = str(opts.output_prefix.parent.absolute()) + ":/csv:rw" + linked_output_prefix = "/csv/" + opts.output_prefix.name + if container_type == 'docker': + cmd = ['docker', 'run', '--rm', '-v', bids_dir_link, + '-v', GIT_CONFIG+":/root/.gitconfig", + '-v', output_dir_link, '--entrypoint', 'bond-group', + opts.container, '/bids', linked_output_prefix] + elif container_type == 'singularity': + cmd = ['singularity', 'exec', '--cleanenv', + '-B', bids_dir_link, + '-B', output_dir_link, opts.container, 'bond-group', + '/bids', linked_output_prefix] + if opts.use_datalad: + cmd.append("--use-datalad") + print("RUNNING: " + ' '.join(cmd)) + proc = subprocess.run(cmd) + sys.exit(proc.returncode) + + +def bond_apply(): + pass + + +def bond_undo(): + pass + + +def param_group_merge(): + pass + + +def _get_container_type(image_name): - print("Arguments: " + str(args._)) - print("Replace this message by putting your code into " - "bond.cli.main") - return 0 + # If it's a file on disk, it must be a singularity image + if Path(image_name).exists(): + return "singularity" + # It needs to match a docker tag pattern to be docker + if re.match(r"(?:.+\/)?([^:]+)(?::.+)?", image_name): + return "docker" -if __name__ == "__main__": - sys.exit(main()) # pragma: no cover + raise Exception("Unable to determine the container type of " + image_name) diff --git a/neurodebian.gpg b/neurodebian.gpg new file mode 100644 index 000000000..c546d45d2 --- /dev/null +++ b/neurodebian.gpg @@ -0,0 +1,71 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- +Version: GnuPG v1 + +mQGiBEQ7TOgRBADvaRsIZ3VZ6Qy7PlDpdMm97m0OfvouOj/HhjOM4M3ECbGn4cYh +vN1gK586s3sUsUcNQ8LuWvNsYhxYsVTZymCReJMEDxod0U6/z/oIbpWv5svF3kpl +ogA66Ju/6cZx62RiCSOkskI6A3Waj6xHyEo8AGOPfzbMoOOQ1TS1u9s2FwCgxziL +wADvKYlDZnWM03QtqIJVD8UEAOks9Q2OqFoqKarj6xTRdOYIBVEp2jhozZUZmLmz +pKL9E4NKGfixqxdVimFcRUGM5h7R2w7ORqXjCzpiPmgdv3jJLWDnmHLmMYRYQc8p +5nqo8mxuO3zJugxBemWoacBDd1MJaH7nK20Hsk9L/jvU/qLxPJotMStTnwO+EpsK +HlihA/9ZpvzR1QWNUd9nSuNR3byJhaXvxqQltsM7tLqAT4qAOJIcMjxr+qESdEbx +NHM5M1Y21ZynrsQw+Fb1WHXNbP79vzOxHoZR0+OXe8uUpkri2d9iOocre3NUdpOO +JHtl6cGGTFILt8tSuOVxMT/+nlo038JQB2jARe4B85O0tkPIPbQybmV1cm8uZGVi +aWFuLm5ldCBhcmNoaXZlIDxtaWNoYWVsLmhhbmtlQGdtYWlsLmNvbT6IRgQQEQgA +BgUCTVHJKwAKCRCNEUVjdcAkyOvzAJ0abJz+f2a6VZG1c9T8NHMTYh1atwCgt0EE +3ZZd/2in64jSzu0miqhXbOKISgQQEQIACgUCSotRlwMFAXgACgkQ93+NsjFEvg8n 
+JgCfWcdJbILBtpLZCocvOzlLPqJ0Fn0AoI4EpJRxoUnrtzBGUC1MqecU7WsDiGAE +ExECACAFAkqLUWcCGwMGCwkIBwMCBBUCCAMEFgIDAQIeAQIXgAAKCRCl0y8BJkml +qVklAJ4h2V6MdQkSAThF5c2Gkq6eSoIQYQCeM0DWyB9Bl+tTPSTYXwwZi2uoif20 +QmFwc3kuZ3NlLnVuaS1tYWdkZWJ1cmcuZGUgRGViaWFuIEFyY2hpdmUgPG1pY2hh +ZWwuaGFua2VAZ21haWwuY29tPohGBBARAgAGBQJEO03FAAoJEPd/jbIxRL4PU18A +n3tn7i4qdlMi8kHbYWFoabsKc9beAJ9sl/leZNCYNMGhz+u6BQgyeLKw94heBBMR +AgAeBQJEO0zoAhsDBgsJCAcDAgMVAgMDFgIBAh4BAheAAAoJEKXTLwEmSaWpVdoA +n27DvtZizNEbhz3wRUPQMiQjtqdvAJ9rS9YdPe5h5o5gHx3mw3BSkOttdYheBBMR +AgAeBQJEO0zoAhsDBgsJCAcDAgMVAgMDFgIBAh4BAheAAAoJEKXTLwEmSaWpVdoA +oLhwWL+E+2I9lrUf4Lf26quOK9vLAKC9ZpIF2tUirFFkBWnQvu13/TA0SokCHAQQ +AQIABgUCTSNBgQAKCRDAc9Iof/uem4NpEACQ8jxmaCaS/qk/Y4GiwLA5bvKosG3B +iARZ2v5UWqCZQ1tS56yKse/lCIzXQqU9BnYW6wOI2rvFf9meLfd8h96peG6oKscs +fbclLDIf68bBvGBQaD0VYFi/Fk/rxmTQBOCQ3AJZs8O5rIM4gPGE0QGvSZ1h7VRw +3Uyeg4jKXLIeJn2xEmOJgt3auAR2FyKbzHaX9JCoByJZ/eU23akNl9hgt7ePlpXo +74KNYC58auuMUhCq3BQDB+II4ERYMcmFp1N5ZG05Cl6jcaRRHDXz+Ax6DWprRI1+ +RH/Yyae6LmKpeJNwd+vM14aawnNO9h8IAQ+aJ3oYZdRhGyybbin3giJ10hmWveg/ +Pey91Nh9vBCHdDkdPU0s9zE7z/PHT0c5ccZRukxfZfkrlWQ5iqu3V064ku5f4PBy +8UPSkETcjYgDnrdnwqIAO+oVg/SFlfsOzftnwUrvwIcZlXAgtP6MEEAs/38e/JIN +g4VrpdAy7HMGEUsh6Ah6lvGQr+zBnG44XwKfl7e0uCYkrAzUJRGM5vx9iXvFMcMu +jv9EBNNBOU8/Y6MBDzGZhgaoeI27nrUvaveJXjAiDKAQWBLjtQjINZ8I9uaSGOul +8kpbFavE4eS3+KhISrSHe4DuAa3dk9zI+FiPvXY1ZyfQBtNpR+gYFY6VxMbHhY1U +lSLHO2eUIQLdYbRITmV1cm9EZWJpYW4gQXJjaGl2ZSBLZXkgPHBrZy1leHBwc3kt +bWFpbnRhaW5lcnNAbGlzdHMuYWxpb3RoLmRlYmlhbi5vcmc+iEYEEBEIAAYFAk1R +yQYACgkQjRFFY3XAJMgEWwCggx4Gqlcrt76TSMlbU94cESo55AEAoJ3asQEMpe8t +QUX+5aikw3z1AUoCiEoEEBECAAoFAkqf/3cDBQF4AAoJEPd/jbIxRL4PxyMAoKUI +RPWlHCj/+HSFfwhos68wcSwmAKChuC00qutDro+AOo+uuq6YoHXj+ohgBBMRAgAg +BQJKn/8bAhsDBgsJCAcDAgQVAggDBBYCAwECHgECF4AACgkQpdMvASZJpalDggCe +KF9KOgOPdQbFnKXl8KtHory4EEwAnA7jxgorE6kk2QHEXFSF8LzOOH4GiGMEExEC +ACMCGwMGCwkIBwMCBBUCCAMEFgIDAQIeAQIXgAUCSp//RgIZAQAKCRCl0y8BJkml +qekFAKCRyt4+FoCzmBbRUUP3Cr8PzH++IgCgkno4vdjsWdyAey8e0KpITTXMFrmJ +AhwEEAECAAYFAk0jQYEACgkQwHPSKH/7npsFfw/+P8B8hpM3+T1fgboBa4R32deu +n8m6b8vZMXwuo/awQtMpzjem8JGXSUQm8iiX4hDtjq6ZoPrlN8T4jNmviBt/F5jI +Jji/PYmhq+Zn9s++mfx+aF4IJrcHJWFkg/6kJzn4oSdl/YlvKf4VRCcQNtj4xV87 +GsdamnzU17XapLVMbSaVKh+6Af7ZLDerEH+iAq733HsYaTK+1xKmN7EFVXgS7bZ1 +9C4LTzc97bVHSywpT9yIrg9QQs/1kshfVIHDKyhjF6IwzSVbeGAIL3Oqo5zOMkWv +7JlEIkkhTyl+FETxNMTMYjAk+Uei3kRodneq3YBF2uFYSEzrXQgHAyn37geiaMYj +h8wu6a85nG1NS0SdxiZDIePmbvD9vWxFZUWYJ/h9ifsLivWcVXlvHoQ0emd+n2ai +FhAck2xsuyHgnGIZMHww5IkQdu/TMqvbcR6d8Xulh+C4Tq7ppy+oTLADSBKII++p +JQioYydRD529EUJgVlhyH27X6YAk3FuRD3zYZRYS2QECiKXvS665o3JRJ0ZSqNgv +YOom8M0zz6bI9grnUoivMI4o7ISpE4ZwffEd37HVzmraaUHDXRhkulFSf1ImtXoj +V9nNSM5p/+9eP7OioTZhSote6Vj6Ja1SZeRkXZK7BwqPbdO0VsYOb7G//ZiOlqs+ +paRr92G/pwBfj5Dq8EK5Ag0ERDtM9RAIAN0EJqBPvLN0tEin/y4Fe0R4n+E+zNXg +bBsq4WidwyUFy3h/6u86FYvegXwUqVS2OsEs5MwPcCVJOfaEthF7I89QJnP9Nfx7 +V5I9yFB53o9ii38BN7X+9gSjpfwXOvf/wIDfggxX8/wRFel37GRB7TiiABRArBez +s5x+zTXvT++WPhElySj0uY8bjVR6tso+d65K0UesvAa7PPWeRS+3nhqABSFLuTTT +MMbnVXCGesBrYHlFVXClAYrSIOX8Ub/UnuEYs9+hIV7U4jKzRF9WJhIC1cXHPmOh +vleAf/I9h/0KahD7HLYud40pNBo5tW8jSfp2/Q8TIE0xxshd51/xy4MAAwUH+wWn +zsYVk981OKUEXul8JPyPxbw05fOd6gF4MJ3YodO+6dfoyIl3bewk+11KXZQALKaO +1xmkAEO1RqizPeetoadBVkQBp5xPudsVElUTOX0pTYhkUd3iBilsCYKK1/KQ9KzD +I+O/lRsm6L9lc6rV0IgPU00P4BAwR+x8Rw7TJFbuS0miR3lP1NSguz+/kpjxzmGP +LyHJ+LVDYFkk6t0jPXhqFdUY6McUTBDEvavTGlVO062l9APTmmSMVFDsPN/rBes2 +rYhuuT+lDp+gcaS1UoaYCIm9kKOteQBnowX9V74Z+HKEYLtwILaSnNe6/fNSTvyj +g0z+R+sPCY4nHewbVC+ISQQYEQIACQUCRDtM9QIbDAAKCRCl0y8BJkmlqbecAJ9B 
+UdSKVg9H+fQNyP5sbOjj4RDtdACfXHrRHa2+XjJP0dhpvJ8IfvYnQsU= +=fAJZ +-----END PGP PUBLIC KEY BLOCK----- diff --git a/notebooks/workwithtestdata.ipynb b/notebooks/workwithtestdata.ipynb new file mode 100644 index 000000000..43f367c11 --- /dev/null +++ b/notebooks/workwithtestdata.ipynb @@ -0,0 +1,633 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import os\n", + "import os.path as op\n", + "from pkg_resources import resource_filename as pkgrf\n", + "import shutil\n", + "import bond\n", + "TEST_DATA = pkgrf(\"bond\", \"testdata\")\n", + "\n", + "def test_data(tmp_path):\n", + " data_root = tmp_path / \"testdata\"\n", + " shutil.copytree(TEST_DATA, str(data_root))\n", + " assert len(list(data_root.rglob(\"*\"))) > 5\n", + " return data_root\n", + "\n", + "workdir = os.getcwd()\n", + "\n", + "def copy_testing_data(dirname):\n", + " newdir = op.join(workdir, dirname)\n", + " os.makedirs(newdir)\n", + " data_dir = test_data(Path(newdir))\n", + " return data_dir\n", + "\n", + "# copy the data \n", + "data_root = copy_testing_data(\"test1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "!rm -rf test1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test the key / param groups\n", + "\n", + "This test copies the data and makes sure we get the correct number of key and parameter groups out of it\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 6/6 [00:00<00:00, 268.30it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from bond import BOnD\n", + "\n", + "bod = BOnD(str(first_test / \"complete\"))\n", + "bod._cache_fieldmaps()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['acquisition-HASC55AP_datatype-dwi_suffix-dwi', 'acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1', 'acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2', 'acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff', 'datatype-anat_suffix-T1w', 'datatype-fmap_direction-PA_fmap-epi_suffix-epi', 'datatype-func_suffix-bold_task-rest']\n" + ] + } + ], + "source": [ + "key_groups = bod.get_key_groups()\n", + "print(key_groups)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 6/6 [00:00<00:00, 267.86it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ibod = BOnD(str(first_test / \"inconsistent\"))\n", + "misfits = ibod._cache_fieldmaps()\n", + "len(misfits)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "ikey_groups = ibod.get_key_groups()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "ikey_groups == key_groups" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Working with datalad\n", + "\n", + "Here we try to initialize a datalad repo on the test data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[INFO] Creating a new annex repo at /Users/mcieslak/projects/BOnD/notebooks/test1/testdata/inconsistent \n" + ] + }, + { + "ename": "RuntimeError", + "evalue": "Cannot run the event loop while another loop is running", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdatalad\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapi\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mdlapi\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdlapi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfirst_test\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34m\"inconsistent\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mforce\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/interface/utils.py\u001b[0m in \u001b[0;36meval_func\u001b[0;34m(wrapped, instance, args, kwargs)\u001b[0m\n\u001b[1;32m 493\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 494\u001b[0m \u001b[0mlgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Returning return_func from eval_func for %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwrapped_class\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 495\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mreturn_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgenerator_func\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 496\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 497\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0meval_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/interface/utils.py\u001b[0m in \u001b[0;36mreturn_func\u001b[0;34m(wrapped_, instance_, args_, kwargs_)\u001b[0m\n\u001b[1;32m 481\u001b[0m \u001b[0;31m# unwind generator if there is one, this actually runs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 482\u001b[0m \u001b[0;31m# any processing\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 483\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 484\u001b[0m \u001b[0;31m# render summaries\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 485\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mresult_xfm\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mresult_renderer\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'tailored'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'default'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/interface/utils.py\u001b[0m in \u001b[0;36mgenerator_func\u001b[0;34m(*_args, **_kwargs)\u001b[0m\n\u001b[1;32m 400\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 401\u001b[0m \u001b[0;31m# process main results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 402\u001b[0;31m for r in _process_results(\n\u001b[0m\u001b[1;32m 403\u001b[0m \u001b[0;31m# execution\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 404\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0m_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0m_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/interface/utils.py\u001b[0m in \u001b[0;36m_process_results\u001b[0;34m(results, cmd_class, on_failure, action_summary, incomplete_results, result_renderer, result_log_level, allkwargs)\u001b[0m\n\u001b[1;32m 560\u001b[0m \u001b[0mrender_n_repetitions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstdout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misatty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"inf\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 562\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 563\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m'action'\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 564\u001b[0m \u001b[0;31m# XXX Yarik has to no clue on how to track the origin of the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/core/local/create.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(path, initopts, force, description, dataset, no_annex, annex, fake_dates, cfg_proc)\u001b[0m\n\u001b[1;32m 393\u001b[0m \u001b[0;31m# always come with annex when created from scratch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 394\u001b[0m 
\u001b[0mlgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Creating a new annex repo at %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtbds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 395\u001b[0;31m tbrepo = AnnexRepo(\n\u001b[0m\u001b[1;32m 396\u001b[0m \u001b[0mtbds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 397\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/support/repo.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;31m# we have no such instance yet or the existing one is invalidated,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;31m# so we instantiate:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0minstance\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mnew_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mnew_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_unique_instances\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mid_\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minstance\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/support/annexrepo.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, path, url, runner, backend, always_commit, create, create_sanity_checks, init, batch_size, version, description, git_opts, annex_opts, annex_init_opts, repo, fake_dates)\u001b[0m\n\u001b[1;32m 274\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 275\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdo_init\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 276\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_init\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mversion\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdescription\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdescription\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[0;31m# TODO: RM DIRECT eventually, but should remain while we have is_direct_mode\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/support/annexrepo.py\u001b[0m in \u001b[0;36m_init\u001b[0;34m(self, version, description)\u001b[0m\n\u001b[1;32m 1275\u001b[0m 
\u001b[0mwhere\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'local'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1276\u001b[0m reload=False)\n\u001b[0;32m-> 1277\u001b[0;31m self._run_annex_command(\n\u001b[0m\u001b[1;32m 1278\u001b[0m \u001b[0;34m'init'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1279\u001b[0m \u001b[0mrunner\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"gitwitless\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/support/annexrepo.py\u001b[0m in \u001b[0;36m_run_annex_command\u001b[0;34m(self, annex_cmd, git_options, annex_options, backend, jobs, files, merge_annex_branches, runner, protocol, **kwargs)\u001b[0m\n\u001b[1;32m 1098\u001b[0m \u001b[0;31m# TODO: RF to use --batch where possible instead of splitting\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1099\u001b[0m \u001b[0;31m# into multiple invocations\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1100\u001b[0;31m return run_gitcommand_on_file_list_chunks(\n\u001b[0m\u001b[1;32m 1101\u001b[0m \u001b[0mrun_func\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1102\u001b[0m \u001b[0mcmd_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/cmd.py\u001b[0m in \u001b[0;36mrun_gitcommand_on_file_list_chunks\u001b[0;34m(func, cmd, files, *args, **kwargs)\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmd\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'--'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mfile_chunk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 144\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 145\u001b[0m \u001b[0;31m# if it was a WitlessRunner.run -- we would get dicts.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0;31m# If old Runner -- stdout, stderr strings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/site-packages/datalad/cmd.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, cmd, protocol, stdin, cwd, env, **kwargs)\u001b[0m\n\u001b[1;32m 478\u001b[0m 
\u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_event_loop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevent_loop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 479\u001b[0m \u001b[0;31m# include the subprocess manager in the asyncio event loop\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 480\u001b[0;31m results = event_loop.run_until_complete(\n\u001b[0m\u001b[1;32m 481\u001b[0m run_async_cmd(\n\u001b[1;32m 482\u001b[0m \u001b[0mevent_loop\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/asyncio/base_events.py\u001b[0m in \u001b[0;36mrun_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 590\u001b[0m \"\"\"\n\u001b[1;32m 591\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_closed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 592\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_running\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 593\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 594\u001b[0m \u001b[0mnew_task\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfutures\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misfuture\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfuture\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/bond/lib/python3.8/asyncio/base_events.py\u001b[0m in \u001b[0;36m_check_running\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'This event loop is already running'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 553\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mevents\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_running_loop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 554\u001b[0;31m raise RuntimeError(\n\u001b[0m\u001b[1;32m 555\u001b[0m 'Cannot run the event loop while another loop is running')\n\u001b[1;32m 556\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: Cannot run the event loop while another loop is running" + ] + } + ], + "source": [ + "import datalad.api as dlapi\n", + "\n", + "dl = dlapi.create(path=first_test / \"inconsistent\", force=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "files_df, summary_df = bod.get_param_groups_dataframes()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "%qtconsole" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
key_groupParamGroupCount
0acquisition-64dir_datatype-dwi_suffix-dwi13
1acquisition-HCP_datatype-anat_suffix-T1w13
2acquisition-HCP_datatype-anat_suffix-T2w11
3acquisition-dwi_datatype-fmap_direction-AP_suf...13
4acquisition-dwi_datatype-fmap_direction-PA_suf...13
5acquisition-fMRI_datatype-fmap_direction-AP_su...01
6acquisition-fMRI_datatype-fmap_direction-AP_su...11
7acquisition-fMRI_datatype-fmap_direction-PA_su...01
8acquisition-fMRI_datatype-fmap_direction-PA_su...11
9datatype-func_run-1_suffix-bold_task-peer12
10datatype-func_run-1_suffix-bold_task-rest12
11datatype-func_run-2_suffix-bold_task-peer12
12datatype-func_run-2_suffix-bold_task-rest11
13datatype-func_run-3_suffix-bold_task-peer12
14datatype-func_suffix-bold_task-movieDM11
15datatype-func_suffix-bold_task-movieTP12
\n", + "
" + ], + "text/plain": [ + " key_group ParamGroup Count\n", + "0 acquisition-64dir_datatype-dwi_suffix-dwi 1 3\n", + "1 acquisition-HCP_datatype-anat_suffix-T1w 1 3\n", + "2 acquisition-HCP_datatype-anat_suffix-T2w 1 1\n", + "3 acquisition-dwi_datatype-fmap_direction-AP_suf... 1 3\n", + "4 acquisition-dwi_datatype-fmap_direction-PA_suf... 1 3\n", + "5 acquisition-fMRI_datatype-fmap_direction-AP_su... 0 1\n", + "6 acquisition-fMRI_datatype-fmap_direction-AP_su... 1 1\n", + "7 acquisition-fMRI_datatype-fmap_direction-PA_su... 0 1\n", + "8 acquisition-fMRI_datatype-fmap_direction-PA_su... 1 1\n", + "9 datatype-func_run-1_suffix-bold_task-peer 1 2\n", + "10 datatype-func_run-1_suffix-bold_task-rest 1 2\n", + "11 datatype-func_run-2_suffix-bold_task-peer 1 2\n", + "12 datatype-func_run-2_suffix-bold_task-rest 1 1\n", + "13 datatype-func_run-3_suffix-bold_task-peer 1 2\n", + "14 datatype-func_suffix-bold_task-movieDM 1 1\n", + "15 datatype-func_suffix-bold_task-movieTP 1 2" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary_df[[\"key_group\", \"ParamGroup\", \"Count\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "param_group_cols = list(set(df.columns.to_list()) - set([\"FilePath\"]))\n", + "uniques = df.drop_duplicates(param_group_cols, ignore_index=True)\n", + "print(uniques.shape)\n", + "counts = df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')\n", + "print(counts.shape)\n", + "\n", + "params_and_counts = pd.merge(uniques, counts)\n", + "print(params_and_counts.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "no_paths[[\"key_group\", \"ParamGroup\"]].groupby([\"key_group\", \"ParamGroup\"]).count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "keyparam_df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fname = 'sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bod.get_key_groups()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "self = bod\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from bond.bond import *\n", + "suffix = '(phase1|phasediff|epi|fieldmap)'\n", + "fmap_files = self.layout.get(suffix=suffix, regex_search=True,\n", + " extension=['.nii.gz', '.nii'])\n", + "\n", + "files_to_fmaps = defaultdict(list)\n", + "\n", + "print(\"\\n\".join([f.path for f in fmap_files]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "for fmap_file in tqdm(fmap_files):\n", + " intentions = 
listify(fmap_file.get_metadata().get(\"IntendedFor\"))\n", + " subject_prefix = \"sub-%s/\" % fmap_file.entities['subject']\n", + " for intended_for in intentions:\n", + " subject_relative_path = subject_prefix + intended_for\n", + " files_to_fmaps[subject_relative_path].append(fmap_file)\n", + "\"\"\"\n", + "fmap_file = fmap_files[0]\n", + "intentions = listify(fmap_file.get_metadata().get(\"IntendedFor\"))\n", + "print(\"intentions:\", intentions)\n", + "subject_prefix = \"sub-%s/\" % fmap_file.entities['subject']\n", + "print(subject_prefix)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "suffix = '(phase1|phasediff|epi|fieldmap)'\n", + "fmap_files = self.layout.get(suffix=suffix, regex_search=True,\n", + " extension=['.nii.gz', '.nii'])\n", + "\n", + "files_to_fmaps = defaultdict(list)\n", + "for fmap_file in tqdm(fmap_files):\n", + " intentions = listify(fmap_file.get_metadata().get(\"IntendedFor\"))\n", + " subject_prefix = \"sub-%s\" % fmap_file.entities['subject']\n", + " for intended_for in intentions:\n", + " full_path = Path(self.path) / subject_prefix / intended_for\n", + " files_to_fmaps[str(full_path)].append(fmap_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for data_file, fmap_files in bod.fieldmap_lookup.items():\n", + " print(data_file[44:])\n", + " for fmap_file in fmap_files:\n", + " print(\" \", fmap_file.path[44:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "files_to_fmaps.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from bond.bond import *\n", + "files = [\n", + " '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz', \n", + " '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARRP384BVX/ses-HBNsiteRU/dwi/sub-NDARRP384BVX_ses-HBNsiteRU_acq-64dir_dwi.nii.gz']\n", + "\n", + "dfs = []\n", + "fieldmap_lookup = bod.fieldmap_lookup\n", + "key_group_name = \"test\"\n", + "# path needs to be relative to the root with no leading prefix\n", + "for path in files:\n", + " metadata = bod.layout.get_metadata(path)\n", + " wanted_keys = metadata.keys() & IMAGING_PARAMS\n", + " example_data = {key: metadata[key] for key in wanted_keys}\n", + " example_data[\"key_group\"] = key_group_name\n", + "\n", + " # Get the fieldmaps out and add their types\n", + " print(fieldmap_lookup[path])\n", + " fieldmap_types = sorted([fmap.entities['fmap'] for fmap in fieldmap_lookup[path]])\n", + " for fmap_num, fmap_type in enumerate(fieldmap_types):\n", + " example_data['fieldmap_type%02d' % fmap_num] = fmap_type \n", + "\n", + " # Expand slice timing to multiple columns\n", + " SliceTime = example_data.get('SliceTiming')\n", + " if SliceTime:\n", + " # round each slice time to one place after the decimal\n", + " for i in range(len(SliceTime)):\n", + " SliceTime[i] = round(SliceTime[i], 1)\n", + " example_data.update(\n", + " {\"SliceTime%03d\" % SliceNum: time for\n", + " SliceNum, time in enumerate(SliceTime)})\n", + " del example_data['SliceTiming']\n", + "\n", + " dfs.append(example_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example_data" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/setup.cfg b/setup.cfg index 850607fb2..27052bd56 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,18 +20,20 @@ exclude = *build/ per-file-ignores = # imported but unused __init__.py: F401 -putty-ignore = +putty-ignore = */__init__.py : +F401 /^\s*\.\. _.*?: http/ : +E501 [options] python_requires = >=3.7 -install_requires = +install_requires = pybids pandas tqdm numpy -test_requires = + datalad>=0.13.5 + wrapt<2,>=1.10 +test_requires = pytest==4.6.5 pytest-runner==5.1 pip==19.2.3 @@ -49,19 +51,19 @@ test_requires = [options.extras_require] datalad = datalad -doc = +doc = nbsphinx packaging sphinx >= 1.8 sphinx-argparse -docs = +docs = %(doc)s -tests = +tests = coverage codecov pytest pytest-env -all = +all = %(datalad)s %(doc)s %(tests)s @@ -70,8 +72,8 @@ all = norecursedirs = .git addopts = -svx --doctest-modules doctest_optionflags = ALLOW_UNICODE NORMALIZE_WHITESPACE ELLIPSIS -env = +env = PYTHONHASHSEED=0 -filterwarnings = +filterwarnings = ignore::DeprecationWarning junit_family = xunit2 diff --git a/setup.py b/setup.py index 3a8eab650..8cde8c60f 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,11 @@ description="BIDS On Disk Editor", entry_points={ 'console_scripts': [ - 'bond=bond.cli:main', + 'bond-group=bond.cli:bond_group', + 'bond-apply=bond.cli:bond_apply', + 'bond-revert=bond.cli:bond_revert', + 'bids-sidecar-merge=bond.cli:param_group_merge', + 'bond-validate=bond.cli:bond_validate' ], }, license="GNU General Public License v3", diff --git a/tests/test_bond.py b/tests/test_bond.py index f946ae115..53e814d2b 100644 --- a/tests/test_bond.py +++ b/tests/test_bond.py @@ -2,16 +2,12 @@ """Tests for `bond` package.""" import sys +import shutil +import json +from pkg_resources import resource_filename as pkgrf sys.path.append("..") -import os import pytest -from pkg_resources import resource_filename as pkgrf -import shutil from bond import BOnD -import os.path as op -from copy import deepcopy -import base64 -from glob import glob TEST_DATA = pkgrf("bond", "testdata") @@ -101,6 +97,64 @@ def test_csv_creation(tmp_path): assert isummary_df.shape[0] == 11 +def test_change_key_groups(tmp_path): + data_root = get_data(tmp_path) + + my_bond = BOnD(data_root) + my_bond._cache_fieldmaps() + my_bond.get_CSVs(str(tmp_path / "og_csv_dir")) + + +def _edit_a_json(json_file): + """Open a json file, write somthing to it and save it to the same name.""" + with open(json_file, "r") as metadatar: + metadata = json.load(metadatar) + + metadata["THIS_IS_A_TEST"] = True + with open(json_file, "w") as metadataw: + json.dump(metadata, metadataw) + + +def test_datalad_integration(tmp_path): + """Test that datalad works for basic file modification operations. + """ + data_root = get_data(tmp_path) + + # Test that an uninitialized BOnD raises exceptions + uninit_bond = BOnD(data_root / "complete", use_datalad=False) + + # Ensure an exception is raised if trying to use datalad without + # initializing + with pytest.raises(Exception): + uninit_bond.is_datalad_clean() + + # initialize the datalad repository and try again + uninit_bond.init_datalad(save=True) + assert uninit_bond.is_datalad_clean() + + # Now, the datalad repository is initialized and saved. 
+ # Make sure if we make a new BOnD object it recognizes that + # the datalad status is OK + complete_bod = BOnD(data_root / "complete", use_datalad=True) + + assert complete_bod.datalad_ready + assert complete_bod.is_datalad_clean() + + # Edit a file and make sure that it's been detected by datalad + _edit_a_json(str(data_root / "complete" / "sub-03" / "ses-phdiff" / "func" + / "sub-03_ses-phdiff_task-rest_bold.json")) + assert not uninit_bond.is_datalad_clean() + assert not complete_bod.is_datalad_clean() + + # Make sure you can't initialize a BOnD object on a dirty directory + with pytest.raises(Exception): + BOnD(data_root / "complete", use_datalad=True) + + # Test BOnD.datalad_save() + uninit_bond.datalad_save(message="TEST SAVE!") + + + """ def test_fill_metadata(tmp_path): data_root = tmp_path / "testdata"