From 70d6ce0ed2535ba7cdf2d4a10cf0140f71b68271 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:21:57 -0400 Subject: [PATCH] [Cloud Deployment I]: Main dockerfile (#383) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ben Dichter --- .../build_and_upload_docker_image_dev.yml | 41 ++++++ ...and_upload_docker_image_latest_release.yml | 46 ++++++ ..._and_upload_docker_image_yaml_variable.yml | 37 +++++ .github/workflows/docker_testing.yml | 82 +++++++++++ CHANGELOG.md | 11 +- dockerfiles/neuroconv_dev_dockerfile | 6 + .../neuroconv_latest_release_dockerfile | 6 + dockerfiles/neuroconv_latest_yaml_variable | 4 + docs/developer_guide.rst | 1 + docs/developer_guide/docker_images.rst | 96 +++++++++++++ docs/developer_guide/testing_suite.rst | 1 + docs/user_guide/docker_demo.rst | 118 ++++++++++++++++ docs/user_guide/user_guide.rst | 7 +- ...ocker_yaml_conversion_specification_cli.py | 131 ++++++++++++++++++ 14 files changed, 581 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/build_and_upload_docker_image_dev.yml create mode 100644 .github/workflows/build_and_upload_docker_image_latest_release.yml create mode 100644 .github/workflows/build_and_upload_docker_image_yaml_variable.yml create mode 100644 .github/workflows/docker_testing.yml create mode 100644 dockerfiles/neuroconv_dev_dockerfile create mode 100644 dockerfiles/neuroconv_latest_release_dockerfile create mode 100644 dockerfiles/neuroconv_latest_yaml_variable create mode 100644 docs/developer_guide/docker_images.rst create mode 100644 docs/user_guide/docker_demo.rst create mode 100644 tests/docker_yaml_conversion_specification_cli.py diff --git a/.github/workflows/build_and_upload_docker_image_dev.yml b/.github/workflows/build_and_upload_docker_image_dev.yml new file mode 100644 index 000000000..b2353bfc2 --- /dev/null +++ 
b/.github/workflows/build_and_upload_docker_image_dev.yml @@ -0,0 +1,41 @@ +name: Build and Upload Docker Image of Current Dev Branch to GHCR + +on: + schedule: + - cron: "0 16 * * 1" # Weekly at noon EST on Monday + workflow_dispatch: + +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + release-image: + name: Build and Upload Docker Image of Current Dev Branch to GHCR + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ secrets.DOCKER_UPLOADER_USERNAME }} + password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }} + - name: Get current date + id: date + run: | + date_tag="$(date +'%Y-%m-%d')" + echo "date_tag=$date_tag" >> $GITHUB_OUTPUT + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true # Push is a shorthand for --output=type=registry + tags: ghcr.io/catalystneuro/neuroconv:dev,ghcr.io/catalystneuro/neuroconv:${{ steps.date.outputs.date_tag }} + context: . 
+ file: dockerfiles/neuroconv_dev_dockerfile + provenance: false diff --git a/.github/workflows/build_and_upload_docker_image_latest_release.yml b/.github/workflows/build_and_upload_docker_image_latest_release.yml new file mode 100644 index 000000000..423686ec9 --- /dev/null +++ b/.github/workflows/build_and_upload_docker_image_latest_release.yml @@ -0,0 +1,46 @@ +name: Build and Upload Docker Image of Latest Release to GHCR + +on: + workflow_run: + workflows: [auto-publish] + types: [completed] + workflow_dispatch: + +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + release-image: + name: Build and Upload Docker Image of Latest Release to GHCR + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Parse the version from the GitHub latest release tag + id: parsed_version + run: | + git fetch --prune --unshallow --tags + tags="$(git tag --list)" + version_tag=${tags: -6 : 6} + echo "version_tag=$version_tag" >> $GITHUB_OUTPUT + - name: Printout parsed version for GitHub Action log + run: echo ${{ steps.parsed_version.outputs.version_tag }} + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ secrets.DOCKER_UPLOADER_USERNAME }} + password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }} + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true # Push is a shorthand for --output=type=registry + tags: ghcr.io/catalystneuro/neuroconv:latest,ghcr.io/catalystneuro/neuroconv:${{ steps.parsed_version.outputs.version_tag }} + context: . 
+ file: dockerfiles/neuroconv_latest_release_dockerfile + provenance: false diff --git a/.github/workflows/build_and_upload_docker_image_yaml_variable.yml b/.github/workflows/build_and_upload_docker_image_yaml_variable.yml new file mode 100644 index 000000000..7ff2dc63c --- /dev/null +++ b/.github/workflows/build_and_upload_docker_image_yaml_variable.yml @@ -0,0 +1,37 @@ +name: Build and Upload Docker Image of latest with YAML variable to GHCR + +on: + workflow_run: + workflows: [build_and_upload_docker_image_latest_release] + types: [completed] + workflow_dispatch: + +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + release-image: + name: Build and Upload Docker Image of latest with YAML variable to GHCR + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ secrets.DOCKER_UPLOADER_USERNAME }} + password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }} + - name: Build and push YAML variable image based on latest + uses: docker/build-push-action@v5 + with: + push: true # Push is a shorthand for --output=type=registry + tags: ghcr.io/catalystneuro/neuroconv:yaml_variable + context: . 
+ file: dockerfiles/neuroconv_latest_yaml_variable + provenance: false diff --git a/.github/workflows/docker_testing.yml b/.github/workflows/docker_testing.yml new file mode 100644 index 000000000..6916e0e4e --- /dev/null +++ b/.github/workflows/docker_testing.yml @@ -0,0 +1,82 @@ +name: Docker CLI tests +on: + schedule: + - cron: "0 16 * * *" # Daily at noon EST + workflow_dispatch: + +jobs: + run: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + os: [ubuntu-latest] #, macos-latest, windows-latest] # Seems docker might only be available for ubuntu on GitHub Actions + steps: + - uses: actions/checkout@v4 + - run: git fetch --prune --unshallow --tags + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Global Setup + run: python -m pip install -U pip # Official recommended way + + - name: Install pytest and neuroconv minimal + run: | + pip install pytest + pip install . 
+ + - name: Get ephy_testing_data current head hash + id: ephys + run: echo "::set-output name=HASH_EPHY_DATASET::$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)" + - name: Cache ephys dataset - ${{ steps.ephys.outputs.HASH_EPHY_DATASET }} + uses: actions/cache@v4 + id: cache-ephys-datasets + with: + path: ./ephy_testing_data + key: ephys-datasets-2023-06-26-${{ matrix.os }}-${{ steps.ephys.outputs.HASH_EPHY_DATASET }} + - name: Get ophys_testing_data current head hash + id: ophys + run: echo "::set-output name=HASH_OPHYS_DATASET::$(git ls-remote https://gin.g-node.org/CatalystNeuro/ophys_testing_data.git HEAD | cut -f1)" + - name: Cache ophys dataset - ${{ steps.ophys.outputs.HASH_OPHYS_DATASET }} + uses: actions/cache@v4 + id: cache-ophys-datasets + with: + path: ./ophys_testing_data + key: ophys-datasets-2022-08-18-${{ matrix.os }}-${{ steps.ophys.outputs.HASH_OPHYS_DATASET }} + - name: Get behavior_testing_data current head hash + id: behavior + run: echo "::set-output name=HASH_BEHAVIOR_DATASET::$(git ls-remote https://gin.g-node.org/CatalystNeuro/behavior_testing_data.git HEAD | cut -f1)" + - name: Cache behavior dataset - ${{ steps.behavior.outputs.HASH_BEHAVIOR_DATASET }} + uses: actions/cache@v4 + id: cache-behavior-datasets + with: + path: ./behavior_testing_data + key: behavior-datasets-2023-07-26-${{ matrix.os }}-${{ steps.behavior.outputs.HASH_BEHAVIOR_DATASET }} + + - if: steps.cache-ephys-datasets.outputs.cache-hit != 'true' || steps.cache-ophys-datasets.outputs.cache-hit != 'true' || steps.cache-behavior-datasets.outputs.cache-hit != 'true' + name: Install and configure AWS CLI + run: | + pip install awscli + aws configure set aws_access_key_id ${{ secrets.AWS_ACCESS_KEY_ID }} + aws configure set aws_secret_access_key ${{ secrets.AWS_SECRET_ACCESS_KEY }} + - if: steps.cache-ephys-datasets.outputs.cache-hit != 'true' + name: Download ephys dataset from S3 + run: aws s3 cp --recursive ${{
secrets.S3_GIN_BUCKET }}/ephy_testing_data ./ephy_testing_data + - if: steps.cache-ophys-datasets.outputs.cache-hit != 'true' + name: Download ophys dataset from S3 + run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/ophys_testing_data ./ophys_testing_data + - if: steps.cache-behavior-datasets.outputs.cache-hit != 'true' + name: Download behavior dataset from S3 + run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/behavior_testing_data ./behavior_testing_data + + - name: Pull docker image + run: | + docker pull ghcr.io/catalystneuro/neuroconv:latest + docker pull ghcr.io/catalystneuro/neuroconv:yaml_variable + + - name: Run docker tests + run: pytest tests/docker_yaml_conversion_specification_cli.py -vv -rsx diff --git a/CHANGELOG.md b/CHANGELOG.md index c59c118d3..919bb46a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,17 @@ # Upcoming -### Bug fixes -* Fixed writing waveforms directly to file [PR #799](https://github.com/catalystneuro/neuroconv/pull/799) - ### Deprecations * Removed `stream_id` as an argument from `IntanRecordingInterface` [PR #794](https://github.com/catalystneuro/neuroconv/pull/794) * Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799) +### Features +* Released the first official Docker images for the package on the GitHub Container Repository (GHCR). 
[PR #383](https://github.com/catalystneuro/neuroconv/pull/383) + +### Bug fixes +* Fixed writing waveforms directly to file [PR #799](https://github.com/catalystneuro/neuroconv/pull/799) + + + # v0.4.8 (March 20, 2024) ### Bug fixes diff --git a/dockerfiles/neuroconv_dev_dockerfile b/dockerfiles/neuroconv_dev_dockerfile new file mode 100644 index 000000000..0de15ac74 --- /dev/null +++ b/dockerfiles/neuroconv_dev_dockerfile @@ -0,0 +1,6 @@ +FROM python:3.11.7-slim +LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv +LABEL org.opencontainers.image.description="A docker image for the most recent daily build of the main branch." +ADD ./ neuroconv +RUN cd neuroconv && pip install .[full] +CMD ["python -m"] diff --git a/dockerfiles/neuroconv_latest_release_dockerfile b/dockerfiles/neuroconv_latest_release_dockerfile new file mode 100644 index 000000000..918b64453 --- /dev/null +++ b/dockerfiles/neuroconv_latest_release_dockerfile @@ -0,0 +1,6 @@ +FROM python:3.11.7-slim +LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv +LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package." +RUN apt update && apt install musl-dev python3-dev -y +RUN pip install neuroconv[full] +CMD ["python -m"] diff --git a/dockerfiles/neuroconv_latest_yaml_variable b/dockerfiles/neuroconv_latest_yaml_variable new file mode 100644 index 000000000..ea411ee44 --- /dev/null +++ b/dockerfiles/neuroconv_latest_yaml_variable @@ -0,0 +1,4 @@ +FROM ghcr.io/catalystneuro/neuroconv:latest +LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv +LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package. Modified to take in environment variables for the YAML conversion specification and other command line arguments." 
+CMD echo "$NEUROCONV_YAML" > run.yml && python -m neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite diff --git a/docs/developer_guide.rst b/docs/developer_guide.rst index 409f4d765..04fc6b035 100644 --- a/docs/developer_guide.rst +++ b/docs/developer_guide.rst @@ -15,5 +15,6 @@ The most common contribution is for a user/developer to help us integrate a new Testing Suite Coding Style Building the Documentation + Building the Docker Image Or feel free to raise a `bug report `_ or `feature request `_ for our maintainers to prioritize! diff --git a/docs/developer_guide/docker_images.rst b/docs/developer_guide/docker_images.rst new file mode 100644 index 000000000..d78b3cfd0 --- /dev/null +++ b/docs/developer_guide/docker_images.rst @@ -0,0 +1,96 @@ +Manually Build Docker Images +---------------------------- + +.. note:: + + It is recommended to build the docker image on the same system architecture that you intend to run it on, *i.e.*, AWS Linux AMI 64-bit (x86), as it may experience difficulties running on other significantly different systems (like an M1 Mac). + +.. note:: + + The NeuroConv docker container comes prepackaged with all required installations, equivalent to running ``pip install neuroconv[full]``. As such it is relatively heavy, so be sure that whatever environment you intend to use it in (such as in continuous integration) has sufficient disk space. + + +Latest Release +~~~~~~~~~~~~~~ + +To manually build the most recent release, navigate to the ``neuroconv/dockerfiles`` folder and run... + +.. code:: + + docker build -f neuroconv_latest_release_dockerfile -t neuroconv_latest_release . + + +Dev Branch +~~~~~~~~~~ + +Checkout to a specific branch on a local clone, then... + +.. code:: + + docker build -f neuroconv_dev_dockerfile -t neuroconv_dev . 
+ + + +Publish Container to GitHub +--------------------------- + +The ``LABEL`` is important to include as it determines the host repository on the GitHub Container Registry (GHCR). In each dockerfile we wish to publish on the GHCR, we will add this label right after the ``FROM`` clause... + +.. code:: + + FROM PARENT_IMAGE:TAG + LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv + +After building the image itself, we can publish the container with... + +.. code:: + + docker tag IMAGE_NAME ghcr.io/catalystneuro/IMAGE_NAME:TAG + export CR_PAT="" + echo $CR_PAT | docker login ghcr.io -u USERNAME --password-stdin + docker push ghcr.io/catalystneuro/IMAGE_NAME:TAG + +.. note:: + + Though it may appear confusing, the use of the ``IMAGE_NAME`` in these steps determines only the _name_ of the package as available from the 'packages' screen of the host repository; the ``LABEL`` itself ensures the upload and linkage to the NeuroConv GHCR. + + + +Run Docker container on local YAML conversion specification file +---------------------------------------------------------------- + +You can either perform a manual build locally following the instructions above, or pull the container from the GitHub Container Registry (GHCR) with... + +.. code:: + + docker pull ghcr.io/catalystneuro/neuroconv:latest + +and can then run the entrypoint (equivalent to the usual command line usage) on a YAML specification file (named ``your_specification_file.yml``) with... + +.. code:: + + docker run -it --volume /your/local/volume/:/desired/alias/of/volume/ ghcr.io/catalystneuro/neuroconv:latest neuroconv /desired/alias/of/volume/your_specification_file.yml + + + +Run Docker container on YAML conversion specification environment variable +-------------------------------------------------------------------------- + +An alternative approach that simplifies usage on systems such as AWS Batch is to specify the YAML contents as an environment variable.
The YAML file is constructed in the first step of the container launch. + +The only potential downside with this usage is the maximum size of an environment variable (~13,000 characters). Typical YAML specification files should not come remotely close to this limit. + +Otherwise, in any cloud deployment, the YAML file transfer will have to be managed separately, likely as a part of the data transfer or an entirely separate step. + +To use this alternative image on a local environment, you no longer need to invoke the ``neuroconv`` entrypoint pointing to a file. Instead, just set the environment variables and run the docker container on the mounted volume... + +.. code:: + + export NEUROCONV_YAML="" + export NEUROCONV_DATA_PATH="/desired/alias/of/volume/" + export NEUROCONV_OUTPUT_PATH="/desired/alias/of/volume/" + docker run -it --volume /your/local/volume/:/desired/alias/of/volume/ ghcr.io/catalystneuro/neuroconv:yaml_variable + +.. note:: + + On Windows, use ``set`` instead of ``export``. diff --git a/docs/developer_guide/testing_suite.rst b/docs/developer_guide/testing_suite.rst index 73631da32..1b395c284 100644 --- a/docs/developer_guide/testing_suite.rst +++ b/docs/developer_guide/testing_suite.rst @@ -61,6 +61,7 @@ These can be run in isolation using ``pip install -e neuroconv[test,]`` ``pytest tests/test_`` where ```` can be any of ``ophys``, ``ecephys``, ``text``, or ``behavior``. +.. _example_data: Testing On Example Data ----------------------- diff --git a/docs/user_guide/docker_demo.rst b/docs/user_guide/docker_demo.rst new file mode 100644 index 000000000..92a1d5b2f --- /dev/null +++ b/docs/user_guide/docker_demo.rst @@ -0,0 +1,118 @@ +Docker Demo +----------- + +The following is an explicit demonstration of how to use the Docker-based NeuroConv YAML specification via the command line. + +It relies on some of the GIN data from the main testing suite, see :ref:`example_data` for more details. + + +..
note:: + + Docker relies heavily on absolute system paths, but these can vary depending on your system. + + For Windows, this might be something like: ``C:/Users/MyUser/Downloads/``. + + For MacOSX it might be: ``/Users/username/``. + + For this demo, we will use the home directory of an Ubuntu (Linux) system for a user named 'MyUser' as our base: ``/home/MyUser``. + + +.. note:: + + For Unix systems (MacOSX/Linux) you will likely require sudo access in order to run the ``docker`` based commands. + + If this is the case for your system, then any time you see the ``docker`` usage on the command line you will need to prepend as ``sudo docker``. + + +1. In your base directory (which this demo will refer to as ``/home/MyUser/``), make a new folder for the demo conversion named ``demo_neuroconv_docker``. + +2. Make a subfolder in ``demo_neuroconv_docker`` called ``demo_output``. + +3. Create a file in this folder named ``demo_neuroconv_docker_yaml.yml`` with the following content... + +.. code:: + + metadata: + NWBFile: + lab: My Lab + institution: My Institution + + data_interfaces: + ap: SpikeGLXRecordingInterface + phy: PhySortingInterface + + experiments: + my_experiment: + metadata: + NWBFile: + session_description: My session. + + sessions: + - nwbfile_name: spikeglx_from_docker_yaml.nwb + source_data: + ap: + file_path: /demo_neuroconv_docker/spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin + metadata: + NWBFile: + session_start_time: "2020-10-10T21:19:09+00:00" + Subject: + subject_id: "1" + sex: F + age: P35D + species: Mus musculus + - nwbfile_name: phy_from_docker_yaml.nwb + metadata: + NWBFile: + session_start_time: "2020-10-10T21:19:09+00:00" + Subject: + subject_id: "002" + sex: F + age: P35D + species: Mus musculus + source_data: + phy: + folder_path: /demo_neuroconv_docker/phy/phy_example_0/ + + +4.
To make things easier for volume mounting, copy and paste the ``Noise4Sam_g0`` and ``phy_example_0`` folders into this Docker demo folder so that you have the following folder structure... + +.. code:: + + demo_neuroconv_docker/ + ¦ demo_output/ + ¦ demo_neuroconv_docker_yaml.yml + ¦ spikeglx/ + ¦ +-- Noise4Sam_g0/ + ¦ +-- ... # .nidq streams + ¦ ¦ +-- Noise4Sam_g0_imec0/ + ¦ ¦ +-- Noise4Sam_g0_t0.imec0.ap.bin + ¦ ¦ +-- Noise4Sam_g0_t0.imec0.ap.meta + ¦ ¦ +-- ... # .lf streams + ¦ phy/ + ¦ +-- phy_example_0/ + ¦ ¦ +-- ... # The various file contents from the example Phy folder + +5. Pull the latest NeuroConv docker image from GitHub... + +.. code:: + + docker pull ghcr.io/catalystneuro/neuroconv:latest + +6. Run the command line interface on the YAML file using the docker container (instead of a local installation of the Python package)... + +.. code:: + + docker run -t --volume /home/MyUser/demo_neuroconv_docker:/demo_neuroconv_docker ghcr.io/catalystneuro/neuroconv:latest neuroconv /demo_neuroconv_docker/demo_neuroconv_docker_yaml.yml --output-folder-path /demo_neuroconv_docker/demo_output + +Voilà! If everything occurred successfully, you should see... + +.. code:: + + Source data is valid! + Metadata is valid! + conversion_options is valid! + NWB file saved at /demo_neuroconv_docker/demo_output/spikeglx_from_docker_yaml.nwb! + Source data is valid! + Metadata is valid! + conversion_options is valid! + NWB file saved at /demo_neuroconv_docker/demo_output/phy_from_docker_yaml.nwb! diff --git a/docs/user_guide/user_guide.rst b/docs/user_guide/user_guide.rst index 8cf3336c9..0752b3bd6 100644 --- a/docs/user_guide/user_guide.rst +++ b/docs/user_guide/user_guide.rst @@ -2,7 +2,7 @@ User Guide ========== NeuroConv allows you to easily build programs to convert data from neurophysiology experiments -to NWB. The building-blocks of these conversions are ``DataInterface`` classes. Each +to NWB. The building blocks of these conversions are ``DataInterface`` classes.
Each ``DataInterface`` is responsible for a specific format of data, and contains methods to read data and metadata from that format and write it to NWB. We have pre-built ``DataInterface`` classes for many common data formats available in our :ref:`Conversion Gallery `. @@ -18,8 +18,9 @@ and synchronize data across multiple sources. datainterfaces nwbconverter - yaml - temporal_alignment schemas + temporal_alignment csvs expand_path + yaml + docker_demo diff --git a/tests/docker_yaml_conversion_specification_cli.py b/tests/docker_yaml_conversion_specification_cli.py new file mode 100644 index 000000000..dd5da8d17 --- /dev/null +++ b/tests/docker_yaml_conversion_specification_cli.py @@ -0,0 +1,131 @@ +""" +This file is hidden from normal pytest globbing by not including 'test' in the filename. + +Instead, the tests must be invoked directly from the file. This is designed mostly for use in the GitHub Actions. +""" + +import os +import unittest +from datetime import datetime +from pathlib import Path + +from hdmf.testing import TestCase +from pynwb import NWBHDF5IO + +from neuroconv.tools import deploy_process + +from .test_on_data.setup_paths import ECEPHY_DATA_PATH as DATA_PATH +from .test_on_data.setup_paths import OUTPUT_PATH + + +class TestLatestDockerYAMLConversionSpecification(TestCase): + test_folder = OUTPUT_PATH + tag = os.getenv("NEUROCONV_DOCKER_TESTS_TAG", "latest") + source_volume = os.getenv("NEUROCONV_DOCKER_TESTS_SOURCE_VOLUME", "/home/runner/work/neuroconv/neuroconv") + + def test_run_conversion_from_yaml_cli(self): + path_to_test_yml_files = Path(__file__).parent / "test_on_data" / "conversion_specifications" + yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification.yml" + + output = deploy_process( + command=( + "docker run -t " + f"--volume {self.source_volume}:{self.source_volume} " + f"--volume {self.test_folder}:{self.test_folder} " + f"ghcr.io/catalystneuro/neuroconv:{self.tag} " + f"neuroconv {yaml_file_path} " + 
f"--data-folder-path {self.source_volume}/{DATA_PATH} --output-folder-path {self.test_folder} --overwrite" + ), + catch_output=True, + ) + print(output) + + nwbfile_path = self.test_folder / "example_converter_spec_1.nwb" + assert nwbfile_path.exists(), f"`run_conversion_from_yaml` failed to create the file at '{nwbfile_path}'! " + with NWBHDF5IO(path=nwbfile_path, mode="r") as io: + nwbfile = io.read() + assert nwbfile.session_description == "Subject navigating a Y-shaped maze." + assert nwbfile.lab == "My Lab" + assert nwbfile.institution == "My Institution" + assert nwbfile.session_start_time == datetime.fromisoformat("2020-10-09T21:19:09+00:00") + assert nwbfile.subject.subject_id == "1" + assert "ElectricalSeriesAP" in nwbfile.acquisition + + nwbfile_path = self.test_folder / "example_converter_spec_2.nwb" + assert nwbfile_path.exists(), f"`run_conversion_from_yaml` failed to create the file at '{nwbfile_path}'! " + with NWBHDF5IO(path=nwbfile_path, mode="r") as io: + nwbfile = io.read() + assert nwbfile.session_description == "Subject navigating a Y-shaped maze." + assert nwbfile.lab == "My Lab" + assert nwbfile.institution == "My Institution" + assert nwbfile.session_start_time == datetime.fromisoformat("2020-10-10T21:19:09+00:00") + assert nwbfile.subject.subject_id == "002" + + nwbfile_path = self.test_folder / "example_converter_spec_3.nwb" + assert nwbfile_path.exists(), f"`run_conversion_from_yaml` failed to create the file at '{nwbfile_path}'! 
" + with NWBHDF5IO(path=nwbfile_path, mode="r") as io: + nwbfile = io.read() + assert nwbfile.session_description == "Auto-generated by neuroconv" + assert nwbfile.lab == "My Lab" + assert nwbfile.institution == "My Institution" + assert nwbfile.session_start_time == datetime.fromisoformat("2020-10-11T21:19:09+00:00") + assert nwbfile.subject.subject_id == "Subject Name" + assert "spike_times" in nwbfile.units + + def test_run_conversion_from_yaml_variable(self): + path_to_test_yml_files = Path(__file__).parent / "test_on_data" / "conversion_specifications" + yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification.yml" + + with open(file=yaml_file_path, mode="r") as io: + yaml_lines = io.readlines() + + yaml_string = "".join(yaml_lines) + os.environ["NEUROCONV_YAML"] = yaml_string + os.environ["NEUROCONV_DATA_PATH"] = self.source_volume + str(DATA_PATH) + os.environ["NEUROCONV_OUTPUT_PATH"] = str(self.test_folder) + + output = deploy_process( + command=( + "docker run -t " + f"--volume {self.source_volume}:{self.source_volume} " + f"--volume {self.test_folder}:{self.test_folder} " + '-e NEUROCONV_YAML="$NEUROCONV_YAML" ' + '-e NEUROCONV_DATA_PATH="$NEUROCONV_DATA_PATH" ' + '-e NEUROCONV_OUTPUT_PATH="$NEUROCONV_OUTPUT_PATH" ' + "ghcr.io/catalystneuro/neuroconv:yaml_variable" + ), + catch_output=True, + ) + print(output) + + nwbfile_path = self.test_folder / "example_converter_spec_1.nwb" + assert nwbfile_path.exists(), f"`run_conversion_from_yaml` failed to create the file at '{nwbfile_path}'! " + with NWBHDF5IO(path=nwbfile_path, mode="r") as io: + nwbfile = io.read() + assert nwbfile.session_description == "Subject navigating a Y-shaped maze." 
+ assert nwbfile.lab == "My Lab" + assert nwbfile.institution == "My Institution" + assert nwbfile.session_start_time == datetime.fromisoformat("2020-10-09T21:19:09+00:00") + assert nwbfile.subject.subject_id == "1" + assert "ElectricalSeriesAP" in nwbfile.acquisition + + nwbfile_path = self.test_folder / "example_converter_spec_2.nwb" + assert nwbfile_path.exists(), f"`run_conversion_from_yaml` failed to create the file at '{nwbfile_path}'! " + with NWBHDF5IO(path=nwbfile_path, mode="r") as io: + nwbfile = io.read() + assert nwbfile.session_description == "Subject navigating a Y-shaped maze." + assert nwbfile.lab == "My Lab" + assert nwbfile.institution == "My Institution" + assert nwbfile.session_start_time == datetime.fromisoformat("2020-10-10T21:19:09+00:00") + assert nwbfile.subject.subject_id == "002" + + nwbfile_path = self.test_folder / "example_converter_spec_3.nwb" + assert nwbfile_path.exists(), f"`run_conversion_from_yaml` failed to create the file at '{nwbfile_path}'! " + with NWBHDF5IO(path=nwbfile_path, mode="r") as io: + nwbfile = io.read() + assert nwbfile.session_description == "Auto-generated by neuroconv" + assert nwbfile.lab == "My Lab" + assert nwbfile.institution == "My Institution" + assert nwbfile.session_start_time == datetime.fromisoformat("2020-10-11T21:19:09+00:00") + assert nwbfile.subject.subject_id == "Subject Name" + assert "spike_times" in nwbfile.units