Continuous Integration - part 2 (#47)

* [LINT] cleaning some files following PEP8 * [REFAC] moving description and results to narps_open/data/ [REFAC] creating narps_open.data.participants * Rebasing branch to main * [TEST] modifying helpers for pipeline testing & correlation values comparison * [ENH] runner can use a number of subjects from a fixed list * [ENH] sorting participant list so that it alternates participants from the equalRange and the equalIndifference groups * [BUG] artifacts in pylint action * [REFAC] participants infos from TSV * [TEST] 2T6S test_execution * [TEST] new pipeline test helper, with a number of trials * [ENH] adding a configuration parameter for results using the neurovault naming * [TEST] import issue in test * [TEST] consider unthresholded maps only [skip ci] * [2T6S] issue with hypo names [skip ci] * [TEST] consider unthresholded maps only [skip ci] * Rebasing branch on main * [TEST] refac + change hypothesis order + change correlation score thresholds [skip ci] * [TEST] refac + change hypothesis order + change correlation score thresholds [skip ci] * [TEST] execution with 20 subjects [skip ci] * [TEST] test pipeline output file formatting [skip ci] * [TEST][CI] displaying test results in GitHub Actions summary [skip ci] * [TEST] remove previous results before testing pipeline [skip ci] * [TEST] issue with keys of results [skip ci] * [2T6S] adjusting use of contrasts [skip ci] * [2T6S] adjusting use of contrasts [skip ci] * [2T6S] adjusting use of contrasts [skip ci] * [2T6S] adjusting use of contrasts [skip ci] * [TEST] correlation values are now configurable [skip ci] * [LINT] conform to PEP8 * [CI] git diff exclude deleted files * [CI] some actions run on self-hosted runner * [CI] installing python and cache mechanism are not needed in the self-hosted runner * [CI] Regex for pipeline files search * Resolving conflicts * Resolving conflicts * [CI] self-hosted runner config * [CI] self-hosted runner config * [CI] self-hosted runner config * [CI] pipeline_tests and 
change_tests only run on PR events * [CI] pipeline_tests and change_tests only run on PR events * [CI] pipeline_tests and change_tests only run on PR events * [CI] pipeline_tests and change_tests only run on PR events * [CI] skip testing if test list is empty * [CI] skip testing if test list is empty * [CI] git checkout strategy * [CI] git checkout strategy * [CI] git checkout strategy * [CI] git checkout strategy * [CI] git checkout strategy * [CI] git checkout strategy * [CI] git checkout strategy
Inria-Empenn · Jul 24, 2023 · ddcd11c · ddcd11c
1 parent 58fb06b
commit ddcd11c
Show file tree

Hide file tree

Showing 31 changed files with 1,194 additions and 423 deletions.
diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
@@ -45,7 +45,7 @@ jobs:
     - name: Analyse the code with pylint
       run: |
         pylint --exit-zero narps_open > pylint_report_narps_open.txt
-        pylint --exit-zero tests > pylint_report_narps_open.txt
+        pylint --exit-zero tests > pylint_report_tests.txt
 
     - name: Archive pylint results
       uses: actions/upload-artifact@v3

diff --git a/.github/workflows/pipeline_tests.yml b/.github/workflows/pipeline_tests.yml
@@ -5,28 +5,21 @@ name: pipeline_tests
 
 # Define when it runs
 on:
-  push:
-    paths:
-      - 'narps_open/pipelines/team**'
   pull_request:
     paths:
       - 'narps_open/pipelines/team**'
 
 # Jobs that define the workflow
 jobs:
 
+
   # A job to list the tests to be run
   identify-tests:
     runs-on: ubuntu-latest
     outputs:
+      teams: ${{ steps.identify.outputs.teams }}
       tests: ${{ steps.identify.outputs.tests }}
     steps:
-    - name: Checkout main branch for comparison
-      uses: actions/checkout@v3
-      with:
-        ref: main
-        fetch-depth: 0
-
     - name: Checkout current branch
       uses: actions/checkout@v3
       with:
@@ -35,48 +28,64 @@ jobs:
     - name: Create a list of tests for changed pipelines
       id: identify
       run: |
-        # Loop through modified files
-        for file in $(git diff --name-only remotes/origin/main...$GITHUB_SHA)
+        # Loop through modified files between PR base and last head
+        for file in $(git diff --name-only --diff-filter=d remotes/origin/main...$GITHUB_SHA)
         do
           # echo each file
           echo $file
           # List team id corresponding to team_* files
-          if [[ "$file" =~ .*"narps_open/pipelines/team_".* ]]; then
+          if [[ "$file" =~ narps_open/pipelines/team_[A-Z0-9]{4}.py ]]; then
             echo "Modified pipeline = $file"
             tmp=${file#*"team_"} # remove prefix ending in "team_"
             team_id=${tmp%".py"*}   # remove suffix starting with ".py"
-            # Populate the list of test files
+            # Populate the lists of test files and teams
             test_files="$test_files tests/pipelines/test_team_$team_id.py"
+            teams="$teams $team_id"
           fi
         done
         # Send the test list as job output
         echo $test_files
         echo "tests=$test_files" >> $GITHUB_OUTPUT
+        echo "teams=$teams" >> $GITHUB_OUTPUT
 
-  # A job to run the identified tests
+  # A job to identify and run the tests
   pytest:
     needs: identify-tests
-    runs-on: ubuntu-latest
+    runs-on: self-hosted
     steps:
-    - name: Checkout repository
+    - name: Checkout PR branch
       uses: actions/checkout@v3
 
-    - name: Set up Python 3.9
-      uses: actions/setup-python@v3
-      with:
-        python-version: 3.9
-
-    - uses: actions/cache@v3
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
+    - name: Load configuration for self-hosted runner
+      run: cp /home/neuro/local_testing_config.toml narps_open/utils/configuration/testing_config.toml
 
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         pip install .[tests]
 
-    - name: Collect tests with pytest
-      run: pytest --collect-only -q -m "pipeline_test" ${{ needs.identify-tests.outputs.tests }}
+    - name: Remove test reports if any existing
+      run: rm -f test_pipeline-*.txt
+
+    - name: Execute tests with pytest
+      run: |
+        if [[ "${{ needs.identify-tests.outputs.tests }}" != "" ]]; then
+          pytest -q -m "pipeline_test" ${{ needs.identify-tests.outputs.tests }}
+        fi
+
+    - name: Report results on GitHub
+      run: |
+        # Start report
+        echo "# Correlation values" >> $GITHUB_STEP_SUMMARY
+        echo "Unthresholded maps, reproduced vs. results" >> $GITHUB_STEP_SUMMARY
+        echo "Correlation values are sorted from hypotheses 1 to 9" >> $GITHUB_STEP_SUMMARY
+
+        # Start report table
+        echo "| Team    | Number of subjects | Test status | Correlation values |" >> $GITHUB_STEP_SUMMARY
+        echo "| -------- | ------- | ------- | ------- |" >> $GITHUB_STEP_SUMMARY
+
+        # Loop through test report files
+        for team in ${{ needs.identify-tests.outputs.teams }}
+        do
+          cat test_pipeline-$team.txt >> $GITHUB_STEP_SUMMARY
+        done
diff --git a/.github/workflows/test_changes.yml b/.github/workflows/test_changes.yml
@@ -5,9 +5,6 @@ name: test_changes
 
 # Define when it runs
 on:
-  push:
-    paths:
-      - 'tests/**/test_*.py'
   pull_request:
     paths:
       - 'tests/**/test_*.py'
@@ -16,37 +13,21 @@ on:
 jobs:
 
   # A job to list the tests to be run
-  pytest:
+  identify-tests:
     runs-on: ubuntu-latest
+    outputs:
+      tests: ${{ steps.identify.outputs.tests }}
     steps:
     - name: Checkout main branch for comparison
       uses: actions/checkout@v3
       with:
-        ref: main
         fetch-depth: 0
 
-    - name: Checkout current branch
-      uses: actions/checkout@v3
-      with:
-        fetch-depth: 0
-
-    - name: Set up Python 3.9
-      uses: actions/setup-python@v3
-      with:
-        python-version: 3.9
-
-    - uses: actions/cache@v3
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
-
     - name: Create a list of tests for changed tests
       id: identify
       run: |
-        # Loop through modified files
-        for file in $(git diff --name-only remotes/origin/main...$GITHUB_SHA)
+        # Loop through modified files between PR base and last head
+        for file in $(git diff --name-only --diff-filter=d remotes/origin/main...$GITHUB_SHA)
         do
           # List files corresponding to tests/**/test_**.py
           if [[ "$file" =~ .*"tests".*"test_".*".py" ]]; then
@@ -57,10 +38,24 @@ jobs:
         echo $test_files
         echo "tests=$test_files" >> $GITHUB_OUTPUT
 
+  # A job to run the identified tests
+  pytest:
+    needs: identify-tests
+    runs-on: self-hosted
+    steps:
+    - name: Checkout PR branch
+      uses: actions/checkout@v3
+
+    - name: Load configuration for self-hosted runner
+      run: cp /home/neuro/local_testing_config.toml narps_open/utils/configuration/testing_config.toml
+
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         pip install .[tests]
 
     - name: Collect tests with pytest
-      run: pytest --collect-only -q ${{ steps.identify.outputs.tests }}
+      run: |
+        if [[ "${{ needs.identify-tests.outputs.tests }}" != "" ]]; then
+          pytest -q ${{ needs.identify-tests.outputs.tests }}
+        fi
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,6 @@
 
 # to avoid committing data
-data/
-tests/data/
+./data/
 
 # neuro user in docker image
 neuro

diff --git a/docs/description.md b/docs/description.md
@@ -1,17 +1,17 @@
 # Access the descriptions of NARPS teams pipelines
 
-The file `narps_open/utils/description/analysis_pipelines_full_descriptions.tsv` contains the description provided by each team participating in NARPS.
+The file `narps_open/data/description/analysis_pipelines_full_descriptions.tsv` contains the description provided by each team participating in NARPS.
 It is a conversion into tsv format (tab-separated values) of the [original .xlsx file published in NARPS](https://github.com/poldrack/narps/blob/1.0.1/ImageAnalyses/metadata_files/analysis_pipelines_for_analysis.xlsx
 ), which allows easier parsing with python.
 
-The file `narps_open/utils/description/analysis_pipelines_derived_descriptions.tsv` contains for each team a set of programmatically usable data based on the textual descriptions of the previous file. This data is available in the `derived` sub dictionary (see examples hereafter).
+The file `narps_open/data/description/analysis_pipelines_derived_descriptions.tsv` contains for each team a set of programmatically usable data based on the textual descriptions of the previous file. This data is available in the `derived` sub dictionary (see examples hereafter).
 
-The class `TeamDescription` of module `narps_open.utils.description` acts as a parser for these two files.
+The class `TeamDescription` of module `narps_open.data.description` acts as a parser for these two files.
 
 You can also use the command-line tool as follows. Option `-t` is for the team id; option `-d` allows printing only one of the sub-parts of the description among: `general`, `exclusions`, `preprocessing`, `analysis`, and `categorized_for_analysis`.
 
 ```bash
-python narps_open/utils/description -h
+python narps_open/data/description -h
 # usage: __init__.py [-h] -t TEAM [-d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived}]
 #
 # Get description of a NARPS pipeline.
@@ -22,7 +22,7 @@ python narps_open/utils/description -h
 #   -d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived}, --dictionary {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived}
 #                         the sub dictionary of team description
 
-python narps_open/utils/description -t 2T6S -d general
+python narps_open/data/description -t 2T6S -d general
 # {
 #    "teamID": "2T6S",
 #    "NV_collection_link": "https://neurovault.org/collections/4881/",
@@ -35,10 +35,10 @@ python narps_open/utils/description -t 2T6S -d general
 # }
 ```
 
-Of course the `narps_open.utils.description` module is accessible programmatically, here is an example on how to use it:
+Of course the `narps_open.data.description` module is accessible programmatically, here is an example on how to use it:
 
 ```python
-from narps_open.utils.description import TeamDescription
+from narps_open.data.description import TeamDescription
 description = TeamDescription('2T6S') # Set the id of the team here
 # Access the object as a dict
 print(description)

diff --git a/narps_open/data/__init__.py b/narps_open/data/__init__.py
diff --git a/narps_open/utils/description/__init__.py → narps_open/data/description/__init__.py b/narps_open/utils/description/__init__.py → narps_open/data/description/__init__.py
@@ -14,10 +14,10 @@ class TeamDescription(dict):
     """
 
     description_file = join(
-        files('narps_open.utils.description'),
+        files('narps_open.data.description'),
         'analysis_pipelines_full_descriptions.tsv')
     derived_description_file = join(
-        files('narps_open.utils.description'),
+        files('narps_open.data.description'),
         'analysis_pipelines_derived_descriptions.tsv')
 
     def __init__(self, team_id):

diff --git a/narps_open/utils/description/__main__.py → narps_open/data/description/__main__.py b/narps_open/utils/description/__main__.py → narps_open/data/description/__main__.py
@@ -1,12 +1,12 @@
 #!/usr/bin/python
 # coding: utf-8
 
-""" Provide a command-line interface for the package narps_open.utils.description """
+""" Provide a command-line interface for the package narps_open.data.description """
 
 from argparse import ArgumentParser
 from json import dumps
 
-from narps_open.utils.description import TeamDescription
+from narps_open.data.description import TeamDescription
 
 # Parse arguments
 parser = ArgumentParser(description='Get description of a NARPS pipeline.')

diff --git a/...alysis_pipelines_derived_descriptions.tsv → ...alysis_pipelines_derived_descriptions.tsv b/...alysis_pipelines_derived_descriptions.tsv → ...alysis_pipelines_derived_descriptions.tsv
diff --git a/.../analysis_pipelines_full_descriptions.tsv → .../analysis_pipelines_full_descriptions.tsv b/.../analysis_pipelines_full_descriptions.tsv → .../analysis_pipelines_full_descriptions.tsv
diff --git a/narps_open/data/participants.py b/narps_open/data/participants.py
@@ -0,0 +1,51 @@
+#!/usr/bin/python
+# coding: utf-8
+
+""" A set of functions to get the participants data for the narps_open package """
+
+from os.path import join
+
+from pandas import read_csv
+
+from narps_open.data.description import TeamDescription
+from narps_open.utils.configuration import Configuration
+
+def get_participants_information():
+    """ Load the participants information from the `participants.tsv` file of the dataset.
+
+    The file is looked up in the directory given by the `dataset` entry of the
+    `directories` section of the narps_open Configuration.
+
+    Returns: the object returned by pandas.read_csv for this tab-separated file
+    """
+    return read_csv(join(Configuration()['directories']['dataset'], 'participants.tsv'), sep='\t')
+
+def get_all_participants() -> list:
+    """ Return a list of all participants included in NARPS.
+        This list is ordered so that subsets of 20, 40, 60, 80, 108 participants
+        are balanced in terms of belonging to the equal indifference and equal
+        range groups.
+
+    Returns: a list of 108 participant labels, each a zero-padded 3-character string.
+        A new list object is built on every call.
+    """
+    # The order of the labels below is deliberate (see docstring): do not sort this list.
+    return [
+        '020', '001', '070', '013', '120', '109', '118', '035', '002', '025',
+        '018', '053', '046', '073', '066', '121', '098', '011', '116', '087',
+        '008', '069', '106', '095', '004', '113', '104', '115', '092', '089',
+        '090', '045', '016', '117', '124', '093', '088', '021', '094', '041',
+        '062', '017', '040', '083', '084', '107', '056', '119', '064', '103',
+        '044', '057', '060', '061', '112', '085', '050', '027', '082', '059',
+        '022', '019', '052', '047', '030', '039', '100', '029', '108', '067',
+        '096', '009', '058', '055', '024', '015', '080', '077', '006', '003',
+        '076', '072', '014', '102', '010', '074', '038', '114', '026', '079',
+        '054', '071', '032', '051', '110', '081', '036', '037', '068', '099',
+        '105', '063', '075', '033', '049', '123', '043', '005']
+
+def get_participants(team_id: str) -> list:
+    """ Return a list of participants that were taken into account by a given team
+
+    Args:
+        team_id: str, the ID of the team.
+
+    Returns: a list of participants labels, i.e. the labels of get_all_participants()
+        (in the same order) minus the ones listed as excluded in the team description.
+    """
+    description = TeamDescription(team_id)
+    # Spaces are stripped before splitting on commas, so that '001, 002' and '001,002'
+    # both give ['001', '002'].
+    # NOTE(review): assumes derived['excluded_participants'] is a comma-separated string
+    # of participant labels - TODO confirm against the derived descriptions TSV file.
+    excluded_participants = description.derived['excluded_participants'].replace(' ','').split(',')
+
+    # Filter the reference list, so that its ordering is preserved
+    return [p for p in get_all_participants() if p not in excluded_participants]
+
+def get_participants_subset(nb_participants: int = 108) -> list:
+    """ Return the first nb_participants participants of get_all_participants()
+
+    Args:
+        nb_participants: int, the number of participants to return (defaults to 108).
+
+    Returns: a list of participants labels. As this is a slice of the full list,
+        asking for more participants than available returns the whole list.
+    """
+    return get_all_participants()[0:nb_participants]