From c220ce6a051be65a0f3b6975a2c340ad6903bc83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com> Date: Tue, 10 Oct 2023 11:58:19 +0200 Subject: [PATCH] Comments section in team description (#122) * [BUG] inside unit_tests workflow * Description markdown export option * [TEST] testing json and markdown export for team description * [DOC] update of team description * Adding comments file to team description * [TEST] adding comments file to team description * [DOC] adding comments file to team description * [PACK] updating packaging file --- docs/description.md | 9 ++- narps_open/data/description/__init__.py | 40 ++++++++++- narps_open/data/description/__main__.py | 5 +- .../analysis_pipelines_comments.tsv | 71 +++++++++++++++++++ setup.py | 1 + tests/data/test_description.py | 3 + .../data/description/test_markdown.md | 5 ++ .../test_data/data/description/test_str.json | 6 +- 8 files changed, 133 insertions(+), 7 deletions(-) create mode 100644 narps_open/data/description/analysis_pipelines_comments.tsv diff --git a/docs/description.md b/docs/description.md index f0475117..ac17f588 100644 --- a/docs/description.md +++ b/docs/description.md @@ -6,20 +6,22 @@ It is a conversion into tsv format (tab-separated values) of the [original .xlsx The file `narps_open/data/description/analysis_pipelines_derived_descriptions.tsv` contains for each team a set of programmatically usable data based on the textual descriptions of the previous file. This data is available in the `derived` sub dictionary (see examples hereafter). +The file `narps_open/data/description/analysis_pipelines_comments.tsv` contains for each team a set of comments made by the NARPS Open Pipelines team about reproducibility and exclusions of the pipeline. This data is available in the `comments` sub dictionary (see examples hereafter). + The class `TeamDescription` of module `narps_open.data.description` acts as a parser for these two files. 
-You can use the command-line tool as so. Option `-t` is for the team id, option `-d` allows to print only one of the sub parts of the description among : `general`, `exclusions`, `preprocessing`, `analysis`, and `categorized_for_analysis`. Options `--json` and `--md` allow to choose the export format you prefer between JSON and Markdown. +You can use the command-line tool as follows. Option `-t` is for the team id; option `-d` prints only one of the sub parts of the description, among: `general`, `exclusions`, `preprocessing`, `analysis`, `categorized_for_analysis`, `derived`, and `comments`. Options `--json` and `--md` let you choose the export format you prefer, between JSON and Markdown. ```bash python narps_open/data/description -h -# usage: __init__.py [-h] -t TEAM [-d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived}] +# usage: __init__.py [-h] -t TEAM [-d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived,comments}] # # Get description of a NARPS pipeline. 
# # options: # -h, --help show this help message and exit # -t TEAM, --team TEAM the team ID -# -d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived}, --dictionary {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived} +# -d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived,comments}, --dictionary {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived,comments} # the sub dictionary of team description # --json output team description as JSON # --md output team description as Markdown @@ -91,6 +93,7 @@ description.preprocessing description.analysis description.categorized_for_analysis description.derived +description.comments # Access values of sub dictionaries description.general['teamID'] # Other keys in general are: ['teamID', 'NV_collection_link', 'results_comments', 'preregistered', 'link_preregistration_form', 'regions_definition', 'softwares', 'general_comments'] diff --git a/narps_open/data/description/__init__.py b/narps_open/data/description/__init__.py index 908ebfe7..cda40729 100644 --- a/narps_open/data/description/__init__.py +++ b/narps_open/data/description/__init__.py @@ -20,6 +20,9 @@ class TeamDescription(dict): derived_description_file = join( files('narps_open.data.description'), 'analysis_pipelines_derived_descriptions.tsv') + comments_description_file = join( + files('narps_open.data.description'), + 'analysis_pipelines_comments.tsv') def __init__(self, team_id): super().__init__() @@ -59,6 +62,11 @@ def derived(self) -> dict: """ Getter for the sub dictionary containing derived team description """ return self._get_sub_dict('derived') + @property + def comments(self) -> dict: + """ Getter for the sub dictionary containing comments for NARPS Open Pipeline """ + return self._get_sub_dict('comments') + def markdown(self): """ Return the team description as a string formatted in markdown """ return_string = f'# NARPS team description : {self.team_id}\n' 
@@ -69,7 +77,8 @@ def markdown(self): self.preprocessing, self.analysis, self.categorized_for_analysis, - self.derived + self.derived, + self.comments ] names = [ @@ -78,7 +87,8 @@ def markdown(self): 'Preprocessing', 'Analysis', 'Categorized for analysis', - 'Derived' + 'Derived', + 'Comments' ] for dictionary, name in zip(dictionaries, names): @@ -175,3 +185,29 @@ def _load(self): if not found: raise AttributeError(f'Team {self.team_id}\ was not found in the derived description.') + + # Parsing third file : self.comments_description_file + with open(self.comments_description_file, newline='', encoding='utf-8') as csv_file: + # Prepare first line (whose elements are second part of the keys) + first_line = csv_file.readline().replace('\n','').split('\t') + + # Read the rest of the file as a dict + reader = DictReader( + csv_file, + fieldnames = ['comments.' + k2 for k2 in first_line], + delimiter = '\t' + ) + + # Update self with the key/value pairs from the file + found = False + for row in reader: + if row['comments.teamID'] == self.team_id: + found = True + row.pop('comments.teamID', None) # Remove useless 'comments.teamID' key + self.update(row) + break + + # If team id was not found in the file + if not found: + raise AttributeError(f'Team {self.team_id}\ + was not found in the comments description.') diff --git a/narps_open/data/description/__main__.py b/narps_open/data/description/__main__.py index 49daad22..e538ff4d 100644 --- a/narps_open/data/description/__main__.py +++ b/narps_open/data/description/__main__.py @@ -19,7 +19,8 @@ 'preprocessing', 'analysis', 'categorized_for_analysis', - 'derived' + 'derived', + 'comments' ], help='the sub dictionary of team description') formats = parser.add_mutually_exclusive_group(required = False) @@ -49,5 +50,7 @@ print(dumps(information.categorized_for_analysis, indent = 4)) elif arguments.dictionary == 'derived': print(dumps(information.derived, indent = 4)) + elif arguments.dictionary == 'comments': + 
print(dumps(information.comments, indent = 4)) else: print(dumps(information, indent = 4)) diff --git a/narps_open/data/description/analysis_pipelines_comments.tsv b/narps_open/data/description/analysis_pipelines_comments.tsv new file mode 100644 index 00000000..93cd4f24 --- /dev/null +++ b/narps_open/data/description/analysis_pipelines_comments.tsv @@ -0,0 +1,71 @@ +teamID excluded_from_narps_analysis exclusion_comment reproducibility reproducibility_comment +50GV no N/A ? Uses custom software (Denoiser) +9Q6R no N/A +O21U no N/A +U26C no N/A +43FJ no N/A +C88N no N/A +4TQ6 yes Resampled image offset and too large compared to template. +T54A no N/A +2T6S no N/A +L7J7 no N/A +0JO0 no N/A +X1Y5 no N/A +51PW no N/A +6VV2 no N/A +O6R6 no N/A +C22U no N/A ? Custom Matlab script for white matter PCA confounds +3PQ2 no N/A +UK24 no N/A +4SZ2 yes Resampled image offset from template brain. +9T8E no N/A +94GU no N/A ? Multiple software dependencies : SPM + ART + TAPAS + Matlab. +I52Y no N/A +5G9K no N/A ? ? +2T7P yes Missing thresholded images. ? ? +UI76 no N/A +B5I6 no N/A +V55J yes Bad histogram : very small values. +X19V no N/A +0C7Q yes Appears to be a p-value distribution, with slight excursions below and above zero. +R5K7 no N/A +0I4U no N/A +3C6G no N/A +R9K3 no N/A +O03M no N/A +08MQ no N/A +80GC no N/A +J7F9 no N/A +R7D1 no N/A +Q58J yes Bad histogram : bimodal, zero-inflated with a second distribution centered around 5. +L3V8 yes Rejected due to large amount of missing brain in center. +SM54 no N/A +1KB2 no N/A +0H5E yes Rejected due to large amount of missing brain in center. +P5F3 yes Rejected due to large amounts of missing data across brain. +Q6O0 no N/A +R42Q no N/A ? Uses fMRIflows, a custom software based on NiPype. +L9G5 no N/A +DC61 no N/A +E3B6 yes Bad histogram : very long tail, with substantial inflation at a value just below zero. +16IN no N/A ? 
Multiple software dependencies : matlab + SPM + FSL + R + TExPosition + neuroim +46CD no N/A +6FH5 yes Missing much of the central brain. +K9P0 no N/A +9U7M no N/A +VG39 no N/A +1K0E yes Used surface-based analysis, only provided data for cortical ribbon. ? ? +X1Z4 yes Used surface-based analysis, only provided data for cortical ribbon. ? Multiple software dependencies : FSL + fmriprep + ciftify + HCP workbench + Freesurfer + ANTs +I9D6 no N/A +E6R3 no N/A +27SS no N/A +B23O no N/A +AO86 no N/A +L1A8 yes Resampled image much smaller than template brain. ? ? +IZ20 no N/A +3TR7 no N/A +98BT yes Rejected due to very bad normalization. +XU70 no N/A ? Uses custom software : FSL + 4drealign +0ED6 no N/A ? ? +I07H yes Bad histogram : bimodal, with second distribution centered around 2.5. +1P0Y no N/A diff --git a/setup.py b/setup.py index 7d961c60..2c6c9b06 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,7 @@ data_files = [ ('narps_open/utils/configuration', ['narps_open/utils/configuration/default_config.toml']), ('narps_open/utils/configuration', ['narps_open/utils/configuration/testing_config.toml']), + ('narps_open/data/description', ['narps_open/data/description/analysis_pipelines_comments.tsv']), ('narps_open/data/description', ['narps_open/data/description/analysis_pipelines_derived_descriptions.tsv']), ('narps_open/data/description', ['narps_open/data/description/analysis_pipelines_full_descriptions.tsv']) ] diff --git a/tests/data/test_description.py b/tests/data/test_description.py index 03f95d4b..c66e23b3 100644 --- a/tests/data/test_description.py +++ b/tests/data/test_description.py @@ -55,6 +55,7 @@ def test_arguments_properties(): assert description['analysis.RT_modeling'] == 'duration' assert description['categorized_for_analysis.analysis_SW_with_version'] == 'SPM12' assert description['derived.func_fwhm'] == '8' + assert description['comments.excluded_from_narps_analysis'] == 'no' # 4 - Check properties assert isinstance(description.general, dict) @@ 
-63,6 +64,7 @@ def test_arguments_properties(): assert isinstance(description.analysis, dict) assert isinstance(description.categorized_for_analysis, dict) assert isinstance(description.derived, dict) + assert isinstance(description.comments, dict) assert list(description.general.keys()) == [ 'teamID', @@ -82,6 +84,7 @@ def test_arguments_properties(): assert description.analysis['RT_modeling'] == 'duration' assert description.categorized_for_analysis['analysis_SW_with_version'] == 'SPM12' assert description.derived['func_fwhm'] == '8' + assert description.comments['excluded_from_narps_analysis'] == 'no' # 6 - Test another team description = TeamDescription('9Q6R') diff --git a/tests/test_data/data/description/test_markdown.md b/tests/test_data/data/description/test_markdown.md index 080d397c..1749e7c1 100644 --- a/tests/test_data/data/description/test_markdown.md +++ b/tests/test_data/data/description/test_markdown.md @@ -96,3 +96,8 @@ Model EVs (2): eq_indiff, eq_range * `excluded_participants` : 018, 030, 088, 100 * `func_fwhm` : 5 * `con_fwhm` : +## Comments +* `excluded_from_narps_analysis` : no +* `exclusion_comment` : N/A +* `reproducibility` : +* `reproducibility_comment` : diff --git a/tests/test_data/data/description/test_str.json b/tests/test_data/data/description/test_str.json index 0d27767e..c2550fcd 100644 --- a/tests/test_data/data/description/test_str.json +++ b/tests/test_data/data/description/test_str.json @@ -53,5 +53,9 @@ "derived.n_participants": "104", "derived.excluded_participants": "018, 030, 088, 100", "derived.func_fwhm": "5", - "derived.con_fwhm": "" + "derived.con_fwhm": "", + "comments.excluded_from_narps_analysis": "no", + "comments.exclusion_comment": "N/A", + "comments.reproducibility": "", + "comments.reproducibility_comment": "" } \ No newline at end of file