diff --git a/.github/check_module_versions.py b/.github/check_module_versions.py index ac8ce59..f1266a2 100755 --- a/.github/check_module_versions.py +++ b/.github/check_module_versions.py @@ -14,7 +14,7 @@ import re import os -PIPELINE_REPO = "PlantandFoodResearch/pangene" +PIPELINE_REPO = "PlantandFoodResearch/genepal" def get_logger(): formatter = colorlog.ColoredFormatter( diff --git a/.gitignore b/.gitignore index 8bdcb04..e141945 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,3 @@ __pycache__ *.stdout *.stderr - -pangene-test/ diff --git a/.prettierignore b/.prettierignore index 543341f..8d22eff 100644 --- a/.prettierignore +++ b/.prettierignore @@ -14,5 +14,3 @@ __pycache__ *.stdout *.stderr - -pangene-test/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 13c2478..a83f65e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# PlantandFoodResearch/pangene: Changelog +# PlantandFoodResearch/genepal: Changelog The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). @@ -9,21 +9,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 1. Added `orthofinder_annotations` param 2. Added `FASTA_GFF_ORTHOFINDER` sub-workflow -3. Added evaluation by BUSCO [#41](https://github.com/PlantandFoodResearch/pangene/issues/41) -4. Included common tax ids for eggnog mapper [#27](https://github.com/PlantandFoodResearch/pangene/issues/27) -5. Implemented hierarchical naming scheme: geneI.tJ, geneI.tJ.exonK, geneI.tJ.cdsK [#19](https://github.com/PlantandFoodResearch/pangene/issues/19), [#34](https://github.com/PlantandFoodResearch/pangene/issues/34) +3. Added evaluation by BUSCO [#41](https://github.com/PlantandFoodResearch/genepal/issues/41) +4. Included common tax ids for eggnog mapper [#27](https://github.com/PlantandFoodResearch/genepal/issues/27) +5. 
Implemented hierarchical naming scheme: geneI.tJ, geneI.tJ.exonK, geneI.tJ.cdsK [#19](https://github.com/PlantandFoodResearch/genepal/issues/19), [#34](https://github.com/PlantandFoodResearch/genepal/issues/34) 6. Now sorting list of bam and list of fastq before cat to avoid resume cache misses -7. Allowed BAM files for RNA evidence [#3](https://github.com/PlantandFoodResearch/pangene/issues/3) -8. Added `GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES` sub-workflow for splice type statistics [#11](https://github.com/PlantandFoodResearch/pangene/issues/11) -9. Changed `orthofinder_annotations` from FASTA/GFF to protein FASTA [#43](https://github.com/PlantandFoodResearch/pangene/issues/43) -10. Added param `enforce_full_intron_support` to turn on/off strict model purging by TSEBRA [#21](https://github.com/PlantandFoodResearch/pangene/issues/21) -11. Added param `filter_liftoff_by_hints` to evaluate liftoff models with TSEBRA to make sure they have the same level of evidence as BRAKER [#28](ttps://github.com/PlantandFoodResearch/pangene/issues/28) +7. Allowed BAM files for RNA evidence [#3](https://github.com/PlantandFoodResearch/genepal/issues/3) +8. Added `GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES` sub-workflow for splice type statistics [#11](https://github.com/PlantandFoodResearch/genepal/issues/11) +9. Changed `orthofinder_annotations` from FASTA/GFF to protein FASTA [#43](https://github.com/PlantandFoodResearch/genepal/issues/43) +10. Added param `enforce_full_intron_support` to turn on/off strict model purging by TSEBRA [#21](https://github.com/PlantandFoodResearch/genepal/issues/21) +11. Added param `filter_liftoff_by_hints` to evaluate liftoff models with TSEBRA to make sure they have the same level of evidence as BRAKER [#28](https://github.com/PlantandFoodResearch/genepal/issues/28) 12. Added a script to automatically check module version updates 13. 
Updated modules: `AGAT/CONVERTSPGFF2GTF`, `CAT_FASTQ`, `CUSTOM/DUMPSOFTWAREVERSIONS`, `EGGNOGMAPPER`, `FASTP`, `GFFREAD`, `SAMTOOLS/CAT`, `CUSTOM/RESTOREGFFIDS`, `CUSTOM/SHORTENFASTAIDS`, `EDTA/EDTA`, `CAT/CAT`, `FASTQC`, `GUNZIP`, `LIFTOFF`, `STAR/ALIGN`, `STAR/GENOMEGENERATE`, `UMITOOLS/EXTRACT`, 14. Updated sub-workflows: `FASTQ_FASTQC_UMITOOLS_FASTP` and `FASTA_EDTA_LAI` -15. Reduced `BRAKER3` threads to 8 [#55](https://github.com/PlantandFoodResearch/pangene/issues/55) -16. Now the final annotations are stored in the `annotations` folder [#53](https://github.com/PlantandFoodResearch/pangene/issues/53) -17. Added `-gff` flag to `REPEATMASKER` to save the gff file [#54](https://github.com/PlantandFoodResearch/pangene/issues/54) +15. Reduced `BRAKER3` threads to 8 [#55](https://github.com/PlantandFoodResearch/genepal/issues/55) +16. Now the final annotations are stored in the `annotations` folder [#53](https://github.com/PlantandFoodResearch/genepal/issues/53) +17. Added `-gff` flag to `REPEATMASKER` to save the gff file [#54](https://github.com/PlantandFoodResearch/genepal/issues/54) 18. Now a single `fasta` file can be directly specified for `protein_evidence` 19. `eggnogmapper_db_dir` is not a required parameter anymore 20. `eggnogmapper_tax_scope` is now set to 1 (root div) by default @@ -31,9 +31,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -1. Fixed BRAKER spellings [#36](https://github.com/PlantandFoodResearch/pangene/issues/36) -2. Fixed liftoff failure when lifting off from a single reference [#40](https://github.com/PlantandFoodResearch/pangene/issues/40) -3. Added versions from GFF_STORE sub-workflows [#33](https://github.com/PlantandFoodResearch/pangene/issues/33) +1. Fixed BRAKER spellings [#36](https://github.com/PlantandFoodResearch/genepal/issues/36) +2. Fixed liftoff failure when lifting off from a single reference [#40](https://github.com/PlantandFoodResearch/genepal/issues/40) +3. 
Added versions from GFF_STORE sub-workflows [#33](https://github.com/PlantandFoodResearch/genepal/issues/33) ### `Dependencies` @@ -50,7 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 6. Removed dependency on for `BRAKER3` and `REPEATMASKER` modules which are now installed from 7. Removed dependency on 8. Now the final annotations are not stored in the `final` folder -9. Now BRAKER3 outputs are not saved by default [#53](https://github.com/PlantandFoodResearch/pangene/issues/53) and saved under `etc` folder when enabled +9. Now BRAKER3 outputs are not saved by default [#53](https://github.com/PlantandFoodResearch/genepal/issues/53) and saved under `etc` folder when enabled 10. Removed `local` profile. Local executor is the default when no executor is specified. Therefore, the `local` profile was not needed. ## 0.3.3 - [18-Jun-2024] @@ -112,8 +112,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 2. Changed license to MIT 3. Updated `.editorconfig` 4. Moved .literature to test/ branch -5. Renamed `pangene_local` to `local_pangene` -6. Renamed `pangene_pfr` to `pfr_pangene` +5. Renamed `genepal_local` to `local_genepal` +6. Renamed `genepal_pfr` to `pfr_genepal` 7. Added versioning checking 8. Updated github workflow to use pre-commit instead of prettier and editorconfig check 9. Added central singularity cache dir for pfr config @@ -145,7 +145,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 35. `external_protein_fastas` and `eggnogmapper_db_dir` are not mandatory parameters 36. Added contributors 37. Add a document for the pipeline parameters -38. Updated `pfr_pangene` and `pfr/profile.config` +38. Updated `pfr_genepal` and `pfr/profile.config` 39. Now using local tests/stub files for GitHub CI 40. Now removing iso-forms left by TSEBRA using `AGAT_SPFILTERFEATUREFROMKILLLIST` 41. 
Added `pyproject.toml` diff --git a/README.md b/README.md index 24edfeb..712a5d1 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,28 @@ -# PANGENE +# GENEPAL -[![Lint/stub on Linux/Docker](https://github.com/PlantandFoodResearch/pangene/actions/workflows/test.yml/badge.svg)](https://github.com/PlantandFoodResearch/pangene/actions/workflows/test.yml) +[![Lint/stub on Linux/Docker](https://github.com/PlantandFoodResearch/genepal/actions/workflows/test.yml/badge.svg)](https://github.com/PlantandFoodResearch/genepal/actions/workflows/test.yml) -A NextFlow pipeline for pan-genome annotation. It can also be used for annotation of a single genome. +A NextFlow pipeline for single genome and pan-genome annotation. ## Flowchart -

+

## Alpha Release -This release is not fully documented and under alpha testing by the Bioinformatics Team. There are several [outstanding issues](https://github.com/PlantandFoodResearch/pangene/issues) which will be addressed before a general release. +This release is not fully documented and under alpha testing by the Bioinformatics Team. There are several [outstanding issues](https://github.com/PlantandFoodResearch/genepal/issues) which will be addressed before a general release. ## Plant&Food Users Download the pipeline to your `/workspace/$USER` folder. Change the parameters defined in the [pfr/params.json](./pfr/params.json) file. Submit the pipeline to SLURM for execution. For a description of the parameters, see [parameters.md](./docs/parameters.md). ```bash -sbatch ./pfr_pangene +sbatch ./pfr_genepal ``` ## Credits -plantandfoodresearch/pangene workflows were originally scripted by Jason Shiller ([@jasonshiller](https://github.com/jasonshiller)). Usman Rashid ([@gallvp](https://github.com/gallvp)) wrote the NextFLow pipeline. +plantandfoodresearch/genepal workflows were originally scripted by Jason Shiller ([@jasonshiller](https://github.com/jasonshiller)). Usman Rashid ([@gallvp](https://github.com/gallvp)) wrote the NextFlow pipeline. 
We thank the following people for their extensive assistance in the development of this pipeline: diff --git a/assets/schema_input.json b/assets/schema_input.json index 287b222..1da6b87 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/plantandfoodresearch/pangene/master/assets/schema_input.json", - "title": "plantandfoodresearch/pangene pipeline - params.input schema", + "$id": "https://raw.githubusercontent.com/plantandfoodresearch/genepal/master/assets/schema_input.json", + "title": "plantandfoodresearch/genepal pipeline - params.input schema", "description": "Schema for the file provided with params.input", "type": "array", "items": { diff --git a/assets/schema_liftoff.json b/assets/schema_liftoff.json index d32f5f0..fc7046f 100644 --- a/assets/schema_liftoff.json +++ b/assets/schema_liftoff.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/plantandfoodresearch/pangene/master/assets/schema_liftoff.json", - "title": "plantandfoodresearch/pangene pipeline - params.liftoff_annotations schema", + "$id": "https://raw.githubusercontent.com/plantandfoodresearch/genepal/master/assets/schema_liftoff.json", + "title": "plantandfoodresearch/genepal pipeline - params.liftoff_annotations schema", "description": "Schema for the file provided with params.liftoff_annotations", "type": "array", "items": { diff --git a/assets/schema_orthofinder.json b/assets/schema_orthofinder.json index 49301fe..68486bd 100644 --- a/assets/schema_orthofinder.json +++ b/assets/schema_orthofinder.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/plantandfoodresearch/pangene/master/assets/schema_orthofinder.json", - "title": "plantandfoodresearch/pangene pipeline - params.orthofinder_annotations schema", + "$id": 
"https://raw.githubusercontent.com/plantandfoodresearch/genepal/master/assets/schema_orthofinder.json", + "title": "plantandfoodresearch/genepal pipeline - params.orthofinder_annotations schema", "description": "Schema for the file provided with params.orthofinder_annotations", "type": "array", "items": { diff --git a/assets/schema_rna.json b/assets/schema_rna.json index da9b273..89c646d 100644 --- a/assets/schema_rna.json +++ b/assets/schema_rna.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/plantandfoodresearch/pangene/master/assets/schema_rna.json", - "title": "plantandfoodresearch/pangene pipeline - params.rna_evidence schema", + "$id": "https://raw.githubusercontent.com/plantandfoodresearch/genepal/master/assets/schema_rna.json", + "title": "plantandfoodresearch/genepal pipeline - params.rna_evidence schema", "description": "Schema for the file provided with params.rna_evidence", "type": "array", "items": { diff --git a/docs/img/pangene.drawio b/docs/img/genepal.drawio similarity index 100% rename from docs/img/pangene.drawio rename to docs/img/genepal.drawio diff --git a/docs/img/pangene.png b/docs/img/genepal.png similarity index 100% rename from docs/img/pangene.png rename to docs/img/genepal.png diff --git a/docs/parameters.md b/docs/parameters.md index ee94077..a4638c5 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -1,6 +1,6 @@ -# plantandfoodresearch/pangene pipeline parameters +# plantandfoodresearch/genepal pipeline parameters -A NextFlow pipeline for pan-genome annotation +A NextFlow pipeline for single genome and pan-genome annotation ## Input/output options diff --git a/local_pangene b/local_genepal similarity index 100% rename from local_pangene rename to local_genepal diff --git a/main.nf b/main.nf index 067425b..5e4d042 100755 --- a/main.nf +++ b/main.nf @@ -6,12 +6,12 @@ include { validateParameters } from 'plugin/nf-validation' validateParameters() -include { 
PANGENE } from './workflows/pangene.nf' +include { GENEPAL } from './workflows/genepal.nf' workflow { - PFR_PANGENE() + PFR_GENEPAL() } -workflow PFR_PANGENE { - PANGENE() +workflow PFR_GENEPAL { + GENEPAL() } diff --git a/modules.json b/modules.json index 9e92540..9736949 100644 --- a/modules.json +++ b/modules.json @@ -1,6 +1,6 @@ { - "name": "PlantandFoodResearch/pangene", - "homePage": "https://github.com/PlantandFoodResearch/pangene", + "name": "PlantandFoodResearch/genepal", + "homePage": "https://github.com/PlantandFoodResearch/genepal", "repos": { "https://github.com/GallVp/nxf-components.git": { "modules": { diff --git a/nextflow.config b/nextflow.config index df016c3..57d8d4e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -94,10 +94,10 @@ trace { } manifest { - name = 'pangene' + name = 'genepal' author = """Usman Rashid, Jason Shiller""" - homePage = 'https://github.com/PlantandFoodResearch/pangene' - description = """A NextFlow pipeline for pan-genome annotation""" + homePage = 'https://github.com/PlantandFoodResearch/genepal' + description = """A NextFlow pipeline for single genome and pan-genome annotation""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.4' version = '0.4.0+dev' diff --git a/nextflow_schema.json b/nextflow_schema.json index 584ff6e..13706a9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,8 +1,8 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/plantandfoodresearch/pangene/master/nextflow_schema.json", - "title": "plantandfoodresearch/pangene pipeline parameters", - "description": "A NextFlow pipeline for pan-genome annotation", + "$id": "https://raw.githubusercontent.com/plantandfoodresearch/genepal/master/nextflow_schema.json", + "title": "plantandfoodresearch/genepal pipeline parameters", + "description": "A NextFlow pipeline for single genome and pan-genome annotation", "type": "object", "definitions": { "input_output_options": { diff --git 
a/pfr/params.json b/pfr/params.json index c0a1200..e547f98 100644 --- a/pfr/params.json +++ b/pfr/params.json @@ -1,8 +1,8 @@ { - "input": "/workspace/pangene/test_data/assemblysheet.csv", - "protein_evidence": "/workspace/pangene/test_data/external-protein-fastas.txt", + "input": "/workspace/genepal/test_data/assemblysheet.csv", + "protein_evidence": "/workspace/genepal/test_data/external-protein-fastas.txt", "eggnogmapper_db_dir": "/workspace/ComparativeDataSources/emapperdb/5.0.2", "eggnogmapper_tax_scope": 33090, - "rna_evidence": "/workspace/pangene/test_data/fastqsheet.csv", - "liftoff_annotations": "/workspace/pangene/test_data/liftoffannotations.csv" + "rna_evidence": "/workspace/genepal/test_data/fastqsheet.csv", + "liftoff_annotations": "/workspace/genepal/test_data/liftoffannotations.csv" } diff --git a/pfr/profile.config b/pfr/profile.config index b0eba29..0d40c25 100644 --- a/pfr/profile.config +++ b/pfr/profile.config @@ -6,7 +6,7 @@ profiles { apptainer { envWhitelist = "APPTAINER_BINDPATH,APPTAINER_BIND" - cacheDir = "/workspace/pangene/singularity" + cacheDir = "/workspace/genepal/singularity" } } } diff --git a/pfr_pangene b/pfr_genepal similarity index 89% rename from pfr_pangene rename to pfr_genepal index 9b31b44..3da5a74 100644 --- a/pfr_pangene +++ b/pfr_genepal @@ -1,13 +1,13 @@ #!/bin/bash -e -#SBATCH --job-name PANGENE +#SBATCH --job-name GENEPAL #SBATCH --time=14-00:00:00 #SBATCH --nodes=1 #SBATCH --ntasks=1 #SBATCH --cpus-per-task=1 -#SBATCH --output pfr_pangene.stdout -#SBATCH --error pfr_pangene.stderr +#SBATCH --output pfr_genepal.stdout +#SBATCH --error pfr_genepal.stderr #SBATCH --mem=4G full_test_flag=0 diff --git a/subworkflows/local/fasta_orthofinder.nf b/subworkflows/local/fasta_orthofinder.nf index 4170ddc..715d78e 100644 --- a/subworkflows/local/fasta_orthofinder.nf +++ b/subworkflows/local/fasta_orthofinder.nf @@ -24,7 +24,7 @@ workflow FASTA_ORTHOFINDER { | collect | filter { it.size() > 1 } - ORTHOFINDER ( 
ch_orthofinder_peps.map { fastas -> [ [ id: 'pangene' ], fastas ] } ) + ORTHOFINDER ( ch_orthofinder_peps.map { fastas -> [ [ id: 'genepal' ], fastas ] } ) ch_versions = ch_versions.mix(ORTHOFINDER.out.versions) diff --git a/workflows/pangene.nf b/workflows/genepal.nf similarity index 99% rename from workflows/pangene.nf rename to workflows/genepal.nf index 010bc14..9543981 100644 --- a/workflows/pangene.nf +++ b/workflows/genepal.nf @@ -21,7 +21,7 @@ include { GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES } from '../subworkflows log.info paramsSummaryLog(workflow) -workflow PANGENE { +workflow GENEPAL { // Versions channel ch_versions = Channel.empty()