From 19cdd3ccc47969808693ba672ed9e6ddd62a8e47 Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Fri, 20 Sep 2024 12:44:46 +1200 Subject: [PATCH] Added parameter add_attrs_to_proteins_fasta --- CHANGELOG.md | 7 ++-- conf/modules.config | 2 +- local_genepal | 3 +- nextflow.config | 1 + nextflow_schema.json | 82 ++++++++++++++++++++++++++++++-------------- 5 files changed, 64 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 953ed20..ae1790a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 0.4.0+dev - [13-Sep-2024] +## 0.4.0+dev - [20-Sep-2024] ### `Added` @@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 8. Added `GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES` sub-workflow for splice type statistics [#11](https://github.com/plant-food-research-open/genepal/issues/11) 9. Changed `orthofinder_annotations` from FASTA/GFF to protein FASTA [#43](https://github.com/plant-food-research-open/genepal/issues/43) 10. Added param `enforce_full_intron_support` to turn on/off strict model purging by TSEBRA [#21](https://github.com/plant-food-research-open/genepal/issues/21) -11. Added param `filter_liftoff_by_hints` to evaluate liftoff models with TSEBRA to make sure they have the same level of evidence as BRAKER [#28](ttps://github.com/plant-food-research-open/genepal/issues/28) +11. Added param `filter_liftoff_by_hints` to evaluate liftoff models with TSEBRA to make sure they have the same level of evidence as BRAKER [#28](https://github.com/plant-food-research-open/genepal/issues/28) 12. Added a script to automatically check module version updates 13. Reduced `BRAKER3` threads to 8 [#55](https://github.com/plant-food-research-open/genepal/issues/55) 14. 
Now the final annotations are stored in the `annotations` folder [#53](https://github.com/plant-food-research-open/genepal/issues/53) @@ -26,7 +26,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 17. `eggnogmapper_db_dir` is not a required parameter anymore 18. `eggnogmapper_tax_scope` is now set to 1 (root div) by default 19. Added a `test` profile based on public data -20. Updated modules and sub-workflows +20. Added parameter `add_attrs_to_proteins_fasta` to enable/disable addition of decoded gff attributes to proteins fasta [#58](https://github.com/plant-food-research-open/genepal/issues/58) +21. Updated modules and sub-workflows ### `Fixed` diff --git a/conf/modules.config b/conf/modules.config index 262927a..a5d9fed 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -271,7 +271,7 @@ process { // SUBWORKFLOW: GFF_STORE } withName: '.*:GFF_STORE:EXTRACT_PROTEINS' { - ext.args = '-y' + ext.args = params.add_attrs_to_proteins_fasta ? '-F -D -y' : '-y' ext.prefix = { "${meta.id}.pep" } publishDir = [ diff --git a/local_genepal b/local_genepal index 6f287b1..525f930 100755 --- a/local_genepal +++ b/local_genepal @@ -20,4 +20,5 @@ nextflow run \ --max_cpus 8 \ --max_memory '32.GB' \ --eggnogmapper_tax_scope 33090 \ - --eggnogmapper_db_dir ../dbs/emapperdb/5.0.2 + --eggnogmapper_db_dir ../dbs/emapperdb/5.0.2 \ + --outdir results diff --git a/nextflow.config b/nextflow.config index 010e561..aa8ed74 100644 --- a/nextflow.config +++ b/nextflow.config @@ -51,6 +51,7 @@ params { eggnogmapper_evalue = 0.00001 eggnogmapper_pident = 35 eggnogmapper_purge_nohits = false + add_attrs_to_proteins_fasta = false // Evaluation options busco_skip = false diff --git a/nextflow_schema.json b/nextflow_schema.json index be0832e..abee05d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -38,7 +38,8 @@ "type": "integer", "description": "Eggnogmapper taxonomy scopre. 
Eukaryota: 2759, Viridiplantae: 33090, Archaea: 2157, Bacteria: 2, root: 1", "minimum": 1, - "default": 1 + "default": 1, + "fa_icon": "fas fa-hashtag" }, "rna_evidence": { "type": "string", @@ -85,21 +86,26 @@ "type": "string", "default": "repeatmodeler", "enum": ["edta", "repeatmodeler"], - "description": "'edta' or 'repeatmodeler'" + "description": "'edta' or 'repeatmodeler'", + "fa_icon": "fas fa-tasks" }, "save_annotated_te_lib": { "type": "boolean", - "description": "Save annotated TE library or not?" + "description": "Save annotated TE library or not?", + "fa_icon": "fas fa-question-circle" }, "edta_is_sensitive": { "type": "boolean", - "description": "Use '--sensitive 1' flag with EDTA or not?" + "description": "Use '--sensitive 1' flag with EDTA or not?", + "fa_icon": "fas fa-question-circle" }, "repeatmasker_save_outputs": { "type": "boolean", - "description": "Save the repeat-masked genome or not?" + "description": "Save the repeat-masked genome or not?", + "fa_icon": "fas fa-question-circle" } - } + }, + "fa_icon": "fab fa-adn" }, "rnaseq_pre_processing_options": { "title": "RNASeq pre-processing options", @@ -109,33 +115,40 @@ "properties": { "skip_fastqc": { "type": "boolean", - "description": "Skip FASTQC or not?" + "description": "Skip FASTQC or not?", + "fa_icon": "fas fa-question-circle" }, "skip_fastp": { "type": "boolean", - "description": "Skip trimming by FASTQP or not?" + "description": "Skip trimming by FASTP or not?", + "fa_icon": "fas fa-question-circle" }, "min_trimmed_reads": { "type": "integer", "default": 10000, "description": "Exclude a sample if its reads after trimming are below this number", - "minimum": 0 + "minimum": 0, + "fa_icon": "fas fa-hashtag" }, "extra_fastp_args": { "type": "string", - "description": "Extra FASTP arguments" + "description": "Extra FASTP arguments", + "fa_icon": "fas fa-terminal" }, "save_trimmed": { "type": "boolean", - "description": "Save FASTQ files after trimming or not?" 
+ "description": "Save FASTQ files after trimming or not?", + "fa_icon": "fas fa-question-circle" }, "remove_ribo_rna": { "type": "boolean", - "description": "Remove Ribosomal RNA or not?" + "description": "Remove Ribosomal RNA or not?", + "fa_icon": "fas fa-question-circle" }, "save_non_ribo_reads": { "type": "boolean", - "description": "Save FASTQ files after Ribosomal RNA removal or not?" + "description": "Save FASTQ files after Ribosomal RNA removal or not?", + "fa_icon": "fas fa-question-circle" }, "ribo_database_manifest": { "type": "string", @@ -146,7 +159,8 @@ "fa_icon": "fas fa-database", "description": "Ribosomal RNA fastas listed in a text sheet" } - } + }, + "fa_icon": "fas fa-filter" }, "rnaseq_alignment_options": { "title": "RNAseq alignment options", @@ -158,21 +172,26 @@ "type": "integer", "default": 16000, "minimum": 0, - "description": "Maximum intron length for STAR alignment" + "description": "Maximum intron length for STAR alignment", + "fa_icon": "fas fa-hashtag" }, "star_align_extra_args": { "type": "string", - "description": "EXTRA arguments for STAR" + "description": "EXTRA arguments for STAR", + "fa_icon": "fas fa-terminal" }, "star_save_outputs": { "type": "boolean", - "description": "Save BAM files from STAR or not?" + "description": "Save BAM files from STAR or not?", + "fa_icon": "fas fa-question-circle" }, "save_cat_bam": { "type": "boolean", - "description": "SAVE a concatenated BAM file per assembly or not?" 
+ "description": "SAVE a concatenated BAM file per assembly or not?", + "fa_icon": "fas fa-question-circle" } - } + }, + "fa_icon": "fas fa-align-center" }, "annotation_options": { "title": "Annotation options", @@ -182,7 +201,8 @@ "properties": { "braker_extra_args": { "type": "string", - "description": "Extra arguments for BRAKER" + "description": "Extra arguments for BRAKER", + "fa_icon": "fas fa-terminal" }, "braker_save_outputs": { "type": "boolean", @@ -194,12 +214,14 @@ "default": 0.9, "minimum": 0, "maximum": 1, - "description": "Liftoff coverage parameter" + "description": "Liftoff coverage parameter", + "fa_icon": "fas fa-hashtag" }, "liftoff_identity": { "type": "number", "default": 0.9, - "description": "Liftoff identity parameter" + "description": "Liftoff identity parameter", + "fa_icon": "fas fa-hashtag" }, "allow_isoforms": { "type": "boolean", @@ -222,20 +244,29 @@ "eggnogmapper_evalue": { "type": "number", "default": 1e-5, - "description": "Only report alignments below or equal the e-value threshold" + "description": "Only report alignments below or equal the e-value threshold", + "fa_icon": "fas fa-hashtag" }, "eggnogmapper_pident": { "type": "integer", "default": 35, "description": "Only report alignments above or equal to the given percentage of identity (0-100)", "minimum": 0, - "maximum": 100 + "maximum": 100, + "fa_icon": "fas fa-hashtag" }, "eggnogmapper_purge_nohits": { "type": "boolean", - "description": "Purge transcripts which do not have a hit against eggnog" + "description": "Purge transcripts which do not have a hit against eggnog", + "fa_icon": "fas fa-question-circle" + }, + "add_attrs_to_proteins_fasta": { + "type": "boolean", + "description": "Add decoded GFF attributes to the proteins FASTA or not?", + "fa_icon": "fas fa-question-circle" } - } + }, + "fa_icon": "fab fa-adn" }, "evaluation_options": { "title": "Evaluation options",