From 19cdd3ccc47969808693ba672ed9e6ddd62a8e47 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Fri, 20 Sep 2024 12:44:46 +1200
Subject: [PATCH] Added parameter add_attrs_to_proteins_fasta

---
 CHANGELOG.md         |  7 ++--
 conf/modules.config  |  2 +-
 local_genepal        |  3 +-
 nextflow.config      |  1 +
 nextflow_schema.json | 82 ++++++++++++++++++++++++++++++--------------
 5 files changed, 64 insertions(+), 31 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 953ed20..ae1790a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## 0.4.0+dev - [13-Sep-2024]
+## 0.4.0+dev - [20-Sep-2024]
 
 ### `Added`
 
@@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 8. Added `GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES` sub-workflow for splice type statistics [#11](https://github.com/plant-food-research-open/genepal/issues/11)
 9. Changed `orthofinder_annotations` from FASTA/GFF to protein FASTA [#43](https://github.com/plant-food-research-open/genepal/issues/43)
 10. Added param `enforce_full_intron_support` to turn on/off strict model purging by TSEBRA [#21](https://github.com/plant-food-research-open/genepal/issues/21)
-11. Added param `filter_liftoff_by_hints` to evaluate liftoff models with TSEBRA to make sure they have the same level of evidence as BRAKER [#28](ttps://github.com/plant-food-research-open/genepal/issues/28)
+11. Added param `filter_liftoff_by_hints` to evaluate liftoff models with TSEBRA to make sure they have the same level of evidence as BRAKER [#28](https://github.com/plant-food-research-open/genepal/issues/28)
 12. Added a script to automatically check module version updates
 13. Reduced `BRAKER3` threads to 8 [#55](https://github.com/plant-food-research-open/genepal/issues/55)
 14. Now the final annotations are stored in the `annotations` folder [#53](https://github.com/plant-food-research-open/genepal/issues/53)
@@ -26,7 +26,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 17. `eggnogmapper_db_dir` is not a required parameter anymore
 18. `eggnogmapper_tax_scope` is now set to 1 (root div) by default
 19. Added a `test` profile based on public data
-20. Updated modules and sub-workflows
+20. Added parameter `add_attrs_to_proteins_fasta` to enable/disable addition of decoded GFF attributes to the proteins FASTA [#58](https://github.com/plant-food-research-open/genepal/issues/58)
+21. Updated modules and sub-workflows
 
 ### `Fixed`
 
diff --git a/conf/modules.config b/conf/modules.config
index 262927a..a5d9fed 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -271,7 +271,7 @@ process { // SUBWORKFLOW: GFF_STORE
     }
 
     withName: '.*:GFF_STORE:EXTRACT_PROTEINS' {
-        ext.args = '-y'
+        ext.args = params.add_attrs_to_proteins_fasta ? '-F -D -y' : '-y'
         ext.prefix = { "${meta.id}.pep" }
 
         publishDir = [
diff --git a/local_genepal b/local_genepal
index 6f287b1..525f930 100755
--- a/local_genepal
+++ b/local_genepal
@@ -20,4 +20,5 @@ nextflow run \
     --max_cpus 8 \
     --max_memory '32.GB' \
     --eggnogmapper_tax_scope 33090 \
-    --eggnogmapper_db_dir ../dbs/emapperdb/5.0.2
+    --eggnogmapper_db_dir ../dbs/emapperdb/5.0.2 \
+    --outdir results
diff --git a/nextflow.config b/nextflow.config
index 010e561..aa8ed74 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -51,6 +51,7 @@ params {
     eggnogmapper_evalue         = 0.00001
     eggnogmapper_pident         = 35
     eggnogmapper_purge_nohits   = false
+    add_attrs_to_proteins_fasta = false
 
     // Evaluation options
     busco_skip                  = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index be0832e..abee05d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -38,7 +38,8 @@
                     "type": "integer",
                     "description": "Eggnogmapper taxonomy scopre. Eukaryota: 2759, Viridiplantae: 33090, Archaea: 2157, Bacteria: 2, root: 1",
                     "minimum": 1,
-                    "default": 1
+                    "default": 1,
+                    "fa_icon": "fas fa-hashtag"
                 },
                 "rna_evidence": {
                     "type": "string",
@@ -85,21 +86,26 @@
                     "type": "string",
                     "default": "repeatmodeler",
                     "enum": ["edta", "repeatmodeler"],
-                    "description": "'edta' or 'repeatmodeler'"
+                    "description": "'edta' or 'repeatmodeler'",
+                    "fa_icon": "fas fa-tasks"
                 },
                 "save_annotated_te_lib": {
                     "type": "boolean",
-                    "description": "Save annotated TE library or not?"
+                    "description": "Save annotated TE library or not?",
+                    "fa_icon": "fas fa-question-circle"
                 },
                 "edta_is_sensitive": {
                     "type": "boolean",
-                    "description": "Use '--sensitive 1' flag with EDTA or not?"
+                    "description": "Use '--sensitive 1' flag with EDTA or not?",
+                    "fa_icon": "fas fa-question-circle"
                 },
                 "repeatmasker_save_outputs": {
                     "type": "boolean",
-                    "description": "Save the repeat-masked genome or not?"
+                    "description": "Save the repeat-masked genome or not?",
+                    "fa_icon": "fas fa-question-circle"
                 }
-            }
+            },
+            "fa_icon": "fab fa-adn"
         },
         "rnaseq_pre_processing_options": {
             "title": "RNASeq pre-processing options",
@@ -109,33 +115,40 @@
             "properties": {
                 "skip_fastqc": {
                     "type": "boolean",
-                    "description": "Skip FASTQC or not?"
+                    "description": "Skip FASTQC or not?",
+                    "fa_icon": "fas fa-question-circle"
                 },
                 "skip_fastp": {
                     "type": "boolean",
-                    "description": "Skip trimming by FASTQP or not?"
+                    "description": "Skip trimming by FASTP or not?",
+                    "fa_icon": "fas fa-question-circle"
                 },
                 "min_trimmed_reads": {
                     "type": "integer",
                     "default": 10000,
                     "description": "Exclude a sample if its reads after trimming are below this number",
-                    "minimum": 0
+                    "minimum": 0,
+                    "fa_icon": "fas fa-hashtag"
                 },
                 "extra_fastp_args": {
                     "type": "string",
-                    "description": "Extra FASTP arguments"
+                    "description": "Extra FASTP arguments",
+                    "fa_icon": "fas fa-terminal"
                 },
                 "save_trimmed": {
                     "type": "boolean",
-                    "description": "Save FASTQ files after trimming or not?"
+                    "description": "Save FASTQ files after trimming or not?",
+                    "fa_icon": "fas fa-question-circle"
                 },
                 "remove_ribo_rna": {
                     "type": "boolean",
-                    "description": "Remove Ribosomal RNA or not?"
+                    "description": "Remove Ribosomal RNA or not?",
+                    "fa_icon": "fas fa-question-circle"
                 },
                 "save_non_ribo_reads": {
                     "type": "boolean",
-                    "description": "Save FASTQ files after Ribosomal RNA removal or not?"
+                    "description": "Save FASTQ files after Ribosomal RNA removal or not?",
+                    "fa_icon": "fas fa-question-circle"
                 },
                 "ribo_database_manifest": {
                     "type": "string",
@@ -146,7 +159,8 @@
                     "fa_icon": "fas fa-database",
                     "description": "Ribosomal RNA fastas listed in a text sheet"
                 }
-            }
+            },
+            "fa_icon": "fas fa-filter"
         },
         "rnaseq_alignment_options": {
             "title": "RNAseq alignment options",
@@ -158,21 +172,26 @@
                     "type": "integer",
                     "default": 16000,
                     "minimum": 0,
-                    "description": "Maximum intron length for STAR alignment"
+                    "description": "Maximum intron length for STAR alignment",
+                    "fa_icon": "fas fa-hashtag"
                 },
                 "star_align_extra_args": {
                     "type": "string",
-                    "description": "EXTRA arguments for STAR"
+                    "description": "Extra arguments for STAR",
+                    "fa_icon": "fas fa-terminal"
                 },
                 "star_save_outputs": {
                     "type": "boolean",
-                    "description": "Save BAM files from STAR or not?"
+                    "description": "Save BAM files from STAR or not?",
+                    "fa_icon": "fas fa-question-circle"
                 },
                 "save_cat_bam": {
                     "type": "boolean",
-                    "description": "SAVE a concatenated BAM file per assembly or not?"
+                    "description": "Save a concatenated BAM file per assembly or not?",
+                    "fa_icon": "fas fa-question-circle"
                 }
-            }
+            },
+            "fa_icon": "fas fa-align-center"
         },
         "annotation_options": {
             "title": "Annotation options",
@@ -182,7 +201,8 @@
             "properties": {
                 "braker_extra_args": {
                     "type": "string",
-                    "description": "Extra arguments for BRAKER"
+                    "description": "Extra arguments for BRAKER",
+                    "fa_icon": "fas fa-terminal"
                 },
                 "braker_save_outputs": {
                     "type": "boolean",
@@ -194,12 +214,14 @@
                     "default": 0.9,
                     "minimum": 0,
                     "maximum": 1,
-                    "description": "Liftoff coverage parameter"
+                    "description": "Liftoff coverage parameter",
+                    "fa_icon": "fas fa-hashtag"
                 },
                 "liftoff_identity": {
                     "type": "number",
                     "default": 0.9,
-                    "description": "Liftoff identity parameter"
+                    "description": "Liftoff identity parameter",
+                    "fa_icon": "fas fa-hashtag"
                 },
                 "allow_isoforms": {
                     "type": "boolean",
@@ -222,20 +244,29 @@
                 "eggnogmapper_evalue": {
                     "type": "number",
                     "default": 1e-5,
-                    "description": "Only report alignments below or equal the e-value threshold"
+                    "description": "Only report alignments below or equal the e-value threshold",
+                    "fa_icon": "fas fa-hashtag"
                 },
                 "eggnogmapper_pident": {
                     "type": "integer",
                     "default": 35,
                     "description": "Only report alignments above or equal to the given percentage of identity (0-100)",
                     "minimum": 0,
-                    "maximum": 100
+                    "maximum": 100,
+                    "fa_icon": "fas fa-hashtag"
                 },
                 "eggnogmapper_purge_nohits": {
                     "type": "boolean",
-                    "description": "Purge transcripts which do not have a hit against eggnog"
+                    "description": "Purge transcripts which do not have a hit against eggnog",
+                    "fa_icon": "fas fa-question-circle"
+                },
+                "add_attrs_to_proteins_fasta": {
+                    "type": "boolean",
+                    "description": "Add decoded GFF attributes to the proteins FASTA or not?",
+                    "fa_icon": "fas fa-question-circle"
                 }
-            }
+            },
+            "fa_icon": "fab fa-adn"
         },
         "evaluation_options": {
             "title": "Evaluation options",