From 3114adb0783494b4ebf36e3ada9c66ce2d90a15f Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 10 Dec 2024 21:09:25 +1300
Subject: [PATCH 1/6] Added parameter filter_genes_by_aa_length

---
 CHANGELOG.md                                  |   1 +
 README.md                                     |   1 +
 conf/modules.config                           |   4 +
 docs/output.md                                |   4 +-
 docs/parameters.md                            |  13 +--
 modules.json                                  |   5 +
 .../agat/spfilterbyorfsize/environment.yml    |   7 ++
 modules/gallvp/agat/spfilterbyorfsize/main.nf |  60 +++++++++++
 .../gallvp/agat/spfilterbyorfsize/meta.yml    |  67 ++++++++++++
 .../agat/spfilterbyorfsize/tests/main.nf.test |  62 +++++++++++
 .../spfilterbyorfsize/tests/main.nf.test.snap | 100 ++++++++++++++++++
 nextflow.config                               |   1 +
 nextflow_schema.json                          |   7 ++
 subworkflows/local/gff_merge_cleanup.nf       |  23 +++-
 workflows/genepal.nf                          |   3 +-
 15 files changed, 345 insertions(+), 13 deletions(-)
 create mode 100644 modules/gallvp/agat/spfilterbyorfsize/environment.yml
 create mode 100644 modules/gallvp/agat/spfilterbyorfsize/main.nf
 create mode 100644 modules/gallvp/agat/spfilterbyorfsize/meta.yml
 create mode 100644 modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test
 create mode 100644 modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e4d0ca6..33813b7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 1. Added cDNA and CDS outputs to <OUTPUT_DIR>/annotations/<SAMPLE> directory [#118](https://github.com/Plant-Food-Research-Open/genepal/issues/118)
 2. Added parameter `add_attrs_to_proteins_cds_fastas`
+3. Added parameter `filter_genes_by_aa_length` with default set to `24` which allows removal of genes with ORFs shorter than 24 amino acids [#125](https://github.com/Plant-Food-Research-Open/genepal/issues/125)
 
 ### `Fixed`
 
diff --git a/README.md b/README.md
index 51f3a3e..177e8f5 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@
   - Optionally, allow or remove iso-forms
   - Remove BRAKER models from Liftoff loci
   - Merge Liftoff and BRAKER models
+  - Optionally, remove models with ORFs shorter than `N` amino acids
   - Optionally, remove models without any EggNOG-mapper hits
 - [EggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper): Add functional annotation to gff
 - [GenomeTools](https://github.com/genometools/genometools): GFF format validation
diff --git a/conf/modules.config b/conf/modules.config
index 44e6123..fbb5f52 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -240,6 +240,10 @@ process { // SUBWORKFLOW: GFF_MERGE_CLEANUP
         ext.prefix = { "${meta.id}.liftoff.braker" }
     }
 
+    withName: '.*:GFF_MERGE_CLEANUP:AGAT_SPFILTERBYORFSIZE' {
+        ext.args = params.filter_genes_by_aa_length != null ? "-s ${params.filter_genes_by_aa_length}" : '' // null-check mirrors the subworkflow's skip condition, so a non-null falsy value never drops '-s' and falls back to AGAT's own default size
+    }
+
     withName: '.*:GFF_MERGE_CLEANUP:GT_GFF3' {
         ext.args = '-tidy -retainids -sort'
     }
diff --git a/docs/output.md b/docs/output.md
index f4793b5..40b546c 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -169,8 +169,8 @@ If more than one genome is included in the pipeline, [ORTHOFINDER](https://githu
   - `Y/`
     - `Y.gt.gff3`: Final annotation file for genome `Y` which contains gene models and their functional annotations
     - `Y.pep.fasta`: Protein sequences for the gene models
-    - 'Y.cdna.fasta': cDNA sequences for the gene models
-    - 'Y.cds.fasta': Coding sequences for the gene models
+    - `Y.cdna.fasta`: cDNA sequences for the gene models
+    - `Y.cds.fasta`: Coding sequences for the gene models
 
 </details>
 
diff --git a/docs/parameters.md b/docs/parameters.md
index 9297c4a..0c2bb09 100644
--- a/docs/parameters.md
+++ b/docs/parameters.md
@@ -59,12 +59,13 @@ A Nextflow pipeline for consensus, phased and pan-genome annotation.
 
 ## Post-annotation filtering options
 
-| Parameter                     | Description                                                       | Type      | Default | Required | Hidden |
-| ----------------------------- | ----------------------------------------------------------------- | --------- | ------- | -------- | ------ |
-| `allow_isoforms`              | Allow multiple isoforms for gene models                           | `boolean` | True    |          |        |
-| `enforce_full_intron_support` | Require every model to have external evidence for all its introns | `boolean` | True    |          |        |
-| `filter_liftoff_by_hints`     | Use BRAKER hints to filter Liftoff models                         | `boolean` | True    |          |        |
-| `eggnogmapper_purge_nohits`   | Purge transcripts which do not have a hit against eggnog          | `boolean` |         |          |        |
+| Parameter                     | Description                                                                                                                            | Type      | Default | Required | Hidden |
+| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ |
+| `allow_isoforms`              | Allow multiple isoforms for gene models                                                                                                | `boolean` | True    |          |        |
+| `enforce_full_intron_support` | Require every model to have external evidence for all its introns                                                                      | `boolean` | True    |          |        |
+| `filter_liftoff_by_hints`     | Use BRAKER hints to filter Liftoff models                                                                                              | `boolean` | True    |          |        |
+| `eggnogmapper_purge_nohits`   | Purge transcripts which do not have a hit against eggnog                                                                               | `boolean` |         |          |        |
+| `filter_genes_by_aa_length`   | Filter genes with open reading frames shorter than the specified number of amino acids. If set to `null`, this filter step is skipped. | `integer` | 24      |          |        |
 
 ## Annotation output options
 
diff --git a/modules.json b/modules.json
index da05f16..6b9d74a 100644
--- a/modules.json
+++ b/modules.json
@@ -15,6 +15,11 @@
                         "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4",
                         "installed_by": ["gxf_fasta_agat_spaddintrons_spextractsequences"]
                     },
+                    "agat/spfilterbyorfsize": {
+                        "branch": "main",
+                        "git_sha": "a0054cdffbd84f002fb6582b28575b699e01098e",
+                        "installed_by": ["modules"]
+                    },
                     "agat/spflagshortintrons": {
                         "branch": "main",
                         "git_sha": "d8f08700c82a3bd14811a3dfe7e7d63838130693",
diff --git a/modules/gallvp/agat/spfilterbyorfsize/environment.yml b/modules/gallvp/agat/spfilterbyorfsize/environment.yml
new file mode 100644
index 0000000..2c3daab
--- /dev/null
+++ b/modules/gallvp/agat/spfilterbyorfsize/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::agat=1.4.2"
diff --git a/modules/gallvp/agat/spfilterbyorfsize/main.nf b/modules/gallvp/agat/spfilterbyorfsize/main.nf
new file mode 100644
index 0000000..502a9cd
--- /dev/null
+++ b/modules/gallvp/agat/spfilterbyorfsize/main.nf
@@ -0,0 +1,60 @@
+process AGAT_SPFILTERBYORFSIZE {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/agat:1.4.2--pl5321hdfd78af_0':
+        'biocontainers/agat:1.4.2--pl5321hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(gxf)
+    path config
+
+    output:
+    tuple val(meta), path("*.passed.gff")   , emit: passed_gff
+    tuple val(meta), path("*.failed.gff")   , emit: failed_gff
+    path "versions.yml"                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args        = task.ext.args ?: ''
+    def prefix      = task.ext.prefix ?: "${meta.id}"
+    def config_arg  = config ? "-c $config" : ''
+    if( "$gxf" in [ "${prefix}.passed.gff", "${prefix}.failed.gff" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    agat_sp_filter_by_ORF_size.pl \\
+        -g $gxf \\
+        $args \\
+        $config_arg \\
+        -o $prefix
+
+    mv \\
+        ${prefix}_NOT* \\
+        "${prefix}.failed.gff"
+
+    mv \\
+        ${prefix}_* \\
+        "${prefix}.passed.gff"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        agat: \$(agat_sp_flag_short_introns.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if( "$gxf" in [ "${prefix}.passed.gff", "${prefix}.failed.gff" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    touch ${prefix}.passed.gff
+    touch ${prefix}.failed.gff
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        agat: \$(agat_sp_flag_short_introns.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
+    END_VERSIONS
+    """
+}
diff --git a/modules/gallvp/agat/spfilterbyorfsize/meta.yml b/modules/gallvp/agat/spfilterbyorfsize/meta.yml
new file mode 100644
index 0000000..cf399da
--- /dev/null
+++ b/modules/gallvp/agat/spfilterbyorfsize/meta.yml
@@ -0,0 +1,67 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "agat_spfilterbyorfsize"
+description: The script reads a gff annotation file, and create two output files,
+  one contains the gene models with ORF passing the test, the other contains the rest.
+  By default the test is "> 100" that means all gene models that have ORF longer than
+  100 Amino acids, will pass the test.
+keywords:
+  - genomics
+  - GFF/GTF
+  - filter
+  - annotation
+tools:
+  - "agat":
+      description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene
+        annotations in any GTF/GFF format."
+      homepage: "https://agat.readthedocs.io/en/latest/"
+      documentation: "https://agat.readthedocs.io/en/latest/"
+      tool_dev_url: "https://github.com/NBISweden/AGAT"
+      doi: "10.5281/zenodo.3552717"
+      licence: ["GPL v3"]
+      identifier: biotools:AGAT
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - gxf:
+        type: file
+        description: Input GFF3/GTF file
+        pattern: "*.{gff,gff3,gtf}"
+  - - config:
+        type: file
+        description: |
+          Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any,
+          otherwise it takes the orignal agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose".
+          The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently).
+        pattern: "*.yaml"
+output:
+  - passed_gff:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]
+      - "*.passed.gff":
+          type: file
+          description: GFF file with gene models which pass the filter test
+  - failed_gff:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]
+      - "*.failed.gff":
+          type: file
+          description: GFF file with remaining gene models
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@GallVp"
+maintainers:
+  - "@GallVp"
diff --git a/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test b/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test
new file mode 100644
index 0000000..4a6e1fc
--- /dev/null
+++ b/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test
@@ -0,0 +1,62 @@
+nextflow_process {
+
+    name "Test Process AGAT_SPFILTERBYORFSIZE"
+    script "../main.nf"
+    process "AGAT_SPFILTERBYORFSIZE"
+
+    tag "modules"
+    tag "modules_gallvp"
+    tag "agat"
+    tag "agat/spfilterbyorfsize"
+
+    test("actinidia_chinensis - genome - gtf") {
+
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.gtf.gz', checkIfExists: true)
+                ]
+                input[1] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - genome - gtf - stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.gtf', checkIfExists: true)
+                ]
+                input[1] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+
+}
diff --git a/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap b/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap
new file mode 100644
index 0000000..22b26fe
--- /dev/null
+++ b/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap
@@ -0,0 +1,100 @@
+{
+    "homo_sapiens - genome - gtf - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.passed.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.failed.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29"
+                ],
+                "failed_gff": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.failed.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "passed_gff": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.passed.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-12-10T17:07:11.619928"
+    },
+    "actinidia_chinensis - genome - gtf": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.passed.gff:md5,e2558c89e50df32d654f19f9a69e46a3"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.failed.gff:md5,d7eb6ae1c3dc30675138029b513073eb"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29"
+                ],
+                "failed_gff": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.failed.gff:md5,d7eb6ae1c3dc30675138029b513073eb"
+                    ]
+                ],
+                "passed_gff": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.passed.gff:md5,e2558c89e50df32d654f19f9a69e46a3"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-12-10T17:07:06.829402"
+    }
+}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 363f0c5..c3ce861 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -54,6 +54,7 @@ params {
     enforce_full_intron_support         = true
     filter_liftoff_by_hints             = true
     eggnogmapper_purge_nohits           = false
+    filter_genes_by_aa_length           = 24
 
     // Annotation output options
     braker_save_outputs                 = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b7b5cc4..abe26a9 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -272,6 +272,13 @@
                     "type": "boolean",
                     "description": "Purge transcripts which do not have a hit against eggnog",
                     "fa_icon": "fas fa-question-circle"
+                },
+                "filter_genes_by_aa_length": {
+                    "type": "integer",
+                    "default": 24,
+                    "fa_icon": "fas fa-hashtag",
+                    "description": "Filter genes with open reading frames shorter than the specified number of amino acids. If set to `null`, this filter step is skipped.",
+                    "minimum": 3
                 }
             }
         },
diff --git a/subworkflows/local/gff_merge_cleanup.nf b/subworkflows/local/gff_merge_cleanup.nf
index fc6c75e..fbdea37 100644
--- a/subworkflows/local/gff_merge_cleanup.nf
+++ b/subworkflows/local/gff_merge_cleanup.nf
@@ -1,18 +1,20 @@
 include { AGAT_SPMERGEANNOTATIONS               } from '../../modules/nf-core/agat/spmergeannotations/main'
 include { GT_GFF3                               } from '../../modules/nf-core/gt/gff3/main'
+include { AGAT_SPFILTERBYORFSIZE                } from '../../modules/gallvp/agat/spfilterbyorfsize/main'
 include { AGAT_CONVERTSPGXF2GXF                 } from '../../modules/nf-core/agat/convertspgxf2gxf/main'
 
 workflow GFF_MERGE_CLEANUP {
     take:
     ch_braker_gff               // Channel: [ meta, gff ]
     ch_liftoff_gff              // Channel: [ meta, gff ]
+    val_filter_by_aa_length     // val(null|Integer)
 
     main:
     ch_versions                 = Channel.empty()
 
     ch_gff_branch               = ch_braker_gff
                                 | join(ch_liftoff_gff, remainder:true)
-                                | branch { meta, braker_gff, liftoff_gff ->
+                                | branch { _meta, braker_gff, liftoff_gff ->
                                     both        : (     braker_gff      &&      liftoff_gff )
                                     braker_only : (     braker_gff      && ( !  liftoff_gff ) )
                                     liftoff_only: ( ( ! braker_gff )    &&      liftoff_gff )
@@ -25,12 +27,25 @@ workflow GFF_MERGE_CLEANUP {
     )
 
     ch_merged_gff               = AGAT_SPMERGEANNOTATIONS.out.gff
-                                | mix ( ch_gff_branch.liftoff_only.map { meta, braker_gff, liftoff_gff -> [ meta, liftoff_gff ] } )
-                                | mix ( ch_gff_branch.braker_only.map { meta, braker_gff, liftoff_gff -> [ meta, braker_gff ] } )
+                                | mix ( ch_gff_branch.liftoff_only.map { meta, _braker_gff, liftoff_gff -> [ meta, liftoff_gff ] } )
+                                | mix ( ch_gff_branch.braker_only.map { meta, braker_gff, _liftoff_gff -> [ meta, braker_gff ] } )
     ch_versions                 = ch_versions.mix(AGAT_SPMERGEANNOTATIONS.out.versions.first())
 
+    // MODULE: AGAT_SPFILTERBYORFSIZE
+    ch_filter_input             = ch_merged_gff
+                                | branch {
+                                    filter: val_filter_by_aa_length != null
+                                    pass: val_filter_by_aa_length == null
+                                }
+
+    AGAT_SPFILTERBYORFSIZE ( ch_filter_input.filter, [] )
+
+    ch_filtered_gff             = AGAT_SPFILTERBYORFSIZE.out.passed_gff
+                                | mix ( ch_filter_input.pass )
+    ch_versions                 = ch_versions.mix(AGAT_SPFILTERBYORFSIZE.out.versions.first())
+
     // MODULE: GT_GFF3
-    GT_GFF3 ( ch_merged_gff )
+    GT_GFF3 ( ch_filtered_gff )
 
     ch_gt_gff                   = GT_GFF3.out.gt_gff3
     ch_versions                 = ch_versions.mix(GT_GFF3.out.versions.first())
diff --git a/workflows/genepal.nf b/workflows/genepal.nf
index 538fcfe..6ee525b 100644
--- a/workflows/genepal.nf
+++ b/workflows/genepal.nf
@@ -178,7 +178,8 @@ workflow GENEPAL {
     // SUBWORKFLOW: GFF_MERGE_CLEANUP
     GFF_MERGE_CLEANUP(
         ch_braker_purged_gff,
-        ch_liftoff_gff3
+        ch_liftoff_gff3,
+        params.filter_genes_by_aa_length
     )
 
     ch_merged_gff               = GFF_MERGE_CLEANUP.out.gff

From d694431b94985b425e9212c1078975d6020a2006 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 11 Dec 2024 06:37:33 +1300
Subject: [PATCH 2/6] Updated snapshots

---
 pfr/params.json                 |  3 ++-
 tests/minimal/main.nf.test.snap | 11 +++++++----
 tests/stub/main.nf.test.snap    | 11 +++++++----
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/pfr/params.json b/pfr/params.json
index 7e993bf..9983398 100644
--- a/pfr/params.json
+++ b/pfr/params.json
@@ -32,8 +32,9 @@
     "enforce_full_intron_support": true,
     "filter_liftoff_by_hints": true,
     "eggnogmapper_purge_nohits": false,
+    "filter_genes_by_aa_length": 24,
     "braker_save_outputs": false,
-    "add_attrs_to_proteins_fasta": false,
+    "add_attrs_to_proteins_cds_fastas": false,
     "busco_skip": false,
     "busco_lineage_datasets": "embryophyta_odb10"
 }
diff --git a/tests/minimal/main.nf.test.snap b/tests/minimal/main.nf.test.snap
index 48dae90..f4b2aba 100644
--- a/tests/minimal/main.nf.test.snap
+++ b/tests/minimal/main.nf.test.snap
@@ -2,7 +2,7 @@
     "profile - test": {
         "content": [
             {
-                "successful tasks": 20,
+                "successful tasks": 21,
                 "versions": {
                     "AGAT_CONVERTSPGFF2GTF": {
                         "agat": "v1.4.0"
@@ -16,6 +16,9 @@
                     "AGAT_SPEXTRACTSEQUENCES": {
                         "agat": "v1.4.0"
                     },
+                    "AGAT_SPFILTERBYORFSIZE": {
+                        "agat": "v1.4.1"
+                    },
                     "BRAKER3": {
                         "braker3": "3.0.8",
                         "augustus": "3.5.0",
@@ -92,8 +95,8 @@
         ],
         "meta": {
             "nf-test": "0.9.2",
-            "nextflow": "24.04.2"
+            "nextflow": "24.04.4"
         },
-        "timestamp": "2024-12-05T07:51:43.818374"
+        "timestamp": "2024-12-11T06:36:01.956188"
     }
-}
+}
\ No newline at end of file
diff --git a/tests/stub/main.nf.test.snap b/tests/stub/main.nf.test.snap
index 4516d50..1548c96 100644
--- a/tests/stub/main.nf.test.snap
+++ b/tests/stub/main.nf.test.snap
@@ -2,7 +2,7 @@
     "full - stub": {
         "content": [
             {
-                "successful tasks": 162,
+                "successful tasks": 166,
                 "versions": {
                     "AGAT_CONVERTSPGFF2GTF": {
                         "agat": "v1.4.0"
@@ -16,6 +16,9 @@
                     "AGAT_SPEXTRACTSEQUENCES": {
                         "agat": "v1.4.0"
                     },
+                    "AGAT_SPFILTERBYORFSIZE": {
+                        "agat": "v1.4.1"
+                    },
                     "AGAT_SPFILTERFEATUREFROMKILLLIST": {
                         "agat": "v1.4.0"
                     },
@@ -203,8 +206,8 @@
         ],
         "meta": {
             "nf-test": "0.9.2",
-            "nextflow": "24.04.2"
+            "nextflow": "24.04.4"
         },
-        "timestamp": "2024-12-05T07:56:38.915238"
+        "timestamp": "2024-12-10T21:52:10.308719"
     }
-}
+}
\ No newline at end of file

From 0f7784ccf5ecd7fcf14798145086afd912458745 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 11 Dec 2024 21:18:26 +1300
Subject: [PATCH 3/6] Added test to verify that GFFREAD can filter mRNA by CDS
 length

---
 modules/local/tests/gffread/main.nf.test      | 38 +++++++++++++++
 modules/local/tests/gffread/main.nf.test.snap | 47 +++++++++++++++++++
 modules/local/tests/gffread/nextflow.config   |  5 ++
 modules/local/tests/gffread/testdata/t.gff    | 47 +++++++++++++++++++
 subworkflows/local/gff_eggnogmapper.nf        |  8 ++--
 5 files changed, 141 insertions(+), 4 deletions(-)
 create mode 100644 modules/local/tests/gffread/main.nf.test
 create mode 100644 modules/local/tests/gffread/main.nf.test.snap
 create mode 100644 modules/local/tests/gffread/nextflow.config
 create mode 100644 modules/local/tests/gffread/testdata/t.gff

diff --git a/modules/local/tests/gffread/main.nf.test b/modules/local/tests/gffread/main.nf.test
new file mode 100644
index 0000000..60e588b
--- /dev/null
+++ b/modules/local/tests/gffread/main.nf.test
@@ -0,0 +1,38 @@
+nextflow_process {
+
+    name "Test Process GFFREAD"
+    script "../../../nf-core/gffread/main.nf"
+    config "./nextflow.config"
+    process "GFFREAD"
+
+    tag "gffread"
+    tag "modules_nfcore"
+    tag "modules"
+
+    test("filter by length") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [id: 'test'],
+                    file("$baseDir" + '/modules/local/tests/gffread/testdata/t.gff', checkIfExists: true)
+                ]
+                input[1] = []
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert file(process.out.gffread_gff[0][1]).text.contains('gene19851') },
+                { assert file(process.out.gffread_gff[0][1]).text.contains('gene19851.t1') },
+                { assert ! file(process.out.gffread_gff[0][1]).text.contains('gene19851.t2') } // This is the only transcript which is being knocked out
+            )
+        }
+
+    }
+
+}
diff --git a/modules/local/tests/gffread/main.nf.test.snap b/modules/local/tests/gffread/main.nf.test.snap
new file mode 100644
index 0000000..261f436
--- /dev/null
+++ b/modules/local/tests/gffread/main.nf.test.snap
@@ -0,0 +1,47 @@
+{
+    "filter by length": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.gff3:md5,59a7d6ff7123589ef2b90b20043a347c"
+                    ]
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+                ],
+                "gffread_fasta": [
+                    
+                ],
+                "gffread_gff": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.gff3:md5,59a7d6ff7123589ef2b90b20043a347c"
+                    ]
+                ],
+                "gtf": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-12-11T21:11:59.953464"
+    }
+}
\ No newline at end of file
diff --git a/modules/local/tests/gffread/nextflow.config b/modules/local/tests/gffread/nextflow.config
new file mode 100644
index 0000000..734d066
--- /dev/null
+++ b/modules/local/tests/gffread/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: GFFREAD {
+        ext.args = '--no-pseudo --keep-genes -C -l 72'
+    }
+}
diff --git a/modules/local/tests/gffread/testdata/t.gff b/modules/local/tests/gffread/testdata/t.gff
new file mode 100644
index 0000000..6b1c076
--- /dev/null
+++ b/modules/local/tests/gffread/testdata/t.gff
@@ -0,0 +1,47 @@
+##gff-version 3
+###
+chr23	AUGUSTUS	gene	16515075	16516672	.	-	.	ID=gene19849;description=Protein%20of%20unknown%20function%20%28DUF1635%29
+chr23	AUGUSTUS	mRNA	16515075	16516597	1	-	.	ID=gene19849.t1;Parent=gene19849;description=Protein%20of%20unknown%20function%20%28DUF1635%29
+chr23	AUGUSTUS	exon	16515075	16515794	.	-	.	ID=gene19849.t1.exon1;Parent=gene19849.t1
+chr23	AUGUSTUS	CDS	16515075	16515794	1	-	0	ID=gene19849.t1.cds1;Parent=gene19849.t1
+chr23	AUGUSTUS	exon	16516562	16516597	.	-	.	ID=gene19849.t1.exon2;Parent=gene19849.t1
+chr23	AUGUSTUS	CDS	16516562	16516597	1	-	0	ID=gene19849.t1.cds2;Parent=gene19849.t1
+chr23	gmst	mRNA	16515075	16516672	.	-	.	ID=gene19849.t2;Parent=gene19849;description=Protein%20of%20unknown%20function%20%28DUF1635%29
+chr23	gmst	exon	16515075	16515794	50.2	-	0	ID=gene19849.t2.exon1;Parent=gene19849.t2
+chr23	gmst	CDS	16515075	16515794	50.2	-	0	ID=gene19849.t2.cds1;Parent=gene19849.t2
+chr23	gmst	exon	16516562	16516672	50.2	-	0	ID=gene19849.t2.exon2;Parent=gene19849.t2
+chr23	gmst	CDS	16516562	16516672	50.2	-	0	ID=gene19849.t2.cds2;Parent=gene19849.t2
+###
+chr23	gmst	gene	16530414	16531453	.	-	.	ID=gene19850;description=Myb-like%20DNA-binding%20domain
+chr23	gmst	mRNA	16530414	16531453	.	-	.	ID=gene19850.t1;Parent=gene19850;description=Myb-like%20DNA-binding%20domain
+chr23	gmst	exon	16530414	16531041	42.7	-	1	ID=gene19850.t1.exon1;Parent=gene19850.t1
+chr23	gmst	CDS	16530414	16531041	42.7	-	1	ID=gene19850.t1.cds1;Parent=gene19850.t1
+chr23	gmst	exon	16531197	16531453	42.7	-	0	ID=gene19850.t1.exon2;Parent=gene19850.t1
+chr23	gmst	CDS	16531197	16531453	42.7	-	0	ID=gene19850.t1.cds2;Parent=gene19850.t1
+###
+chr23	AUGUSTUS	gene	16530414	16531542	.	-	.	ID=gene19851;description=Differing%20isoform%20descriptions
+chr23	AUGUSTUS	mRNA	16530414	16531542	1	-	.	ID=gene19851.t1;Parent=gene19851;description=Myb-like%20DNA-binding%20domain
+chr23	AUGUSTUS	exon	16530414	16530721	.	-	.	ID=gene19851.t1.exon1;Parent=gene19851.t1
+chr23	AUGUSTUS	CDS	16530414	16530721	1	-	2	ID=gene19851.t1.cds1;Parent=gene19851.t1
+chr23	AUGUSTUS	exon	16530824	16531041	.	-	.	ID=gene19851.t1.exon2;Parent=gene19851.t1
+chr23	AUGUSTUS	CDS	16530824	16531041	1	-	1	ID=gene19851.t1.cds2;Parent=gene19851.t1
+chr23	AUGUSTUS	exon	16531197	16531326	.	-	.	ID=gene19851.t1.exon3;Parent=gene19851.t1
+chr23	AUGUSTUS	CDS	16531197	16531326	1	-	2	ID=gene19851.t1.cds3;Parent=gene19851.t1
+chr23	AUGUSTUS	exon	16531428	16531542	.	-	.	ID=gene19851.t1.exon4;Parent=gene19851.t1
+chr23	AUGUSTUS	CDS	16531428	16531542	1	-	0	ID=gene19851.t1.cds4;Parent=gene19851.t1
+chr23	GeneMark.hmm3	mRNA	16531514	16531542	.	-	.	ID=gene19851.t2;Parent=gene19851;description=Hypothetical%20protein%20%7C%20no%20eggnog%20hit
+chr23	GeneMark.hmm3	exon	16531514	16531542	.	-	0	ID=gene19851.t2.exon1;Parent=gene19851.t2
+chr23	GeneMark.hmm3	CDS	16531514	16531542	.	-	0	ID=gene19851.t2.cds1;Parent=gene19851.t2
+###
+chr23	AUGUSTUS	gene	16539401	16545431	.	+	.	ID=gene19852;description=nuclease%20HARBI1
+chr23	AUGUSTUS	mRNA	16539401	16545431	1	+	.	ID=gene19852.t1;Parent=gene19852;description=nuclease%20HARBI1
+chr23	AUGUSTUS	exon	16539401	16539509	.	+	.	ID=gene19852.t1.exon1;Parent=gene19852.t1
+chr23	AUGUSTUS	CDS	16539401	16539509	1	+	0	ID=gene19852.t1.cds1;Parent=gene19852.t1
+chr23	AUGUSTUS	exon	16544386	16545431	.	+	.	ID=gene19852.t1.exon2;Parent=gene19852.t1
+chr23	AUGUSTUS	CDS	16544386	16545431	1	+	2	ID=gene19852.t1.cds2;Parent=gene19852.t1
+###
+chr23	AUGUSTUS	gene	16556338	16556796	.	+	.	ID=gene19853;description=Zinc%20finger%20protein
+chr23	AUGUSTUS	mRNA	16556338	16556796	1	+	.	ID=gene19853.t1;Parent=gene19853;description=Zinc%20finger%20protein
+chr23	AUGUSTUS	exon	16556338	16556796	.	+	.	ID=gene19853.t1.exon1;Parent=gene19853.t1
+chr23	AUGUSTUS	CDS	16556338	16556796	1	+	0	ID=gene19853.t1.cds1;Parent=gene19853.t1
+###
diff --git a/subworkflows/local/gff_eggnogmapper.nf b/subworkflows/local/gff_eggnogmapper.nf
index 841a243..8e402d4 100644
--- a/subworkflows/local/gff_eggnogmapper.nf
+++ b/subworkflows/local/gff_eggnogmapper.nf
@@ -16,8 +16,8 @@ workflow GFF_EGGNOGMAPPER {
                                 | join(ch_fasta)
 
     GFF2FASTA_FOR_EGGNOGMAPPER(
-        ch_gffread_inputs.map { meta, gff, fasta -> [ meta, gff ] },
-        ch_gffread_inputs.map { meta, gff, fasta -> fasta }
+        ch_gffread_inputs.map { meta, gff, _fasta -> [ meta, gff ] },
+        ch_gffread_inputs.map { _meta, _gff, fasta -> fasta }
     )
 
     ch_gffread_fasta            = GFF2FASTA_FOR_EGGNOGMAPPER.out.gffread_fasta
@@ -30,9 +30,9 @@ workflow GFF_EGGNOGMAPPER {
                                 | combine(Channel.fromPath(db_folder))
 
     EGGNOGMAPPER(
-        ch_eggnogmapper_inputs.map { meta, fasta, db -> [ meta, fasta ] },
+        ch_eggnogmapper_inputs.map { meta, fasta, _db -> [ meta, fasta ] },
         [],
-        ch_eggnogmapper_inputs.map { meta, fasta, db -> db },
+        ch_eggnogmapper_inputs.map { _meta, _fasta, db -> db },
         [ [], [] ]
     )
 

From f9807724bace7b0d2b3a9b3ce892dc49c887996d Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 11 Dec 2024 21:55:17 +1300
Subject: [PATCH 4/6] Updated snapshots

---
 conf/modules.config                           |   4 +-
 modules.json                                  |   5 -
 .../agat/spfilterbyorfsize/environment.yml    |   7 --
 modules/gallvp/agat/spfilterbyorfsize/main.nf |  60 -----------
 .../gallvp/agat/spfilterbyorfsize/meta.yml    |  67 ------------
 .../agat/spfilterbyorfsize/tests/main.nf.test |  62 -----------
 .../spfilterbyorfsize/tests/main.nf.test.snap | 100 ------------------
 subworkflows/local/gff_merge_cleanup.nf       |  10 +-
 tests/minimal/main.nf.test.snap               |  12 +--
 tests/stub/main.nf.test.snap                  |   8 +-
 10 files changed, 17 insertions(+), 318 deletions(-)
 delete mode 100644 modules/gallvp/agat/spfilterbyorfsize/environment.yml
 delete mode 100644 modules/gallvp/agat/spfilterbyorfsize/main.nf
 delete mode 100644 modules/gallvp/agat/spfilterbyorfsize/meta.yml
 delete mode 100644 modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test
 delete mode 100644 modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap

diff --git a/conf/modules.config b/conf/modules.config
index fbb5f52..dd15d6c 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -240,8 +240,8 @@ process { // SUBWORKFLOW: GFF_MERGE_CLEANUP
         ext.prefix = { "${meta.id}.liftoff.braker" }
     }
 
-    withName: '.*:GFF_MERGE_CLEANUP:AGAT_SPFILTERBYORFSIZE' {
-        ext.args = params.filter_genes_by_aa_length ? "-s ${params.filter_genes_by_aa_length}" : ''
+    withName: '.*:GFF_MERGE_CLEANUP:FILTER_BY_ORF_SIZE' {
+        ext.args = params.filter_genes_by_aa_length ? "--no-pseudo --keep-genes -C -l ${params.filter_genes_by_aa_length * 3}" : ''
     }
 
     withName: '.*:GFF_MERGE_CLEANUP:GT_GFF3' {
diff --git a/modules.json b/modules.json
index 6b9d74a..da05f16 100644
--- a/modules.json
+++ b/modules.json
@@ -15,11 +15,6 @@
                         "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4",
                         "installed_by": ["gxf_fasta_agat_spaddintrons_spextractsequences"]
                     },
-                    "agat/spfilterbyorfsize": {
-                        "branch": "main",
-                        "git_sha": "a0054cdffbd84f002fb6582b28575b699e01098e",
-                        "installed_by": ["modules"]
-                    },
                     "agat/spflagshortintrons": {
                         "branch": "main",
                         "git_sha": "d8f08700c82a3bd14811a3dfe7e7d63838130693",
diff --git a/modules/gallvp/agat/spfilterbyorfsize/environment.yml b/modules/gallvp/agat/spfilterbyorfsize/environment.yml
deleted file mode 100644
index 2c3daab..0000000
--- a/modules/gallvp/agat/spfilterbyorfsize/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
----
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - "bioconda::agat=1.4.2"
diff --git a/modules/gallvp/agat/spfilterbyorfsize/main.nf b/modules/gallvp/agat/spfilterbyorfsize/main.nf
deleted file mode 100644
index 502a9cd..0000000
--- a/modules/gallvp/agat/spfilterbyorfsize/main.nf
+++ /dev/null
@@ -1,60 +0,0 @@
-process AGAT_SPFILTERBYORFSIZE {
-    tag "$meta.id"
-    label 'process_single'
-
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/agat:1.4.2--pl5321hdfd78af_0':
-        'biocontainers/agat:1.4.2--pl5321hdfd78af_0' }"
-
-    input:
-    tuple val(meta), path(gxf)
-    path config
-
-    output:
-    tuple val(meta), path("*.passed.gff")   , emit: passed_gff
-    tuple val(meta), path("*.failed.gff")   , emit: failed_gff
-    path "versions.yml"                     , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args        = task.ext.args ?: ''
-    def prefix      = task.ext.prefix ?: "${meta.id}"
-    def config_arg  = config ? "-c $config" : ''
-    if( "$gxf" in [ "${prefix}.passed.gff", "${prefix}.failed.gff" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
-    """
-    agat_sp_filter_by_ORF_size.pl \\
-        -g $gxf \\
-        $args \\
-        $config_arg \\
-        -o $prefix
-
-    mv \\
-        ${prefix}_NOT* \\
-        "${prefix}.failed.gff"
-
-    mv \\
-        ${prefix}_* \\
-        "${prefix}.passed.gff"
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        agat: \$(agat_sp_flag_short_introns.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    if( "$gxf" in [ "${prefix}.passed.gff", "${prefix}.failed.gff" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
-    """
-    touch ${prefix}.passed.gff
-    touch ${prefix}.failed.gff
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        agat: \$(agat_sp_flag_short_introns.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
-    END_VERSIONS
-    """
-}
diff --git a/modules/gallvp/agat/spfilterbyorfsize/meta.yml b/modules/gallvp/agat/spfilterbyorfsize/meta.yml
deleted file mode 100644
index cf399da..0000000
--- a/modules/gallvp/agat/spfilterbyorfsize/meta.yml
+++ /dev/null
@@ -1,67 +0,0 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
-name: "agat_spfilterbyorfsize"
-description: The script reads a gff annotation file, and create two output files,
-  one contains the gene models with ORF passing the test, the other contains the rest.
-  By default the test is "> 100" that means all gene models that have ORF longer than
-  100 Amino acids, will pass the test.
-keywords:
-  - genomics
-  - GFF/GTF
-  - filter
-  - annotation
-tools:
-  - "agat":
-      description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene
-        annotations in any GTF/GFF format."
-      homepage: "https://agat.readthedocs.io/en/latest/"
-      documentation: "https://agat.readthedocs.io/en/latest/"
-      tool_dev_url: "https://github.com/NBISweden/AGAT"
-      doi: "10.5281/zenodo.3552717"
-      licence: ["GPL v3"]
-      identifier: biotools:AGAT
-
-input:
-  - - meta:
-        type: map
-        description: |
-          Groovy Map containing sample information
-          e.g. `[ id:'sample1' ]`
-    - gxf:
-        type: file
-        description: Input GFF3/GTF file
-        pattern: "*.{gff,gff3,gtf}"
-  - - config:
-        type: file
-        description: |
-          Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any,
-          otherwise it takes the orignal agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose".
-          The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently).
-        pattern: "*.yaml"
-output:
-  - passed_gff:
-      - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-            e.g. `[ id:'sample1' ]
-      - "*.passed.gff":
-          type: file
-          description: GFF file with gene models which pass the filter test
-  - failed_gff:
-      - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-            e.g. `[ id:'sample1' ]
-      - "*.failed.gff":
-          type: file
-          description: GFF file with remaining gene models
-  - versions:
-      - versions.yml:
-          type: file
-          description: File containing software versions
-          pattern: "versions.yml"
-authors:
-  - "@GallVp"
-maintainers:
-  - "@GallVp"
diff --git a/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test b/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test
deleted file mode 100644
index 4a6e1fc..0000000
--- a/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test
+++ /dev/null
@@ -1,62 +0,0 @@
-nextflow_process {
-
-    name "Test Process AGAT_SPFILTERBYORFSIZE"
-    script "../main.nf"
-    process "AGAT_SPFILTERBYORFSIZE"
-
-    tag "modules"
-    tag "modules_gallvp"
-    tag "agat"
-    tag "agat/spfilterbyorfsize"
-
-    test("actinidia_chinensis - genome - gtf") {
-
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.gtf.gz', checkIfExists: true)
-                ]
-                input[1] = []
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out).match() }
-            )
-        }
-
-    }
-
-    test("homo_sapiens - genome - gtf - stub") {
-
-        options '-stub'
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.gtf', checkIfExists: true)
-                ]
-                input[1] = []
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out).match() }
-            )
-        }
-
-    }
-
-
-}
diff --git a/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap b/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap
deleted file mode 100644
index 22b26fe..0000000
--- a/modules/gallvp/agat/spfilterbyorfsize/tests/main.nf.test.snap
+++ /dev/null
@@ -1,100 +0,0 @@
-{
-    "homo_sapiens - genome - gtf - stub": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.passed.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.failed.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "2": [
-                    "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29"
-                ],
-                "failed_gff": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.failed.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "passed_gff": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.passed.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.2",
-            "nextflow": "24.04.4"
-        },
-        "timestamp": "2024-12-10T17:07:11.619928"
-    },
-    "actinidia_chinensis - genome - gtf": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.passed.gff:md5,e2558c89e50df32d654f19f9a69e46a3"
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.failed.gff:md5,d7eb6ae1c3dc30675138029b513073eb"
-                    ]
-                ],
-                "2": [
-                    "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29"
-                ],
-                "failed_gff": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.failed.gff:md5,d7eb6ae1c3dc30675138029b513073eb"
-                    ]
-                ],
-                "passed_gff": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.passed.gff:md5,e2558c89e50df32d654f19f9a69e46a3"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,bc298e3688f3f90f287f56ee6929bd29"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.2",
-            "nextflow": "24.04.4"
-        },
-        "timestamp": "2024-12-10T17:07:06.829402"
-    }
-}
\ No newline at end of file
diff --git a/subworkflows/local/gff_merge_cleanup.nf b/subworkflows/local/gff_merge_cleanup.nf
index fbdea37..8a77eda 100644
--- a/subworkflows/local/gff_merge_cleanup.nf
+++ b/subworkflows/local/gff_merge_cleanup.nf
@@ -1,6 +1,6 @@
 include { AGAT_SPMERGEANNOTATIONS               } from '../../modules/nf-core/agat/spmergeannotations/main'
 include { GT_GFF3                               } from '../../modules/nf-core/gt/gff3/main'
-include { AGAT_SPFILTERBYORFSIZE                } from '../../modules/gallvp/agat/spfilterbyorfsize/main'
+include { GFFREAD as FILTER_BY_ORF_SIZE         } from '../../modules/nf-core/gffread/main'
 include { AGAT_CONVERTSPGXF2GXF                 } from '../../modules/nf-core/agat/convertspgxf2gxf/main'
 
 workflow GFF_MERGE_CLEANUP {
@@ -31,18 +31,18 @@ workflow GFF_MERGE_CLEANUP {
                                 | mix ( ch_gff_branch.braker_only.map { meta, braker_gff, _liftoff_gff -> [ meta, braker_gff ] } )
     ch_versions                 = ch_versions.mix(AGAT_SPMERGEANNOTATIONS.out.versions.first())
 
-    // MODULE: AGAT_SPFILTERBYORFSIZE
+    // MODULE: GFFREAD as FILTER_BY_ORF_SIZE
     ch_filter_input             = ch_merged_gff
                                 | branch {
                                     filter: val_filter_by_aa_length != null
                                     pass: val_filter_by_aa_length == null
                                 }
 
-    AGAT_SPFILTERBYORFSIZE ( ch_filter_input.filter, [] )
+    FILTER_BY_ORF_SIZE ( ch_filter_input.filter, [] )
 
-    ch_filtered_gff             = AGAT_SPFILTERBYORFSIZE.out.passed_gff
+    ch_filtered_gff             = FILTER_BY_ORF_SIZE.out.gffread_gff
                                 | mix ( ch_filter_input.pass )
-    ch_versions                 = ch_versions.mix(AGAT_SPFILTERBYORFSIZE.out.versions.first())
+    ch_versions                 = ch_versions.mix(FILTER_BY_ORF_SIZE.out.versions.first())
 
     // MODULE: GT_GFF3
     GT_GFF3 ( ch_filtered_gff )
diff --git a/tests/minimal/main.nf.test.snap b/tests/minimal/main.nf.test.snap
index f4b2aba..e0f2ce3 100644
--- a/tests/minimal/main.nf.test.snap
+++ b/tests/minimal/main.nf.test.snap
@@ -16,9 +16,6 @@
                     "AGAT_SPEXTRACTSEQUENCES": {
                         "agat": "v1.4.0"
                     },
-                    "AGAT_SPFILTERBYORFSIZE": {
-                        "agat": "v1.4.1"
-                    },
                     "BRAKER3": {
                         "braker3": "3.0.8",
                         "augustus": "3.5.0",
@@ -40,6 +37,9 @@
                     "FASTAVALIDATOR": {
                         "py_fasta_validator": 0.6
                     },
+                    "FILTER_BY_ORF_SIZE": {
+                        "gffread": "0.12.7"
+                    },
                     "FINAL_GFF_CHECK": {
                         "genometools": "1.6.5"
                     },
@@ -70,9 +70,9 @@
                 "stable paths": [
                     "a_thaliana.cdna.fasta:md5,12b9bef973e488640aec8c04ba3882fe",
                     "a_thaliana.cds.fasta:md5,b81060419355a590560f92aec8536281",
-                    "a_thaliana.gt.gff3:md5,8ab16549095f605ff8715ac4a3de58ed",
+                    "a_thaliana.gt.gff3:md5,528459cf9596523bf66de99d24c37e20",
                     "a_thaliana.pep.fasta:md5,4994c0393ca0245a1c57966d846d101e",
-                    "a_thaliana.gff3:md5,d23d16cd86499d48a30ffb981ed27891",
+                    "a_thaliana.gff3:md5,30adac1b21d7aaed6ca7fb71ab33f32d",
                     "summary_stats.json:md5,007ba5cf2b7a2fd395a27d9458ca2d2e"
                 ],
                 "stable names": [
@@ -97,6 +97,6 @@
             "nf-test": "0.9.2",
             "nextflow": "24.04.4"
         },
-        "timestamp": "2024-12-11T06:36:01.956188"
+        "timestamp": "2024-12-11T21:49:09.751422"
     }
 }
\ No newline at end of file
diff --git a/tests/stub/main.nf.test.snap b/tests/stub/main.nf.test.snap
index 1548c96..7ed6f6e 100644
--- a/tests/stub/main.nf.test.snap
+++ b/tests/stub/main.nf.test.snap
@@ -16,9 +16,6 @@
                     "AGAT_SPEXTRACTSEQUENCES": {
                         "agat": "v1.4.0"
                     },
-                    "AGAT_SPFILTERBYORFSIZE": {
-                        "agat": "v1.4.1"
-                    },
                     "AGAT_SPFILTERFEATUREFROMKILLLIST": {
                         "agat": "v1.4.0"
                     },
@@ -73,6 +70,9 @@
                     "FASTP": {
                         "fastp": "0.23.4"
                     },
+                    "FILTER_BY_ORF_SIZE": {
+                        "gffread": "0.12.7"
+                    },
                     "FINAL_GFF_CHECK": {
                         "genometools": "1.6.5"
                     },
@@ -208,6 +208,6 @@
             "nf-test": "0.9.2",
             "nextflow": "24.04.4"
         },
-        "timestamp": "2024-12-10T21:52:10.308719"
+        "timestamp": "2024-12-11T21:51:12.841395"
     }
 }
\ No newline at end of file

From ab3ae3755423e16218e95c7e1de64d94adbd6a15 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 12 Dec 2024 09:58:01 +1300
Subject: [PATCH 5/6] Updated README and snapshot

---
 README.md                       |  5 +++--
 tests/minimal/main.nf.test      |  3 +++
 tests/minimal/main.nf.test.snap | 17 +++++++++++++++--
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 177e8f5..af463db 100644
--- a/README.md
+++ b/README.md
@@ -39,11 +39,12 @@
   - Optionally, allow or remove iso-forms
   - Remove BRAKER models from Liftoff loci
   - Merge Liftoff and BRAKER models
-  - Optionally, remove models with ORFs shorter than `N` amino acids
   - Optionally, remove models without any EggNOG-mapper hits
 - [EggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper): Add functional annotation to gff
 - [GenomeTools](https://github.com/genometools/genometools): GFF format validation
-- [GffRead](https://github.com/gpertea/gffread): Extraction of protein sequences
+- [GffRead](https://github.com/gpertea/gffread)
+  - Extraction of protein sequences
+  - Optionally, remove models with ORFs shorter than `N` amino acids
 - [OrthoFinder](https://github.com/davidemms/OrthoFinder): Perform phylogenetic orthology inference across genomes
 - [GffCompare](https://github.com/gpertea/gffcompare): Compare and benchmark against an existing annotation
 - [BUSCO](https://gitlab.com/ezlab/busco): Completeness statistics for genome and annotation through proteins
diff --git a/tests/minimal/main.nf.test b/tests/minimal/main.nf.test
index cce8a77..5f1d1af 100644
--- a/tests/minimal/main.nf.test
+++ b/tests/minimal/main.nf.test
@@ -38,6 +38,8 @@ nextflow_pipeline {
                 ['**']
             )
 
+            def summary_stats = (Map) new groovy.json.JsonSlurper().parseText(file("$outputDir/genepal_data/summary_stats.json").text)
+
             assertAll(
                 { assert workflow.success},
                 { assert snapshot(
@@ -46,6 +48,7 @@ nextflow_pipeline {
                         'versions': removeNextflowVersion("$outputDir/pipeline_info/genepal_software_mqc_versions.yml"),
                         'stable paths': stable_path,
                         'stable names': getRelativePath(stable_name, outputDir),
+                        'summary_stats': summary_stats
                     ]
                 ).match() }
             )
diff --git a/tests/minimal/main.nf.test.snap b/tests/minimal/main.nf.test.snap
index e0f2ce3..96c8444 100644
--- a/tests/minimal/main.nf.test.snap
+++ b/tests/minimal/main.nf.test.snap
@@ -90,13 +90,26 @@
                     "genepal_report.html",
                     "multiqc_report.html",
                     "pipeline_info"
-                ]
+                ],
+                "summary_stats": {
+                    "stats": [
+                        {
+                            "ID": "a_thaliana",
+                            "Genes": 252,
+                            "mRNA": 265,
+                            "CDS": 1340,
+                            "Exons": 1340,
+                            "Intron": 1075,
+                            "Non canon splice sites": 18
+                        }
+                    ]
+                }
             }
         ],
         "meta": {
             "nf-test": "0.9.2",
             "nextflow": "24.04.4"
         },
-        "timestamp": "2024-12-11T21:49:09.751422"
+        "timestamp": "2024-12-12T09:36:52.952048"
     }
 }
\ No newline at end of file

From c65ebaac0bd81f29c38391be1fd46cf46170284e Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 16 Dec 2024 10:04:21 +1300
Subject: [PATCH 6/6] Added 1 to filter_genes_by_aa_length to exclude stop
 codon from filter length

---
 CHANGELOG.md         |  2 +-
 conf/modules.config  |  2 +-
 docs/parameters.md   | 14 +++++++-------
 nextflow_schema.json |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 33813b7..9937888 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v0.6.0 - [10-Dec-2024]
+## v0.6.0 - [16-Dec-2024]
 
 ### 'Added'
 
diff --git a/conf/modules.config b/conf/modules.config
index dd15d6c..2a14621 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -241,7 +241,7 @@ process { // SUBWORKFLOW: GFF_MERGE_CLEANUP
     }
 
     withName: '.*:GFF_MERGE_CLEANUP:FILTER_BY_ORF_SIZE' {
-        ext.args = params.filter_genes_by_aa_length ? "--no-pseudo --keep-genes -C -l ${params.filter_genes_by_aa_length * 3}" : ''
+        ext.args = params.filter_genes_by_aa_length != null ? "--no-pseudo --keep-genes -C -l ${ ( params.filter_genes_by_aa_length + 1 ) * 3 }" : '' // +1 codon for the stop; null check mirrors the branch in GFF_MERGE_CLEANUP
     }
 
     withName: '.*:GFF_MERGE_CLEANUP:GT_GFF3' {
diff --git a/docs/parameters.md b/docs/parameters.md
index 0c2bb09..7ccd67a 100644
--- a/docs/parameters.md
+++ b/docs/parameters.md
@@ -59,13 +59,13 @@ A Nextflow pipeline for consensus, phased and pan-genome annotation.
 
 ## Post-annotation filtering options
 
-| Parameter                     | Description                                                                                                                            | Type      | Default | Required | Hidden |
-| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ |
-| `allow_isoforms`              | Allow multiple isoforms for gene models                                                                                                | `boolean` | True    |          |        |
-| `enforce_full_intron_support` | Require every model to have external evidence for all its introns                                                                      | `boolean` | True    |          |        |
-| `filter_liftoff_by_hints`     | Use BRAKER hints to filter Liftoff models                                                                                              | `boolean` | True    |          |        |
-| `eggnogmapper_purge_nohits`   | Purge transcripts which do not have a hit against eggnog                                                                               | `boolean` |         |          |        |
-| `filter_genes_by_aa_length`   | Filter genes with open reading frames shorter than the specified number of amino acids. If set to `null`, this filter step is skipped. | `integer` | 24      |          |        |
+| Parameter                     | Description                                                                                                                                                     | Type      | Default | Required | Hidden |
+| ----------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ |
+| `allow_isoforms`              | Allow multiple isoforms for gene models                                                                                                                         | `boolean` | True    |          |        |
+| `enforce_full_intron_support` | Require every model to have external evidence for all its introns                                                                                               | `boolean` | True    |          |        |
+| `filter_liftoff_by_hints`     | Use BRAKER hints to filter Liftoff models                                                                                                                       | `boolean` | True    |          |        |
+| `eggnogmapper_purge_nohits`   | Purge transcripts which do not have a hit against eggnog                                                                                                        | `boolean` |         |          |        |
+| `filter_genes_by_aa_length`   | Filter genes with open reading frames shorter than the specified number of amino acids excluding the stop codon. If set to `null`, this filter step is skipped. | `integer` | 24      |          |        |
 
 ## Annotation output options
 
diff --git a/nextflow_schema.json b/nextflow_schema.json
index abe26a9..1012531 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -277,7 +277,7 @@
                     "type": "integer",
                     "default": 24,
                     "fa_icon": "fas fa-hashtag",
-                    "description": "Filter genes with open reading frames shorter than the specified number of amino acids. If set to `null`, this filter step is skipped.",
+                    "description": "Filter genes with open reading frames shorter than the specified number of amino acids excluding the stop codon. If set to `null`, this filter step is skipped.",
                     "minimum": 3
                 }
             }