-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Now REPEATMASKER GFF output is saved via CUSTOM_RMOUTTOGFF3
- Loading branch information
Showing
11 changed files
with
349 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
dependencies: | ||
- "bioconda::perl-bioperl=1.7.8" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
process CUSTOM_RMOUTTOGFF3 { | ||
tag "$meta.id" | ||
label 'process_single' | ||
|
||
conda "${moduleDir}/environment.yml" | ||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/perl-bioperl:1.7.8--hdfd78af_1': | ||
'biocontainers/perl-bioperl:1.7.8--hdfd78af_1' }" | ||
|
||
input: | ||
tuple val(meta), path(rmout) | ||
|
||
output: | ||
tuple val(meta), path("*.gff3") , emit: gff3 | ||
path "versions.yml" , emit: versions | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
shell: | ||
prefix = task.ext.prefix ?: "${meta.id}" | ||
template 'rmouttogff3.pl' | ||
|
||
stub: | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
""" | ||
touch ${prefix}.gff3 | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
perl: \$(perl --version | sed -n 's|This is perl.*(v\\(.*\\)) .*|\\1|p' ) | ||
END_VERSIONS | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json | ||
name: "custom_rmouttogff3" | ||
description: Convert RepeatMasker out file to gff3 | ||
keywords: | ||
- genomics | ||
- repeat | ||
- gff | ||
tools: | ||
- "perl": | ||
description: "Bioinformatics Toolkit" | ||
homepage: "https://www.perl.org" | ||
documentation: "https://www.perl.org" | ||
tool_dev_url: "https://www.perl.org" | ||
licence: ["GPL"] | ||
identifier: "" | ||
|
||
input: | ||
# Only when we have meta | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- rmout: | ||
type: file | ||
description: RepeatMasker out file | ||
pattern: "*.out" | ||
output: | ||
- gff3: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- "*.gff3": | ||
type: file | ||
description: GFF3 formatted output | ||
- versions: | ||
- versions.yml: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
authors: | ||
- "@GallVp" | ||
maintainers: | ||
- "@GallVp" |
92 changes: 92 additions & 0 deletions
92
modules/gallvp/custom/rmouttogff3/templates/rmouttogff3.pl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#!/usr/bin/env perl
use strict;
use warnings;

# Originally written by Ross Crowhurst
# Adapted by Usman Rashid for Nextflow
# AS IS WHERE IS - USE AT YOUR OWN RISK
# License: MIT

=head1 DESCRIPTION

Converts a RepeatMasker .out file to gff3 format. The
standard gff output from RepeatMasker is gff version 2.

RepeatMasker "out.gff"

    seq1 RepeatMasker similarity 1 1295 28.1 - . Target "Motif:Gypsy7-PTR_I-int" 3544 4847

RepeatMasker "out" file has the following format:

    SW perc perc perc query position in query matching repeat position in repeat
    score div. del. ins. sequence begin end (left) repeat class/family begin end (left) ID
    4634 28.1 1.2 0.5 seq1 1 1295 (0) C Gypsy7-PTR_I-int LTR/Gypsy (1215) 4847 3544 1

After conversion to gff3:

    seq1 RepeatMasker dispersed_repeat 1 1295 4634 - . ID=1_seq1_1_1295_Gypsy7-PTR_I-int;Name=Gypsy7-PTR_I-int;class=LTR;family=Gypsy;percDiv=28.1;percDel=1.2;percIns=0.5

Notes:

- The Target attribute is not added in this implementation

- Only lines whose first column is an integer (the SW score) are treated as
  records; header lines and informational messages are skipped

- A "###" directive is written whenever the query sequence changes and once
  more at the end of the file

=cut

# Input path and output prefix are Nextflow shell-template placeholders that
# are substituted before this script is executed.
my $repeatmaskerOutFile = "!{rmout}";
my $gff3Outfile = "!{prefix}.gff3";

my $source = "RepeatMasker";
my $type = "dispersed_repeat";

# A record has 15 whitespace-separated columns (SW score .. ID); anything
# after the ID column is ignored.
my $REQUIRED_FIELDS = 15;

# Percent-encode characters that are reserved inside GFF3 column 9
# (';', '=', '&', ',', '%') as well as control characters, so that unusual
# sequence or repeat names cannot corrupt the attribute list.
sub escape_attribute {
    my ($value) = @_;
    $value =~ s/([;=&,%\x00-\x1f\x7f])/sprintf("%%%02X", ord($1))/ge;
    return $value;
}

open(my $in, '<', $repeatmaskerOutFile)
    or die "ERROR can not open repeatmasker out file '$repeatmaskerOutFile': $!\n";
open(my $out, '>', $gff3Outfile)
    or die "ERROR can not open gff3 out file '$gff3Outfile': $!\n";

print {$out} "##gff-version 3\n";

my $lastqName = "";
while (my $line = <$in>) {
    chomp $line;
    $line =~ s/\r\z//;

    # split ' ' drops leading whitespace and splits on any run of
    # spaces/tabs, which is what the column-aligned .out format needs.
    my @fields = split ' ', $line;
    next unless @fields;

    # Data records start with the integer SW score. Testing the first column
    # (rather than searching the whole line for header words) keeps records
    # whose sequence or repeat names happen to contain "SW", "perc" or
    # "score", and skips the header and any informational message lines.
    next unless $fields[0] =~ /\A\d+\z/;

    if (@fields < $REQUIRED_FIELDS) {
        warn "WARNING skipping malformed line $. of '$repeatmaskerOutFile'\n";
        next;
    }

    my ($SWscore, $percDiv, $percDel, $percIns, $qName, $qStart, $qEnd,
        $left, $ori, $repeatName, $repeatClassFamily, $rStart, $rEnd,
        $rLeft, $rId) = @fields;

    # RepeatMasker marks the complement strand with "C"; GFF3 uses "-".
    my $strand = $ori eq "C" ? "-" : $ori;

    my $id = join("_", $rId, $qName, $qStart, $qEnd, $repeatName);

    # "class/family" may lack the family part (e.g. "Simple_repeat").
    my ($class, $family) = split /\//, $repeatClassFamily;
    $class  ||= "na";
    $family ||= "na";

    my $attributes = join(";",
        "ID="      . escape_attribute($id),
        "Name="    . escape_attribute($repeatName),
        "class="   . escape_attribute($class),
        "family="  . escape_attribute($family),
        "percDiv=" . escape_attribute($percDiv),
        "percDel=" . escape_attribute($percDel),
        "percIns=" . escape_attribute($percIns));

    my $gff3Line = join("\t",
        $qName,
        $source,
        $type,
        $qStart,
        $qEnd,
        $SWscore,
        $strand,
        ".",
        $attributes);

    # Close the previous sequence's feature group when the query changes.
    if (($lastqName ne $qName) and ($lastqName ne "")) {
        print {$out} "###\n";
    }
    print {$out} "$gff3Line\n";
    $lastqName = $qName;
}
print {$out} "###\n";

# Buffered write errors only surface at close, so check it.
close($out) or die "ERROR can not close gff3 out file '$gff3Outfile': $!\n";
close($in);

# Report the version of the running interpreter (e.g. "5.32.1") without
# spawning a second perl process and parsing its banner.
my $perl_version = sprintf("%vd", $^V);

# Open the file and write the YAML content
open(my $fh, '>', 'versions.yml') or die "Could not open versions.yml file: $!\n";
print {$fh} qq{!{task.process}:\n perl: $perl_version\n};
close($fh) or die "Could not close versions.yml file: $!\n";

exit(0);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
// nf-test specification for the CUSTOM_RMOUTTOGFF3 process.
// REPEATMASKER_REPEATMASKER is run in the setup block on the sarscov2 genome
// and its `out` channel is used as the input of both test cases.
nextflow_process {

    name "Test Process CUSTOM_RMOUTTOGFF3"
    script "../main.nf"
    process "CUSTOM_RMOUTTOGFF3"

    tag "modules"
    tag "modules_gallvp"
    tag "custom"
    tag "custom/rmouttogff3"
    tag "repeatmasker/repeatmasker"

    setup {
        run("REPEATMASKER_REPEATMASKER") {
            script "../../../repeatmasker/repeatmasker/main.nf"
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                ]
                input[1] = []
                """
            }
        }
    }

    // Real run: the process output is compared against the stored snapshot.
    test("sarscov2 - genome - fasta - repeatmasker - out") {

        when {
            process {
                """
                input[0] = REPEATMASKER_REPEATMASKER.out.out
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    // Stub run: same input, executed with the -stub option.
    test("sarscov2 - genome - fasta - repeatmasker - out - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = REPEATMASKER_REPEATMASKER.out.out
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
{ | ||
"sarscov2 - genome - fasta - repeatmasker - out - stub": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,46901143ed4508b93cb4b64cd0b352f2" | ||
], | ||
"gff3": [ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,46901143ed4508b93cb4b64cd0b352f2" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-27T16:27:50.770327" | ||
}, | ||
"sarscov2 - genome - fasta - repeatmasker - out": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.gff3:md5,689df952b225e56b521d12f0dfab2ab8" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,029655f0760e918db2ef104e09d379c1" | ||
], | ||
"gff3": [ | ||
[ | ||
{ | ||
"id": "test", | ||
"single_end": false | ||
}, | ||
"test.gff3:md5,689df952b225e56b521d12f0dfab2ab8" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,029655f0760e918db2ef104e09d379c1" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-27T16:49:03.738128" | ||
} | ||
} |
Oops, something went wrong.