Skip to content

Commit

Permalink
hifiasm: expose --trio-dual parameter and parental read list inputs…
Browse files Browse the repository at this point in the history
… for trio mode (#1339)

* wrapper update for 0.19.7

* clean up

* fixed conditional to make more sense

* expose trio-dual parameter

* remove falsevalue

* starting on readlist input

* parameter edits for readlist input

* edited cmd for trio reads

* edited cmd for list input

* add type to triodual param

* edit actual cmd for reads/lists input

* moved child reads macro out of when arguments

* actually moved the child input properly lolllll

* make parental list input only one file per parent

* fixed trio-dual parameter throwing false into things

* actually fix it?

* test 5 fix?

* test 15: trio list input test

* whitespace

* add tabular to list datatypes

Co-authored-by: Björn Grüning <[email protected]>

* moving reads outside of condl

Co-authored-by: Björn Grüning <[email protected]>

* moving reads outside of condl

Co-authored-by: Björn Grüning <[email protected]>

---------

Co-authored-by: Björn Grüning <[email protected]>
  • Loading branch information
abueg and bgruening authored Oct 20, 2023
1 parent f0e75df commit 7dca92b
Show file tree
Hide file tree
Showing 3 changed files with 272 additions and 22 deletions.
94 changes: 72 additions & 22 deletions tools/hifiasm/hifiasm.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<description>haplotype-resolved de novo assembler for PacBio Hifi reads</description>
<macros>
<token name="@TOOL_VERSION@">0.19.7</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token>
<xml name="reads">
<param name="reads" type="data" format="@FORMATS@" multiple="true" label="Input reads" />
Expand Down Expand Up @@ -50,20 +50,26 @@
#end for
#end if
#if str($mode.mode_selector) == 'trio':
#for idx, read in enumerate($mode.hap1_reads):
#set $inputfile = 'hap1_input_%d.%s' % ($idx, $read.dataset.extension)
ln -s '$read' $inputfile &&
$hap1_inputs.append($inputfile)
#end for
#for idx, read in enumerate($mode.hap2_reads):
#set $inputfile = 'hap2_input_%d.%s' % ($idx, $read.dataset.extension)
ln -s '$read' $inputfile &&
$hap2_inputs.append($inputfile)
#end for
#set $hap1_filenames = ' '.join($hap1_inputs)
#set $hap2_filenames = ' '.join($hap2_inputs)
yak count -k$mode.yak_kmer_length -b$filter_bits -t\${GALAXY_SLOTS:-1} -o hap1.yak $hap1_filenames &&
yak count -k$mode.yak_kmer_length -b$filter_bits -t\${GALAXY_SLOTS:-1} -o hap2.yak $hap2_filenames &&
## Trio parental evidence: either count k-mers from the parents' reads with yak,
## or keep hold of the per-parent read-name lists for the hifiasm call below.
#if str($mode.trioinput.trio_input_selector) == 'reads':
    ## Link each parental dataset under a name that carries its datatype
    ## extension, collecting the link names so yak receives them all at once.
    #for $n, $parent_read in enumerate($mode.trioinput.hap1_reads):
        #set $inputfile = 'hap1_input_%d.%s' % ($n, $parent_read.dataset.extension)
        ln -s '$parent_read' $inputfile &&
        #silent $hap1_inputs.append($inputfile)
    #end for
    #for $n, $parent_read in enumerate($mode.trioinput.hap2_reads):
        #set $inputfile = 'hap2_input_%d.%s' % ($n, $parent_read.dataset.extension)
        ln -s '$parent_read' $inputfile &&
        #silent $hap2_inputs.append($inputfile)
    #end for
    #set $hap1_filenames = ' '.join($hap1_inputs)
    #set $hap2_filenames = ' '.join($hap2_inputs)
    ## One yak k-mer database per parent; hifiasm consumes them via -1 / -2.
    yak count -k$mode.yak_kmer_length -b$filter_bits -t\${GALAXY_SLOTS:-1} -o hap1.yak $hap1_filenames &&
    yak count -k$mode.yak_kmer_length -b$filter_bits -t\${GALAXY_SLOTS:-1} -o hap2.yak $hap2_filenames &&
#elif str($mode.trioinput.trio_input_selector) == 'lists':
    ## No k-mer counting in list mode: just remember the two list datasets.
    #set $hap1_filenames = $mode.trioinput.hap1_list
    #set $hap2_filenames = $mode.trioinput.hap2_list
#end if
#end if
hifiasm
-t \${GALAXY_SLOTS:-1}
Expand Down Expand Up @@ -99,10 +105,17 @@
#end if
#end if
#if str($mode.mode_selector) == 'trio':
-1 hap1.yak
-2 hap2.yak
## Parental evidence handed to hifiasm depends on what the user supplied.
#if str($mode.trioinput.trio_input_selector) == 'reads':
    ## yak k-mer databases written by the "yak count" calls earlier in this command
    -1 hap1.yak
    -2 hap2.yak
#elif str($mode.trioinput.trio_input_selector) == 'lists':
    ## Per-parent read-name lists. In this branch each variable holds a single
    ## dataset, so the path is single-quoted like every other dataset path.
    -3 '$hap1_filenames'
    -4 '$hap2_filenames'
#end if
-c $mode.max_kmers
-d $mode.min_kmers
$mode.trio_dual
#end if
#if str($purge_options.purge_selector) == 'set':
-l $purge_options.purge_level
Expand Down Expand Up @@ -175,11 +188,24 @@
</when>
<when value="trio">
<expand macro="reads" />
<param name="hap1_reads" type="data" format="fastq,fastq.gz" multiple="true" label="Haplotype 1 reads" />
<param name="hap2_reads" type="data" format="fastq,fastq.gz" multiple="true" label="Haplotype 2 reads" />
<!-- Parental evidence for trio mode: either the parents' reads (k-mer counted
     with yak before assembly) or one read-name list per parent. -->
<conditional name="trioinput">
    <param name="trio_input_selector" type="select" label="What parental information are you using?">
        <option value="reads">Parental reads (FASTA or FASTQ files, gzipped or otherwise)</option>
        <option value="lists">Lists of reads assigned by parent (text files)</option>
    </param>
    <when value="reads">
        <!-- Accept the same formats as the child reads: this tool's own trio
             test supplies gzipped FASTA for both parents. -->
        <param name="hap1_reads" type="data" format="@FORMATS@" multiple="true" label="Haplotype 1 reads" />
        <param name="hap2_reads" type="data" format="@FORMATS@" multiple="true" label="Haplotype 2 reads" />
    </when>
    <when value="lists">
        <!-- Exactly one list dataset per parent; passed to hifiasm as -3 / -4. -->
        <param name="hap1_list" type="data" format="txt,tabular" label="Haplotype 1 read list" help="Text file with one read name per line" />
        <param name="hap2_list" type="data" format="txt,tabular" label="Haplotype 2 read list" help="Text file with one read name per line" />
    </when>
</conditional>
<!-- Trio-binning k-mer frequency thresholds, passed to hifiasm as -c / -d.
     NOTE: the parameter names read inverted relative to their labels
     (max_kmers carries the LOWER bound, min_kmers the UPPER bound). The names
     are part of the tool's interface, so go by the labels, not the names. -->
<param name="max_kmers" argument="-c" type="integer" value="2" label="Lower bound of the binned k-mer's frequency" />
<param name="min_kmers" argument="-d" type="integer" value="5" label="Upper bound of the binned k-mer's frequency" />
<!-- k-mer size handed to "yak count -k" when parental reads are counted. -->
<param name="yak_kmer_length" type="integer" min="0" max="64" value="31" label="Yak counter k-mer length" />
<!-- Boolean flag: renders as "-\-trio-dual" without the backslash when checked, and as nothing when unchecked. -->
<param name="trio_dual" argument="--trio-dual" type="boolean" truevalue="--trio-dual" falsevalue="" label="Utilize homology information to correct trio-phasing errors" />
</when>
</conditional>
<param name="filter_bits" argument="-f" type="integer" min="0" value="37" label="Bits for bloom filter" help="A value of 0 disables the bloom filter" />
Expand Down Expand Up @@ -428,9 +454,12 @@
<param name="filter_bits" value="0"/>
<conditional name="mode">
<param name="mode_selector" value="trio"/>
<param name="reads" value="child.fasta.gz"/>
<param name="hap1_reads" value="paternal.fasta.gz"/>
<param name="hap2_reads" value="maternal.fasta.gz"/>
<!-- Mirror the tool's parameter tree: the child reads live directly in the
     trio "mode" branch, while the selector and the parental reads belong to
     the nested "trioinput" conditional. -->
<param name="reads" value="child.fasta.gz"/>
<conditional name="trioinput">
    <param name="trio_input_selector" value="reads"/>
    <param name="hap1_reads" value="paternal.fasta.gz"/>
    <param name="hap2_reads" value="maternal.fasta.gz"/>
</conditional>
<param name="max_kmers" value="2"/>
<param name="min_kmers" value="5"/>
</conditional>
Expand Down Expand Up @@ -556,6 +585,27 @@
<param name="bins_out" value="yes" />
<output_collection name="bin_files" type="list" count="3" />
</test>
<!-- TEST 15: Test trio LIST mode.
     Parental evidence is given as read-name lists instead of reads, so no yak
     counting runs. The haplotype assignment matches the trio reads test
     (hap1 = paternal, hap2 = maternal). -->
<test expect_num_outputs="6">
    <param name="filter_bits" value="0"/>
    <param name="log_out" value="yes"/>
    <conditional name="mode">
        <param name="mode_selector" value="trio"/>
        <param name="reads" value="child.fasta.gz"/>
        <conditional name="trioinput">
            <param name="trio_input_selector" value="lists"/>
            <param name="hap1_list" value="paternal.headers.txt"/>
            <param name="hap2_list" value="maternal.headers.txt"/>
        </conditional>
        <param name="max_kmers" value="2"/>
        <param name="min_kmers" value="5"/>
    </conditional>
    <!-- Both list files hold 100 read names, so the log line below is
         expected regardless of which parent is processed first. -->
    <output name="log_file" ftype="txt">
        <assert_contents>
            <has_text text="flagged 100 reads, out of 100 lines in file"/>
        </assert_contents>
    </output>
</test>
</tests>
<help><![CDATA[
.. class:: infomark
Expand Down
100 changes: 100 additions & 0 deletions tools/hifiasm/test-data/maternal.headers.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
K12_1
K12_3
K12_5
K12_7
K12_9
K12_11
K12_13
K12_15
K12_17
K12_19
K12_21
K12_23
K12_25
K12_27
K12_29
K12_31
K12_33
K12_35
K12_37
K12_39
K12_41
K12_43
K12_45
K12_47
K12_49
K12_51
K12_53
K12_55
K12_57
K12_59
K12_61
K12_63
K12_65
K12_67
K12_69
K12_71
K12_73
K12_75
K12_77
K12_79
K12_81
K12_83
K12_85
K12_87
K12_89
K12_91
K12_93
K12_95
K12_97
K12_99
K12_101
K12_103
K12_105
K12_107
K12_109
K12_111
K12_113
K12_115
K12_117
K12_119
K12_121
K12_123
K12_125
K12_127
K12_129
K12_131
K12_133
K12_135
K12_137
K12_139
K12_141
K12_143
K12_145
K12_147
K12_149
K12_151
K12_153
K12_155
K12_157
K12_159
K12_161
K12_163
K12_165
K12_167
K12_169
K12_171
K12_173
K12_175
K12_177
K12_179
K12_181
K12_183
K12_185
K12_187
K12_189
K12_191
K12_193
K12_195
K12_197
K12_199
100 changes: 100 additions & 0 deletions tools/hifiasm/test-data/paternal.headers.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
K12_699
K12_701
K12_703
K12_705
K12_707
K12_709
K12_711
K12_713
K12_715
K12_717
K12_719
K12_721
K12_723
K12_725
K12_727
K12_729
K12_731
K12_733
K12_735
K12_737
K12_739
K12_741
K12_743
K12_745
K12_747
K12_749
K12_751
K12_753
K12_755
K12_757
K12_759
K12_761
K12_763
K12_765
K12_767
K12_769
K12_771
K12_773
K12_775
K12_777
K12_779
K12_781
K12_783
K12_785
K12_787
K12_789
K12_791
K12_793
K12_795
K12_797
K12_799
K12_801
K12_803
K12_805
K12_807
K12_809
K12_811
K12_813
K12_815
K12_817
K12_819
K12_821
K12_823
K12_825
K12_827
K12_829
K12_831
K12_833
K12_835
K12_837
K12_839
K12_841
K12_843
K12_845
K12_847
K12_849
K12_851
K12_853
K12_855
K12_857
K12_859
K12_861
K12_863
K12_865
K12_867
K12_869
K12_871
K12_873
K12_875
K12_877
K12_879
K12_881
K12_883
K12_885
K12_887
K12_889
K12_891
K12_893
K12_895
K12_897

0 comments on commit 7dca92b

Please sign in to comment.