-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcondor.nf
701 lines (561 loc) · 22 KB
/
condor.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
nextflow.enable.dsl=2
path_file = "$baseDir/test_data/"
/*
//c3c4 sedges
params.align = path_file + "C3C4/cyp_coding.aa.coor_mays.fa"
params.tree = path_file + "C3C4/cyp_coding.phy_phyml_tree.txt"
params.outgroup = path_file+"C3C4/outgroup.txt"
params.phenotype = path_file+"C3C4/besnard2009_convergent_species.txt"
*/
/*
//no c3c4 clade sedges adapted pheno
params.align = path_file + "no_c3c4_clade/cyp_coding_noc3c4.aa.coor_mays.fa"
params.tree = path_file + "no_c3c4_clade/cyp_coding_noc3c4.phy_phyml_tree.txt"
params.outgroup = path_file+"no_c3c4_clade/outgroup.txt"
params.phenotype = path_file+"no_c3c4_clade/besnard2009_c4pheno.txt" //phenotype could be optional ???
*/
/*
//HIV
params.align = path_file + "HIV/align.noCRF.jphmm_outgroup.aa.fa"
params.tree = path_file + "HIV/root.align.noCRF.jphmm_outgroup.fa.treefile"
params.outgroup = path_file+"HIV/outgroup.txt"
params.phenotype = path_file+"HIV/id_phenotype.txt"
*/
/*
//rhodopsin
params.align = path_file + "rhodopsin/lessgappy.align_reroot.fa"
params.tree = path_file + "rhodopsin/tree_rerooted.nhx"
params.outgroup = path_file+"rhodopsin/outgroup_bis.txt"
params.phenotype = path_file+"rhodopsin/id_phenotype_marine.txt" //id_phenotype_marine.txt id_phenotype.txt
*/
//declaration of parameters
params.help=false
params.resdir=path_file+"results/c3c4_phenotype/JTT+R3/" //do not forget to change the resdir
params.model = "JTT+R3" // best: will choose the best model, other wise will take the given model
params.matrices = "$baseDir/assets/protein_model.txt"
params.nb_simu = 10000 //number of simulations to perform
params.min_seq = 2 //at least (11 for rhodopsine, 2 for c3c4, 10 for synthetic and HIV) = 0.5% seq
params.min_eem = 3 // At least 3 EEMs
params.positions= "none" // if the file exists, then only the given positions (0-based coordinates) are analysed by condor and min_seq is useless, but the branches and rates are optimised on the full alignment.
params.freqmode = "Fmodel" //Fmodel, if something else: FO. Need to be changed to allow other frequencies
/////OPTIONAL PARAMETERS
params.branches = "condor" // condor, correlation, emergence.
//params.branches_eem = "true"
//params.branches_corr = "true"
params.correction = 'holm' // holm bonferroni correction , could be fdr_bh for benjamini hochberg
params.alpha = 0.1 //limit threshold (included)
params.bayes = 2
params.align="null"
params.tree="null"
params.outgroup="null"
params.phenotype="null"
params.rates="rates.txt"
//creation of parameters
align = file(params.align)
tree = file(params.tree)
outgroup = file(params.outgroup)
matrices = file(params.matrices)
model = params.model
nb_simu = params.nb_simu
min_seq = params.min_seq
min_eem = params.min_eem
positions = file(params.positions)
freqmode = params.freqmode
//////////// Optional parameters
phenotype = file(params.phenotype)
correction = params.correction
alpha = params.alpha //risk 0.1 0.05
bayes = params.bayes //limit log Bayes Factor 2 20
branches = params.branches
//branches_corr = params.branches_corr
//branches_eem = params.branches_eem
// create result directory
resdir=file(params.resdir)
resdir.with {mkdirs()}
def usage(){
help="""CONDOR Usage:
nextflow run condor.nf --align <input alignment (FASTA)> \\
--tree <input tree (NEWICK)> \\
--outgroup <outgroup file, one tip per line> \\
--phenotype <input tip phenotype data file, only with branches==(condor or correlation)> \\
--resdir <output directory> \\
--model <model or 'best'> \\
--matrices <directory where matrices are stored, default '$baseDir/assets/protein_model.txt'> \\
--nb_simu <number of simulations, default:10000> \\
--min_seq <min number of sequences having the mutation for convergence detection> \\
--min_eem <min number of EEMs> \\
--freqmode <amino acid frequencies: Fmodel or FO, default: Fmodel> \\
--branches <workflow run mode: condor, correlation or emergence, default: condor> \\
--correction <multiple test correction, holm (holm bonferroni) or fdr_bh, default: holm> \\
--alpha <alpha cutoff, default 0.1> \\
--bayes <log bayes factor threshold, default 2>
"""
log.info(help.stripMargin())
}
//run iqtree model finder and take the best model or take the model given by user
process find_model {
publishDir "${resdir}", mode: 'copy'
label 'iqtree'
input:
path tree
path align
val model
output:
path "best_fit_model.txt"
shell:
if ( model == 'best' )
// modelfinder on user input tree and alignment
'''
iqtree -m MFP -s !{align} -te !{tree} -nt !{task.cpus} -pre mfp_!{align}
grep "Best-fit model" mfp_!{align}.iqtree | cut -d " " -f 6 > best_fit_model.txt
'''
else
'''
printf "!{model}" > best_fit_model.txt
'''
}
//transform rate matrix in proper format
process build_matrices {
label 'python'
input:
path iqtree_modelrate
path matrices
output:
path "*pastml_matrix"
path "*simulator_matrix.model"
shell:
//matrices of substitution rate readable by simulator or pastml
'''
matrix_name=`awk 'BEGIN{FS="+"} {print toupper($1) }' !{iqtree_modelrate}`
matrix_pastml_format.py ${matrix_name} !{matrices}
'''
}
process info_align {
// retrieve length of alignment
label 'goalign'
input:
path align
output:
stdout
shell:
'''
len=`goalign stats -i !{align} | grep "length" | cut -f 2`
printf $len
'''
}
process info_align_nbseq {
//retrieve Nb seqs in alignment
label 'goalign'
input:
path align
output:
stdout
shell:
'''
nb_seq=`goalign stats -i !{align} | grep "nseqs" | cut -f 2`
printf $nb_seq
'''
}
//reoptimize tree branch lengths and estimate rates and frequencies by ML
process reoptimize_tree {
label 'iqtree'
publishDir "${resdir}", mode: 'copy'
input:
val freqmode
path align
path tree
path iqtree_mode
path matrices
tuple val(length), val(nb_seq)
output:
path "align.treefile"
tuple val(length), path(align), path("reestimated_rate"), path("frequencies.txt")
path "reestimated_rate"
path "frequencies.txt"
shell:
//te : fixed tree no tree search performed
if ( freqmode == 'Fmodel' )
//run iqtree with mode given by user. Reestimation rates. Frequencies of model retrieved from matrices file.
'''
iqtreemode=`cat !{iqtree_mode}`
iqtree -m ${iqtreemode} -nt !{task.cpus} -s !{align} -te !{tree} -wsr -pre align
tail -n+10 align.rate | cut -f 2 > reestimated_rate
len=`wc -l reestimated_rate | cut -d " " -f 1`
if [ "$len" -eq "0" ] ; then for i in {1..!{length}} ; do echo 1 >> reestimated_rate ;done ; fi
model=`awk 'BEGIN { FS="+" } {printf $1}' !{iqtree_mode}`
freqs=`grep -i $model -A 20 !{matrices} | tail -n 1 | sed 's/;//'`
AA="A R N D C Q E G H I L K M F P S T W Y V"
paste <(tr ' ' '\n' <<< ${AA[*]}) <(tr ' ' '\n' <<< ${freqs[*]}) > frequencies.txt
'''
else
//optimized frequences. For now cannot work with empirical or given vector of frequencies
//should work with no F or I
'''
sed '/+F/!s/$/+FO/' !{iqtree_mode} | sed 's/+F[^+$]*/+FO/' > corrected_model
iqtreemode=`cat corrected_model`
iqtree -m ${iqtreemode} -nt !{task.cpus} -s !{align} -te !{tree} -wsr -pre align
tail -n+10 align.rate | cut -f 2 > reestimated_rate
len=`wc -l reestimated_rate | cut -d " " -f 1`
if [ "$len" -eq "0" ] ; then for i in {1..!{length}} ; do echo 1 >> reestimated_rate ;done ; fi
for i in A R N D C Q E G H I L K M F P S T W Y V ; do grep "pi($i)" align.iqtree | awk -v var="$i" 'BEGIN{ORS="";print var"\\t"} {print $NF"\\n"}'; done > frequencies.txt
'''
}
process tree_rename{
label 'gotree'
publishDir "${resdir}", mode: 'copy'
input:
path tree
path outgroup
output:
path "named_tree"
path "rooted_*"
shell:
//Remove branch length of root
//remove outgroup ofter rerooting
//give a name to internal nodes
'''
sed '/);/!s/)[0-9]*.[0-9]*;/);/' !{tree} > !{tree}_tmp
sed 's/[\\/\\|]/_/g' !{outgroup} > outgroup_tmp
gotree reroot outgroup -r -i !{tree}_tmp -l outgroup_tmp -o rooted_!{tree}
gotree rename --internal --tips=false --auto -i rooted_!{tree} -o named_tree
'''
}
process pars_align_file{
label 'python'
input :
tuple val(length), path(align), path(rates), path(freq)
path positions
val min_seq
output:
tuple val(length), path(rates), path(freq), path('*pastml_input.tsv.gz')
path "positions_to_test.txt"
shell:
//create a table of positions we test and for which we do acr
'''
pars_fasta_subset.py !{align} !{length} !{min_seq} refalign_ !{positions}
'''
}
process acr_pastml{
label 'pastml'
publishDir "${resdir}", pattern: "work_pastml/named*.nw", mode: 'copy'
input:
tuple val(length), path(rate), path(freq), path(input)
path tree
path positions
path matrix
output:
tuple val(length),path(positions), path(rate), path("work_pastml/named.*nwk"), path("*pastml.ML.out.gz"), path("marginal_root.txt")
//rate sed numerotation from 1 ($line -1)
//input numerotation from 0
//create parameter file for pastml including rate per site (scaling factor) for each site and frequencies of amino acids
//run pastml
//remove some temp files
//retrieve marginal proba for root (sed -n 2p)
shell:
'''
align="!{input}"
while read -r line; do
R=`sed -n "${line}"p !{rate}`
echo -e 'parameter\tvalue' > parameter_$((${line}-1))
cat !{freq} >> parameter_$((${line}-1))
echo -e "scaling_factor\t${R}" >> parameter_$((${line}-1)) ; done < !{positions}
gunzip -c !{input} | sed 's/[\\/\\|]/_/g' > ${align%.*.*}.input
VAR=`while read -r line; do echo parameter_$((${line}-1)); done < !{positions}`
ID=`while read -r line; do echo $((${line}-1)); done < !{positions}`
MATRIX=`while read -r line; do echo !{matrix}; done < !{positions}`
pastml --threads !{task.cpus} -t !{tree} -d ${align%.*.*}.input --prediction_method MAP -m CUSTOM_RATES --rate_matrix $MATRIX -c $ID -o ${align%.*.*}.pastml.ML.out --work_dir work_pastml --parameter $VAR
gzip ${align%.*.*}.pastml.ML.out
rm work_pastml/params*.tab
rm parameter_*
for i in `ls work_pastml/marginal_probabilities.character_*` ; do echo ${i//[^0-9]/} ; sed -n 2p $i ; done > marginal_root.txt
'''
}
process pre_count{
label 'python'
publishDir "${resdir}", mode: 'copy'
input :
tuple val(length), path(positions), path(rate), path(tree), path(pastml_acr), path(marginal_root)
val nb_simu
output :
tuple path(positions), path(rate), path(tree), path("*pastml_acr.fasta"), path("reconstructed_root.txt")
path "reconstructed_root.txt" //positions with min_seq //first column positions associated //root num from 1
path "*marginal_posterior.txt" //still positions associated root num from 0
shell:
//transform pastml outpout in a fasta file and retrieve root with marginal proba
'''
pastml_fasta.py !{pastml_acr} !{positions} !{length} !{marginal_root} !{nb_simu} ACR_
'''
}
process count_apparitions{
label 'python'
input :
val min_eem
val min_seq
path align
tuple path(positions), path(rate), path(tree), path (align_acr), path(root)
output :
tuple path(rate), path(align_acr), path("*substitutions_even_root.tsv"), path("*substitutions_aa_tips_per_base.tsv")
path "positions_to_test_eem.txt" //num from 1
path "pos_mut_to_test_eem.txt"
shell:
//count EEMs from real data acr
//min eem is >= so we subtract 1 to be strict >
'''
count_substitutions_from_tips.py !{align_acr} !{align} !{tree} !{positions} !{min_seq} !{min_eem-1} !{root}
'''
}
//////////END OF FIRST PART OF WORKFLOW : OBSERVED NB OF EEMS.
//my simulator of sequence in python, using root, nb simulations and the ROOTED tree.
process simulator {
//errorStrategy 'retry'
//maxRetries 3
label 'python'
publishDir "${resdir}", pattern: "count*.tsv.gz", mode: 'copy'
input :
each pos //each tested positions (num from 1) ListPositionsChannel.readLines()
path simulation_model //substitution matrix
path rates //reestimated rates
path freq //frequencies (model or optimised)
path named_tree //rooted tree with named internal nodes
path root //root corresponding to marginal proba
output :
path "count*npz"
//when: branches_eem == "true" //("emergence" || "condor")
when : branches == "emergence" || branches == 'condor'
shell:
//simulate and count EEMs from tips
'''
sed -n "/^$((!{pos}-1))\t/p" !{root} | cut -f 2 > root.txt #root num from 0
rate=`sed -n "!{pos}p" !{rates}` # sed numerotation from 1
output="!{pos}_!{named_tree}_"
simulator_counting_rates_from_root.py root.txt !{named_tree} ${output} ${rate} !{freq} !{simulation_model}
'''
}
process conclude_convergence{
label 'python' //need to add statsmodels.api and statsmodels.stats in the python docker
publishDir "${resdir}", mode: 'copy'
input:
path simulation_model
path align
path freq
tuple path(rate), path(acralign), path(ref_matrix), path(substitutions)//rates for all positions
path counts
path root //only the interesting positions starts from 1
path positions // starts from 1
val nb_simu
val min_seq // >=
val min_eem // >= so we subtract 1 to be > strict
val alpha //0.1
val correction //holm or fdr_bh
//named*.phy
output:
tuple path("detected_metrics.tsv"), path("all_results_metrics.tsv")
shell:
'''
convergent_substitutions_pvalue.py !{positions} !{root} !{rate} !{align} !{ref_matrix} !{substitutions} !{nb_simu} !{freq} !{simulation_model} !{min_seq} !{min_eem-1} !{alpha} !{correction}
'''
}
///////////END OF EMERGENCE PART OF WORKFLOW : EXPECTED NB OF EEMS.
process prepare_BT {
//conda '/pasteur/appa/homes/mamorel/miniconda3/envs/jupyter-notebook'
publishDir "${resdir}", mode: 'copy'
label 'python'
input:
path align
path positions
path phenotype
val branches
output:
path "*binary_tested_sites.tsv"
when : branches == "correlation" || branches == 'condor'
//when: branches_corr == 'true' //("correlation" || "condor")
shell:
'''
bayes_traits_preps.py !{align} !{positions} !{phenotype}
'''
}
process prepare_tree {
label 'gotree'
input:
path tree
val branches
output:
path "root_tree.nx"
when : branches == "correlation" || branches == 'condor'
//when: branches_corr == 'true' //("correlation" || "condor")
shell:
'''
gotree support clear -i !{tree} | gotree rename -e ".*" -b "" --tips=false --internal | gotree reformat nexus --translate -o root_tree.nx
'''
}
process DataTraits {
input:
path binary
output:
path "data*"
shell:
'''
END=`awk -F '\t' '{print NF}' !{binary} | sort -nu | tail -n 1`
for (( i=2; i<=$((${END}-1)); i++ )); do cut -f 1,$i,$END !{binary} | tail -n+2 > data_$i.txt ; done
'''
}
process BayesTraits {
label 'bayestraits'
input:
path data
path nx_tree
output:
path "*Stones*"
shell:
'''
y=`echo !{data}`
x=${y//[!0-9]/}
echo -e "3\n2\nPriorAll uniform 0 100\nStones 100 1000\nLogFile Dependent_MCMC_10_${x}\nRun" > cmd_MCMC$x.txt;
BayesTraitsV3 !{nx_tree} !{data} < cmd_MCMC$x.txt;
echo -e "2\n2\nPriorAll uniform 0 100\nStones 100 1000\nLogFile Independent_MCMC_10_${x}\nRun" > cmd_Independent_MCMC$x.txt;
BayesTraitsV3 !{nx_tree} !{data} < cmd_Independent_MCMC$x.txt;
'''
}
process BayesFactor {
input:
path stones
path nx_tree
path binary
output:
tuple path(binary), path("BayesFactor_raw.txt")
shell:
'''
END=`awk -F '\t' '{print NF}' !{binary} | sort -nu | tail -n 1`
for (( i=2; i<=$((${END}-1)); i++ )); do printf $i"\t" ; grep "Log marginal likelihood:" Dependent_MCMC_10_$i.Stones.txt | cut -f 2 ; done > Dependent_results.txt
for (( i=2; i<=$((${END}-1)); i++ )); do printf $i"\t" ; grep "Log marginal likelihood:" Independent_MCMC_10_$i.Stones.txt | cut -f 2 ; done > Independent_results.txt
paste Dependent_results.txt Independent_results.txt > Dep_Indep_results.txt
while read p; do
d=`echo $p | awk -F ' ' '{print $2}'`;
i=`echo $p | awk -F ' ' '{print $4}'`;
echo "2*($d- $i)" | bc >> tmp
done <Dep_Indep_results.txt
for i in `head -n 1 binary_tested_sites.tsv` ; do echo $i ; done | head -n -1 > names.txt
paste Dep_Indep_results.txt tmp names.txt > BayesFactor_raw.txt
'''
}
process Conclude_BayesTraits {
label 'python'
input:
val bayes
tuple path(binary), path(bayesfactor)
path positions
val branches
output:
tuple path("bayes_detected_results.tsv"), path("bayes_tested_results.tsv")
when : branches == "correlation"
//when branches_corr == "true" && branches_eem == "false" //"correlation"
shell:
'''
bayes_traits_filter.py !{bayesfactor} !{binary} !{bayes} !{positions}
'''
}
///////////END OF CORRELATION PART OF WORKFLOW.
process Correlation {
label 'python'
publishDir "${resdir}", mode: 'copy'
input:
val bayes
tuple path(binary), path(bayesfactor)
tuple path(detected), path(tested)
output:
tuple path("BayesFactor.txt"), path("tested_results.tsv"), path("significant_results.tsv")
shell:
'''
merge_results.py !{bayesfactor} !{binary} !{bayes} !{tested}
'''
}
workflow {
if (params.help){
usage()
exit(0);
}
if (params.align==null || params.align == '' || !file(params.align).exists()){
log.error("Error: Alignment file not defined or does not exist")
usage()
exit(1)
}
if (params.tree==null || params.tree == '' || !file(params.tree).exists()){
log.error("Error: Tree file not defined or does not exist")
usage()
exit(1)
}
if (params.outgroup==null || params.outgroup == '' || !file(params.outgroup).exists()){
log.error("Error: outgroup file not defined or does not exist")
usage()
exit(1)
}
if (params.matrices==null || params.matrices == '' || !file(params.matrices).exists()){
log.error("Error: matrice file not defined or does not exist")
usage()
exit(1)
}
if (params.nb_simu <= 0){
log.error("Error: number of simulations must be > 0")
usage()
exit(1)
}
if (!['Fmodel','FO'].contains(params.freqmode)){
log.error("Error: wrong amino acid frequncy mode")
usage()
exit(1)
}
if (!["condor","correlation","emergence"].contains(params.branches)){
log.error("Error: --branches must specify a valid run mode : condor, correlation, or emergence")
usage()
exit(1)
}
if (['condor','correlation'].contains(params.branches) && (params.phenotype==null || params.phenotype == '' || !file(params.phenotype).exists())){
log.error("Error: phenotype file must be defined and must exist when --branches is condor or correlation")
usage()
exit(1)
}
// CONDOR Workflow
omodel = find_model(tree, align, model)
omatrices = build_matrices(omodel, matrices)
pastmlmatrix = omatrices[0]
simulatormatrix = omatrices[1]
length = info_align(align)
nbseq = info_align_nbseq(align)
//lenght and nb seqs in alignment (I think we can remove nb seqs)
statsalign = length.combine(nbseq)
otree = reoptimize_tree(freqmode, align, tree, omodel, matrices, statsalign)
treechannel = otree[0]
ratesparamchannel = otree[1]
rateschannel = otree[2]
freqchannel = otree[3]
otreerename = tree_rename(treechannel, outgroup)
namedtreechannel = otreerename[0]
rootedtreechannel = otreerename[1]
opars = pars_align_file(ratesparamchannel, positions, min_seq)
pastmlalign = opars[0].transpose()
positionschannel = opars[1]
opastml = acr_pastml(pastmlalign, rootedtreechannel, positionschannel, pastmlmatrix)
oprecount = pre_count(opastml, nb_simu)
pythoncount = oprecount[0]
rootseq = oprecount[1]
marginalroot = oprecount[2]
ocount = count_apparitions(min_eem, min_seq, align, pythoncount)
refcounting = ocount[0]
positionseem = ocount[1]
positionseemfilter = ocount[2]
refcounting.subscribe{rate, align, freqs, substitutions -> freqs.copyTo(file("${resdir}").resolve('ref_substitutions.txt'));}
simulchannel = simulator(positionseem.flatMap{it.readLines()}, simulatormatrix, rateschannel, freqchannel, namedtreechannel, marginalroot)
collectsimu = simulchannel.collect()
emergencechannel = conclude_convergence(simulatormatrix, align, freqchannel, refcounting, collectsimu, rootseq, positionseem, nb_simu, min_seq, min_eem, alpha, correction)
emergencechannel.subscribe{detected, tested -> tested.copyTo(file("${resdir}").resolve('tested_results.tsv')); detected.copyTo(file("${resdir}").resolve('significant_results.tsv'));}
binarytraits = prepare_BT(align, positionseemfilter, phenotype, branches)
nexustree = prepare_tree(rootedtreechannel, branches)
bayestraitstmp = DataTraits(binarytraits)
bayestraits = bayestraitstmp.flatten()
stonestmp = BayesTraits(bayestraits, nexustree.first())
stones = stonestmp.flatten().collect()
bayeschannel = BayesFactor(stones, nexustree,binarytraits)
obayestraits = Conclude_BayesTraits(bayes, bayeschannel, positionseemfilter, branches)
obayestraits.subscribe{detected, tested -> tested.copyTo(file("${resdir}").resolve('tested_results.tsv')); detected.copyTo(file("${resdir}").resolve('significant_results.tsv'));}
occorelation = Correlation(bayes, bayeschannel, emergencechannel)
}