Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
crutching authored Jun 26, 2019
2 parents 208a1ea + 2eda0fa commit 691561a
Show file tree
Hide file tree
Showing 813 changed files with 178,088 additions and 514 deletions.
2 changes: 1 addition & 1 deletion CNV_plotter/gtrellis_CNV_NT.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ correctCopies <- function(data_sheet, log_name) {
#remove empty lines
data_sheet<-data.frame(data_sheet[complete.cases(data_sheet),])
#get the tumor corrected copies number
data_sheet$tumorCorrectedCopies = ((2^(log_name + 1))-(2*(1-tumorPercent))/tumorPercent)
data_sheet$tumorCorrectedCopies = (((2^(log_name + 1))-(2*(1-tumorPercent)))/tumorPercent)
#replace the negatives with .1
data_sheet$Tumor_Corrected_Copies_STPv3 = (ifelse(data_sheet$tumorCorrectedCopies > 0.1, data_sheet$tumorCorrectedCopies, .1))
#return only the needed columns, contig start end and tumore corrected copies
Expand Down
10 changes: 6 additions & 4 deletions CNV_plotter/png_to_pdf.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
<tool id="png_to_pdf" name="PNG to PDF Converter" version="1.0">
<tool id="png_to_pdf" name="PNG to PDF Converter" version="1.0.1">
<description>Converts a PNG to a PDF.</description>
<command detect_errors="exit_code"><![CDATA[
ln -s $input input.png &&
convert input.png input.pdf
#for $filenum, $input in enumerate($inputs)
ln -s ${input} input${filenum}.png &&
#end for
convert input*.png input.pdf
]]></command>

<inputs>
<param name="input" type="data" format="png" label="PNG File"/>
<param name="inputs" multiple="true" type="data" format="png" label="PNG File"/>
</inputs>
<outputs>
<data name="output" format="pdf" from_work_dir="input.pdf" label="${tool.name} on ${on_string}: PDF"/>
Expand Down
32 changes: 26 additions & 6 deletions CNV_plotter/stpv3_CNV_grapher_NT.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="stpv3_cnv_plotter_nt" name="STPv3 CNV Plotter (NT)" version="1.0">
<tool id="stpv3_cnv_plotter_nt" name="STPv3 CNV Plotter (NT)" version="1.0.1">
<description>Creates two plots of CNVs using gtrellis (Does not use tidyverse).</description>
<requirements>
<requirement type="package" version="1.14.0">bioconductor-gtrellis</requirement>
Expand All @@ -14,11 +14,15 @@
$count_file $segment_file $sample_name $gene_file
#if $chrom_file:
$chrom_file
#else:
$__tool_directory__/"CHR_bed_hg19.txt"
#if $seqdict_source.seqdict_source_selector != "no_seq_dict"
#if $seqdict_source.seqdict_source_selector != "history"
#set seq_dict_loc = ''.join($seqdict_source.seqdict_sequence.fields.path.split('.')[:-1]) + '.dict'
$seq_dict_loc
#else
${seqdict_source.seqdict_sequence}
#end if
#end if
$upper $lower
]]></command>

Expand All @@ -28,7 +32,23 @@
<param name="count_file" type="data" format="tabular" label="Read Counts File"/>
<param name="segment_file" type="data" format="tabular" label="Copy Ratio Segements"/>
<param name="gene_file" type="data" format="tabular" label="Gene Interval File"/>
<param name="chrom_file" type="data" format="tabular" optional="true" label="Chromosome File" help="File for placing the bounds of normal line. Will default to full chromosome."/>
<conditional name="seqdict_source">
<param name="seqdict_source_selector" type="select" label="Choose the source for the sequence dictionary">
<option value="cached">Locally cached</option>
<option value="history">History</option>
<option value="no_seq_dict" selected="true">Do not pass</option>
</param>
<when value="cached">
<param name="seqdict_sequence" type="select" label="Sequence Dictionary" help="Sequence dictionary file. This is used to define chromosome endpoints during graphing." >
<options from_data_table="all_fasta" >
<validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
</options>
</param>
</when>
<when value="history">
<param name="seqdict_sequence" type="data" format="txt" label="Sequence Dictionary" help="Sequence dictionary file. Must be in dict format." />
</when>
</conditional>
<param name="upper" type="float" value="5" label="Upper Bound"/>
<param name="lower" type="float" value=".5" label="Lower Bound"/>
</inputs>
Expand Down
225 changes: 225 additions & 0 deletions Suzi_pipeline/BIC-seq.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
#!/usr/bin/env perl
use strict;

use FindBin qw($Bin);
# Resolve the helper programs relative to the directory that holds this script.
my $path = $Bin;
#print "The home directory of this Perl script is $path.\n\n";

#my $BICseq = "$path/BIC-seq-choose-lambda/BIC-seq";
my $BICseq   = "$path/BICseq/BIC-seq";
my $BIC_post = "$path/R/BIC-postprocessing.R";

# The BIC-seq executable is mandatory; stop right away when it is missing.
die("$BICseq not found.\n") unless -e $BICseq;

# The script location is interpolated into shell command strings further
# down, so a space or tab in it is rejected up front.
if ($path =~ /[ \t]/) {
    print("Error: No space or tab is allowed in the path of this perl pipeline\n");
    die("The current path is: $path\n");
}

use Getopt::Long;

# Option variables filled in by GetOptions. An empty string means
# "option not given" for --f, --I and --paired.
# ($out_dir and $description are declared where they are read from @ARGV.)
my $help;
my $lambda;
my $bin_size;
my $numTypeI = "";
my $multiplicity = 2;
my $window = 200;
my $bootstrap = 0;
my $insert = "";
my $paired = "";

# GetOptions returns true when the command line was parsed without errors.
# NOTE(review): the usage text below advertises --multiplicity as <float>,
# but the option spec only accepts an integer ("=i") - confirm which is meant.
my $options_ok = GetOptions(
    "help"           => \$help,
    "lambda=f"       => \$lambda,
    "bin_size=i"     => \$bin_size,
    "multiplicity=i" => \$multiplicity,
    "window=i"       => \$window,
    "f=f"            => \$numTypeI,
    "B=i"            => \$bootstrap,
    "paired"         => \$paired,
    "I=s"            => \$insert,
);

# Number of positional arguments left after option parsing.
my $size = scalar @ARGV;

# Print the usage and stop when help was requested, option parsing failed,
# or the three positional arguments are not all present.
if ($help || !$options_ok || $size != 3) {
    print "Usage: BIC-seq.pl [options] <ConfigFile> <OutputDir> <Description>\n";
    print "Options:\n";
    print " --help\n";
    print " --lambda=<float>: default 2.\n";
    print " --bin_size=<int>: default 100.\n";
    print " --multiplicity=<float>: default 2\n";
    print " --window=<int>: the window for removing the outliers; default 200.\n";
    print " --f=<float>: expected number of type I errors in the merging process; An alternative way to specify lambda.\n";
    print " --B=<int>: number of permutations for FDR estimate. default 0.\n";
    print " --paired: if specified the data is treated as paired-end data.\n";
    print " --I=<Insert,SDofInsert>: specify the insert size and standard deviation of insert size. Default <200,20>.\n";
    die("Remove outliers, bin, run BIC-seq and postprocessing.\n");
}

# Positional arguments: config file, output directory, run description.
my $bicinfofile = $ARGV[0];
my $out_dir = $ARGV[1];
my $description = $ARGV[2];

# Reject out-of-range option values before any work is done.
die("lambda must be positive.\n") if $lambda < 0;
die("Bin size must be positive.\n") if $bin_size < 0;
die("The value for option --B must be nonnegative\n") if $bootstrap < 0;
# --f is only validated when it was actually supplied (it defaults to "").
die("The value for option --f must be positive\n") if $numTypeI ne "" && $numTypeI <= 0;

# Fall back to the documented defaults when unset (a value of 0 counts as unset).
$bin_size ||= 100;
$lambda ||= 2;

# Turn the option values into the flag strings placed on the BIC-seq command line.
$paired = "-2" if $paired;
if ($insert) {
    my @insert_fields = split(/,/, $insert);
    die("incorrect format of option --I\n") unless scalar(@insert_fields) == 2;
    if ($insert_fields[0] <= 0 || $insert_fields[1] <= 0) {
        die "Insert size and its standard deviation must be positive\n";
    }
    $insert = "-I $insert";
}


# The config file must exist before anything else is attempted.
if (!(-e $bicinfofile)) {
    die("No such file or directory: $bicinfofile\n");
}

## First pass over the config file: check its layout, make sure every listed
## input file exists, and count the data rows. The count ($num_chrom) is
## later used to divide the --f value across the chromosomes.
open(my $config_fh, "<", $bicinfofile) or die("Cannot open $bicinfofile: $!\n");
my $config_line_no = 1;
my $num_chrom = 0;
while (my $config_line = <$config_fh>) {
    chomp $config_line;
    my @row = split(/\t/, $config_line);
    my $num_elem = scalar @row;
    if ($num_elem != 3 && $num_elem != 0) {
        die("<ConfigFile> must be a tab-delimited three column file\n");
    }
    # The first line is skipped and empty lines are ignored. Every other
    # row has exactly three columns at this point, so it must be tested
    # with "> 0": a "> 3" test could never match and would leave
    # $num_chrom at 0.
    if ($config_line_no > 1 && $num_elem > 0) {
        my ($chrom, $tumor_file, $normal_file) = @row;
        if (!(-e $tumor_file)) { die("No such file $tumor_file\n"); }
        if (!(-e $normal_file)) { die("No such file $normal_file\n"); }
        $num_chrom = $num_chrom + 1;
    }
    $config_line_no = $config_line_no + 1;
}

close($config_fh);


# The output directory must be new: refuse to reuse an existing one.
die("Cannot create the directory $out_dir: the directory already exists.\n") if -d $out_dir;
mkdir($out_dir) or die("Cannot create the directory: $out_dir\n");

# Make sure the directory name ends in a slash so file names can be appended.
$out_dir .= "/" unless $out_dir =~ m{/$};
#my $bin_dir = $out_dir."bin/";
my $bic_dir = "${out_dir}bic/";
my $Bbic_dir = "${out_dir}Permbic/";

# Sub-directories for the per-chromosome results and the permutation results.
#mkdir $bin_dir or die("Cannot create the dircectory $bin_dir\n");
for my $sub_dir ($bic_dir, $Bbic_dir) {
    mkdir($sub_dir) or die("Cannot create the dircectory $sub_dir\n");
}


## Second pass over the config file: run BIC-seq once for every data row and
## remember the chromosome names and result files for the post-processing.
open(my $info_fh, "<", $bicinfofile) or die("Cannot open $bicinfofile: $!\n");
my $row_no = 1;
my @bic_outputs;
my @chrom_names;
while (my $info_line = <$info_fh>) {
    chomp $info_line;
    my @row = split(/\t/, $info_line);
    my $num_elem = scalar @row;
    if ($num_elem != 3 && $num_elem != 0) {
        die("<ConfigFile> must be a tab-delimited three column file\n");
    }
    # The first line is skipped and empty lines are ignored.
    if ($row_no > 1 && $num_elem > 0) {
        my ($chrom, $tumor_file, $normal_file) = @row;
        if (!(-e $tumor_file)) { die("No such file $tumor_file\n"); }
        if (!(-e $normal_file)) { die("No such file $normal_file\n"); }

        #my $bin_out = $bin_dir.$chrom."\.bin";
        my $bic_out = $bic_dir.$chrom.".bic";

        # When --f was given it replaces lambda; its value is divided by the
        # number of data rows counted during the first pass.
        my $merge_opt = "-l $lambda";
        if ($numTypeI) {
            my $fmerge = $numTypeI/$num_chrom;
            $merge_opt = "-f $fmerge";
        }

        # NOTE(review): $insert is not passed here although the permutation
        # run further down does pass it - confirm this is intended.
        my $cmd = "$BICseq $paired -o $bic_out -w $window --multiplicity $multiplicity -b $bin_size $merge_opt $tumor_file $normal_file";
        print $cmd."\n";
        if (system($cmd) != 0) { die("\n"); }
        print "\n";

        push @chrom_names, $chrom;
        push @bic_outputs, $bic_out;
    }
    $row_no = $row_no + 1;
}

close($info_fh);

# Comma-separated lists handed to the R post-processing script.
my $bic_files = join(",", @bic_outputs);
my $chromosomes = join(",", @chrom_names);

#### Post-processing: feed the per-chromosome results to the R script.

my $R_bic = "${out_dir}${description}.bicseg";
my $wig_file = "${out_dir}${description}.wig";

# The R script is read from standard input; its arguments follow --args.
my $cmd = join(" ", "R --slave --args", $bic_files, $chromosomes, $out_dir, $description, "<", $BIC_post);
print "$cmd\n";
system($cmd) == 0 or die("\n");
print "\n";



## get the overall frequency
## Sums the 4th and 5th tab-separated columns of the segmentation file over
## all lines except the first.
## NOTE(review): presumably these columns are the tumor and normal read
## counts per segment - confirm against the R post-processing output.
my $total_tumor = 0;
my $total_normal = 0;

if (open(my $seg_fh, "<", $R_bic)) {
    my $seg_line_no = 1;
    while (my $seg_line = <$seg_fh>) {
        chomp $seg_line;
        my @row = split(/\t/, $seg_line);
        if ($seg_line_no > 1) {
            $total_tumor = $total_tumor + $row[3];
            $total_normal = $total_normal + $row[4];
        }
        $seg_line_no = $seg_line_no + 1;
    }
    close($seg_fh);
}
else {
    # The totals are only needed for the permutation step, so an unreadable
    # file is reported but does not stop the run here.
    warn("Warning: cannot open $R_bic: $!\n");
}

#die("tumor = $total_tumor\nnormal = $total_normal\n");

## Optional permutation step (--B > 0): rerun BIC-seq with resampling for
## every data row of the config file, then post-process the resampled results.
my $resampled_bic = "";
if ($bootstrap > 0) {
    # Overall share of tumor counts, passed to BIC-seq through -p.
    my $total_reads = $total_tumor + $total_normal;
    if ($total_reads == 0) {
        die("Cannot compute the tumor frequency: the total of tumor and normal counts is zero.\n");
    }
    my $tumor_freq = $total_tumor/$total_reads;

    open(my $perm_fh, "<", $bicinfofile) or die("Cannot open $bicinfofile: $!\n");
    my $perm_row_no = 1;
    my @resampled_outputs;

    while (my $perm_line = <$perm_fh>) {
        chomp $perm_line;
        my @row = split(/\t/, $perm_line);
        my $num_elem = scalar @row;
        if ($num_elem != 3 && $num_elem != 0) {
            die("<ConfigFile> must be a tab-delimited three column file\n");
        }
        # The first line is skipped and empty lines are ignored.
        if ($perm_row_no > 1 && $num_elem > 0) {
            my ($chrom, $tumor_file, $normal_file) = @row;
            if (!(-e $tumor_file)) { die("No such file $tumor_file\n"); }
            if (!(-e $normal_file)) { die("No such file $normal_file\n"); }

            #my $bin_out = $bin_dir.$chrom."\.bin";
            my $boostrapped_bicout = $Bbic_dir.$chrom."_Perm$bootstrap.Bbic";

            # Default: merge with lambda. When --f was given, its value is
            # divided by the number of data rows and replaces lambda.
            my $cmd = "$BICseq $paired $insert -B $bootstrap -p $tumor_freq -o $boostrapped_bicout -w $window --multiplicity $multiplicity -b $bin_size -l $lambda $tumor_file $normal_file";
            if ($numTypeI) {
                my $fmerge = $numTypeI/$num_chrom;
                $cmd = "$BICseq $paired $insert -w $window --multiplicity $multiplicity -b $bin_size -f $fmerge -B $bootstrap -p $tumor_freq -o $boostrapped_bicout $tumor_file $normal_file";
            }
            print $cmd."\n";
            if (system($cmd) != 0) { die("\n"); }
            print "\n";
            push @resampled_outputs, $boostrapped_bicout;
        }
        $perm_row_no = $perm_row_no + 1;
    }
    close($perm_fh);
    $resampled_bic = join(",", @resampled_outputs);

    # NOTE(review): the R script receives three arguments here but four in
    # the first post-processing call - confirm it supports both forms.
    my $resampled_seg = $out_dir.$description.".resampled.bicseg";
    my $post_cmd = "R --slave --args $resampled_bic $chromosomes $resampled_seg < $BIC_post";
    print "$post_cmd\n";
    # Fail like every other external call instead of ignoring the R exit status.
    if (system($post_cmd) != 0) { die("\n"); }
}
Loading

0 comments on commit 691561a

Please sign in to comment.