From b0dd62230f914a1b5bee4482cfa14532a568f999 Mon Sep 17 00:00:00 2001 From: raysloks Date: Fri, 5 May 2023 09:56:31 +0200 Subject: [PATCH] Fixes the shared baf.tmp issue --- utils/generate_gens_data.pl | 133 ++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 66 deletions(-) diff --git a/utils/generate_gens_data.pl b/utils/generate_gens_data.pl index 309202e8..7f439499 100755 --- a/utils/generate_gens_data.pl +++ b/utils/generate_gens_data.pl @@ -23,7 +23,7 @@ # Calculate coverage data open( COVOUT, ">".$COV_OUTPUT ); for my $i (0..$#COV_WINDOW_SIZES) { - generate_cov_bed($cov_fn, $COV_WINDOW_SIZES[$i], $PREFIXES[$i]); + generate_cov_bed($cov_fn, $COV_WINDOW_SIZES[$i], $PREFIXES[$i]); } close COVOUT; @@ -31,13 +31,14 @@ print STDERR "Calculating BAFs from gvcf...\n"; # Calculate BAFs system( - $SCRIPT_ROOT."/gvcfvaf.pl ". - "$gvcf_fn $GNOMAD > baf.tmp" - ); + $SCRIPT_ROOT."/gvcfvaf.pl ". + "$gvcf_fn $GNOMAD > ".$SAMPLE_ID.".baf.tmp" + ); + open( BAFOUT, ">".$BAF_OUTPUT ); for my $i (0..$#BAF_SKIP_N) { - print STDERR "Outputting BAF $PREFIXES[$i]...\n"; - generate_baf_bed("baf.tmp", $BAF_SKIP_N[$i], $PREFIXES[$i]); + print STDERR "Outputting BAF $PREFIXES[$i]...\n"; + generate_baf_bed($SAMPLE_ID.".baf.tmp", $BAF_SKIP_N[$i], $PREFIXES[$i]); } close BAFOUT; @@ -46,76 +47,76 @@ system("tabix -f -p bed $BAF_OUTPUT.gz"); system("bgzip -f -\@10 $COV_OUTPUT"); system("tabix -f -p bed $COV_OUTPUT.gz"); -unlink("baf.tmp"); +unlink($SAMPLE_ID.".baf.tmp"); sub generate_baf_bed { - my( $fn, $skip, $prefix ) = @_; - open( my $fh, $fn ); - my $i = 0; - while(<$fh>) { - if( $i++ % $skip == 0 ) { - chomp; - my @a = split /\t/; - print BAFOUT $prefix."_".$a[0]."\t".($a[1]-1)."\t".$a[1]."\t".$a[2]."\n"; + my( $fn, $skip, $prefix ) = @_; + open( my $fh, $fn ); + my $i = 0; + while(<$fh>) { + if( $i++ % $skip == 0 ) { + chomp; + my @a = split /\t/; + next if $#a != 2; + print BAFOUT $prefix."_".$a[0]."\t".($a[1]-1)."\t".$a[1]."\t".$a[2]."\n"; + } } - } - close $fn; + close $fn; } sub generate_cov_bed { - my( $fn, $win_size, $prefix ) = @_; - - open(my $fh, $fn); - my( $reg_start, $reg_end, $reg_chr, $force_end ); - my @reg_ratios; - while(<$fh>) { - next if /^@/ or /^CONTIG/; - chomp; - my ($chr, $start, $end, $ratio ) = split /\t/; - my $orig_end = $end; - unless( $reg_start ) { - $reg_start = $start; - $reg_end = $end; - $reg_chr = $chr; - } - - if( $chr eq $reg_chr ) { - if( $start - $reg_end < $win_size ) { - push @reg_ratios, $ratio; - $reg_end = $end; - } + my( $fn, $win_size, $prefix ) = @_; + + open(my $fh, $fn); + my( $reg_start, $reg_end, $reg_chr, $force_end ); + my @reg_ratios; + while(<$fh>) { + next if /^@/ or /^CONTIG/; + chomp; + my ($chr, $start, $end, $ratio ) = split /\t/; + my $orig_end = $end; + unless( $reg_start ) { + $reg_start = $start; + $reg_end = $end; + $reg_chr = $chr; + } - # If there is a large gap to the next region, prematurely end region - else { - $force_end = 1; - $end = $reg_end; - } - } - else { - $force_end = 1; - $end = $reg_end; - } - if( $end - $reg_start + 1 >= $win_size or $force_end ) { - my $mid_point = $reg_start + int(($end - $reg_start)/2); - print COVOUT $prefix."_".$reg_chr."\t".($mid_point-1)."\t".$mid_point."\t".mean(@reg_ratios)."\n"; - undef $reg_start; - undef $reg_end; - undef $reg_chr; - undef @reg_ratios; - } + if( $chr eq $reg_chr ) { + if( $start - $reg_end < $win_size ) { + push @reg_ratios, $ratio; + $reg_end = $end; + } + # If there is a large gap to the next region, prematurely end region + else { + $force_end = 1; + $end = $reg_end; + } + } + else { + $force_end = 1; + $end = $reg_end; + } + if( $end - $reg_start + 1 >= $win_size or $force_end ) { + my $mid_point = $reg_start + int(($end - $reg_start)/2); + print COVOUT $prefix."_".$reg_chr."\t".($mid_point-1)."\t".$mid_point."\t".mean(@reg_ratios)."\n"; + undef $reg_start; + undef $reg_end; + undef $reg_chr; + undef @reg_ratios; + } - if( $force_end ) { - $reg_start = $start; - $reg_end = $orig_end; - $reg_chr = $chr; - push @reg_ratios, $ratio; - undef $force_end; + if( $force_end ) { + $reg_start = $start; + $reg_end = $orig_end; + $reg_chr = $chr; + push @reg_ratios, $ratio; + undef $force_end; + } } - } - close $fh; + close $fh; } - + sub mean { - return sum(@_)/@_; + return sum(@_)/@_; }