forked from khmiga/nanopore_kmer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkmer_ins.pl
64 lines (56 loc) · 1.58 KB
/
kmer_ins.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#! /usr/bin/perl -w
use strict;
die "must provide last2fasta file and cmpKmer.out file\nusage kmer_ins.pl last2faFile cmpKmer.out\ncmpKmerout is needed to obtain the list of all possible kmers for randomized null\n" unless($ARGV[0]);
die "must provide last2fasta file and cmpKmer.out file\nusage kmer_ins.pl last2faFile cmpKmer.out\ncmpKmerout is needed to obtain the list of all possible kmers for randomized null\n" unless($ARGV[1]);
open(FILE,"$ARGV[0]");
open(RAND,"$ARGV[1]");
my @rand;
open(INS,">cmpKmer.ins.txt");
my $kmerSize;
while(<RAND>){
chomp;
my @l=split(/\t+/,$_);
$kmerSize=length($l[0]);
push(@rand,$l[0]);
}
close RAND;
my %rand;
my %del;
my %ins;
my $tot=0;
while(<FILE>){
chomp;
my @l=split(/\t+/,$_);
my $l1=length($l[0]);
my $l2=length($l[1]);
my $start = 0;
my $slide = 1;
my $windowSize=$kmerSize;
my $end = $start + $windowSize;
my $o=0;
my $s1=$l[0];
my $s2=$l[1];
while ($end <= $l1) {
my $oligo1 = substr($s1,$start,$windowSize);
my $oligo2 = substr($s2,$start,$windowSize);
if(($oligo1=~ /\-/)&&($oligo2!~ /\-/)){
my $randVal=rand(@rand);
$rand{$rand[$randVal]}=1 unless ($rand{$rand[$randVal]}++);
$del{$oligo2}=1 unless ($del{$oligo2}++);
$tot++;
}
$start += $slide;
$end = $start + $slide;
#print\t$start\t$oligo\t$rc\n";
}
}
close FILE;
foreach my $d(keys %del){
my $dFreq=$del{$d}/$tot;
if(exists $rand{$d}){
my $rFreq=$rand{$d}/$tot;
my $logT=log($dFreq/$rFreq);
print INS "$d\t$del{$d}\t$dFreq\t$rand{$d}\t$rFreq\t$logT\n";
}
}
close INS;