-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtax_trace.pl
83 lines (59 loc) · 1.61 KB
/
tax_trace.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env perl
use strict;
die "Usage:perl $0 <node.dmp> <name.dmp> <taxids> <export>" if( @ARGV != 4 );
my ($node, $name, $taxids, $export) = @ARGV;
open (NODE, $node) ||die "$!";
my %parents = ();
my %rank = ();
while(<NODE>){
my @its = split /\s+\|\s+/ , $_;
$parents{ $its[0] } = $its[1];
$rank{ $its[0] } = $its[2];
}
close NODE;
open (NAME , $name) || die "$!";
my %names = ();
while(<NAME>){
my @its = split /\s+\|\s+/ , $_;
if(/scientific name/){
$names{ $its[0] } = $its[1];
}
}
close NAME;
open( TAXID, $taxids ) || die "$!";
open( EXPORT, ">$export" ) || die "$!";
while (<TAXID>) {
$_ =~ s/\s+$//;
my @its = split /\t/, $_;
my ($seqId, $taxId) = ($its[0], $its[1]);
if(! exists $names{ $taxId }){
print "$seqId, no taxonomy found\n";
print EXPORT qq{no taxonomy found\n};
next;
}
my ($node_path, $name_path) = taxon_trace( $taxId );
#print EXPORT qq{$name_path\n};
print EXPORT qq{$seqId\t$names{ $taxId }\t$name_path\t$node_path\n};
}
close TAXID;
close EXPORT;
exit;
sub taxon_trace{
my $node = shift;
my @rank = ();
my @name_path = ();
while(1){
push @rank, $rank{ $node };
push @name_path, $names{ $node };
if($node == 1){
last;
}
if(exists $parents{ $node }){
$node = $parents{ $node };
}else{
warn "$node \t something may be wrong!\n";
exit;
}
}
return ( join("|", reverse @rank), join("|", reverse @name_path) );
}