-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlincRNA_classification2.pl
89 lines (87 loc) · 2.38 KB
/
lincRNA_classification2.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!perl
use strict;
use warnings;
#input format
die "USAGE: lncRNA_file genecode_file OUT_file" unless @ARGV == 3;
open IN1,"$ARGV[0]" || die "$!";
open IN2,"$ARGV[1]" || die "$!";
open OUT,"> $ARGV[2]" || die "$!";
my %gene_data;
my %gene_dir;
#read genecode file and store the data
while(<IN2>){
chomp;
my @data = split /\t/,$_;
if ($data[2] ne 'exon'){ ##不等于
next;
}
# $data[8] =~ /gene_name\s\"(.*)\";\stranscript_type/; ###修改如下
$data[8] =~ /gene_id\s\"(.*)\"/;
my $gene_name = $1; ##数组
my @arr = [$data[3],$data[4]];
push(@{$gene_data{$gene_name}},@arr); ##push:从数组的末尾加入元素。
$gene_dir{$gene_name} = $data[6]; ##方向
}
#read the lncRNA file per line and classify
my @results;
while(<IN1>){
chomp;
my $result;
my $smname;
my @data = split /\t/,$_;
#if ($data[8] !~ /transcript_id\s\"LINC/){ #remove not LINC
# next;
#}
my @rna = split /;/,$data[8];
$rna[1] =~ /\"(.*)\"/;
my $linc = $1;
if ($data[8] =~ /NA/){
$result = $linc."\t"."Intergenic"; ##过滤掉NA,归类到Intergenic
push(@results,$result);
next;
}
$rna[2] =~ /\"(\d+)\"/;
my $distance = $1;
$_ =~ /closet_gene "(.*)"/;
$smname = $1;
if ($distance > 1000){ #c1
$result = $linc."\t"."Intergenic";
push(@results,$result);
}elsif($distance > 0){ #c1 and c2
if($gene_dir{$smname} eq $data[6]){
$result = $linc."\t"."Intergenic";
push(@results,$result);
}else{
$result = $linc."\t"."Bidirectional";
push(@results,$result);
}
}elsif($distance == 0){ #c3 and c4 and c5
if ($gene_dir{$smname} ne $data[6]){ #c3
$result = $linc."\t"."Antisense"; ####有漏洞,遇到NA时,smane="",gene_dir{smname} = na
push(@results,$result);
}else{ #c4 and c5
my $m = 0;
foreach my $gename(keys %gene_data){
if($gename eq $smname){
my @num = @{$gene_data{$gename}};
for (my $i =0;$i <= $#num;$i++){
if(($num[$i][0] < $data[4])&&($num[$i][1] > $data[3])){
$m++;
}
}
}
}
if ($m > 0){
$result = $linc."\t"."Exonic Sense";
push(@results,$result);
}else{
$result = $linc."\t"."Intronic Sense";
push(@results,$result);
}
}
}
}
my %hash;@results = grep { ! $hash{$_} ++ } @results;
foreach my $result(@results){
print OUT $result."\n";
}