-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLogoFile
65 lines (53 loc) · 1.92 KB
/
LogoFile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
This Python script processes contribution scores to build the input for sequence logos.
"""
import numpy as np
def Score(nucleotidesList, scoresList, pos):
    """Average the scores of each nucleotide observed at one position.

    Args:
        nucleotidesList: Sequences, each indexable by position and yielding
            one of 'A', 'C', 'G' or 'T'.
        scoresList: Per-sequence scores, parallel to ``nucleotidesList``;
            ``scoresList[i][pos]`` is the score of ``nucleotidesList[i][pos]``.
        pos: Zero-based position to summarise.

    Returns:
        A dict with keys 'A', 'C', 'G', 'T' (in that order). Each value is
        the mean score of that nucleotide at ``pos`` over all sequences, or
        0.0 when the nucleotide never occurs at ``pos``.

    Raises:
        KeyError: If a sequence holds a symbol other than A/C/G/T at ``pos``.
        IndexError: If ``pos`` is out of range for a sequence or score row.
    """
    collected = {'A': [], 'C': [], 'G': [], 'T': []}
    # zip stops at the shorter list, so unpaired trailing entries are ignored.
    for nucs, scrs in zip(nucleotidesList, scoresList):
        collected[nucs[pos]].append(scrs[pos])
    # Build a fresh dict rather than overwriting values while iterating;
    # an empty bucket maps to 0.0 because np.mean([]) would yield nan.
    return {
        base: (np.mean(values) if values else 0.0)
        for base, values in collected.items()
    }
def LogoFile(input,output):
    """Write position-wise mean contribution scores of selected samples to a pickle.

    The input file is read as consecutive three-line records of
    comma-separated values:

      1. numeric indicators, parsed as floats;
      2. the sequence, one nucleotide symbol per field;
      3. the contribution scores, one float per field.

    A record is kept when its first indicator is >= 0.5 and its second
    indicator equals 1. The original comments call these the TP samples
    (presumably predicted score and true label -- confirm against the code
    that produces the input file).

    For every position of the first kept sequence, ``Score`` averages the
    scores per nucleotide. The result is a pandas DataFrame with columns
    'A', 'C', 'G', 'T' and one row per position (the layout the original
    comment says is meant for logomaker). It is pickled to
    ``'<output>_TP.txt'``; note the file is a pickle despite the suffix.

    Args:
        input: Path of the text file to read.
        output: Prefix of the output file name.

    Returns:
        None. The only result is the pickle written to disk. When no record
        is kept, an empty DataFrame with the four columns is written.

    Raises:
        ValueError: If an indicator or score field is not a valid float.
        KeyError: If a kept sequence holds a symbol other than A/C/G/T.
        IndexError: If a kept record is shorter than the first kept sequence,
            or an indicator line has fewer than two fields when needed.
    """
    import pandas as pd

    # Context manager so the handle is closed even if parsing fails below.
    with open(input) as handle:
        allLines = handle.readlines()

    nucTP = []
    scrTP = []
    # Walk complete three-line records only: a trailing partial record (for
    # example a final blank line) is ignored instead of desynchronising the
    # indicator / sequence / score rows.
    for start in range(0, len(allLines) - 2, 3):
        indicators = [float(x) for x in allLines[start].strip().split(',')]
        nucleotides = allLines[start + 1].rstrip().split(',')
        contributions = [float(a) for a in allLines[start + 2].rstrip().split(',')]
        if indicators[0] >= 0.5 and indicators[1] == 1:
            nucTP.append(nucleotides)
            scrTP.append(contributions)

    files = '{}_{}.txt'.format(output,'TP')

    # Collect one averaged value per position for each nucleotide column.
    scoresTP = {'A': [], 'C': [], 'G': [], 'T': []}
    # The first kept sequence defines the number of positions; with no kept
    # sequence there are none, which yields an empty frame rather than an
    # IndexError on nucTP[0].
    positions = len(nucTP[0]) if nucTP else 0
    for pos in range(positions):
        s = Score(nucTP, scrTP, pos)
        for base in scoresTP:
            scoresTP[base].append(s[base])

    TP = pd.DataFrame(scoresTP)
    TP.to_pickle(files)
# Run the conversion for every (input file, output prefix) pair, in order.
for _source_path, _output_prefix in (
    ('RBBM1.txt', 'RBBMlogo'),
    ('SBBM2.txt', 'SBBMlogo'),
    ('CBBM1.txt', 'CBBMlogo'),
):
    LogoFile(_source_path, _output_prefix)