-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathaffineGap.py
139 lines (117 loc) · 4 KB
/
affineGap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/python -tt
#
# Affine Gap Penalty
# Gotoh algorithm
# Copyleft (c) 2013. Ridlo W. Wibowo
#
import sys, math
_USAGE = """
MANUAL
Usage : python affineGap.py [option] [input]
Option :
-i input two sequence in command line argument
-d gap open value, default -1
-e gap extend value, default -0.1
-m match score, default +1
-mm mismatch score, default -1
-f input file [under construction]
status: disable
Example: python affineGap.py -i ATTGTC AGTC
END
"""

_NEG_INF = float("-inf")

# Moves recorded while filling the matrices and replayed by the traceback.
_DIAG, _UP, _LEFT = 0, 1, 2


def _parse_options(args):
    """Parse the command-line options (program name already removed).

    Returns the tuple (seq1, seq2, gap_open, gap_extend, match, mismatch).
    Raises ValueError with a short message when an option is unknown, an
    option value is missing or not a number, or no sequences were given.
    """
    scores = {"-d": -1., "-e": -0.1, "-m": 1., "-mm": -1.}
    sequences = None
    pos = 0
    while pos < len(args):
        option = args[pos]
        pos += 1
        if option == "-i":
            if len(args) - pos < 2:
                raise ValueError("option -i needs two sequences")
            sequences = (args[pos], args[pos + 1])
            pos += 2
        elif option in scores:
            if pos >= len(args):
                raise ValueError("option %s needs a value" % option)
            try:
                scores[option] = float(args[pos])
            except ValueError:
                raise ValueError("option %s needs a number, got %r"
                                 % (option, args[pos]))
            pos += 1
        else:
            raise ValueError("invalid option %s" % option)
    if sequences is None:
        raise ValueError("no sequences given, use -i SEQ1 SEQ2")
    return (sequences[0], sequences[1],
            scores["-d"], scores["-e"], scores["-m"], scores["-mm"])


def _fill_matrices(seq1, seq2, gap_open, gap_extend, match, mismatch):
    """Fill the score matrix and record the choice made in every cell.

    Rows follow seq2 and columns follow seq1.  Returns
    (val, move, x_extends, y_extends) where val[i][j] is the best score of
    aligning seq2[:i] with seq1[:j], move[i][j] is the winning move for that
    cell, and x_extends / y_extends tell whether the best vertical /
    horizontal gap ending in the cell extends an existing gap (True) or
    opens a new one (False).
    """
    rows = len(seq2) + 1
    cols = len(seq1) + 1
    val = [[0.] * cols for _ in range(rows)]
    # A real minus infinity: a finite sentinel could be beaten by genuine
    # scores on long inputs or with large penalties.
    xval = [[_NEG_INF] * cols for _ in range(rows)]
    yval = [[_NEG_INF] * cols for _ in range(rows)]
    move = [[_DIAG] * cols for _ in range(rows)]
    x_extends = [[False] * cols for _ in range(rows)]
    y_extends = [[False] * cols for _ in range(rows)]

    # Border cells hold a single gap of length i (or j).
    for i in range(1, rows):
        val[i][0] = gap_open + i * gap_extend
    for j in range(1, cols):
        val[0][j] = gap_open + j * gap_extend

    for i in range(1, rows):
        for j in range(1, cols):
            extend_up = xval[i - 1][j] + gap_extend
            open_up = val[i - 1][j] + gap_open + gap_extend
            xval[i][j] = max(extend_up, open_up)
            x_extends[i][j] = extend_up > open_up

            extend_left = yval[i][j - 1] + gap_extend
            open_left = val[i][j - 1] + gap_open + gap_extend
            yval[i][j] = max(extend_left, open_left)
            y_extends[i][j] = extend_left > open_left

            if seq1[j - 1] == seq2[i - 1]:
                diagonal = val[i - 1][j - 1] + match
            else:
                diagonal = val[i - 1][j - 1] + mismatch

            best = max(diagonal, xval[i][j], yval[i][j])
            val[i][j] = best
            # Ties are resolved diagonal first, then vertical gap, then
            # horizontal gap.
            if best == diagonal:
                move[i][j] = _DIAG
            elif best == xval[i][j]:
                move[i][j] = _UP
            else:
                move[i][j] = _LEFT
    return val, move, x_extends, y_extends


def _traceback(seq1, seq2, move, x_extends, y_extends):
    """Rebuild the aligned sequences from the recorded moves.

    Returns (aligned1, aligned2), two equal-length strings in which '_'
    marks a gap.
    """
    top = []
    bottom = []
    i = len(seq2)
    j = len(seq1)
    gap = None  # direction of the gap being followed, None when not in one
    # Every pass decreases i or j, so the loop always terminates.
    while i > 0 or j > 0:
        if gap is not None:
            # Inside an extended gap: keep following the gap matrix rather
            # than re-deciding from val, which may prefer another path.
            direction = gap
        elif j == 0:
            direction = _UP      # first column: only seq2 is left
        elif i == 0:
            direction = _LEFT    # first row: only seq1 is left
        else:
            direction = move[i][j]

        if direction == _DIAG:
            top.append(seq1[j - 1])
            bottom.append(seq2[i - 1])
            i -= 1
            j -= 1
        elif direction == _UP:
            top.append('_')
            bottom.append(seq2[i - 1])
            gap = _UP if (j > 0 and x_extends[i][j]) else None
            i -= 1
        else:
            top.append(seq1[j - 1])
            bottom.append('_')
            gap = _LEFT if (i > 0 and y_extends[i][j]) else None
            j -= 1
    top.reverse()
    bottom.reverse()
    return ''.join(top), ''.join(bottom)


def affine_gap():
    """Align two sequences globally with affine gap penalties (Gotoh).

    Options are read from sys.argv (which is left unmodified):
      -i SEQ1 SEQ2   the two sequences (required)
      -d VALUE       gap open value, default -1
      -e VALUE       gap extend value, default -0.1
      -m VALUE       match score, default +1
      -mm VALUE      mismatch score, default -1

    Prints the settings, the score matrix (rows follow SEQ2, columns follow
    SEQ1), the aligned sequences with '_' for gaps, and the optimal
    alignment score taken from the last cell of the matrix.

    With no options, an unknown option, a missing or non-numeric value, or
    no -i, the usage text is printed and the process exits with status 1.

    Only single-argument print(...) calls are used so the function produces
    the same text under Python 2 and also runs under Python 3.
    """
    argv = list(sys.argv)
    if len(argv) <= 1:
        print(_USAGE)
        sys.exit(1)
    try:
        (seq1, seq2, gap_open, gap_extend,
         match, mismatch) = _parse_options(argv[1:])
    except ValueError as err:
        print("\n%s : %s %s" % (argv[0], err, _USAGE))
        sys.exit(1)

    #### print input
    print("Affine Gap Penalty, Gotoh Algorithm")
    print("SEQUENCE 1: %s" % seq1)
    print("SEQUENCE 2: %s" % seq2)
    print("gap open :  %s" % gap_open)
    print("gap extend :  %s" % gap_extend)
    print("match score :  %s" % match)
    print("mismacth score:  %s" % mismatch)

    #### initiate and calculate value
    val, move, x_extends, y_extends = _fill_matrices(
        seq1, seq2, gap_open, gap_extend, match, mismatch)

    #### print value
    for row in val:
        print("".join("%s \t" % cell for cell in row))

    #### traceback
    aligned1, aligned2 = _traceback(seq1, seq2, move, x_extends, y_extends)

    print("Sequence 1:  %s" % ' '.join(aligned1))
    print("Sequence 2:  %s" % ' '.join(aligned2))
    # The score of the alignment is the optimum in the last cell; counting
    # columns as match/mismatch would ignore the gap penalties.
    print("Score :  %s" % val[-1][-1])
# Run the aligner only when this file is executed as a script.
if __name__ == '__main__':
    affine_gap()