-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbootstrap-primer.py
executable file
·133 lines (101 loc) · 2.96 KB
/
bootstrap-primer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/python
# given a sequence, it shuffles the bases and tests it to see if its better than the previous
import sys
from itertools import *
from Bio.SeqUtils import MeltingTemp as mt
from Bio.SeqUtils import GC as gc
from Bio.SeqUtils import lcc
from Bio.Seq import Seq
from primer3 import calcHairpin as hp
from primer3 import calcHomodimer as hd
import time
import random
import numpy as np
def findBCSets():
bcSets = dict()
for bc in product('ACTG', repeat=7):
barcode = "".join(bc)
if lcc.lcc_simp(barcode) <= 1.8 or any(nt*3 in barcode for nt in 'ACTG'):
continue
elif len(bcSets) == 0:
bcSets["set1"] = list()
bcSets["set1"].append(barcode)
else:
setsTried = 0
for k,v in bcSets.iteritems():
match = True
setsTried += 1
for i in range(0,len(v)):
if sum(ch1 != ch2 for ch1, ch2 in zip(v[i],barcode)) < 3:
match = False
break
if match == True:
v.append(barcode)
setsTried = 0
break
else:
if setsTried == len(bcSets):
bcSets["set"+str(setsTried+1)] = list()
bcSets["set"+str(setsTried+1)].append(barcode)
break
elif setsTried == len(bcSets):
bcSets["set"+str(setsTried)] = list()
bcSets["set"+str(setsTried)].append(barcode)
break
finalBCSets = dict()
for bc,bcList in bcSets.iteritems():
if len(bcList) >= 96:
finalBCSets[bc] = bcList
return finalBCSets
def testBarcodes(bcSets,primer):
amp = primer[-22:]
validSets = dict()
for bcSet,bcList in bcSets.iteritems():
worksList = list()
for bc in bcList:
seq = amp+bc
if 60 < mt.Tm_NN(seq) < 65 and hp(seq).tm <= 42 and hd(seq).dg/1000 > 2:
worksList.append(bc)
#print hd(seq).dg/1000
# print mt.Tm_NN(seq),hp(seq).tm,hd(seq).dg/1000
if len(worksList) >= 96:
validSets[bcSet] = worksList
return validSets
def main():
#orig seq. had to replace last G with T to get to 52 pct
# this was taken from JCVI's list, but as long as it has 52 pct GC it doesn't matter
# seqPrimer = "TCTTCCGATCTCGTCGCTACGCGTCATCAGTAG"
seqPrimer = "TCTTCCGATCTCGTCGCTACGCGTCATCAGTAT"
lastTm = mt.Tm_NN(seqPrimer)
lastHP = hp(seqPrimer).tm
lastHD = hd(seqPrimer).dg/1000
lastgc = gc(seqPrimer)
lastSeqPrimer = seqPrimer
validSets = list()
bcSets = findBCSets()
count = 0
while count <= 1000000:
# shuffles the sequences while retaining the GC%
amp = np.random.permutation(list(seqPrimer[-22:]))
amp = "".join(amp)
if any(nt*3 in amp for nt in 'ACTG'):
continue
amp = "".join(amp)
amp = seqPrimer[:11]+amp
newTm = mt.Tm_NN(amp)
newHP = hp(amp).tm
newHD = hd(amp).dg/1000
if newTm < 65:
continue
elif lastHP >= newHP and lastHD < newHD:
print amp,lastTm,newTm,lastHP,newHP,lastHD,newHD
validSets = testBarcodes(bcSets,amp)
if len(validSets) > 0:
lastHP = newHP
lastHD = newHD
lastTm = newTm
lastSeqPrimer = amp
count += 1
print amp,lastTm,newTm,lastHP,newHP,lastHD,newHD
if __name__ == "__main__":
main()