-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbootstrap.py
74 lines (65 loc) · 2.41 KB
/
bootstrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import random
from collections import deque

import neighbour_joining
import utils
from node import Node
def bootstrap(root, ids, sequences, replicates=100, seq_counter=120):
    """Score each internal node of a tree by bootstrap resampling.

    For every replicate, alignment columns are resampled with replacement,
    a new tree is inferred from the resampled sequences, and each internal
    node of the original tree is checked against the node with the same id
    in the new tree.

    Args:
        root: Root node of the original tree (needs ``id`` and ``children``).
        ids: Sequence identifiers; each must be a key of ``sequences``.
        sequences: Mapping from sequence id to sequence string. All
            sequences are assumed to have the same length.
        replicates: Number of bootstrap replicates to run (default 100).
        seq_counter: Third argument forwarded to
            ``neighbour_joining.nei_saitou``. 120 is the value this code has
            always used. NOTE(review): presumably the highest node id to
            assign -- confirm against nei_saitou before changing it.

    Returns:
        A list of floats in [0, 1], one per internal node of the original
        tree in the pre-order produced by ``dfs``: the fraction of
        replicates in which that node's partition was reproduced.

    Raises:
        ValueError: If ``replicates`` is not positive.
        KeyError: If a replicate tree has no node with one of the original
            internal node ids.
    """
    if replicates <= 0:
        raise ValueError("replicates must be a positive integer")

    # Pre-order ids of the internal nodes of the original tree. dfs may
    # return None for a tree without internal nodes, hence the fallback.
    ordered_ids = dfs(root, None) or []
    original_partitions = bfs(root)
    partitions_match = [0] * len(ordered_ids)

    # Every sequence is assumed to have the same length, so the first id is
    # as good as any other for measuring it.
    seq_length = len(sequences[ids[0]]) if ids else 0

    for _ in range(replicates):
        # Draw column indices with replacement, one per alignment column.
        columns = [
            random.randint(0, seq_length - 1) for _ in range(seq_length)
        ]
        bootstrap_sequences = {
            seq_id: ''.join(sequences[seq_id][col] for col in columns)
            for seq_id in ids
        }

        # Rebuild the tree from the resampled sequences.
        dist_matrix = utils.get_distance_matrix(ids, bootstrap_sequences)
        replicate_root = neighbour_joining.nei_saitou(
            ids, dist_matrix, seq_counter
        )
        partitions = bfs(replicate_root)

        # A node is reproduced when the replicate's entry for the same node
        # id is equal to the original one (same ids, same order).
        for index, node_id in enumerate(ordered_ids):
            if original_partitions[node_id] == partitions[node_id]:
                partitions_match[index] += 1

    return [count / float(replicates) for count in partitions_match]
def bfs(root):
    """Build a partition map for a tree by breadth-first traversal.

    Args:
        root: Root node of the tree, or None. Nodes need an ``id`` and a
            ``children`` attribute; ``children`` is None for a leaf and
            otherwise a mapping whose keys are the child nodes.

    Returns:
        A dict mapping the id of every node that has children to the result
        of ``dfs(node, None)`` for that node. Leaves get no entry. An empty
        dict is returned when ``root`` is None or is itself a leaf.
    """
    partitions = {}
    if root is None:
        # Mirror dfs, which also treats a missing root as an empty tree.
        return partitions

    queue = deque([root])
    while queue:
        node = queue.popleft()
        if node.children is None:
            # Leaves define no partition and have nothing to enqueue.
            continue
        partitions[node.id] = dfs(node, None)
        # Iterating the mapping yields its keys, i.e. the child nodes.
        queue.extend(node.children)
    return partitions
def dfs(root, visited):
    """Collect the ids of all non-leaf nodes under ``root`` in pre-order.

    Args:
        root: Node to start from, or None. Nodes need an ``id`` and a
            ``children`` attribute; ``children`` is None for a leaf and
            otherwise a mapping whose keys are the child nodes.
        visited: List to append to, or None to start a fresh list.

    Returns:
        The list of collected ids (``visited`` itself when one was passed
        in). Nodes whose ``children`` is None are skipped, so the list is
        empty when ``root`` is None or a leaf.
    """
    if visited is None:
        visited = []

    # An explicit stack keeps deep trees from hitting the recursion limit.
    stack = [root]
    while stack:
        node = stack.pop()
        if node is None or node.children is None:
            continue
        visited.append(node.id)
        # Push children in reverse so they are popped, and therefore
        # visited, in the mapping's own iteration order.
        stack.extend(reversed(list(node.children)))
    return visited
def write_bootstrap(percentages):
    """Write the bootstrap values to ``bootstrap.txt``, one per line.

    The file is created in the current working directory and overwritten
    if it already exists. Each value is written as ``str(value)`` followed
    by a newline.

    Args:
        percentages: Iterable of values to write.
    """
    with open('bootstrap.txt', 'w') as out_file:
        out_file.writelines(str(value) + '\n' for value in percentages)