Skip to content

Commit

Permalink
Checkpoint 11/07 2:30
Browse files Browse the repository at this point in the history
  • Loading branch information
anicksaha committed Nov 7, 2019
1 parent 8fb25e4 commit e5641ff
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 129 deletions.
125 changes: 61 additions & 64 deletions bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,75 +4,72 @@
import utils
from random import randint

class Bootstrap:
# do the 100 bootstrap sampling
def bootstrap(self, root, ids, sequences):
# original order of the ids in the original tree
ordered_ids = self.dfs(root,None)
original_partitions = self.bfs(root)
# Since we have 59 internal nodes.
partitions_match = [0] * 59
#Starting the 100 inferences
for _ in range(100):
bootstrap_sequences = {}
indices = []
# Using a random seq to get the length of the sequences. Assuming all the sequences to be of the same length.
indices_to_generate = len(sequences['152801'])
for i in range(indices_to_generate):
indices.append(randint(0, indices_to_generate-1))
# do the 100 bootstrap sampling
def bootstrap(root, ids, sequences):
# original order of the ids in the original tree
ordered_ids = dfs(root,None)
original_partitions = bfs(root)
# Since we have 59 internal nodes.
partitions_match = [0] * 59
#Starting the 100 inferences
for _ in range(100):
bootstrap_sequences = {}
indices = []
# Using a random seq to get the length of the sequences. Assuming all the sequences to be of the same length.
indices_to_generate = len(sequences['152801'])
for i in range(indices_to_generate):
indices.append(randint(0, indices_to_generate-1))

for id in ids:
bootstrap_seq = ''
for index in indices:
bootstrap_seq += sequences[id][index]
bootstrap_sequences[id] = bootstrap_seq
for id in ids:
bootstrap_seq = ''
for index in indices:
bootstrap_seq += sequences[id][index]
bootstrap_sequences[id] = bootstrap_seq

# Distance matrix and Nei-Saitou neighbour joining for the resampled sequences
distMatrix = utils.get_distMatrix(ids, bootstrap_sequences)
seqCounter = 120
Neighbour_Joining_instance = neighbour_joining.Neighbour_Joining()
root = Neighbour_Joining_instance.nei_saitou(ids, distMatrix, seqCounter)
# get the dictionary of partitions and ids under those partitions
partitions = self.bfs(root)

# Comparing the original and random partitions.
for index, id in enumerate(ordered_ids):
if original_partitions[id] == partitions[id]:
partitions_match[index] += 1
percentages = [count / 100.0 for count in partitions_match]
return percentages


# BFS implementation to return a partitions map when a root is given
def bfs(self, root):
    """Map each node that has children to the ids collected beneath it.

    The tree is walked breadth-first starting at ``root``. A node whose
    ``children`` attribute is ``None`` is skipped; every other node gets
    the entry ``partitions[node.id] = self.dfs(node, None)``.

    Args:
        root: Tree node exposing ``id`` and ``children``. ``children`` is
            either ``None`` or a mapping whose keys are the child nodes.

    Returns:
        dict: node id -> list returned by ``self.dfs`` for that node.
    """
    # Local import keeps the block self-contained; deque gives O(1) pops
    # from the left where list.pop(0) shifts the whole list.
    from collections import deque

    pending = deque([root])
    partitions = {}
    while pending:
        node = pending.popleft()
        if node.children is None:
            continue
        partitions[node.id] = self.dfs(node, None)
        # Iterating the mapping yields its keys, i.e. the child nodes.
        pending.extend(node.children)
    return partitions


# DFS to get the order of the original ids
def dfs(self, root, visited):
    """Collect, in depth-first pre-order, the ids of nodes that have children.

    Args:
        root: Node to start from, or ``None``. Nodes expose ``id`` and
            ``children``; ``children`` is ``None`` or a mapping keyed by
            child nodes.
        visited: List to append to, or ``None`` to start a fresh list.

    Returns:
        list: The accumulator. It is returned on every path, including when
        ``root`` is ``None`` or has ``children is None`` (the original
        returned ``None`` in that case, which broke callers that iterate
        the result).
    """
    if visited is None:
        visited = []
    # Nodes without a children mapping contribute no id of their own.
    if root is None or root.children is None:
        return visited
    visited.append(root.id)
    for child in root.children:
        self.dfs(child, visited)
    return visited
# Distance matrix and Nei-Saitou neighbour joining for the resampled sequences
distMatrix = utils.get_distMatrix(ids, bootstrap_sequences)
seqCounter = 120
root = neighbour_joining.nei_saitou(ids, distMatrix, seqCounter)
# get the dictionary of partitions and ids under those partitions
partitions = bfs(root)

# Comparing the original and random partitions.
for index, id in enumerate(ordered_ids):
if original_partitions[id] == partitions[id]:
partitions_match[index] += 1
percentages = [count / 100.0 for count in partitions_match]
return percentages


# BFS implementation to return a partitions map when a root is given
def bfs(root):
    """Map each node that has children to the ids collected beneath it.

    The tree is walked breadth-first starting at ``root``. A node whose
    ``children`` attribute is ``None`` is skipped; every other node gets
    the entry ``partitions[node.id] = dfs(node, None)``.

    Args:
        root: Tree node exposing ``id`` and ``children``. ``children`` is
            either ``None`` or a mapping whose keys are the child nodes.

    Returns:
        dict: node id -> list returned by ``dfs`` for that node.
    """
    # Local import keeps the block self-contained; deque gives O(1) pops
    # from the left where list.pop(0) shifts the whole list.
    from collections import deque

    pending = deque([root])
    partitions = {}
    while pending:
        node = pending.popleft()
        if node.children is None:
            continue
        partitions[node.id] = dfs(node, None)
        # Iterating the mapping yields its keys, i.e. the child nodes.
        pending.extend(node.children)
    return partitions

# DFS to get the order of the original ids
def dfs(root, visited):
    """Collect, in depth-first pre-order, the ids of nodes that have children.

    Args:
        root: Node to start from, or ``None``. Nodes expose ``id`` and
            ``children``; ``children`` is ``None`` or a mapping keyed by
            child nodes.
        visited: List to append to, or ``None`` to start a fresh list.

    Returns:
        list: The accumulator. It is returned on every path, including when
        ``root`` is ``None`` or has ``children is None`` (the original
        returned ``None`` in that case, which broke callers that iterate
        the result).
    """
    if visited is None:
        visited = []
    # Nodes without a children mapping contribute no id of their own.
    if root is None or root.children is None:
        return visited
    visited.append(root.id)
    for child in root.children:
        dfs(child, visited)
    return visited

# Dump the bootstrap support values to bootstrap.txt, one value per line
def write_bootstrap(percentages):
    """Write each entry of ``percentages`` to ``bootstrap.txt``.

    The file is created (or truncated) in the current working directory and
    receives ``str(value)`` followed by a newline for every entry, in order.

    Args:
        percentages: Iterable of values to write.
    """
    with open('bootstrap.txt', 'w') as out_file:
        out_file.writelines(str(value) + '\n' for value in percentages)
54 changes: 27 additions & 27 deletions submission/boot.txt → bootstrap.txt
Original file line number Diff line number Diff line change
@@ -1,59 +1,59 @@
0.0
0.0
0.17
0.16
0.0
0.02
0.1
0.06
0.13
0.07
0.32
0.01
0.41
0.25
0.0
0.43
0.0
0.0
0.0
0.0
0.06
0.63
0.0
0.01
0.11
0.95
0.0
0.01
0.42
0.41
0.0
0.19
0.97
0.01
0.56
0.0
0.0
0.03
0.23
0.0
0.2
0.0
0.0
0.45
0.03
0.15
0.33
0.52
0.0
0.38
0.02
0.13
0.32
0.53
0.0
0.0
0.38
0.0
0.33
0.01
1.0
0.0
0.0
0.0
0.38
0.0
0.15
0.49
0.01
0.13
1.0
0.8
0.12
0.58
0.75
0.07
0.54
1.0
1.0
0.0
0.0
0.01
0.23
0.0
0.24
0.6
Binary file added bootstrap_tree.pdf
Binary file not shown.
14 changes: 5 additions & 9 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from node import Node
import sys
import neighbour_joining
import utils
from random import randint
import bootstrap
import sys

def main(filename):
# First step is to read the file and get the sequences
Expand All @@ -26,12 +25,9 @@ def main(filename):
# Writing the newick file
utils.write_newick_file(ids, root)

#bootstrap calculations
#Bootstrap_instance = bootstrap.Bootstrap()
#percentages = Bootstrap_instance.bootstrap(root, ids, sequences)
#utils.write_bootstrap(percentages)

# bootstrap calculations
percentages = bootstrap.bootstrap(root, ids, sequences)
bootstrap.write_bootstrap(percentages)

if __name__ == '__main__':
main(sys.argv[1])

main(sys.argv[1])
59 changes: 59 additions & 0 deletions submission/bootstrap.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
0.0
0.0
0.2
0.0
0.03
0.12
0.03
0.25
0.0
0.32
0.0
0.0
0.0
0.0
0.0
0.0
0.11
0.96
0.0
0.01
0.43
0.0
0.02
0.62
0.0
0.0
0.01
0.29
0.0
0.0
0.0
0.4
0.01
0.16
0.35
0.54
0.0
0.0
0.0
0.45
0.0
1.0
0.0
0.0
0.0
0.38
0.0
0.09
1.0
0.77
0.13
0.56
1.0
1.0
0.0
0.0
0.01
0.29
0.55
Binary file modified submission/bootstrap_tree.pdf
Binary file not shown.
Loading

0 comments on commit e5641ff

Please sign in to comment.