-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
146 additions
and
154 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,90 +1,85 @@ | ||
from node import Node | ||
import sys | ||
import random | ||
import utils | ||
import utils_nj as utils | ||
|
||
# Holds the methods to join neighbours | ||
class Neighbour_Joining: | ||
# Applies nei_saitou to calculte the distances recursively and get the phylogeny tree | ||
def nei_saitou(original_ids, original_distMatrix, seqCounter): | ||
#Using a global root holder | ||
global root | ||
root = None | ||
# Maintaining a map for the relationships between child and parent | ||
relationship_map = {} | ||
|
||
#Applies nei_saitou to calculte the distances recursively and get the phylogeny tree | ||
def nei_saitou(self, original_ids, original_distMatrix, seqCounter): | ||
#Using a global root holder | ||
# Using a mapping for seqId and new ids. | ||
seqId_orderId_map = {} | ||
for i, id in enumerate(original_ids): | ||
seqId_orderId_map[id] = str(i + 1) | ||
|
||
def calculate_next_neighbours(ids, distMatrix, seqCounter, seqId_orderId_map): | ||
global root | ||
root = None | ||
# Maintaining a map for the relationships between child and parent | ||
relationship_map = {} | ||
N = len(distMatrix) | ||
|
||
# Using a mapping for seqId and new ids. | ||
seqId_orderId_map = {} | ||
for i, id in enumerate(original_ids): | ||
seqId_orderId_map[id] = str(i + 1) | ||
|
||
def calculate_next_neighbours(ids, distMatrix, seqCounter, seqId_orderId_map): | ||
global root | ||
N = len(distMatrix) | ||
|
||
# Base case for recursion | ||
if N <= 2: | ||
# Assigning the ids to these nodes according to the requirements | ||
if ids[0] in seqId_orderId_map: | ||
tip_1 = seqId_orderId_map[ids[0]] | ||
else: | ||
tip_1 = ids[0] | ||
if ids[1] in seqId_orderId_map: | ||
tip_2 = seqId_orderId_map[ids[1]] | ||
else: | ||
tip_2 = ids[1] | ||
# Adding their relationship to the tree | ||
relationship_map[tip_1] = (tip_2, distMatrix[0][1]) | ||
# Since these are the last two nodes, any of these can be chosen to be the root | ||
root = tip_2 | ||
return | ||
|
||
#First step is to get the closest two tips from the distance matrix | ||
mini, minj, minVal = utils.construct_QMatrix(distMatrix) | ||
|
||
edge_i, edge_j = utils.calculate_edge_lengths(distMatrix, mini, minj, N) | ||
#print(edge_i, edge_j) | ||
tip_1 = tip_2 = None | ||
# Setting the new ids to the tips/ internal node | ||
if ids[mini] in seqId_orderId_map: | ||
tip_1 = seqId_orderId_map[ids[mini]] | ||
# Base case for recursion | ||
if N <= 2: | ||
# Assigning the ids to these nodes according to the requirements | ||
if ids[0] in seqId_orderId_map: | ||
tip_1 = seqId_orderId_map[ids[0]] | ||
else: | ||
tip_1 = ids[mini] | ||
if ids[minj] in seqId_orderId_map: | ||
tip_2 = seqId_orderId_map[ids[minj]] | ||
tip_1 = ids[0] | ||
if ids[1] in seqId_orderId_map: | ||
tip_2 = seqId_orderId_map[ids[1]] | ||
else: | ||
tip_2 = ids[minj] | ||
|
||
# Add them to the tree relationships | ||
relationship_map[tip_1] = (str(seqCounter), edge_i) | ||
relationship_map[tip_2] = (str(seqCounter), edge_j) | ||
tip_2 = ids[1] | ||
# Adding their relationship to the tree | ||
relationship_map[tip_1] = (tip_2, distMatrix[0][1]) | ||
# Since these are the last two nodes, any of these can be chosen to be the root | ||
root = tip_2 | ||
return | ||
|
||
#First step is to get the closest two tips from the distance matrix | ||
mini, minj, minVal = utils.construct_QMatrix(distMatrix) | ||
|
||
edge_i, edge_j = utils.calculate_edge_lengths(distMatrix, mini, minj, N) | ||
#print(edge_i, edge_j) | ||
tip_1 = tip_2 = None | ||
# Setting the new ids to the tips/ internal node | ||
if ids[mini] in seqId_orderId_map: | ||
tip_1 = seqId_orderId_map[ids[mini]] | ||
else: | ||
tip_1 = ids[mini] | ||
if ids[minj] in seqId_orderId_map: | ||
tip_2 = seqId_orderId_map[ids[minj]] | ||
else: | ||
tip_2 = ids[minj] | ||
|
||
# Add them to the tree relationships | ||
relationship_map[tip_1] = (str(seqCounter), edge_i) | ||
relationship_map[tip_2] = (str(seqCounter), edge_j) | ||
|
||
# Remove the used ids and add the new seqCounter | ||
ids = [id for id in ids if id not in (ids[mini], ids[minj])] + [str(seqCounter)] | ||
# Remove the used ids and add the new seqCounter | ||
ids = [id for id in ids if id not in (ids[mini], ids[minj])] + [str(seqCounter)] | ||
|
||
seqCounter -=1 | ||
new_distMatrix = utils.calculate_new_distMatrix(distMatrix, mini, minj, N) | ||
seqCounter -=1 | ||
new_distMatrix = utils.calculate_new_distMatrix(distMatrix, mini, minj, N) | ||
|
||
# recursively call with the new distance matrix | ||
calculate_next_neighbours(ids, new_distMatrix, seqCounter, seqId_orderId_map) | ||
# recursively call with the new distance matrix | ||
calculate_next_neighbours(ids, new_distMatrix, seqCounter, seqId_orderId_map) | ||
|
||
calculate_next_neighbours(original_ids, original_distMatrix, seqCounter, seqId_orderId_map) | ||
calculate_next_neighbours(original_ids, original_distMatrix, seqCounter, seqId_orderId_map) | ||
|
||
# construct tree from the relationship_map relations | ||
rootNode = Node(root) | ||
queue = [rootNode] | ||
# bfs for tree construction | ||
while len(queue) > 0: | ||
nodeList = [] | ||
for node in queue: | ||
for child, (parent, distance) in relationship_map.items(): | ||
if parent == node.id: | ||
childNode = Node(child) | ||
node.add_child(childNode, distance) | ||
nodeList.append(childNode) | ||
for node in nodeList: | ||
del relationship_map[node.id] | ||
queue = nodeList | ||
return rootNode | ||
# construct tree from the relationship_map relations | ||
rootNode = Node(root) | ||
queue = [rootNode] | ||
# bfs for tree construction | ||
while len(queue) > 0: | ||
nodeList = [] | ||
for node in queue: | ||
for child, (parent, distance) in relationship_map.items(): | ||
if parent == node.id: | ||
childNode = Node(child) | ||
node.add_child(childNode, distance) | ||
nodeList.append(childNode) | ||
for node in nodeList: | ||
del relationship_map[node.id] | ||
queue = nodeList | ||
return rootNode | ||
|
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
(((4439229:0.0994636314408,(164216:0.0805450227767,146154:0.0648116394037):0.0142644977651):0.00735813002246,((720729:0.097762461851,(((((100149:0.0486748871823,(4468812:0.0261929524043,166838:0.0343723235042):0.0135727575014):0.0312959330949,348167:0.0893299080895):0.0107871348319,(286686:0.0867460556338,((779409:0.04678825286,593544:0.060210401245):0.00697756765851,4387271:0.0431570218435):0.0165513871656):0.00338682209946):0.0117075956228,(11136:0.104185713798,(5364:0.0459632162084,(3148512:0.0387720577239,106543:0.0419816434874):0.0112373221496):0.024851971263):0.00389111736513):0.00387210426656,(((1920715:0.120428860031,(260976:0.0822133814411,646961:0.0759292834311):0.0141606419883):0.0116483249952,((((4378332:0.0506567246683,523025:0.0846057248606):0.0147056847708,(511006:0.0515193738372,(((273179:0.0226913456411,188674:0.020377295005):0.000565372968216,319214:0.0219783686199):0.0109189939435,197222:0.0380379374159):0.0254489976298):0.0132847257272):0.00561348628869,(806723:0.0722060713208,(1009440:0.0708603722105,((954340:0.0772832314566,947627:0.0727840632945):0.00484880367808,(4164932:0.0605401041029,(565897:0.0476464546292,144260:0.0337801940922):0.0454491287369):0.00415186926943):0.00135530746646):0.00572966219195):0.0100726310145):0.00375090334222,4397763:0.0781236103481):0.0106890635478):0.00388775124005,((((664158:0.0802724654919,822879:0.0825808319509):0.00461971797755,((3906666:0.0824905753015,(816087:0.0812659224188,4448694:0.0762037949433):0.0255175000687):0.0177473177689,(4431405:0.0543373959424,809189:0.0614095757938):0.0302843107641):0.00656803437777):0.00423269087876,((2034243:0.094206517777,208595:0.0686467796658):0.00510333767336,996519:0.0891093137398):0.025745017735):0.013850279425,((152801:0.115587289276,3855581:0.124654971827):0.0215198171439,4345542:0.132248688913):0.188580743455):0.0120725313848):0.0120473718793):0.00536520381635):0.00336721222622,((2263183:0.0648454483874,((586332:0.0726342497929,1136782:0.103677997852):0.00763606407822,2963287:0.0704258470927):0.0074964089477):0.00238916628989,(555079:0.0352536797343,553272:0.0313681237651):0.0385765133871):0.0120786458936):0.00753624506626):0.000959080610108); | ||
(((4439229:0.0994636314408,(164216:0.0805450227767,146154:0.0648116394037):0.0142644977651):0.00735813002246,((720729:0.097762461851,(((((100149:0.0486748871823,(4468812:0.0261929524043,166838:0.0343723235042):0.0135727575014):0.0312959330949,348167:0.0893299080895):0.0107871348319,(286686:0.0867460556338,((779409:0.04678825286,593544:0.060210401245):0.00697756765851,4387271:0.0431570218435):0.0165513871656):0.00338682209946):0.0117075956228,(11136:0.104185713798,(5364:0.0459632162084,(3148512:0.0387720577239,106543:0.0419816434874):0.0112373221496):0.024851971263):0.00389111736513):0.00387210426656,(((1920715:0.120428860031,(260976:0.0822133814411,646961:0.0759292834311):0.0141606419883):0.0116483249952,((((4378332:0.0506567246683,523025:0.0846057248606):0.0147056847708,(511006:0.0515193738372,(((273179:0.0226913456411,188674:0.020377295005):0.000565372968216,319214:0.0219783686199):0.0109189939435,197222:0.0380379374159):0.0254489976298):0.0132847257272):0.00561348628869,(806723:0.0722060713208,(1009440:0.0708603722105,((954340:0.0772832314566,947627:0.0727840632945):0.00484880367808,(4164932:0.0605401041029,(565897:0.0476464546292,144260:0.0337801940922):0.0454491287369):0.00415186926943):0.00135530746646):0.00572966219195):0.0100726310145):0.00375090334222,4397763:0.0781236103481):0.0106890635478):0.00388775124005,((((822879:0.0825808319509,664158:0.0802724654919):0.00461971797755,((3906666:0.0824905753015,(816087:0.0812659224188,4448694:0.0762037949433):0.0255175000687):0.0177473177689,(4431405:0.0543373959424,809189:0.0614095757938):0.0302843107641):0.00656803437777):0.00423269087876,((2034243:0.094206517777,208595:0.0686467796658):0.00510333767336,996519:0.0891093137398):0.025745017735):0.013850279425,((152801:0.115587289276,3855581:0.124654971827):0.0215198171439,4345542:0.132248688913):0.188580743455):0.0120725313848):0.0120473718793):0.00536520381635):0.00336721222622,((2263183:0.0648454483874,((586332:0.0726342497929,1136782:0.103677997852):0.00763606407822,2963287:0.0704258470927):0.0074964089477):0.00238916628989,(555079:0.0352536797343,553272:0.0313681237651):0.0385765133871):0.0120786458936):0.00753624506626):0.000959080610108); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# This file contains the util functions | ||
# used by the nei-saitou neighbor-joining algorithm | ||
|
||
# Calculates the Q matrix from the distance matrix and returns the two closest nodes and their distance | ||
def construct_QMatrix(distance_matrix): | ||
N = len(distance_matrix) | ||
# Matrix to store the Q values | ||
QMatrix = [[0] * N for _ in range(N)] | ||
|
||
# We also want to return the two closest nodes | ||
# keep track of the minimum value and indices in the Q matrix | ||
mini = minj = 0 | ||
minVal = float('inf') | ||
|
||
for i in range(N): | ||
for j in range(N): | ||
if i == j: | ||
continue | ||
QMatrix[i][j] = (N - 2) * distance_matrix[i][j] - sum(distance_matrix[i]) - sum(distance_matrix[j]) | ||
|
||
if QMatrix[i][j] < minVal: | ||
mini = i | ||
minj = j | ||
minVal = QMatrix[i][j] | ||
|
||
return mini, minj, minVal | ||
|
||
# Calculates the edge lengths to the u node and returns the lengths. | ||
def calculate_edge_lengths(distMatrix, mini, minj, N): | ||
# Distances to the new internal node | ||
edge_i = 1/ 2.0 * distMatrix[mini][minj] + 1 / (2.0 * (N - 2))* (sum(distMatrix[mini]) - sum(distMatrix[minj])) | ||
# distance from minj node to u | ||
edge_j = distMatrix[mini][minj] - edge_i | ||
return edge_i, edge_j | ||
|
||
|
||
def calculate_new_distMatrix(distMatrix, mini, minj, N): | ||
updatedDistances = [[0] * (N + 1) for _ in range(N + 1)] | ||
for i in xrange(N): | ||
for j in xrange(N): | ||
updatedDistances[i][j] = distMatrix[i][j] | ||
# update the distances to the new node | ||
for k in range(N): | ||
updatedDistances[N][k] = (0.5) * (distMatrix[mini][k] + distMatrix[minj][k] - distMatrix[mini][minj]) | ||
updatedDistances[k][N] = updatedDistances[N][k] | ||
|
||
# Creates a new distance matrix | ||
new_distMatrix = [[0] * (N - 1) for _ in range(N - 1)] | ||
keep_i = keep_j = 0 | ||
for i in range(N + 1): | ||
# Replacing these two with a new node | ||
if i == mini or i == minj: | ||
continue | ||
keep_j = 0 | ||
for j in range(N + 1): | ||
# Replacing these two with the new node | ||
if j == mini or j == minj: | ||
continue | ||
new_distMatrix[keep_i][keep_j] = updatedDistances[i][j] | ||
keep_j += 1 | ||
keep_i += 1 | ||
|
||
return new_distMatrix |