-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnewick.py
74 lines (66 loc) · 2.55 KB
/
newick.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""
Functions for reading a tree from a Newick file.
"""
from __future__ import absolute_import
import re
from collections import Counter
def _validate_newick(nw_str):
"""
Takes a Newick string as an input and performs a bunch of tests to verify
that the input is in the correct format.
"""
if not nw_str.startswith("(") and nw_str.endswith(";"):
raise AssertionError("File should start with \"(\" end with ;")
count = Counter(nw_str)
if count["("] != count[")"]:
raise AssertionError("Unbalanced parentheses do not match.")
return
def _read_label(label, node, interior_node):
"""
Takes a node label, current working node and a boolean, signifying whether
if the label belongs to an interior node or not, as an input. Adds the
information to the current node, or a new node, depending on if it is an
interior node or not.
"""
label = label.split(")")[0]
if ":" in label:
label, branch_len = label.split(":")
else:
branch_len = 0 # No branch length is assigned, use default.
if interior_node:
node.dist = float(branch_len)
# Works for both decimal and integers.
if label.replace(".", "", 1).isdigit():
node.support = label
else:
node.name = label
else:
# Node label belongs to leaf, assign data to a new node.
node.add_child(None, label, branch_len)
return
def read(path, root=None):
    """
    Parse the Newick file at path and return the root node of the tree.

    path is the file to open for reading. Tabs, newlines, carriage returns
    and spaces are stripped from its content before it is checked with
    _validate_newick and parsed.

    root, when given, is used as the root node of the parsed tree. When it
    is None a new tree_node.TreeNode is created instead.

    Returns the root node.
    Raises AssertionError (from _validate_newick) when the content does not
    start with "(" and end with ";" or its parentheses are unbalanced.
    """
    with open(path, "r") as nw_file:
        nw_str = nw_file.read()
    nw_str = re.sub("[\t\n\r ]", "", nw_str)  # remove unnecessary whitespace
    _validate_newick(nw_str)

    # Test for None explicitly so that a caller-supplied node is never
    # discarded just because it happens to evaluate as false.
    if root is None:
        import tree_node
        root = tree_node.TreeNode()

    node = None
    # Every "(" opens a new interior node; the text in front of the first
    # "(" carries no information and is skipped.
    for opening_paren in nw_str.split("(")[1:]:
        # The first "(" belongs to the root; each later one starts a child
        # of the current node (assumes add_child() returns the new child).
        node = root if node is None else node.add_child()
        for subpart in opening_paren.split(","):
            if not subpart:
                continue
            # The text before any ")" describes a leaf below the current node.
            _read_label(subpart, node, False)
            # Every ")" closes the current node; the text after it is that
            # node's own label.
            for closing_paren in subpart.split(")")[1:]:
                _read_label(closing_paren.rstrip(";"), node, True)
                node = node.parent  # up one level
    return root