Skip to content

Commit

Permalink
Adding optional flag filter-unobserved-features-from-phylogeny to imp…
Browse files Browse the repository at this point in the history
…rove running time (#178)

* adding filter unobserved features from phylogeny

* updating the code to filter unobserved features from phylogeny; making tests for it

* updating the code to filter unobserved features from phylogeny; making tests for it

* fixing failed tests

* fixing failed tests

* fixing failed tests

* fixing failed tests
  • Loading branch information
esayyari authored Jun 18, 2020
1 parent 36df44c commit db462fa
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 14 deletions.
8 changes: 5 additions & 3 deletions empress/_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def plot(output_dir: str, tree: NewickFormat, feature_table: pd.DataFrame,
feature_metadata: qiime2.Metadata = None,
ignore_missing_samples: bool = False,
filter_missing_features: bool = False,
number_of_features: int = 5) -> None:
number_of_features: int = 5,
filter_unobserved_features_from_phylogeny: bool = True) -> None:
# TODO: do not ignore the feature metadata when specified by the user
if feature_metadata is not None:
feature_metadata = feature_metadata.to_dataframe()
Expand All @@ -50,12 +51,13 @@ def plot(output_dir: str, tree: NewickFormat, feature_table: pd.DataFrame,
# path to the actual newick file
with open(str(tree)) as file:
t = parse_newick(file.readline())

trim_tree = filter_unobserved_features_from_phylogeny
viz = Empress(tree=t, table=feature_table,
sample_metadata=sample_metadata,
feature_metadata=feature_metadata, ordination=pcoa,
ignore_missing_samples=ignore_missing_samples,
filter_missing_features=filter_missing_features)
filter_missing_features=filter_missing_features,
filter_unobserved_features_from_phylogeny=trim_tree)

with open(os.path.join(output_dir, 'empress.html'), 'w') as file:
file.write(str(viz))
Expand Down
18 changes: 15 additions & 3 deletions empress/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ class Empress():
def __init__(self, tree, table, sample_metadata,
feature_metadata=None, ordination=None,
ignore_missing_samples=False, filter_missing_features=False,
resource_path=None):
resource_path=None,
filter_unobserved_features_from_phylogeny=True):
"""Visualize a phylogenetic tree
Use this object to interactively display a phylogenetic tree using the
Expand Down Expand Up @@ -69,6 +70,10 @@ def __init__(self, tree, table, sample_metadata,
resource_path: str, optional
Load the resources from a user-specified remote location. If set to
None resources are loaded from the current directory.
filter_unobserved_features_from_phylogeny: bool, optional
If True, filters features from the phylogeny that aren't present as
features in the feature table. Otherwise, the phylogeny is not
filtered.
Attributes
Expand Down Expand Up @@ -104,7 +109,9 @@ def __init__(self, tree, table, sample_metadata,
if self.base_url is None:
self.base_url = './'

self._validate_data(ignore_missing_samples, filter_missing_features)
self._validate_data(ignore_missing_samples,
filter_missing_features,
filter_unobserved_features_from_phylogeny)

if self.ordination is not None:
self._emperor = Emperor(
Expand All @@ -115,7 +122,12 @@ def __init__(self, tree, table, sample_metadata,
else:
self._emperor = None

def _validate_data(self, ignore_missing_samples, filter_missing_features):
def _validate_data(self, ignore_missing_samples, filter_missing_features,
filter_unobserved_features_from_phylogeny):
# remove unobserved features from the phylogeny
if filter_unobserved_features_from_phylogeny:
self.tree = self.tree.shear(set(self.table.columns))

# extract balanced parentheses
self._bp_tree = list(self.tree.B)

Expand Down
8 changes: 7 additions & 1 deletion empress/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@
'feature_metadata': Metadata,
'ignore_missing_samples': Bool,
'filter_missing_features': Bool,
'number_of_features': Int % Range(1, None)
'number_of_features': Int % Range(1, None),
'filter_unobserved_features_from_phylogeny': Bool
},
input_descriptions={
'tree': 'The phylogenetic tree to visualize.',
Expand Down Expand Up @@ -91,6 +92,11 @@
'(euclidean distance from origin). Note, this '
'parameter is only honored when a biplot is '
'inputed.',
'filter_unobserved_features_from_phylogeny': (
'If this flag is passed, filters features from the phylogeny '
'that are not present as features in feature table. '
'Default is True.'
)
},
name='Visualize and Explore Phylogenies with Empress',
description=(
Expand Down
58 changes: 51 additions & 7 deletions tests/python/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,16 @@
from emperor import Emperor
from empress.core import Empress
from bp import parse_newick
from six import StringIO
from skbio.tree import TreeNode


class TestCore(unittest.TestCase):
def setUp(self):
self.tree = parse_newick('(((a:1,e:2):1,b:2)g:1,(:1,d:3)h:2):1;')
self.pruned_tree = TreeNode.read(
StringIO('(((a:1)EmpressNode0:1,b:2)g:1,(d:3)h:2)EmpressNode1:1;')
)
# Test table/metadata (mostly) adapted from Qurro:
# the table is transposed to match QIIME2's expectation
self.table = pd.DataFrame(
Expand All @@ -44,6 +49,16 @@ def setUp(self):
index=list(self.table.index)
)

self.filtered_table = pd.DataFrame(
{
"Sample1": [1, 2, 4],
"Sample2": [8, 7, 5],
"Sample3": [1, 0, 0],
"Sample4": [0, 0, 0]
},
index=["a", "b", "d"]
).T

eigvals = pd.Series(np.array([0.50, 0.25, 0.25]),
index=['PC1', 'PC2', 'PC3'])
samples = np.array([[0.1, 0.2, 0.3],
Expand Down Expand Up @@ -71,7 +86,8 @@ def tearDown(self):
rmtree(path)

def test_init(self):
viz = Empress(self.tree, self.table, self.sample_metadata)
viz = Empress(self.tree, self.table, self.sample_metadata,
filter_unobserved_features_from_phylogeny=False)

self.assertEqual(viz.base_url, './')
self.assertEqual(viz._bp_tree, [1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1,
Expand All @@ -95,7 +111,8 @@ def test_init(self):

def test_init_with_ordination(self):
viz = Empress(self.tree, self.table, self.sample_metadata,
ordination=self.pcoa)
ordination=self.pcoa,
filter_unobserved_features_from_phylogeny=False)

self.assertEqual(viz.base_url, './')
self.assertEqual(viz._bp_tree, [1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1,
Expand Down Expand Up @@ -126,13 +143,15 @@ def test_init_feature_metadata_warning(self):
with self.assertWarnsRegex(UserWarning, 'Feature metadata is currently'
' not supported'):
Empress(self.tree, self.table, self.sample_metadata,
feature_metadata=self.sample_metadata.copy())
feature_metadata=self.sample_metadata.copy(),
filter_unobserved_features_from_phylogeny=False)

def test_copy_support_files_use_base(self):
local_path = './some-local-path/'

viz = Empress(self.tree, self.table, self.sample_metadata,
resource_path=local_path)
resource_path=local_path,
filter_unobserved_features_from_phylogeny=False)
self.assertEqual(viz.base_url, local_path)

viz.copy_support_files()
Expand All @@ -145,7 +164,8 @@ def test_copy_support_files_use_target(self):
local_path = './other-local-path/'

viz = Empress(self.tree, self.table, self.sample_metadata,
resource_path=local_path)
resource_path=local_path,
filter_unobserved_features_from_phylogeny=False)
self.assertEqual(viz.base_url, local_path)

viz.copy_support_files(target='./something-else')
Expand All @@ -156,13 +176,15 @@ def test_copy_support_files_use_target(self):
self.files_to_remove.append('./something-else')

def test_to_dict(self):
viz = Empress(self.tree, self.table, self.sample_metadata)
viz = Empress(self.tree, self.table, self.sample_metadata,
filter_unobserved_features_from_phylogeny=False)
obs = viz._to_dict()
self.assertEqual(obs, DICT_A)

def test_to_dict_with_emperor(self):
viz = Empress(self.tree, self.table, self.sample_metadata,
ordination=self.pcoa)
ordination=self.pcoa,
filter_unobserved_features_from_phylogeny=False)
obs = viz._to_dict()

self.assertEqual(viz._emperor.width, '48vw')
Expand Down Expand Up @@ -196,6 +218,28 @@ def test_to_dict_with_emperor(self):

self.assertTrue(obs['emperor_classes'], 'combined-plot-container')

def test_filter_unobserved_features_from_phylogeny(self):
    """Check the tree, table and metadata when
    filter_unobserved_features_from_phylogeny=True is used with a table
    that only contains the features a, b and d.
    """
    viz = Empress(self.tree, self.filtered_table, self.sample_metadata,
                  filter_unobserved_features_from_phylogeny=True)
    self.assertEqual(viz._bp_tree, [1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1,
                                    0, 0, 0])

    # Compare the complete postorder name sequence in one assertion, so a
    # tree with missing nodes fails (instead of passing silently) and a
    # tree with extra nodes fails with a readable diff (instead of an
    # IndexError).
    names = ['a', 'EmpressNode0', 'b', 'g', 'd', 'h', 'EmpressNode1']
    self.assertEqual([node.name for node in viz.tree.postorder()], names)

    # the table should hold the same data but be a distinct object
    assert_frame_equal(self.filtered_table, viz.table.T)
    self.assertIsNot(self.filtered_table, viz.table)

    # the sample metadata should hold the same data but be a distinct
    # object
    assert_frame_equal(self.sample_metadata, viz.samples)
    self.assertIsNot(self.sample_metadata, viz.samples)

    self.assertIsNone(viz.features)
    self.assertIsNone(viz.ordination)


# What the data should look like when converted to a dict
DICT_A = {'base_url': './support_files',
Expand Down

0 comments on commit db462fa

Please sign in to comment.