From db462fa873959a6f0b4bb13c95347e2357e2aa1f Mon Sep 17 00:00:00 2001 From: esayyari Date: Thu, 18 Jun 2020 15:27:19 -0700 Subject: [PATCH] Adding optional flag filter-unobserved-features-from-phylogeny to improve running time (#178) * adding filter unobserved features from phylogeny * updating the code to filter unobserved features from phylogeny; making tests for it * updating the code to filter unobserved features from phylogeny; making tests for it * fixing failed tests * fixing failed tests * fixing failed tests * fixing failed tests --- empress/_plot.py | 8 ++++-- empress/core.py | 18 ++++++++++-- empress/plugin_setup.py | 8 +++++- tests/python/test_core.py | 58 ++++++++++++++++++++++++++++++++++----- 4 files changed, 78 insertions(+), 14 deletions(-) diff --git a/empress/_plot.py b/empress/_plot.py index 559171321..ecb2595e4 100644 --- a/empress/_plot.py +++ b/empress/_plot.py @@ -30,7 +30,8 @@ def plot(output_dir: str, tree: NewickFormat, feature_table: pd.DataFrame, feature_metadata: qiime2.Metadata = None, ignore_missing_samples: bool = False, filter_missing_features: bool = False, - number_of_features: int = 5) -> None: + number_of_features: int = 5, + filter_unobserved_features_from_phylogeny: bool = True) -> None: # TODO: do not ignore the feature metadata when specified by the user if feature_metadata is not None: feature_metadata = feature_metadata.to_dataframe() @@ -50,12 +51,13 @@ def plot(output_dir: str, tree: NewickFormat, feature_table: pd.DataFrame, # path to the actual newick file with open(str(tree)) as file: t = parse_newick(file.readline()) - + trim_tree = filter_unobserved_features_from_phylogeny viz = Empress(tree=t, table=feature_table, sample_metadata=sample_metadata, feature_metadata=feature_metadata, ordination=pcoa, ignore_missing_samples=ignore_missing_samples, - filter_missing_features=filter_missing_features) + filter_missing_features=filter_missing_features, + filter_unobserved_features_from_phylogeny=trim_tree) with 
open(os.path.join(output_dir, 'empress.html'), 'w') as file: file.write(str(viz)) diff --git a/empress/core.py b/empress/core.py index c929d2e58..f81143b49 100644 --- a/empress/core.py +++ b/empress/core.py @@ -29,7 +29,8 @@ class Empress(): def __init__(self, tree, table, sample_metadata, feature_metadata=None, ordination=None, ignore_missing_samples=False, filter_missing_features=False, - resource_path=None): + resource_path=None, + filter_unobserved_features_from_phylogeny=True): """Visualize a phylogenetic tree Use this object to interactively display a phylogenetic tree using the @@ -69,6 +70,10 @@ def __init__(self, tree, table, sample_metadata, resource_path: str, optional Load the resources from a user-specified remote location. If set to None resources are loaded from the current directory. + filter_unobserved_features_from_phylogeny: bool, optional + If True, filters features from the phylogeny that aren't present as + features in the feature table. Otherwise, + the phylogeny is not filtered. 
Attributes @@ -104,7 +109,9 @@ def __init__(self, tree, table, sample_metadata, if self.base_url is None: self.base_url = './' - self._validate_data(ignore_missing_samples, filter_missing_features) + self._validate_data(ignore_missing_samples, + filter_missing_features, + filter_unobserved_features_from_phylogeny) if self.ordination is not None: self._emperor = Emperor( @@ -115,7 +122,12 @@ def __init__(self, tree, table, sample_metadata, else: self._emperor = None - def _validate_data(self, ignore_missing_samples, filter_missing_features): + def _validate_data(self, ignore_missing_samples, filter_missing_features, + filter_unobserved_features_from_phylogeny): + # remove unobserved features from the phylogeny + if filter_unobserved_features_from_phylogeny: + self.tree = self.tree.shear(set(self.table.columns)) + # extract balance parenthesis self._bp_tree = list(self.tree.B) diff --git a/empress/plugin_setup.py b/empress/plugin_setup.py index c65fdd9ce..c83e9fc72 100644 --- a/empress/plugin_setup.py +++ b/empress/plugin_setup.py @@ -41,7 +41,8 @@ 'feature_metadata': Metadata, 'ignore_missing_samples': Bool, 'filter_missing_features': Bool, - 'number_of_features': Int % Range(1, None) + 'number_of_features': Int % Range(1, None), + 'filter_unobserved_features_from_phylogeny': Bool }, input_descriptions={ 'tree': 'The phylogenetic tree to visualize.', @@ -91,6 +92,11 @@ '(euclidean distance from origin). Note, this ' 'parameter is only honored when a biplot is ' 'inputed.', + 'filter_unobserved_features_from_phylogeny': ( + 'If this flag is passed, filters features from the phylogeny ' + 'that are not present as features in feature table. ' + 'Default is True.' 
+ ) }, name='Visualize and Explore Phylogenies with Empress', description=( diff --git a/tests/python/test_core.py b/tests/python/test_core.py index 8ef7c1542..ae52c22ed 100644 --- a/tests/python/test_core.py +++ b/tests/python/test_core.py @@ -18,11 +18,16 @@ from emperor import Emperor from empress.core import Empress from bp import parse_newick +from six import StringIO +from skbio.tree import TreeNode class TestCore(unittest.TestCase): def setUp(self): self.tree = parse_newick('(((a:1,e:2):1,b:2)g:1,(:1,d:3)h:2):1;') + self.pruned_tree = TreeNode.read( + StringIO('(((a:1)EmpressNode0:1,b:2)g:1,(d:3)h:2)EmpressNode1:1;') + ) # Test table/metadata (mostly) adapted from Qurro: # the table is transposed to match QIIME2's expectation self.table = pd.DataFrame( @@ -44,6 +49,16 @@ def setUp(self): index=list(self.table.index) ) + self.filtered_table = pd.DataFrame( + { + "Sample1": [1, 2, 4], + "Sample2": [8, 7, 5], + "Sample3": [1, 0, 0], + "Sample4": [0, 0, 0] + }, + index=["a", "b", "d"] + ).T + eigvals = pd.Series(np.array([0.50, 0.25, 0.25]), index=['PC1', 'PC2', 'PC3']) samples = np.array([[0.1, 0.2, 0.3], @@ -71,7 +86,8 @@ def tearDown(self): rmtree(path) def test_init(self): - viz = Empress(self.tree, self.table, self.sample_metadata) + viz = Empress(self.tree, self.table, self.sample_metadata, + filter_unobserved_features_from_phylogeny=False) self.assertEqual(viz.base_url, './') self.assertEqual(viz._bp_tree, [1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, @@ -95,7 +111,8 @@ def test_init(self): def test_init_with_ordination(self): viz = Empress(self.tree, self.table, self.sample_metadata, - ordination=self.pcoa) + ordination=self.pcoa, + filter_unobserved_features_from_phylogeny=False) self.assertEqual(viz.base_url, './') self.assertEqual(viz._bp_tree, [1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, @@ -126,13 +143,15 @@ def test_init_feature_metadata_warning(self): with self.assertWarnsRegex(UserWarning, 'Feature metadata is currently' ' not supported'): 
Empress(self.tree, self.table, self.sample_metadata, - feature_metadata=self.sample_metadata.copy()) + feature_metadata=self.sample_metadata.copy(), + filter_unobserved_features_from_phylogeny=False) def test_copy_support_files_use_base(self): local_path = './some-local-path/' viz = Empress(self.tree, self.table, self.sample_metadata, - resource_path=local_path) + resource_path=local_path, + filter_unobserved_features_from_phylogeny=False) self.assertEqual(viz.base_url, local_path) viz.copy_support_files() @@ -145,7 +164,8 @@ def test_copy_support_files_use_target(self): local_path = './other-local-path/' viz = Empress(self.tree, self.table, self.sample_metadata, - resource_path=local_path) + resource_path=local_path, + filter_unobserved_features_from_phylogeny=False) self.assertEqual(viz.base_url, local_path) viz.copy_support_files(target='./something-else') @@ -156,13 +176,15 @@ def test_copy_support_files_use_target(self): self.files_to_remove.append('./something-else') def test_to_dict(self): - viz = Empress(self.tree, self.table, self.sample_metadata) + viz = Empress(self.tree, self.table, self.sample_metadata, + filter_unobserved_features_from_phylogeny=False) obs = viz._to_dict() self.assertEqual(obs, DICT_A) def test_to_dict_with_emperor(self): viz = Empress(self.tree, self.table, self.sample_metadata, - ordination=self.pcoa) + ordination=self.pcoa, + filter_unobserved_features_from_phylogeny=False) obs = viz._to_dict() self.assertEqual(viz._emperor.width, '48vw') @@ -196,6 +218,28 @@ def test_to_dict_with_emperor(self): self.assertTrue(obs['emperor_classes'], 'combined-plot-container') + def test_filter_unobserved_features_from_phylogeny(self): + + viz = Empress(self.tree, self.filtered_table, self.sample_metadata, + filter_unobserved_features_from_phylogeny=True) + self.assertEqual(viz._bp_tree, [1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, + 0, 0, 0]) + + names = ['a', 'EmpressNode0', 'b', 'g', 'd', 'h', 'EmpressNode1'] + for i, node in 
enumerate(viz.tree.postorder()): + self.assertEqual(node.name, names[i]) + + # table should be unchanged and be a different id instance + assert_frame_equal(self.filtered_table, viz.table.T) + self.assertNotEqual(id(self.filtered_table), id(viz.table)) + + # sample metadata should be unchanged and be a different id instance + assert_frame_equal(self.sample_metadata, viz.samples) + self.assertNotEqual(id(self.sample_metadata), id(viz.samples)) + + self.assertIsNone(viz.features) + self.assertIsNone(viz.ordination) + # How data should look like when converted to a dict DICT_A = {'base_url': './support_files',