Add tests

mckinsey · Apr 9, 2024 · 04be56e · 04be56e
1 parent e1a7222
commit 04be56e
Show file tree

Hide file tree

Showing 7 changed files with 240 additions and 83 deletions.
diff --git a/vizro-core/docs/pages/user-guides/dynamic-data.md b/vizro-core/docs/pages/user-guides/dynamic-data.md
@@ -50,14 +50,42 @@ The Vizro Data Manager has a server-side caching mechanism to help solve this. V
 
 In a development environment the easiest way to enable caching is to use a [simple memory cache](https://cachelib.readthedocs.io/en/stable/simple/) with the default configuration options:
 
-```py title="Simple cache with default timeout of 5 minutes"
-from flask_caching import Cache
+!!! example "Simple cache with default timeout of 5 minutes"
+    === "app.py"
+        ```py hl_lines="12"
+        from flask_caching import Cache
+        from vizro import Vizro
+        import pandas as pd
+        import vizro.plotly.express as px
+        import vizro.models as vm
 
-data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache"})
-data_manager["iris"] = load_iris_data
-```
+        from vizro.managers import data_manager
+
+        def load_iris_data():
+            return pd.read_csv("iris.csv")
+
+        data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache"})
+        data_manager["iris"] = load_iris_data
+        
+        page = vm.Page(
+            title="My first page",
+            components=[
+                vm.Graph(figure=px.scatter("iris", x="sepal_length", y="petal_width", color="species")),
+            ],
+            controls=[vm.Filter(column="species")],
+        )
+
+        dashboard = vm.Dashboard(pages=[page])
 
-By default, dynamic data is cached in the Data Manager for 5 minutes. A refresh of the dashboard within this time interval will fetch the pandas DataFrame from the cache and not re-run the data loading function. Once the cache timeout period has elapsed, the next refresh of the dashboard will re-execute the dynamic data loading function. The resulting pandas DataFrame will again be put into the cache and not expire until another 5 minutes has elapsed.
+        Vizro().build(dashboard).run()
+        ```
+
+    === "Result"
+        [![DataBasic]][DataBasic]
+
+    [DataBasic]: ../../assets/user_guides/data/data_pandas_dataframe.png
+
+
+By default, when caching is turned on, dynamic data is cached in the Data Manager for 5 minutes. A refresh of the dashboard within this time interval will fetch the pandas DataFrame from the cache and not re-run the data loading function. Once the cache timeout period has elapsed, the next refresh of the dashboard will re-execute the dynamic data loading function. The resulting pandas DataFrame will again be put into the cache and not expire until another 5 minutes has elapsed.
 
 If you would like to alter some options, such as the default cache timeout, then you can specify a different cache configuration:
 
@@ -77,6 +105,8 @@ data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache", "CACHE_DEFAULT_T
     data_manager.cache = Cache(config={"CACHE_TYPE": "RedisCache", "CACHE_REDIS_HOST": "localhost", "CACHE_REDIS_PORT": 6379})
     ```
 
+Note that when a production-ready cache backend is used, the cache is persisted beyond the Vizro process and is not cleared by restarting your server. If you wish to clear the cache then you must do so manually, e.g. if you use `FileSystemCache` then you would delete your `cache` directory. Persisting the cache can also be useful for development purposes when handling data that takes a long time to load: even if you do not need the data to refresh while your dashboard is running, it can speed up your development loop to use dynamic data with a cache that is persisted between repeated runs of Vizro.
+
 ### Configure timeouts
 
 You can change the timeout of the cache independently for each dynamic data source in the Data Manager using the `timeout` setting (measured in seconds). A `timeout` of 0 indicates that the cache does not expire. This is effectively the same as using [static data](static-data.md).

diff --git a/vizro-core/tests/tests_utils/asserts.py b/vizro-core/tests/tests_utils/asserts.py
@@ -2,6 +2,7 @@
 
 import dash.development
 import plotly
+from pandas.testing import assert_frame_equal
 
 STRIP_ALL = object()
 
@@ -42,3 +43,15 @@ def assert_component_equal(left, right, *, keys_to_strip=None):
     left = _strip_keys(_component_to_dict(left), keys_to_strip)
     right = _strip_keys(_component_to_dict(right), keys_to_strip)
     assert left == right
+
+
+# Taken from https://stackoverflow.com/questions/38778266/assert-two-frames-are-not-equal.
+def assert_frame_not_equal(*args, **kwargs):
+    """Assert that two DataFrames are not equal.
+
+    All positional and keyword arguments are forwarded unchanged to pandas.testing.assert_frame_equal, so the same
+    comparison options apply.
+
+    Raises:
+        AssertionError: If assert_frame_equal does not report any difference between the frames.
+    """
+    try:
+        assert_frame_equal(*args, **kwargs)
+    except AssertionError:
+        # assert_frame_equal found a difference, which is exactly what this helper expects.
+        return
+    # No difference was reported, so fail with an explicit message rather than a bare AssertionError.
+    raise AssertionError("DataFrames are equal, but were expected to differ.")
diff --git a/vizro-core/tests/unit/vizro/managers/test_data_manager.py b/vizro-core/tests/unit/vizro/managers/test_data_manager.py
@@ -1,62 +1,173 @@
 """Unit tests for vizro.managers.data_manager."""
+from contextlib import suppress
 
+import time
+
+import numpy as np
 import pandas as pd
 import pytest
-from vizro.managers._data_manager import DataManager
-
-
-class TestDataManager:
-    def setup_method(self):
-        self.data_manager = DataManager()
-        self.data = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
-
-    def test_add_dataframe(self):
-        dataset_name = "test_dataset"
-        component_id = "component_id_a"
-        self.data_manager[dataset_name] = self.data
-        self.data_manager._add_component(component_id, dataset_name)
-        assert self.data_manager._get_component_data(component_id).equals(self.data)
-
-    def test_add_lazy_dataframe(self):
-        dataset_name = "test_lazy_dataset"
-        component_id = "component_id_b"
-
-        def lazy_data():
-            return self.data
-
-        self.data_manager[dataset_name] = lazy_data
-        self.data_manager._add_component(component_id, dataset_name)
-        assert self.data_manager._get_component_data(component_id).equals(lazy_data())
-
-    def test_add_existing_dataset(self):
-        dataset_name = "existing_dataset"
-        self.data_manager[dataset_name] = self.data
-        with pytest.raises(ValueError):
-            self.data_manager[dataset_name] = self.data
-
-    def test_add_invalid_dataset(self):
-        dataset_name = "invalid_dataset"
-        invalid_data = "not_a_dataframe"
-        with pytest.raises(TypeError):
-            self.data_manager[dataset_name] = invalid_data
-
-    def test_add_component_to_nonexistent_dataset(self):
-        component_id = "test_component"
-        dataset_name = "nonexistent_dataset"
-        with pytest.raises(KeyError):
-            self.data_manager._add_component(component_id, dataset_name)
-
-    def test_add_existing_component(self):
-        component_id = "existing_component"
-        dataset_name = "test_dataset"
-        self.data_manager[dataset_name] = self.data
-        self.data_manager._add_component(component_id, dataset_name)
-        with pytest.raises(ValueError):
-            self.data_manager._add_component(component_id, dataset_name)
-
-    def test_get_component_data_nonexistent(self):
-        dataset_name = "test_dataset"
-        nonexistent_component = "nonexistent_component"
-        self.data_manager[dataset_name] = self.data
-        with pytest.raises(KeyError):
-            self.data_manager._get_component_data(nonexistent_component)
+from flask_caching import Cache
+
+from asserts import assert_frame_not_equal
+from vizro import Vizro
+from vizro.managers import data_manager
+from pandas.testing import assert_frame_equal
+
+
+@pytest.fixture(autouse=True)
+def clear_cache():
+    """Clear the data manager's cache after every test in this module (autouse teardown)."""
+    yield
+    # Vizro._reset doesn't empty the cache, so any tests which have something other than NullCache must clear it
+    # after running. Suppress AttributeError: 'Cache' object has no attribute 'app' that occurs when
+    # data_manager._cache_has_app is False.
+    with suppress(AttributeError):
+        data_manager.cache.clear()
+
+
+class TestLoad:
+    """Loading static and dynamic data sources through the data manager."""
+
+    def test_static(self):
+        frame = pd.DataFrame([1, 2, 3])
+        data_manager["data"] = frame
+        result = data_manager["data"].load()
+        # The loaded frame must be a distinct object (a copy) that still has the same contents.
+        assert result is not frame
+        assert_frame_equal(result, frame)
+
+    def test_dynamic(self):
+        load_frame = lambda: pd.DataFrame([1, 2, 3])
+        data_manager["data"] = load_frame
+        result = data_manager["data"].load()
+        # load_frame builds a new DataFrame on every call, so the comparisons are made against fresh frames.
+        assert result is not load_frame()
+        assert_frame_equal(result, load_frame())
+
+
+class TestInvalid:
+    """Invalid uses of the data manager raise errors with the expected messages."""
+
+    def test_static_data_does_not_support_timeout(self):
+        data = pd.DataFrame([1, 2, 3])
+        data_manager["data"] = data
+        # Assigning a timeout to a data source registered as a plain DataFrame must raise.
+        with pytest.raises(
+            AttributeError, match="Static data that is a pandas.DataFrame itself does not support timeout"
+        ):
+            data_manager["data"].timeout = 10
+
+    def test_setitem(self):
+        # A pandas Series is neither a DataFrame nor a function, so registering it must raise.
+        with pytest.raises(
+            TypeError, match="Data source data must be a pandas DataFrame or function that returns a pandas DataFrame."
+        ):
+            data_manager["data"] = pd.Series([1, 2, 3])
+
+    def test_does_not_exist(self):
+        # This test registers nothing under "data", so the lookup itself is expected to raise.
+        with pytest.raises(KeyError, match="Data source data does not exist."):
+            data_manager["data"]
+
+
+def make_random_data():
+    """Return a DataFrame of three random numbers drawn from a newly created generator.
+
+    The generator is created without a seed on each call, so repeated calls are expected to give different frames.
+    """
+    rng = np.random.default_rng()
+    values = rng.random(3)
+    return pd.DataFrame(values)
+
+
+class TestCacheNotOperational:
+    """Cases in which loads are not cached, so two loads of random data give different frames."""
+
+    def test_null_cache_no_app(self):
+        # No app at all, so memoize decorator is bypassed completely as data_manager._cache_has_app is False.
+        data_manager["data"] = make_random_data
+        loaded_data_1 = data_manager["data"].load()
+        loaded_data_2 = data_manager["data"].load()
+        assert_frame_not_equal(loaded_data_1, loaded_data_2)
+
+    def test_null_cache_with_app(self):
+        # App exists but cache is NullCache so does not do anything.
+        data_manager["data"] = make_random_data
+        Vizro()
+        loaded_data_1 = data_manager["data"].load()
+        loaded_data_2 = data_manager["data"].load()
+        assert_frame_not_equal(loaded_data_1, loaded_data_2)
+
+    def test_cache_no_app(self):
+        # App exists and has a real cache but data_manager.cache is set too late so app is not attached to cache.
+        data_manager["data"] = make_random_data
+        Vizro()
+        data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache"})
+
+        # Loading is expected to warn that the cache is not operational.
+        with pytest.warns(UserWarning, match="Cache does not have Vizro app attached and so is not operational."):
+            loaded_data_1 = data_manager["data"].load()
+            loaded_data_2 = data_manager["data"].load()
+        assert_frame_not_equal(loaded_data_1, loaded_data_2)
+
+
+@pytest.fixture
+def simple_cache():
+    """Give the data manager a SimpleCache and then create the Vizro app so the cache is operational.
+
+    The Flask request context is not needed to run these tests. (flask-caching's own tests for memoize use
+    app.test_request_context() but look like they don't actually need to, since only flask_caching.Cache.cached
+    requires the request context.)
+    """
+    cache_config = {"CACHE_TYPE": "SimpleCache"}
+    data_manager.cache = Cache(config=cache_config)
+    # A Flask app must be attached for the cache to be operational, hence the call to Vizro() after the cache is set.
+    Vizro()
+    yield
+
+
+# TODO: consider whether any further cache test cases are needed here.
+# TODO: confirm that none of these changes affect what was previously verified by manual testing.
+class TestCache:
+    """Behavior of data loading when an operational cache is attached to the data manager."""
+
+    def test_simple_cache(self, simple_cache):
+        data_manager["data"] = make_random_data
+        loaded_data_1 = data_manager["data"].load()
+        loaded_data_2 = data_manager["data"].load()
+
+        # Cache saves result.
+        assert_frame_equal(loaded_data_1, loaded_data_2)
+
+    def test_shared_dynamic_data_function(self, simple_cache):
+        data_manager["data_x"] = make_random_data
+        data_manager["data_y"] = make_random_data
+
+        # Two data sources that share the same function are independent.
+        loaded_data_x_1 = data_manager["data_x"].load()
+        loaded_data_y_1 = data_manager["data_y"].load()
+        assert_frame_not_equal(loaded_data_x_1, loaded_data_y_1)
+
+        loaded_data_x_2 = data_manager["data_x"].load()
+        loaded_data_y_2 = data_manager["data_y"].load()
+
+        # Cache saves result.
+        assert_frame_equal(loaded_data_x_1, loaded_data_x_2)
+        assert_frame_equal(loaded_data_y_1, loaded_data_y_2)
+
+    def test_change_default_timeout(self):
+        # The simple_cache fixture is not used here because this test needs a non-default CACHE_DEFAULT_TIMEOUT.
+        # The cache is assigned before Vizro() is called, in the same order as the fixture.
+        data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache", "CACHE_DEFAULT_TIMEOUT": 1})
+        Vizro()
+
+        data_manager["data"] = make_random_data
+        loaded_data_1 = data_manager["data"].load()
+        # Wait for the full one-second default timeout before loading again.
+        time.sleep(1)
+        loaded_data_2 = data_manager["data"].load()
+
+        # Cache has expired in between two data loads.
+        assert_frame_not_equal(loaded_data_1, loaded_data_2)
+
+    def test_change_individual_timeout(self, simple_cache):
+        data_manager["data"] = make_random_data
+        data_manager["data"].timeout = 1
+
+        loaded_data_1 = data_manager["data"].load()
+        # Wait for the full one-second per-source timeout before loading again.
+        time.sleep(1)
+        loaded_data_2 = data_manager["data"].load()
+
+        # Cache has expired in between two data loads.
+        assert_frame_not_equal(loaded_data_1, loaded_data_2)
+
+    def test_timeouts_do_not_interfere(self, simple_cache):
+        # This test only passes thanks to the code in memoize that alters the wrapped.__func__.__qualname__,
+        # as explained in the docstring there. If that bit of code is removed then this test correctly fails.
+        data_manager["data_x"] = make_random_data
+        data_manager["data_y"] = make_random_data
+        data_manager["data_y"].timeout = 1
+
+        loaded_data_x_1 = data_manager["data_x"].load()
+        loaded_data_y_1 = data_manager["data_y"].load()
+        time.sleep(1)
+        loaded_data_x_2 = data_manager["data_x"].load()
+        loaded_data_y_2 = data_manager["data_y"].load()
+
+        # Cache has expired for data_y but not data_x.
+        assert_frame_equal(loaded_data_x_1, loaded_data_x_2)
+        assert_frame_not_equal(loaded_data_y_1, loaded_data_y_2)
diff --git a/vizro-core/tests/unit/vizro/models/_components/test_ag_grid.py b/vizro-core/tests/unit/vizro/models/_components/test_ag_grid.py
@@ -91,15 +91,16 @@ def test_ag_grid_filter_interaction_attributes(self, ag_grid_with_id):
 class TestProcessAgGridDataFrame:
+    # Both tests check that the AgGrid's data can be fetched from the data manager by component id and that
+    # indexing the model with "data_frame" raises KeyError.
     def test_process_figure_data_frame_str_df(self, dash_ag_grid_with_str_dataframe, gapminder):
         data_manager["gapminder"] = gapminder
-        ag_grid_with_str_df = vm.AgGrid(id="ag_grid", figure=dash_ag_grid_with_str_dataframe)
+        ag_grid = vm.AgGrid(id="ag_grid", figure=dash_ag_grid_with_str_dataframe)
         assert data_manager._get_component_data("ag_grid").equals(gapminder)
-        assert ag_grid_with_str_df["data_frame"] == "gapminder"
+        with pytest.raises(KeyError, match="'data_frame'"):
+            ag_grid["data_frame"]
 
     def test_process_figure_data_frame_df(self, standard_ag_grid, gapminder):
-        ag_grid_with_str_df = vm.AgGrid(id="ag_grid", figure=standard_ag_grid)
+        ag_grid = vm.AgGrid(id="ag_grid", figure=standard_ag_grid)
         assert data_manager._get_component_data("ag_grid").equals(gapminder)
         with pytest.raises(KeyError, match="'data_frame'"):
-            ag_grid_with_str_df.figure["data_frame"]
+            ag_grid["data_frame"]
 
 
 class TestPreBuildAgGrid:

diff --git a/vizro-core/tests/unit/vizro/models/_components/test_graph.py b/vizro-core/tests/unit/vizro/models/_components/test_graph.py
@@ -119,18 +119,19 @@ def test_graph_filter_interaction_attributes(self, standard_px_chart):
         assert "modelID" in graph._filter_interaction_input
 
 
-class TestProcessFigureDataFrame:
+class TestProcessGraphDataFrame:
+    # Both tests check that the Graph's data can be fetched from the data manager by component id and that
+    # indexing the model with "data_frame" raises KeyError.
     def test_process_figure_data_frame_str_df(self, standard_px_chart_with_str_dataframe, gapminder):
         data_manager["gapminder"] = gapminder
-        graph_with_str_df = vm.Graph(id="text_graph", figure=standard_px_chart_with_str_dataframe)
-        assert data_manager._get_component_data("text_graph").equals(gapminder)
-        assert graph_with_str_df["data_frame"] == "gapminder"
+        graph = vm.Graph(id="graph", figure=standard_px_chart_with_str_dataframe)
+        assert data_manager._get_component_data("graph").equals(gapminder)
+        with pytest.raises(KeyError, match="'data_frame'"):
+            graph["data_frame"]
 
     def test_process_figure_data_frame_df(self, standard_px_chart, gapminder):
-        graph_with_df = vm.Graph(id="text_graph", figure=standard_px_chart)
-        assert data_manager._get_component_data("text_graph").equals(gapminder)
+        graph = vm.Graph(id="graph", figure=standard_px_chart)
+        assert data_manager._get_component_data("graph").equals(gapminder)
         with pytest.raises(KeyError, match="'data_frame'"):
-            graph_with_df.figure["data_frame"]
+            graph["data_frame"]
 
 
 class TestBuild:

diff --git a/vizro-core/tests/unit/vizro/models/_components/test_table.py b/vizro-core/tests/unit/vizro/models/_components/test_table.py
@@ -91,15 +91,16 @@ class TestProcessTableDataFrame:
     # Testing at this low implementation level as mocking callback contexts skips checking for creation of these objects
     def test_process_figure_data_frame_str_df(self, dash_table_with_str_dataframe, gapminder):
+        # Register gapminder under the name "gapminder" before the Table is built; the fixture presumably refers to
+        # the data by that name (confirm against the fixture definition).
         data_manager["gapminder"] = gapminder
-        table_with_str_df = vm.Table(id="table", figure=dash_table_with_str_dataframe)
+        table = vm.Table(id="table", figure=dash_table_with_str_dataframe)
         assert data_manager._get_component_data("table").equals(gapminder)
-        assert table_with_str_df["data_frame"] == "gapminder"
+        with pytest.raises(KeyError, match="'data_frame'"):
+            table["data_frame"]
 
     def test_process_figure_data_frame_df(self, standard_dash_table, gapminder):
-        table_with_str_df = vm.Table(id="table", figure=standard_dash_table)
+        # The model is a Table, so the local is named `table` to match the sibling test in this class.
+        table = vm.Table(id="table", figure=standard_dash_table)
         assert data_manager._get_component_data("table").equals(gapminder)
         with pytest.raises(KeyError, match="'data_frame'"):
-            table_with_str_df.figure["data_frame"]
+            table["data_frame"]
 
 
 class TestPreBuildTable:

diff --git a/vizro-core/tests/unit/vizro/plotly/test_express.py b/vizro-core/tests/unit/vizro/plotly/test_express.py
@@ -1,12 +1,12 @@
 import plotly.express as px
-import vizro.plotly.express as hpx
+import vizro.plotly.express as vpx
 
 
 def test_non_chart_unchanged():
+    # The `data` attribute reached through vizro.plotly.express must be the very same object as in plotly.express.
-    assert hpx.data is px.data
+    assert vpx.data is px.data
 
 
 def test_chart_wrapped():
+    # vpx.scatter must be a different object from px.scatter, while the object it returns records px.scatter as
+    # its captured function.
-    graph = hpx.scatter(px.data.iris(), x="petal_width", y="petal_length")
+    graph = vpx.scatter(px.data.iris(), x="petal_width", y="petal_length")
     assert graph._captured_callable._function is px.scatter
-    assert hpx.scatter is not px.scatter
+    assert vpx.scatter is not px.scatter