diff --git a/vizro-core/docs/pages/user-guides/dynamic-data.md b/vizro-core/docs/pages/user-guides/dynamic-data.md index 51a4e70a6..de79b5959 100644 --- a/vizro-core/docs/pages/user-guides/dynamic-data.md +++ b/vizro-core/docs/pages/user-guides/dynamic-data.md @@ -50,14 +50,43 @@ The Vizro Data Manager has a server-side caching mechanism to help solve this. V In a development environment the easiest way to enable caching is to use a [simple memory cache](https://cachelib.readthedocs.io/en/stable/simple/) with the default configuration options: -```py title="Simple cache with default timeout of 5 minutes" -from flask_caching import Cache +!!! example "Simple cache with default timeout of 5 minutes" + === "app.py" + ```py hl_lines="12" + from flask_caching import Cache + from vizro import Vizro + import pandas as pd + import vizro.plotly.express as px + import vizro.models as vm -data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache"}) -data_manager["iris"] = load_iris_data -``` + from vizro.managers import data_manager + + def load_iris_data(): + return pd.read_csv("iris.csv") + + data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache"}) + data_manager["iris"] = load_iris_data + + page = vm.Page( + title="My first page", + components=[ + vm.Graph(figure=px.scatter("iris", x="sepal_length", y="petal_width", color="species")), + ], + controls=[vm.Filter(column="species")], + ) + + dashboard = vm.Dashboard(pages=[page]) -By default, dynamic data is cached in the Data Manager for 5 minutes. A refresh of the dashboard within this time interval will fetch the pandas DataFrame from the cache and not re-run the data loading function. Once the cache timeout period has elapsed, the next refresh of the dashboard will re-execute the dynamic data loading function. The resulting pandas DataFrame will again be put into the cache and not expire until another 5 minutes has elapsed. 
+ Vizro().build(dashboard).run() + ``` + + === "Result" + [![DataBasic]][DataBasic] + + [DataBasic]: ../../assets/user_guides/data/data_pandas_dataframe.png + + +By default, when caching is turned on, dynamic data is cached in the Data Manager for 5 minutes. A refresh of the dashboard within this time interval will fetch the pandas DataFrame from the cache and not re-run the data loading function. Once the cache timeout period has elapsed, the next refresh of the dashboard will re-execute the dynamic data loading function. The resulting pandas DataFrame will again be put into the cache and not expire until another 5 minutes has elapsed. If you would like to alter some options, such as the default cache timeout, then you can specify a different cache configuration: @@ -77,6 +105,8 @@ data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache", "CACHE_DEFAULT_T data_manager.cache = Cache(config={"CACHE_TYPE": "RedisCache", "CACHE_REDIS_HOST": "localhost", "CACHE_REDIS_PORT": 6379}) ``` +Note that when a production-ready cache backend is used, the cache is persisted beyond the Vizro process and is not cleared by restarting your server. If you wish to clear the cache then you must do so manually, e.g. if you use `FileSystemCache` then you would delete your `cache` directory. Persisting the cache can also be useful for development purposes when handling data that takes a long time to load: even if you do not need the data to refresh while your dashboard is running, it can speed up your development loop to use dynamic data with a cache that is persisted between repeated runs of Vizro. + ### Configure timeouts You can change the timeout of the cache independently for each dynamic data source in the Data Manager using the `timeout` setting (measured in seconds). A `timeout` of 0 indicates that the cache does not expire. This is effectively the same as using [static data](static-data.md). 
diff --git a/vizro-core/tests/tests_utils/asserts.py b/vizro-core/tests/tests_utils/asserts.py index f477563c9..923d32e21 100644 --- a/vizro-core/tests/tests_utils/asserts.py +++ b/vizro-core/tests/tests_utils/asserts.py @@ -2,6 +2,7 @@ import dash.development import plotly +from pandas.testing import assert_frame_equal STRIP_ALL = object() @@ -42,3 +43,15 @@ def assert_component_equal(left, right, *, keys_to_strip=None): left = _strip_keys(_component_to_dict(left), keys_to_strip) right = _strip_keys(_component_to_dict(right), keys_to_strip) assert left == right + + +# Taken from https://stackoverflow.com/questions/38778266/assert-two-frames-are-not-equal. +def assert_frame_not_equal(*args, **kwargs): + try: + assert_frame_equal(*args, **kwargs) + except AssertionError: + # frames are not equal + pass + else: + # frames are equal + raise AssertionError diff --git a/vizro-core/tests/unit/vizro/managers/test_data_manager.py b/vizro-core/tests/unit/vizro/managers/test_data_manager.py index 7bffb9f13..73b93676c 100644 --- a/vizro-core/tests/unit/vizro/managers/test_data_manager.py +++ b/vizro-core/tests/unit/vizro/managers/test_data_manager.py @@ -1,62 +1,173 @@ """Unit tests for vizro.managers.data_manager.""" +from contextlib import suppress +import time + +import numpy as np import pandas as pd import pytest -from vizro.managers._data_manager import DataManager - - -class TestDataManager: - def setup_method(self): - self.data_manager = DataManager() - self.data = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]}) - - def test_add_dataframe(self): - dataset_name = "test_dataset" - component_id = "component_id_a" - self.data_manager[dataset_name] = self.data - self.data_manager._add_component(component_id, dataset_name) - assert self.data_manager._get_component_data(component_id).equals(self.data) - - def test_add_lazy_dataframe(self): - dataset_name = "test_lazy_dataset" - component_id = "component_id_b" - - def lazy_data(): - return self.data - - 
self.data_manager[dataset_name] = lazy_data - self.data_manager._add_component(component_id, dataset_name) - assert self.data_manager._get_component_data(component_id).equals(lazy_data()) - - def test_add_existing_dataset(self): - dataset_name = "existing_dataset" - self.data_manager[dataset_name] = self.data - with pytest.raises(ValueError): - self.data_manager[dataset_name] = self.data - - def test_add_invalid_dataset(self): - dataset_name = "invalid_dataset" - invalid_data = "not_a_dataframe" - with pytest.raises(TypeError): - self.data_manager[dataset_name] = invalid_data - - def test_add_component_to_nonexistent_dataset(self): - component_id = "test_component" - dataset_name = "nonexistent_dataset" - with pytest.raises(KeyError): - self.data_manager._add_component(component_id, dataset_name) - - def test_add_existing_component(self): - component_id = "existing_component" - dataset_name = "test_dataset" - self.data_manager[dataset_name] = self.data - self.data_manager._add_component(component_id, dataset_name) - with pytest.raises(ValueError): - self.data_manager._add_component(component_id, dataset_name) - - def test_get_component_data_nonexistent(self): - dataset_name = "test_dataset" - nonexistent_component = "nonexistent_component" - self.data_manager[dataset_name] = self.data - with pytest.raises(KeyError): - self.data_manager._get_component_data(nonexistent_component) +from flask_caching import Cache + +from asserts import assert_frame_not_equal +from vizro import Vizro +from vizro.managers import data_manager +from pandas.testing import assert_frame_equal + + +@pytest.fixture(autouse=True) +def clear_cache(): + yield + # Vizro._reset doesn't empty the cache, so any tests which have something other than NullCache must clear it + # after running. Suppress AttributeError: 'Cache' object has no attribute 'app' that occurs when + # data_manager._cache_has_app is False. 
+ with suppress(AttributeError): + data_manager.cache.clear() + + +class TestLoad: + def test_static(self): + data = pd.DataFrame([1, 2, 3]) + data_manager["data"] = data + loaded_data = data_manager["data"].load() + assert_frame_equal(loaded_data, data) + # Make sure loaded_data is a copy rather than the same object. + assert loaded_data is not data + + def test_dynamic(self): + data = lambda: pd.DataFrame([1, 2, 3]) + data_manager["data"] = data + loaded_data = data_manager["data"].load() + assert_frame_equal(loaded_data, data()) + # Make sure loaded_data is a copy rather than the same object. + assert loaded_data is not data() + + +class TestInvalid: + def test_static_data_des_not_support_timeout(self): + data = pd.DataFrame([1, 2, 3]) + data_manager["data"] = data + with pytest.raises( + AttributeError, match="Static data that is a pandas.DataFrame itself does not support timeout" + ): + data_manager["data"].timeout = 10 + + def test_setitem(self): + with pytest.raises( + TypeError, match="Data source data must be a pandas DataFrame or function that returns a pandas DataFrame." + ): + data_manager["data"] = pd.Series([1, 2, 3]) + + def test_does_not_exist(self): + with pytest.raises(KeyError, match="Data source data does not exist."): + data_manager["data"] + + +def make_random_data(): + return pd.DataFrame(np.random.default_rng().random(3)) + + +class TestCacheNotOperational: + def test_null_cache_no_app(self): + # No app at all, so memoize decorator is bypassed completely as data_manager._cache_has_app is False. + data_manager["data"] = make_random_data + loaded_data_1 = data_manager["data"].load() + loaded_data_2 = data_manager["data"].load() + assert_frame_not_equal(loaded_data_1, loaded_data_2) + + def test_null_cache_with_app(self): + # App exists but cache is NullCache so does not do anything. 
+ data_manager["data"] = make_random_data + Vizro() + loaded_data_1 = data_manager["data"].load() + loaded_data_2 = data_manager["data"].load() + assert_frame_not_equal(loaded_data_1, loaded_data_2) + + def test_cache_no_app(self): + # App exists and has a real cache but data_manager.cache is set too late so app is not attached to cache. + data_manager["data"] = make_random_data + Vizro() + data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache"}) + + with pytest.warns(UserWarning, match="Cache does not have Vizro app attached and so is not operational."): + loaded_data_1 = data_manager["data"].load() + loaded_data_2 = data_manager["data"].load() + assert_frame_not_equal(loaded_data_1, loaded_data_2) + + +@pytest.fixture +def simple_cache(): + # We don't need the Flask request context to run tests. (flask-caching tests for memoize use + # app.test_request_context() but look like they don't actually need to, since only flask_caching.Cache.cached + # requires the request context.) + # We do need a Flask app to be attached for the cache to be operational though, hence the call to Vizro(). + data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache"}) + Vizro() + yield + + +# TODO: think if any more needed here +# TODO: make sure no changes would have affected the manual testing +class TestCache: + def test_simple_cache(self, simple_cache): + data_manager["data"] = make_random_data + loaded_data_1 = data_manager["data"].load() + loaded_data_2 = data_manager["data"].load() + + # Cache saves result. + assert_frame_equal(loaded_data_1, loaded_data_2) + + def test_shared_dynamic_data_function(self, simple_cache): + data_manager["data_x"] = make_random_data + data_manager["data_y"] = make_random_data + + # Two data sources that share the same function are independent. 
+ loaded_data_x_1 = data_manager["data_x"].load() + loaded_data_y_1 = data_manager["data_y"].load() + assert_frame_not_equal(loaded_data_x_1, loaded_data_y_1) + + loaded_data_x_2 = data_manager["data_x"].load() + loaded_other_y_2 = data_manager["data_y"].load() + + # Cache saves result. + assert_frame_equal(loaded_data_x_1, loaded_data_x_2) + assert_frame_equal(loaded_data_y_1, loaded_other_y_2) + + def test_change_default_timeout(self): + data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache", "CACHE_DEFAULT_TIMEOUT": 1}) + Vizro() + + data_manager["data"] = make_random_data + loaded_data_1 = data_manager["data"].load() + time.sleep(1) + loaded_data_2 = data_manager["data"].load() + + # Cache has expired in between two data loads. + assert_frame_not_equal(loaded_data_1, loaded_data_2) + + def test_change_individual_timeout(self, simple_cache): + data_manager["data"] = make_random_data + data_manager["data"].timeout = 1 + + loaded_data_1 = data_manager["data"].load() + time.sleep(1) + loaded_data_2 = data_manager["data"].load() + + # Cache has expired in between two data loads. + assert_frame_not_equal(loaded_data_1, loaded_data_2) + + def test_timeouts_do_not_interfere(self, simple_cache): + # This test only passes thanks to the code in memoize that alters the wrapped.__func__.__qualname__, + # as explained in the docstring there. If that bit of code is removed then this test correctly fails. + data_manager["data_x"] = make_random_data + data_manager["data_y"] = make_random_data + data_manager["data_y"].timeout = 1 + + loaded_data_x_1 = data_manager["data_x"].load() + loaded_data_y_1 = data_manager["data_y"].load() + time.sleep(1) + loaded_data_x_2 = data_manager["data_x"].load() + loaded_data_y_2 = data_manager["data_y"].load() + + # Cache has expired for data_y but not data_x. 
+ assert_frame_equal(loaded_data_x_1, loaded_data_x_2) + assert_frame_not_equal(loaded_data_y_1, loaded_data_y_2) diff --git a/vizro-core/tests/unit/vizro/models/_components/test_ag_grid.py b/vizro-core/tests/unit/vizro/models/_components/test_ag_grid.py index fb751caa0..3d6393d93 100644 --- a/vizro-core/tests/unit/vizro/models/_components/test_ag_grid.py +++ b/vizro-core/tests/unit/vizro/models/_components/test_ag_grid.py @@ -91,15 +91,16 @@ def test_ag_grid_filter_interaction_attributes(self, ag_grid_with_id): class TestProcessAgGridDataFrame: def test_process_figure_data_frame_str_df(self, dash_ag_grid_with_str_dataframe, gapminder): data_manager["gapminder"] = gapminder - ag_grid_with_str_df = vm.AgGrid(id="ag_grid", figure=dash_ag_grid_with_str_dataframe) + ag_grid = vm.AgGrid(id="ag_grid", figure=dash_ag_grid_with_str_dataframe) assert data_manager._get_component_data("ag_grid").equals(gapminder) - assert ag_grid_with_str_df["data_frame"] == "gapminder" + with pytest.raises(KeyError, match="'data_frame'"): + ag_grid["data_frame"] def test_process_figure_data_frame_df(self, standard_ag_grid, gapminder): - ag_grid_with_str_df = vm.AgGrid(id="ag_grid", figure=standard_ag_grid) + ag_grid = vm.AgGrid(id="ag_grid", figure=standard_ag_grid) assert data_manager._get_component_data("ag_grid").equals(gapminder) with pytest.raises(KeyError, match="'data_frame'"): - ag_grid_with_str_df.figure["data_frame"] + ag_grid["data_frame"] class TestPreBuildAgGrid: diff --git a/vizro-core/tests/unit/vizro/models/_components/test_graph.py b/vizro-core/tests/unit/vizro/models/_components/test_graph.py index 20556f2f1..8ca019e49 100644 --- a/vizro-core/tests/unit/vizro/models/_components/test_graph.py +++ b/vizro-core/tests/unit/vizro/models/_components/test_graph.py @@ -119,18 +119,19 @@ def test_graph_filter_interaction_attributes(self, standard_px_chart): assert "modelID" in graph._filter_interaction_input -class TestProcessFigureDataFrame: +class TestProcessGraphDataFrame: def 
test_process_figure_data_frame_str_df(self, standard_px_chart_with_str_dataframe, gapminder): data_manager["gapminder"] = gapminder - graph_with_str_df = vm.Graph(id="text_graph", figure=standard_px_chart_with_str_dataframe) - assert data_manager._get_component_data("text_graph").equals(gapminder) - assert graph_with_str_df["data_frame"] == "gapminder" + graph = vm.Graph(id="graph", figure=standard_px_chart_with_str_dataframe) + assert data_manager._get_component_data("graph").equals(gapminder) + with pytest.raises(KeyError, match="'data_frame'"): + graph["data_frame"] def test_process_figure_data_frame_df(self, standard_px_chart, gapminder): - graph_with_df = vm.Graph(id="text_graph", figure=standard_px_chart) - assert data_manager._get_component_data("text_graph").equals(gapminder) + graph = vm.Graph(id="graph", figure=standard_px_chart) + assert data_manager._get_component_data("graph").equals(gapminder) with pytest.raises(KeyError, match="'data_frame'"): - graph_with_df.figure["data_frame"] + graph["data_frame"] class TestBuild: diff --git a/vizro-core/tests/unit/vizro/models/_components/test_table.py b/vizro-core/tests/unit/vizro/models/_components/test_table.py index b6ac5ad3a..57cec4902 100644 --- a/vizro-core/tests/unit/vizro/models/_components/test_table.py +++ b/vizro-core/tests/unit/vizro/models/_components/test_table.py @@ -91,15 +91,16 @@ class TestProcessTableDataFrame: # Testing at this low implementation level as mocking callback contexts skips checking for creation of these objects def test_process_figure_data_frame_str_df(self, dash_table_with_str_dataframe, gapminder): data_manager["gapminder"] = gapminder - table_with_str_df = vm.Table(id="table", figure=dash_table_with_str_dataframe) + table = vm.Table(id="table", figure=dash_table_with_str_dataframe) assert data_manager._get_component_data("table").equals(gapminder) - assert table_with_str_df["data_frame"] == "gapminder" + with pytest.raises(KeyError, match="'data_frame'"): + 
table["data_frame"] def test_process_figure_data_frame_df(self, standard_dash_table, gapminder): - table_with_str_df = vm.Table(id="table", figure=standard_dash_table) + graph = vm.Table(id="table", figure=standard_dash_table) assert data_manager._get_component_data("table").equals(gapminder) with pytest.raises(KeyError, match="'data_frame'"): - table_with_str_df.figure["data_frame"] + graph["data_frame"] class TestPreBuildTable: diff --git a/vizro-core/tests/unit/vizro/plotly/test_express.py b/vizro-core/tests/unit/vizro/plotly/test_express.py index b82e3f5d1..cd4c749a5 100644 --- a/vizro-core/tests/unit/vizro/plotly/test_express.py +++ b/vizro-core/tests/unit/vizro/plotly/test_express.py @@ -1,12 +1,12 @@ import plotly.express as px -import vizro.plotly.express as hpx +import vizro.plotly.express as vpx def test_non_chart_unchanged(): - assert hpx.data is px.data + assert vpx.data is px.data def test_chart_wrapped(): - graph = hpx.scatter(px.data.iris(), x="petal_width", y="petal_length") + graph = vpx.scatter(px.data.iris(), x="petal_width", y="petal_length") assert graph._captured_callable._function is px.scatter - assert hpx.scatter is not px.scatter + assert vpx.scatter is not px.scatter