Skip to content

Commit

Permalink
Add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
antonymilne committed Apr 9, 2024
1 parent e1a7222 commit 04be56e
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 83 deletions.
42 changes: 36 additions & 6 deletions vizro-core/docs/pages/user-guides/dynamic-data.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,42 @@ The Vizro Data Manager has a server-side caching mechanism to help solve this. V

In a development environment the easiest way to enable caching is to use a [simple memory cache](https://cachelib.readthedocs.io/en/stable/simple/) with the default configuration options:

!!! example "Simple cache with default timeout of 5 minutes"
    === "app.py"
        ```py hl_lines="12"
        from flask_caching import Cache
        from vizro import Vizro
        import pandas as pd
        import vizro.plotly.express as px
        import vizro.models as vm

        from vizro.managers import data_manager

        def load_iris_data():
            return pd.read_csv("iris.csv")

        data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache"})
        data_manager["iris"] = load_iris_data

        page = vm.Page(
            title="My first page",
            components=[
                vm.Graph(figure=px.scatter("iris", x="sepal_length", y="petal_width", color="species")),
            ],
            controls=[vm.Filter(column="species")],
        )

        dashboard = vm.Dashboard(pages=[page])

        Vizro().build(dashboard).run()
        ```

    === "Result"
        [![DataBasic]][DataBasic]

    [DataBasic]: ../../assets/user_guides/data/data_pandas_dataframe.png


By default, when caching is turned on, dynamic data is cached in the Data Manager for 5 minutes. A refresh of the dashboard within this time interval will fetch the pandas DataFrame from the cache and not re-run the data loading function. Once the cache timeout period has elapsed, the next refresh of the dashboard will re-execute the dynamic data loading function. The resulting pandas DataFrame will again be put into the cache and not expire until another 5 minutes has elapsed.

If you would like to alter some options, such as the default cache timeout, then you can specify a different cache configuration:

Expand All @@ -77,6 +105,8 @@ data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache", "CACHE_DEFAULT_T
data_manager.cache = Cache(config={"CACHE_TYPE": "RedisCache", "CACHE_REDIS_HOST": "localhost", "CACHE_REDIS_PORT": 6379})
```

Note that when a production-ready cache backend is used, the cache is persisted beyond the Vizro process and is not cleared by restarting your server. If you wish to clear the cache then you must do so manually, e.g. if you use `FileSystemCache` then you would delete your `cache` directory. Persisting the cache can also be useful for development purposes when handling data that takes a long time to load: even if you do not need the data to refresh while your dashboard is running, it can speed up your development loop to use dynamic data with a cache that is persisted between repeated runs of Vizro.

### Configure timeouts

You can change the timeout of the cache independently for each dynamic data source in the Data Manager using the `timeout` setting (measured in seconds). A `timeout` of 0 indicates that the cache does not expire. This is effectively the same as using [static data](static-data.md).
Expand Down
13 changes: 13 additions & 0 deletions vizro-core/tests/tests_utils/asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import dash.development
import plotly
from pandas.testing import assert_frame_equal

STRIP_ALL = object()

Expand Down Expand Up @@ -42,3 +43,15 @@ def assert_component_equal(left, right, *, keys_to_strip=None):
left = _strip_keys(_component_to_dict(left), keys_to_strip)
right = _strip_keys(_component_to_dict(right), keys_to_strip)
assert left == right


# Taken from https://stackoverflow.com/questions/38778266/assert-two-frames-are-not-equal.
def assert_frame_not_equal(*args, **kwargs):
    """Assert that two DataFrames differ; the inverse of ``assert_frame_equal``.

    All positional and keyword arguments are forwarded unchanged to
    ``pandas.testing.assert_frame_equal``.

    Raises:
        AssertionError: If ``assert_frame_equal`` accepts the frames as equal.
    """
    try:
        assert_frame_equal(*args, **kwargs)
    except AssertionError:
        # assert_frame_equal found a difference, which is what the caller expects.
        return
    # Raised outside the ``try`` so this failure can never be swallowed by the handler above.
    raise AssertionError("DataFrames are equal but were expected to differ.")
227 changes: 169 additions & 58 deletions vizro-core/tests/unit/vizro/managers/test_data_manager.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,173 @@
"""Unit tests for vizro.managers.data_manager."""
from contextlib import suppress

import time

import numpy as np
import pandas as pd
import pytest
from vizro.managers._data_manager import DataManager


class TestDataManager:
    """Unit tests for registering data sources and components on a standalone ``DataManager``."""

    def setup_method(self):
        # A fresh frame and manager are created before every test method.
        self.data = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
        self.data_manager = DataManager()

    def test_add_dataframe(self):
        """A component linked to a DataFrame data source gets equal data back."""
        self.data_manager["test_dataset"] = self.data
        self.data_manager._add_component("component_id_a", "test_dataset")
        fetched = self.data_manager._get_component_data("component_id_a")
        assert fetched.equals(self.data)

    def test_add_lazy_dataframe(self):
        """A component linked to a function data source gets the function's result back."""

        def lazy_data():
            return self.data

        self.data_manager["test_lazy_dataset"] = lazy_data
        self.data_manager._add_component("component_id_b", "test_lazy_dataset")
        fetched = self.data_manager._get_component_data("component_id_b")
        assert fetched.equals(lazy_data())

    def test_add_existing_dataset(self):
        """Registering the same data source name twice raises ValueError."""
        self.data_manager["existing_dataset"] = self.data
        with pytest.raises(ValueError):
            self.data_manager["existing_dataset"] = self.data

    def test_add_invalid_dataset(self):
        """Registering a plain string as a data source raises TypeError."""
        with pytest.raises(TypeError):
            self.data_manager["invalid_dataset"] = "not_a_dataframe"

    def test_add_component_to_nonexistent_dataset(self):
        """Linking a component to an unregistered data source raises KeyError."""
        with pytest.raises(KeyError):
            self.data_manager._add_component("test_component", "nonexistent_dataset")

    def test_add_existing_component(self):
        """Linking the same component id twice raises ValueError."""
        self.data_manager["test_dataset"] = self.data
        self.data_manager._add_component("existing_component", "test_dataset")
        with pytest.raises(ValueError):
            self.data_manager._add_component("existing_component", "test_dataset")

    def test_get_component_data_nonexistent(self):
        """Requesting data for a component that was never linked raises KeyError."""
        self.data_manager["test_dataset"] = self.data
        with pytest.raises(KeyError):
            self.data_manager._get_component_data("nonexistent_component")
from flask_caching import Cache

from asserts import assert_frame_not_equal
from vizro import Vizro
from vizro.managers import data_manager
from pandas.testing import assert_frame_equal


@pytest.fixture(autouse=True)
def clear_cache():
    """Empty ``data_manager.cache`` after every test.

    Vizro._reset doesn't empty the cache, so any tests which have something other than NullCache must clear it
    after running.
    """
    yield
    try:
        data_manager.cache.clear()
    except AttributeError:
        # AttributeError: 'Cache' object has no attribute 'app' occurs when
        # data_manager._cache_has_app is False; there is nothing to clear in that case.
        pass


class TestLoad:
    """Loading static and dynamic data sources through ``data_manager[...].load()``."""

    def test_static(self):
        frame = pd.DataFrame([1, 2, 3])
        data_manager["data"] = frame
        result = data_manager["data"].load()
        assert_frame_equal(result, frame)
        # Make sure the loaded frame is a copy rather than the same object.
        assert result is not frame

    def test_dynamic(self):
        loader = lambda: pd.DataFrame([1, 2, 3])
        data_manager["data"] = loader
        result = data_manager["data"].load()
        assert_frame_equal(result, loader())
        # Make sure the loaded frame is a copy rather than the same object.
        # NOTE(review): loader() builds a brand-new DataFrame on every call, so this identity check
        # holds regardless of what load() returns - confirm whether a stronger check is wanted.
        assert result is not loader()


class TestInvalid:
    """Invalid uses of the data manager raise errors with the expected messages."""

    def test_static_data_does_not_support_timeout(self):
        """Setting ``timeout`` on a data source registered as a DataFrame raises AttributeError."""
        data = pd.DataFrame([1, 2, 3])
        data_manager["data"] = data
        with pytest.raises(
            AttributeError, match="Static data that is a pandas.DataFrame itself does not support timeout"
        ):
            data_manager["data"].timeout = 10

    def test_setitem(self):
        """Registering a pandas Series as a data source raises TypeError."""
        with pytest.raises(
            TypeError, match="Data source data must be a pandas DataFrame or function that returns a pandas DataFrame."
        ):
            data_manager["data"] = pd.Series([1, 2, 3])

    def test_does_not_exist(self):
        """Looking up a data source that was never registered raises KeyError."""
        with pytest.raises(KeyError, match="Data source data does not exist."):
            data_manager["data"]


def make_random_data():
    """Return a single-column DataFrame holding three freshly drawn random floats."""
    rng = np.random.default_rng()
    return pd.DataFrame(rng.random(3))


class TestCacheNotOperational:
    """Scenarios in which two consecutive loads of random dynamic data give different frames."""

    def test_null_cache_no_app(self):
        # No app at all, so memoize decorator is bypassed completely as data_manager._cache_has_app is False.
        data_manager["data"] = make_random_data
        first_load = data_manager["data"].load()
        second_load = data_manager["data"].load()
        assert_frame_not_equal(first_load, second_load)

    def test_null_cache_with_app(self):
        # App exists but cache is NullCache so does not do anything.
        data_manager["data"] = make_random_data
        Vizro()
        first_load = data_manager["data"].load()
        second_load = data_manager["data"].load()
        assert_frame_not_equal(first_load, second_load)

    def test_cache_no_app(self):
        # App exists and has a real cache but data_manager.cache is set too late so app is not attached to cache.
        data_manager["data"] = make_random_data
        Vizro()
        data_manager.cache = Cache(config={"CACHE_TYPE": "SimpleCache"})

        expected_warning = "Cache does not have Vizro app attached and so is not operational."
        with pytest.warns(UserWarning, match=expected_warning):
            first_load = data_manager["data"].load()
            second_load = data_manager["data"].load()
        assert_frame_not_equal(first_load, second_load)


@pytest.fixture
def simple_cache():
    """Give ``data_manager`` a ``SimpleCache`` and create a Vizro app before the test runs.

    We don't need the Flask request context to run tests. (flask-caching tests for memoize use
    app.test_request_context() but look like they don't actually need to, since only
    flask_caching.Cache.cached requires the request context.)
    """
    cache_config = {"CACHE_TYPE": "SimpleCache"}
    data_manager.cache = Cache(config=cache_config)
    # We do need a Flask app to be attached for the cache to be operational though, hence the call Vizro().
    Vizro()
    yield


# TODO: think if any more needed here
# TODO: make sure no changes would have affected the manual testing
class TestCache:
    """Dynamic data loading with a ``SimpleCache`` and a Vizro app in place."""

    def test_simple_cache(self, simple_cache):
        data_manager["data"] = make_random_data
        first_load = data_manager["data"].load()
        second_load = data_manager["data"].load()

        # Cache saves result.
        assert_frame_equal(first_load, second_load)

    def test_shared_dynamic_data_function(self, simple_cache):
        data_manager["data_x"] = make_random_data
        data_manager["data_y"] = make_random_data

        # Two data sources that share the same function are independent.
        x_first = data_manager["data_x"].load()
        y_first = data_manager["data_y"].load()
        assert_frame_not_equal(x_first, y_first)

        x_second = data_manager["data_x"].load()
        y_second = data_manager["data_y"].load()

        # Cache saves result.
        assert_frame_equal(x_first, x_second)
        assert_frame_equal(y_first, y_second)

    def test_change_default_timeout(self):
        # Sets up its own cache instead of using the simple_cache fixture so that the default timeout can be 1.
        cache_config = {"CACHE_TYPE": "SimpleCache", "CACHE_DEFAULT_TIMEOUT": 1}
        data_manager.cache = Cache(config=cache_config)
        Vizro()

        data_manager["data"] = make_random_data
        first_load = data_manager["data"].load()
        # NOTE(review): sleeps exactly as long as the timeout; presumably the cache treats an entry as
        # expired once the full timeout has elapsed - confirm this cannot be flaky at the boundary.
        time.sleep(1)
        second_load = data_manager["data"].load()

        # Cache has expired in between two data loads.
        assert_frame_not_equal(first_load, second_load)

    def test_change_individual_timeout(self, simple_cache):
        data_manager["data"] = make_random_data
        data_manager["data"].timeout = 1

        first_load = data_manager["data"].load()
        time.sleep(1)
        second_load = data_manager["data"].load()

        # Cache has expired in between two data loads.
        assert_frame_not_equal(first_load, second_load)

    def test_timeouts_do_not_interfere(self, simple_cache):
        # This test only passes thanks to the code in memoize that alters the wrapped.__func__.__qualname__,
        # as explained in the docstring there. If that bit of code is removed then this test correctly fails.
        data_manager["data_x"] = make_random_data
        data_manager["data_y"] = make_random_data
        data_manager["data_y"].timeout = 1

        x_first = data_manager["data_x"].load()
        y_first = data_manager["data_y"].load()
        time.sleep(1)
        x_second = data_manager["data_x"].load()
        y_second = data_manager["data_y"].load()

        # Cache has expired for data_y but not data_x.
        assert_frame_equal(x_first, x_second)
        assert_frame_not_equal(y_first, y_second)
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,16 @@ def test_ag_grid_filter_interaction_attributes(self, ag_grid_with_id):
class TestProcessAgGridDataFrame:
    """Building an AgGrid registers its data with the data manager and hides ``data_frame`` on the model."""

    def test_process_figure_data_frame_str_df(self, dash_ag_grid_with_str_dataframe, gapminder):
        """Figure whose data is referenced by the data source name "gapminder"."""
        data_manager["gapminder"] = gapminder
        # Build the model exactly once: a second model with the same id must not be created.
        ag_grid = vm.AgGrid(id="ag_grid", figure=dash_ag_grid_with_str_dataframe)
        assert data_manager._get_component_data("ag_grid").equals(gapminder)
        with pytest.raises(KeyError, match="'data_frame'"):
            ag_grid["data_frame"]

    def test_process_figure_data_frame_df(self, standard_ag_grid, gapminder):
        """Figure taken from the ``standard_ag_grid`` fixture."""
        ag_grid = vm.AgGrid(id="ag_grid", figure=standard_ag_grid)
        assert data_manager._get_component_data("ag_grid").equals(gapminder)
        with pytest.raises(KeyError, match="'data_frame'"):
            ag_grid["data_frame"]


class TestPreBuildAgGrid:
Expand Down
15 changes: 8 additions & 7 deletions vizro-core/tests/unit/vizro/models/_components/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,18 +119,19 @@ def test_graph_filter_interaction_attributes(self, standard_px_chart):
assert "modelID" in graph._filter_interaction_input


class TestProcessGraphDataFrame:
    """Building a Graph registers its data with the data manager and hides ``data_frame`` on the model."""

    def test_process_figure_data_frame_str_df(self, standard_px_chart_with_str_dataframe, gapminder):
        """Figure whose data is referenced by the data source name "gapminder"."""
        data_manager["gapminder"] = gapminder
        # Build the model exactly once: a second model with the same id must not be created.
        graph = vm.Graph(id="graph", figure=standard_px_chart_with_str_dataframe)
        assert data_manager._get_component_data("graph").equals(gapminder)
        with pytest.raises(KeyError, match="'data_frame'"):
            graph["data_frame"]

    def test_process_figure_data_frame_df(self, standard_px_chart, gapminder):
        """Figure taken from the ``standard_px_chart`` fixture."""
        graph = vm.Graph(id="graph", figure=standard_px_chart)
        assert data_manager._get_component_data("graph").equals(gapminder)
        with pytest.raises(KeyError, match="'data_frame'"):
            graph["data_frame"]


class TestBuild:
Expand Down
9 changes: 5 additions & 4 deletions vizro-core/tests/unit/vizro/models/_components/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,16 @@ class TestProcessTableDataFrame:
# Testing at this low implementation level as mocking callback contexts skips checking for creation of these objects
def test_process_figure_data_frame_str_df(self, dash_table_with_str_dataframe, gapminder):
    """Figure whose data is referenced by the data source name "gapminder"."""
    data_manager["gapminder"] = gapminder
    # Build the model exactly once: a second model with the same id must not be created.
    table = vm.Table(id="table", figure=dash_table_with_str_dataframe)
    assert data_manager._get_component_data("table").equals(gapminder)
    # Indexing the model with "data_frame" is expected to fail once the data is registered.
    with pytest.raises(KeyError, match="'data_frame'"):
        table["data_frame"]

def test_process_figure_data_frame_df(self, standard_dash_table, gapminder):
    """Figure taken from the ``standard_dash_table`` fixture."""
    # Named ``table`` (not ``graph``) because the model under test is a vm.Table; built exactly once.
    table = vm.Table(id="table", figure=standard_dash_table)
    assert data_manager._get_component_data("table").equals(gapminder)
    # Indexing the model with "data_frame" is expected to fail once the data is registered.
    with pytest.raises(KeyError, match="'data_frame'"):
        table["data_frame"]


class TestPreBuildTable:
Expand Down
8 changes: 4 additions & 4 deletions vizro-core/tests/unit/vizro/plotly/test_express.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import plotly.express as px
import vizro.plotly.express as hpx
import vizro.plotly.express as vpx


def test_non_chart_unchanged():
    """Non-chart attributes of ``vizro.plotly.express`` are the very same objects as in ``plotly.express``."""
    assert vpx.data is px.data


def test_chart_wrapped():
    """Chart functions of ``vizro.plotly.express`` are wrappers that capture the plotly original."""
    graph = vpx.scatter(px.data.iris(), x="petal_width", y="petal_length")
    # The captured callable records the underlying plotly function...
    assert graph._captured_callable._function is px.scatter
    # ...while the exposed vizro function is a different object from the plotly one.
    assert vpx.scatter is not px.scatter

0 comments on commit 04be56e

Please sign in to comment.