From 023a90fdd6fef49a445b68c8556f8d99844a5de0 Mon Sep 17 00:00:00 2001 From: Lingyi Zhang Date: Tue, 17 Oct 2023 14:27:35 -0400 Subject: [PATCH 01/10] first poc --- vizro-core/examples/default/app.py | 573 ++---------------- vizro-core/hatch.toml | 2 +- vizro-core/pyproject.toml | 1 + vizro-core/src/vizro/_vizro.py | 2 + .../src/vizro/managers/_data_manager.py | 42 +- 5 files changed, 103 insertions(+), 517 deletions(-) diff --git a/vizro-core/examples/default/app.py b/vizro-core/examples/default/app.py index f866fab4c..f16a4c0f8 100644 --- a/vizro-core/examples/default/app.py +++ b/vizro-core/examples/default/app.py @@ -6,532 +6,85 @@ import vizro.models as vm import vizro.plotly.express as px from vizro import Vizro +from vizro.managers import data_manager from vizro.actions import export_data, filter_interaction +from vizro.models import Action +from vizro.models.types import capture -def retrieve_avg_continent_data(): - """This is function returns gapminder data grouped by continent.""" - df = px.data.gapminder() - mean = ( - df.groupby(by=["continent", "year"]).agg({"lifeExp": "mean", "pop": "mean", "gdpPercap": "mean"}).reset_index() - ) - return mean +def retrieve_gapminder(): + """This is a function that returns gapminder data.""" + return px.data.gapminder() -def create_variable_analysis(): - """Function returns a page with gapminder data to do variable analysis.""" - df_gapminder = px.data.gapminder() - df_avg_gapminder = retrieve_avg_continent_data() - page_variable = vm.Page( - title="Variable Analysis", - layout=vm.Layout( - grid=[ - # fmt: off - [0, 1, 1, 1], - [2, 3, 3, 3], - [4, 5, 5, 5], - [6, 7, 7, 7], - # fmt: on - ], - row_min_height="400px", - row_gap="25px", - ), - components=[ - vm.Card( - text=""" - ### Overview - The world map provides initial insights into the variations of metrics across countries and - continents. Click on Play to see the animation and explore the development over time. +data_manager["gapminder"] = retrieve_gapminder - #### Observation - A global trend of increasing life expectancy emerges, with some exceptions in specific African - countries. Additionally, despite similar population growth rates across continents, the overall - global population continues to expand, with India and China leading the way. Meanwhile, GDP per - capita experiences growth in most regions. - """ - ), - vm.Graph( - id="variable_map", - figure=px.choropleth( - df_gapminder, - locations="iso_alpha", - color="lifeExp", - hover_name="country", - animation_frame="year", - labels={ - "year": "year", - "lifeExp": "Life expectancy", - "pop": "Population", - "gdpPercap": "GDP per capita", - }, - title="Global development over time", - ), - ), - vm.Card( - text=""" - ### Distribution - The boxplot illustrates the distribution of each metric across continents, facilitating comparisons - of life expectancy, GDP per capita, and population statistics. +@capture("action") +def delete_memoized_cache(collapse_button_id_n_clicks): + if collapse_button_id_n_clicks: + data_manager._cache.delete_memoized(data_manager._get_component_data, data_manager, "the_graph") + # data_manager._cache.delete_memoized(data_manager._get_component_data, data_manager, "the_graph2") - Observations reveal that Europe and Oceania have the highest life expectancy and GDP per capita, - likely influenced by their smaller population growth. Additionally, Asia and America exhibit - notable GDP per capita outliers, indicating variations among countries within these continents or - large growth over the observed years. - """ - ), - vm.Graph( - id="variable_boxplot", - figure=px.box( - df_gapminder, - x="continent", - y="lifeExp", - color="continent", - labels={ - "year": "year", - "lifeExp": "Life expectancy", - "pop": "Population", - "gdpPercap": "GDP per capita", - "continent": "Continent", - }, - title="Distribution per continent", - color_discrete_map={ - "Africa": "#00b4ff", - "Americas": "#ff9222", - "Asia": "#3949ab", - "Europe": "#ff5267", - "Oceania": "#08bdba", - }, - ), - ), - vm.Card( - text=""" - ### Development - The line chart tracks the variable's progress from 1952 to 2007, facilitating a deeper comprehension - of each metric. - - #### Observation - Oceania and Europe are found to have the highest total GDP per capita and exhibit significant - growth. In contrast, Asia, Africa, and America demonstrate a more pronounced upward trend in - population increase compared to Europe and Oceania, suggesting that GDP per capita growth might be - influenced by relatively smaller population growth in the latter two continents. - - """ - ), - vm.Graph( - id="variable_line", - figure=px.line( - df_avg_gapminder, - y="lifeExp", - x="year", - color="continent", - title="Development between 1952 and 2007", - labels={ - "year": "Year", - "lifeExp": "Life expectancy", - "pop": "Population", - "gdpPercap": "GDP per capita", - "continent": "Continent", - }, - color_discrete_map={ - "Africa": "#00b4ff", - "Americas": "#ff9222", - "Asia": "#3949ab", - "Europe": "#ff5267", - "Oceania": "#08bdba", - }, - ), - ), - vm.Card( - text=""" - ### Recent status - Examining the data for 2007 provides insight into the current status of each continent and metrics. - #### Observation - Asia held the largest population, followed by America, Europe, Africa, and Oceania. Life expectancy - surpassed 70 years for all continents, except Africa with 55 years. GDP per capita aligns with - earlier findings, with Oceania and Europe reporting the highest values and Africa recording the - lowest. - """ +page = vm.Page( + title="test_page", + components=[ + vm.Graph( + figure=px.box( + "gapminder", + x="continent", + y="lifeExp", + color="continent", + title="Distribution per continent", ), - vm.Graph( - id="variable_bar", - figure=px.bar( - df_avg_gapminder.query("year == 2007"), - x="lifeExp", - y="continent", - orientation="h", - title="Comparison of average metric for 2007", - labels={ - "year": "year", - "continent": "Continent", - "lifeExp": "Life expectancy", - "pop": "Population", - "gdpPercap": "GDP per capita", - }, - color="continent", - color_discrete_map={ - "Africa": "#00b4ff", - "Americas": "#ff9222", - "Asia": "#3949ab", - "Europe": "#ff5267", - "Oceania": "#08bdba", - }, - ), - ), - ], - controls=[ - vm.Parameter( - targets=["variable_map.color", "variable_boxplot.y", "variable_line.y", "variable_bar.x"], - selector=vm.RadioItems(options=["lifeExp", "pop", "gdpPercap"], title="Select variable"), - ) - ], - ) - return page_variable - - -def create_relation_analysis(): - """Function returns a page to perform relation analysis.""" - df_gapminder = px.data.gapminder() - - page_relation_analysis = vm.Page( - title="Relationship Analysis", - layout=vm.Layout( - grid=[[0, 0, 0, 0, 1]] + [[2, 2, 3, 3, 3]] * 4 + [[4, 4, 4, 4, 4]] * 5, - row_min_height="100px", - row_gap="24px", + id="the_graph", ), - components=[ - vm.Card( - text=""" - Population, GDP per capita, and life expectancy are interconnected metrics that provide insights - into the socio-economic well-being of a country. - Rapid population growth can strain resources and infrastructure, impacting GDP per capita. Higher - GDP per capita often enables better healthcare and improved life expectancy, but other factors such - as healthcare quality and social policies also play significant roles. - """ - ), - vm.Card( - text=""" - #### Last updated - July, 2023 - """ - ), - vm.Graph( - id="bar_relation_2007", - figure=px.box( - df_gapminder.query("year == 2007"), - x="continent", - y="lifeExp", - color="continent", - hover_name="continent", - title="Relationship in 2007", - labels={ - "gdpPercap": "GDP per capita", - "pop": "Population", - "lifeExp": "Life expectancy", - "continent": "Continent", - }, - color_discrete_map={ - "Africa": "#00b4ff", - "Americas": "#ff9222", - "Asia": "#3949ab", - "Europe": "#ff5267", - "Oceania": "#08bdba", - }, - custom_data=["continent"], - ), - actions=[vm.Action(function=filter_interaction(targets=["scatter_relation_2007"]))], - ), - vm.Graph( - id="scatter_relation_2007", - figure=px.scatter( - df_gapminder.query("year == 2007"), - x="gdpPercap", - y="lifeExp", - size="pop", - color="continent", - hover_name="country", - size_max=60, - labels={ - "gdpPercap": "GDP per capita", - "pop": "Population", - "lifeExp": "Life expectancy", - "continent": "Continent", - }, - color_discrete_map={ - "Africa": "#00b4ff", - "Americas": "#ff9222", - "Asia": "#3949ab", - "Europe": "#ff5267", - "Oceania": "#08bdba", - }, - ), - ), - vm.Graph( - id="scatter_relation", - figure=px.scatter( - df_gapminder, - x="gdpPercap", - y="lifeExp", - animation_frame="year", - animation_group="country", - size="pop", - color="continent", - hover_name="country", - facet_col="continent", - labels={ - "gdpPercap": "GDP per capita", - "pop": "Population", - "lifeExp": "Life expectancy", - "continent": "Continent", - }, - range_y=[25, 90], - color_discrete_map={ - "Africa": "#00b4ff", - "Americas": "#ff9222", - "Asia": "#3949ab", - "Europe": "#ff5267", - "Oceania": "#08bdba", - }, - ), - ), - ], - controls=[ - vm.Parameter( - targets=["scatter_relation_2007.x", "scatter_relation.x"], - selector=vm.Dropdown( - options=["lifeExp", "gdpPercap", "pop"], multi=False, value="gdpPercap", title="Choose x-axis" - ), - ), - vm.Parameter( - targets=["scatter_relation_2007.y", "scatter_relation.y", "bar_relation_2007.y"], - selector=vm.Dropdown( - options=["lifeExp", "gdpPercap", "pop"], multi=False, value="lifeExp", title="Choose y-axis" - ), - ), - vm.Parameter( - targets=["scatter_relation_2007.size", "scatter_relation.size"], - selector=vm.Dropdown( - options=["lifeExp", "gdpPercap", "pop"], multi=False, value="pop", title="Choose bubble size" - ), - ), - ], - ) - return page_relation_analysis - - -def create_continent_summary(): - """Function returns a page with markdown including images.""" - page_summary = vm.Page( - title="Continent Summary", - layout=vm.Layout(grid=[[i] for i in range(5)], row_min_height="190px", row_gap="25px"), - components=[ - vm.Card( - text=""" - ### Africa - ![](assets/images/continents/africa.svg#my-image) - - Africa, a diverse and expansive continent, faces both challenges and progress in its socio-economic - landscape. In 2007, Africa's GDP per capita was approximately $3,000, reflecting relatively slower - growth compared to other continents like Oceania and Europe. - - However, Africa has shown notable improvements in life expectancy over time, reaching 55 years in - 2007. Despite these economic disparities, Africa's population has been steadily increasing, - reflecting its significant potential for development. - """, - ), - vm.Card( - text=""" - ### Americas - ![](assets/images/continents/america.svg#my-image) - - Comprising North and South America, Americas represents a region of vast geographical and cultural - diversity. In 2007, the continent experienced substantial population growth, with a diverse mix of - countries contributing to this expansion. - - Although its GDP per capita of $11,000 in 2007 exhibited variations across countries, America - maintained similar levels to Asia, reflecting its economic significance. With North America - generally reporting higher life expectancy compared to South America, America remains a region of - opportunities and challenges. - """, - ), - vm.Card( - text=""" - ### Asia - ![](assets/images/continents/asia.svg#my-image) - - Asia holds a central role in the global economy. It's growth in GDP per capita to $12,000 in 2007 - and population has been significant, outpacing many other continents. In 2007, it boasted the - highest population among all continents, with countries like China and India leading the way. - - Despite facing various socio-economic challenges, Asia's increasing life expectancy from 46 years - to 70 over the years reflects advancements in healthcare and overall well-being, making it a vital - region driving global progress and development. - """, - ), - vm.Card( - text=""" - ### Europe - ![](assets/images/continents/europe.svg#my-image) - - Europe boasts a strong and thriving economy. In 2007, it exhibited the second-highest GDP per - capita of $25,000 among continents, indicating sustained economic growth and development. - - Europe's life expectancy surpassed 75 years, showcasing a high standard of living and - well-established healthcare systems. With its robust infrastructure, advanced industries, and - quality of life, Europe continues to be a leading force in the global economy. Between 1952 and - 2007, Europe's population experienced moderate growth, with a factor of approximately 1.5, - notably lower compared to other continents like Asia and America. - """, - ), - vm.Card( - text=""" - ### Oceania - ![](assets/images/continents/oceania.svg#my-image) - - Oceania, comprising countries like Australia and New Zealand, stands out with notable economic - prosperity and longer life expectancy. In 2007, it boasted the highest GDP per capita of $27,000 - among continents and exhibited one of the highest life expectancy levels, surpassing 80 years. - - Despite a relatively smaller population size, Oceania's strong economic growth has contributed - to improved living standards and overall well-being of its population. - """, - ), - ], - ) - return page_summary - - -def create_country_analysis(): - """Function returns a page to perform analysis on country level.""" - df_gapminder = px.data.gapminder() - - df_gapminder_agg = px.data.gapminder() - df_gapminder_agg["lifeExp"] = df_gapminder_agg.groupby(by=["continent", "year"])["lifeExp"].transform("mean") - df_gapminder_agg["gdpPercap"] = df_gapminder_agg.groupby(by=["continent", "year"])["gdpPercap"].transform("mean") - df_gapminder_agg["pop"] = df_gapminder_agg.groupby(by=["continent", "year"])["pop"].transform("sum") - - df_gapminder["data"] = "Country" - df_gapminder_agg["data"] = "Continent" - - df_gapminder = pd.concat([df_gapminder_agg, df_gapminder], ignore_index=True) - - page_country = vm.Page( - title="Country Analysis", - layout=vm.Layout(grid=[[0, 0, 0, 1, 1, 1]] * 7 + [[2, 2, 2, 2, 2, 2]]), - components=[ - vm.Graph( - id="bar_country", - figure=px.bar( - df_gapminder, - x="year", - y="pop", - color="data", - barmode="group", - labels={"year": "Year", "data": "Data", "pop": "Population"}, - color_discrete_map={"Country": "#afe7f9", "Continent": "#003875"}, - ), - ), - vm.Graph( - id="line_country", - figure=px.line( - df_gapminder, - x="year", - y="gdpPercap", - color="data", - labels={"year": "Year", "data": "Data", "gdpPercap": "GDP per capita"}, - color_discrete_map={"Country": "#afe7f9", "Continent": "#003875"}, - markers=True, - ), - ), - vm.Button( - text="Export data", - actions=[ - vm.Action( - function=export_data( - targets=["bar_country"], - ) - ), - ], - ), - ], - controls=[ - vm.Filter(column="country", selector=vm.Dropdown(value="India", multi=False, title="Select country")), - vm.Filter(column="year", selector=vm.RangeSlider(title="Select timeframe")), - ], - ) - return page_country - - -def create_home_page(): - """Function returns the homepage.""" - page_home = vm.Page( - title="Homepage", - layout=vm.Layout(grid=[[0, 1], [2, 3]], row_gap="16px", col_gap="24px"), - components=[ - vm.Card( - text=""" - ![](assets/images/icons/content/hypotheses.svg#icon-top) - - ### Variable Analysis - - Analyzing population, GDP per capita and life expectancy on country and continent level. - """, - href="/variable-analysis", - ), - vm.Card( - text=""" - ![](assets/images/icons/content/hypotheses.svg#icon-top) - - ### Relationship Analysis - - Investigating the interconnection between population, GDP per capita and life expectancy. - """, - href="/relationship-analysis", - ), - vm.Card( - text=""" - ![](assets/images/icons/content/collections.svg#icon-top) - - ### Continent Summary - - Summarizing the main findings for each continent. - """, - href="/continent-summary", - ), - vm.Card( - text=""" - ![](assets/images/icons/content/features.svg#icon-top) - - ### Country Analysis - - Discovering how the metrics differ for each country and export data for further investigation. - """, - href="/country-analysis", + # vm.Graph( + # figure=px.box( + # "gapminder", + # x="continent", + # y="lifeExp", + # color="continent", + # title="Distribution per continent", + # ), + # id="the_graph2", + # ), + # vm.Button( + # id="delete_button_id", + # text="delete_memoized_cache", + # actions=[ + # vm.Action( + # function=delete_memoized_cache(), + # inputs=["collapse_button_id.n_clicks"], + # outputs=[], + # ) + # ] + # ) + ], + controls=[ + vm.Filter( + column="year", + selector=vm.RangeSlider( + title="Select timeframe", ), - ], - ) - return page_home - - -dashboard = vm.Dashboard( - pages=[ - create_home_page(), - create_variable_analysis(), - create_relation_analysis(), - create_continent_summary(), - create_country_analysis(), + ), ], - navigation=vm.Navigation( - pages={ - "Analysis": ["Homepage", "Variable Analysis", "Relationship Analysis", "Country Analysis"], - "Summary": ["Continent Summary"], - } - ), ) +dashboard = vm.Dashboard(pages=[page]) if __name__ == "__main__": Vizro._user_assets_folder = os.path.abspath("../assets") + Vizro._cache_confg = { + "CACHE_TYPE": "FileSystemCache", + "CACHE_DIR": "cache", + "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold + 'CACHE_DEFAULT_TIMEOUT': 3000, # Unit of time is seconds + } Vizro().build(dashboard).run() + # Vizro().build(dashboard).run( + # threaded=False, + # processes=3, + # dev_tools_hot_reload=False + # ) diff --git a/vizro-core/hatch.toml b/vizro-core/hatch.toml index da34010a1..1b47fa341 100644 --- a/vizro-core/hatch.toml +++ b/vizro-core/hatch.toml @@ -23,7 +23,7 @@ dependencies = [ ] [envs.default.env-vars] -DASH_DEBUG = "true" +#DASH_DEBUG = "true" VIZRO_LOG_LEVEL = "DEBUG" [envs.default.scripts] diff --git a/vizro-core/pyproject.toml b/vizro-core/pyproject.toml index 265614172..fe56ab4da 100644 --- a/vizro-core/pyproject.toml +++ b/vizro-core/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "pandas", "pydantic>=1.10.13, <2", # must be synced with pre-commit mypy hook "dash_daq", + "flask_caching", "ipython>=8.10.0", # not directly required, pinned by Snyk to avoid a vulnerability: https://app.snyk.io/vuln/SNYK-PYTHON-IPYTHON-3318382 "numpy>=1.22.2", # not directly required, pinned by Snyk to avoid a vulnerability: https://security.snyk.io/vuln/SNYK-PYTHON-NUMPY-2321970 "tornado>=6.3.2", # not directly required, pinned by Snyk to avoid a vulnerability: https://security.snyk.io/vuln/SNYK-PYTHON-TORNADO-5537286 diff --git a/vizro-core/src/vizro/_vizro.py b/vizro-core/src/vizro/_vizro.py index 1b2996585..e35e1480b 100644 --- a/vizro-core/src/vizro/_vizro.py +++ b/vizro-core/src/vizro/_vizro.py @@ -18,6 +18,7 @@ class Vizro: _user_assets_folder = Path.cwd() / "assets" _lib_assets_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static") + _cache_config = {"CACHE_TYPE": "NullCache"} def __init__(self): """Initializes Dash.""" @@ -44,6 +45,7 @@ def build(self, dashboard: Dashboard): Returns: Vizro: App object """ + data_manager._cache.init_app(self.dash.server, config=self._cache_config) # Note that model instantiation and pre_build are independent of Dash. self._pre_build() diff --git a/vizro-core/src/vizro/managers/_data_manager.py b/vizro-core/src/vizro/managers/_data_manager.py index 3f268d2f7..ee52f914f 100644 --- a/vizro-core/src/vizro/managers/_data_manager.py +++ b/vizro-core/src/vizro/managers/_data_manager.py @@ -1,10 +1,15 @@ """The data manager handles access to all DataFrames used in a Vizro app.""" +import logging +import time from typing import Callable, Dict, Union import pandas as pd +from flask_caching import Cache from vizro.managers._managers_utils import _state_modifier + +logger = logging.getLogger(__name__) # Really ComponentID and DatasetName should be NewType and not just aliases but then for a user's code to type check # correctly they would need to cast all strings to these types. ComponentID = str @@ -20,6 +25,7 @@ class DataManager: >>> data_manager["iris"] = px.data.iris() """ + _cache = Cache(config={"CACHE_TYPE": "NullCache"}) def __init__(self): self.__lazy_data: Dict[DatasetName, pd_LazyDataFrame] = {} @@ -27,6 +33,7 @@ def __init__(self): self.__component_to_original: Dict[ComponentID, DatasetName] = {} self._frozen_state = False + # happens before dashboard build @_state_modifier def __setitem__(self, dataset_name: DatasetName, data: Union[pd.DataFrame, pd_LazyDataFrame]): """Adds `data` to the `DataManager` with key `dataset_name`. @@ -46,6 +53,7 @@ def __setitem__(self, dataset_name: DatasetName, data: Union[pd.DataFrame, pd_La f"Dataset {dataset_name} must be a pandas DataFrame or callable that returns pandas DataFrame." ) + # happens before dashboard build @_state_modifier def _add_component(self, component_id: ComponentID, dataset_name: DatasetName): """Adds a mapping from `component_id` to `dataset_name`.""" @@ -60,13 +68,25 @@ def _add_component(self, component_id: ComponentID, dataset_name: DatasetName): ) self.__component_to_original[component_id] = dataset_name - def _get_component_data(self, component_id: ComponentID) -> pd.DataFrame: - """Returns the original data for `component_id`.""" - if component_id not in self.__component_to_original: - raise KeyError(f"Component {component_id} does not exist. You need to call add_component first.") - dataset_name = self.__component_to_original[component_id] - + # def _get_component_data(self, component_id: ComponentID) -> pd.DataFrame: + # """Returns the original data for `component_id`.""" + # if component_id not in self.__component_to_original: + # raise KeyError(f"Component {component_id} does not exist. You need to call add_component first.") + # dataset_name = self.__component_to_original[component_id] + # + # # Populate original data on first access only + # if dataset_name not in self.__original_data: + # self.__original_data[dataset_name] = self.__lazy_data[dataset_name]() + # + # # Return a copy so that the original data cannot be modified. This is not necessary if we are careful + # # to not do any inplace=True operations, but probably safest to leave it here. + # return self.__original_data[dataset_name].copy() + + @_cache.memoize() + def _get_original_data(self, dataset_name: DatasetName) -> pd.DataFrame: + """Returns the original data for `dataset_name`.""" # Populate original data on first access only + time.sleep(2.0) if dataset_name not in self.__original_data: self.__original_data[dataset_name] = self.__lazy_data[dataset_name]() @@ -74,6 +94,16 @@ def _get_component_data(self, component_id: ComponentID) -> pd.DataFrame: # to not do any inplace=True operations, but probably safest to leave it here. return self.__original_data[dataset_name].copy() + # @_cache.memoize() + def _get_component_data(self, component_id: ComponentID) -> pd.DataFrame: + """Returns the original data for `component_id`.""" + logger.debug("get_component_data: %s", component_id) + if component_id not in self.__component_to_original: + raise KeyError(f"Component {component_id} does not exist. You need to call add_component first.") + dataset_name = self.__component_to_original[component_id] + + return self._get_original_data(dataset_name) + def _has_registered_data(self, component_id: ComponentID) -> bool: try: self._get_component_data(component_id) From 4234255608bb3664fec762cdc746b4d4a1ade816 Mon Sep 17 00:00:00 2001 From: Lingyi Zhang Date: Tue, 17 Oct 2023 18:57:20 -0400 Subject: [PATCH 02/10] linting --- vizro-core/examples/default/app.py | 18 +++++++----------- vizro-core/hatch.toml | 2 +- vizro-core/src/vizro/managers/_data_manager.py | 4 +++- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/vizro-core/examples/default/app.py b/vizro-core/examples/default/app.py index f16a4c0f8..45a872ce8 100644 --- a/vizro-core/examples/default/app.py +++ b/vizro-core/examples/default/app.py @@ -1,15 +1,10 @@ """Example to show dashboard configuration.""" import os -import pandas as pd - import vizro.models as vm import vizro.plotly.express as px from vizro import Vizro from vizro.managers import data_manager -from vizro.actions import export_data, filter_interaction - -from vizro.models import Action from vizro.models.types import capture @@ -23,6 +18,7 @@ def retrieve_gapminder(): @capture("action") def delete_memoized_cache(collapse_button_id_n_clicks): + """Delete memoized cache.""" if collapse_button_id_n_clicks: data_manager._cache.delete_memoized(data_manager._get_component_data, data_manager, "the_graph") # data_manager._cache.delete_memoized(data_manager._get_component_data, data_manager, "the_graph2") @@ -76,12 +72,12 @@ def delete_memoized_cache(collapse_button_id_n_clicks): if __name__ == "__main__": Vizro._user_assets_folder = os.path.abspath("../assets") - Vizro._cache_confg = { - "CACHE_TYPE": "FileSystemCache", - "CACHE_DIR": "cache", - "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold - 'CACHE_DEFAULT_TIMEOUT': 3000, # Unit of time is seconds - } + Vizro._cache_config = { + "CACHE_TYPE": "FileSystemCache", + "CACHE_DIR": "cache", + "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold + "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds + } Vizro().build(dashboard).run() # Vizro().build(dashboard).run( # threaded=False, diff --git a/vizro-core/hatch.toml b/vizro-core/hatch.toml index 1b47fa341..980096a55 100644 --- a/vizro-core/hatch.toml +++ b/vizro-core/hatch.toml @@ -23,7 +23,7 @@ dependencies = [ ] [envs.default.env-vars] -#DASH_DEBUG = "true" +# DASH_DEBUG = "true" VIZRO_LOG_LEVEL = "DEBUG" [envs.default.scripts] diff --git a/vizro-core/src/vizro/managers/_data_manager.py b/vizro-core/src/vizro/managers/_data_manager.py index ee52f914f..c715a7da2 100644 --- a/vizro-core/src/vizro/managers/_data_manager.py +++ b/vizro-core/src/vizro/managers/_data_manager.py @@ -8,7 +8,6 @@ from vizro.managers._managers_utils import _state_modifier - logger = logging.getLogger(__name__) # Really ComponentID and DatasetName should be NewType and not just aliases but then for a user's code to type check # correctly they would need to cast all strings to these types. @@ -25,6 +24,7 @@ class DataManager: >>> data_manager["iris"] = px.data.iris() """ + _cache = Cache(config={"CACHE_TYPE": "NullCache"}) def __init__(self): @@ -86,6 +86,8 @@ def _add_component(self, component_id: ComponentID, dataset_name: DatasetName): def _get_original_data(self, dataset_name: DatasetName) -> pd.DataFrame: """Returns the original data for `dataset_name`.""" # Populate original data on first access only + logger.debug("get original data: %s", dataset_name) + logger.debug("loading...") time.sleep(2.0) if dataset_name not in self.__original_data: self.__original_data[dataset_name] = self.__lazy_data[dataset_name]() From 95183753e032dcc61cf127f8dccca425a62673f8 Mon Sep 17 00:00:00 2001 From: Lingyi Zhang Date: Tue, 17 Oct 2023 22:38:51 -0400 Subject: [PATCH 03/10] redis cache --- vizro-core/examples/default/app.py | 31 +++---- .../src/vizro/managers/_data_manager.py | 84 +++++++++---------- 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/vizro-core/examples/default/app.py b/vizro-core/examples/default/app.py index 45a872ce8..5279b09cc 100644 --- a/vizro-core/examples/default/app.py +++ b/vizro-core/examples/default/app.py @@ -17,11 +17,11 @@ def retrieve_gapminder(): @capture("action") -def delete_memoized_cache(collapse_button_id_n_clicks): +def delete_memoized_cache(delete_button_id_n_clicks): """Delete memoized cache.""" - if collapse_button_id_n_clicks: - data_manager._cache.delete_memoized(data_manager._get_component_data, data_manager, "the_graph") - # data_manager._cache.delete_memoized(data_manager._get_component_data, data_manager, "the_graph2") + if delete_button_id_n_clicks: + data_manager._cache.delete_memoized(data_manager._get_original_data, data_manager, "gapminder") + return None page = vm.Page( @@ -47,17 +47,17 @@ def delete_memoized_cache(collapse_button_id_n_clicks): # ), # id="the_graph2", # ), - # vm.Button( - # id="delete_button_id", - # text="delete_memoized_cache", - # actions=[ - # vm.Action( - # function=delete_memoized_cache(), - # inputs=["collapse_button_id.n_clicks"], - # outputs=[], - # ) - # ] - # ) + vm.Button( + id="delete_button_id", + text="delete_memoized_cache", + actions=[ + vm.Action( + function=delete_memoized_cache(), + inputs=["delete_button_id.n_clicks"], + outputs=[], + ) + ] + ) ], controls=[ vm.Filter( @@ -78,6 +78,7 @@ def delete_memoized_cache(collapse_button_id_n_clicks): "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds } + # Vizro._cache_config = {"CACHE_TYPE": "redis", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 3000,} Vizro().build(dashboard).run() # Vizro().build(dashboard).run( # threaded=False, diff --git a/vizro-core/src/vizro/managers/_data_manager.py b/vizro-core/src/vizro/managers/_data_manager.py index c715a7da2..75947f522 100644 --- a/vizro-core/src/vizro/managers/_data_manager.py +++ b/vizro-core/src/vizro/managers/_data_manager.py @@ -120,45 +120,45 @@ def _reset(self): data_manager = DataManager() -if __name__ == "__main__": - from functools import partial - - import vizro.plotly.express as px - - dm = data_manager - dm["iris"] = px.data.iris() - - dm._add_component("component_id_a", "iris") - print(len(dm._get_component_data("component_id_a"))) # 150 # noqa: T201 - - dm._add_component("component_id_b", "iris") - df_a = dm._get_component_data("component_id_a") - df_a.drop(columns="species", inplace=True) - print(df_a.shape) # (150, 5) # noqa: T201 - df_b = dm._get_component_data("component_id_b") - print(df_b.shape) # (150, 6) # noqa: T201 - - # Lazy loading example 1 - def retrieve_iris(): - df = px.data.iris() - subset = df.query("species == 'setosa'") - return subset - - dm["iris_subset"] = retrieve_iris - dm._add_component("component_id_c", "iris_subset") - print(len(dm._get_component_data("component_id_c"))) # 50 # noqa: T201 - - # Lazy loading example 2 - def retrieve_one_species(species): - df = px.data.iris() - subset = df[df["species"] == species].copy() - return subset - - dm["data_from_external_1"] = lambda: retrieve_one_species("setosa") - dm._add_component("component_id_d", "data_from_external_1") - print(len(dm._get_component_data("component_id_d"))) # 50 # noqa: T201 - - # Lazy loading example 3 - dm["data_from_external_2"] = partial(retrieve_one_species, "setosa") - dm._add_component("component_id_e", "data_from_external_2") - print(len(dm._get_component_data("component_id_e"))) # 50 # noqa: T201 +# if __name__ == "__main__": +# from functools import partial +# +# import vizro.plotly.express as px +# +# dm = data_manager +# dm["iris"] = px.data.iris() +# +# dm._add_component("component_id_a", "iris") +# print(len(dm._get_component_data("component_id_a"))) # 150 # noqa: T201 +# +# dm._add_component("component_id_b", "iris") +# df_a = dm._get_component_data("component_id_a") +# df_a.drop(columns="species", inplace=True) +# print(df_a.shape) # (150, 5) # noqa: T201 +# df_b = dm._get_component_data("component_id_b") +# print(df_b.shape) # (150, 6) # noqa: T201 +# +# # Lazy loading example 1 +# def retrieve_iris(): +# df = px.data.iris() +# subset = df.query("species == 'setosa'") +# return subset +# +# dm["iris_subset"] = retrieve_iris +# dm._add_component("component_id_c", "iris_subset") +# print(len(dm._get_component_data("component_id_c"))) # 50 # noqa: T201 +# +# # Lazy loading example 2 +# def retrieve_one_species(species): +# df = px.data.iris() +# subset = df[df["species"] == species].copy() +# return subset +# +# dm["data_from_external_1"] = lambda: retrieve_one_species("setosa") +# dm._add_component("component_id_d", "data_from_external_1") +# print(len(dm._get_component_data("component_id_d"))) # 50 # noqa: T201 +# +# # Lazy loading example 3 +# dm["data_from_external_2"] = partial(retrieve_one_species, "setosa") +# dm._add_component("component_id_e", "data_from_external_2") +# print(len(dm._get_component_data("component_id_e"))) # 50 # noqa: T201 From 59e1ebbca0e9f0cca7cf5de8e1f54dc78e752676 Mon Sep 17 00:00:00 2001 From: Lingyi Zhang Date: Wed, 18 Oct 2023 12:18:29 -0400 Subject: [PATCH 04/10] test with gunicorn --- vizro-core/examples/default/app.py | 15 +++++++++++++++ vizro-core/pyproject.toml | 1 + 2 files changed, 16 insertions(+) diff --git a/vizro-core/examples/default/app.py b/vizro-core/examples/default/app.py index 5279b09cc..b16c1938b 100644 --- a/vizro-core/examples/default/app.py +++ b/vizro-core/examples/default/app.py @@ -70,6 +70,21 @@ def delete_memoized_cache(delete_button_id_n_clicks): ) dashboard = vm.Dashboard(pages=[page]) +### when launching with gunicorn ### +Vizro._user_assets_folder = os.path.abspath("../assets") +Vizro._cache_config = { + "CACHE_TYPE": "FileSystemCache", + "CACHE_DIR": "cache", + "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold + "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds +} +# Vizro._cache_config = {"CACHE_TYPE": "redis", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 120} +app = Vizro() +app.build(dashboard) +server = app.dash.server +### when launching with gunicorn ### + + if __name__ == "__main__": Vizro._user_assets_folder = os.path.abspath("../assets") Vizro._cache_config = { diff --git a/vizro-core/pyproject.toml b/vizro-core/pyproject.toml index fe56ab4da..be95d5170 100644 --- a/vizro-core/pyproject.toml +++ b/vizro-core/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "pydantic>=1.10.13, <2", # must be synced with pre-commit mypy hook "dash_daq", "flask_caching", + "gunicorn", "ipython>=8.10.0", # not directly required, pinned by Snyk to avoid a vulnerability: https://app.snyk.io/vuln/SNYK-PYTHON-IPYTHON-3318382 "numpy>=1.22.2", # not directly required, pinned by Snyk to avoid a vulnerability: https://security.snyk.io/vuln/SNYK-PYTHON-NUMPY-2321970 "tornado>=6.3.2", # not directly required, pinned by Snyk to avoid a vulnerability: https://security.snyk.io/vuln/SNYK-PYTHON-TORNADO-5537286 From f5c67df4878c2a30d9d8a72dca3f5fc9bc4e1bb2 Mon Sep 17 00:00:00 2001 From: Lingyi Zhang Date: Wed, 18 Oct 2023 12:19:35 -0400 Subject: [PATCH 05/10] clean up data_manager --- .../src/vizro/managers/_data_manager.py | 99 ++++++++----------- 1 file changed, 42 insertions(+), 57 deletions(-) diff --git a/vizro-core/src/vizro/managers/_data_manager.py b/vizro-core/src/vizro/managers/_data_manager.py index 75947f522..b61dacb18 100644 --- a/vizro-core/src/vizro/managers/_data_manager.py +++ b/vizro-core/src/vizro/managers/_data_manager.py @@ -68,20 +68,6 @@ def _add_component(self, component_id: ComponentID, dataset_name: DatasetName): ) self.__component_to_original[component_id] = dataset_name - # def _get_component_data(self, component_id: ComponentID) -> pd.DataFrame: - # """Returns the original data for `component_id`.""" - # if component_id not in self.__component_to_original: - # raise KeyError(f"Component {component_id} does not exist. You need to call add_component first.") - # dataset_name = self.__component_to_original[component_id] - # - # # Populate original data on first access only - # if dataset_name not in self.__original_data: - # self.__original_data[dataset_name] = self.__lazy_data[dataset_name]() - # - # # Return a copy so that the original data cannot be modified. This is not necessary if we are careful - # # to not do any inplace=True operations, but probably safest to leave it here. - # return self.__original_data[dataset_name].copy() - @_cache.memoize() def _get_original_data(self, dataset_name: DatasetName) -> pd.DataFrame: """Returns the original data for `dataset_name`.""" @@ -96,7 +82,6 @@ def _get_original_data(self, dataset_name: DatasetName) -> pd.DataFrame: # to not do any inplace=True operations, but probably safest to leave it here. return self.__original_data[dataset_name].copy() - # @_cache.memoize() def _get_component_data(self, component_id: ComponentID) -> pd.DataFrame: """Returns the original data for `component_id`.""" logger.debug("get_component_data: %s", component_id) @@ -120,45 +105,45 @@ def _reset(self): data_manager = DataManager() -# if __name__ == "__main__": -# from functools import partial -# -# import vizro.plotly.express as px -# -# dm = data_manager -# dm["iris"] = px.data.iris() -# -# dm._add_component("component_id_a", "iris") -# print(len(dm._get_component_data("component_id_a"))) # 150 # noqa: T201 -# -# dm._add_component("component_id_b", "iris") -# df_a = dm._get_component_data("component_id_a") -# df_a.drop(columns="species", inplace=True) -# print(df_a.shape) # (150, 5) # noqa: T201 -# df_b = dm._get_component_data("component_id_b") -# print(df_b.shape) # (150, 6) # noqa: T201 -# -# # Lazy loading example 1 -# def retrieve_iris(): -# df = px.data.iris() -# subset = df.query("species == 'setosa'") -# return subset -# -# dm["iris_subset"] = retrieve_iris -# dm._add_component("component_id_c", "iris_subset") -# print(len(dm._get_component_data("component_id_c"))) # 50 # noqa: T201 -# -# # Lazy loading example 2 -# def retrieve_one_species(species): -# df = px.data.iris() -# subset = df[df["species"] == species].copy() -# return subset -# -# dm["data_from_external_1"] = lambda: retrieve_one_species("setosa") -# dm._add_component("component_id_d", "data_from_external_1") -# print(len(dm._get_component_data("component_id_d"))) # 50 # noqa: T201 -# -# # Lazy loading example 3 -# dm["data_from_external_2"] = partial(retrieve_one_species, "setosa") -# dm._add_component("component_id_e", "data_from_external_2") -# print(len(dm._get_component_data("component_id_e"))) # 50 # noqa: T201 +if __name__ == "__main__": + from functools import partial + + import vizro.plotly.express as px + + dm = data_manager + dm["iris"] = px.data.iris() + + dm._add_component("component_id_a", "iris") + print(len(dm._get_component_data("component_id_a"))) # 150 # noqa: T201 + + dm._add_component("component_id_b", "iris") + df_a = dm._get_component_data("component_id_a") + df_a.drop(columns="species", inplace=True) + print(df_a.shape) # (150, 5) # noqa: T201 + df_b = dm._get_component_data("component_id_b") + print(df_b.shape) # (150, 6) # noqa: T201 + + # Lazy loading example 1 + def retrieve_iris(): + df = px.data.iris() + subset = df.query("species == 'setosa'") + return subset + + dm["iris_subset"] = retrieve_iris + dm._add_component("component_id_c", "iris_subset") + print(len(dm._get_component_data("component_id_c"))) # 50 # noqa: T201 + + # Lazy loading example 2 + def retrieve_one_species(species): + df = px.data.iris() + subset = df[df["species"] == species].copy() + return subset + + dm["data_from_external_1"] = lambda: retrieve_one_species("setosa") + dm._add_component("component_id_d", "data_from_external_1") + print(len(dm._get_component_data("component_id_d"))) # 50 # noqa: T201 + + # Lazy loading example 3 + dm["data_from_external_2"] = partial(retrieve_one_species, "setosa") + dm._add_component("component_id_e", "data_from_external_2") + print(len(dm._get_component_data("component_id_e"))) # 50 # noqa: T201 From 76cbb8cfcf6fb2c05dd8ce4a101faf25efe09f1c Mon Sep 17 00:00:00 2001 From: Lingyi Zhang Date: Wed, 18 Oct 2023 12:33:52 -0400 Subject: [PATCH 06/10] minor cleanup --- vizro-core/examples/default/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vizro-core/examples/default/app.py b/vizro-core/examples/default/app.py index b16c1938b..bd260223a 100644 --- a/vizro-core/examples/default/app.py +++ b/vizro-core/examples/default/app.py @@ -78,7 +78,7 @@ def delete_memoized_cache(delete_button_id_n_clicks): "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds } -# Vizro._cache_config = {"CACHE_TYPE": "redis", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 120} +# Vizro._cache_config = {"CACHE_TYPE": "RedisCache", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 120} app = Vizro() app.build(dashboard) server = app.dash.server @@ -93,7 +93,7 @@ def delete_memoized_cache(delete_button_id_n_clicks): "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds } - # Vizro._cache_config = {"CACHE_TYPE": "redis", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 3000,} + # Vizro._cache_config = {"CACHE_TYPE": "RedisCache", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 3000,} Vizro().build(dashboard).run() # Vizro().build(dashboard).run( # threaded=False, From b7ddeaab503f9aaaf582b250f16e2b0cbf60c50d Mon Sep 17 00:00:00 2001 From: Lingyi Zhang Date: Tue, 24 Oct 2023 18:15:45 -0400 Subject: [PATCH 07/10] drop original data when caching --- vizro-core/examples/default/app.py | 40 +++++++++---------- vizro-core/src/vizro/_vizro.py | 5 ++- .../src/vizro/managers/_data_manager.py | 9 +++++ 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/vizro-core/examples/default/app.py b/vizro-core/examples/default/app.py index bd260223a..795d4002d 100644 --- a/vizro-core/examples/default/app.py +++ b/vizro-core/examples/default/app.py @@ -70,30 +70,30 @@ def delete_memoized_cache(delete_button_id_n_clicks): ) dashboard = vm.Dashboard(pages=[page]) -### when launching with gunicorn ### -Vizro._user_assets_folder = os.path.abspath("../assets") -Vizro._cache_config = { - "CACHE_TYPE": "FileSystemCache", - "CACHE_DIR": "cache", - "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold - "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds -} -# Vizro._cache_config = {"CACHE_TYPE": "RedisCache", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 120} -app = Vizro() -app.build(dashboard) -server = app.dash.server -### when launching with gunicorn ### +# ### when launching with gunicorn ### +# Vizro._user_assets_folder = os.path.abspath("../assets") +# data_manager._cache.config = { +# "CACHE_TYPE": "FileSystemCache", +# "CACHE_DIR": "cache", +# "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold +# "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds +# } +# # data_manager._cache.config = {"CACHE_TYPE": "RedisCache", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 120} +# app = Vizro() +# app.build(dashboard) +# server = app.dash.server +# ### when launching with gunicorn ### if __name__ == "__main__": Vizro._user_assets_folder = os.path.abspath("../assets") - Vizro._cache_config = { - "CACHE_TYPE": "FileSystemCache", - "CACHE_DIR": "cache", - "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold - "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds - } - # Vizro._cache_config = {"CACHE_TYPE": "RedisCache", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 3000,} + # data_manager._cache.config = { + # "CACHE_TYPE": "FileSystemCache", + # "CACHE_DIR": "cache", + # "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold + # "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds + # } + data_manager._cache.config = {"CACHE_TYPE": "RedisCache", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 3000,} Vizro().build(dashboard).run() # Vizro().build(dashboard).run( # threaded=False, diff --git a/vizro-core/src/vizro/_vizro.py b/vizro-core/src/vizro/_vizro.py index fa5a65d42..df0bbb972 100644 --- a/vizro-core/src/vizro/_vizro.py +++ b/vizro-core/src/vizro/_vizro.py @@ -18,7 +18,6 @@ class Vizro: _user_assets_folder = Path.cwd() / "assets" _lib_assets_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static") - _cache_config = {"CACHE_TYPE": "NullCache"} def __init__(self): """Initializes Dash.""" @@ -45,9 +44,11 @@ def build(self, dashboard: Dashboard): Returns: Vizro: App object """ - data_manager._cache.init_app(self.dash.server, config=self._cache_config) + data_manager._cache.init_app(self.dash.server, config=data_manager._cache.config) # Note that model instantiation and pre_build are independent of Dash. self._pre_build() + # to clear original data if the cache type is not NullCache + data_manager._drop_original_data() self.dash.layout = dashboard.build() diff --git a/vizro-core/src/vizro/managers/_data_manager.py b/vizro-core/src/vizro/managers/_data_manager.py index 8e90acf6d..22bf66319 100644 --- a/vizro-core/src/vizro/managers/_data_manager.py +++ b/vizro-core/src/vizro/managers/_data_manager.py @@ -101,6 +101,15 @@ def _has_registered_data(self, component_id: ComponentID) -> bool: def _clear(self): self.__init__() # type: ignore[misc] + # to clear original data if the cache type is not NullCache + def _drop_original_data(self): + logger.debug(f"__original_data: {self.__original_data}") + logger.debug(f"config: {self._cache.config}") + logger.debug("drop original data") + if self._cache.config["CACHE_TYPE"] != "NullCache": + self.__original_data = {} + logger.debug(f"__original_data: {self.__original_data}") + data_manager = DataManager() From c428f494126828e6f81d3546fb798eaa3c3e7591 Mon Sep 17 00:00:00 2001 From: Lingyi Zhang Date: Wed, 25 Oct 2023 22:56:55 -0400 Subject: [PATCH 08/10] only drop original if there is lazy data --- vizro-core/examples/default/app.py | 66 +++++++++++++++---- vizro-core/src/vizro/_vizro.py | 4 +- .../src/vizro/managers/_data_manager.py | 25 ++++--- 3 files changed, 71 insertions(+), 24 deletions(-) diff --git a/vizro-core/examples/default/app.py b/vizro-core/examples/default/app.py index 795d4002d..f7fdb6bf3 100644 --- a/vizro-core/examples/default/app.py +++ b/vizro-core/examples/default/app.py @@ -14,14 +14,24 @@ def retrieve_gapminder(): data_manager["gapminder"] = retrieve_gapminder +data_manager["gapminder2"] = retrieve_gapminder + +df_gapminder = px.data.gapminder() +df_gapminder2 = px.data.gapminder() @capture("action") def delete_memoized_cache(delete_button_id_n_clicks): - """Delete memoized cache.""" + """Delete one memoized cache.""" if delete_button_id_n_clicks: data_manager._cache.delete_memoized(data_manager._get_original_data, data_manager, "gapminder") - return None + + +@capture("action") +def empty_cache(empty_button_id_n_clicks): + """Empty the entire cache.""" + if empty_button_id_n_clicks: + data_manager._cache.cache.clear() page = vm.Page( @@ -30,6 +40,7 @@ def delete_memoized_cache(delete_button_id_n_clicks): vm.Graph( figure=px.box( "gapminder", + # df_gapminder, x="continent", y="lifeExp", color="continent", @@ -37,16 +48,33 @@ def delete_memoized_cache(delete_button_id_n_clicks): ), id="the_graph", ), - # vm.Graph( - # figure=px.box( - # "gapminder", - # x="continent", - # y="lifeExp", - # color="continent", - # title="Distribution per continent", - # ), - # id="the_graph2", - # ), + vm.Graph( + id="the_graph2", + figure=px.scatter( + "gapminder2", + # df_gapminder2, + x="gdpPercap", + y="lifeExp", + size="pop", + color="continent", + hover_name="country", + facet_col="continent", + labels={ + "gdpPercap": "GDP per capita", + "pop": "Population", + "lifeExp": "Life expectancy", + "continent": "Continent", + }, + range_y=[25, 90], + color_discrete_map={ + "Africa": "#00b4ff", + "Americas": "#ff9222", + "Asia": "#3949ab", + "Europe": "#ff5267", + "Oceania": "#08bdba", + }, + ), + ), vm.Button( id="delete_button_id", text="delete_memoized_cache", @@ -54,7 +82,16 @@ def delete_memoized_cache(delete_button_id_n_clicks): vm.Action( function=delete_memoized_cache(), inputs=["delete_button_id.n_clicks"], - outputs=[], + ) + ] + ), + vm.Button( + id="empty_button_id", + text="empty_cache", + actions=[ + vm.Action( + function=empty_cache(), + inputs=["empty_button_id.n_clicks"], ) ] ) @@ -62,6 +99,7 @@ def delete_memoized_cache(delete_button_id_n_clicks): controls=[ vm.Filter( column="year", + targets=["the_graph"], selector=vm.RangeSlider( title="Select timeframe", ), @@ -93,7 +131,7 @@ def delete_memoized_cache(delete_button_id_n_clicks): # "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold # "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds # } - data_manager._cache.config = {"CACHE_TYPE": "RedisCache", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 3000,} + data_manager._cache.config = {"CACHE_TYPE": "RedisCache", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 60,} Vizro().build(dashboard).run() # Vizro().build(dashboard).run( # threaded=False, diff --git a/vizro-core/src/vizro/_vizro.py b/vizro-core/src/vizro/_vizro.py index df0bbb972..ad70463dd 100644 --- a/vizro-core/src/vizro/_vizro.py +++ b/vizro-core/src/vizro/_vizro.py @@ -47,10 +47,10 @@ def build(self, dashboard: Dashboard): data_manager._cache.init_app(self.dash.server, config=data_manager._cache.config) # Note that model instantiation and pre_build are independent of Dash. self._pre_build() - # to clear original data if the cache type is not NullCache - data_manager._drop_original_data() self.dash.layout = dashboard.build() + # to clear original data if the cache type is not NullCache + data_manager._clean_original_data() return self diff --git a/vizro-core/src/vizro/managers/_data_manager.py b/vizro-core/src/vizro/managers/_data_manager.py index 22bf66319..f0f390af0 100644 --- a/vizro-core/src/vizro/managers/_data_manager.py +++ b/vizro-core/src/vizro/managers/_data_manager.py @@ -89,7 +89,12 @@ def _get_component_data(self, component_id: ComponentID) -> pd.DataFrame: raise KeyError(f"Component {component_id} does not exist. You need to call add_component first.") dataset_name = self.__component_to_original[component_id] - return self._get_original_data(dataset_name) + component_data = self._get_original_data(dataset_name) + + # clean up original data if the cache type is not NullCache + self._clean_original_data() + + return component_data def _has_registered_data(self, component_id: ComponentID) -> bool: try: @@ -102,13 +107,17 @@ def _clear(self): self.__init__() # type: ignore[misc] # to clear original data if the cache type is not NullCache - def _drop_original_data(self): - logger.debug(f"__original_data: {self.__original_data}") - logger.debug(f"config: {self._cache.config}") - logger.debug("drop original data") - if self._cache.config["CACHE_TYPE"] != "NullCache": - self.__original_data = {} - logger.debug(f"__original_data: {self.__original_data}") + def _clean_original_data(self): + logger.debug(f"__original_data before cleaning: {self.__original_data.keys()}") + # logger.debug(f"config: {self._cache.config}") + logger.debug("clean original data") + if self._cache.config["CACHE_TYPE"] == "NullCache": + return + for dataset_name in list(self.__original_data.keys()): + if dataset_name in self.__lazy_data: + logger.debug(f"drop --> {dataset_name}") + del self.__original_data[dataset_name] + logger.debug(f"__original_data after cleaning: {self.__original_data.keys()}") data_manager = DataManager() From be91776b3cee209aee90e8c91af455eded803a6c Mon Sep 17 00:00:00 2001 From: Lingyi Zhang Date: Thu, 26 Oct 2023 00:44:17 -0400 Subject: [PATCH 09/10] clean up --- vizro-core/examples/default/app.py | 18 +++++++++++++----- vizro-core/src/vizro/managers/_data_manager.py | 4 ++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/vizro-core/examples/default/app.py b/vizro-core/examples/default/app.py index f7fdb6bf3..dd5167631 100644 --- a/vizro-core/examples/default/app.py +++ b/vizro-core/examples/default/app.py @@ -83,7 +83,7 @@ def empty_cache(empty_button_id_n_clicks): function=delete_memoized_cache(), inputs=["delete_button_id.n_clicks"], ) - ] + ], ), vm.Button( id="empty_button_id", @@ -93,8 +93,8 @@ def empty_cache(empty_button_id_n_clicks): function=empty_cache(), inputs=["empty_button_id.n_clicks"], ) - ] - ) + ], + ), ], controls=[ vm.Filter( @@ -116,7 +116,11 @@ def empty_cache(empty_button_id_n_clicks): # "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold # "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds # } -# # data_manager._cache.config = {"CACHE_TYPE": "RedisCache", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 120} +# data_manager._cache.config = { +# "CACHE_TYPE": "RedisCache", +# "CACHE_REDIS_URL": "redis://localhost:6379/0", +# "CACHE_DEFAULT_TIMEOUT": 120, +# } # app = Vizro() # app.build(dashboard) # server = app.dash.server @@ -131,7 +135,11 @@ def empty_cache(empty_button_id_n_clicks): # "CACHE_THRESHOLD": 20, # The maximum number of items the cache can hold # "CACHE_DEFAULT_TIMEOUT": 3000, # Unit of time is seconds # } - data_manager._cache.config = {"CACHE_TYPE": "RedisCache", "CACHE_REDIS_URL": "redis://localhost:6379/0", "CACHE_DEFAULT_TIMEOUT": 60,} + data_manager._cache.config = { + "CACHE_TYPE": "RedisCache", + "CACHE_REDIS_URL": "redis://localhost:6379/0", + "CACHE_DEFAULT_TIMEOUT": 60, + } Vizro().build(dashboard).run() # Vizro().build(dashboard).run( # threaded=False, diff --git a/vizro-core/src/vizro/managers/_data_manager.py b/vizro-core/src/vizro/managers/_data_manager.py index f0f390af0..28c65eea3 100644 --- a/vizro-core/src/vizro/managers/_data_manager.py +++ b/vizro-core/src/vizro/managers/_data_manager.py @@ -108,6 +108,10 @@ def _clear(self): # to clear original data if the cache type is not NullCache def _clean_original_data(self): + """Clean up original data if the cache type is not NullCache. + + This only drops the original data if the same key is in the lazy data. + """ logger.debug(f"__original_data before cleaning: {self.__original_data.keys()}") # logger.debug(f"config: {self._cache.config}") logger.debug("clean original data") From aa04766536b7e8500eaabdd2ef4703c6edd0a770 Mon Sep 17 00:00:00 2001 From: Lingyi Zhang Date: Thu, 26 Oct 2023 09:53:32 -0400 Subject: [PATCH 10/10] address comments --- vizro-core/src/vizro/_vizro.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vizro-core/src/vizro/_vizro.py b/vizro-core/src/vizro/_vizro.py index ad70463dd..c35875f22 100644 --- a/vizro-core/src/vizro/_vizro.py +++ b/vizro-core/src/vizro/_vizro.py @@ -44,13 +44,11 @@ def build(self, dashboard: Dashboard): Returns: Vizro: App object """ - data_manager._cache.init_app(self.dash.server, config=data_manager._cache.config) + data_manager._cache.init_app(self.dash.server) # Note that model instantiation and pre_build are independent of Dash. self._pre_build() self.dash.layout = dashboard.build() - # to clear original data if the cache type is not NullCache - data_manager._clean_original_data() return self