[POC] Dashboard generator (#522)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Maximilian Schulz <[email protected]>
mckinsey · Aug 2, 2024 · 4187e95 · 4187e95
1 parent 1c5e36f
commit 4187e95
Show file tree

Hide file tree

Showing 19 changed files with 1,453 additions and 5 deletions.
diff --git a/vizro-ai/changelog.d/20240613_020634_lingyi_zhang_dashboard_generator.md b/vizro-ai/changelog.d/20240613_020634_lingyi_zhang_dashboard_generator.md
@@ -0,0 +1,48 @@
+<!--
+A new scriv changelog fragment.
+
+Uncomment the section that is right (remove the HTML comment wrapper).
+-->
+
+<!--
+### Highlights ✨
+
+- A bullet item for the Highlights ✨ category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Removed
+
+- A bullet item for the Removed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Added
+
+- A bullet item for the Added category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Changed
+
+- A bullet item for the Changed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Deprecated
+
+- A bullet item for the Deprecated category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Fixed
+
+- A bullet item for the Fixed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Security
+
+- A bullet item for the Security category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
diff --git a/vizro-ai/examples/example_dashboard.ipynb b/vizro-ai/examples/example_dashboard.ipynb
@@ -0,0 +1,283 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53e857ce-22bc-49de-9adc-9a2e7c9829cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2a25acdd-20c3-4762-b97f-254de1586aeb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import vizro.plotly.express as px\n",
+    "\n",
+    "from vizro import Vizro\n",
+    "from vizro_ai import VizroAI\n",
+    "\n",
+    "# vizro_ai = VizroAI(model=\"gpt-4-turbo\")\n",
+    "vizro_ai = VizroAI(model=\"gpt-4o\")\n",
+    "# vizro_ai = VizroAI()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b5e24f1b-e698-40e5-be00-c3a59c53ec65",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df1 = px.data.gapminder()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "449da2ee-c754-420a-ba2e-c9b0ef62d934",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df2 = px.data.stocks()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ec46d4d1-d20b-4351-831d-d3d8ddc5cb70",
+   "metadata": {},
+   "source": [
+    "# Example: Simple dashboard request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "820a5d0f-a31e-4bbd-a924-9629631cc291",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "user_question_2_data = \"\"\"\n",
+    "I need a page with 1 table.\n",
+    "The table shows the tech companies stock data.\n",
+    "\n",
+    "I need a second page showing 2 cards and one chart.\n",
+    "The first card says 'The Gapminder dataset provides historical data on countries' development indicators.'\n",
+    "The chart is a scatter plot showing life expectancy vs. GDP per capita by country. Life expectancy on the y axis, GDP per capita on the x axis, and colored by continent.\n",
+    "The second card says 'Data spans from 1952 to 2007 across various countries'\n",
+    "The layout uses a grid of 3 columns and 2 rows.\n",
+    "\n",
+    "Row 1: The first row has three columns:\n",
+    "The first column is occupied by the first card.\n",
+    "The second and third columns are spanned by the chart.\n",
+    "\n",
+    "Row 2: The second row mirrors the layout of the first row with respect to chart, but the first column is occupied by the second card.\n",
+    "\n",
+    "Add a filter to filter the scatter plot by continent.\n",
+    "Add a second filter to filter the chart by year.\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0d71e089-8c94-4d12-87bd-d803552acb32",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dashboard = vizro_ai.dashboard([df1, df2], user_question_2_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14477c56-54e9-43a5-9136-25bc950fdf3a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Vizro().build(dashboard).run()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "747964b9-fd05-4c5a-a73a-79dae82320b3",
+   "metadata": {},
+   "source": [
+    "# Example: 4-page dashboard request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "967ff6a4-f138-4643-b993-a72e5cc26de2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df3 = px.data.tips()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cb9347f8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "user_question_3_data = \"\"\"\n",
+    "<Page 1>\n",
+    "I need a page with 1 table and 1 line chart. \n",
+    "The chart shows the stock price trends of GOOG and AAPL.\n",
+    "The table shows the stock prices data details.\n",
+    "\n",
+    "<Page 2>\n",
+    "I need a second page showing 1 card and 1 chart.\n",
+    "The card says 'The Gapminder dataset provides historical data on countries' development indicators.'\n",
+    "The chart is a scatter plot showing GDP per capita vs. life expectancy. GDP per capita on the x axis, life expectancy on the y axis, and colored by continent.\n",
+    "Layout the card on the left and the chart on the right. The card takes 1/3 of the whole space on the left.\n",
+    "The chart takes 2/3 of the whole space and is on the right.\n",
+    "Add a filter to filter the scatter plot by continent.\n",
+    "Add a second filter to filter the chart by year.\n",
+    "\n",
+    "<Page 3>\n",
+    "This page displays the tips dataset. use two different charts to show data\n",
+    "distributions. one chart should be a bar chart and the other should be a scatter plot.\n",
+    "first chart is on the left and the second chart is on the right.\n",
+    "Add a filter to filter data in the scatter plot by smoker.\n",
+    "\n",
+    "<Page 4>\n",
+    "Create 3 cards on this page:\n",
+    "1. The first card on top says \"This page combines data from various sources including tips, stock prices, and global indicators.\"\n",
+    "2. The second card says \"Insights from Gapminder dataset.\"\n",
+    "3. The third card says \"Stock price trends over time.\"\n",
+    "\n",
+    "Layout these 3 cards in this way:\n",
+    "create a grid with 3 columns and 2 rows.\n",
+    "Row 1: The first row has three columns:\n",
+    "- The first column is empty.\n",
+    "- The second and third columns span the area for card 1.\n",
+    "\n",
+    "Row 2: The second row also has three columns:\n",
+    "- The first column is empty.\n",
+    "- The second column is occupied by the area for card 2.\n",
+    "- The third column is occupied by the area for card 3.\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f0a0cdfa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Vizro._reset()\n",
+    "dashboard = vizro_ai.dashboard([df1, df2, df3], user_question_3_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3167e996",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Vizro().build(dashboard).run()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bbf5c920-0432-4415-996f-1acb9d7b6b8a",
+   "metadata": {},
+   "source": [
+    "# Example: Request with unsupported features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12d5976e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "user_question_2_data = \"\"\"\n",
+    "<Page 1>\n",
+    "I need a page showing 2 cards, one chart, and 1 button.\n",
+    "The first card says 'The Tips dataset provides insights into customer tipping behavior.'\n",
+    "The chart is a bar chart showing the total bill amount by day. Day on the x axis, total bill amount on the y axis, and colored by time of day.\n",
+    "The second card says 'Data collected from various days and times.'\n",
+    "Layout the two cards on the left and the chart on the right. Two cards take 1/3 of the whole space on the left in total.\n",
+    "The first card is on top of the second card vertically.\n",
+    "The chart takes 2/3 of the whole space and is on the right.\n",
+    "The button would trigger a download action to download the Tips dataset.\n",
+    "Add a filter to filter the bar chart by `size`.\n",
+    "Make another tab on this page,\n",
+    "In this tab, create a card saying \"Tipping patterns and trends.\"\n",
+    "Group all the above content into the first NavLink.\n",
+    "\n",
+    "<Second NavLink>\n",
+    "Create two pages:\n",
+    "1. The first page has a card saying \"Analyzing global development trends.\"\n",
+    "2. The second page has a scatter plot showing GDP per capita vs. life expectancy. GDP per capita on the x axis, life expectancy on the y axis, and colored by continent.\n",
+    "Add a parameter to control the title of the scatter plot, with title options \"Economic Growth vs. Health\" and \"Development Indicators.\"\n",
+    "Also create a button and a spinning circle on the right-hand side of the page.\n",
+    "\n",
+    "<Third NavLink>\n",
+    "Create one page:\n",
+    "1. The first page has a card saying \"Stock price trends over time.\"\n",
+    "Create a button and a spinning circle on the right-hand side of the page.\n",
+    "\n",
+    "For hosting the dashboard on AWS, which service should I use?\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6b4838d1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Vizro._reset()\n",
+    "dashboard = vizro_ai.dashboard([df3, df2, df1], user_question_2_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f055bec1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Vizro().build(dashboard).run()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/vizro-ai/examples/example_dashboard.py b/vizro-ai/examples/example_dashboard.py
@@ -0,0 +1,33 @@
+"""Example of creating a dashboard using VizroAI."""
+
+import vizro.plotly.express as px
+from dotenv import load_dotenv
+from vizro import Vizro
+from vizro_ai import VizroAI
+
+load_dotenv()  # load environment variables from .env (presumably the LLM API key - confirm)
+
+vizro_ai = VizroAI(model="gpt-4o")
+# vizro_ai = VizroAI()
+
+gapminder_data = px.data.gapminder()
+tips_data = px.data.tips()
+
+dfs = [gapminder_data, tips_data]  # every dataframe the request below refers to
+input_text = (  # adjacent literals are joined verbatim, so each fragment must end with a space
+    "Create a dashboard that displays the Gapminder dataset and the tips dataset. "
+    "page1 displays the Gapminder dataset. create a bar chart for average GDP per capita of each continent. "
+    "add a filter to filter by continent. "
+    "Use a card to explain what Gapminder dataset is about. "
+    "The card should only take 1/6 of the whole page. "
+    "The rest of the page should be the graph or table. Don't create empty space. "
+    "page2 displays the tips dataset. use two different charts to help me understand the data "
+    "distributions. one chart should be a bar chart and the other should be a scatter plot. "
+    "first chart is on the left and the second chart is on the right. "
+    "add a filter to filter data in the scatter plot by smoker."
+)
+
+dashboard = vizro_ai.dashboard(dfs=dfs, user_input=input_text)
+
+if __name__ == "__main__":  # build and serve only when executed as a script
+    Vizro().build(dashboard).run()
diff --git a/vizro-ai/hatch.toml b/vizro-ai/hatch.toml
@@ -25,6 +25,7 @@ VIZRO_AI_LOG_LEVEL = "DEBUG"
 
 [envs.default.scripts]
 example = "cd examples; python example.py"
+example-create-dashboard = "cd examples; python example_dashboard.py"
 lint = "hatch run lint:lint {args:--all-files}"
 prep-release = [
   "hatch version release",

diff --git a/vizro-ai/pyproject.toml b/vizro-ai/pyproject.toml
@@ -17,8 +17,9 @@ dependencies = [
   "pandas",
   "tabulate",
   "openai>=1.0.0",
-  "langchain>=0.1.0, <0.3.0",  # TODO update all LLMChain class and remove upper bound
+  "langchain>=0.1.0, <0.3.0",  # TODO update all LLMChain class, update to pydantic v2 and remove upper bound
   "langchain-openai",
+  "langgraph>=0.1.2",
   "python-dotenv>=1.0.0",  # TODO decide env var management to see if we need this
   "vizro>=0.1.4",  # TODO set upper bound later
   "ipython>=8.10.0",  # not directly required, pinned by Snyk to avoid a vulnerability: https://app.snyk.io/vuln/SNYK-PYTHON-IPYTHON-3318382

diff --git a/vizro-ai/snyk/requirements.txt b/vizro-ai/snyk/requirements.txt
@@ -3,6 +3,7 @@ tabulate
 openai>=1.0.0
 langchain>=0.1.0, <0.3.0
 langchain-openai
+langgraph>=0.1.2
 python-dotenv>=1.0.0
 vizro>=0.1.4
 ipython>=8.10.0