complex prompt and review fixes

mckinsey · Jan 16, 2025 · 459f482 · 459f482
1 parent 7139fb5
commit 459f482
Show file tree

Hide file tree

Showing 3 changed files with 69 additions and 86 deletions.
diff --git a/.github/workflows/test-score-vizro-ai.yml b/.github/workflows/test-score-vizro-ai.yml
@@ -123,8 +123,9 @@ jobs:
       - name: Create one csv report
         run: |
           cd /home/runner/work/vizro/vizro/
-          head -n 1 Report-3.12-/report_model_gpt-4o-mini_pypi.csv > report-aggregated-${{ steps.date.outputs.date }}.csv && tail -n+2 -q */*.csv >> report-aggregated-${{ steps.date.outputs.date }}.csv
-          gawk -F, -i inplace 'FNR>1 {$1="${{ steps.date.outputs.date }}"} {print}' OFS=, report-aggregated-${{ steps.date.outputs.date }}.csv
+          ls */*.csv | head -n1 | xargs head -n1 > report-aggregated-${{ steps.date.outputs.date }}.csv && tail -n+2 -q */*.csv >> report-aggregated-${{ steps.date.outputs.date }}.csv
+          # Replace the first column (timestamp) of every data row with the current date; OFS=, keeps rewritten rows comma-separated
+          gawk -F, -i inplace 'FNR>1 {$1="${{ steps.date.outputs.date }}"} {print}' OFS=, report-aggregated-${{ steps.date.outputs.date }}.csv
 
       - name: Report artifacts
         uses: actions/upload-artifact@v4

diff --git a/vizro-ai/tests/score/prompts.py b/vizro-ai/tests/score/prompts.py
@@ -1,23 +1,17 @@
 easy_prompt = """
-I need a page with 1 table.
+I need a page with 1 table, 1 card and 1 chart.
 The table shows the tech companies stock data.
-
-I need a second page showing 2 cards and one chart.
-The first card says 'The Gapminder dataset provides historical data on countries' development indicators.'
-The chart is an scatter plot showing life expectancy vs. GDP per capita by country.
+The card says 'The Gapminder dataset provides historical data on countries' development indicators.'
+The chart is the scatter plot which uses gapminder dataframe and showing life expectancy vs. GDP per capita by country.
 Life expectancy on the y axis, GDP per capita on the x axis, and colored by continent.
-The second card says 'Data spans from 1952 to 2007 across various countries.'
-The layout uses a grid of 3 columns and 2 rows.
-
-Row 1: The first row has three columns:
-The first column is occupied by the first card.
-The second and third columns are spanned by the chart.
 
-Row 2: The second row mirrors the layout of the first row with respect to chart,
-but the first column is occupied by the second card.
+The layout uses a grid of 2 columns and 3 rows.
+The first row contains card
+The second row contains chart
+The third row contains table
 
 Add a filter to filter the scatter plot by continent.
-Add a second filter to filter the chart by year.
+Add a second filter to filter the table by companies.
 """
 
 medium_prompt = """
@@ -59,47 +53,33 @@
 - The first column is empty.
 - The second column is occupied by the area for card 2.
 - The third column is occupied by the area for card 3.
-    """
+"""
 
 
 complex_prompt = """
 <Page 1>
-I need a page with 1 table and 3 line charts.
-The chart shows the stock price trends of GOOG and AAPL.
-The table shows the stock prices data details.
-Add 3 filters to filter the line chart by companies.
+Show me 1 table on the first page that shows tips and sorted by day
+Using export button I want to export data to csv
+Add filters by bill and by tip amount using range slider
 
 <Page 2>
-I need a second page showing 1 card and 1 chart.
-The card says 'The Gapminder dataset provides historical data on countries' development indicators.'
-The chart is a scatter plot showing GDP per capita vs. life expectancy.
-GDP per capita on the x axis, life expectancy on the y axis, and colored by continent.
-Layout the card on the left and the chart on the right. The card takes 1/3 of the whole space on the left.
-The chart takes 2/3 of the whole space and is on the right.
-Add a filter to filter the scatter plot by continent.
-Add a second filter to filter the chart by year.
+Second page should contain kpi cards with population trends and
+two popular charts that display population per capita vs. continent.
+Filter charts by GDP using slider.
+Align kpi cards in one row and charts in different.
+Both charts should be in tabs.
 
 <Page 3>
-This page displays the tips dataset. use two different charts to show data
-distributions. one chart should be a bar chart and the other should be a scatter plot.
-first chart is on the left and the second chart is on the right.
-Add a filter to filter data in the scatter plot by smoker.
+Third page should contain 6 charts showing stocks.
+Each should have separate filter by date.
+Filter types should include dropdown, datepicker, slider, checklist and radio items.
+Add parameter for any chart.
 
 <Page 4>
-Create 3 cards on this page:
-1. The first card on top says "This page combines data from various sources
- including tips, stock prices, and global indicators."
-2. The second card says "Insights from Gapminder dataset."
-3. The third card says "Stock price trends over time."
-
-Layout these 3 cards in this way:
-create a grid with 3 columns and 2 rows.
-Row 1: The first row has three columns:
-- The first column is empty.
-- The second and third columns span the area for card 1.
-
-Row 2: The second row also has three columns:
-- The first column is empty.
-- The second column is occupied by the area for card 2.
-- The third column is occupied by the area for card 3.
-    """
+Fourth page contains chart with wind data.
+Table with GDP data.
+Two more charts with stocks and tips representations.
+Align table beautifully relative to the charts.
+Every chart should have 2 filters.
+Table should have 1 filter.
+"""
diff --git a/vizro-ai/tests/score/test_dashboard.py b/vizro-ai/tests/score/test_dashboard.py
@@ -2,6 +2,7 @@
 
 import csv
 import os
+import statistics
 from collections import Counter
 from dataclasses import dataclass
 from datetime import datetime
@@ -19,6 +20,7 @@
 df1 = px.data.gapminder()
 df2 = px.data.stocks()
 df3 = px.data.tips()
+df4 = px.data.wind()
 
 
 @dataclass
@@ -38,6 +40,12 @@ def setup_test_environment():
         chromedriver_autoinstaller.install()
 
 
+# If the list is empty, nothing was entered for this score in the config,
+# so in this case the score should be 1.0.
+def score_calculator(score_name):
+    return statistics.mean(score_name) if len(score_name) != 0 else 1.0
+
+
 def logic(  # noqa: PLR0912, PLR0915
     dashboard,
     model_name,
@@ -91,8 +99,8 @@ def logic(  # noqa: PLR0912, PLR0915
         branch = "local"
         python_version = "local"
 
-    pages_exist = [1 if dashboard.pages else 0]
-    pages_exist_report = bool(pages_exist[0])
+    pages_exist = [1.0 if dashboard.pages else 0][0]
+    pages_exist_report = bool(pages_exist)
     pages_num = [1 if len(dashboard.pages) == len(config["pages"]) else 0]
     pages_num_report = [f'{len(config["pages"])} page(s) for dashboard is {bool(pages_num[0])}']
 
@@ -160,24 +168,24 @@ def logic(  # noqa: PLR0912, PLR0915
         controls_types_names.append(controls_types)
         controls_types_names_report.append("page or control does not exists")
 
-    pages_exist.extend(pages_num)
 
     # Every separate score has its own weight.
     scores = [
         {"score_name": "app_started_score", "weight": 0.4, "score": app_started},
         {"score_name": "no_browser_console_errors_score", "weight": 0.1, "score": no_browser_console_errors},
-        {"score_name": "pages_score", "weight": 0.2, "score": sum(pages_exist) / len(pages_exist)},
-        {"score_name": "components_score", "weight": 0.1, "score": sum(components_num) / len(components_num)},
+        {"score_name": "pages_score", "weight": 0.3, "score": pages_exist},
+        {"score_name": "pages_number", "weight": 0.2, "score": score_calculator(score_name=pages_num)},
+        {"score_name": "components_score", "weight": 0.2, "score": score_calculator(score_name=components_num)},
         {
             "score_name": "component_types_score",
-            "weight": 0.1,
-            "score": sum(components_types_names) / len(components_types_names),
+            "weight": 0.2,
+            "score": score_calculator(score_name=components_types_names),
         },
-        {"score_name": "controls_score", "weight": 0.1, "score": sum(controls_num) / len(controls_num)},
+        {"score_name": "controls_score", "weight": 0.2, "score": score_calculator(score_name=controls_num)},
         {
             "score_name": "controls_types_score",
-            "weight": 0.1,
-            "score": sum(controls_types_names) / len(controls_types_names),
+            "weight": 0.2,
+            "score": score_calculator(score_name=controls_types_names),
         },
     ]
 
@@ -215,9 +223,7 @@ def logic(  # noqa: PLR0912, PLR0915
                 ]
                 header_rows.extend(score["score_name"] for score in scores)
                 writer.writerow(header_rows)
-                writer.writerow(data_rows)
-            else:
-                writer.writerow(data_rows)
+            writer.writerow(data_rows)
 
     # Readable report for the console output
     print(f"App started: {app_started_report}")  # noqa: T201
@@ -258,12 +264,6 @@ def test_easy_dashboard(dash_duo, model_name):
                 {
                     "components": [
                         Component(type="ag_grid"),
-                    ],
-                    "controls": [],
-                },
-                {
-                    "components": [
-                        Component(type="card"),
                         Component(type="card"),
                         Component(type="graph"),
                     ],
@@ -336,7 +336,7 @@ def test_medium_dashboard(dash_duo, model_name):
     ids=["gpt-4o-mini"],
 )
 def test_complex_dashboard(dash_duo, model_name):
-    dashboard = VizroAI(model=model_name).dashboard([df1, df2, df3], complex_prompt)
+    dashboard = VizroAI(model=model_name).dashboard([df1, df2, df3, df4], complex_prompt)
 
     logic(
         dashboard=dashboard,
@@ -349,17 +349,17 @@ def test_complex_dashboard(dash_duo, model_name):
                 {
                     "components": [
                         Component(type="ag_grid"),
-                        Component(type="graph"),
-                        Component(type="graph"),
-                        Component(type="graph"),
                     ],
-                    "controls": [Control(type="filter"), Control(type="filter"), Control(type="filter")],
+                    "controls": [Control(type="filter"), Control(type="filter")],
+                },
+                {
+                    "components": [Component(type="graph"), Component(type="graph")],
+                    "controls": [Control(type="filter")],
                 },
                 {
                     "components": [
-                        Component(type="card"),
-                        Component(type="card"),
-                        Component(type="card"),
+                        Component(type="graph"),
+                        Component(type="graph"),
                         Component(type="graph"),
                         Component(type="graph"),
                         Component(type="graph"),
@@ -368,27 +368,29 @@ def test_complex_dashboard(dash_duo, model_name):
                     "controls": [
                         Control(type="filter"),
                         Control(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter"),
                     ],
                 },
                 {
                     "components": [
-                        Component(type="graph"),
+                        Component(type="ag_grid"),
                         Component(type="graph"),
                         Component(type="graph"),
                         Component(type="graph"),
                     ],
                     "controls": [
                         Control(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter"),
                     ],
                 },
-                {
-                    "components": [
-                        Component(type="card"),
-                        Component(type="card"),
-                        Component(type="card"),
-                    ],
-                    "controls": [],
-                },
             ],
         },
     )