From cead746402515fa9f20af7ed98c406378f0d77c9 Mon Sep 17 00:00:00 2001
From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com>
Date: Wed, 29 May 2024 12:43:42 +0100
Subject: [PATCH 1/2] Add transcoding related utility functions to Kedro viz
 (#1928)

* Add transcoding related utility functions to Kedro viz

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* no coverage

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

---------

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
---
 package/kedro_viz/data_access/managers.py     |  2 +-
 .../data_access/repositories/catalog.py       |  9 +----
 package/kedro_viz/integrations/kedro/hooks.py | 10 +----
 package/kedro_viz/models/flowchart.py         |  9 +----
 package/kedro_viz/utils.py                    | 39 +++++++++++++++++++
 5 files changed, 43 insertions(+), 26 deletions(-)
 create mode 100644 package/kedro_viz/utils.py

diff --git a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py
index 4e4e772e5a..98fe36e78d 100644
--- a/package/kedro_viz/data_access/managers.py
+++ b/package/kedro_viz/data_access/managers.py
@@ -9,7 +9,6 @@
 from kedro.io import DataCatalog
 from kedro.pipeline import Pipeline as KedroPipeline
 from kedro.pipeline.node import Node as KedroNode
-from kedro.pipeline.pipeline import _strip_transcoding
 from sqlalchemy.orm import sessionmaker
 
 from kedro_viz.constants import DEFAULT_REGISTERED_PIPELINE_ID, ROOT_MODULAR_PIPELINE_ID
@@ -26,6 +25,7 @@
     TranscodedDataNode,
 )
 from kedro_viz.services import layers_services, modular_pipelines_services
+from kedro_viz.utils import _strip_transcoding
 
 from .repositories import (
     CatalogRepository,
diff --git a/package/kedro_viz/data_access/repositories/catalog.py b/package/kedro_viz/data_access/repositories/catalog.py
index bfbd61a437..663e2d2230 100644
--- a/package/kedro_viz/data_access/repositories/catalog.py
+++ b/package/kedro_viz/data_access/repositories/catalog.py
@@ -6,17 +6,10 @@
 from typing import TYPE_CHECKING, Dict, Optional
 
 from kedro.io import DataCatalog
-
-try:
-    # kedro 0.19.4 onwards
-    from kedro.pipeline._transcoding import TRANSCODING_SEPARATOR, _strip_transcoding
-except ImportError:  # pragma: no cover
-    # older versions
-    from kedro.pipeline.pipeline import TRANSCODING_SEPARATOR, _strip_transcoding  # type: ignore
-
 from packaging.version import parse
 
 from kedro_viz.constants import KEDRO_VERSION
+from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding
 
 try:
     # kedro 0.18.11 onwards
diff --git a/package/kedro_viz/integrations/kedro/hooks.py b/package/kedro_viz/integrations/kedro/hooks.py
index 32f2ffa594..f062c04ea8 100644
--- a/package/kedro_viz/integrations/kedro/hooks.py
+++ b/package/kedro_viz/integrations/kedro/hooks.py
@@ -12,15 +12,7 @@
 from kedro.io import DataCatalog
 from kedro.io.core import get_filepath_str
 
-try:
-    # kedro 0.19.4 onwards
-    from kedro.pipeline._transcoding import TRANSCODING_SEPARATOR, _strip_transcoding
-except ImportError:  # pragma: no cover
-    # older versions
-    from kedro.pipeline.pipeline import (  # type: ignore
-        TRANSCODING_SEPARATOR,
-        _strip_transcoding,
-    )
+from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding
 
 logger = logging.getLogger(__name__)
 
diff --git a/package/kedro_viz/models/flowchart.py b/package/kedro_viz/models/flowchart.py
index b4d43d5cbc..d82dcc9bec 100644
--- a/package/kedro_viz/models/flowchart.py
+++ b/package/kedro_viz/models/flowchart.py
@@ -11,14 +11,6 @@
 from typing import Any, Dict, List, Optional, Set, Union, cast
 
 from kedro.pipeline.node import Node as KedroNode
-
-try:
-    # kedro 0.19.4 onwards
-    from kedro.pipeline._transcoding import TRANSCODING_SEPARATOR, _strip_transcoding
-except ImportError:  # pragma: no cover
-    # older versions
-    from kedro.pipeline.pipeline import TRANSCODING_SEPARATOR, _strip_transcoding  # type: ignore
-
 from pydantic import (
     BaseModel,
     ConfigDict,
@@ -29,6 +21,7 @@
 )
 
 from kedro_viz.models.utils import get_dataset_type
+from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding
 
 try:
     # kedro 0.18.11 onwards
diff --git a/package/kedro_viz/utils.py b/package/kedro_viz/utils.py
new file mode 100644
index 0000000000..2d919a0c82
--- /dev/null
+++ b/package/kedro_viz/utils.py
@@ -0,0 +1,39 @@
+"""Transcoding related utility functions."""
+from typing import Tuple
+
+TRANSCODING_SEPARATOR = "@"
+
+
+def _transcode_split(element: str) -> Tuple[str, str]:
+    """Split the name by the transcoding separator.
+    If the transcoding part is missing, empty string will be put in.
+
+    Returns:
+        Tuple of (name before the separator, transcoding part or empty string).
+    Raises:
+        ValueError: Raised if more than one transcoding separator
+        is present in the name.
+    """
+    split_name = element.split(TRANSCODING_SEPARATOR)
+
+    if len(split_name) > 2:  # noqa: PLR2004
+        raise ValueError(  # pragma: no cover
+            f"Expected maximum 1 transcoding separator, found {len(split_name) - 1} "
+            f"instead: '{element}'."
+        )
+    if len(split_name) == 1:
+        split_name.append("")
+
+    return tuple(split_name)  # type: ignore
+
+
+def _strip_transcoding(element: str) -> str:
+    """Strip out the transcoding separator and anything that follows.
+
+    Returns:
+        Node input/output name before the transcoding separator, if present.
+    Raises:
+        ValueError: Raised if more than one transcoding separator
+        is present in the name.
+    """
+    return _transcode_split(element)[0]

From 979d2e66f4e2544a61a443c095fb4e6e1518ee76 Mon Sep 17 00:00:00 2001
From: Sajid Alam <90610031+SajidAlamQB@users.noreply.github.com>
Date: Thu, 30 May 2024 11:44:08 +0100
Subject: [PATCH 2/2] Include JSON dataset in the demo-project (#1930)

* replace companies csv with json version

Signed-off-by: Sajid Alam <sajid_alam@mckinsey.com>

* Update nodes.py

Signed-off-by: Sajid Alam <sajid_alam@mckinsey.com>

* fix inputs

Signed-off-by: Sajid Alam <sajid_alam@mckinsey.com>

* fix cypress tests

Signed-off-by: Sajid Alam <sajid_alam@mckinsey.com>

* Update menu.cy.js

Signed-off-by: Sajid Alam <sajid_alam@mckinsey.com>

* update size file and number of cols

Signed-off-by: huongg <huongg1409@gmail.com>

* use Shuttles instead of Reviews

Signed-off-by: huongg <huongg1409@gmail.com>

* Revert

Signed-off-by: Sajid Alam <sajid_alam@mckinsey.com>

* changes based on review

Signed-off-by: Sajid Alam <sajid_alam@mckinsey.com>

---------

Signed-off-by: Sajid Alam <sajid_alam@mckinsey.com>
Signed-off-by: huongg <huongg1409@gmail.com>
Co-authored-by: huongg <huongg1409@gmail.com>
---
 .../conf/base/catalog_08_reporting.yml        |   7 +
 .../data/08_reporting/top_shuttle_data.json   | 147 ++++++++++++++++++
 .../pipelines/data_ingestion/pipeline.py      |   1 -
 .../demo_project/pipelines/reporting/nodes.py |  20 ++-
 .../pipelines/reporting/pipeline.py           |   6 +
 5 files changed, 179 insertions(+), 2 deletions(-)
 create mode 100644 demo-project/data/08_reporting/top_shuttle_data.json

diff --git a/demo-project/conf/base/catalog_08_reporting.yml b/demo-project/conf/base/catalog_08_reporting.yml
index 2cd3d21411..148c0e1246 100644
--- a/demo-project/conf/base/catalog_08_reporting.yml
+++ b/demo-project/conf/base/catalog_08_reporting.yml
@@ -38,3 +38,10 @@ reporting.confusion_matrix:
   type: matplotlib.MatplotlibWriter
   filepath: ${_base_location}/08_reporting/confusion_matrix.png
   versioned: true
+
+reporting.top_shuttle_data:
+  type: json.JSONDataset
+  filepath: ${_base_location}/08_reporting/top_shuttle_data.json
+  metadata:
+    kedro-viz:
+      layer: reporting
diff --git a/demo-project/data/08_reporting/top_shuttle_data.json b/demo-project/data/08_reporting/top_shuttle_data.json
new file mode 100644
index 0000000000..4e6afa6e00
--- /dev/null
+++ b/demo-project/data/08_reporting/top_shuttle_data.json
@@ -0,0 +1,147 @@
+[
+  {
+    "shuttle_id": 63561,
+    "shuttle_location": "Niue",
+    "shuttle_type": "Type V5",
+    "engine_type": "Quantum",
+    "engine_vendor": "ThetaBase Services",
+    "engines": 1.0,
+    "passenger_capacity": 2,
+    "cancellation_policy": "strict",
+    "crew": 1.0,
+    "d_check_complete": false,
+    "moon_clearance_complete": false,
+    "price": 1325.0,
+    "company_id": 35029,
+    "review_scores_rating": 97,
+    "review_scores_comfort": 10,
+    "review_scores_amenities": 9,
+    "review_scores_trip": 10,
+    "review_scores_crew": 10,
+    "review_scores_location": 9,
+    "review_scores_price": 10,
+    "number_of_reviews": 133,
+    "reviews_per_month": 1.65,
+    "review_id": 1,
+    "company_rating": 1.0,
+    "company_location": "Niue",
+    "total_fleet_count": 4.0,
+    "iata_approved": false
+  },
+  {
+    "shuttle_id": 53260,
+    "shuttle_location": "Niue",
+    "shuttle_type": "Type V5",
+    "engine_type": "Quantum",
+    "engine_vendor": "Banks, Wood and Phillips",
+    "engines": 1.0,
+    "passenger_capacity": 2,
+    "cancellation_policy": "strict",
+    "crew": 1.0,
+    "d_check_complete": false,
+    "moon_clearance_complete": false,
+    "price": 1325.0,
+    "company_id": 35029,
+    "review_scores_rating": 98,
+    "review_scores_comfort": 10,
+    "review_scores_amenities": 9,
+    "review_scores_trip": 10,
+    "review_scores_crew": 10,
+    "review_scores_location": 9,
+    "review_scores_price": 10,
+    "number_of_reviews": 37,
+    "reviews_per_month": 0.48,
+    "review_id": 1354,
+    "company_rating": 1.0,
+    "company_location": "Niue",
+    "total_fleet_count": 4.0,
+    "iata_approved": false
+  },
+  {
+    "shuttle_id": 51019,
+    "shuttle_location": "Niue",
+    "shuttle_type": "Type V5",
+    "engine_type": "Quantum",
+    "engine_vendor": "ThetaBase Services",
+    "engines": 1.0,
+    "passenger_capacity": 2,
+    "cancellation_policy": "flexible",
+    "crew": 1.0,
+    "d_check_complete": false,
+    "moon_clearance_complete": false,
+    "price": 1260.0,
+    "company_id": 35029,
+    "review_scores_rating": 92,
+    "review_scores_comfort": 10,
+    "review_scores_amenities": 9,
+    "review_scores_trip": 10,
+    "review_scores_crew": 10,
+    "review_scores_location": 9,
+    "review_scores_price": 9,
+    "number_of_reviews": 10,
+    "reviews_per_month": 0.15,
+    "review_id": 1985,
+    "company_rating": 1.0,
+    "company_location": "Niue",
+    "total_fleet_count": 4.0,
+    "iata_approved": false
+  },
+  {
+    "shuttle_id": 53898,
+    "shuttle_location": "Niue",
+    "shuttle_type": "Type V5",
+    "engine_type": "Plasma",
+    "engine_vendor": "ThetaBase Services",
+    "engines": 3.0,
+    "passenger_capacity": 5,
+    "cancellation_policy": "strict",
+    "crew": 3.0,
+    "d_check_complete": false,
+    "moon_clearance_complete": false,
+    "price": 2196.0,
+    "company_id": 35029,
+    "review_scores_rating": 98,
+    "review_scores_comfort": 10,
+    "review_scores_amenities": 9,
+    "review_scores_trip": 10,
+    "review_scores_crew": 10,
+    "review_scores_location": 9,
+    "review_scores_price": 10,
+    "number_of_reviews": 11,
+    "reviews_per_month": 0.21,
+    "review_id": 4879,
+    "company_rating": 1.0,
+    "company_location": "Niue",
+    "total_fleet_count": 4.0,
+    "iata_approved": false
+  },
+  {
+    "shuttle_id": 36260,
+    "shuttle_location": "Anguilla",
+    "shuttle_type": "Type V5",
+    "engine_type": "Quantum",
+    "engine_vendor": "ThetaBase Services",
+    "engines": 1.0,
+    "passenger_capacity": 2,
+    "cancellation_policy": "strict",
+    "crew": 1.0,
+    "d_check_complete": true,
+    "moon_clearance_complete": false,
+    "price": 1780.0,
+    "company_id": 30292,
+    "review_scores_rating": 90,
+    "review_scores_comfort": 8,
+    "review_scores_amenities": 9,
+    "review_scores_trip": 10,
+    "review_scores_crew": 9,
+    "review_scores_location": 9,
+    "review_scores_price": 9,
+    "number_of_reviews": 3,
+    "reviews_per_month": 0.09,
+    "review_id": 2,
+    "company_rating": 0.67,
+    "company_location": "Anguilla",
+    "total_fleet_count": 6.0,
+    "iata_approved": false
+  }
+]
\ No newline at end of file
diff --git a/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py b/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py
index 4ed0f9f676..1acbdf9531 100755
--- a/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py
+++ b/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py
@@ -42,7 +42,6 @@ def create_pipeline(**kwargs) -> Pipeline:
                 inputs=["reviews", "params:typing.reviews.columns_as_floats"],
                 outputs="int_typed_reviews",
                 name='apply_types_to_reviews'
-                
             ),
             node(
                 func=aggregate_company_data,
diff --git a/demo-project/src/demo_project/pipelines/reporting/nodes.py b/demo-project/src/demo_project/pipelines/reporting/nodes.py
index b758540302..cd4796ceb1 100644
--- a/demo-project/src/demo_project/pipelines/reporting/nodes.py
+++ b/demo-project/src/demo_project/pipelines/reporting/nodes.py
@@ -9,7 +9,7 @@
 import plotly.express as px
 import seaborn as sn
 from plotly import graph_objects as go
-
+from typing import Dict
 from .image_utils import DrawTable
 
 
@@ -119,3 +119,21 @@ def create_matplotlib_chart(companies: pd.DataFrame) -> plt:
     )
     sn.heatmap(confusion_matrix, annot=True)
     return plt
+
+
+def get_top_shuttles_data(model_input_table: pd.DataFrame) -> Dict:
+    """This function retrieves the head from the input table
+    and converts them into a JSON dataset.
+
+    Args:
+        model_input_table (pd.DataFrame): The data to retrieve the first
+            five rows from (the row count is fixed, not a parameter).
+
+    Returns:
+        list: The first five rows as a list of per-row dictionaries (records).
+    """
+
+    # Get the first five rows of the model input table
+    top_shuttle_df = model_input_table.head(5)
+    top_shuttle_json = top_shuttle_df.to_dict(orient="records")
+    return top_shuttle_json
diff --git a/demo-project/src/demo_project/pipelines/reporting/pipeline.py b/demo-project/src/demo_project/pipelines/reporting/pipeline.py
index 18a4a5d3b9..4b6eb4e6de 100644
--- a/demo-project/src/demo_project/pipelines/reporting/pipeline.py
+++ b/demo-project/src/demo_project/pipelines/reporting/pipeline.py
@@ -11,6 +11,7 @@
     make_cancel_policy_bar_chart,
     make_price_analysis_image,
     make_price_histogram,
+    get_top_shuttles_data,
 )
 
 
@@ -43,6 +44,11 @@ def create_pipeline(**kwargs) -> Pipeline:
                 inputs="prm_shuttle_company_reviews",
                 outputs="confusion_matrix",
             ),
+            node(
+                func=get_top_shuttles_data,
+                inputs="prm_shuttle_company_reviews",
+                outputs="top_shuttle_data",
+            ),
         ],
         inputs=["prm_shuttle_company_reviews", "feature_importance_output"],
         namespace="reporting",