Merge branch 'main' into feature/disable-preview

kedro-org · May 30, 2024 · b80c799 · b80c799
2 parents 20bbe47 + 979d2e6
commit b80c799
Show file tree

Hide file tree

Showing 10 changed files with 222 additions and 28 deletions.
diff --git a/demo-project/conf/base/catalog_08_reporting.yml b/demo-project/conf/base/catalog_08_reporting.yml
@@ -38,3 +38,10 @@ reporting.confusion_matrix:
   type: matplotlib.MatplotlibWriter
   filepath: ${_base_location}/08_reporting/confusion_matrix.png
   versioned: true
+
+reporting.top_shuttle_data:
+  type: json.JSONDataset
+  filepath: ${_base_location}/08_reporting/top_shuttle_data.json
+  metadata:
+    kedro-viz:
+      layer: reporting
diff --git a/demo-project/data/08_reporting/top_shuttle_data.json b/demo-project/data/08_reporting/top_shuttle_data.json
@@ -0,0 +1,147 @@
+[
+  {
+    "shuttle_id": 63561,
+    "shuttle_location": "Niue",
+    "shuttle_type": "Type V5",
+    "engine_type": "Quantum",
+    "engine_vendor": "ThetaBase Services",
+    "engines": 1.0,
+    "passenger_capacity": 2,
+    "cancellation_policy": "strict",
+    "crew": 1.0,
+    "d_check_complete": false,
+    "moon_clearance_complete": false,
+    "price": 1325.0,
+    "company_id": 35029,
+    "review_scores_rating": 97,
+    "review_scores_comfort": 10,
+    "review_scores_amenities": 9,
+    "review_scores_trip": 10,
+    "review_scores_crew": 10,
+    "review_scores_location": 9,
+    "review_scores_price": 10,
+    "number_of_reviews": 133,
+    "reviews_per_month": 1.65,
+    "review_id": 1,
+    "company_rating": 1.0,
+    "company_location": "Niue",
+    "total_fleet_count": 4.0,
+    "iata_approved": false
+  },
+  {
+    "shuttle_id": 53260,
+    "shuttle_location": "Niue",
+    "shuttle_type": "Type V5",
+    "engine_type": "Quantum",
+    "engine_vendor": "Banks, Wood and Phillips",
+    "engines": 1.0,
+    "passenger_capacity": 2,
+    "cancellation_policy": "strict",
+    "crew": 1.0,
+    "d_check_complete": false,
+    "moon_clearance_complete": false,
+    "price": 1325.0,
+    "company_id": 35029,
+    "review_scores_rating": 98,
+    "review_scores_comfort": 10,
+    "review_scores_amenities": 9,
+    "review_scores_trip": 10,
+    "review_scores_crew": 10,
+    "review_scores_location": 9,
+    "review_scores_price": 10,
+    "number_of_reviews": 37,
+    "reviews_per_month": 0.48,
+    "review_id": 1354,
+    "company_rating": 1.0,
+    "company_location": "Niue",
+    "total_fleet_count": 4.0,
+    "iata_approved": false
+  },
+  {
+    "shuttle_id": 51019,
+    "shuttle_location": "Niue",
+    "shuttle_type": "Type V5",
+    "engine_type": "Quantum",
+    "engine_vendor": "ThetaBase Services",
+    "engines": 1.0,
+    "passenger_capacity": 2,
+    "cancellation_policy": "flexible",
+    "crew": 1.0,
+    "d_check_complete": false,
+    "moon_clearance_complete": false,
+    "price": 1260.0,
+    "company_id": 35029,
+    "review_scores_rating": 92,
+    "review_scores_comfort": 10,
+    "review_scores_amenities": 9,
+    "review_scores_trip": 10,
+    "review_scores_crew": 10,
+    "review_scores_location": 9,
+    "review_scores_price": 9,
+    "number_of_reviews": 10,
+    "reviews_per_month": 0.15,
+    "review_id": 1985,
+    "company_rating": 1.0,
+    "company_location": "Niue",
+    "total_fleet_count": 4.0,
+    "iata_approved": false
+  },
+  {
+    "shuttle_id": 53898,
+    "shuttle_location": "Niue",
+    "shuttle_type": "Type V5",
+    "engine_type": "Plasma",
+    "engine_vendor": "ThetaBase Services",
+    "engines": 3.0,
+    "passenger_capacity": 5,
+    "cancellation_policy": "strict",
+    "crew": 3.0,
+    "d_check_complete": false,
+    "moon_clearance_complete": false,
+    "price": 2196.0,
+    "company_id": 35029,
+    "review_scores_rating": 98,
+    "review_scores_comfort": 10,
+    "review_scores_amenities": 9,
+    "review_scores_trip": 10,
+    "review_scores_crew": 10,
+    "review_scores_location": 9,
+    "review_scores_price": 10,
+    "number_of_reviews": 11,
+    "reviews_per_month": 0.21,
+    "review_id": 4879,
+    "company_rating": 1.0,
+    "company_location": "Niue",
+    "total_fleet_count": 4.0,
+    "iata_approved": false
+  },
+  {
+    "shuttle_id": 36260,
+    "shuttle_location": "Anguilla",
+    "shuttle_type": "Type V5",
+    "engine_type": "Quantum",
+    "engine_vendor": "ThetaBase Services",
+    "engines": 1.0,
+    "passenger_capacity": 2,
+    "cancellation_policy": "strict",
+    "crew": 1.0,
+    "d_check_complete": true,
+    "moon_clearance_complete": false,
+    "price": 1780.0,
+    "company_id": 30292,
+    "review_scores_rating": 90,
+    "review_scores_comfort": 8,
+    "review_scores_amenities": 9,
+    "review_scores_trip": 10,
+    "review_scores_crew": 9,
+    "review_scores_location": 9,
+    "review_scores_price": 9,
+    "number_of_reviews": 3,
+    "reviews_per_month": 0.09,
+    "review_id": 2,
+    "company_rating": 0.67,
+    "company_location": "Anguilla",
+    "total_fleet_count": 6.0,
+    "iata_approved": false
+  }
+]
diff --git a/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py b/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py
@@ -42,7 +42,6 @@ def create_pipeline(**kwargs) -> Pipeline:
                 inputs=["reviews", "params:typing.reviews.columns_as_floats"],
                 outputs="int_typed_reviews",
                 name='apply_types_to_reviews'
-
             ),
             node(
                 func=aggregate_company_data,

diff --git a/demo-project/src/demo_project/pipelines/reporting/nodes.py b/demo-project/src/demo_project/pipelines/reporting/nodes.py
@@ -9,7 +9,7 @@
 import plotly.express as px
 import seaborn as sn
 from plotly import graph_objects as go
-
+from typing import Dict
 from .image_utils import DrawTable
 
 
@@ -119,3 +119,21 @@ def create_matplotlib_chart(companies: pd.DataFrame) -> plt:
     )
     sn.heatmap(confusion_matrix, annot=True)
     return plt
+
+
+def get_top_shuttles_data(model_input_table: pd.DataFrame) -> Dict:
+    """Return the first five rows of the input table as row records.
+
+    Args:
+        model_input_table (pd.DataFrame): The data to take the leading rows from.
+
+    Returns:
+        The rows as produced by ``to_dict(orient="records")`` (a list with one
+        dict per row, not a JSON string), despite the ``Dict`` annotation.
+        NOTE(review): the row count is hard-coded to 5; there is no ``top_n``.
+    """
+
+    # Take the first 5 rows and convert each one to a column -> value mapping
+    top_shuttle_df = model_input_table.head(5)
+    top_shuttle_json = top_shuttle_df.to_dict(orient="records")
+    return top_shuttle_json
diff --git a/demo-project/src/demo_project/pipelines/reporting/pipeline.py b/demo-project/src/demo_project/pipelines/reporting/pipeline.py
@@ -11,6 +11,7 @@
     make_cancel_policy_bar_chart,
     make_price_analysis_image,
     make_price_histogram,
+    get_top_shuttles_data,
 )
 
 
@@ -43,6 +44,11 @@ def create_pipeline(**kwargs) -> Pipeline:
                 inputs="prm_shuttle_company_reviews",
                 outputs="confusion_matrix",
             ),
+            node(
+                func=get_top_shuttles_data,
+                inputs="prm_shuttle_company_reviews",
+                outputs="top_shuttle_data",
+            ),
         ],
         inputs=["prm_shuttle_company_reviews", "feature_importance_output"],
         namespace="reporting",

diff --git a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py
@@ -9,7 +9,6 @@
 from kedro.io import DataCatalog
 from kedro.pipeline import Pipeline as KedroPipeline
 from kedro.pipeline.node import Node as KedroNode
-from kedro.pipeline.pipeline import _strip_transcoding
 from sqlalchemy.orm import sessionmaker
 
 from kedro_viz.constants import DEFAULT_REGISTERED_PIPELINE_ID, ROOT_MODULAR_PIPELINE_ID
@@ -26,6 +25,7 @@
     TranscodedDataNode,
 )
 from kedro_viz.services import layers_services, modular_pipelines_services
+from kedro_viz.utils import _strip_transcoding
 
 from .repositories import (
     CatalogRepository,

diff --git a/package/kedro_viz/data_access/repositories/catalog.py b/package/kedro_viz/data_access/repositories/catalog.py
@@ -6,17 +6,10 @@
 from typing import TYPE_CHECKING, Dict, Optional
 
 from kedro.io import DataCatalog
-
-try:
-    # kedro 0.19.4 onwards
-    from kedro.pipeline._transcoding import TRANSCODING_SEPARATOR, _strip_transcoding
-except ImportError:  # pragma: no cover
-    # older versions
-    from kedro.pipeline.pipeline import TRANSCODING_SEPARATOR, _strip_transcoding  # type: ignore
-
 from packaging.version import parse
 
 from kedro_viz.constants import KEDRO_VERSION
+from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding
 
 try:
     # kedro 0.18.11 onwards

diff --git a/package/kedro_viz/integrations/kedro/hooks.py b/package/kedro_viz/integrations/kedro/hooks.py
@@ -12,15 +12,7 @@
 from kedro.io import DataCatalog
 from kedro.io.core import get_filepath_str
 
-try:
-    # kedro 0.19.4 onwards
-    from kedro.pipeline._transcoding import TRANSCODING_SEPARATOR, _strip_transcoding
-except ImportError:  # pragma: no cover
-    # older versions
-    from kedro.pipeline.pipeline import (  # type: ignore
-        TRANSCODING_SEPARATOR,
-        _strip_transcoding,
-    )
+from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding
 
 logger = logging.getLogger(__name__)
 

diff --git a/package/kedro_viz/models/flowchart.py b/package/kedro_viz/models/flowchart.py
@@ -11,14 +11,6 @@
 from typing import Any, ClassVar, Dict, List, Optional, Set, Union, cast
 
 from kedro.pipeline.node import Node as KedroNode
-
-try:
-    # kedro 0.19.4 onwards
-    from kedro.pipeline._transcoding import TRANSCODING_SEPARATOR, _strip_transcoding
-except ImportError:  # pragma: no cover
-    # older versions
-    from kedro.pipeline.pipeline import TRANSCODING_SEPARATOR, _strip_transcoding  # type: ignore
-
 from pydantic import (
     BaseModel,
     ConfigDict,
@@ -29,6 +21,7 @@
 )
 
 from kedro_viz.models.utils import get_dataset_type
+from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding
 
 try:
     # kedro 0.18.11 onwards

diff --git a/package/kedro_viz/utils.py b/package/kedro_viz/utils.py
@@ -0,0 +1,39 @@
+"""Transcoding related utility functions."""
+from typing import Tuple
+
+TRANSCODING_SEPARATOR = "@"
+
+
+def _transcode_split(element: str) -> Tuple[str, str]:
+    """Split a name at the transcoding separator into ``(name, suffix)``.
+
+    Args:
+        element: Node input/output name, optionally with one separator.
+    Returns:
+        The parts before and after the separator; the latter is "" if absent.
+    Raises:
+        ValueError: If more than one transcoding separator is present.
+    """
+    split_name = element.split(TRANSCODING_SEPARATOR)
+
+    if len(split_name) > 2:  # noqa: PLR2004
+        raise ValueError(  # pragma: no cover
+            f"Expected maximum 1 transcoding separator, found {len(split_name) - 1} "
+            f"instead: '{element}'."
+        )
+    if len(split_name) == 1:
+        split_name.append("")
+
+    return tuple(split_name)  # type: ignore
+
+
+def _strip_transcoding(element: str) -> str:
+    """Strip out the transcoding separator and anything that follows.
+
+    Returns:
+        The part of ``element`` before the separator, or all of it if absent.
+    Raises:
+        ValueError: Propagated from ``_transcode_split`` if more than one
+        transcoding separator is present in the name.
+    """
+    return _transcode_split(element)[0]