From cead746402515fa9f20af7ed98c406378f0d77c9 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Wed, 29 May 2024 12:43:42 +0100 Subject: [PATCH 1/2] Add transcoding related utility functions to Kedro viz (#1928) * Add transcoding related utility functions to Kedro viz Signed-off-by: Ankita Katiyar * no coverage Signed-off-by: Ankita Katiyar --------- Signed-off-by: Ankita Katiyar --- package/kedro_viz/data_access/managers.py | 2 +- .../data_access/repositories/catalog.py | 9 +---- package/kedro_viz/integrations/kedro/hooks.py | 10 +---- package/kedro_viz/models/flowchart.py | 9 +---- package/kedro_viz/utils.py | 39 +++++++++++++++++++ 5 files changed, 43 insertions(+), 26 deletions(-) create mode 100644 package/kedro_viz/utils.py diff --git a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py index 4e4e772e5a..98fe36e78d 100644 --- a/package/kedro_viz/data_access/managers.py +++ b/package/kedro_viz/data_access/managers.py @@ -9,7 +9,6 @@ from kedro.io import DataCatalog from kedro.pipeline import Pipeline as KedroPipeline from kedro.pipeline.node import Node as KedroNode -from kedro.pipeline.pipeline import _strip_transcoding from sqlalchemy.orm import sessionmaker from kedro_viz.constants import DEFAULT_REGISTERED_PIPELINE_ID, ROOT_MODULAR_PIPELINE_ID @@ -26,6 +25,7 @@ TranscodedDataNode, ) from kedro_viz.services import layers_services, modular_pipelines_services +from kedro_viz.utils import _strip_transcoding from .repositories import ( CatalogRepository, diff --git a/package/kedro_viz/data_access/repositories/catalog.py b/package/kedro_viz/data_access/repositories/catalog.py index bfbd61a437..663e2d2230 100644 --- a/package/kedro_viz/data_access/repositories/catalog.py +++ b/package/kedro_viz/data_access/repositories/catalog.py @@ -6,17 +6,10 @@ from typing import TYPE_CHECKING, Dict, Optional from kedro.io import DataCatalog - -try: - # kedro 0.19.4 onwards - from 
kedro.pipeline._transcoding import TRANSCODING_SEPARATOR, _strip_transcoding -except ImportError: # pragma: no cover - # older versions - from kedro.pipeline.pipeline import TRANSCODING_SEPARATOR, _strip_transcoding # type: ignore - from packaging.version import parse from kedro_viz.constants import KEDRO_VERSION +from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding try: # kedro 0.18.11 onwards diff --git a/package/kedro_viz/integrations/kedro/hooks.py b/package/kedro_viz/integrations/kedro/hooks.py index 32f2ffa594..f062c04ea8 100644 --- a/package/kedro_viz/integrations/kedro/hooks.py +++ b/package/kedro_viz/integrations/kedro/hooks.py @@ -12,15 +12,7 @@ from kedro.io import DataCatalog from kedro.io.core import get_filepath_str -try: - # kedro 0.19.4 onwards - from kedro.pipeline._transcoding import TRANSCODING_SEPARATOR, _strip_transcoding -except ImportError: # pragma: no cover - # older versions - from kedro.pipeline.pipeline import ( # type: ignore - TRANSCODING_SEPARATOR, - _strip_transcoding, - ) +from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding logger = logging.getLogger(__name__) diff --git a/package/kedro_viz/models/flowchart.py b/package/kedro_viz/models/flowchart.py index b4d43d5cbc..d82dcc9bec 100644 --- a/package/kedro_viz/models/flowchart.py +++ b/package/kedro_viz/models/flowchart.py @@ -11,14 +11,6 @@ from typing import Any, Dict, List, Optional, Set, Union, cast from kedro.pipeline.node import Node as KedroNode - -try: - # kedro 0.19.4 onwards - from kedro.pipeline._transcoding import TRANSCODING_SEPARATOR, _strip_transcoding -except ImportError: # pragma: no cover - # older versions - from kedro.pipeline.pipeline import TRANSCODING_SEPARATOR, _strip_transcoding # type: ignore - from pydantic import ( BaseModel, ConfigDict, @@ -29,6 +21,7 @@ ) from kedro_viz.models.utils import get_dataset_type +from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding try: # kedro 0.18.11 onwards diff --git 
a/package/kedro_viz/utils.py b/package/kedro_viz/utils.py new file mode 100644 index 0000000000..2d919a0c82 --- /dev/null +++ b/package/kedro_viz/utils.py @@ -0,0 +1,39 @@ +"""Transcoding related utility functions.""" +from typing import Tuple + +TRANSCODING_SEPARATOR = "@" + + +def _transcode_split(element: str) -> Tuple[str, str]: + """Split the name by the transcoding separator. + If the transcoding part is missing, empty string will be put in. + + Returns: + Tuple of the name before the separator and the part after it (may be empty). + Raises: + ValueError: Raised if more than one transcoding separator + is present in the name. + """ + split_name = element.split(TRANSCODING_SEPARATOR) + + if len(split_name) > 2: # noqa: PLR2004 + raise ValueError( # pragma: no cover + f"Expected maximum 1 transcoding separator, found {len(split_name) - 1} " + f"instead: '{element}'." + ) + if len(split_name) == 1: + split_name.append("") + + return tuple(split_name) # type: ignore + + +def _strip_transcoding(element: str) -> str: + """Strip out the transcoding separator and anything that follows. + + Returns: + Node input/output name before the transcoding separator, if present. + Raises: + ValueError: Raised if more than one transcoding separator + is present in the name. 
+ """ + return _transcode_split(element)[0] From 979d2e66f4e2544a61a443c095fb4e6e1518ee76 Mon Sep 17 00:00:00 2001 From: Sajid Alam <90610031+SajidAlamQB@users.noreply.github.com> Date: Thu, 30 May 2024 11:44:08 +0100 Subject: [PATCH 2/2] Include JSON dataset in the demo-project (#1930) * replace companies csv with json version Signed-off-by: Sajid Alam * Update nodes.py Signed-off-by: Sajid Alam * fix inputs Signed-off-by: Sajid Alam * fix cypress tests Signed-off-by: Sajid Alam * Update menu.cy.js Signed-off-by: Sajid Alam * update size file and number of cols Signed-off-by: huongg * use Shuttles instead of Reviews Signed-off-by: huongg * Revert Signed-off-by: Sajid Alam * changes based on review Signed-off-by: Sajid Alam --------- Signed-off-by: Sajid Alam Signed-off-by: huongg Co-authored-by: huongg --- .../conf/base/catalog_08_reporting.yml | 7 + .../data/08_reporting/top_shuttle_data.json | 147 ++++++++++++++++++ .../pipelines/data_ingestion/pipeline.py | 1 - .../demo_project/pipelines/reporting/nodes.py | 20 ++- .../pipelines/reporting/pipeline.py | 6 + 5 files changed, 179 insertions(+), 2 deletions(-) create mode 100644 demo-project/data/08_reporting/top_shuttle_data.json diff --git a/demo-project/conf/base/catalog_08_reporting.yml b/demo-project/conf/base/catalog_08_reporting.yml index 2cd3d21411..148c0e1246 100644 --- a/demo-project/conf/base/catalog_08_reporting.yml +++ b/demo-project/conf/base/catalog_08_reporting.yml @@ -38,3 +38,10 @@ reporting.confusion_matrix: type: matplotlib.MatplotlibWriter filepath: ${_base_location}/08_reporting/confusion_matrix.png versioned: true + +reporting.top_shuttle_data: + type: json.JSONDataset + filepath: ${_base_location}/08_reporting/top_shuttle_data.json + metadata: + kedro-viz: + layer: reporting diff --git a/demo-project/data/08_reporting/top_shuttle_data.json b/demo-project/data/08_reporting/top_shuttle_data.json new file mode 100644 index 0000000000..4e6afa6e00 --- /dev/null +++ 
b/demo-project/data/08_reporting/top_shuttle_data.json @@ -0,0 +1,147 @@ +[ + { + "shuttle_id": 63561, + "shuttle_location": "Niue", + "shuttle_type": "Type V5", + "engine_type": "Quantum", + "engine_vendor": "ThetaBase Services", + "engines": 1.0, + "passenger_capacity": 2, + "cancellation_policy": "strict", + "crew": 1.0, + "d_check_complete": false, + "moon_clearance_complete": false, + "price": 1325.0, + "company_id": 35029, + "review_scores_rating": 97, + "review_scores_comfort": 10, + "review_scores_amenities": 9, + "review_scores_trip": 10, + "review_scores_crew": 10, + "review_scores_location": 9, + "review_scores_price": 10, + "number_of_reviews": 133, + "reviews_per_month": 1.65, + "review_id": 1, + "company_rating": 1.0, + "company_location": "Niue", + "total_fleet_count": 4.0, + "iata_approved": false + }, + { + "shuttle_id": 53260, + "shuttle_location": "Niue", + "shuttle_type": "Type V5", + "engine_type": "Quantum", + "engine_vendor": "Banks, Wood and Phillips", + "engines": 1.0, + "passenger_capacity": 2, + "cancellation_policy": "strict", + "crew": 1.0, + "d_check_complete": false, + "moon_clearance_complete": false, + "price": 1325.0, + "company_id": 35029, + "review_scores_rating": 98, + "review_scores_comfort": 10, + "review_scores_amenities": 9, + "review_scores_trip": 10, + "review_scores_crew": 10, + "review_scores_location": 9, + "review_scores_price": 10, + "number_of_reviews": 37, + "reviews_per_month": 0.48, + "review_id": 1354, + "company_rating": 1.0, + "company_location": "Niue", + "total_fleet_count": 4.0, + "iata_approved": false + }, + { + "shuttle_id": 51019, + "shuttle_location": "Niue", + "shuttle_type": "Type V5", + "engine_type": "Quantum", + "engine_vendor": "ThetaBase Services", + "engines": 1.0, + "passenger_capacity": 2, + "cancellation_policy": "flexible", + "crew": 1.0, + "d_check_complete": false, + "moon_clearance_complete": false, + "price": 1260.0, + "company_id": 35029, + "review_scores_rating": 92, + 
"review_scores_comfort": 10, + "review_scores_amenities": 9, + "review_scores_trip": 10, + "review_scores_crew": 10, + "review_scores_location": 9, + "review_scores_price": 9, + "number_of_reviews": 10, + "reviews_per_month": 0.15, + "review_id": 1985, + "company_rating": 1.0, + "company_location": "Niue", + "total_fleet_count": 4.0, + "iata_approved": false + }, + { + "shuttle_id": 53898, + "shuttle_location": "Niue", + "shuttle_type": "Type V5", + "engine_type": "Plasma", + "engine_vendor": "ThetaBase Services", + "engines": 3.0, + "passenger_capacity": 5, + "cancellation_policy": "strict", + "crew": 3.0, + "d_check_complete": false, + "moon_clearance_complete": false, + "price": 2196.0, + "company_id": 35029, + "review_scores_rating": 98, + "review_scores_comfort": 10, + "review_scores_amenities": 9, + "review_scores_trip": 10, + "review_scores_crew": 10, + "review_scores_location": 9, + "review_scores_price": 10, + "number_of_reviews": 11, + "reviews_per_month": 0.21, + "review_id": 4879, + "company_rating": 1.0, + "company_location": "Niue", + "total_fleet_count": 4.0, + "iata_approved": false + }, + { + "shuttle_id": 36260, + "shuttle_location": "Anguilla", + "shuttle_type": "Type V5", + "engine_type": "Quantum", + "engine_vendor": "ThetaBase Services", + "engines": 1.0, + "passenger_capacity": 2, + "cancellation_policy": "strict", + "crew": 1.0, + "d_check_complete": true, + "moon_clearance_complete": false, + "price": 1780.0, + "company_id": 30292, + "review_scores_rating": 90, + "review_scores_comfort": 8, + "review_scores_amenities": 9, + "review_scores_trip": 10, + "review_scores_crew": 9, + "review_scores_location": 9, + "review_scores_price": 9, + "number_of_reviews": 3, + "reviews_per_month": 0.09, + "review_id": 2, + "company_rating": 0.67, + "company_location": "Anguilla", + "total_fleet_count": 6.0, + "iata_approved": false + } +] \ No newline at end of file diff --git a/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py 
b/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py index 4ed0f9f676..1acbdf9531 100755 --- a/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py +++ b/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py @@ -42,7 +42,6 @@ def create_pipeline(**kwargs) -> Pipeline: inputs=["reviews", "params:typing.reviews.columns_as_floats"], outputs="int_typed_reviews", name='apply_types_to_reviews' - ), node( func=aggregate_company_data, diff --git a/demo-project/src/demo_project/pipelines/reporting/nodes.py b/demo-project/src/demo_project/pipelines/reporting/nodes.py index b758540302..cd4796ceb1 100644 --- a/demo-project/src/demo_project/pipelines/reporting/nodes.py +++ b/demo-project/src/demo_project/pipelines/reporting/nodes.py @@ -9,7 +9,7 @@ import plotly.express as px import seaborn as sn from plotly import graph_objects as go - +from typing import Dict from .image_utils import DrawTable @@ -119,3 +119,21 @@ def create_matplotlib_chart(companies: pd.DataFrame) -> plt: ) sn.heatmap(confusion_matrix, annot=True) return plt + + +def get_top_shuttles_data(model_input_table: pd.DataFrame) -> Dict: + """Retrieve the first five rows of the input table + and convert them into JSON-serialisable records. + + Args: + model_input_table (pd.DataFrame): The table whose first five rows + are returned (the row count is fixed, not a parameter). + + Returns: + list: One dict per row (column name to value), ready to be saved as JSON. 
+ """ + + # Get the top N rows of the model input table + top_shuttle_df = model_input_table.head(5) + top_shuttle_json = top_shuttle_df.to_dict(orient="records") + return top_shuttle_json diff --git a/demo-project/src/demo_project/pipelines/reporting/pipeline.py b/demo-project/src/demo_project/pipelines/reporting/pipeline.py index 18a4a5d3b9..4b6eb4e6de 100644 --- a/demo-project/src/demo_project/pipelines/reporting/pipeline.py +++ b/demo-project/src/demo_project/pipelines/reporting/pipeline.py @@ -11,6 +11,7 @@ make_cancel_policy_bar_chart, make_price_analysis_image, make_price_histogram, + get_top_shuttles_data, ) @@ -43,6 +44,11 @@ def create_pipeline(**kwargs) -> Pipeline: inputs="prm_shuttle_company_reviews", outputs="confusion_matrix", ), + node( + func=get_top_shuttles_data, + inputs="prm_shuttle_company_reviews", + outputs="top_shuttle_data", + ), ], inputs=["prm_shuttle_company_reviews", "feature_importance_output"], namespace="reporting",