From 979d2e66f4e2544a61a443c095fb4e6e1518ee76 Mon Sep 17 00:00:00 2001 From: Sajid Alam <90610031+SajidAlamQB@users.noreply.github.com> Date: Thu, 30 May 2024 11:44:08 +0100 Subject: [PATCH] Include JSON dataset in the demo-project (#1930) * replace companies csv with json version Signed-off-by: Sajid Alam * Update nodes.py Signed-off-by: Sajid Alam * fix inputs Signed-off-by: Sajid Alam * fix cypress tests Signed-off-by: Sajid Alam * Update menu.cy.js Signed-off-by: Sajid Alam * update size file and number of cols Signed-off-by: huongg * use Shuttles instead of Reviews Signed-off-by: huongg * Revert Signed-off-by: Sajid Alam * changes based on review Signed-off-by: Sajid Alam --------- Signed-off-by: Sajid Alam Signed-off-by: huongg Co-authored-by: huongg --- .../conf/base/catalog_08_reporting.yml | 7 + .../data/08_reporting/top_shuttle_data.json | 147 ++++++++++++++++++ .../pipelines/data_ingestion/pipeline.py | 1 - .../demo_project/pipelines/reporting/nodes.py | 20 ++- .../pipelines/reporting/pipeline.py | 6 + 5 files changed, 179 insertions(+), 2 deletions(-) create mode 100644 demo-project/data/08_reporting/top_shuttle_data.json diff --git a/demo-project/conf/base/catalog_08_reporting.yml b/demo-project/conf/base/catalog_08_reporting.yml index 2cd3d21411..148c0e1246 100644 --- a/demo-project/conf/base/catalog_08_reporting.yml +++ b/demo-project/conf/base/catalog_08_reporting.yml @@ -38,3 +38,10 @@ reporting.confusion_matrix: type: matplotlib.MatplotlibWriter filepath: ${_base_location}/08_reporting/confusion_matrix.png versioned: true + +reporting.top_shuttle_data: + type: json.JSONDataset + filepath: ${_base_location}/08_reporting/top_shuttle_data.json + metadata: + kedro-viz: + layer: reporting diff --git a/demo-project/data/08_reporting/top_shuttle_data.json b/demo-project/data/08_reporting/top_shuttle_data.json new file mode 100644 index 0000000000..4e6afa6e00 --- /dev/null +++ b/demo-project/data/08_reporting/top_shuttle_data.json @@ -0,0 +1,147 @@ +[ + { 
+ "shuttle_id": 63561, + "shuttle_location": "Niue", + "shuttle_type": "Type V5", + "engine_type": "Quantum", + "engine_vendor": "ThetaBase Services", + "engines": 1.0, + "passenger_capacity": 2, + "cancellation_policy": "strict", + "crew": 1.0, + "d_check_complete": false, + "moon_clearance_complete": false, + "price": 1325.0, + "company_id": 35029, + "review_scores_rating": 97, + "review_scores_comfort": 10, + "review_scores_amenities": 9, + "review_scores_trip": 10, + "review_scores_crew": 10, + "review_scores_location": 9, + "review_scores_price": 10, + "number_of_reviews": 133, + "reviews_per_month": 1.65, + "review_id": 1, + "company_rating": 1.0, + "company_location": "Niue", + "total_fleet_count": 4.0, + "iata_approved": false + }, + { + "shuttle_id": 53260, + "shuttle_location": "Niue", + "shuttle_type": "Type V5", + "engine_type": "Quantum", + "engine_vendor": "Banks, Wood and Phillips", + "engines": 1.0, + "passenger_capacity": 2, + "cancellation_policy": "strict", + "crew": 1.0, + "d_check_complete": false, + "moon_clearance_complete": false, + "price": 1325.0, + "company_id": 35029, + "review_scores_rating": 98, + "review_scores_comfort": 10, + "review_scores_amenities": 9, + "review_scores_trip": 10, + "review_scores_crew": 10, + "review_scores_location": 9, + "review_scores_price": 10, + "number_of_reviews": 37, + "reviews_per_month": 0.48, + "review_id": 1354, + "company_rating": 1.0, + "company_location": "Niue", + "total_fleet_count": 4.0, + "iata_approved": false + }, + { + "shuttle_id": 51019, + "shuttle_location": "Niue", + "shuttle_type": "Type V5", + "engine_type": "Quantum", + "engine_vendor": "ThetaBase Services", + "engines": 1.0, + "passenger_capacity": 2, + "cancellation_policy": "flexible", + "crew": 1.0, + "d_check_complete": false, + "moon_clearance_complete": false, + "price": 1260.0, + "company_id": 35029, + "review_scores_rating": 92, + "review_scores_comfort": 10, + "review_scores_amenities": 9, + "review_scores_trip": 10, + 
"review_scores_crew": 10, + "review_scores_location": 9, + "review_scores_price": 9, + "number_of_reviews": 10, + "reviews_per_month": 0.15, + "review_id": 1985, + "company_rating": 1.0, + "company_location": "Niue", + "total_fleet_count": 4.0, + "iata_approved": false + }, + { + "shuttle_id": 53898, + "shuttle_location": "Niue", + "shuttle_type": "Type V5", + "engine_type": "Plasma", + "engine_vendor": "ThetaBase Services", + "engines": 3.0, + "passenger_capacity": 5, + "cancellation_policy": "strict", + "crew": 3.0, + "d_check_complete": false, + "moon_clearance_complete": false, + "price": 2196.0, + "company_id": 35029, + "review_scores_rating": 98, + "review_scores_comfort": 10, + "review_scores_amenities": 9, + "review_scores_trip": 10, + "review_scores_crew": 10, + "review_scores_location": 9, + "review_scores_price": 10, + "number_of_reviews": 11, + "reviews_per_month": 0.21, + "review_id": 4879, + "company_rating": 1.0, + "company_location": "Niue", + "total_fleet_count": 4.0, + "iata_approved": false + }, + { + "shuttle_id": 36260, + "shuttle_location": "Anguilla", + "shuttle_type": "Type V5", + "engine_type": "Quantum", + "engine_vendor": "ThetaBase Services", + "engines": 1.0, + "passenger_capacity": 2, + "cancellation_policy": "strict", + "crew": 1.0, + "d_check_complete": true, + "moon_clearance_complete": false, + "price": 1780.0, + "company_id": 30292, + "review_scores_rating": 90, + "review_scores_comfort": 8, + "review_scores_amenities": 9, + "review_scores_trip": 10, + "review_scores_crew": 9, + "review_scores_location": 9, + "review_scores_price": 9, + "number_of_reviews": 3, + "reviews_per_month": 0.09, + "review_id": 2, + "company_rating": 0.67, + "company_location": "Anguilla", + "total_fleet_count": 6.0, + "iata_approved": false + } +] \ No newline at end of file diff --git a/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py b/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py index 4ed0f9f676..1acbdf9531 
100755 --- a/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py +++ b/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py @@ -42,7 +42,6 @@ def create_pipeline(**kwargs) -> Pipeline: inputs=["reviews", "params:typing.reviews.columns_as_floats"], outputs="int_typed_reviews", name='apply_types_to_reviews' - ), node( func=aggregate_company_data, diff --git a/demo-project/src/demo_project/pipelines/reporting/nodes.py b/demo-project/src/demo_project/pipelines/reporting/nodes.py index b758540302..cd4796ceb1 100644 --- a/demo-project/src/demo_project/pipelines/reporting/nodes.py +++ b/demo-project/src/demo_project/pipelines/reporting/nodes.py @@ -9,7 +9,7 @@ import plotly.express as px import seaborn as sn from plotly import graph_objects as go - +from typing import Dict from .image_utils import DrawTable @@ -119,3 +119,21 @@ def create_matplotlib_chart(companies: pd.DataFrame) -> plt: ) sn.heatmap(confusion_matrix, annot=True) return plt + + +def get_top_shuttles_data(model_input_table: pd.DataFrame) -> Dict: + """This function retrieves the first five rows of the input table + and converts them into a list of records for the JSON dataset. + + Args: + model_input_table (pd.DataFrame): The data to retrieve the first + five rows from. + + Returns: + list: One dict per row, mapping each column name to that row's value. 
+ """ + + # Get the top N rows of the model input table + top_shuttle_df = model_input_table.head(5) + top_shuttle_json = top_shuttle_df.to_dict(orient="records") + return top_shuttle_json diff --git a/demo-project/src/demo_project/pipelines/reporting/pipeline.py b/demo-project/src/demo_project/pipelines/reporting/pipeline.py index 18a4a5d3b9..4b6eb4e6de 100644 --- a/demo-project/src/demo_project/pipelines/reporting/pipeline.py +++ b/demo-project/src/demo_project/pipelines/reporting/pipeline.py @@ -11,6 +11,7 @@ make_cancel_policy_bar_chart, make_price_analysis_image, make_price_histogram, + get_top_shuttles_data, ) @@ -43,6 +44,11 @@ def create_pipeline(**kwargs) -> Pipeline: inputs="prm_shuttle_company_reviews", outputs="confusion_matrix", ), + node( + func=get_top_shuttles_data, + inputs="prm_shuttle_company_reviews", + outputs="top_shuttle_data", + ), ], inputs=["prm_shuttle_company_reviews", "feature_importance_output"], namespace="reporting",