Skip to content

Commit

Permalink
Food Recipe issue resolved
Browse files Browse the repository at this point in the history
  • Loading branch information
SonyShrestha committed Jun 10, 2024
1 parent 389cba7 commit 8f69b02
Show file tree
Hide file tree
Showing 25 changed files with 47 additions and 16 deletions.
45 changes: 37 additions & 8 deletions Website/pages/cust_purchase_expected_expiry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
import pandas as pd
import os
import re
from pyspark.sql import SparkSession
import configparser
import json

# Get the path to the parent parent directory
root_dir = os.path.abspath(os.path.join(os.getcwd()))
Expand All @@ -12,23 +15,52 @@
# Title of the app
st.title('Customer Purchase with Expected Expiry')

root_dir = os.path.abspath(os.path.join(os.getcwd()))

# Specify the path to config file
config_file_path = os.path.join(root_dir, "config.ini")
config = configparser.ConfigParser()
config.read(config_file_path)

config_file_path_json = os.path.join(root_dir, "config.json")
with open(config_file_path_json) as f:
config_json = json.load(f)

formatted_zone_bucket = config["GCS"]["formatted_bucket_name"]

# Function to load data from local Parquet file
def load_data(filepath):
    """Load a local Parquet file into a pandas DataFrame.

    Args:
        filepath: Path to the Parquet file (or Parquet dataset directory).

    Returns:
        pandas.DataFrame holding the file's contents.
    """
    frame = pd.read_parquet(filepath)
    return frame

@st.cache_data
def load_data_from_gcs(filepath):
    """Read a Parquet dataset from Google Cloud Storage into pandas.

    Obtains (or reuses) a SparkSession configured with the GCS Hadoop
    connector and service-account credentials, reads the Parquet data at
    *filepath*, and converts the Spark DataFrame to pandas. Streamlit
    caches the result, so repeated calls with the same path skip the read.

    Args:
        filepath: A ``gs://`` URI (wildcards allowed) pointing at Parquet data.

    Returns:
        pandas.DataFrame with the dataset's rows.
    """
    # Connector package plus filesystem/auth settings required for gs:// paths.
    gcs_settings = {
        "spark.jars.packages": "com.google.cloud.bigdataoss:gcs-connector:hadoop3-2.2.2",
        "fs.gs.impl": "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem",
        "fs.AbstractFileSystem.gs.impl": "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS",
        "google.cloud.auth.service.account.json.keyfile": os.path.join(root_dir, "gcs_config.json"),
    }
    builder = SparkSession.builder.appName("Feature 1")
    for option, value in gcs_settings.items():
        builder = builder.config(option, value)
    spark = builder.getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")

    spark_frame = spark.read.parquet(filepath)

    # Hand back a plain pandas DataFrame for Streamlit rendering.
    return spark_frame.toPandas()


def cust_purchase_expected_expiry():
# Specify the path to the local Parquet file
parquet_file_path = os.path.join(root_dir,'data', 'formatted_zone', 'purchases_nearing_expiry')
# parquet_file_path = os.path.join(root_dir,'data', 'formatted_zone', 'purchases_nearing_expiry')
parquet_file_path = 'gs://'+formatted_zone_bucket+'/purchases_nearing_expiry*'

try:
st.write("<br>", unsafe_allow_html=True)
st.header("Estimation of Expected Expiry Date")

col1, col2, col3, col4, col5, col6 = st.columns(6)
# Read the Parquet file into a DataFrame
df = load_data(parquet_file_path)
df = df[df['score']==100]
df = load_data_from_gcs(parquet_file_path)

df = df[["customer_name", "product_name", "purchase_date", "expected_expiry_date"]]
df['product_name'] = df['product_name'].str.title()
Expand Down Expand Up @@ -85,22 +117,19 @@ def cust_purchase_expected_expiry():
}, inplace=True)

st.write("<br>", unsafe_allow_html=True)
st.dataframe(df, use_container_width=True)
st.dataframe(df.sample(frac=1).reset_index(drop=True), use_container_width=True)

except FileNotFoundError as e:
st.error(f"File not found: {e}")
except Exception as e:
st.error(f"An error occurred: {e}")



# Add an image at the end
image_path = os.path.join(root_dir,'images','expiry_notification.jpg')


st.image(image_path, caption='Expiry Notification', use_column_width=True)


# Custom CSS for footer
st.markdown("""
<style>
Expand All @@ -118,4 +147,4 @@ def cust_purchase_expected_expiry():
<div class="footer">
<p>Developed by SpicyBytes</p>
</div>
""", unsafe_allow_html=True)
""", unsafe_allow_html=True)
18 changes: 10 additions & 8 deletions Website/pages/food_recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
config_json = json.load(f)



def create_spark_session():
gcs_config = config["GCS"]["credentials_path"]
spark = SparkSession.builder \
Expand All @@ -54,27 +53,27 @@ def create_spark_session():
spark.sparkContext.setLogLevel("ERROR")
return spark

@udf(StringType())
def clean_ingredients(ingredient_list):
    """Spark UDF: flatten an ingredient array into one comma-separated string
    with quantities, measurement units and filler words stripped out.

    The first substitution drops numbers with an optional trailing unit
    (g, oz, ml, tsp, ...), unicode fractions, and phrases such as
    "to serve" / "Handful"; the second trims dangling separators.
    """
    joined = ', '.join(ingredient_list)
    without_units = re.sub(
        r'\d+\s*(g|oz|ml|tsp|tbs|cups|pint|quart|l|lb|kg|teaspoon|tablespoon|medium|cup|/|-)?\s*|¼\s*|½\s*|¾\s*|to serve|Handful\s*',
        '',
        joined,
        flags=re.IGNORECASE,
    )
    return re.sub(r'[\s,-]*$', '', without_units).strip()

clean_ingredients_udf = udf(clean_ingredients, StringType())
# clean_ingredients_udf = udf(clean_ingredients, StringType())

def preprocess_data(input_path):
spark = create_spark_session()
try:
df = spark.read.parquet(input_path)
# df.show()

# Convert ingredients array to a single string
df = df.withColumn("ingredients_string", concat_ws(", ", col("ingredients")))

# Apply lower function and clean_ingredients function
processed_df = df.withColumn("clean_ingredients", clean_ingredients_udf(col("ingredients"))) \
processed_df = df.withColumn("clean_ingredients", clean_ingredients(col("ingredients"))) \
.withColumn("ingredients_lower", lower(col("ingredients_string"))) \
.drop("ingredients", "ingredients_string")

.drop("ingredients", "ingredients_string")
return processed_df
except Exception as e:
print(f"Error during processing: {e}")
Expand Down Expand Up @@ -109,18 +108,20 @@ def initialize():
processed_df = preprocess_data(input_path)
return processed_df


def food_recommender():
st.write("<br>", unsafe_allow_html=True)
processed_df = initialize()
# st.dataframe(processed_df)

st.header("Recipe Recommendatioin")

user_ingredients = st.text_input("Enter ingredients, separated by commas", "rice, tomatoes")
ingredients_list = [ingredient.strip() for ingredient in user_ingredients.split(',')]

if st.button("Generate Recipe"):

if processed_df is not None:
st.dataframe(processed_df)
recipes_or_generated = find_or_generate_recipes(processed_df, ingredients_list)
if 'generated_recipe' in recipes_or_generated[0]:
st.write("Generated Recipe:")
Expand Down Expand Up @@ -150,4 +151,5 @@ def food_recommender():
<div class="footer">
<p>Developed by SpicyBytes</p>
</div>
""", unsafe_allow_html=True)
""", unsafe_allow_html=True)

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit 8f69b02

Please sign in to comment.