Commit

BUFFING SKLEARN

Riveong committed Jan 13, 2024
1 parent 11ee36c commit 794b45e
Showing 13 changed files with 371 additions and 172 deletions.
27 changes: 27 additions & 0 deletions Pipfile
@@ -0,0 +1,27 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
uvicorn = "==0.15.0"
fastapi = "==0.70.0"
pydantic = {version = "==1.10.7", extras = ["email"]}
google-cloud-storage = "==2.9.0"
python-dotenv = "==1.0.0"
pyjwt = "==1.7.1"
python-decouple = "==3.3"
mysql-connector = "==2.2.9"
python-multipart = "==0.0.6"
pillow = "==9.5.0"
numpy = "==1.24.3"
cloud-sql-python-connector = "*"
streamlit = "*"
cryptography = "*"
fastapi-pagination = "*"
pandas = "*"

[dev-packages]

[requires]
python_version = "3.10"
4 files renamed without changes.
36 changes: 30 additions & 6 deletions app/function.py → api/function.py
@@ -1,15 +1,15 @@
 from fastapi import FastAPI, Body, Depends, File, UploadFile, Request
 from fastapi.responses import FileResponse
-from app.auth.jwt_bearer import jwtBearer
-from app.auth.jwt_handler import *
+from api.auth.jwt_bearer import jwtBearer
+from api.auth.jwt_handler import *
 from google.cloud import storage
 from dotenv import load_dotenv
-from app.encryptor import *
-from app.model import *
+from api.encryptor import *
+from api.model import *
 from io import BytesIO
 import mysql.connector
-from app.function import *
-from app.db import *
+from api.function import *
+from api.db import *
 from PIL import Image
 import requests
 import uvicorn
@@ -216,6 +216,30 @@ def get_tutor_by_id(id_Tutor):
    close_db_connection(mydb, "User")
    return tutor_items

def get_tutor_by_name(name_Tutor):
    mydb = defineDB()
    mycursor = mydb.cursor()
    res = (name_Tutor,)
    mycursor.execute("SELECT * FROM Tutor WHERE Nama = %s ORDER BY id + 0 asc", res)
    myresult = mycursor.fetchall()
    tutor_items = {}  # stays empty when no tutor matches, instead of raising UnboundLocalError
    for x in myresult:
        tutor_items = {
            "id": x[0],
            "UserId": x[1],
            "Nama": x[2],
            "hasPenis": x[3],
            "AgesRanges": x[4],
            "Specialization": x[5],
            "Categories": x[6],
            # x[7] is skipped here, as in the original column mapping
            "AboutMe": x[8],
            "SkillsAndExperiences": x[9],
            "picture": x[10],
            "price": x[11]
        }
    mycursor.close()
    close_db_connection(mydb, "User")
    return tutor_items

def get_tutor_name(id_Tutor):
    mydb = defineDB()
    mycursor = mydb.cursor()
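A quick usage sketch for the new get_tutor_by_name helper (the tutor name is invented and a reachable database is assumed; this is not part of the commit):

# Hypothetical example: assumes defineDB() can connect and a tutor named "Alice" exists.
tutor = get_tutor_by_name("Alice")
if tutor:
    print(tutor["Categories"], tutor["price"])
else:
    print("no tutor found")  # tutor_items stays {} when the query matches nothing

Note that if several tutors share a name, only the last row wins, since the loop overwrites tutor_items on each iteration.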
9 changes: 9 additions & 0 deletions app/model.py → api/model.py
@@ -145,4 +145,13 @@ class Config:
"Date" : "12-12-12"
}
}

class PersonaSchema(BaseModel):
    Persona: str = Field(default=None)

    class Config:
        schema_extra = {
            "post_demo": {
                "list": "[1,2,3,4,5,3,4,5,6,4,3,3,2,3]"
            }
        }
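For context, the Persona field appears to carry the questionnaire answers as a stringified list: api/personality/clustering.py below parses it with ast.literal_eval and rejects anything that is not exactly 25 items (the sample payload above shows only 14). A minimal sketch under that reading, not part of the commit:

# Hypothetical illustration of the expected payload shape.
import ast

payload = PersonaSchema(Persona=str([1] * 25))  # 25 answers, as post_answer() requires
answers = ast.literal_eval(payload.Persona)
assert len(answers) == 25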

119 changes: 119 additions & 0 deletions api/personality/clustering.py
@@ -0,0 +1,119 @@
import requests
import numpy as np
from decouple import config
from dotenv import load_dotenv
import os
import ast
from fastapi import FastAPI, Body, Depends, File, UploadFile, Request
from fastapi.responses import FileResponse
from api.auth.jwt_bearer import jwtBearer
from api.auth.jwt_handler import *
from google.cloud import storage
from api.encryptor import *
from api.model import *
from io import BytesIO
import mysql.connector
from api.function import *
from api.db import *
from PIL import Image


def post_answer(data, email):
    try:
        _data = ast.literal_eval(data)
        if len(_data) != 25:
            return {"error": "true",
                    "message": "Please input a list of length 25"}
        mydb = defineDB()
        mycursor = mydb.cursor()
        sql = "UPDATE User SET answer = %s WHERE Email = %s"
        mycursor.execute(sql, (data, email))
        mydb.commit()
        mycursor.close()
        close_db_connection(mydb, "User")
        process_user_data()
        return {"error": "false",
                "message": "successfully added the personality"}

    except Exception:  # broad catch kept from the original; malformed input is the common case
        return {"error": "true",
                "message": "Please reformat the list"}




def get_answers():
    mydb = defineDB()
    mycursor = mydb.cursor()
    mycursor.execute("SELECT * FROM User WHERE Answer IS NOT NULL AND NOT Answer = ''")
    myresult = mycursor.fetchall()
    tutors = []
    for x in myresult:
        tutors.append(x[7])  # x[7] is the Answer column
    mycursor.close()
    close_db_connection(mydb, "User")
    return tutors


def get_names():
    mydb = defineDB()
    mycursor = mydb.cursor()
    mycursor.execute("SELECT * FROM User WHERE Answer IS NOT NULL AND NOT Answer = ''")
    myresult = mycursor.fetchall()
    tutors = []
    for x in myresult:
        tutors.append(x[2])  # x[2] is the Nama column, not the answer
    mycursor.close()
    close_db_connection(mydb, "User")
    return tutors


def process_answers():
    return [ast.literal_eval(s) for s in get_answers()]


def get_predictions(list_of_lists):
    # Build the request body in TensorFlow Serving's REST format
    data = {"instances": list_of_lists}

    # Send a POST request to the TensorFlow Serving REST API
    response = requests.post(os.getenv("cluster_url"), json=data)
    predictions = response.json()

    # Convert predictions to a numpy array
    predictions = np.array(predictions["predictions"])

    # Take the index of the maximum value per row (the assigned cluster)
    argmax_indices = np.argmax(predictions, axis=1)

    return argmax_indices


def create_name_value_hash(names, values):
    return {name: value for name, value in zip(names, values)}


def parse_answers(name_value_hash):
    mydb = defineDB()
    mycursor = mydb.cursor()
    for username, value in name_value_hash.items():
        value = int(value)  # numpy integer -> plain int for the MySQL driver
        mycursor.execute("UPDATE User SET Personality = %s WHERE Nama = %s", (value, username))
        mydb.commit()
    mycursor.close()

    close_db_connection(mydb, "User")

def process_user_data():
    answers = process_answers()
    names = get_names()
    predictions = get_predictions(answers)
    name_value_hash = create_name_value_hash(names, predictions)
    parse_answers(name_value_hash)
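Because cluster assignment is just an argmax over the model's output, the post-processing can be sanity-checked without a live TensorFlow Serving endpoint. A minimal sketch with a mocked response (the five-cluster shape follows the personality types used in pca.py; the probabilities are invented):

import numpy as np

# Mocked TF Serving-style response: 2 users, 5 personality clusters each
mock_response = {"predictions": [
    [0.10, 0.70, 0.10, 0.05, 0.05],  # user 1 -> cluster 1
    [0.02, 0.08, 0.10, 0.20, 0.60],  # user 2 -> cluster 4
]}

predictions = np.array(mock_response["predictions"])
clusters = np.argmax(predictions, axis=1)
print(clusters.tolist())  # [1, 4]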



163 changes: 163 additions & 0 deletions api/personality/pca.py
@@ -0,0 +1,163 @@
import os  # explicit import; os.getenv is used in get_predictions below (the original relied on star imports)
import requests
import numpy as np
from decouple import config
from dotenv import load_dotenv
from fastapi import FastAPI, Body, Depends, File, UploadFile, Request
from fastapi.responses import FileResponse
from api.auth.jwt_bearer import jwtBearer
from api.auth.jwt_handler import *
from google.cloud import storage
from api.encryptor import *
from api.model import *
from io import BytesIO
from api.function import *
from api.db import *
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances

def get_data_as_dataframe(table_name):
    mydb = defineDB()
    mycursor = mydb.cursor()

    # table_name comes from internal callers only; it is interpolated, not parameterized
    mycursor.execute(f"SELECT * FROM {table_name}")
    rows = mycursor.fetchall()
    column_names = [i[0] for i in mycursor.description]
    df = pd.DataFrame(rows, columns=column_names)

    mycursor.close()
    close_db_connection(mydb, table_name)

    return df

def get_user_as_dataframe(email):
    mydb = defineDB()
    mycursor = mydb.cursor()
    resq = (email,)
    mycursor.execute("SELECT * FROM User WHERE Email = %s", resq)
    rows = mycursor.fetchall()
    column_names = [i[0] for i in mycursor.description]
    df = pd.DataFrame(rows, columns=column_names)

    mycursor.close()
    close_db_connection(mydb, 'User')

    return df


def process_data(user_df, tutor_df):
    user_df = pd.merge(user_df, tutor_df[['Nama', 'Categories']], on='Nama', how='left')
    user_df = user_df[user_df['Tipe'] != 'student']
    df_name_answers = user_df[['uid', 'Nama', 'hasPenis', 'Answer', 'Personality']]
    match_df = df_name_answers.dropna(subset=['uid', 'Nama', 'hasPenis', 'Answer', 'Personality'])

    df_type = match_df[['Personality']]
    persona_type = pd.get_dummies(df_type, columns=['Personality']).astype(int)
    match_df = match_df.drop(['Answer', 'Personality', 'uid'], axis=1)

    match_df = pd.concat([match_df, persona_type], axis=1)

    # Insert new category columns
    new_columns = ['Technology', 'Arts', 'Multimedia', 'Music', 'Science', 'Social', 'Language', 'Math']
    for col in new_columns:
        match_df[col] = None

    # Merge and one-hot encode each tutor's category
    merged_df = pd.merge(match_df, tutor_df[['Nama', 'Categories']], on='Nama', how='left')
    for category in new_columns:
        merged_df[category] = merged_df.apply(lambda row: 1 if row['Categories'] == category else 0, axis=1)
    credentials_df = merged_df['Nama']
    merged_df.drop(['Categories', 'Nama'], axis=1, inplace=True)

    return merged_df, credentials_df

def process_data_user(user_df, user_chosen_category):
    # Keep only relevant columns and drop NA values
    relevant_df = user_df[['hasPenis', 'Personality']].dropna()

    # Ensure the Personality column is of float type for consistent encoding
    relevant_df['Personality'] = relevant_df['Personality'].astype(float)

    # Initialize all category columns to 0
    categories = ['Technology', 'Arts', 'Multimedia', 'Music', 'Science', 'Social', 'Language', 'Math']
    for category in categories:
        relevant_df[category] = 0

    # Set the user-chosen category to 1
    if user_chosen_category in categories:
        relevant_df[user_chosen_category] = 1

    # Explicitly create a column for each personality type
    personality_types = [0.0, 1.0, 2.0, 3.0, 4.0]
    for p_type in personality_types:
        column_name = f'Personality_{p_type}'
        relevant_df[column_name] = relevant_df['Personality'].apply(lambda x: 1 if x == p_type else 0)

    # Drop the original Personality column
    final_df = relevant_df.drop('Personality', axis=1)

    return final_df


def get_predictions(data):
    response = requests.post(os.getenv("pca_url"), json=data)
    return response.json()



def matchmaking(data, user_key='USER_KEY'):
    # Extract the prediction for the requesting user
    user_key_prediction = None
    for name, prediction in data:
        if name == user_key:
            user_key_prediction = prediction
            break

    # Check whether the user was found
    if user_key_prediction is None:
        return "USER_KEY not found in the data."

    # Calculate Euclidean distances from the user to every tutor
    distances = {}
    for name, prediction in data:
        if name != user_key:
            distance = euclidean_distances([user_key_prediction], [prediction])[0][0]
            distances[name] = distance

    # Keep the five closest tutors
    sorted_data = {k: v for k, v in sorted(distances.items(), key=lambda item: item[1])[:5]}
    tutor = []
    for name in sorted_data.keys():
        tutor.append(get_tutor_by_name(name))

    return tutor


def master_function(email, category):
    user_df = get_data_as_dataframe("User")
    tutor_df = get_data_as_dataframe("Tutor")
    # One call returns both outputs; the original called process_data twice for the same result
    merged_df, name_df = process_data(user_df, tutor_df)
    # Prepend the requesting user's placeholder key to the tutor name list.
    # (The original concatenated a DataFrame and a Series with different column
    # labels, which leaves NaNs in the selected column.)
    name_d = ['USER_KEY'] + name_df.tolist()

    input_data = process_data_user(get_user_as_dataframe(email), category)
    input_arr = input_data.values
    match_arr = merged_df.values
    one_array_resize = np.reshape(match_arr, (1, -1)) if match_arr.ndim == 1 else match_arr

    data = {"instances": input_arr.tolist() + one_array_resize.tolist()}

    predictions = get_predictions(data)
    combined_data = list(zip(name_d, predictions['predictions']))

    return {
        "error": "false",
        "message": "successfully got the match XD",
        "data": matchmaking(combined_data)
    }
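The matchmaking step is a plain Euclidean nearest-neighbour search over the model's embeddings, so it can be exercised with toy vectors and no serving endpoint (the names and two-dimensional embeddings below are invented, and the get_tutor_by_name lookup is deliberately left out):

from sklearn.metrics.pairwise import euclidean_distances

# Toy (name, embedding) pairs in the shape matchmaking() expects
data = [
    ("USER_KEY", [0.0, 0.0]),
    ("Alice",    [0.1, 0.2]),
    ("Bob",      [2.0, 2.0]),
    ("Cara",     [0.3, 0.1]),
]

user_vec = dict(data)["USER_KEY"]
distances = {
    name: euclidean_distances([user_vec], [vec])[0][0]
    for name, vec in data if name != "USER_KEY"
}
closest = sorted(distances, key=distances.get)[:5]
print(closest)  # ['Alice', 'Cara', 'Bob']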
3 changes: 0 additions & 3 deletions doc/doc.py

This file was deleted.
