Commit

BUFFING SKLEARN

Riveong committed Jan 13, 2024
1 parent 11ee36c commit 794b45e
Showing 13 changed files with 371 additions and 172 deletions.
27 changes: 27 additions & 0 deletions Pipfile
@@ -0,0 +1,27 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
uvicorn = "==0.15.0"
fastapi = "==0.70.0"
pydantic = {version = "==1.10.7", extras = ["email"]}
google-cloud-storage = "==2.9.0"
python-dotenv = "==1.0.0"
pyjwt = "==1.7.1"
python-decouple = "==3.3"
mysql-connector = "==2.2.9"
python-multipart = "==0.0.6"
pillow = "==9.5.0"
numpy = "==1.24.3"
cloud-sql-python-connector = "*"
streamlit = "*"
cryptography = "*"
fastapi-pagination = "*"
pandas = "*"

[dev-packages]

[requires]
python_version = "3.10"
4 files renamed without changes.
36 changes: 30 additions & 6 deletions app/function.py → api/function.py
@@ -1,15 +1,15 @@
 from fastapi import FastAPI, Body, Depends, File, UploadFile, Request
 from fastapi.responses import FileResponse
-from app.auth.jwt_bearer import jwtBearer
-from app.auth.jwt_handler import *
+from api.auth.jwt_bearer import jwtBearer
+from api.auth.jwt_handler import *
 from google.cloud import storage
 from dotenv import load_dotenv
-from app.encryptor import *
-from app.model import *
+from api.encryptor import *
+from api.model import *
 from io import BytesIO
 import mysql.connector
-from app.function import *
-from app.db import *
+from api.function import *
+from api.db import *
 from PIL import Image
 import requests
 import uvicorn
@@ -216,6 +216,30 @@ def get_tutor_by_id(id_Tutor):
    close_db_connection(mydb, "User")
    return tutor_items

def get_tutor_by_name(name_Tutor):
    mydb = defineDB()
    mycursor = mydb.cursor()
    res = (name_Tutor,)
    mycursor.execute("SELECT * FROM Tutor WHERE Nama = %s ORDER BY id + 0 asc", res)
    myresult = mycursor.fetchall()
    tutor_items = {}  # stays empty when no tutor matches, instead of raising UnboundLocalError
    for x in myresult:
        tutor_items = {
            "id": x[0],
            "UserId": x[1],
            "Nama": x[2],
            "hasPenis": x[3],
            "AgesRanges": x[4],
            "Specialization": x[5],
            "Categories": x[6],
            # x[7] is skipped here, as in the original column mapping
            "AboutMe": x[8],
            "SkillsAndExperiences": x[9],
            "picture": x[10],
            "price": x[11]
        }
    mycursor.close()
    close_db_connection(mydb, "User")
    return tutor_items

def get_tutor_name(id_Tutor):
    mydb = defineDB()
    mycursor = mydb.cursor()
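A quick usage sketch for the new get_tutor_by_name helper (the tutor name is invented and a reachable database is assumed; this is not part of the commit):

# Hypothetical example: assumes defineDB() can connect and a tutor named "Alice" exists.
tutor = get_tutor_by_name("Alice")
if tutor:
    print(tutor["Categories"], tutor["price"])
else:
    print("no tutor found")  # tutor_items stays {} when the query matches nothing

Note that if several tutors share a name, only the last row wins, since the loop overwrites tutor_items on each iteration.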
9 changes: 9 additions & 0 deletions app/model.py → api/model.py
@@ -145,4 +145,13 @@ class Config:
"Date" : "12-12-12"
}
}

class PersonaSchema(BaseModel):
    Persona: str = Field(default=None)

    class Config:
        schema_extra = {
            "post_demo": {
                "list": "[1,2,3,4,5,3,4,5,6,4,3,3,2,3]"
            }
        }
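For context, the Persona field appears to carry the questionnaire answers as a stringified list: api/personality/clustering.py below parses it with ast.literal_eval and rejects anything that is not exactly 25 items (the sample payload above shows only 14). A minimal sketch under that reading, not part of the commit:

# Hypothetical illustration of the expected payload shape.
import ast

payload = PersonaSchema(Persona=str([1] * 25))  # 25 answers, as post_answer() requires
answers = ast.literal_eval(payload.Persona)
assert len(answers) == 25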

119 changes: 119 additions & 0 deletions api/personality/clustering.py
@@ -0,0 +1,119 @@
import requests
import numpy as np
from decouple import config
from dotenv import load_dotenv
import os
import ast
from fastapi import FastAPI, Body, Depends, File, UploadFile, Request
from fastapi.responses import FileResponse
from api.auth.jwt_bearer import jwtBearer
from api.auth.jwt_handler import *
from google.cloud import storage
from api.encryptor import *
from api.model import *
from io import BytesIO
import mysql.connector
from api.function import *
from api.db import *
from PIL import Image


def post_answer(data, email):
    try:
        _data = ast.literal_eval(data)
        if len(_data) != 25:
            return {"error": "true",
                    "message": "Please input a list of length 25"}
        mydb = defineDB()
        mycursor = mydb.cursor()
        sql = "UPDATE User SET answer = %s WHERE Email = %s"
        mycursor.execute(sql, (data, email))
        mydb.commit()
        mycursor.close()
        close_db_connection(mydb, "User")
        process_user_data()
        return {"error": "false",
                "message": "successfully added the personality"}

    except Exception:  # broad catch kept from the original; malformed input is the common case
        return {"error": "true",
                "message": "Please reformat the list"}




def get_answers():
    mydb = defineDB()
    mycursor = mydb.cursor()
    mycursor.execute("SELECT * FROM User WHERE Answer IS NOT NULL AND NOT Answer = ''")
    myresult = mycursor.fetchall()
    tutors = []
    for x in myresult:
        tutors.append(x[7])  # x[7] is the Answer column
    mycursor.close()
    close_db_connection(mydb, "User")
    return tutors


def get_names():
    mydb = defineDB()
    mycursor = mydb.cursor()
    mycursor.execute("SELECT * FROM User WHERE Answer IS NOT NULL AND NOT Answer = ''")
    myresult = mycursor.fetchall()
    tutors = []
    for x in myresult:
        tutors.append(x[2])  # x[2] is the Nama column, not the answer
    mycursor.close()
    close_db_connection(mydb, "User")
    return tutors


def process_answers():
    return [ast.literal_eval(s) for s in get_answers()]


def get_predictions(list_of_lists):
    # Build the request body in TensorFlow Serving's REST format
    data = {"instances": list_of_lists}

    # Send a POST request to the TensorFlow Serving REST API
    response = requests.post(os.getenv("cluster_url"), json=data)
    predictions = response.json()

    # Convert predictions to a numpy array
    predictions = np.array(predictions["predictions"])

    # Take the index of the maximum value per row (the assigned cluster)
    argmax_indices = np.argmax(predictions, axis=1)

    return argmax_indices


def create_name_value_hash(names, values):
    return {name: value for name, value in zip(names, values)}


def parse_answers(name_value_hash):
    mydb = defineDB()
    mycursor = mydb.cursor()
    for username, value in name_value_hash.items():
        value = int(value)  # numpy integer -> plain int for the MySQL driver
        mycursor.execute("UPDATE User SET Personality = %s WHERE Nama = %s", (value, username))
        mydb.commit()
    mycursor.close()

    close_db_connection(mydb, "User")

def process_user_data():
    answers = process_answers()
    names = get_names()
    predictions = get_predictions(answers)
    name_value_hash = create_name_value_hash(names, predictions)
    parse_answers(name_value_hash)
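Because cluster assignment is just an argmax over the model's output, the post-processing can be sanity-checked without a live TensorFlow Serving endpoint. A minimal sketch with a mocked response (the five-cluster shape follows the personality types used in pca.py; the probabilities are invented):

import numpy as np

# Mocked TF Serving-style response: 2 users, 5 personality clusters each
mock_response = {"predictions": [
    [0.10, 0.70, 0.10, 0.05, 0.05],  # user 1 -> cluster 1
    [0.02, 0.08, 0.10, 0.20, 0.60],  # user 2 -> cluster 4
]}

predictions = np.array(mock_response["predictions"])
clusters = np.argmax(predictions, axis=1)
print(clusters.tolist())  # [1, 4]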



163 changes: 163 additions & 0 deletions api/personality/pca.py
@@ -0,0 +1,163 @@
import os  # explicit import; os.getenv is used in get_predictions below (the original relied on star imports)
import requests
import numpy as np
from decouple import config
from dotenv import load_dotenv
from fastapi import FastAPI, Body, Depends, File, UploadFile, Request
from fastapi.responses import FileResponse
from api.auth.jwt_bearer import jwtBearer
from api.auth.jwt_handler import *
from google.cloud import storage
from api.encryptor import *
from api.model import *
from io import BytesIO
from api.function import *
from api.db import *
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances

def get_data_as_dataframe(table_name):
    mydb = defineDB()
    mycursor = mydb.cursor()

    # table_name comes from internal callers only; it is interpolated, not parameterized
    mycursor.execute(f"SELECT * FROM {table_name}")
    rows = mycursor.fetchall()
    column_names = [i[0] for i in mycursor.description]
    df = pd.DataFrame(rows, columns=column_names)

    mycursor.close()
    close_db_connection(mydb, table_name)

    return df

def get_user_as_dataframe(email):
    mydb = defineDB()
    mycursor = mydb.cursor()
    resq = (email,)
    mycursor.execute("SELECT * FROM User WHERE Email = %s", resq)
    rows = mycursor.fetchall()
    column_names = [i[0] for i in mycursor.description]
    df = pd.DataFrame(rows, columns=column_names)

    mycursor.close()
    close_db_connection(mydb, 'User')

    return df


def process_data(user_df, tutor_df):
    user_df = pd.merge(user_df, tutor_df[['Nama', 'Categories']], on='Nama', how='left')
    user_df = user_df[user_df['Tipe'] != 'student']
    df_name_answers = user_df[['uid', 'Nama', 'hasPenis', 'Answer', 'Personality']]
    match_df = df_name_answers.dropna(subset=['uid', 'Nama', 'hasPenis', 'Answer', 'Personality'])

    df_type = match_df[['Personality']]
    persona_type = pd.get_dummies(df_type, columns=['Personality']).astype(int)
    match_df = match_df.drop(['Answer', 'Personality', 'uid'], axis=1)

    match_df = pd.concat([match_df, persona_type], axis=1)

    # Insert new category columns
    new_columns = ['Technology', 'Arts', 'Multimedia', 'Music', 'Science', 'Social', 'Language', 'Math']
    for col in new_columns:
        match_df[col] = None

    # Merge and one-hot encode each tutor's category
    merged_df = pd.merge(match_df, tutor_df[['Nama', 'Categories']], on='Nama', how='left')
    for category in new_columns:
        merged_df[category] = merged_df.apply(lambda row: 1 if row['Categories'] == category else 0, axis=1)
    credentials_df = merged_df['Nama']
    merged_df.drop(['Categories', 'Nama'], axis=1, inplace=True)

    return merged_df, credentials_df

def process_data_user(user_df, user_chosen_category):
    # Keep only relevant columns and drop NA values
    relevant_df = user_df[['hasPenis', 'Personality']].dropna()

    # Ensure the Personality column is of float type for consistent encoding
    relevant_df['Personality'] = relevant_df['Personality'].astype(float)

    # Initialize all category columns to 0
    categories = ['Technology', 'Arts', 'Multimedia', 'Music', 'Science', 'Social', 'Language', 'Math']
    for category in categories:
        relevant_df[category] = 0

    # Set the user-chosen category to 1
    if user_chosen_category in categories:
        relevant_df[user_chosen_category] = 1

    # Explicitly create a column for each personality type
    personality_types = [0.0, 1.0, 2.0, 3.0, 4.0]
    for p_type in personality_types:
        column_name = f'Personality_{p_type}'
        relevant_df[column_name] = relevant_df['Personality'].apply(lambda x: 1 if x == p_type else 0)

    # Drop the original Personality column
    final_df = relevant_df.drop('Personality', axis=1)

    return final_df


def get_predictions(data):
    response = requests.post(os.getenv("pca_url"), json=data)
    return response.json()



def matchmaking(data, user_key='USER_KEY'):
    # Extract the prediction for the requesting user
    user_key_prediction = None
    for name, prediction in data:
        if name == user_key:
            user_key_prediction = prediction
            break

    # Check whether the user was found
    if user_key_prediction is None:
        return "USER_KEY not found in the data."

    # Calculate Euclidean distances from the user to every tutor
    distances = {}
    for name, prediction in data:
        if name != user_key:
            distance = euclidean_distances([user_key_prediction], [prediction])[0][0]
            distances[name] = distance

    # Keep the five closest tutors
    sorted_data = {k: v for k, v in sorted(distances.items(), key=lambda item: item[1])[:5]}
    tutor = []
    for name in sorted_data.keys():
        tutor.append(get_tutor_by_name(name))

    return tutor


def master_function(email, category):
    user_df = get_data_as_dataframe("User")
    tutor_df = get_data_as_dataframe("Tutor")
    # One call returns both outputs; the original called process_data twice for the same result
    merged_df, name_df = process_data(user_df, tutor_df)
    # Prepend the requesting user's placeholder key to the tutor name list.
    # (The original concatenated a DataFrame and a Series with different column
    # labels, which leaves NaNs in the selected column.)
    name_d = ['USER_KEY'] + name_df.tolist()

    input_data = process_data_user(get_user_as_dataframe(email), category)
    input_arr = input_data.values
    match_arr = merged_df.values
    one_array_resize = np.reshape(match_arr, (1, -1)) if match_arr.ndim == 1 else match_arr

    data = {"instances": input_arr.tolist() + one_array_resize.tolist()}

    predictions = get_predictions(data)
    combined_data = list(zip(name_d, predictions['predictions']))

    return {
        "error": "false",
        "message": "successfully got the match XD",
        "data": matchmaking(combined_data)
    }
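The matchmaking step is a plain Euclidean nearest-neighbour search over the model's embeddings, so it can be exercised with toy vectors and no serving endpoint (the names and two-dimensional embeddings below are invented, and the get_tutor_by_name lookup is deliberately left out):

from sklearn.metrics.pairwise import euclidean_distances

# Toy (name, embedding) pairs in the shape matchmaking() expects
data = [
    ("USER_KEY", [0.0, 0.0]),
    ("Alice",    [0.1, 0.2]),
    ("Bob",      [2.0, 2.0]),
    ("Cara",     [0.3, 0.1]),
]

user_vec = dict(data)["USER_KEY"]
distances = {
    name: euclidean_distances([user_vec], [vec])[0][0]
    for name, vec in data if name != "USER_KEY"
}
closest = sorted(distances, key=distances.get)[:5]
print(closest)  # ['Alice', 'Cara', 'Bob']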
3 changes: 0 additions & 3 deletions doc/doc.py

This file was deleted.
