-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
70 lines (55 loc) · 1.88 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
import numpy as np
import sys
from TrainTestSplit import TrainTestSplit
from FunkSVD import FunkSVD
# Helper function to calculate Root Mean Squared Error (RMSE)
def rmse(predictions, actuals):
    """Return the root mean squared error between predictions and actuals.

    Args:
        predictions: Predicted values; any array-like (list, tuple or
            NumPy array) of numbers.
        actuals: Ground-truth values, compared element-wise by position
            with ``predictions``.

    Returns:
        ``sqrt(mean((predictions - actuals) ** 2))`` as a NumPy float.
    """
    # Coerce to float arrays first: plain Python lists do not support ``-``
    # and unsigned-integer arrays would wrap around on subtraction.
    predicted = np.asarray(predictions, dtype=float)
    observed = np.asarray(actuals, dtype=float)
    return np.sqrt(np.mean((predicted - observed) ** 2))
# Reading file names from the command line: ratings file first, targets
# file second.
# Exit with a usage message when an argument is missing instead of failing
# with an IndexError traceback. sys.exit() writes a string argument to
# stderr, so stdout stays reserved for the predictions.
if len(sys.argv) < 3:
    sys.exit(f'Usage: python {sys.argv[0]} <ratings_file> <targets_file>')
ratings_file = sys.argv[1]
targets_file = sys.argv[2]
# Setting passed to the train/test splitter
test_size = 0.001

# Hyperparameters forwarded to FunkSVD.train
k = 100
lamda = 0.2
lr = 0.1
batch_size = 64
epochs = 20

# Split the data; the splitter returns three values, the third of which
# (`data`) is used further down for the per-item averages
splitter = TrainTestSplit(file_name=ratings_file)
train_data, test_data, data = splitter(test_size=test_size, random_state=42)

# Train the model on the training partition
model = FunkSVD(train_data)
model.train(
    k=k,
    batch_size=batch_size,
    lr=lr,
    lamda=lamda,
    epochs=epochs,
)

# Auxiliary Series holding the mean rating of each item, indexed by ItemId
item_mean = data.groupby('ItemId')['Rating'].mean()
# TEST PREDICTION ON TEST SET (currently disabled)
# The triple-quoted string below is a bare expression statement, so none of
# the code inside it is executed. If re-enabled, it would predict a rating
# for every row of test_data with model.prediction, compare the predictions
# with the actual ratings via rmse(), and print the resulting test-set RMSE.
'''
test_predictions = []
test_actuals = []
for index, row in test_data.iterrows():
user = row['UserId']
item = row['ItemId']
actual_rating = row['Rating']
predicted_rating = model.prediction(user, item, item_mean)
test_predictions.append(predicted_rating)
test_actuals.append(actual_rating)
# Calculate RMSE for the test set
test_predictions = np.array(test_predictions)
test_actuals = np.array(test_actuals)
test_rmse = rmse(test_predictions, test_actuals)
print(f"Test Set RMSE: {test_rmse}")
'''
# Creating the targets dataframe: the combined 'UserId:ItemId' column is
# split on ':' into separate 'UserId' and 'ItemId' string columns, and the
# combined column is then dropped.
targets = pd.read_csv(targets_file)
targets[['UserId', 'ItemId']] = targets['UserId:ItemId'].str.split(':', expand=True)
targets = targets.drop(columns=['UserId:ItemId'])

# Making predictions for every UserId and ItemId in targets.
# The two columns are walked in lockstep with zip() rather than
# DataFrame.iterrows(), which builds a new Series object for every row
# just to read two values from it.
print('UserId:ItemId,Rating')
for user, item in zip(targets['UserId'], targets['ItemId']):
    rating = model.prediction(user, item, item_mean)
    # Writing prediction to stdout
    print(f'{user}:{item},{rating}')