# knnBasic.py
import pandas as pd
import numpy as np
from surprise import NormalPredictor
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise import KNNBasic , KNNWithMeans , KNNWithZScore , KNNBaseline
from surprise import accuracy
from surprise.model_selection import train_test_split
import scipy.sparse
from collections import defaultdict
# Build an item-item cosine similarity matrix from the book ratings with
# Surprise's KNNBasic and save it to disk as a SciPy sparse matrix.
print("changes mfkr")

# Column names are applied positionally; the file's own first row is skipped.
br_cols = ['book_id', 'user_id', 'rating']
bookRatings = pd.read_csv(
    'Data/ratings.csv',
    sep=',',
    names=br_cols,
    encoding='latin-1',
    low_memory=False,
    skiprows=[0],
)
# Reorder to (user, item, rating), the column order Dataset.load_from_df expects.
bookRatings = bookRatings[['user_id', 'book_id', 'rating']]
print(bookRatings.head())
print(bookRatings.shape)

# Keep the first rating for each (user, book) pair. `keep` is passed by
# keyword because pandas 2.x no longer accepts it as a positional argument.
bookRatings = bookRatings.drop_duplicates(
    subset=['user_id', 'book_id'],
    keep='first',
)
# Keep only users with at least 4 ratings. The result has to be assigned back:
# groupby(...).filter(...) returns a new frame and does not modify in place,
# so the previous un-assigned call had no effect.
bookRatings = bookRatings.groupby('user_id').filter(lambda x: len(x) >= 4)
# Restrict the data to user ids up to 55000.
bookRatings = bookRatings[bookRatings['user_id'] <= 55000]
print(bookRatings.shape)

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(bookRatings, reader)

# Train on every rating; no hold-out split is made in this script.
trainingSet = data.build_full_trainset()

sim_options = {
    'name': 'cosine',
    'user_based': False,  # similarities are computed between items (books)
    'min_support': 3,
}
knn = KNNBasic(k=100, min_k=6, sim_options=sim_options)
knn.fit(trainingSet)

# Persist the fitted similarity matrix in compressed sparse column form.
sim = knn.sim
print(type(sim))
sparse_matrix = scipy.sparse.csc_matrix(sim)
scipy.sparse.save_npz('Data/simCosine.npz', sparse_matrix)
"""
NOTE: disabled evaluation code. It uses `testSet` and `books`, which are not
defined in the visible part of this script.
predictions = knn.test(testSet)
#print(predictions)
#Prediction(uid=6727, iid=9476, r_ui=3.0, est=3.0, details={u'actual_k': 1, u'was_impossible': False}),
print("Running the loop now \n")
for uid , bid , rui , est , details in predictions:
    if(details['was_impossible']==True):
        continue
    else:
        if details['actual_k'] >=5 :
            print(uid , bid , rui , est)
def get_topN_recommendations(predictions , topN=5):
    top_recs = defaultdict(list)
    for uid, iid, true_r, est, details in predictions:
        if(details['was_impossible']==True):
            continue
        if details['actual_k'] <=5:
            continue
        top_recs[uid].append((iid, est))
    for uid, user_ratings in top_recs.items():
        user_ratings.sort(key = lambda x: x[1], reverse = True)
        top_recs[uid] = user_ratings[:topN]
    return top_recs
top3_recommendations = get_topN_recommendations(predictions , topN=3)
for uid, user_ratings in top3_recommendations.items():
    for iid , _ in user_ratings:
        print(uid , books.loc[iid-1]['original_title'])
print(accuracy.rmse(predictions))
"""
"""
Basic
cosine
RMSE: 0.9069
0.906855950284
Pearson
RMSE: 0.9257
0.925713187254
pearson_baseline
RMSE: 0.9134
0.913366535973
After implicit average_rating
RMSE: 0.9142
0.914248731719
"""
"""
ZScore
Cosine
RMSE: 0.8802
0.880221480684
After implicit
RMSE: 0.8782
0.878221157428
Pearson
RMSE: 0.9050
0.90498226223
Pearson Baseline
RMSE: 0.8937
0.893722398362
"""
"""
Means
Cosine
RMSE: 0.8747
0.87470288656
Pearson
RMSE: 0.8973
0.897298162953
Pearson Baseline
RMSE: 0.8833
0.88334230876
"""