-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript.py
131 lines (95 loc) · 3.94 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import pandas as pd
from random import randint
# Number of jokes handled by this script; joke ids run from 1 to jokes_number
# inclusive (see the ranges used in initialize()).
jokes_number = 158
def initialize():
    """Load the rating and joke CSV files and derive the lookup structures.

    Both files are read from the current working directory, without a header
    row and with ``;`` as the separator.

    Returns:
        A 5-tuple ``(ratings, jokes, jokes_dict, mean_ratings,
        number_of_ratings)`` where

        * ``ratings`` is the rating table without its first column, with the
          joke columns ``1..jokes_number`` cast to float and every ``99``
          replaced by ``0`` (``0`` is treated as "no rating" below);
        * ``jokes`` is the raw joke table;
        * ``jokes_dict`` maps joke id ``i`` to the first cell of row ``i - 1``
          of ``jokes``;
        * ``mean_ratings`` is a one-column frame (``mean_joke_ratings``)
          indexed by joke id, holding the mean of the non-zero ratings of
          each joke (``0`` when a joke has none);
        * ``number_of_ratings`` is the first column of the rating file.
    """
    # Load the raw data from the local CSV files.
    ratings = pd.read_csv(
        'April 2015 to Nov 30 2019 - Transformed Jester Data.csv', header=None,
        delimiter=';')
    jokes = pd.read_csv('Dataset4JokeSet.csv', header=None, sep=';')

    joke_ids = list(range(1, jokes_number + 1))

    # Column 0 is kept separately; the remaining columns are the ratings.
    number_of_ratings = ratings[0]
    # astype() returns a new frame, so nothing is written into a slice of the
    # frame that was read from disk.
    ratings = ratings.iloc[:, 1:].astype(
        {joke_id: 'float' for joke_id in joke_ids})

    jokes_dict = {joke_id: jokes.iloc[joke_id - 1, 0] for joke_id in joke_ids}

    ratings = ratings.replace(99, 0)

    # Mean of the non-zero ratings per joke: zeros are masked to NaN so that
    # mean() skips them; a joke without any rating ends up with a mean of 0.
    joke_ratings = ratings[joke_ids]
    mean_ratings = (joke_ratings.where(joke_ratings != 0)
                    .mean()
                    .fillna(0)
                    .to_frame('mean_joke_ratings'))

    return ratings, jokes, jokes_dict, mean_ratings, number_of_ratings
def get_popular_jokes(mean_ratings, jokes_dict, n=3):
    """Return the texts of the ``n`` best-rated jokes, best first.

    Args:
        mean_ratings: frame indexed by joke id with a ``mean_joke_ratings``
            column.
        jokes_dict: mapping from joke id to joke text.
        n: how many jokes to return.

    Returns:
        A list of joke texts ordered by decreasing mean rating.
    """
    ranked = mean_ratings.sort_values(by='mean_joke_ratings', ascending=False)
    best_ids = ranked.index[:n].tolist()
    return [jokes_dict[joke_id] for joke_id in best_ids]
def get_worst_jokes(mean_ratings, jokes_dict, n=3):
    """Return the texts of the ``n`` worst-rated jokes, worst first.

    Args:
        mean_ratings: frame indexed by joke id with a ``mean_joke_ratings``
            column.
        jokes_dict: mapping from joke id to joke text.
        n: how many jokes to return; ``0`` yields an empty list.

    Returns:
        A list of joke texts ordered by increasing mean rating.
    """
    ranked = mean_ratings.sort_values(by='mean_joke_ratings', ascending=False)
    # Reverse first, then take the head: slicing with ``[-n:]`` would return
    # the whole table for n == 0 because ``-0`` is just ``0``.
    worst_ids = ranked.iloc[::-1].head(n).index.tolist()
    return [jokes_dict[joke_id] for joke_id in worst_ids]
def get_recommanded_joke(ratings, jokes_dict, number_of_ratings, user_data):
    """Recommend a joke the user has not rated yet, based on similar users.

    Every existing user is correlated with ``user_data``; users are then
    visited from the most to the least correlated and the first joke that
    such a user rated above the "liked" threshold, and that the target user
    has not rated, is returned.

    Args:
        ratings: frame with one row per user and one column per joke id;
            ``0`` means "not rated".
        jokes_dict: mapping from joke id to joke text.
        number_of_ratings: per-user count, aligned on the index of
            ``ratings``; users whose count is not above 5 are ignored.
        user_data: sequence of the target user's ratings, in the same order
            as the columns of ``ratings``; ``0`` means "not rated".

    Returns:
        A ``(joke_id, joke_text)`` tuple where ``joke_id`` is a column label
        of ``ratings`` (and a key of ``jokes_dict``). When no similar user
        offers a candidate, a random joke the user has not rated is returned
        (or any random joke if the user rated them all).
    """
    min_rating_count = 5  # a neighbour must have a count above this
    like_threshold = 5    # a neighbour rating above this counts as "liked"

    # Position k of user_data describes the joke in column k of ratings.
    n_jokes = min(len(user_data), ratings.shape[1])
    joke_ids = ratings.columns.tolist()[:n_jokes]
    # Index the user's ratings by joke id so that corrwith() lines them up
    # with the matching jokes instead of with a default 0-based index.
    user_ratings = pd.Series(list(user_data)[:n_jokes], index=joke_ids,
                             dtype='float')
    unrated = user_ratings.to_numpy() == 0

    correlations = ratings.T.corrwith(user_ratings)
    neighbours = pd.DataFrame({'Correlation': correlations})
    neighbours['Count'] = number_of_ratings
    neighbours = neighbours[
        neighbours['Count'] > min_rating_count].sort_values('Correlation',
                                                            ascending=False)

    # Rows are looked up by label: the index of ratings is not guaranteed to
    # be positional once users have been added under their own ids.
    neighbour_ratings = ratings.loc[neighbours.index].to_numpy()[:, :n_jokes]
    candidates = (neighbour_ratings > like_threshold) & unrated
    has_candidate = candidates.any(axis=1)
    if has_candidate.any():
        # argmax() on a boolean array gives the position of the first True.
        best_neighbour = candidates[has_candidate.argmax()]
        joke_id = joke_ids[best_neighbour.argmax()]
        return joke_id, jokes_dict[joke_id]

    # Nothing to learn from the neighbours: fall back to a random joke,
    # preferring the ones the user has not rated yet.
    pool = [jid for jid, free in zip(joke_ids, unrated) if free] or joke_ids
    joke_id = pool[randint(0, len(pool) - 1)]
    return joke_id, jokes_dict[joke_id]
def write_rating(ratings, rate, joke_id, user_id):
    """Store ``rate`` as the rating of ``joke_id`` by ``user_id``, in place.

    Args:
        ratings: frame with one row per user and one column per joke id.
        rate: the rating to store.
        joke_id: column label of the joke in ``ratings``.
        user_id: row label of the user; a new row is created through
            ``add_new_user`` when the user is not in the index yet.
    """
    # Test membership on the Index itself rather than on a list copy of it,
    # which would be rebuilt and scanned on every call.
    if user_id not in ratings.index:
        add_new_user(ratings, user_id)
    ratings.loc[user_id, joke_id] = rate
def add_new_user(ratings, user_id):
    """Append an all-zero rating row labelled ``user_id`` to ``ratings``.

    The frame is modified in place. Nothing happens when ``user_id`` is
    already a row label, so existing ratings are never overwritten and no
    duplicate label is created.

    Args:
        ratings: frame with one row per user and one column per joke.
        user_id: label of the row to create.
    """
    if user_id in ratings.index:
        return
    # Assigning to a missing label appends the row directly under user_id.
    # Going through a temporary ``len(index)`` label could collide with an
    # existing label and overwrite that user's row. The row width follows
    # the frame instead of a hard-coded joke count.
    ratings.loc[user_id] = [0] * ratings.shape[1]
if __name__ == '__main__':
    # Demo run: load the data, then recommend two jokes in a row to a fake
    # user, recording a rating of 7 for each recommended joke.
    ratings, jokes, jokes_dict, mean_ratings, number_of_ratings = initialize()
    get_popular_jokes(mean_ratings, jokes_dict)
    get_worst_jokes(mean_ratings, jokes_dict)

    # Fake user: an id plus a rating vector (0 = not rated yet).
    user_id = 276804728338382858
    new_user = [0] * jokes_number
    # new_user[72] = new_user[105] = new_user[53] = new_user[89] = new_user[
    #     32] = 7
    # new_user[19] = new_user[155] = new_user[156] = new_user[151] = -7

    # get_recommanded_joke() expects the user's rating vector, not the id.
    joke_id, joke = get_recommanded_joke(ratings, jokes_dict, number_of_ratings,
                                         new_user)
    print(joke)
    write_rating(ratings, 7, joke_id, user_id)
    print(ratings.iloc[-1, :].to_string())

    print("Second joke")
    # Reload the vector so the rating written above is taken into account.
    new_user = ratings.loc[user_id].tolist()
    joke_id, joke = get_recommanded_joke(ratings, jokes_dict, number_of_ratings,
                                         new_user)
    print(joke)
    write_rating(ratings, 7, joke_id, user_id)
    print(ratings.iloc[-1, :].to_string())