-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrunALS.py
118 lines (79 loc) · 3.95 KB
/
runALS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from utils.arg_parser import extract_args_from_json
from utils.data_provider import split_dataset
from utils.reset_seed import set_seeds
from utils.experiment_builder import ExperimentBuilderNN
from dataloaders.TestDataLoader import UserIndexTestDataLoader
from utils.evaluation_metrics import precision_hit_coverage_ratio, movie_diversity
import numpy as np
import torch
from torch.utils.data import DataLoader
import implicit
from scipy import sparse
class MFExperimentBuilder(ExperimentBuilderNN):
    """Experiment hooks for a user/movie scoring model trained with MSE loss.

    Extends ``ExperimentBuilderNN`` (defined elsewhere in the project). The
    methods below are presumably called by that base class's train/eval
    loops -- confirm against its implementation.
    """

    # Mean-squared-error between predicted scores and target ratings.
    criterion = torch.nn.MSELoss()

    def pre_epoch_init_function(self):
        """Ask the training dataset to (re)draw its negative samples."""
        dataset = self.train_loader.dataset
        dataset.negative_sampling()

    def train_iteration(self, idx, values_to_unpack):
        """Compute the training loss for one batch.

        Args:
            idx: Batch index; not used by this method.
            values_to_unpack: Indexable batch whose first three entries are
                the user indexes, movie indexes and ratings tensors.

        Returns:
            The ``criterion`` loss between model predictions and ratings.
        """
        device = self.device
        users = values_to_unpack[0].to(device)
        movies = values_to_unpack[1].to(device)
        # Ratings are cast to float after being moved to the device.
        targets = values_to_unpack[2].to(device).float()
        return self.criterion(self.model(users, movies), targets)

    def eval_iteration(self, values_to_unpack):
        """Build a top-k slate of item indexes for every user in the batch.

        Args:
            values_to_unpack: Indexable batch whose first entry iterates over
                per-user index tensors (each must support ``.item()``).

        Returns:
            The per-user ``torch.topk`` index tensors stacked along dim 0.
        """
        top_k = []
        for user_entry in values_to_unpack[0]:
            user_id = user_entry.item()
            # Pair this user with every item id in [0, num_items).
            all_items = np.arange(self.model.num_items)
            repeated_user = np.full((self.model.num_items,), user_id)
            item_tensor = torch.from_numpy(all_items).to(self.device)
            user_tensor = torch.from_numpy(repeated_user).to(self.device)
            scores = self.model(user_tensor, item_tensor)
            # Only the positions of the best-scoring items are kept.
            best = torch.topk(scores, self.configs['slate_size'])
            top_k.append(best.indices)
        return torch.stack(top_k, dim=0)
def experiments_run():
    """Fit an implicit ALS model on the training matrix and report test metrics.

    Reads the run configuration, splits the dataset, fits
    ``implicit.als.AlternatingLeastSquares`` and then, for every slate size in
    ``configs['slate_size']``, prints the per-year recommendation counts,
    the precision / hit-ratio / category-coverage triple and the diversity.
    """
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    (df_train, df_test, df_train_matrix, df_test_matrix,
     movies_categories, titles) = split_dataset(configs)

    eval_dataset = UserIndexTestDataLoader(df_test, df_test_matrix, df_train_matrix)
    test_loader = DataLoader(
        eval_dataset,
        batch_size=configs['test_batch_size'],
        shuffle=True,
        num_workers=4,
        drop_last=False,
    )

    als_model = implicit.als.AlternatingLeastSquares(
        regularization=configs['weight_decay'],
        iterations=50,
        factors=configs['embed_dims'],
    )

    # The transposed training matrix is used for fitting, the untransposed
    # one for generating recommendations.
    transposed_train = sparse.coo_matrix(df_train_matrix.to_numpy().T)
    train_csr = sparse.csr_matrix(df_train_matrix.to_numpy())

    # train the model on a sparse matrix of item/user/confidence weights
    als_model.fit(transposed_train)

    for slate_size in configs['slate_size']:
        print(f'Test for {slate_size}')
        recommendations = als_model.recommend_all(train_csr, N=slate_size)

        recommended_rows = []
        ground_truth_slates = []
        for batch in test_loader:
            # batch[0] holds the user indexes; look up each user's slate.
            recommended_rows.extend(
                recommendations[int(user)] for user in batch[0]
            )

            # batch[1] holds per-user relevance rows; collect the column
            # indexes of the non-zero entries, grouped by row.
            relevance = batch[1].cpu()
            hits = np.nonzero(relevance)
            _, hits_per_row = np.unique(hits[:, 0], return_counts=True)
            boundaries = np.cumsum(hits_per_row)[:-1]
            ground_truth_slates.extend(np.split(hits[:, 1], boundaries))

        predicted_slates = torch.from_numpy(np.vstack(recommended_rows))

        precision, hr, cc = precision_hit_coverage_ratio(
            predicted_slates, ground_truth_slates, movies_categories
        )
        diversity = movie_diversity(predicted_slates, len(df_train_matrix.columns))

        # Count years
        years_dict = {year: 0 for year in np.unique(titles)}
        for slate in predicted_slates:
            for movie in slate:
                years_dict[titles[movie]] += 1

        print(years_dict)
        print(precision, hr, cc)
        print(diversity)
# Script entry point: run the ALS experiment only when executed directly.
if __name__ == "__main__":
    experiments_run()