Commit 5363f22

some stuff
Lorenzo Terenzi committed Oct 29, 2017
1 parent 9c26061 commit 5363f22
Showing 8 changed files with 1,136,672 additions and 91 deletions.
217 changes: 155 additions & 62 deletions .idea/workspace.xml

Large diffs are not rendered by default.

Binary file modified src/__pycache__/utils.cpython-36.pyc
Binary file not shown.
41 changes: 26 additions & 15 deletions src/ensemble_log_regression.py
@@ -132,10 +132,13 @@ def test(self):
"""Tests classifier on test set"""
output = self(self.test_data)
self.test_loss = self.loss(output, self.test_labels)
# record values
self.test_losses.append(self.test_loss)
self.test_predictions = self.predict(output)

correct = np.sum(self.test_predictions == self.test_labels)
self.accuracy = correct / np.shape(self.test_data)[0]
# record accuracy
self.accuracies.append(self.accuracy)
if self.accuracy > self.best_accuracy:
self.best_accuracy = self.accuracy
@@ -144,8 +147,15 @@ def test(self):
print('Test accuracy :', self.accuracy)

def predict(self, output):
"""Predicts label from output of classifier"""
return output > 0.5
y_pred = output > 0.5
return y_pred

def predict_submission(self, output):
"""Applies 0.5 treshold on output and tranforms 0 predictions to -1 and """
y_pred = np.zeros(np.shape(output)[0])
y_pred[np.where(output <= 0.5)] = -1
y_pred[np.where(output > 0.5)] = 1
return y_pred

def save(self):
"""Save the weights of the model"""
@@ -169,7 +179,7 @@ def plot_convergence(self):
x = np.arange(0, self.config.num_epochs)
train_trend, = ax.plot(x, self.train_losses, label="Train loss")
test_trend, = ax.plot(x, self.test_losses, label="Test loss")
# ax.legend(loc='lower right')
ax.legend(loc='upper right')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Loss history')
@@ -178,11 +188,11 @@ def plot_accuracy(self):
def plot_accuracy(self):
fig, ax = plt.subplots()
x = np.arange(0, self.config.num_epochs)
train_trend, = ax.plot(x, self.train_accuracies, label="Train accuracy")
test_trend, = ax.plot(x, self.accuracies, label="Test accuracy")
train_trend, = ax.plot(x, self.train_accuracies, 'r--', label="Train accuracy")
test_trend, = ax.plot(x, self.accuracies, 'b-o', label="Test accuracy")
ax.legend(loc='lower right')
plt.xlabel('accuracy')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Learning curves')
plt.show()

@@ -342,29 +352,30 @@ def find_best_batch(batch_sizes):
# print(x_test.shape)
x = standardize(x)
x_test = standardize(x_test)
train_dataset, test_dataset = split_data(x, y, ratio=0.9)
# train_dataset, test_dataset = split_data(x, y, ratio=0.9)
# train_set = (build_polynomial(train_dataset[0]), train_dataset[1])
# test_set = (build_polynomial(test_dataset[0]), test_dataset[1])
# # # # x = dataloader(mode='test', reduced=False)
# # # # x = standardize(x)
# # # # x = build_polynomial(x)
config = Config(batch_size=120, num_epochs=10, learning_rate=5 * 10 ** -4,
config = Config(batch_size=120, num_epochs=400, learning_rate=5 * 10 ** -4,
lambda_=2.15443469003e-05, mode='train')
log_class = LogisticClassifier(config, (build_polynomial(x), y))
log_class.train(show_every=1)
log_class.train(show_every=10)
predictions_test = log_class.predict_submission(log_class(build_polynomial(x_test)))
log_class.plot_accuracy()
log_class.plot_convergence()
ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 1, LogisticClassifier,
ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 5, LogisticClassifier,
label='ensemble_2_log')

#
ensemble.train()
# ensemble.plot_convergence()
# ensemble.plot_accuracy()
ensemble.save()
# ensemble.load_weights()
# ensemble.save()
# # ensemble.load_weights()
predictions_test = ensemble.predict(ensemble(build_polynomial(x_test)))
create_csv_submission(np.arange(350000, 350000 + x_test.shape[0]), predictions_test,
'dataset/submission_07.csv')
'dataset/submission_10.csv')
#
# predictions = ensemble.predict(ensemble(build_polynomial(x)))
# y[np.where(y == 0)] = -1
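
create_csv_submission itself is not part of this diff. Assuming the usual Id,Prediction CSV layout implied by the call above, a minimal sketch might look like:

import csv

def create_csv_submission(ids, y_pred, name):
    # Write one (Id, Prediction) row per test sample
    with open(name, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['Id', 'Prediction'])
        writer.writeheader()
        for id_, pred in zip(ids, y_pred):
            writer.writerow({'Id': int(id_), 'Prediction': int(pred)})
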
14 changes: 7 additions & 7 deletions src/old_models/logistic_regression.py
@@ -1,9 +1,9 @@
import numpy as np
from src.utils import batch_iter, dataloader, split_data, standardize, xavier_init, adam
from src.utils import batch_iter, dataloader, split_data, standardize
import matplotlib.pyplot as plt
num_epochs = 300
batch_size = 300
learning_rate = 10**-4
learning_rate = 10**-3

x, y = dataloader(mode='train', reduced=False)
x = standardize(x)
@@ -72,15 +72,15 @@ def train_logistic_regression(loss_func, grad_func, lambda_ = 10**-3, show_every
iter_num = 1
m, v = 0, 0
# m, v = 0, 0
poly_train_data = build_polynomial(train_data)
weights = xavier_init(np.shape(poly_train_data[1]))
poly_train_data = (train_data)
weights = np.zeros((np.shape(poly_train_data[1])))
# weights = np.zeros(np.shape(poly_train_data)[1])
for epoch in range(num_epochs):
if epoch % 90 == 0:
learning_rate *= 0.5
for batch_label, batch_input in batch_iter(
train_target, train_data, batch_size, num_batches=num_batches):
batch_input = build_polynomial(batch_input)
batch_input = (batch_input)
grad = grad_func(weights, batch_input, batch_label, lambda_)
# weights, m, v = adam(weights, m, v, 0.9, 0.999, learning_rate, grad, iter_num)
weights -= learning_rate*grad
@@ -94,7 +94,7 @@ def train_logistic_regression(loss_func, grad_func, lambda_ = 10**-3, show_every
return weights, accuracy, train_loss, test_loss

def test_logistic_regression(weights, loss_func, lambda_):
poly_test_data = build_polynomial(test_data)
poly_test_data = (test_data)
loss = loss_func(weights, poly_test_data, test_target, lambda_)
output = sigmoid(poly_test_data @ weights)
predicted = output > 0.5
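
The training loop above carries a commented-out call of the form adam(weights, m, v, 0.9, 0.999, learning_rate, grad, iter_num). Its implementation is not shown in this diff; a standard Adam update consistent with that signature (the eps default is an assumption) would be:

import numpy as np

def adam(weights, m, v, beta1, beta2, learning_rate, grad, iter_num, eps=1e-8):
    # Exponential moving averages of the gradient and its square
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    # Bias correction for the early iterations
    m_hat = m / (1 - beta1 ** iter_num)
    v_hat = v / (1 - beta2 ** iter_num)
    # Scaled parameter update
    weights = weights - learning_rate * m_hat / (np.sqrt(v_hat) + eps)
    return weights, m, v
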
@@ -138,7 +138,7 @@ def plot(x, train_loss, test_loss):


if __name__ == '__main__':
# train_logistic_regression(loss_ce, gradient_ce, lambda_= 0.01)
train_logistic_regression(loss_ce, gradient_ce, lambda_= 0.01)
# lambdas, best_weigths, best_accurary, test_losses, train_losses, \
# best_combination = find_best_lambda(loss_mse_reg, gradient_mse_reg)
# plot(lambdas, train_losses, test_losses)
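
loss_ce and gradient_ce are invoked as grad_func(weights, batch_input, batch_label, lambda_) but defined elsewhere in the file. A standard L2-regularized cross-entropy gradient for logistic regression, consistent with those call sites, would be:

import numpy as np

def sigmoid(z):
    # Logistic function
    return 1.0 / (1.0 + np.exp(-z))

def gradient_ce(weights, data, labels, lambda_):
    # Gradient of the averaged cross-entropy loss plus an L2 penalty:
    # X^T (sigmoid(Xw) - y) / N + lambda * w
    n = data.shape[0]
    error = sigmoid(data @ weights) - labels
    return data.T @ error / n + lambda_ * weights
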
12 changes: 5 additions & 7 deletions src/run.py
@@ -8,13 +8,11 @@ def main():
x_test = dataloader(mode='test', reduced=False)
x = standardize(x)
x_test = standardize(x_test)
config = Config(batch_size=120, num_epochs=300, learning_rate=5*10**-4,
lambda_=2.15443469003e-05,
mode='train')
ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 50, LogisticClassifier,
label='ensemble_50_log')
ensemble.train()
predictions_test = ensemble.predict(ensemble(build_polynomial(x_test)))
config = Config(batch_size=120, num_epochs=400, learning_rate=5 * 10 ** -4,
lambda_=2.15443469003e-05, mode='train')
log_class = LogisticClassifier(config, (build_polynomial(x), y))
log_class.train(show_every=10)
predictions_test = log_class.predict_submission(log_class(build_polynomial(x_test)))
create_csv_submission(np.arange(350000, 350000 + x_test.shape[0]), predictions_test,
'dataset/submission_0x.csv')

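
The Config class is not shown in this diff; judging from the call sites in run.py and ensemble_log_regression.py, a minimal hyperparameter container along these lines would suffice:

class Config:
    # Hypothetical sketch inferred from the Config(...) call sites
    def __init__(self, batch_size, num_epochs, learning_rate, lambda_, mode):
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        self.lambda_ = lambda_
        self.mode = mode
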
1 change: 1 addition & 0 deletions src/utils.py
@@ -1,5 +1,6 @@
import numpy as np
import csv
np.random.seed(seed=3)

def predict_labels(weights, data):
"""Generates class predictions given weights, and a test data matrix"""