diff --git a/.gitignore b/.gitignore
index c85baf5..434af93 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,5 +4,3 @@
 dataset/sample-submission.csv
 dataset/test.csv
 dataset/train.csv
-
-
diff --git a/src/__pycache__/utils.cpython-36.pyc b/src/__pycache__/utils.cpython-36.pyc
new file mode 100644
index 0000000..db049c2
Binary files /dev/null and b/src/__pycache__/utils.cpython-36.pyc differ
diff --git a/src/adaboost.py b/src/adaboost.py
new file mode 100644
index 0000000..58294ec
--- /dev/null
+++ b/src/adaboost.py
@@ -0,0 +1,105 @@
+from src.utils import sigmoid, batch_iter, dataloader, split_data,\
+    standardize, xavier_init, build_polynomial
+import numpy as np
+import pickle
+import csv
+
+class Config(object):
+    """Configuration object for the classifiers"""
+    def __init__(self, batch_size, num_epochs, learning_rate, lambda_):
+        self.batch_size = batch_size
+        self.num_epochs = num_epochs
+        self.learning_rate = learning_rate
+        self.lambda_ = lambda_
+
+
+class LogisticClassifier(object):
+    def __init__(self, config, train_set, test_set):
+        self.config = config
+        # construct non linear features
+        self.train_data, self.train_labels = train_set
+        self.test_data, self.test_labels = test_set
+        self.weights = xavier_init(np.shape(self.train_data[1]))
+        self.train_losses = []
+        self.test_losses = []
+        self.accuracy = 0
+        self.test_predictions = None
+
+    def __call__(self, input):
+        return sigmoid(input @ self.weights)
+
+    def loss(self, output, target, sample_weights=1):
+        loss = 1 / np.shape(target)[0] * (target - output).T\
+               @ (sample_weights * (target - output)) + self.config.lambda_ * self.weights.T @ \
+               self.weights
+        return loss
+
+    def grad(self, data_batch, target_batch, sample_weights=1):
+        return data_batch.T @ ((self(data_batch) - target_batch) * sample_weights *
+                               self(data_batch) * (1 - self(data_batch))) + \
+               self.config.lambda_ * self.weights
+
+    def sgd(self, param, data, target):
+        param -= self.config.learning_rate * self.grad(data, target)
+        return param
+
+    def train(self, weights=1, show_every=10):
+        num_batches = int(np.shape(self.train_data)[0]/self.config.batch_size)
+        for epoch in range(self.config.num_epochs):
+            if epoch % 50 == 0:
+                self.config.learning_rate *= 0.5
+            for batch_label, batch_input in batch_iter(
+                    self.train_labels, self.train_data, self.config.batch_size,
+                    num_batches=num_batches):
+                self.weights = self.sgd(self.weights, batch_input, batch_label)
+            train_loss = self.loss(self(self.train_data), self.train_labels)
+            if epoch % show_every == 0 or epoch == self.config.num_epochs - 1:
+                print("Epoch : ", epoch)
+                print("Train loss : ", train_loss)
+                self.test()
+        return weights
+
+    def test(self):
+        output = self(self.test_data)
+        test_loss = self.loss(output, self.test_labels)
+        self.test_losses.append(test_loss)
+        self.test_predictions = self.predict(output)
+        correct = np.sum(self.test_predictions == self.test_labels)
+        self.accuracy = correct / np.shape(self.test_data)[0]
+        print("Test loss :", test_loss)
+        print('Test accuracy :', self.accuracy)
+
+    def predict(self, output):
+        return output > 0.5
+
+    def save(self):
+        pickle.dump(self.weights, open('config/weights.p', 'wb'))
+
+    def load_weights(self):
+        self.weights = pickle.load(open('config/weights.p', 'rb'))
+
+    def export_predictions(self):
+        with open('prediction/submission.csv', 'w', newline='') as csvfile:
+            writer = csv.writer(csvfile, delimiter=',')
+            for i in range(len(self.test_predictions)):
+                writer.writerow([i, int(self.test_predictions[i])])
+
+
+class Adaboost(object):
+    def __init__(self, config, train_data, test_data, num_classifiers):
+        self.train_data, self.train_labels = train_data
+        self.test_data, self.test_labels = test_data
+        self.classifiers = [LogisticClassifier(config, (self.train_data, self.train_labels),
+                                               (self.test_data, self.test_labels))
+                            for i in range(num_classifiers)]
+        self.sample_weights = 1/np.shape(self.train_data)[0]
+        self.classifier_weights = np.ones(num_classifiers)
+
+
+if __name__ == '__main__':
+    x, y = dataloader(mode='train', reduced=False)
+    x = standardize(x)
+    train_dataset, test_dataset = split_data(x, y, ratio=0.9)
+    train_set = (build_polynomial(train_dataset[0]), train_dataset[1])
+    test_set = (build_polynomial(test_dataset[0]), test_dataset[1])
+    config = Config(batch_size=200, num_epochs=300, learning_rate=5*10**-4, lambda_=0.01)
+    log_classifier = LogisticClassifier(config, train_set, test_set)
+    log_classifier.train()
\ No newline at end of file
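The Adaboost class above only sets up uniform sample weights and unit classifier weights; the boosting loop itself is not part of this diff. As an illustration only, here is a minimal sketch of one standard (discrete) AdaBoost round that such a loop could build on. It assumes {0, 1} labels mapped to {-1, +1} and a weighted-fit API clf.train(weights=...) that LogisticClassifier does not implement yet; none of this is taken from the repository.

import numpy as np

def adaboost_round(clf, data, labels, sample_weights):
    """One boosting round: returns (classifier weight alpha, updated sample weights)."""
    clf.train(weights=sample_weights)              # weighted fit (assumed API)
    pred = 2 * clf.predict(clf(data)) - 1          # {0, 1} predictions -> {-1, +1}
    y = 2 * labels - 1                             # {0, 1} labels -> {-1, +1}
    err = np.sum(sample_weights * (pred != y)) / np.sum(sample_weights)
    err = np.clip(err, 1e-10, 1 - 1e-10)           # guard against err = 0 or 1
    alpha = 0.5 * np.log((1 - err) / err)          # vote of this classifier
    sample_weights = sample_weights * np.exp(-alpha * y * pred)
    return alpha, sample_weights / np.sum(sample_weights)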
diff --git a/src/logistic_regression.py b/src/logistic_regression.py
index 8c98e7b..8efdcb3 100644
--- a/src/logistic_regression.py
+++ b/src/logistic_regression.py
@@ -1,8 +1,8 @@
 import numpy as np
-from utils import batch_iter, dataloader, split_data, standardize
-
-num_epochs = 10
-batch_size = 100
+from src.utils import batch_iter, dataloader, split_data, standardize, xavier_init, adam
+import matplotlib.pyplot as plt
+num_epochs = 300
+batch_size = 300
 learning_rate = 10**-3
 
 x, y = dataloader(mode='train', reduced=False)
@@ -10,7 +10,30 @@
 train_dataset, test_dataset = split_data(x, y, ratio=0.9)
 test_data, test_target = test_dataset
 train_data, train_target = train_dataset
+print(np.shape(train_data))
 num_batches = int(np.shape(train_data)[0]/batch_size)
+# features up to degree two, plus x**3 (no mixed cubic terms)
+def build_polynomial(x):
+    base_mixed = np.zeros((np.shape(x)[0], int(np.shape(x)[1]*(np.shape(x)[1]-1)/2)))
+    # base_mixed_cube = np.zeros((np.shape(x)[0], int(np.shape(x)[1]**2)))
+    bias = np.ones(np.shape(x)[0])
+    counter = 0
+    # gaussian_base = np.zeros((np.shape(x)[0], int(np.shape(x)[1]*(np.shape(x)[1]-1)/2)))
+    for i in range(np.shape(x)[1]):
+        for j in range(i):
+            base_mixed[:, counter] = x[:, i] * x[:, j]
+            # gaussian_base[:, counter] = np.exp(-(x[:, i] - x[:, j])**2/(2*0.25))
+            counter += 1
+
+    counter = 0
+    base_mixed_cube = np.zeros((np.shape(x)[0], np.shape(x)[1]-2))
+
+    # for i in range(np.shape(x)[1]):
+    #     for j in range(np.shape(x)[1]):
+    #         base_mixed_cube[:, counter] = x[:, i]**2 * x[:, j]
+    #
+    base = np.hstack((bias[:, np.newaxis], x, base_mixed, x**2, x**3))
+    return base
 
 def sigmoid(x):
     return 1/(1 + np.exp(-x))
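A quick way to sanity-check build_polynomial is to count columns: with d input features the stacked base [bias | x | pairwise products | x**2 | x**3] should have 1 + 3d + d(d-1)/2 columns. A small self-contained check on random data, where d = 25 is only illustrative and not the project's actual feature count:

import numpy as np

d = 25                                  # illustrative feature count
x_demo = np.random.randn(100, d)
base = build_polynomial(x_demo)         # the function defined just above
expected_cols = 1 + 3 * d + d * (d - 1) // 2
assert base.shape == (100, expected_cols), (base.shape, expected_cols)
print(base.shape)                       # (100, 376) for d = 25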
@@ -18,44 +41,110 @@
 def sigmoid_prime(x):
     return sigmoid(x)*(1-sigmoid(x))
 
-def loss_mse(weights, x, target):
+def loss_mse(weights, x, target, lambda_=0):
     g_x = sigmoid(x @ weights)
     return 1/np.shape(target)[0] * (target - g_x).T @ (target - g_x)
 
-def gradient_mse(weights, x, target):
+def gradient_mse(weights, x, target, lambda_=0):
+    g_x = sigmoid(x @ weights)
+    return x.T @ ((g_x - target) * g_x * (1 - g_x))
+
+def loss_mse_reg(weights, x, target, lambda_=0):
     g_x = sigmoid(x @ weights)
-    return (g_x - target) * g_x * (1 - g_x) * x
+    return 1/np.shape(target)[0] * (target - g_x).T @ (target - g_x) + lambda_ * weights.T @ weights
 
-def loss_ce(weights, x, target):
+def gradient_mse_reg(weights, x, target, lambda_=0):
     g_x = sigmoid(x @ weights)
-    return 1/np.shape(x)[0] * np.sum(target * np.log(g_x) + (1-target)*np.log(1-g_x))
+    return x.T @ ((g_x - target) * g_x * (1 - g_x)) + lambda_ * weights
 
-def gradient_ce(weights, x, target):
+def loss_ce(weights, x, target, lambda_):
     g_x = sigmoid(x @ weights)
-    return 1/np.shape(target)[0] * x.T @ (target - g_x)
+    loss = -1/np.shape(x)[0] * np.sum(target * np.log(g_x) + (1 - target)*np.log(1 - g_x)) + \
+           lambda_ * weights.T @ weights
+    return loss
 
-def train_logistic_regression(loss_func, grad_func):
-    weights = np.zeros(30)
+def gradient_ce(weights, x, target, lambda_):
+    g_x = sigmoid(x @ weights)
+    return -1/np.shape(target)[0] * x.T @ (target - g_x) + lambda_ * weights
+
+def train_logistic_regression(loss_func, grad_func, lambda_=10**-3, show_every=10):
+    global learning_rate
+    iter_num = 1
+    m, v = 0, 0
+    poly_train_data = build_polynomial(train_data)
+    weights = xavier_init(np.shape(poly_train_data[1]))
+    # weights = np.zeros(np.shape(poly_train_data)[1])
     for epoch in range(num_epochs):
+        if epoch % 90 == 0:
+            learning_rate *= 0.5
         for batch_label, batch_input in batch_iter(
                 train_target, train_data, batch_size, num_batches=num_batches):
-            grad = grad_func(weights, batch_input, batch_label)
-            weights -= learning_rate * grad
-        train_loss = loss_func(weights, train_data, train_target)
-        print("Epoch : ", epoch)
-        print("Train loss : ", train_loss)
-    test_logistic_regression(weights, loss_func)
-
+            batch_input = build_polynomial(batch_input)
+            grad = grad_func(weights, batch_input, batch_label, lambda_)
+            # weights, m, v = adam(weights, m, v, 0.9, 0.999, learning_rate, grad, iter_num)
+            weights -= learning_rate*grad
+            iter_num += 1
+        train_loss = loss_func(weights, poly_train_data, train_target, lambda_)
+        if epoch % show_every == 0 or epoch == num_epochs - 1:
+            print("Epoch : ", epoch)
+            print("Train loss : ", train_loss)
+            # print('Weights :', weights)
+            weights, accuracy, test_loss = test_logistic_regression(weights, loss_func, lambda_)
+    return weights, accuracy, train_loss, test_loss
 
-def test_logistic_regression(weights, loss_func):
-    loss = loss_func(weights, test_data, test_target)
-    output = sigmoid(test_data @ weights)
+def test_logistic_regression(weights, loss_func, lambda_):
+    poly_test_data = build_polynomial(test_data)
+    loss = loss_func(weights, poly_test_data, test_target, lambda_)
+    output = sigmoid(poly_test_data @ weights)
     predicted = output > 0.5
     correct = np.sum(predicted == test_target)
     accuracy = correct/np.shape(test_data)[0]
     print("Test loss :", loss)
     print('Test accuracy :', accuracy)
+    return weights, accuracy, loss
+
+def find_best_lambda(loss_func, grad_func):
+    lambdas = np.logspace(-5, 0, 20)
+    weights_history = []
+    accuracies = []
+    train_losses = []
+    test_losses = []
+    best_weights = None
+    best_accuracy = 0
+    best_combination = 0
+    for idx, lambda_ in enumerate(lambdas):
+        weights, accuracy, train_loss, test_loss = train_logistic_regression(
+            loss_func, grad_func, lambda_)
+        weights_history.append(weights)
+        accuracies.append(accuracy)
+        train_losses.append(train_loss)
+        test_losses.append(test_loss)
+
+        if accuracy > best_accuracy:
+            best_accuracy = accuracy
+            best_weights = weights
+            best_combination = idx
+
+    print('best combination lambda : ', lambdas[best_combination])
+    return lambdas, best_weights, best_accuracy, test_losses, train_losses, best_combination
+
+def plot(x, train_loss, test_loss):
+    plt.plot(x, train_loss, label='train loss')
+    plt.plot(x, test_loss, label='test loss')
+    plt.show()
+
 
 if __name__ == '__main__':
-    train_logistic_regression(loss_ce, gradient_ce)
+    train_logistic_regression(loss_mse_reg, gradient_mse_reg, lambda_=0.01)
+    # lambdas, best_weights, best_accuracy, test_losses, train_losses, \
+    #     best_combination = find_best_lambda(loss_mse_reg, gradient_mse_reg)
+    # plot(lambdas, train_losses, test_losses)
+
+    best_lambda = 0.0012742749857
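The hand-derived gradients above are the most error-prone part of this file, and a finite-difference check catches sign and scaling mistakes cheaply. Below is a minimal sketch using the loss_ce / gradient_ce pair defined above on synthetic data. lambda_ is set to 0 because the penalty lambda_ * w.T @ w differentiates to 2*lambda_*w while the analytic gradients add lambda_*w (the factor 2 is presumably absorbed into lambda_), so only the unregularized part is compared exactly.

import numpy as np

rng = np.random.RandomState(0)
x_chk = rng.randn(50, 10)                       # synthetic data, not the dataset
t_chk = (rng.rand(50) > 0.5).astype(float)
w_chk = 0.1 * rng.randn(10)
eps = 1e-6

analytic = gradient_ce(w_chk, x_chk, t_chk, 0)
numeric = np.zeros_like(w_chk)
for i in range(len(w_chk)):
    w_p, w_m = w_chk.copy(), w_chk.copy()
    w_p[i] += eps
    w_m[i] -= eps
    numeric[i] = (loss_ce(w_p, x_chk, t_chk, 0) - loss_ce(w_m, x_chk, t_chk, 0)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))       # should be around 1e-8 or smaller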
diff --git a/src/neural_network_torch.py b/src/neural_network_torch.py
index a8c0087..229c814 100644
--- a/src/neural_network_torch.py
+++ b/src/neural_network_torch.py
@@ -5,13 +5,16 @@
 from torch.autograd import Variable
 from torch import nn
 import torch.nn.functional as f
-from utils import dataloader, standardize, split_data
+from src.utils import dataloader, standardize, split_data, build_polynomial
 
 x, y = dataloader(mode='train', reduced=False)
 x = standardize(x)
 train_dataset, test_dataset = split_data(x, y, ratio=0.9)
 test_data, test_target = test_dataset
 train_data, train_target = train_dataset
+test_data = build_polynomial(test_data)
+train_data = build_polynomial(train_data)
+num_features = np.shape(train_data)[1]
 
 train = torch.utils.data.TensorDataset(torch.from_numpy(train_data).type(torch.FloatTensor),
@@ -29,9 +32,9 @@ def __init__(self, batch_size=128, learning_rate=10**-4, num_epochs=10, load_weights=False):
         self.load_weights = load_weights
         self.num_epochs = num_epochs
         # architecture
-        self.fc_1 = nn.Linear(30, 256)
-        self.fc_2 = nn.Linear(256, 256)
-        self.fc_3 = nn.Linear(256, 2)
+        self.fc_1 = nn.Linear(num_features, 512)
+        self.fc_2 = nn.Linear(512, 512)
+        self.fc_3 = nn.Linear(512, 2)
 
     def forward(self, x):
         x = f.relu(self.fc_1(x))
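The diff only shows the wider architecture; the training loop lies outside the shown hunks. For readers of the patch, here is a minimal sketch of how a network with a 2-unit output layer is typically trained with CrossEntropyLoss. The class name Net, the loader name train_loader, and the Adam optimizer are assumptions rather than code from the repository; the literals (10 epochs, learning rate 1e-4) mirror the defaults in the __init__ signature above.

import torch
from torch import nn

model = Net()                                           # hypothetical class name
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()                       # matches the 2-unit fc_3

for epoch in range(10):
    for inputs, labels in train_loader:                 # hypothetical DataLoader over `train`
        optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, labels.long())         # labels as class indices {0, 1}
        loss.backward()
        optimizer.step()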
diff --git a/src/utils.py b/src/utils.py
index 4270513..03d5acd 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -1,10 +1,25 @@
 import numpy as np
 
+def xavier_init(size):
+    var = 2/(np.sum(size))
+    # std = sqrt(2 / (fan_in + fan_out))
+    return np.sqrt(var) * np.random.randn(*size)
 
+def adam(theta, m, v, beta_1, beta_2, learning_rate, gradient, iter_num):
+    m = (beta_1 * m + (1 - beta_1) * gradient)/(1-beta_1**iter_num)
+    v = (beta_2 * v + (1 - beta_2) * gradient**2)/(1-beta_2**iter_num)
+    return theta - learning_rate*m/(v**0.5 + 10**-8), m, v
 
-def dataloader(mode= 'train', reduced=False):
-    table = np.genfromtxt('dataset/' + mode + '.csv', dtype=float, delimiter=',', skip_header=1,
-                          converters={1: lambda x: float(x == b's')})
+
+def dataloader(mode='train', reduced=False):
+    print("Loading data ...")
+    file_name = '../dataset/' + mode + '.csv'
+    with open(file_name) as f:
+        first_line = f.readline()
+        column_headers = first_line.split(',')
+    indices_wo_phi = [idx for idx in range(30) if 'phi' not in column_headers[idx]]
+
+    table = np.genfromtxt(file_name, dtype=float, delimiter=',', skip_header=1,
+                          converters={1: lambda x: float(x == b's')}, usecols=indices_wo_phi)
 
     if reduced:
         features = table[:10000, 2:]
@@ -12,13 +27,17 @@
     else:
         features = table[:, 2:]
         labels = table[:, 1]
+    print("Data extracted.")
     if mode == 'train':
         return features, labels
     else:
         return features
 
+def sigmoid(x):
+    return 1/(1 + np.exp(-x))
+
 def standardize(x):
-    x = (x-np.mean(x, axis=0))/(np.std(x, axis=0))
+    x = (x-np.mean(x, axis=0))/(np.std(x, axis=0) + 10**-8)
     return x
 
 def split_data(x, y, ratio, seed=1):
@@ -65,5 +84,23 @@
         yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]
 
 
+def build_polynomial(x):
+    base_mixed = np.zeros((np.shape(x)[0], int(np.shape(x)[1]*(np.shape(x)[1]-1)/2)))
+    # base_mixed_cube = np.zeros((np.shape(x)[0], int(np.shape(x)[1]**2)))
+    bias = np.ones(np.shape(x)[0])
+    counter = 0
+    # gaussian_base = np.zeros((np.shape(x)[0], int(np.shape(x)[1]*(np.shape(x)[1]-1)/2)))
+    for i in range(np.shape(x)[1]):
+        for j in range(i):
+            base_mixed[:, counter] = x[:, i] * x[:, j]
+            # gaussian_base[:, counter] = np.exp(-(x[:, i] - x[:, j])**2/(2*0.25))
+            counter += 1
+    # for i in range(np.shape(x)[1]):
+    #     for j in range(np.shape(x)[1]):
+    #         base_mixed_cube[:, counter] = x[:, i]**2 * x[:, j]
+    #
+    base = np.hstack((bias[:, np.newaxis], x, base_mixed, x**2, x**3))
+    return base
+
 if __name__ == '__main__':
     pass
\ No newline at end of file
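One remark on the adam helper above: it stores the bias-corrected moments back into m and v, so each subsequent call corrects values that were already corrected. In the original formulation (Kingma and Ba, 2014) the raw moments are kept as state and the correction is applied only to the copies used for the step. Below is a drop-in sketch with the same signature, offered as a comparison rather than as the project's implementation.

import numpy as np

def adam_step(theta, m, v, beta_1, beta_2, learning_rate, gradient, iter_num):
    # Raw moments are the persistent state; bias correction only affects the update.
    m = beta_1 * m + (1 - beta_1) * gradient
    v = beta_2 * v + (1 - beta_2) * gradient ** 2
    m_hat = m / (1 - beta_1 ** iter_num)
    v_hat = v / (1 - beta_2 ** iter_num)
    return theta - learning_rate * m_hat / (np.sqrt(v_hat) + 1e-8), m, v

It is called exactly like the existing helper, e.g. weights, m, v = adam_step(weights, m, v, 0.9, 0.999, learning_rate, grad, iter_num).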