Commit
Lorenzo Terenzi committed on Oct 15, 2017 · 1 parent 2b0b1cf · commit c55c52e
Showing 9 changed files with 679 additions and 61 deletions.
@@ -4,5 +4,3 @@
 dataset/sample-submission.csv
 dataset/test.csv
 dataset/train.csv
Four of the changed files are not rendered in this view: two generated files, one large diff, and one binary file.
@@ -0,0 +1,105 @@
from src.utils import sigmoid, batch_iter, dataloader, split_data, \
    standardize, xavier_init, build_polynomial
import numpy as np
import pickle
import csv


class Config(object):
    """Configuration object for the classifiers."""

    def __init__(self, batch_size, num_epochs, learning_rate, lambda_):
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        self.lambda_ = lambda_


class LogisticClassifier(object):
    """Regularised logistic regression trained with mini-batch gradient descent."""

    def __init__(self, config, train_set, test_set):
        self.config = config
        # train_set/test_set are (features, labels) tuples whose features have
        # already been expanded with build_polynomial
        self.train_data, self.train_labels = train_set
        self.test_data, self.test_labels = test_set
        # one weight per feature column, initialised with Xavier scaling
        self.weights = xavier_init(np.shape(self.train_data[1]))
        self.train_losses = []
        self.test_losses = []
        self.accuracy = 0
        self.test_predictions = None

    def __call__(self, input):
        return sigmoid(input @ self.weights)

    def loss(self, output, target, sample_weights=1):
        # (optionally sample-weighted) MSE plus L2 penalty on the weights
        loss = 1 / np.shape(target)[0] * (target - output).T \
            @ (sample_weights * (target - output)) \
            + self.config.lambda_ * self.weights.T @ self.weights
        return loss

    def grad(self, data_batch, target_batch, sample_weights=1):
        # gradient of the regularised MSE loss through the sigmoid
        output = self(data_batch)
        return data_batch.T @ ((output - target_batch) * sample_weights *
                               output * (1 - output)) \
            + self.config.lambda_ * self.weights

    def sdg(self, param, data, target):
        # single stochastic gradient descent step
        param -= self.config.learning_rate * self.grad(data, target)
        return param

    def train(self, weights=1, show_every=10):
        # `weights` (per-sample weights for boosting) is accepted but not yet
        # applied in the updates
        num_batches = int(np.shape(self.train_data)[0] / self.config.batch_size)
        for epoch in range(self.config.num_epochs):
            if epoch % 50 == 0:
                self.config.learning_rate *= 0.5
            for batch_label, batch_input in batch_iter(
                    self.train_labels, self.train_data, self.config.batch_size,
                    num_batches=num_batches):
                self.weights = self.sdg(self.weights, batch_input, batch_label)
            train_loss = self.loss(self(self.train_data), self.train_labels)
            if epoch % show_every == 0 or epoch == self.config.num_epochs - 1:
                print("Epoch : ", epoch)
                print("Train loss : ", train_loss)
                self.test()
        return weights

    def test(self):
        output = self(self.test_data)
        test_loss = self.loss(output, self.test_labels)
        self.test_losses.append(test_loss)
        self.test_predictions = self.predict(output)
        correct = np.sum(self.test_predictions == self.test_labels)
        self.accuracy = correct / np.shape(self.test_data)[0]
        print("Test loss :", test_loss)
        print('Test accuracy :', self.accuracy)

    def predict(self, output):
        return output > 0.5

    def save(self):
        pickle.dump(self.weights, open('config/weights.p', 'wb'))

    def load_weights(self):
        self.weights = pickle.load(open('config/weights.p', 'rb'))

    def export_predictions(self):
        with open('prediction/submission.csv', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            for i in range(len(self.test_predictions)):
                # one row per test sample: index and 0/1 prediction
                writer.writerow([i, int(self.test_predictions[i])])


class Adaboost(object):
    def __init__(self, config, train_data, test_data, num_classifiers):
        self.train_data, self.train_labels = train_data
        self.test_data, self.test_labels = test_data
        self.classifiers = [LogisticClassifier(config,
                                               (self.train_data, self.train_labels),
                                               (self.test_data, self.test_labels))
                            for i in range(num_classifiers)]
        # uniform initial sample weight; one voting weight per classifier
        self.sample_weights = 1 / np.shape(self.train_data)[0]
        self.classifier_weights = np.ones(num_classifiers)


if __name__ == '__main__':
    x, y = dataloader(mode='train', reduced=False)
    x = standardize(x)
    train_dataset, test_dataset = split_data(x, y, ratio=0.9)
    train_set = (build_polynomial(train_dataset[0]), train_dataset[1])
    test_set = (build_polynomial(test_dataset[0]), test_dataset[1])
    config = Config(batch_size=200, num_epochs=300, learning_rate=5 * 10**-4,
                    lambda_=0.01)
    log_classifier = LogisticClassifier(config, train_set, test_set)
    log_classifier.train()
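The Adaboost class ships only its constructor in this commit. Purely as an illustration of how the pieces could fit together, here is a minimal sketch of one boosting loop using weighted resampling of the training set; the function name, the resampling strategy, and the 0/1-label assumption are additions, not part of the commit.

import numpy as np

# Hypothetical boosting loop (not in the commit): each weak learner is trained on a
# bootstrap sample drawn according to the current sample weights, then the weights
# are updated with the standard discrete-AdaBoost rule.
def adaboost_train(booster):
    n = np.shape(booster.train_data)[0]
    sample_weights = np.full(n, 1.0 / n)
    for m, clf in enumerate(booster.classifiers):
        # resample the training set according to the current sample weights
        idx = np.random.choice(n, size=n, p=sample_weights)
        clf.train_data = booster.train_data[idx]
        clf.train_labels = booster.train_labels[idx]
        clf.train(show_every=100)
        # weighted error of this round's classifier on the full training set
        predictions = clf.predict(clf(booster.train_data))
        errors = predictions != booster.train_labels
        err = np.clip(np.sum(sample_weights * errors), 1e-10, 1 - 1e-10)
        alpha = 0.5 * np.log((1 - err) / err)   # voting weight of this classifier
        booster.classifier_weights[m] = alpha
        # up-weight misclassified samples and renormalise
        sample_weights *= np.exp(alpha * np.where(errors, 1.0, -1.0))
        sample_weights /= np.sum(sample_weights)

At prediction time the ensemble would combine the individual classifiers with these classifier_weights, for example a weighted majority vote over the thresholded outputs.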
@@ -1,61 +1,150 @@
 import numpy as np
-from utils import batch_iter, dataloader, split_data, standardize
-
-num_epochs = 10
-batch_size = 100
+from src.utils import batch_iter, dataloader, split_data, standardize, xavier_init, adam
+import matplotlib.pyplot as plt
+num_epochs = 300
+batch_size = 300
 learning_rate = 10**-3

 x, y = dataloader(mode='train', reduced=False)
 x = standardize(x)
 train_dataset, test_dataset = split_data(x, y, ratio=0.9)
 test_data, test_target = test_dataset
 train_data, train_target = train_dataset
 print(np.shape(train_data))
 num_batches = int(np.shape(train_data)[0]/batch_size)
+# till now up to degree two its fine + x**3 (no mixed cubic terms
+def build_polynomial(x):
+    base_mixed = np.zeros((np.shape(x)[0], int(np.shape(x)[1]*(np.shape(x)[1]-1)/2)))
+    # base_mixed_cube = np.zeros((np.shape(x)[0], int(np.shape(x)[1]**2)))
+    bias = np.ones(np.shape(x)[0])
+    counter = 0
+    # gaussian_base = np.zeros((np.shape(x)[0], int(np.shape(x)[1]*(np.shape(x)[1]-1)/2)))
+    for i in range(np.shape(x)[1]):
+        for j in range(i):
+            base_mixed[:, counter] = x[:, i] * x[:, j]
+            # gaussian_base[:, counter] = np.exp(-(x[:, i] - x[:, j])**2/(2*0.25))
+            counter += 1
+
+    counter = 0
+    base_mixed_cube = np.zeros((np.shape(x)[0], np.shape(x)[1]-2))
+
+    # for i in range(np.shape(x)[1]):
+    #     for j in range(np.shape(x)[1]):
+    #         base_mixed_cube[:, counter] = x[:, i]**2 * x[:, j]
+    #
+    base = np.hstack((bias[:, np.newaxis], x, base_mixed, x**2, x**3))
+    return base

 def sigmoid(x):
     return 1/(1 + np.exp(-x))

 def sigmoid_prime(x):
     return sigmoid(x)*(1-sigmoid(x))

-def loss_mse(weights, x, target):
+def loss_mse(weights, x, target, lambda_=0):
     g_x = sigmoid(x @ weights)
     return 1/np.shape(target)[0] * (target - g_x).T @ (target - g_x)

-def gradient_mse(weights, x, target):
+def gradient_mse(weights, x, target, lambda_= 0):
     g_x = sigmoid(x @ weights)
     return x.T @ ((g_x - target) * g_x * (1 - g_x))

 def loss_mse_reg(weights, x, target, lambda_ = 0):
     g_x = sigmoid(x @ weights)
-    return (g_x - target) * g_x * (1 - g_x) * x
+    return 1/np.shape(target)[0] * (target - g_x).T @ (target - g_x) + lambda_ * weights.T @ weights

-def loss_ce(weights, x, target):
+def gradient_mse_reg(weights, x, target, lambda_ = 0):
     g_x = sigmoid(x @ weights)
-    return 1/np.shape(x)[0] * np.sum(target * np.log(g_x) + (1-target)*np.log(1-g_x))
+    return x.T @ ((g_x - target) * g_x * (1 - g_x)) + lambda_ * weights

-def gradient_ce(weights, x, target):
+def loss_ce(weights, x, target, lambda_):
     g_x = sigmoid(x @ weights)
-    return 1/np.shape(target)[0] * x.T @ (target - g_x)
+    loss = -1/np.shape(x)[0] * np.sum(target * np.log(g_x) + (1 - target)*np.log(1 - g_x)) + \
+        lambda_ * weights.T @ weights
+    return loss

-def train_logistic_regression(loss_func, grad_func):
-    weights = np.zeros(30)
+def gradient_ce(weights, x, target, lambda_):
+    g_x = sigmoid(x @ weights)
+    return -1/np.shape(target)[0] * x.T @ (target - g_x) + lambda_ * weights

+def train_logistic_regression(loss_func, grad_func, lambda_ = 10**-3, show_every=10):
+    global learning_rate
+    iter_num = 1
-    m, v = 0, 0
+    # m, v = 0, 0
+    poly_train_data = build_polynomial(train_data)
+    weights = xavier_init(np.shape(poly_train_data[1]))
+    # weights = np.zeros(np.shape(poly_train_data)[1])
     for epoch in range(num_epochs):
+        if epoch % 90 == 0:
+            learning_rate *= 0.5
         for batch_label, batch_input in batch_iter(
                 train_target, train_data, batch_size, num_batches=num_batches):
-            grad = grad_func(weights, batch_input, batch_label)
-            weights -= learning_rate * grad
-        train_loss = loss_func(weights, train_data, train_target)
-        print("Epoch : ", epoch)
-        print("Train loss : ", train_loss)
-    test_logistic_regression(weights, loss_func)
+            batch_input = build_polynomial(batch_input)
+            grad = grad_func(weights, batch_input, batch_label, lambda_)
+            # weights, m, v = adam(weights, m, v, 0.9, 0.999, learning_rate, grad, iter_num)
+            weights -= learning_rate*grad
+            iter_num += 1
+        train_loss = loss_func(weights, poly_train_data, train_target, lambda_)
+        if epoch % show_every == 0 or epoch == num_epochs - 1:
+            print("Epoch : ", epoch)
+            print("Train loss : ", train_loss)
+            # print('Weights :', weights)
+    weights, accuracy, test_loss = test_logistic_regression(weights, loss_func, lambda_)
+    return weights, accuracy, train_loss, test_loss

-def test_logistic_regression(weights, loss_func):
-    loss = loss_func(weights, test_data, test_target)
-    output = sigmoid(test_data @ weights)
+def test_logistic_regression(weights, loss_func, lambda_):
+    poly_test_data = build_polynomial(test_data)
+    loss = loss_func(weights, poly_test_data, test_target, lambda_)
+    output = sigmoid(poly_test_data @ weights)
     predicted = output > 0.5
     correct = np.sum(predicted == test_target)
     accuracy = correct/np.shape(test_data)[0]
     print("Test loss :", loss)
     print('Test accuracy :', accuracy)
+    return weights, accuracy, loss

+def find_best_lambda(loss_func, grad_func):
+    lambdas = np.logspace(-5, 0, 20)
+    weights_history = []
+    accuracies = []
+    train_losses = []
+    test_losses = []
+    best_weigths = None
+    best_accurary = 0
+    best_combination = 0
+    for idx, lambda_ in enumerate(lambdas):
+        weights, accuracy, train_loss, test_loss = train_logistic_regression(
+            loss_func, grad_func, lambda_)
+        weights_history.append(weights)
+        accuracies.append(accuracy)
+        train_losses.append(train_loss)
+        test_losses.append(test_loss)
+
+        if accuracy > best_accurary:
+            best_accurary = accuracy
+            best_weigths = weights
+            best_combination = idx
+
+    print('best combination lambda : ', lambdas[best_combination])
+    return lambdas, best_weigths, best_accurary, test_losses, train_losses, best_combination

+def plot(x, train_loss, test_loss):
+    plt.plot(x, train_loss, label='train loss')
+    plt.plot(x, test_loss, label='test loss')
+    plt.show()


 if __name__ == '__main__':
-    train_logistic_regression(loss_ce, gradient_ce)
+    train_logistic_regression(loss_mse_reg, gradient_mse_reg, lambda_= 0.01)
+    # lambdas, best_weigths, best_accurary, test_losses, train_losses, \
+    #     best_combination = find_best_lambda(loss_mse_reg, gradient_mse_reg)
+    # plot(lambdas, train_losses, test_losses)

+    best_lambda = 0.0012742749857
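As a side note on scale: build_polynomial above stacks a bias column, the raw features, every pairwise product x_i * x_j with i > j, and the elementwise squares and cubes. A quick worked check of the resulting width, assuming 30 raw columns (an assumption taken from the removed weights = np.zeros(30) initialisation):

d = 30                                        # raw feature count (assumption, see lead-in)
expanded = 1 + d + d * (d - 1) // 2 + d + d   # bias + x + pairwise products + x**2 + x**3
print(expanded)                               # prints 526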
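The sweep that presumably produced best_lambda = 0.0012742749857 is left commented out in the __main__ block above. A hedged sketch of how it could be driven end to end; note that plot() as committed attaches label= to each curve but never calls plt.legend(), so the legend call below is an addition:

# Hypothetical driver for the regularisation sweep (not part of the commit).
lambdas, best_weights, best_accuracy, test_losses, train_losses, best_idx = \
    find_best_lambda(loss_mse_reg, gradient_mse_reg)
plt.semilogx(lambdas, train_losses, label='train loss')
plt.semilogx(lambdas, test_losses, label='test loss')
plt.axvline(lambdas[best_idx], linestyle='--', label='best lambda')
plt.legend()
plt.show()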