Commit 5363f22

some stuff
Lorenzo Terenzi committed Oct 29, 2017
1 parent 9c26061 commit 5363f22
Showing 8 changed files with 1,136,672 additions and 91 deletions.
217 changes: 155 additions & 62 deletions .idea/workspace.xml

Large diffs are not rendered by default.

Binary file modified src/__pycache__/utils.cpython-36.pyc
Binary file not shown.
41 changes: 26 additions & 15 deletions src/ensemble_log_regression.py
@@ -132,10 +132,13 @@ def test(self):
"""Tests classifier on test set"""
output = self(self.test_data)
self.test_loss = self.loss(output, self.test_labels)
# record values
self.test_losses.append(self.test_loss)
self.test_predictions = self.predict(output)

correct = np.sum(self.test_predictions == self.test_labels)
self.accuracy = correct / np.shape(self.test_data)[0]
# record accuracy
self.accuracies.append(self.accuracy)
if self.accuracy > self.best_accuracy:
self.best_accuracy = self.accuracy
@@ -144,8 +147,15 @@ def test(self):
print('Test accuracy :', self.accuracy)

def predict(self, output):
"""Predicts label from output of classifier"""
return output > 0.5
y_pred = output > 0.5
return y_pred

def predict_submission(self, output):
"""Applies 0.5 treshold on output and tranforms 0 predictions to -1 and """
y_pred = np.zeros(np.shape(output)[0])
y_pred[np.where(output <= 0.5)] = -1
y_pred[np.where(output > 0.5)] = 1
return y_pred

def save(self):
"""Save the weights of the model"""
@@ -169,7 +179,7 @@ def plot_convergence(self):
x = np.arange(0, self.config.num_epochs)
train_trend, = ax.plot(x, self.train_losses, label="Train loss")
test_trend, = ax.plot(x, self.test_losses, label="Test loss")
# ax.legend(loc='lower right')
ax.legend(loc='upper right')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Loss history')
@@ -178,11 +188,11 @@ def plot_accuracy(self):
def plot_accuracy(self):
fig, ax = plt.subplots()
x = np.arange(0, self.config.num_epochs)
train_trend, = ax.plot(x, self.train_accuracies, label="Train accuracy")
test_trend, = ax.plot(x, self.accuracies, label="Test accuracy")
train_trend, = ax.plot(x, self.train_accuracies, 'r--', label="Train accuracy")
test_trend, = ax.plot(x, self.accuracies, 'b-o', label="Test accuracy")
ax.legend(loc='lower right')
plt.xlabel('accuracy')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Learning curves')
plt.show()

@@ -342,29 +352,30 @@ def find_best_batch(batch_sizes):
# print(x_test.shape)
x = standardize(x)
x_test = standardize(x_test)
train_dataset, test_dataset = split_data(x, y, ratio=0.9)
# train_dataset, test_dataset = split_data(x, y, ratio=0.9)
# train_set = (build_polynomial(train_dataset[0]), train_dataset[1])
# test_set = (build_polynomial(test_dataset[0]), test_dataset[1])
# # # # x = dataloader(mode='test', reduced=False)
# # # # x = standardize(x)
# # # # x = build_polynomial(x)
config = Config(batch_size=120, num_epochs=10, learning_rate=5 * 10 ** -4,
config = Config(batch_size=120, num_epochs=400, learning_rate=5 * 10 ** -4,
lambda_=2.15443469003e-05, mode='train')
log_class = LogisticClassifier(config, (build_polynomial(x), y))
log_class.train(show_every=1)
log_class.train(show_every=10)
predictions_test = log_class.predict_submission(log_class(build_polynomial(x_test)))
log_class.plot_accuracy()
log_class.plot_convergence()
ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 1, LogisticClassifier,
ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 5, LogisticClassifier,
label='ensemble_2_log')

#
ensemble.train()
# ensemble.plot_convergence()
# ensemble.plot_accuracy()
ensemble.save()
# ensemble.load_weights()
# ensemble.save()
# # ensemble.load_weights()
predictions_test = ensemble.predict(ensemble(build_polynomial(x_test)))
create_csv_submission(np.arange(350000, 350000 + x_test.shape[0]), predictions_test,
'dataset/submission_07.csv')
'dataset/submission_10.csv')
#
# predictions = ensemble.predict(ensemble(build_polynomial(x)))
# y[np.where(y == 0)] = -1
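
create_csv_submission itself is not part of this diff. Assuming the usual Id,Prediction CSV layout implied by the call above, a minimal sketch might look like:

import csv

def create_csv_submission(ids, y_pred, name):
    # Write one (Id, Prediction) row per test sample
    with open(name, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['Id', 'Prediction'])
        writer.writeheader()
        for id_, pred in zip(ids, y_pred):
            writer.writerow({'Id': int(id_), 'Prediction': int(pred)})
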
14 changes: 7 additions & 7 deletions src/old_models/logistic_regression.py
@@ -1,9 +1,9 @@
import numpy as np
from src.utils import batch_iter, dataloader, split_data, standardize, xavier_init, adam
from src.utils import batch_iter, dataloader, split_data, standardize
import matplotlib.pyplot as plt
num_epochs = 300
batch_size = 300
learning_rate = 10**-4
learning_rate = 10**-3

x, y = dataloader(mode='train', reduced=False)
x = standardize(x)
@@ -72,15 +72,15 @@ def train_logistic_regression(loss_func, grad_func, lambda_ = 10**-3, show_every
iter_num = 1
m, v = 0, 0
# m, v = 0, 0
poly_train_data = build_polynomial(train_data)
weights = xavier_init(np.shape(poly_train_data[1]))
poly_train_data = (train_data)
weights = np.zeros((np.shape(poly_train_data[1])))
# weights = np.zeros(np.shape(poly_train_data)[1])
for epoch in range(num_epochs):
if epoch % 90 == 0:
learning_rate *= 0.5
for batch_label, batch_input in batch_iter(
train_target, train_data, batch_size, num_batches=num_batches):
batch_input = build_polynomial(batch_input)
batch_input = (batch_input)
grad = grad_func(weights, batch_input, batch_label, lambda_)
# weights, m, v = adam(weights, m, v, 0.9, 0.999, learning_rate, grad, iter_num)
weights -= learning_rate*grad
@@ -94,7 +94,7 @@ def train_logistic_regression(loss_func, grad_func, lambda_ = 10**-3, show_every
return weights, accuracy, train_loss, test_loss

def test_logistic_regression(weights, loss_func, lambda_):
poly_test_data = build_polynomial(test_data)
poly_test_data = (test_data)
loss = loss_func(weights, poly_test_data, test_target, lambda_)
output = sigmoid(poly_test_data @ weights)
predicted = output > 0.5
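
The training loop above carries a commented-out call of the form adam(weights, m, v, 0.9, 0.999, learning_rate, grad, iter_num). Its implementation is not shown in this diff; a standard Adam update consistent with that signature (the eps default is an assumption) would be:

import numpy as np

def adam(weights, m, v, beta1, beta2, learning_rate, grad, iter_num, eps=1e-8):
    # Exponential moving averages of the gradient and its square
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    # Bias correction for the early iterations
    m_hat = m / (1 - beta1 ** iter_num)
    v_hat = v / (1 - beta2 ** iter_num)
    # Scaled parameter update
    weights = weights - learning_rate * m_hat / (np.sqrt(v_hat) + eps)
    return weights, m, v
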
@@ -138,7 +138,7 @@ def plot(x, train_loss, test_loss):


if __name__ == '__main__':
# train_logistic_regression(loss_ce, gradient_ce, lambda_= 0.01)
train_logistic_regression(loss_ce, gradient_ce, lambda_= 0.01)
# lambdas, best_weigths, best_accurary, test_losses, train_losses, \
# best_combination = find_best_lambda(loss_mse_reg, gradient_mse_reg)
# plot(lambdas, train_losses, test_losses)
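
loss_ce and gradient_ce are invoked as grad_func(weights, batch_input, batch_label, lambda_) but defined elsewhere in the file. A standard L2-regularized cross-entropy gradient for logistic regression, consistent with those call sites, would be:

import numpy as np

def sigmoid(z):
    # Logistic function
    return 1.0 / (1.0 + np.exp(-z))

def gradient_ce(weights, data, labels, lambda_):
    # Gradient of the averaged cross-entropy loss plus an L2 penalty:
    # X^T (sigmoid(Xw) - y) / N + lambda * w
    n = data.shape[0]
    error = sigmoid(data @ weights) - labels
    return data.T @ error / n + lambda_ * weights
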
12 changes: 5 additions & 7 deletions src/run.py
@@ -8,13 +8,11 @@ def main():
x_test = dataloader(mode='test', reduced=False)
x = standardize(x)
x_test = standardize(x_test)
config = Config(batch_size=120, num_epochs=300, learning_rate=5*10**-4,
lambda_=2.15443469003e-05,
mode='train')
ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 50, LogisticClassifier,
label='ensemble_50_log')
ensemble.train()
predictions_test = ensemble.predict(ensemble(build_polynomial(x_test)))
config = Config(batch_size=120, num_epochs=400, learning_rate=5 * 10 ** -4,
lambda_=2.15443469003e-05, mode='train')
log_class = LogisticClassifier(config, (build_polynomial(x), y))
log_class.train(show_every=10)
predictions_test = log_class.predict_submission(log_class(build_polynomial(x_test)))
create_csv_submission(np.arange(350000, 350000 + x_test.shape[0]), predictions_test,
'dataset/submission_0x.csv')

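
The Config class is not shown in this diff; judging from the call sites in run.py and ensemble_log_regression.py, a minimal hyperparameter container along these lines would suffice:

class Config:
    # Hypothetical sketch inferred from the Config(...) call sites
    def __init__(self, batch_size, num_epochs, learning_rate, lambda_, mode):
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        self.lambda_ = lambda_
        self.mode = mode
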
1 change: 1 addition & 0 deletions src/utils.py
@@ -1,5 +1,6 @@
import numpy as np
import csv
np.random.seed(seed=3)

def predict_labels(weights, data):
"""Generates class predictions given weights, and a test data matrix"""