added ensemble

Idate96 · Oct 15, 2017 · a70df03 · a70df03
1 parent c55c52e
commit a70df03
Show file tree

Hide file tree

Showing 19 changed files with 12,012 additions and 167 deletions.
diff --git a/.idea/cs_433_ML_project_1.iml b/.idea/cs_433_ML_project_1.iml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/src/__pycache__/utils.cpython-36.pyc b/src/__pycache__/utils.cpython-36.pyc
diff --git a/src/adaboost.py b/src/adaboost.py
@@ -14,16 +14,18 @@ def __init__(self, batch_size, num_epochs, learning_rate, lambda_):
 
 
 class LogisticClassifier(object):
-    def __init__(self, config, train_set, test_set):
+    def __init__(self, config, train_set, test_set, label='0'):
         self.config = config
         # construct non linear features
         self.train_data, self.train_labels = train_set
         self.test_data, self.test_labels = test_set
-        self.weights = xavier_init(np.shape(self.train_data[1]))
+        # self.weights = xavier_init(np.shape(self.train_data))
+        self.weights = np.zeros((np.shape(self.train_data)[1]))
         self.train_losses = []
         self.test_losses = []
         self.accuracy = 0
         self.test_predictions = None
+        self.label = label
 
     def __call__(self, input):
         return sigmoid(input @ self.weights)
@@ -34,7 +36,7 @@ def loss(self, output, target, sample_weights=1):
                                                   self.weights
         return loss
 
-    def grad(self, data_batch, target_batch, sample_weights):
+    def grad(self, data_batch, target_batch, sample_weights=1):
         return data_batch.T @ ((self(data_batch) - target_batch) * sample_weights *
                 self(data_batch) * (1 - self(data_batch))) + \
                self.config.lambda_ * self.weights
@@ -43,7 +45,7 @@ def sdg(self, param, data, target):
         param -= self.config.learning_rate * self.grad(data, target)
         return param
 
-    def train(self, weights=1, show_every=10):
+    def train(self, show_every=10):
         num_batches = int(np.shape(self.train_data)[0]/self.config.batch_size)
         for epoch in range(self.config.num_epochs):
             if epoch % 50 == 0:
@@ -56,7 +58,6 @@ def train(self, weights=1, show_every=10):
                 print("Epoch : ", epoch)
                 print("Train loss : ", train_loss)
                 self.test()
-        return weights
 
     def test(self):
         output = self(self.test_data)
@@ -72,26 +73,74 @@ def predict(self, output):
         return output > 0.5
 
     def save(self):
-        pickle.dump(self.weights, open('config/weights.p', 'wb'))
+        with open(r'config/weights' + self.label + '.p', "wb") as file:
+            pickle.dump(self.weights, file)
 
     def load_weights(self):
-        self.weights = pickle.load(open('config/weights.p', 'rb'))
+        """Load this classifier's weights from ``config/weights<label>.p``."""
+        # The context manager closes the file handle once unpickling is done.
+        with open('config/weights' + self.label + '.p', 'rb') as file:
+            self.weights = pickle.load(file)
 
     def export_predictions(self):
         with open('prediction/submission.csv', 'w', newline='') as csvfile:
             writer = csv.writer(csvfile, delimiter=',')
             for i in range(len(self.test_predictions)):
                 writer.writerow([str(i) + ", " + self.test_predictions[i]])
 
-class Adaboost(object):
-    def __init__(self, config, train_data, test_data, num_classifiers):
-        self.train_data, self.train_labels = train_set
-        self.test_data, self.test_labels = test_set
-        self.classifiers = [LogisticClassifier(config, (self.train_data, self.train_labels),
+class EnsembleClassifiers(object):
+    """Average the outputs of several classifiers and threshold the mean at 0.5."""
+
+    def __init__(self, config, train_data, test_data, num_classifiers, classifier, label='0'):
+        """Build ``num_classifiers`` instances of ``classifier`` on the same data.
+
+        Args:
+            config: passed unchanged to every ``classifier`` constructor.
+            train_data: ``(data, labels)`` pair used for training.
+            test_data: ``(data, labels)`` pair used for evaluation.
+            num_classifiers: number of ensemble members to create.
+            classifier: callable invoked as ``classifier(config, train, test)``.
+            label: file name (under ``config/``) used by ``save``/``load_weights``.
+        """
+        # Unpack the constructor arguments rather than module-level globals so
+        # the class also works when it is imported instead of run as a script.
+        self.train_data, self.train_labels = train_data
+        self.test_data, self.test_labels = test_data
+        self.classifiers = [classifier(config, (self.train_data, self.train_labels),
                                                (self.test_data, self.test_labels))
                             for i in range(num_classifiers)]
-        self.sample_weights = 1/np.shape(train_data)[0]
         self.classifier_weights = np.ones(num_classifiers)
+        self.accuracy = 0
+        self.test_predictions = None
+        self.label = label
+
+    def check_weights(self):
+        """Print the weight vector of every ensemble member."""
+        for classifier in self.classifiers:
+            print(classifier.weights)
+
+    def train(self):
+        """Train each member in turn, then evaluate the ensemble on the test set."""
+        for classifier in self.classifiers:
+            classifier.train()
+            # Dump all members' weights after each individual training run.
+            self.check_weights()
+        self.test()
+
+    def test(self):
+        """Average member outputs on the test data and record/print the accuracy."""
+        output = 0
+        for classifier in self.classifiers:
+            output += 1/len(self.classifiers) * classifier(self.test_data)
+        self.test_predictions = output > 0.5
+        correct = np.sum(self.test_predictions == self.test_labels)
+        self.accuracy = correct / np.shape(self.test_data)[0]
+        print('Test ensemble accuracy :', self.accuracy)
+
+    def save(self):
+        """Pickle all member weights as one 2-D array to ``config/<label>``."""
+        weights = np.zeros((len(self.classifiers), np.shape(self.train_data)[1]))
+        for i, classifier in enumerate(self.classifiers):
+            weights[i] = classifier.weights
+        with open(r'config/' + self.label, "wb") as file:
+            pickle.dump(weights, file)
+
+    def load_weights(self):
+        """Restore member weights from the array written by ``save``."""
+        # pickle.load needs a binary file object opened for reading.
+        with open(r'config/' + self.label, "rb") as file:
+            weights = pickle.load(file)
+        for i, classifier in enumerate(self.classifiers):
+            classifier.weights = weights[i]
+
+def load_classifiers(config, train_set, test_set, number):
+    """Average ``number`` saved logistic classifiers and print the test accuracy.
+
+    Classifier ``i`` is created with label ``'log_<i>'`` for ``i`` in
+    ``range(number)`` and its weights are loaded via ``load_weights``. The
+    member outputs on ``test_set[0]`` are averaged, thresholded at 0.5 and
+    compared with ``test_set[1]``.
+    """
+    output = 0
+    for i in range(number):
+        # Use the loop index so every member loads its own weight file
+        # instead of re-loading the same one ``number`` times.
+        classifier = LogisticClassifier(config, train_set, test_set, label='log_' + str(i))
+        classifier.load_weights()
+        output += 1/number * classifier(test_set[0])
+    predictions = output > 0.5
+    correct = np.sum(predictions == test_set[1])
+    accuracy = correct / np.shape(test_set[0])[0]
+    print('Test accuracy :', accuracy)
+
+
+
+
 
 
 if __name__ == '__main__':
@@ -100,6 +149,12 @@ def __init__(self, config, train_data, test_data, num_classifiers):
     train_dataset, test_dataset = split_data(x, y, ratio=0.9)
     train_set = (build_polynomial(train_dataset[0]), train_dataset[1])
     test_set = (build_polynomial(test_dataset[0]), test_dataset[1])
-    config = Config(batch_size=200, num_epochs=300, learning_rate=5*10**-4, lambda_=0.01)
-    log_classifier = LogisticClassifier(config, train_set, test_set)
-    log_classifier.train()
+    config = Config(batch_size=200, num_epochs=100, learning_rate=5*10**-4, lambda_=0.01)
+    load_classifiers(config, train_set, test_set, 4)
+    # log_classifier = LogisticClassifier(config, train_set, test_set, label='log_4')
+    # log_classifier.train()
+    # log_classifier.save()
+    # log_classifier.load_weights()
+    # log_classifier.test()
+    # ensemble = EnsembleClassifiers(config, train_set, test_set, 5, LogisticClassifier, "ensemble_0")
+    # ensemble.train()
diff --git a/src/config/weights.p b/src/config/weights.p
diff --git a/src/config/weightslog_0.p b/src/config/weightslog_0.p
diff --git a/src/config/weightslog_1.p b/src/config/weightslog_1.p
diff --git a/src/config/weightslog_2.p b/src/config/weightslog_2.p
diff --git a/src/config/weightslog_3.p b/src/config/weightslog_3.p
diff --git a/src/config/weightslog_4.p b/src/config/weightslog_4.p
diff --git a/src/implementations/.ipynb_checkpoints/ex02-checkpoint.ipynb b/src/implementations/.ipynb_checkpoints/ex02-checkpoint.ipynb
diff --git a/src/implementations/__pycache__/costs.cpython-36.pyc b/src/implementations/__pycache__/costs.cpython-36.pyc
diff --git a/src/implementations/__pycache__/grid_search.cpython-36.pyc b/src/implementations/__pycache__/grid_search.cpython-36.pyc
diff --git a/src/implementations/__pycache__/helpers.cpython-36.pyc b/src/implementations/__pycache__/helpers.cpython-36.pyc
diff --git a/src/implementations/__pycache__/plots.cpython-36.pyc b/src/implementations/__pycache__/plots.cpython-36.pyc
diff --git a/src/implementations/grid_plot.png b/src/implementations/grid_plot.png