Skip to content

Commit

Permalink
added ensemble
Browse files Browse the repository at this point in the history
  • Loading branch information
Laura Jou Ferrer committed Oct 15, 2017
1 parent c55c52e commit a70df03
Show file tree
Hide file tree
Showing 19 changed files with 12,012 additions and 167 deletions.
2 changes: 1 addition & 1 deletion .idea/cs_433_ML_project_1.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

346 changes: 199 additions & 147 deletions .idea/workspace.xml

Large diffs are not rendered by default.

Binary file modified src/__pycache__/utils.cpython-36.pyc
Binary file not shown.
83 changes: 69 additions & 14 deletions src/adaboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,18 @@ def __init__(self, batch_size, num_epochs, learning_rate, lambda_):


class LogisticClassifier(object):
def __init__(self, config, train_set, test_set):
def __init__(self, config, train_set, test_set, label='0'):
self.config = config
# construct non linear features
self.train_data, self.train_labels = train_set
self.test_data, self.test_labels = test_set
self.weights = xavier_init(np.shape(self.train_data[1]))
# self.weights = xavier_init(np.shape(self.train_data))
self.weights = np.zeros((np.shape(self.train_data)[1]))
self.train_losses = []
self.test_losses = []
self.accuracy = 0
self.test_predictions = None
self.label = label

def __call__(self, input):
    """Return ``sigmoid`` applied to the linear scores ``input @ self.weights``."""
    scores = input @ self.weights
    return sigmoid(scores)
Expand All @@ -34,7 +36,7 @@ def loss(self, output, target, sample_weights=1):
self.weights
return loss

def grad(self, data_batch, target_batch, sample_weights=1):
    """Return the regularised gradient of the loss with respect to the weights.

    Args:
        data_batch: Feature matrix of the batch.
        target_batch: Targets aligned with the rows of ``data_batch``.
        sample_weights: Per-sample multiplier applied to the residual; the
            default of 1 weights every sample equally.

    Returns:
        ``data_batch.T @ (residual * p * (1 - p)) + lambda_ * weights`` where
        ``p`` is the model output for the batch.
    """
    # Evaluate the model once and reuse it for all three factors.
    prediction = self(data_batch)
    residual = (prediction - target_batch) * sample_weights
    return data_batch.T @ (residual * prediction * (1 - prediction)) + \
        self.config.lambda_ * self.weights
Expand All @@ -43,7 +45,7 @@ def sdg(self, param, data, target):
param -= self.config.learning_rate * self.grad(data, target)
return param

def train(self, weights=1, show_every=10):
def train(self, show_every=10):
num_batches = int(np.shape(self.train_data)[0]/self.config.batch_size)
for epoch in range(self.config.num_epochs):
if epoch % 50 == 0:
Expand All @@ -56,7 +58,6 @@ def train(self, weights=1, show_every=10):
print("Epoch : ", epoch)
print("Train loss : ", train_loss)
self.test()
return weights

def test(self):
output = self(self.test_data)
Expand All @@ -72,26 +73,74 @@ def predict(self, output):
return output > 0.5

def save(self):
    """Pickle the weight vector to ``config/weights<label>.p``.

    The file is opened in a ``with`` block so the handle is closed even if
    pickling fails. The ``config`` directory must already exist.
    """
    with open(r'config/weights' + self.label + '.p', "wb") as file:
        pickle.dump(self.weights, file)

def load_weights(self):
    """Replace ``self.weights`` with the pickle stored at ``config/weights<label>.p``.

    Raises:
        FileNotFoundError: If no weight file exists for this label.
    """
    # NOTE: unpickling executes arbitrary code; only load files this project wrote.
    with open('config/weights' + self.label + '.p', 'rb') as file:
        self.weights = pickle.load(file)

def export_predictions(self):
    """Write ``self.test_predictions`` to ``prediction/submission.csv``.

    Each row holds the sample index and its prediction as two separate CSV
    fields. The ``prediction`` directory must already exist.
    """
    with open('prediction/submission.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        # Let the csv writer stringify the values: concatenating the index
        # string with a non-string prediction raises TypeError, and a
        # pre-joined "i, p" string would be emitted as one quoted field.
        for index, prediction in enumerate(self.test_predictions):
            writer.writerow([index, prediction])

class EnsembleClassifiers(object):
    """Equal-weight ensemble of several instances of one classifier class.

    Every member is built from the same configuration and datasets. The
    ensemble prediction is the mean of the members' outputs on the test
    data, thresholded at 0.5.
    """

    def __init__(self, config, train_data, test_data, num_classifiers, classifier, label='0'):
        """Build ``num_classifiers`` members and initialise bookkeeping.

        Args:
            config: Passed unchanged to every member's constructor.
            train_data: ``(data, labels)`` pair used for training.
            test_data: ``(data, labels)`` pair used for evaluation.
            num_classifiers: Number of ensemble members to create.
            classifier: Callable invoked as
                ``classifier(config, train_pair, test_pair)`` to build a member.
            label: File name under ``config/`` used by ``save`` and
                ``load_weights``.
        """
        # Unpack the constructor arguments themselves rather than relying on
        # module-level variables that happen to exist when run as a script.
        self.train_data, self.train_labels = train_data
        self.test_data, self.test_labels = test_data
        self.classifiers = [
            classifier(config,
                       (self.train_data, self.train_labels),
                       (self.test_data, self.test_labels))
            for _ in range(num_classifiers)
        ]
        # Uniform weight per training row; measured on the feature matrix,
        # not on the (data, labels) tuple.
        # NOTE(review): sample_weights and classifier_weights are not read by
        # any method of this class as written.
        self.sample_weights = 1 / np.shape(self.train_data)[0]
        self.classifier_weights = np.ones(num_classifiers)
        self.test_predictions = None
        self.accuracy = 0
        self.label = label

    def check_weights(self):
        """Print the weight vector of every member."""
        for classifier in self.classifiers:
            print(classifier.weights)

    def train(self):
        """Train each member in turn, then print their weights and evaluate."""
        for classifier in self.classifiers:
            classifier.train()
        self.check_weights()
        self.test()

    def test(self):
        """Average the members' outputs on the test data and record accuracy.

        Sets ``self.test_predictions`` (mean output > 0.5) and
        ``self.accuracy``, and prints the accuracy.
        """
        share = 1 / len(self.classifiers) if self.classifiers else 0
        output = 0
        for classifier in self.classifiers:
            output += share * classifier(self.test_data)
        self.test_predictions = output > 0.5
        correct = np.sum(self.test_predictions == self.test_labels)
        self.accuracy = correct / np.shape(self.test_data)[0]
        print('Test ensemble accuracy :', self.accuracy)

    def save(self):
        """Pickle all members' weights as one 2-D array to ``config/<label>``.

        Row ``i`` of the array holds the weights of member ``i``.
        """
        weights = np.zeros((len(self.classifiers), np.shape(self.train_data)[1]))
        for i, classifier in enumerate(self.classifiers):
            weights[i] = classifier.weights
        with open(r'config/' + self.label, "wb") as file:
            pickle.dump(weights, file)

    def load_weights(self):
        """Restore every member's weights from ``config/<label>``.

        Raises:
            FileNotFoundError: If the ensemble has not been saved under this
                label.
        """
        # pickle.load needs a binary file object opened for reading, not a
        # path string and a mode.
        # NOTE: unpickling executes arbitrary code; only load files this
        # project wrote.
        with open('config/' + self.label, "rb") as file:
            weights = pickle.load(file)
        for i, classifier in enumerate(self.classifiers):
            classifier.weights = weights[i]

def load_classifiers(config, train_set, test_set, number):
    """Average ``number`` saved logistic classifiers and report test accuracy.

    Classifier ``i`` is created with label ``'log_<i>'`` and restores its
    weights through ``load_weights``. The members' outputs on the test data
    are averaged with equal weight and thresholded at 0.5.

    Args:
        config: Passed to every ``LogisticClassifier``.
        train_set: ``(data, labels)`` pair passed to every classifier.
        test_set: ``(data, labels)`` pair the ensemble is scored on.
        number: How many saved classifiers (``log_0`` .. ``log_<number-1>``)
            to combine.

    Returns:
        The fraction of test samples predicted correctly (also printed).
    """
    output = 0
    for i in range(number):
        # Label by the loop index so each iteration loads a different weight
        # file; labelling by `number` reloaded one file every time.
        classifier = LogisticClassifier(config, train_set, test_set, label='log_' + str(i))
        classifier.load_weights()
        output += 1/number * classifier(test_set[0])
    predictions = output > 0.5
    correct = np.sum(predictions == test_set[1])
    accuracy = correct / np.shape(test_set[0])[0]
    print('Test accuracy :', accuracy)
    return accuracy






if __name__ == '__main__':
Expand All @@ -100,6 +149,12 @@ def __init__(self, config, train_data, test_data, num_classifiers):
train_dataset, test_dataset = split_data(x, y, ratio=0.9)
train_set = (build_polynomial(train_dataset[0]), train_dataset[1])
test_set = (build_polynomial(test_dataset[0]), test_dataset[1])
config = Config(batch_size=200, num_epochs=300, learning_rate=5*10**-4, lambda_=0.01)
log_classifier = LogisticClassifier(config, train_set, test_set)
log_classifier.train()
config = Config(batch_size=200, num_epochs=100, learning_rate=5*10**-4, lambda_=0.01)
load_classifiers(config, train_set, test_set, 4)
# log_classifier = LogisticClassifier(config, train_set, test_set, label='log_4')
# log_classifier.train()
# log_classifier.save()
# log_classifier.load_weights()
# log_classifier.test()
# ensemble = EnsembleClassifiers(config, train_set, test_set, 5, LogisticClassifier, "ensemble_0")
# ensemble.train()
Binary file added src/config/weights.p
Binary file not shown.
Binary file added src/config/weightslog_0.p
Binary file not shown.
Binary file added src/config/weightslog_1.p
Binary file not shown.
Binary file added src/config/weightslog_2.p
Binary file not shown.
Binary file added src/config/weightslog_3.p
Binary file not shown.
Binary file added src/config/weightslog_4.p
Binary file not shown.
1,737 changes: 1,737 additions & 0 deletions src/implementations/.ipynb_checkpoints/ex02-checkpoint.ipynb

Large diffs are not rendered by default.

Binary file added src/implementations/__pycache__/costs.cpython-36.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added src/implementations/grid_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit a70df03

Please sign in to comment.