From 031e1db7dad9732984d113e07f4733e67668031a Mon Sep 17 00:00:00 2001 From: Lorenzo Terenzi Date: Thu, 26 Oct 2017 18:27:42 +0200 Subject: [PATCH] fixed --- .idea/workspace.xml | 130 ++++++++++----------------- src/__pycache__/utils.cpython-36.pyc | Bin 5677 -> 5186 bytes src/ensemble_log_regression.py | 24 +++-- src/utils.py | 5 +- 4 files changed, 61 insertions(+), 98 deletions(-) diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 330bbd5..855e548 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -4,8 +4,7 @@ - - + - - - - + + - - - - - - + + - - - - + + @@ -481,17 +441,17 @@ - + - + - - + + @@ -868,46 +828,46 @@ - + - - - + + + + + - - + + - + - - + + - + - - - - - + + + - - + + diff --git a/src/__pycache__/utils.cpython-36.pyc b/src/__pycache__/utils.cpython-36.pyc index 99f98d516c29b36ecd0a4924833ea9934ca54a8f..93f1744224cd8e849477140edd624846ad2ef66f 100644 GIT binary patch delta 738 zcmYjPO-~a+7~XgLHO%g^rC-o?iIgwhV$ejQ2LX$gVuGj$63fMGoHapdYi4UiuI1*5 z%*8~FM-!t5slULBf56k;j2C(#(O=;EmYBGcdESqC_L*mA_Vf8~=O!*$79aofyWUA@ z+Ar4*K?U>-gIMBA^!W0(9gcuYyMkb14NTiT4Akrd(9WES^ z6T{ds#E8ga#}u%kxuah`{)lvE;~ju;b(DAoQ)-cIjr4TfYu~PVZEwuqYfJA@+ZVI4 zq&~5$;HqQxaDECI(m)8gK-zB06TU2vMF&BPFff4w!#Sp#kkT`d#0rNl7gT59X+Dfg zuPIuM(0Jy{#`Ct<-f1=6s4S}}UmChVN^w$x3@XWGwZ(5jS-s~)YmU~)p$2kAedl|L z9>=1`RLz)1PQy53Um;GEIQk^WweB0k2j?1=a*?1yu!P`RvP}7P2)Yy#O2#P`b}_O`tC5c@5@%uw3os=BJW>g(>B{pIkj zhi4yioZ=T>-hI8%VC;AHpP{3E5@-C;&DpuWCG6{L#S~l=P+H=kC=2I0tk|L=s%YV& zCdN<}#6D3+Srlz?pBTr8l9&*aXeoHRLCN`pekox2X8A|PRxA!Q#!lf4fqt+dN}%5Ja*!_-=5NLyS( z8YnV7-sokP^dqs}%S@7yt7S$U?gW{>=}TF}s1cMXzO?0UhJoyRVH74)Xo)Eeg9^w8 zQFb}4aFE<%dE3N9Q;}cp!Fg} zoh6{cvV-6{a*_0x)N7?V>t#~UscWSR`%j}9R}dH<2cj?q`|u~d!QIkVFrR1HPvMj_ zv;7$R%-_z8wJ=WPDYUu8dzo)L^R+i9^i6`d2;NqIIS))+Ks7ESF!O0R1W$}Ah=E)A zMr-X+95JSpncfQ{w~S=()V>B^^~a`RZSdXLIe3WLr8CqKv2OlVm^*=7k8^sGh-Sum z6vXnBYSpLll8)Dx;63$ez1iO;C-=ObA4Z<%YH|S$FSwBRAuj2rm)%-sVxwH$&XY_0 zb77L0%QgXhHu5OJ62W>>>>>> 21a3636a014486ada42b979c3e94de1878fbe7b2 def __init__(self, batch_size, num_epochs, learning_rate, lambda_, mode='cv'): self.batch_size = batch_size @@ -173,7 +169,10 @@ def plot_convergence(self): x = np.arange(0, self.config.num_epochs) train_trend, = ax.plot(x, self.train_losses, label="Train loss") test_trend, = ax.plot(x, self.test_losses, label="Test loss") - ax.legend(loc='lower right') + # ax.legend(loc='lower right') + plt.xlabel('epoch') + plt.ylabel('loss') + plt.title('Loss history') plt.show() def plot_accuracy(self): @@ -182,6 +181,9 @@ def plot_accuracy(self): train_trend, = ax.plot(x, self.train_accuracies, label="Train accuracy") test_trend, = ax.plot(x, self.accuracies, label="Test accuracy") ax.legend(loc='lower right') + plt.xlabel('accuracy') + plt.ylabel('loss') + plt.title('Learning curves') plt.show() class EnsembleClassifiers(object): @@ -340,15 +342,19 @@ def find_best_batch(batch_sizes): # print(x_test.shape) x = standardize(x) x_test = standardize(x_test) - # train_dataset, test_dataset = split_data(x, y, ratio=0.9) + train_dataset, test_dataset = split_data(x, y, ratio=0.9) # train_set = (build_polynomial(train_dataset[0]), train_dataset[1]) # test_set = (build_polynomial(test_dataset[0]), test_dataset[1]) # # # # x = dataloader(mode='test', reduced=False) # # # # x = standardize(x) # # # # x = build_polynomial(x) - config = Config(batch_size=120, num_epochs=300, learning_rate=5 * 10 ** -4, + config = Config(batch_size=120, num_epochs=10, learning_rate=5 * 10 ** -4, lambda_=2.15443469003e-05, mode='train') - ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 50, LogisticClassifier, + log_class = LogisticClassifier(config, (build_polynomial(x), y)) + log_class.train(show_every=1) + log_class.plot_accuracy() + log_class.plot_convergence() + ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 1, LogisticClassifier, label='ensemble_2_log') ensemble.train() @@ -358,7 +364,7 @@ def find_best_batch(batch_sizes): # ensemble.load_weights() predictions_test = ensemble.predict(ensemble(build_polynomial(x_test))) create_csv_submission(np.arange(350000, 350000 + x_test.shape[0]), predictions_test, - 'dataset/submission_06.csv') + 'dataset/submission_07.csv') # # predictions = ensemble.predict(ensemble(build_polynomial(x))) # y[np.where(y == 0)] = -1 diff --git a/src/utils.py b/src/utils.py index 4bf9329..b3065b7 100644 --- a/src/utils.py +++ b/src/utils.py @@ -24,11 +24,8 @@ def create_csv_submission(ids, y_pred, name): writer.writerow({'Id': int(r1), 'Prediction': int(r2)}) def dataloader(mode='train', reduced=False): -<<<<<<< HEAD - #TODO: erase the reduced feature since it is not used -======= """Load datasets""" ->>>>>>> 21a3636a014486ada42b979c3e94de1878fbe7b2 + print("Loading data ...") file_name = 'dataset/' + mode + '.csv' with open(file_name) as f: