diff --git a/TF_2_x/MNIST-MLP-SELU.py b/TF_2_x/MNIST-MLP-SELU.py
index 9b0c78e..89e93aa 100644
--- a/TF_2_x/MNIST-MLP-SELU.py
+++ b/TF_2_x/MNIST-MLP-SELU.py
@@ -1,5 +1,5 @@
 # Adapted KERAS tutorial
-
+#%%
 import tensorflow as tf
 import tensorflow.keras as keras
 from tensorflow.keras.datasets import mnist
@@ -8,12 +8,15 @@
 from tensorflow.keras.layers import Conv2D, MaxPooling2D
 from tensorflow.keras import backend as K
 import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
 
 batch_size = 128
 num_classes = 10
 epochs = 20
 
+
 # input image dimensions
 img_rows, img_cols = 28, 28
 
@@ -46,33 +49,46 @@
 y_val = y_train[:10000]
 y_train = y_train[10000:]
 
+
 print('x_train shape:', x_train.shape)
 print(x_train.shape[0], 'train samples')
 print(x_val.shape[0], 'val samples')
 print(x_test.shape[0], 'test samples')
 
-# convert class vectors to binary class matrices
+
+# convert class vectors to one-hot vectors
 y_train = keras.utils.to_categorical(y_train, num_classes)
 y_val = keras.utils.to_categorical(y_val, num_classes)
 y_test = keras.utils.to_categorical(y_test, num_classes)
 
-model = Sequential()
-model.add(Flatten())
-model.add(Dense(512, activation='selu',kernel_initializer='lecun_normal',bias_initializer='zeros'))
-model.add(AlphaDropout(0.05))
-model.add(Dense(256, activation='selu',kernel_initializer='lecun_normal',bias_initializer='zeros'))
-model.add(AlphaDropout(0.05))
-model.add(Dense(num_classes, activation='softmax',kernel_initializer='lecun_normal',bias_initializer='zeros'))
+model = Sequential([
+    Flatten(input_shape=(28, 28)),
+    Dense(512, activation='selu', kernel_initializer='lecun_normal', bias_initializer='zeros'),
+    AlphaDropout(0.05),
+    Dense(256, activation='selu', kernel_initializer='lecun_normal', bias_initializer='zeros'),
+    AlphaDropout(0.05),
+    Dense(num_classes, activation='softmax', kernel_initializer='glorot_normal')  # best practice: Glorot init for the softmax layer
+])
+
 model.compile(loss=keras.losses.categorical_crossentropy,
               optimizer=keras.optimizers.Adam(learning_rate=0.001),
               metrics=['accuracy'])
 
-model.fit(x_train, y_train,
+# use an early-stopping callback
+early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=6)
+
+history = model.fit(x_train, y_train,
           batch_size=batch_size,
           epochs=epochs,
           verbose=1,
-          validation_data=(x_val, y_val))
+          validation_data=(x_val, y_val),
+          callbacks=[early_stopping_cb])
+
+# visualize the training curves
+pd.DataFrame(history.history).plot(figsize=(8, 5))
+plt.show()
 
 score = model.evaluate(x_test, y_test, verbose=0)
 print('Test loss:', score[0])
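
Note: SELU's self-normalizing regime (Klambauer et al., 2017, the same paper this lecun_normal + AlphaDropout recipe comes from) is derived for inputs with roughly zero mean and unit variance, whereas MNIST pixels scaled to [0, 1] do not satisfy that. A minimal, optional sketch of standardizing the data before fitting — the mean/std names are illustrative and not part of this patch:

    # standardize inputs so activations start near SELU's fixed point (mean 0, variance 1)
    mean, std = x_train.mean(), x_train.std()
    x_train = (x_train - mean) / std
    x_val = (x_val - mean) / std    # reuse the training-set statistics
    x_test = (x_test - mean) / std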
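
Note: with patience=6, EarlyStopping halts training six epochs after the last val_loss improvement and, by default, leaves the model holding the weights from that final (worse) epoch. Keras's EarlyStopping also accepts restore_best_weights=True, which rolls the model back to its best checkpoint before model.evaluate runs — a sketch under the same setup as the patch:

    early_stopping_cb = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=6,
        restore_best_weights=True,  # evaluate the best epoch, not the last one
    )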