diff --git a/README.md b/README.md index 1ee669a1..4c35d5e4 100644 --- a/README.md +++ b/README.md @@ -252,7 +252,7 @@ but rather to present the inner workings of them in a transparent way. - [Deep Q-Network](mlfromscratch/reinforcement_learning/deep_q_network.py) ### Deep Learning - + [Base Class](mlfromscratch/deep_learning/neural_network.py) + + [Neural Network](mlfromscratch/deep_learning/neural_network.py) + [Layers](mlfromscratch/deep_learning/layers.py) * Activation Layer * Average Pooling Layer diff --git a/mlfromscratch/deep_learning/activation_functions.py b/mlfromscratch/deep_learning/activation_functions.py index 6fdb8646..7857f75a 100644 --- a/mlfromscratch/deep_learning/activation_functions.py +++ b/mlfromscratch/deep_learning/activation_functions.py @@ -1,5 +1,4 @@ import numpy as np -import sys # Collection of activation functions # Reference: https://en.wikipedia.org/wiki/Activation_function @@ -31,7 +30,6 @@ def function(self, x): return 2 / (1 + np.exp(-2*x)) - 1 def gradient(self, x): - # Avoid overflow for large inputs return 1 - np.power(self.function(x), 2) class ReLU(): @@ -64,8 +62,8 @@ def gradient(self, x): return np.where(x >= 0.0, 1, self.function(x) + self.alpha) class SELU(): - # Reference : https://arxiv.org/abs/1706.02515, - # https://github.com/bioinf-jku/SNNs/blob/master/SelfNormalizingNetworks_MLP_MNIST.ipynb + # Reference : https://arxiv.org/abs/1706.02515, + # https://github.com/bioinf-jku/SNNs/blob/master/SelfNormalizingNetworks_MLP_MNIST.ipynb def __init__(self): self.alpha = 1.6732632423543772848170429916717 self.scale = 1.0507009873554804934193349852946 @@ -83,5 +81,5 @@ def function(self, x): return np.log(1 + np.exp(x)) def gradient(self, x): - return Sigmoid().function(x) + return 1 / (1 + np.exp(-x)) diff --git a/mlfromscratch/deep_learning/layers.py b/mlfromscratch/deep_learning/layers.py index 9f5e026c..1400bb06 100644 --- a/mlfromscratch/deep_learning/layers.py +++ b/mlfromscratch/deep_learning/layers.py @@ -1,11 +1,10 @@ from __future__ import print_function, division -import sys -import os import math import numpy as np import copy -from mlfromscratch.deep_learning.activation_functions import Sigmoid, ReLU, SoftPlus, LeakyReLU, TanH, ELU, SELU, Softmax +from mlfromscratch.deep_learning.activation_functions import Sigmoid, ReLU, SoftPlus, LeakyReLU +from mlfromscratch.deep_learning.activation_functions import TanH, ELU, SELU, Softmax class Layer(object): diff --git a/mlfromscratch/deep_learning/loss_functions.py b/mlfromscratch/deep_learning/loss_functions.py index 1728f421..72393130 100644 --- a/mlfromscratch/deep_learning/loss_functions.py +++ b/mlfromscratch/deep_learning/loss_functions.py @@ -1,6 +1,6 @@ from __future__ import division import numpy as np -from mlfromscratch.utils.data_operation import accuracy_score +from mlfromscratch.utils import accuracy_score from mlfromscratch.deep_learning.activation_functions import Sigmoid class Loss(object): diff --git a/mlfromscratch/deep_learning/neural_network.py b/mlfromscratch/deep_learning/neural_network.py index 3b867933..2a3b5af2 100644 --- a/mlfromscratch/deep_learning/neural_network.py +++ b/mlfromscratch/deep_learning/neural_network.py @@ -1,13 +1,8 @@ from __future__ import print_function from terminaltables import AsciiTable -import copy import numpy as np import progressbar - -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, to_categorical, normalize -from mlfromscratch.utils.data_manipulation import get_random_subsets, shuffle_data, batch_iterator -from mlfromscratch.utils.data_operation import accuracy_score +from mlfromscratch.utils import batch_iterator from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.utils.misc import bar_widgets @@ -30,7 +25,7 @@ def __init__(self, optimizer, loss=CrossEntropy, validation_data=None): self.layers = [] self.errors = {"training": [], "validation": []} self.loss_function = loss() - + self.validation_set = False if validation_data: self.validation_set = True @@ -47,50 +42,41 @@ def add(self, layer): # to the output shape of the last added layer if self.layers: layer.set_input_shape(shape=self.layers[-1].output_shape()) - # If the layer has weights that needs to be initialized if hasattr(layer, 'initialize'): layer.initialize(optimizer=self.optimizer) - # Add layer to the network self.layers.append(layer) def train_on_batch(self, X, y): - # Calculate output y_pred = self._forward_pass(X) - # Calculate the training loss + # Calculate the loss and accuracy of the prediction loss = np.mean(self.loss_function.loss(y, y_pred)) + acc = self.loss_function.acc(y, y_pred) # Calculate the gradient of the loss function wrt y_pred loss_grad = self.loss_function.gradient(y, y_pred) - # Calculate the accuracy of the prediction - acc = self.loss_function.acc(y, y_pred) - # Backprop. Update weights + # Backpropagate. Update weights self._backward_pass(loss_grad=loss_grad) return loss, acc - def fit(self, X, y, n_epochs, batch_size): - n_samples = np.shape(X)[0] n_batches = int(n_samples / batch_size) bar = progressbar.ProgressBar(widgets=bar_widgets) for _ in bar(range(n_epochs)): - idx = range(n_samples) - np.random.shuffle(idx) - - batch_t_error = 0 # Mean batch training error + batch_error = 0 for X_batch, y_batch in batch_iterator(X, y, batch_size=batch_size): loss, _ = self.train_on_batch(X_batch, y_batch) - batch_t_error += loss + batch_error += loss + + self.errors["training"].append(batch_error / n_batches) - # Save the epoch mean error - self.errors["training"].append(batch_t_error / n_batches) if self.validation_set: # Determine validation error - y_val_p = self._forward_pass(self.X_val) - validation_loss = np.mean(self.loss_function.loss(self.y_val, y_val_p)) + y_val_pred = self._forward_pass(self.X_val) + validation_loss = np.mean(self.loss_function.loss(self.y_val, y_val_pred)) self.errors["validation"].append(validation_loss) return self.errors["training"], self.errors["validation"] @@ -116,7 +102,7 @@ def summary(self, name="Model Summary"): print (AsciiTable([[name]]).table) # Network input shape (first layer's input shape) print ("Input Shape: %s" % str(self.layers[0].input_shape)) - # Get each layer's configuration + # Iterate through network and get each layer's configuration table_data = [["Layer Type", "Parameters", "Output Shape"]] tot_params = 0 for layer in self.layers: @@ -125,10 +111,8 @@ def summary(self, name="Model Summary"): out_shape = layer.output_shape() table_data.append([layer_name, str(params), str(out_shape)]) tot_params += params - # Print network configuration table print (AsciiTable(table_data).table) - print ("Total Parameters: %d\n" % tot_params) def predict(self, X): diff --git a/mlfromscratch/deep_learning/optimizers.py b/mlfromscratch/deep_learning/optimizers.py index 28ffbe37..396d2458 100644 --- a/mlfromscratch/deep_learning/optimizers.py +++ b/mlfromscratch/deep_learning/optimizers.py @@ -1,5 +1,5 @@ import numpy as np -from mlfromscratch.utils.data_manipulation import make_diagonal, normalize +from mlfromscratch.utils import make_diagonal, normalize # Optimizers for models that use gradient based methods for finding the # weights that minimizes the loss. diff --git a/mlfromscratch/examples/convolutional_neural_network.py b/mlfromscratch/examples/convolutional_neural_network.py index 776149fa..05eb151b 100644 --- a/mlfromscratch/examples/convolutional_neural_network.py +++ b/mlfromscratch/examples/convolutional_neural_network.py @@ -7,13 +7,12 @@ # Import helper functions from mlfromscratch.deep_learning import NeuralNetwork -from mlfromscratch.utils.data_manipulation import train_test_split, to_categorical, normalize -from mlfromscratch.utils.data_manipulation import get_random_subsets, shuffle_data +from mlfromscratch.utils import train_test_split, to_categorical, normalize +from mlfromscratch.utils import get_random_subsets, shuffle_data, Plot from mlfromscratch.utils.data_operation import accuracy_score from mlfromscratch.deep_learning.optimizers import StochasticGradientDescent, Adam, RMSprop, Adagrad, Adadelta from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.utils.misc import bar_widgets -from mlfromscratch.utils import Plot from mlfromscratch.deep_learning.layers import Dense, Dropout, Conv2D, Flatten, Activation, MaxPooling2D from mlfromscratch.deep_learning.layers import AveragePooling2D, ZeroPadding2D, BatchNormalization, RNN @@ -25,7 +24,7 @@ def main(): # Conv Net #---------- - optimizer = Adadelta() + optimizer = Adam() data = datasets.load_digits() X = data.data @@ -62,7 +61,6 @@ def main(): clf.add(BatchNormalization()) clf.add(Dense(10)) clf.add(Activation('softmax')) - print () clf.summary(name="ConvNet") diff --git a/mlfromscratch/examples/decision_tree_classifier.py b/mlfromscratch/examples/decision_tree_classifier.py index d42ee282..2d11a296 100644 --- a/mlfromscratch/examples/decision_tree_classifier.py +++ b/mlfromscratch/examples/decision_tree_classifier.py @@ -6,10 +6,8 @@ import os # Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, standardize -from mlfromscratch.utils.data_operation import accuracy_score -from mlfromscratch.utils.data_operation import mean_squared_error, calculate_variance -from mlfromscratch.utils import Plot +from mlfromscratch.utils import train_test_split, standardize, accuracy_score +from mlfromscratch.utils import mean_squared_error, calculate_variance, Plot from mlfromscratch.supervised_learning import ClassificationTree def main(): diff --git a/mlfromscratch/examples/decision_tree_regressor.py b/mlfromscratch/examples/decision_tree_regressor.py index 75526bbb..f0b43d3e 100644 --- a/mlfromscratch/examples/decision_tree_regressor.py +++ b/mlfromscratch/examples/decision_tree_regressor.py @@ -1,16 +1,10 @@ from __future__ import division, print_function import numpy as np -from sklearn import datasets import matplotlib.pyplot as plt import pandas as pd -import sys -import os - -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, standardize -from mlfromscratch.utils.data_operation import accuracy_score -from mlfromscratch.utils.data_operation import mean_squared_error, calculate_variance -from mlfromscratch.utils import Plot + +from mlfromscratch.utils import train_test_split, standardize, accuracy_score +from mlfromscratch.utils import mean_squared_error, calculate_variance, Plot from mlfromscratch.supervised_learning import RegressionTree def main(): diff --git a/mlfromscratch/examples/deep_q_network.py b/mlfromscratch/examples/deep_q_network.py index b5c7da30..752611ee 100644 --- a/mlfromscratch/examples/deep_q_network.py +++ b/mlfromscratch/examples/deep_q_network.py @@ -1,14 +1,6 @@ from __future__ import print_function -import sys -import os -import math -import random import numpy as np -import progressbar -import gym -from collections import deque - -from mlfromscratch.utils.data_manipulation import to_categorical +from mlfromscratch.utils import to_categorical from mlfromscratch.deep_learning.optimizers import Adam from mlfromscratch.deep_learning.loss_functions import SquareLoss from mlfromscratch.deep_learning.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization diff --git a/mlfromscratch/examples/demo.py b/mlfromscratch/examples/demo.py index eaf5cf84..8c1c9285 100644 --- a/mlfromscratch/examples/demo.py +++ b/mlfromscratch/examples/demo.py @@ -1,13 +1,11 @@ from __future__ import print_function -import sys, os from sklearn import datasets import numpy as np -import pandas as pd +import math import matplotlib.pyplot as plt -from mlfromscratch.utils.data_manipulation import train_test_split, normalize, to_categorical -from mlfromscratch.utils.data_operation import accuracy_score -from mlfromscratch.deep_learning.optimizers import GradientDescent, Adam +from mlfromscratch.utils import train_test_split, normalize, to_categorical, accuracy_score +from mlfromscratch.deep_learning.optimizers import Adam from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.deep_learning.activation_functions import Softmax from mlfromscratch.utils.kernels import * diff --git a/mlfromscratch/examples/gradient_boosting_classifier.py b/mlfromscratch/examples/gradient_boosting_classifier.py index f4eee1dd..fb541ef1 100644 --- a/mlfromscratch/examples/gradient_boosting_classifier.py +++ b/mlfromscratch/examples/gradient_boosting_classifier.py @@ -1,14 +1,11 @@ from __future__ import division, print_function import numpy as np from sklearn import datasets -import sys -import os import matplotlib.pyplot as plt # Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split -from mlfromscratch.utils.data_operation import accuracy_score -from mlfromscratch.utils.loss_functions import CrossEntropy +from mlfromscratch.utils import train_test_split, accuracy_score +from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.utils import Plot from mlfromscratch.supervised_learning import GradientBoostingClassifier diff --git a/mlfromscratch/examples/gradient_boosting_regressor.py b/mlfromscratch/examples/gradient_boosting_regressor.py index 93ad0846..8bcd5454 100644 --- a/mlfromscratch/examples/gradient_boosting_regressor.py +++ b/mlfromscratch/examples/gradient_boosting_regressor.py @@ -1,18 +1,14 @@ from __future__ import division, print_function import numpy as np -from sklearn import datasets import pandas as pd import matplotlib.pyplot as plt -from scipy.optimize import line_search import progressbar -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, standardize, to_categorical -from mlfromscratch.utils.data_operation import mean_squared_error, accuracy_score +from mlfromscratch.utils import train_test_split, standardize, to_categorical +from mlfromscratch.utils import mean_squared_error, accuracy_score, Plot from mlfromscratch.utils.loss_functions import SquareLoss from mlfromscratch.utils.misc import bar_widgets from mlfromscratch.supervised_learning import GradientBoostingRegressor -from mlfromscratch.utils import Plot def main(): diff --git a/mlfromscratch/examples/k_means.py b/mlfromscratch/examples/k_means.py index ae6a23ed..fb8d42ea 100644 --- a/mlfromscratch/examples/k_means.py +++ b/mlfromscratch/examples/k_means.py @@ -1,8 +1,4 @@ from __future__ import division, print_function -import sys -import os -import math -import random from sklearn import datasets import numpy as np diff --git a/mlfromscratch/examples/k_nearest_neighbors.py b/mlfromscratch/examples/k_nearest_neighbors.py index 7bb81bd7..62df6551 100644 --- a/mlfromscratch/examples/k_nearest_neighbors.py +++ b/mlfromscratch/examples/k_nearest_neighbors.py @@ -1,16 +1,10 @@ from __future__ import print_function -import sys -import os -import math import numpy as np import matplotlib.pyplot as plt from sklearn import datasets -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, normalize -from mlfromscratch.utils.data_operation import euclidean_distance, accuracy_score -from mlfromscratch.unsupervised_learning import PCA -from mlfromscratch.utils import Plot +from mlfromscratch.utils import train_test_split, normalize, accuracy_score +from mlfromscratch.utils import euclidean_distance, Plot from mlfromscratch.supervised_learning import KNN def main(): diff --git a/mlfromscratch/examples/linear_discriminant_analysis.py b/mlfromscratch/examples/linear_discriminant_analysis.py index 1277a6de..65f76266 100644 --- a/mlfromscratch/examples/linear_discriminant_analysis.py +++ b/mlfromscratch/examples/linear_discriminant_analysis.py @@ -1,17 +1,12 @@ from __future__ import print_function -import sys -import os from sklearn import datasets import matplotlib.pyplot as plt import numpy as np -import pandas as pd -# Import helper functions from mlfromscratch.supervised_learning import LDA -from mlfromscratch.utils.data_operation import calculate_covariance_matrix, accuracy_score -from mlfromscratch.utils.data_manipulation import normalize, standardize, train_test_split +from mlfromscratch.utils import calculate_covariance_matrix, accuracy_score +from mlfromscratch.utils import normalize, standardize, train_test_split, Plot from mlfromscratch.unsupervised_learning import PCA -from mlfromscratch.utils import Plot def main(): # Load the dataset diff --git a/mlfromscratch/examples/linear_regression.py b/mlfromscratch/examples/linear_regression.py index 8a916b5b..b5bcaccb 100644 --- a/mlfromscratch/examples/linear_regression.py +++ b/mlfromscratch/examples/linear_regression.py @@ -2,10 +2,8 @@ import pandas as pd import matplotlib.pyplot as plt -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, polynomial_features -from mlfromscratch.utils.data_operation import mean_squared_error -from mlfromscratch.utils import Plot +from mlfromscratch.utils import train_test_split, polynomial_features +from mlfromscratch.utils import mean_squared_error, Plot from mlfromscratch.supervised_learning import LinearRegression def main(): diff --git a/mlfromscratch/examples/logistic_regression.py b/mlfromscratch/examples/logistic_regression.py index 7f8b75fc..6bf7b9df 100644 --- a/mlfromscratch/examples/logistic_regression.py +++ b/mlfromscratch/examples/logistic_regression.py @@ -1,17 +1,11 @@ from __future__ import print_function -import sys -import os -import math from sklearn import datasets import numpy as np -import pandas as pd import matplotlib.pyplot as plt # Import helper functions -from mlfromscratch.utils.data_manipulation import make_diagonal, normalize, train_test_split -from mlfromscratch.utils.data_operation import accuracy_score -from mlfromscratch.utils.activation_functions import Sigmoid -from mlfromscratch.utils.optimizers import GradientDescent +from mlfromscratch.utils import make_diagonal, normalize, train_test_split, accuracy_score +from mlfromscratch.deep_learning.activation_functions import Sigmoid from mlfromscratch.utils import Plot from mlfromscratch.supervised_learning import LogisticRegression @@ -30,7 +24,6 @@ def main(): y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) - print ("Accuracy:", accuracy) # Reduce dimension to two using PCA and plot the results diff --git a/mlfromscratch/examples/multi_class_lda.py b/mlfromscratch/examples/multi_class_lda.py index f5076927..7c2d0741 100644 --- a/mlfromscratch/examples/multi_class_lda.py +++ b/mlfromscratch/examples/multi_class_lda.py @@ -1,13 +1,9 @@ from __future__ import print_function -import sys -import os -import scipy from sklearn import datasets import numpy as np -# Import helper functions from mlfromscratch.supervised_learning import MultiClassLDA -from mlfromscratch.utils.data_manipulation import normalize +from mlfromscratch.utils import normalize def main(): # Load the dataset diff --git a/mlfromscratch/examples/multilayer_perceptron.py b/mlfromscratch/examples/multilayer_perceptron.py index b9e86d5e..64d51d59 100644 --- a/mlfromscratch/examples/multilayer_perceptron.py +++ b/mlfromscratch/examples/multilayer_perceptron.py @@ -2,18 +2,15 @@ from __future__ import print_function from sklearn import datasets import matplotlib.pyplot as plt -import math import numpy as np # Import helper functions from mlfromscratch.deep_learning import NeuralNetwork -from mlfromscratch.utils.data_manipulation import train_test_split, to_categorical, normalize -from mlfromscratch.utils.data_manipulation import get_random_subsets, shuffle_data -from mlfromscratch.utils.data_operation import accuracy_score +from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot +from mlfromscratch.utils import get_random_subsets, shuffle_data, accuracy_score from mlfromscratch.deep_learning.optimizers import StochasticGradientDescent, Adam, RMSprop, Adagrad, Adadelta from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.utils.misc import bar_widgets -from mlfromscratch.utils import Plot from mlfromscratch.deep_learning.layers import Dense, Dropout, Activation diff --git a/mlfromscratch/examples/naive_bayes.py b/mlfromscratch/examples/naive_bayes.py index ae5e282b..fc1d48f4 100644 --- a/mlfromscratch/examples/naive_bayes.py +++ b/mlfromscratch/examples/naive_bayes.py @@ -1,10 +1,7 @@ from __future__ import division, print_function from sklearn import datasets import numpy as np - -from mlfromscratch.utils.data_manipulation import train_test_split, normalize -from mlfromscratch.utils.data_operation import accuracy_score -from mlfromscratch.utils import Plot +from mlfromscratch.utils import train_test_split, normalize, accuracy_score, Plot from mlfromscratch.supervised_learning import NaiveBayes def main(): diff --git a/mlfromscratch/examples/partitioning_around_medoids.py b/mlfromscratch/examples/partitioning_around_medoids.py index d528598f..2a6e354b 100644 --- a/mlfromscratch/examples/partitioning_around_medoids.py +++ b/mlfromscratch/examples/partitioning_around_medoids.py @@ -1,7 +1,3 @@ -import sys -import os -import math -import random from sklearn import datasets import numpy as np diff --git a/mlfromscratch/examples/perceptron.py b/mlfromscratch/examples/perceptron.py index c9109973..efa0d71a 100644 --- a/mlfromscratch/examples/perceptron.py +++ b/mlfromscratch/examples/perceptron.py @@ -3,8 +3,7 @@ import numpy as np # Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, normalize, to_categorical -from mlfromscratch.utils.data_operation import accuracy_score +from mlfromscratch.utils import train_test_split, normalize, to_categorical, accuracy_score from mlfromscratch.deep_learning.activation_functions import Sigmoid from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.utils import Plot diff --git a/mlfromscratch/examples/polynomial_regression.py b/mlfromscratch/examples/polynomial_regression.py index 59260f2a..7acf60a0 100644 --- a/mlfromscratch/examples/polynomial_regression.py +++ b/mlfromscratch/examples/polynomial_regression.py @@ -2,16 +2,10 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from sklearn import datasets -import sys -import os -import math # Import helper functions from mlfromscratch.supervised_learning import PolynomialRidgeRegression -from mlfromscratch.utils.data_manipulation import k_fold_cross_validation_sets, normalize -from mlfromscratch.utils.data_manipulation import train_test_split, polynomial_features -from mlfromscratch.utils.data_operation import mean_squared_error -from mlfromscratch.utils import Plot +from mlfromscratch.utils import k_fold_cross_validation_sets, normalize, mean_squared_error +from mlfromscratch.utils import train_test_split, polynomial_features, Plot def main(): diff --git a/mlfromscratch/examples/random_forest.py b/mlfromscratch/examples/random_forest.py index 3fe1a4e7..4b3ab154 100644 --- a/mlfromscratch/examples/random_forest.py +++ b/mlfromscratch/examples/random_forest.py @@ -1,15 +1,8 @@ from __future__ import division, print_function import numpy as np from sklearn import datasets -import sys -import os -import math - -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split -from mlfromscratch.utils.data_operation import accuracy_score +from mlfromscratch.utils import train_test_split, accuracy_score, Plot from mlfromscratch.supervised_learning import RandomForest -from mlfromscratch.utils import Plot def main(): data = datasets.load_iris() diff --git a/mlfromscratch/examples/recurrent_neural_network.py b/mlfromscratch/examples/recurrent_neural_network.py index ee4569e2..d066d2e4 100644 --- a/mlfromscratch/examples/recurrent_neural_network.py +++ b/mlfromscratch/examples/recurrent_neural_network.py @@ -1,19 +1,13 @@ - from __future__ import print_function -from sklearn import datasets import matplotlib.pyplot as plt -import math import numpy as np -# Import helper functions from mlfromscratch.deep_learning import NeuralNetwork -from mlfromscratch.utils.data_manipulation import train_test_split, to_categorical, normalize -from mlfromscratch.utils.data_manipulation import get_random_subsets, shuffle_data -from mlfromscratch.utils.data_operation import accuracy_score +from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot +from mlfromscratch.utils import get_random_subsets, shuffle_data, accuracy_score from mlfromscratch.deep_learning.optimizers import GradientDescent, Adam, RMSprop, Adagrad, Adadelta from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.utils.misc import bar_widgets -from mlfromscratch.utils import Plot from mlfromscratch.deep_learning.layers import RNN, Activation diff --git a/mlfromscratch/examples/ridge_regression.py b/mlfromscratch/examples/ridge_regression.py index 59260f2a..4532dd95 100644 --- a/mlfromscratch/examples/ridge_regression.py +++ b/mlfromscratch/examples/ridge_regression.py @@ -2,16 +2,10 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from sklearn import datasets -import sys -import os -import math # Import helper functions from mlfromscratch.supervised_learning import PolynomialRidgeRegression -from mlfromscratch.utils.data_manipulation import k_fold_cross_validation_sets, normalize -from mlfromscratch.utils.data_manipulation import train_test_split, polynomial_features -from mlfromscratch.utils.data_operation import mean_squared_error -from mlfromscratch.utils import Plot +from mlfromscratch.utils import k_fold_cross_validation_sets, normalize, Plot +from mlfromscratch.utils import train_test_split, polynomial_features, mean_squared_error def main(): diff --git a/mlfromscratch/examples/support_vector_machine.py b/mlfromscratch/examples/support_vector_machine.py index 5665ceaa..e5543462 100644 --- a/mlfromscratch/examples/support_vector_machine.py +++ b/mlfromscratch/examples/support_vector_machine.py @@ -1,15 +1,10 @@ from __future__ import division, print_function -import math -import sys -import os import numpy as np from sklearn import datasets # Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, normalize -from mlfromscratch.utils.data_operation import accuracy_score +from mlfromscratch.utils import train_test_split, normalize, accuracy_score, Plot from mlfromscratch.utils.kernels import * -from mlfromscratch.utils import Plot from mlfromscratch.supervised_learning import SupportVectorMachine def main(): diff --git a/mlfromscratch/examples/xgboost.py b/mlfromscratch/examples/xgboost.py index 9534f88c..244e53a1 100644 --- a/mlfromscratch/examples/xgboost.py +++ b/mlfromscratch/examples/xgboost.py @@ -1,15 +1,10 @@ from __future__ import division, print_function import numpy as np from sklearn import datasets -import sys -import os import matplotlib.pyplot as plt import progressbar - -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, standardize, to_categorical, normalize -from mlfromscratch.utils.data_operation import mean_squared_error, accuracy_score -from mlfromscratch.utils import Plot +from mlfromscratch.utils import train_test_split, standardize, to_categorical, normalize +from mlfromscratch.utils import mean_squared_error, accuracy_score, Plot from mlfromscratch.supervised_learning import XGBoost def main(): diff --git a/mlfromscratch/reinforcement_learning/deep_q_network.py b/mlfromscratch/reinforcement_learning/deep_q_network.py index 7c0f35e8..5b9561c4 100644 --- a/mlfromscratch/reinforcement_learning/deep_q_network.py +++ b/mlfromscratch/reinforcement_learning/deep_q_network.py @@ -1,10 +1,6 @@ from __future__ import print_function -import sys -import os -import math import random import numpy as np -import progressbar import gym from collections import deque diff --git a/mlfromscratch/supervised_learning/adaboost.py b/mlfromscratch/supervised_learning/adaboost.py index def039cc..fa235977 100644 --- a/mlfromscratch/supervised_learning/adaboost.py +++ b/mlfromscratch/supervised_learning/adaboost.py @@ -1,16 +1,12 @@ from __future__ import division, print_function -import math -import sys -import os import numpy as np +import math from sklearn import datasets import matplotlib.pyplot as plt import pandas as pd # Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split -from mlfromscratch.utils.data_operation import accuracy_score -from mlfromscratch.utils import Plot +from mlfromscratch.utils import train_test_split, accuracy_score, Plot # Decision stump used as weak classifier in this impl. of Adaboost class DecisionStump(): diff --git a/mlfromscratch/supervised_learning/bayesian_regression.py b/mlfromscratch/supervised_learning/bayesian_regression.py index 293ec31b..db3cdbf9 100644 --- a/mlfromscratch/supervised_learning/bayesian_regression.py +++ b/mlfromscratch/supervised_learning/bayesian_regression.py @@ -1,17 +1,7 @@ from __future__ import print_function -import matplotlib.pyplot as plt import numpy as np -from sklearn import datasets -import pandas as pd -import sys -import os -import math - from scipy.stats import chi2, multivariate_normal - -# Import helper functions -from mlfromscratch.utils.data_operation import mean_squared_error -from mlfromscratch.utils.data_manipulation import train_test_split, polynomial_features +from mlfromscratch.utils import mean_squared_error, train_test_split, polynomial_features diff --git a/mlfromscratch/supervised_learning/decision_tree.py b/mlfromscratch/supervised_learning/decision_tree.py index 983cb500..2f548fe9 100644 --- a/mlfromscratch/supervised_learning/decision_tree.py +++ b/mlfromscratch/supervised_learning/decision_tree.py @@ -1,15 +1,8 @@ from __future__ import division, print_function import numpy as np -from sklearn import datasets -import matplotlib.pyplot as plt -import sys -import os - -# Import helper functions -from mlfromscratch.utils.data_manipulation import divide_on_feature -from mlfromscratch.utils.data_manipulation import train_test_split, standardize -from mlfromscratch.utils.data_operation import calculate_entropy, accuracy_score -from mlfromscratch.utils.data_operation import mean_squared_error, calculate_variance + +from mlfromscratch.utils import divide_on_feature, train_test_split, standardize, mean_squared_error +from mlfromscratch.utils import calculate_entropy, accuracy_score, calculate_variance from mlfromscratch.unsupervised_learning import PCA from mlfromscratch.utils import Plot diff --git a/mlfromscratch/supervised_learning/gradient_boosting.py b/mlfromscratch/supervised_learning/gradient_boosting.py index 3b7f82b0..47e4ca7f 100644 --- a/mlfromscratch/supervised_learning/gradient_boosting.py +++ b/mlfromscratch/supervised_learning/gradient_boosting.py @@ -1,16 +1,11 @@ from __future__ import division, print_function import numpy as np -from sklearn import datasets -import sys -import os -import matplotlib.pyplot as plt -from scipy.optimize import line_search import progressbar # Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, standardize, to_categorical -from mlfromscratch.utils.data_operation import mean_squared_error, accuracy_score -from mlfromscratch.utils.loss_functions import SquareLoss, CrossEntropy +from mlfromscratch.utils import train_test_split, standardize, to_categorical +from mlfromscratch.utils import mean_squared_error, accuracy_score +from mlfromscratch.deep_learning.loss_functions import SquareLoss, CrossEntropy from mlfromscratch.supervised_learning.decision_tree import RegressionTree from mlfromscratch.unsupervised_learning import PCA from mlfromscratch.utils.misc import bar_widgets diff --git a/mlfromscratch/supervised_learning/k_nearest_neighbors.py b/mlfromscratch/supervised_learning/k_nearest_neighbors.py index cd35a910..a4bf1fe5 100644 --- a/mlfromscratch/supervised_learning/k_nearest_neighbors.py +++ b/mlfromscratch/supervised_learning/k_nearest_neighbors.py @@ -1,14 +1,6 @@ from __future__ import print_function -import sys -import os -import math import numpy as np -import matplotlib.pyplot as plt -from sklearn import datasets - -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, normalize -from mlfromscratch.utils.data_operation import euclidean_distance, accuracy_score +from mlfromscratch.utils import train_test_split, normalize, euclidean_distance, accuracy_score from mlfromscratch.unsupervised_learning import PCA from mlfromscratch.utils import Plot diff --git a/mlfromscratch/supervised_learning/linear_discriminant_analysis.py b/mlfromscratch/supervised_learning/linear_discriminant_analysis.py index 840ceabc..77298101 100644 --- a/mlfromscratch/supervised_learning/linear_discriminant_analysis.py +++ b/mlfromscratch/supervised_learning/linear_discriminant_analysis.py @@ -1,14 +1,6 @@ from __future__ import print_function -import sys -import os -from sklearn import datasets import numpy as np -import pandas as pd - -# Import helper functions -from mlfromscratch.utils.data_operation import calculate_covariance_matrix -from mlfromscratch.utils.data_manipulation import normalize, standardize - +from mlfromscratch.utils import calculate_covariance_matrix, normalize, standardize class LDA(): """The Linear Discriminant Analysis classifier, also known as Fisher's linear discriminant. diff --git a/mlfromscratch/supervised_learning/logistic_regression.py b/mlfromscratch/supervised_learning/logistic_regression.py index 871b67f7..b9de15ae 100644 --- a/mlfromscratch/supervised_learning/logistic_regression.py +++ b/mlfromscratch/supervised_learning/logistic_regression.py @@ -1,10 +1,8 @@ from __future__ import print_function import numpy as np - -# Import helper functions -from mlfromscratch.utils.data_manipulation import make_diagonal -from mlfromscratch.utils.activation_functions import Sigmoid -from mlfromscratch.utils import Plot +import math +from mlfromscratch.utils import make_diagonal, Plot +from mlfromscratch.deep_learning.activation_functions import Sigmoid class LogisticRegression(): diff --git a/mlfromscratch/supervised_learning/multi_class_lda.py b/mlfromscratch/supervised_learning/multi_class_lda.py index c394430b..a3c8bd69 100644 --- a/mlfromscratch/supervised_learning/multi_class_lda.py +++ b/mlfromscratch/supervised_learning/multi_class_lda.py @@ -1,16 +1,7 @@ from __future__ import print_function -import sys -import os -import scipy -from sklearn import datasets -from sklearn.preprocessing import StandardScaler -from mpl_toolkits.mplot3d import Axes3D import matplotlib.pyplot as plt import numpy as np - -# Import helper functions -from mlfromscratch.utils.data_operation import calculate_covariance_matrix -from mlfromscratch.utils.data_manipulation import normalize, standardize +from mlfromscratch.utils import calculate_covariance_matrix, normalize, standardize class MultiClassLDA(): diff --git a/mlfromscratch/supervised_learning/multilayer_perceptron.py b/mlfromscratch/supervised_learning/multilayer_perceptron.py index ea9c9dfd..012dcb0b 100644 --- a/mlfromscratch/supervised_learning/multilayer_perceptron.py +++ b/mlfromscratch/supervised_learning/multilayer_perceptron.py @@ -1,16 +1,9 @@ from __future__ import print_function -from sklearn import datasets -import sys -import os -import math -import pandas as pd -import matplotlib.pyplot as plt import numpy as np +import math +from sklearn import datasets -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, to_categorical, normalize -from mlfromscratch.utils.data_operation import accuracy_score -from mlfromscratch.utils import Plot +from mlfromscratch.utils import train_test_split, to_categorical, normalize, accuracy_score, Plot from mlfromscratch.deep_learning.activation_functions import Sigmoid, Softmax from mlfromscratch.deep_learning.loss_functions import CrossEntropy diff --git a/mlfromscratch/supervised_learning/naive_bayes.py b/mlfromscratch/supervised_learning/naive_bayes.py index 948dcfdb..499c0ce1 100644 --- a/mlfromscratch/supervised_learning/naive_bayes.py +++ b/mlfromscratch/supervised_learning/naive_bayes.py @@ -1,16 +1,8 @@ from __future__ import division, print_function -from sklearn import datasets -import matplotlib.pyplot as plt -import math -import sys -import os import numpy as np -import pandas as pd - -from mlfromscratch.utils.data_manipulation import train_test_split, normalize -from mlfromscratch.utils.data_operation import accuracy_score -from mlfromscratch.unsupervised_learning import PCA -from mlfromscratch.utils import Plot +import math +from mlfromscratch.utils import train_test_split, normalize +from mlfromscratch.utils import Plot, accuracy_score class NaiveBayes(): diff --git a/mlfromscratch/supervised_learning/perceptron.py b/mlfromscratch/supervised_learning/perceptron.py index 6c9de3a1..adb5e0df 100644 --- a/mlfromscratch/supervised_learning/perceptron.py +++ b/mlfromscratch/supervised_learning/perceptron.py @@ -1,14 +1,9 @@ from __future__ import print_function -import sys -import os import math -from sklearn import datasets -import matplotlib.pyplot as plt import numpy as np # Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, to_categorical, normalize -from mlfromscratch.utils.data_operation import accuracy_score +from mlfromscratch.utils import train_test_split, to_categorical, normalize, accuracy_score from mlfromscratch.deep_learning.activation_functions import Sigmoid, ReLU, SoftPlus, LeakyReLU, TanH, ELU from mlfromscratch.deep_learning.loss_functions import CrossEntropy, SquareLoss from mlfromscratch.utils import Plot diff --git a/mlfromscratch/supervised_learning/random_forest.py b/mlfromscratch/supervised_learning/random_forest.py index aea7012f..d9922e77 100644 --- a/mlfromscratch/supervised_learning/random_forest.py +++ b/mlfromscratch/supervised_learning/random_forest.py @@ -1,14 +1,11 @@ from __future__ import division, print_function import numpy as np -from sklearn import datasets -import sys -import os import math import progressbar # Import helper functions -from mlfromscratch.utils.data_manipulation import divide_on_feature, train_test_split, get_random_subsets, normalize -from mlfromscratch.utils.data_operation import accuracy_score, calculate_entropy +from mlfromscratch.utils import divide_on_feature, train_test_split, get_random_subsets, normalize +from mlfromscratch.utils import accuracy_score, calculate_entropy from mlfromscratch.unsupervised_learning import PCA from mlfromscratch.supervised_learning import ClassificationTree from mlfromscratch.utils.misc import bar_widgets diff --git a/mlfromscratch/supervised_learning/regression.py b/mlfromscratch/supervised_learning/regression.py index 4c3957f4..07a5c32f 100644 --- a/mlfromscratch/supervised_learning/regression.py +++ b/mlfromscratch/supervised_learning/regression.py @@ -1,8 +1,7 @@ from __future__ import print_function import numpy as np -# Import helper functions -from mlfromscratch.utils.data_manipulation import normalize -from mlfromscratch.utils.data_manipulation import polynomial_features +import math +from mlfromscratch.utils import normalize, polynomial_features class Regression(object): @@ -35,7 +34,7 @@ def fit(self, X, y): # Get weights by gradient descent opt. if self.gradient_descent: # Initial weights randomly [-1/N, 1/N] - limit = 1 / np.sqrt(n_features) + limit = 1 / math.sqrt(n_features) self.w = np.random.uniform(-limit, limit, (n_features, )) # Do gradient descent for n_iterations for _ in range(self.n_iterations): diff --git a/mlfromscratch/supervised_learning/support_vector_machine.py b/mlfromscratch/supervised_learning/support_vector_machine.py index 737fac14..e8dbce8e 100644 --- a/mlfromscratch/supervised_learning/support_vector_machine.py +++ b/mlfromscratch/supervised_learning/support_vector_machine.py @@ -1,17 +1,9 @@ from __future__ import division, print_function -import math -import sys -import os import numpy as np import cvxopt -from sklearn import datasets - -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, normalize -from mlfromscratch.utils.data_operation import accuracy_score +from mlfromscratch.utils import train_test_split, normalize, accuracy_score from mlfromscratch.utils.kernels import * -from mlfromscratch.unsupervised_learning import PCA from mlfromscratch.utils import Plot # Hide cvxopt output diff --git a/mlfromscratch/supervised_learning/xgboost.py b/mlfromscratch/supervised_learning/xgboost.py index cb3b5789..a6219ca4 100644 --- a/mlfromscratch/supervised_learning/xgboost.py +++ b/mlfromscratch/supervised_learning/xgboost.py @@ -1,17 +1,11 @@ from __future__ import division, print_function import numpy as np -from sklearn import datasets -import sys -import os -import matplotlib.pyplot as plt import progressbar -# Import helper functions -from mlfromscratch.utils.data_manipulation import train_test_split, standardize, to_categorical, normalize -from mlfromscratch.utils.data_operation import mean_squared_error, accuracy_score +from mlfromscratch.utils import train_test_split, standardize, to_categorical, normalize +from mlfromscratch.utils import mean_squared_error, accuracy_score from mlfromscratch.supervised_learning import XGBoostRegressionTree -from mlfromscratch.unsupervised_learning import PCA -from mlfromscratch.utils.activation_functions import Sigmoid +from mlfromscratch.deep_learning.activation_functions import Sigmoid from mlfromscratch.utils.misc import bar_widgets from mlfromscratch.utils import Plot diff --git a/mlfromscratch/unsupervised_learning/apriori.py b/mlfromscratch/unsupervised_learning/apriori.py index fdf09478..a603a908 100644 --- a/mlfromscratch/unsupervised_learning/apriori.py +++ b/mlfromscratch/unsupervised_learning/apriori.py @@ -1,5 +1,4 @@ from __future__ import division, print_function -import pandas as pd import numpy as np import itertools diff --git a/mlfromscratch/unsupervised_learning/dbscan.py b/mlfromscratch/unsupervised_learning/dbscan.py index e5684ea2..022639b2 100644 --- a/mlfromscratch/unsupervised_learning/dbscan.py +++ b/mlfromscratch/unsupervised_learning/dbscan.py @@ -1,15 +1,5 @@ -import sys -import os -import math -import random -from sklearn import datasets import numpy as np - -# Import helper functions -from mlfromscratch.utils import Plot -from mlfromscratch.utils.data_manipulation import normalize -from mlfromscratch.utils.data_operation import euclidean_distance -from mlfromscratch.unsupervised_learning import PCA +from mlfromscratch.utils import Plot, euclidean_distance, normalize class DBSCAN(): diff --git a/mlfromscratch/unsupervised_learning/dcgan.py b/mlfromscratch/unsupervised_learning/dcgan.py index 8371fe7c..131961c7 100644 --- a/mlfromscratch/unsupervised_learning/dcgan.py +++ b/mlfromscratch/unsupervised_learning/dcgan.py @@ -1,20 +1,12 @@ from __future__ import print_function -from sklearn import datasets -import sys -import os -import math -import copy -import pandas as pd import matplotlib.pyplot as plt import numpy as np import progressbar - from sklearn.datasets import fetch_mldata -# Import helper functions -from mlfromscratch.utils.optimizers import Adam -from mlfromscratch.utils.loss_functions import CrossEntropy -from mlfromscratch.utils.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization, ZeroPadding2D, Conv2D, UpSampling2D +from mlfromscratch.deep_learning.optimizers import Adam +from mlfromscratch.deep_learning.loss_functions import CrossEntropy +from mlfromscratch.deep_learning.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization, ZeroPadding2D, Conv2D, UpSampling2D from mlfromscratch.supervised_learning import NeuralNetwork diff --git a/mlfromscratch/unsupervised_learning/fp_growth.py b/mlfromscratch/unsupervised_learning/fp_growth.py index d155b3e6..09ffd95b 100644 --- a/mlfromscratch/unsupervised_learning/fp_growth.py +++ b/mlfromscratch/unsupervised_learning/fp_growth.py @@ -1,5 +1,4 @@ from __future__ import division, print_function -import pandas as pd import numpy as np import itertools diff --git a/mlfromscratch/unsupervised_learning/gaussian_mixture_model.py b/mlfromscratch/unsupervised_learning/gaussian_mixture_model.py index 45c6fa54..17f88722 100644 --- a/mlfromscratch/unsupervised_learning/gaussian_mixture_model.py +++ b/mlfromscratch/unsupervised_learning/gaussian_mixture_model.py @@ -1,15 +1,9 @@ from __future__ import division, print_function -import sys -import os import math -import random from sklearn import datasets import numpy as np -# Import helper functions -from mlfromscratch.utils.data_manipulation import normalize -from mlfromscratch.utils.data_operation import euclidean_distance, calculate_covariance_matrix -from mlfromscratch.unsupervised_learning import PCA +from mlfromscratch.utils import normalize, euclidean_distance, calculate_covariance_matrix from mlfromscratch.utils import Plot diff --git a/mlfromscratch/unsupervised_learning/generative_adversarial_network.py b/mlfromscratch/unsupervised_learning/generative_adversarial_network.py index 6a44819d..39bfbabe 100644 --- a/mlfromscratch/unsupervised_learning/generative_adversarial_network.py +++ b/mlfromscratch/unsupervised_learning/generative_adversarial_network.py @@ -1,17 +1,12 @@ from __future__ import print_function from sklearn import datasets -import sys -import os import math -import copy -import pandas as pd import matplotlib.pyplot as plt import numpy as np import progressbar from sklearn.datasets import fetch_mldata -# Import helper functions from mlfromscratch.deep_learning.optimizers import Adam from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.deep_learning.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization diff --git a/mlfromscratch/unsupervised_learning/k_means.py b/mlfromscratch/unsupervised_learning/k_means.py index 321ff800..ed33ff49 100644 --- a/mlfromscratch/unsupervised_learning/k_means.py +++ b/mlfromscratch/unsupervised_learning/k_means.py @@ -1,16 +1,6 @@ -import sys -import os -import math -import random -from sklearn import datasets import numpy as np - -# Import helper functions -from mlfromscratch.utils.data_manipulation import normalize -from mlfromscratch.utils.data_operation import euclidean_distance +from mlfromscratch.utils import normalize, euclidean_distance, Plot from mlfromscratch.unsupervised_learning import * -from mlfromscratch.utils import Plot - class KMeans(): """A simple clustering method that forms k clusters by iteratively reassigning diff --git a/mlfromscratch/unsupervised_learning/partitioning_around_medoids.py b/mlfromscratch/unsupervised_learning/partitioning_around_medoids.py index b43ec898..b424929a 100644 --- a/mlfromscratch/unsupervised_learning/partitioning_around_medoids.py +++ b/mlfromscratch/unsupervised_learning/partitioning_around_medoids.py @@ -1,15 +1,6 @@ -import sys -import os -import math -import random -from sklearn import datasets import numpy as np - -# Import helper functions -from mlfromscratch.utils.data_manipulation import normalize -from mlfromscratch.utils.data_operation import euclidean_distance +from mlfromscratch.utils import normalize, euclidean_distance, Plot from mlfromscratch.unsupervised_learning import PCA -from mlfromscratch.utils import Plot class PAM(): diff --git a/mlfromscratch/unsupervised_learning/principal_component_analysis.py b/mlfromscratch/unsupervised_learning/principal_component_analysis.py index ab5dd81a..f5f9fdca 100644 --- a/mlfromscratch/unsupervised_learning/principal_component_analysis.py +++ b/mlfromscratch/unsupervised_learning/principal_component_analysis.py @@ -1,11 +1,7 @@ from __future__ import print_function -import sys -import os import numpy as np - -from mlfromscratch.utils.data_operation import calculate_covariance_matrix -from mlfromscratch.utils.data_operation import calculate_correlation_matrix -from mlfromscratch.utils.data_manipulation import standardize +from mlfromscratch.utils import calculate_covariance_matrix, standardize +from mlfromscratch.utils import calculate_correlation_matrix class PCA(): diff --git a/mlfromscratch/utils/__init__.py b/mlfromscratch/utils/__init__.py index 69b52784..2d503c37 100644 --- a/mlfromscratch/utils/__init__.py +++ b/mlfromscratch/utils/__init__.py @@ -1 +1,3 @@ from .misc import Plot +from .data_manipulation import * +from .data_operation import * \ No newline at end of file diff --git a/mlfromscratch/utils/data_manipulation.py b/mlfromscratch/utils/data_manipulation.py index 438635dd..c7335670 100644 --- a/mlfromscratch/utils/data_manipulation.py +++ b/mlfromscratch/utils/data_manipulation.py @@ -6,12 +6,14 @@ def shuffle_data(X, y, seed=None): + """ Random shuffle of the samples in X and y """ if seed: np.random.seed(seed) idx = np.arange(X.shape[0]) np.random.shuffle(idx) return X[idx], y[idx] + def batch_iterator(X, y=None, batch_size=64): """ Simple batch generator """ n_samples = X.shape[0] @@ -22,9 +24,10 @@ def batch_iterator(X, y=None, batch_size=64): else: yield X[begin:end] -# Divide dataset based on if sample value on feature index is larger than -# the given threshold + def divide_on_feature(X, feature_i, threshold): + """ Divide dataset based on if sample value on feature index is larger than + the given threshold """ split_func = None if isinstance(threshold, int) or isinstance(threshold, float): split_func = lambda sample: sample[feature_i] >= threshold @@ -38,7 +41,6 @@ def divide_on_feature(X, feature_i, threshold): def polynomial_features(X, degree): - n_samples, n_features = np.shape(X) def index_combinations(): @@ -55,8 +57,9 @@ def index_combinations(): return X_new -# Return random subsets (with replacements) of the data + def get_random_subsets(X, y, n_subsets, replacements=True): + """ Return random subsets (with replacements) of the data """ n_samples = np.shape(X)[0] # Concatenate x and y and do a random shuffle X_y = np.concatenate((X, y.reshape((1, len(y))).T), axis=1) @@ -64,7 +67,7 @@ def get_random_subsets(X, y, n_subsets, replacements=True): subsets = [] # Uses 50% of training samples without replacements - subsample_size = n_samples // 2 + subsample_size = int(n_samples // 2) if replacements: subsample_size = n_samples # 100% with replacements @@ -79,15 +82,15 @@ def get_random_subsets(X, y, n_subsets, replacements=True): return subsets -# Normalize the dataset X def normalize(X, axis=-1, order=2): + """ Normalize the dataset X """ l2 = np.atleast_1d(np.linalg.norm(X, order, axis)) l2[l2 == 0] = 1 return X / np.expand_dims(l2, axis) -# Standardize the dataset X def standardize(X): + """ Standardize the dataset X """ X_std = X mean = X.mean(axis=0) std = X.std(axis=0) @@ -98,8 +101,8 @@ def standardize(X): return X_std -# Split the data into train and test sets def train_test_split(X, y, test_size=0.5, shuffle=True, seed=None): + """ Split the data into train and test sets """ if shuffle: X, y = shuffle_data(X, y, seed) # Split the training data from test data in the ratio specified in @@ -111,8 +114,8 @@ def train_test_split(X, y, test_size=0.5, shuffle=True, seed=None): return X_train, X_test, y_train, y_test -# Split the data into k sets of training / test data def k_fold_cross_validation_sets(X, y, k, shuffle=True): + """ Split the data into k sets of training / test data """ if shuffle: X, y = shuffle_data(X, y) @@ -142,8 +145,8 @@ def k_fold_cross_validation_sets(X, y, k, shuffle=True): return np.array(sets) -# One-hot encoding of nominal values def to_categorical(x, n_col=None): + """ One-hot encoding of nominal values """ if not n_col: n_col = np.amax(x) + 1 one_hot = np.zeros((x.shape[0], n_col)) @@ -151,13 +154,13 @@ def to_categorical(x, n_col=None): return one_hot -# Conversion from one-hot encoding to nominal def to_nominal(x): + """ Conversion from one-hot encoding to nominal """ return np.argmax(x, axis=1) -# Converts a vector into an diagonal matrix def make_diagonal(x): + """ Converts a vector into an diagonal matrix """ m = np.zeros((len(x), len(x))) for i in range(len(m[0])): m[i, i] = x[i] diff --git a/mlfromscratch/utils/data_operation.py b/mlfromscratch/utils/data_operation.py index f7a1f363..5e0bbb99 100644 --- a/mlfromscratch/utils/data_operation.py +++ b/mlfromscratch/utils/data_operation.py @@ -4,8 +4,8 @@ import sys -# Calculate the entropy of label array y def calculate_entropy(y): + """ Calculate the entropy of label array y """ log2 = lambda x: math.log(x) / math.log(2) unique_labels = np.unique(y) entropy = 0 @@ -16,54 +16,55 @@ def calculate_entropy(y): return entropy -# Returns the mean squared error between y_true and y_pred def mean_squared_error(y_true, y_pred): + """ Returns the mean squared error between y_true and y_pred """ mse = np.mean(np.power(y_true - y_pred, 2)) return mse -# Return the variance of the features in dataset X def calculate_variance(X): + """ Return the variance of the features in dataset X """ mean = np.ones(np.shape(X)) * X.mean(0) n_samples = np.shape(X)[0] variance = (1 / n_samples) * np.diag((X - mean).T.dot(X - mean)) - + return variance -# Calculate the standard deviations of the features in dataset X def calculate_std_dev(X): + """ Calculate the standard deviations of the features in dataset X """ std_dev = np.sqrt(calculate_variance(X)) - return std_dev -# Calculates the l2 distance between two vectors def euclidean_distance(x1, x2): + """ Calculates the l2 distance between two vectors """ distance = 0 # Squared distance between each coordinate for i in range(len(x1)): distance += pow((x1[i] - x2[i]), 2) - return math.sqrt(distance) -# Compare y_true to y_pred and return the accuracy + def accuracy_score(y_true, y_pred): + """ Compare y_true to y_pred and return the accuracy """ accuracy = np.sum(y_true == y_pred, axis=0) / len(y_true) return accuracy -# Calculate the covariance matrix for the dataset X -def calculate_covariance_matrix(X, Y=np.empty([0])): - if not Y.any(): + +def calculate_covariance_matrix(X, Y=None): + """ Calculate the covariance matrix for the dataset X """ + if Y is None: Y = X n_samples = np.shape(X)[0] covariance_matrix = (1 / (n_samples-1)) * (X - X.mean(axis=0)).T.dot(Y - Y.mean(axis=0)) return np.array(covariance_matrix, dtype=float) -# Calculate the correlation matrix for the dataset X -def calculate_correlation_matrix(X, Y=np.empty([0])): - if not Y.any(): + +def calculate_correlation_matrix(X, Y=None): + """ Calculate the correlation matrix for the dataset X """ + if Y is None: Y = X n_samples = np.shape(X)[0] covariance = (1 / n_samples) * (X - X.mean(0)).T.dot(Y - Y.mean(0))