
Commit

Cleanup
eriklindernoren committed Oct 4, 2017
1 parent 7d3e274 commit b6ef93b
Showing 11 changed files with 31 additions and 42 deletions.
5 changes: 5 additions & 0 deletions mlfromscratch/examples/neuroevolution.py
@@ -24,8 +24,13 @@ def model_builder(n_inputs, n_outputs):
model.add(Activation('relu'))
model.add(Dense(n_outputs))
model.add(Activation('softmax'))

return model

+ # Print the model summary of an individual in the population
+ print ()
+ model_builder(n_inputs=X.shape[1], n_outputs=y.shape[1]).summary()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

model = Neuroevolution(population_size=100,
4 changes: 2 additions & 2 deletions mlfromscratch/examples/random_forest.py
@@ -5,13 +5,13 @@
from mlfromscratch.supervised_learning import RandomForest

def main():
- data = datasets.load_iris()
+ data = datasets.load_digits()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

- clf = RandomForest()
+ clf = RandomForest(n_estimators=100)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

11 changes: 4 additions & 7 deletions mlfromscratch/supervised_learning/decision_tree.py
@@ -3,9 +3,6 @@

from mlfromscratch.utils import divide_on_feature, train_test_split, standardize, mean_squared_error
from mlfromscratch.utils import calculate_entropy, accuracy_score, calculate_variance
- from mlfromscratch.unsupervised_learning import PCA
- from mlfromscratch.utils import Plot


class DecisionNode():
"""Class that represents a decision node or leaf in the decision tree
@@ -118,10 +115,10 @@ def _build_tree(self, X, y, current_depth=0):
largest_impurity = impurity
best_criteria = {"feature_i": feature_i, "threshold": threshold}
best_sets = {
"leftX": Xy1[:, :n_features],
"lefty": Xy1[:, n_features:],
"rightX": Xy2[:, :n_features],
"righty": Xy2[:, n_features:]
"leftX": Xy1[:, :n_features], # X of left subtree
"lefty": Xy1[:, n_features:], # y of left subtree
"rightX": Xy2[:, :n_features], # X of right subtree
"righty": Xy2[:, n_features:] # y of right subtree
}

if largest_impurity > self.min_impurity:
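For context, a minimal NumPy sketch (hypothetical toy data) of what the best_sets slices above contain: Xy stacks X and y column-wise, so the first n_features columns are the inputs and the remaining columns the targets of each subtree.

import numpy as np

# Hypothetical toy data: 4 samples, 2 features, 1 target column
X = np.array([[1., 5.], [2., 6.], [3., 7.], [4., 8.]])
y = np.array([[0.], [0.], [1.], [1.]])
n_features = X.shape[1]

# Stack X and y so both can be split together on a feature threshold
Xy = np.concatenate((X, y), axis=1)

# Split on feature 0 at threshold 2.5 (a stand-in for divide_on_feature)
mask = Xy[:, 0] >= 2.5
Xy1, Xy2 = Xy[mask], Xy[~mask]

left_X, left_y = Xy1[:, :n_features], Xy1[:, n_features:]    # X and y of one subtree
right_X, right_y = Xy2[:, :n_features], Xy2[:, n_features:]  # X and y of the other
print(left_X.shape, left_y.shape, right_X.shape, right_y.shape)  # (2, 2) (2, 1) (2, 2) (2, 1)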
8 changes: 3 additions & 5 deletions mlfromscratch/supervised_learning/gradient_boosting.py
@@ -59,20 +59,18 @@ def __init__(self, n_estimators, learning_rate, min_samples_split,

def fit(self, X, y):
y_pred = np.full(np.shape(y), np.mean(y, axis=0))

for i in self.bar(range(self.n_estimators)):
- tree = self.trees[i]
gradient = self.loss.gradient(y, y_pred)
- tree.fit(X, gradient)
- update = tree.predict(X)
+ self.trees[i].fit(X, gradient)
+ update = self.trees[i].predict(X)
# Update y prediction
y_pred -= np.multiply(self.learning_rate, update)


def predict(self, X):
y_pred = np.array([])
# Make predictions
- for i, tree in enumerate(self.trees):
+ for tree in self.trees:
update = tree.predict(X)
update = np.multiply(self.learning_rate, update)
y_pred = -update if not y_pred.any() else y_pred - update
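A self-contained sketch mirroring the fit and predict loops above, with a trivial one-split stump standing in for the gradient-fitted regression trees; the data and the stump are hypothetical, only the loop mechanics match the diff.

import numpy as np

class SimpleStump:
    """ Stand-in for a regression tree: a single split on feature 0 at its median. """
    def fit(self, X, y):
        self.threshold = np.median(X[:, 0])
        left = X[:, 0] <= self.threshold
        self.left_value, self.right_value = y[left].mean(), y[~left].mean()
    def predict(self, X):
        return np.where(X[:, 0] <= self.threshold, self.left_value, self.right_value)

# Hypothetical 1-D regression problem
X = np.linspace(0, 1, 20).reshape(-1, 1)
y = 3.0 * X[:, 0] + 1.0

n_estimators, learning_rate = 10, 0.5
trees = [SimpleStump() for _ in range(n_estimators)]

# fit(): each tree is trained on the loss gradient at the current prediction
# (for squared loss the gradient is y_pred - y), then y_pred is nudged against it
y_pred = np.full(y.shape, np.mean(y))
for i in range(n_estimators):
    gradient = y_pred - y
    trees[i].fit(X, gradient)
    y_pred -= learning_rate * trees[i].predict(X)

# predict(): accumulate the scaled tree outputs; the first tree initializes the sum
y_out = np.array([])
for tree in trees:
    update = learning_rate * tree.predict(X)
    y_out = -update if not y_out.any() else y_out - update

print(np.abs(y - y_pred).mean())   # training error after boosting the stumps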
4 changes: 2 additions & 2 deletions mlfromscratch/supervised_learning/k_nearest_neighbors.py
@@ -16,7 +16,7 @@ def __init__(self, k=5):
self.k = k

def _vote(self, neighbors):
""" Return the most common label among the neighbors """
""" Return the most common class among the neighbor samples """
counts = np.bincount(neighbors[:, 1].astype('int'))
return counts.argmax()

@@ -36,7 +36,7 @@ def predict(self, X_test, X_train, y_train):
# Sort the list of observed samples from lowest to highest distance
# and select the k first
k_nearest_neighbors = neighbors[neighbors[:, 0].argsort()][:self.k]
- # Get the most common label among the neighbors
+ # Get the most common class among the neighbors
label = self._vote(k_nearest_neighbors)
y_pred[i] = label
return y_pred
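A minimal sketch of the majority vote that _vote performs over the class labels of the k nearest samples (hypothetical labels).

import numpy as np

# Hypothetical class labels of the k nearest samples
neighbor_labels = np.array([2, 0, 2, 1, 2])

counts = np.bincount(neighbor_labels.astype('int'))  # votes per class: [1, 1, 3]
print(counts.argmax())                                # most common class -> 2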
2 changes: 0 additions & 2 deletions mlfromscratch/supervised_learning/multilayer_perceptron.py
@@ -82,10 +82,8 @@ def fit(self, X, y):
# Use the trained model to predict labels of X
def predict(self, X):
# Forward pass:
- # Calculate hidden layer
hidden_input = X.dot(self.W) + self.w0
hidden_output = self.hidden_activation(hidden_input)
- # Calculate output layer
output_layer_input = hidden_output.dot(self.V) + self.v0
y_pred = self.output_activation(output_layer_input)
return y_pred
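A minimal sketch of the two-step forward pass in predict above, assuming a sigmoid hidden activation and a softmax output activation (hypothetical shapes and weights).

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def softmax(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

# Hypothetical sizes: 4 samples, 3 features, 5 hidden units, 2 classes
rng = np.random.default_rng(0)
X = rng.normal(size=(4, 3))
W, w0 = rng.normal(size=(3, 5)), np.zeros((1, 5))   # input -> hidden weights and bias
V, v0 = rng.normal(size=(5, 2)), np.zeros((1, 2))   # hidden -> output weights and bias

hidden_output = sigmoid(X.dot(W) + w0)              # hidden layer
y_pred = softmax(hidden_output.dot(V) + v0)         # output layer (class probabilities)
print(y_pred.sum(axis=1))                           # each row sums to 1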
12 changes: 4 additions & 8 deletions mlfromscratch/supervised_learning/neuroevolution.py
@@ -2,8 +2,6 @@
import numpy as np
import copy

- from mlfromscratch.utils.misc import bar_widgets

class Neuroevolution():
""" Evolutionary optimization of Neural Networks.
@@ -78,7 +76,7 @@ def _crossover(self, parent1, parent2):

def _calculate_fitness(self):
""" Evaluate the NNs on the test set to get fitness scores """
- for i, individual in enumerate(self.population):
+ for individual in self.population:
loss, acc = individual.test_on_batch(self.X, self.y)
individual.fitness = 1 / (loss + 1e-8)
individual.accuracy = acc
@@ -89,12 +87,10 @@ def evolve(self, X, y, n_generations):

self._initialize_population()

- # Print the model summary of the population's individuals
- print ()
- self.population[0].summary()

# The 40% highest fittest individuals will be selected for the next generation
n_winners = int(self.population_size * 0.4)
+ # The fittest 60% of the population will be selected as parents to form offspring
+ n_parents = self.population_size - n_winners

for epoch in range(n_generations):
# Determine the fitness of the individuals in the population
@@ -113,7 +109,7 @@
next_population = [self.population[i] for i in range(n_winners)]

# The fittest 60% of the population are selected as parents
- parents = [self.population[i] for i in range(self.population_size - n_winners)]
+ parents = [self.population[i] for i in range(n_parents)]
for i in np.arange(0, len(parents), 2):
# Perform crossover to produce offspring
child1, child2 = self._crossover(parents[i], parents[i+1])
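A small sketch of the selection arithmetic above: the top 40% survive unchanged, the fittest 60% act as parents and are paired two at a time for crossover (hypothetical fitness values).

import numpy as np

population_size = 10
# Hypothetical fitness scores for a population of 10 individuals
fitness = np.array([3.8, 9.1, 1.5, 6.2, 8.7, 0.7, 4.4, 7.5, 2.1, 5.9])

# Indices sorted from fittest to least fit
order = np.argsort(fitness)[::-1]

n_winners = int(population_size * 0.4)      # top 40% survive unchanged -> 4
n_parents = population_size - n_winners     # fittest 60% act as parents -> 6

winners = order[:n_winners]
parents = order[:n_parents]

# Parents are paired two at a time for crossover
pairs = [(parents[i], parents[i + 1]) for i in range(0, n_parents, 2)]
print(list(winners), pairs)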
6 changes: 4 additions & 2 deletions mlfromscratch/supervised_learning/perceptron.py
@@ -7,7 +7,8 @@
from mlfromscratch.deep_learning.activation_functions import Sigmoid, ReLU, SoftPlus, LeakyReLU, TanH, ELU
from mlfromscratch.deep_learning.loss_functions import CrossEntropy, SquareLoss
from mlfromscratch.utils import Plot

+ from mlfromscratch.utils.misc import bar_widgets
+ import progressbar

class Perceptron():
"""The Perceptron. One layer neural network classifier.
@@ -30,6 +31,7 @@ def __init__(self, n_iterations=20000, activation_function=Sigmoid, loss=SquareL
self.learning_rate = learning_rate
self.loss = loss()
self.activation_func = activation_function()
+ self.progressbar = progressbar.ProgressBar(widgets=bar_widgets)

def fit(self, X, y):
n_samples, n_features = np.shape(X)
@@ -40,7 +42,7 @@
self.W = np.random.uniform(-limit, limit, (n_features, n_outputs))
self.w0 = np.zeros((1, n_outputs))

- for i in range(self.n_iterations):
+ for i in self.progressbar(range(self.n_iterations)):
# Calculate outputs
linear_output = X.dot(self.W) + self.w0
y_pred = self.activation_func(linear_output)
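A sketch of the progress-bar usage introduced here, assuming the progressbar (progressbar2) package; bar_widgets in the repo is a predefined widget list, so a generic one stands in for it.

import progressbar

# A generic widget list standing in for the repo's bar_widgets
widgets = ['Training: ', progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()]

bar = progressbar.ProgressBar(widgets=widgets)
total = 0
for i in bar(range(100000)):   # wrapping the iterable updates the bar each iteration
    total += i
print(total)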
9 changes: 4 additions & 5 deletions mlfromscratch/supervised_learning/random_forest.py
@@ -31,10 +31,9 @@ class RandomForest():
The maximum depth of a tree.
"""
def __init__(self, n_estimators=100, max_features=None, min_samples_split=2,
- min_gain=1e-7, max_depth=float("inf")):
+ min_gain=0, max_depth=float("inf")):
self.n_estimators = n_estimators # Number of trees
self.max_features = max_features # Maximum number of features per tree
- self.feature_indices = [] # The indices of the features used for each tree
self.min_samples_split = min_samples_split
self.min_gain = min_gain # Minimum information gain req. to continue
self.max_depth = max_depth # Maximum depth for tree
@@ -64,7 +63,7 @@ def fit(self, X, y):
# Feature bagging (select random subsets of the features)
idx = np.random.choice(range(n_features), size=self.max_features, replace=True)
# Save the indices of the features for prediction
- self.feature_indices.append(idx)
+ self.trees[i].feature_indices = idx
# Choose the features corresponding to the indices
X_subset = X_subset[:, idx]
# Fit the tree to the data
@@ -74,8 +73,8 @@ def predict(self, X):
y_preds = np.empty((X.shape[0], len(self.trees)))
# Let each tree make a prediction on the data
for i, tree in enumerate(self.trees):
- # Select the features that the tree has trained on
- idx = self.feature_indices[i]
+ # Indices of the features that the tree has trained on
+ idx = tree.feature_indices
# Make a prediction based on those features
prediction = tree.predict(X[:, idx])
y_preds[:, i] = prediction
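A sketch of the feature bagging above and why each tree now stores its own feature_indices: at prediction time the tree must see exactly the columns it was trained on. The DummyTree, data and max_features heuristic are hypothetical.

import numpy as np

class DummyTree:
    """ Stand-in for a decision tree; only the feature-index bookkeeping matters here. """
    def fit(self, X, y):
        self.n_features_seen = X.shape[1]
    def predict(self, X):
        return np.zeros(X.shape[0])

# Hypothetical data: 6 samples, 8 features
np.random.seed(2)
X, y = np.random.normal(size=(6, 8)), np.zeros(6)
n_features = X.shape[1]
max_features = int(np.sqrt(n_features))       # a common default for classification forests

trees = [DummyTree() for _ in range(3)]
for tree in trees:
    # Feature bagging: a random subset of feature indices for this tree
    idx = np.random.choice(range(n_features), size=max_features, replace=True)
    tree.feature_indices = idx                # stored on the tree itself, as in the change above
    tree.fit(X[:, idx], y)

# At prediction time each tree sees only the columns it was trained on
preds = np.array([tree.predict(X[:, tree.feature_indices]) for tree in trees])
print(preds.shape)                            # (3, 6): one row of predictions per tree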
3 changes: 0 additions & 3 deletions mlfromscratch/supervised_learning/regression.py
@@ -133,7 +133,6 @@ class LassoRegression(Regression):
"""
def __init__(self, degree, reg_factor, n_iterations=3000, learning_rate=0.01):
self.degree = degree
- # Lasso Regression
self.regularization = l1_regularization(alpha=reg_factor)
super(LassoRegression, self).__init__(n_iterations,
learning_rate)
@@ -189,7 +188,6 @@ class RidgeRegression(Regression):
The step length that will be used when updating the weights.
"""
def __init__(self, reg_factor, n_iterations=1000, learning_rate=0.001):
- # Ridge Regression
self.regularization = l2_regularization(alpha=reg_factor)
super(RidgeRegression, self).__init__(n_iterations,
learning_rate)
@@ -211,7 +209,6 @@ class PolynomialRidgeRegression(Regression):
"""
def __init__(self, degree, reg_factor, n_iterations=3000, learning_rate=0.01, gradient_descent=True):
self.degree = degree
- # Ridge Regression
self.regularization = l2_regularization(alpha=reg_factor)
super(PolynomialRidgeRegression, self).__init__(n_iterations,
learning_rate)
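For reference, a minimal sketch of what the l1_regularization and l2_regularization objects used above encapsulate (the penalty term and its gradient); the class shape here is an assumption modeled on how the Regression classes use them, not the repo's exact implementation.

import numpy as np

class l1_regularization:
    """ Lasso penalty: alpha * ||w||_1 (sketch) """
    def __init__(self, alpha):
        self.alpha = alpha
    def __call__(self, w):
        return self.alpha * np.linalg.norm(w, ord=1)
    def grad(self, w):
        return self.alpha * np.sign(w)        # subgradient of the L1 norm

class l2_regularization:
    """ Ridge penalty: 0.5 * alpha * ||w||_2^2 (sketch) """
    def __init__(self, alpha):
        self.alpha = alpha
    def __call__(self, w):
        return self.alpha * 0.5 * w.dot(w)
    def grad(self, w):
        return self.alpha * w

w = np.array([0.5, -1.0, 2.0])
print(l1_regularization(alpha=0.1)(w), l2_regularization(alpha=0.1)(w))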
@@ -10,16 +10,13 @@ class PCA():
maximizing the variance along each feature axis. This class is also used throughout
the project to plot data.
"""
- def __init__(self): pass

def transform(self, X, n_components):
""" Fit the dataset to the number of principal components specified in the
constructor and return the transformed dataset """
- covariance = calculate_covariance_matrix(X)
+ covariance_matrix = calculate_covariance_matrix(X)

- # Get the eigenvalues and eigenvectors.
- # (eigenvector[:,0] corresponds to eigenvalue[0])
- eigenvalues, eigenvectors = np.linalg.eig(covariance)
+ # Where (eigenvector[:,0] corresponds to eigenvalue[0])
+ eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)

# Sort the eigenvalues and corresponding eigenvectors from largest
# to smallest eigenvalue and select the first n_components
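A self-contained sketch of the transform above, NumPy only: np.cov stands in for calculate_covariance_matrix, and eigh is used because the covariance matrix is symmetric, where the class above uses eig.

import numpy as np

def pca_transform(X, n_components):
    # Covariance of the centered data (np.cov stands in for calculate_covariance_matrix)
    X_centered = X - X.mean(axis=0)
    covariance_matrix = np.cov(X_centered, rowvar=False)

    # Eigenvectors are columns: eigenvectors[:, i] pairs with eigenvalues[i]
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    # Sort from largest to smallest eigenvalue and keep the first n_components
    idx = np.argsort(eigenvalues)[::-1][:n_components]
    components = eigenvectors[:, idx]

    # Project the data onto the selected principal components
    return X_centered.dot(components)

rng = np.random.default_rng(1)
X = rng.normal(size=(50, 4))
print(pca_transform(X, n_components=2).shape)   # (50, 2)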
