Restricted Boltzmann Machine
eriklindernoren committed Sep 30, 2017
1 parent c75b1a3 commit 05c879a
Showing 8 changed files with 158 additions and 9 deletions.
15 changes: 14 additions & 1 deletion README.md
@@ -4,7 +4,7 @@
Python implementations of some of the fundamental Machine Learning models and algorithms from scratch.

The purpose of this project is not to produce as optimized and computationally efficient algorithms as possible
-but rather to present the inner workings of them in a transparent way.
+but rather to present the inner workings of them in a transparent and accessible way.

## Table of Contents
- [Machine Learning From Scratch](#machine-learning-from-scratch)
@@ -17,6 +17,7 @@ but rather to present the inner workings of them in a transparent way.
+ [Density-Based Clustering](#density-based-clustering)
+ [Generating Handwritten Digits](#generating-handwritten-digits)
+ [Deep Reinforcement Learning](#deep-reinforcement-learning)
+    + [Image Reconstruction With RBM](#image-reconstruction-with-rbm)
+ [Genetic Algorithm](#genetic-algorithm)
+ [Association Analysis](#association-analysis)
* [Implementations](#implementations)
@@ -164,6 +165,17 @@ but rather to present the inner workings of them in a transparent way.
Figure: Deep Q-Network solution to the CartPole-v1 environment in OpenAI gym.
</p>

### Image Reconstruction With RBM
$ python mlfromscratch/examples/restricted_boltzmann_machine.py

<p align="center">
<img src="http://eriklindernoren.se/images/rbm_digits.gif" width="640">
</p>
<p align="center">
Figure: How the network's reconstructions of the digit 2 <br>
from the MNIST training set improve during training.
</p>

### Genetic Algorithm
$ python mlfromscratch/examples/genetic_algorithm.py

@@ -247,6 +259,7 @@ but rather to present the inner workings of them in a transparent way.
- [K-Means](mlfromscratch/unsupervised_learning/k_means.py)
- [Partitioning Around Medoids](mlfromscratch/unsupervised_learning/partitioning_around_medoids.py)
- [Principal Component Analysis](mlfromscratch/unsupervised_learning/principal_component_analysis.py)
+- [Restricted Boltzmann Machine](mlfromscratch/unsupervised_learning/restricted_boltzmann_machine.py)

### Reinforcement Learning
- [Deep Q-Network](mlfromscratch/reinforcement_learning/deep_q_network.py)
48 changes: 48 additions & 0 deletions mlfromscratch/examples/restricted_boltzmann_machine.py
@@ -0,0 +1,48 @@
import logging

import numpy as np
from sklearn import datasets
from sklearn.datasets import fetch_mldata
import matplotlib.pyplot as plt

from mlfromscratch.unsupervised_learning import RBM

logging.basicConfig(level=logging.DEBUG)

def main():

mnist = fetch_mldata('MNIST original')

X = mnist.data / 255.0
y = mnist.target

# Select the samples of the digit 2
X = X[y == 2]

# Limit dataset to 500 samples
idx = np.random.choice(range(X.shape[0]), size=500, replace=False)
X = X[idx]

rbm = RBM(n_hidden=50, n_iterations=200, batch_size=25, learning_rate=0.001)
rbm.fit(X)

training_gen = rbm.training_recon
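    # training_recon stores one batch of reconstructions saved at the end of each training epoch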

# Plot images showing how the network progresses in getting better at
# reconstructing the digits in the training set
for epoch, batch in enumerate(training_gen):
fig, axs = plt.subplots(5, 5)
plt.suptitle("Restricted Boltzmann Machine")
cnt = 0
for i in range(5):
for j in range(5):
axs[i,j].imshow(batch[cnt].reshape((28, 28)), cmap='gray')
axs[i,j].axis('off')
cnt += 1
fig.savefig("rbm_%d.png" % epoch)
plt.close()



if __name__ == "__main__":
main()
3 changes: 2 additions & 1 deletion mlfromscratch/supervised_learning/k_nearest_neighbors.py
@@ -16,14 +16,15 @@ def __init__(self, k=5):
self.k = k

def _vote(self, neighbors):
""" Return the most label class among the neighbors """
""" Return the most common label among the neighbors """
counts = np.bincount(neighbors[:, 1].astype('int'))
return counts.argmax()

def predict(self, X_test, X_train, y_train):
y_pred = np.empty(X_test.shape[0])
# Determine the class of each sample
for i, test_sample in enumerate(X_test):
+            # Two columns [distance, label], for each observed sample
neighbors = np.empty((X_train.shape[0], 2))
# Calculate the distance from each observed sample to the
# sample we wish to predict
8 changes: 4 additions & 4 deletions mlfromscratch/supervised_learning/perceptron.py
@@ -29,7 +29,7 @@ def __init__(self, n_iterations=20000, activation_function=Sigmoid, loss=SquareL
self.n_iterations = n_iterations
self.learning_rate = learning_rate
self.loss = loss()
-        self.activation = activation_function()
+        self.activation_func = activation_function()

def fit(self, X, y):
n_samples, n_features = np.shape(X)
@@ -43,9 +43,9 @@ def fit(self, X, y):
for i in range(self.n_iterations):
# Calculate outputs
linear_output = X.dot(self.W) + self.w0
-            y_pred = self.activation(linear_output)
+            y_pred = self.activation_func(linear_output)
# Calculate the loss gradient w.r.t the input of the activation function
-            error_gradient = self.loss.gradient(y, y_pred) * self.activation.gradient(linear_output)
+            error_gradient = self.loss.gradient(y, y_pred) * self.activation_func.gradient(linear_output)
# Calculate the gradient of the loss with respect to each weight
grad_wrt_w = X.T.dot(error_gradient)
grad_wrt_w0 = np.sum(error_gradient, axis=0, keepdims=True)
@@ -55,5 +55,5 @@

# Use the trained model to predict labels of X
def predict(self, X):
-        y_pred = self.activation(X.dot(self.W) + self.w0)
+        y_pred = self.activation_func(X.dot(self.W) + self.w0)
return y_pred
1 change: 1 addition & 0 deletions mlfromscratch/unsupervised_learning/__init__.py
@@ -6,3 +6,4 @@
from .genetic_algorithm import GeneticAlgorithm
from .k_means import KMeans
from .partitioning_around_medoids import PAM
+from .restricted_boltzmann_machine import RBM
2 changes: 1 addition & 1 deletion mlfromscratch/unsupervised_learning/dbscan.py
@@ -18,7 +18,7 @@ class DBSCAN():
def __init__(self, eps=1, min_samples=5):
self.eps = eps
self.min_samples = min_samples
-        # List of arrays (clusters) containing sample indices
+        # List of lists (each is a cluster) containing sample indices
self.clusters = []
self.visited_samples = []
# Hashmap {"sample_index": [neighbor1, neighbor2, ...]}
5 changes: 3 additions & 2 deletions mlfromscratch/unsupervised_learning/k_means.py
@@ -22,7 +22,7 @@ def __init__(self, k=2, max_iterations=500):
self.max_iterations = max_iterations

def _init_random_centroids(self, X):
""" Initialize the centroids as random samples """
""" Initialize the centroids as k random samples of X"""
n_samples, n_features = np.shape(X)
centroids = np.zeros((self.k, n_features))
for i in range(self.k):
@@ -71,13 +71,14 @@ def _get_cluster_labels(self, clusters, X):
def predict(self, X):
""" Do K-Means clustering and return cluster indices """

-        # Initialize centroids
+        # Initialize centroids as k random samples from X
centroids = self._init_random_centroids(X)

# Iterate until convergence or for max iterations
for _ in range(self.max_iterations):
# Assign samples to closest centroids (create clusters)
clusters = self._create_clusters(centroids, X)
+            # Save current centroids for convergence check
prev_centroids = centroids
# Calculate new centroids from the clusters
centroids = self._calculate_centroids(clusters, X)
85 changes: 85 additions & 0 deletions mlfromscratch/unsupervised_learning/restricted_boltzmann_machine.py
@@ -0,0 +1,85 @@
import logging
import numpy as np
import progressbar

from mlfromscratch.utils.misc import bar_widgets
from mlfromscratch.utils import batch_iterator
from mlfromscratch.deep_learning.activation_functions import Sigmoid

"""
References:
A Practical Guide to Training Restricted Boltzmann Machines https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
"""

sigmoid = Sigmoid()

class RBM():
"""Bernoulli Restricted Boltzmann Machine (RBM)
Parameters:
-----------
    n_hidden: int
The number of processing nodes (neurons) in the hidden layer.
learning_rate: float
The step length that will be used when updating the weights.
batch_size: int
The size of the mini-batch used to calculate each weight update.
    n_iterations: int
The number of training iterations the algorithm will tune the weights for.
"""
def __init__(self, n_hidden=128, learning_rate=0.1, batch_size=10, n_iterations=100):
self.n_iterations = n_iterations
self.batch_size = batch_size
self.lr = learning_rate
self.n_hidden = n_hidden

self.progressbar = progressbar.ProgressBar(widgets=bar_widgets)

def _initialize_weights(self, X):
n_visible = X.shape[1]
self.W = np.random.normal(scale=0.1, size=(n_visible, self.n_hidden))

self.v0 = np.zeros(n_visible) # Bias visible
self.h0 = np.zeros(self.n_hidden) # Bias hidden

def fit(self, X, y=None):
'''Contrastive Divergence training procedure'''

self._initialize_weights(X)

self.errors = []
self.training_recon = []
for i in self.progressbar(range(self.n_iterations)):
batch_errors = []
for batch in batch_iterator(X, batch_size=self.batch_size):
# Positive phase
positive_hidden = sigmoid(batch.dot(self.W) + self.h0)
hidden_states = self._sample(positive_hidden)
positive_associations = batch.T.dot(positive_hidden)

# Negative phase
negative_visible = sigmoid(hidden_states.dot(self.W.T) + self.v0)
negative_visible = self._sample(negative_visible)
negative_hidden = sigmoid(negative_visible.dot(self.W) + self.h0)
negative_associations = negative_visible.T.dot(negative_hidden)

self.W += self.lr * (positive_associations - negative_associations)
self.h0 += self.lr * (positive_hidden.sum(axis=0) - negative_hidden.sum(axis=0))
self.v0 += self.lr * (batch.sum(axis=0) - negative_visible.sum(axis=0))

batch_errors.append(np.mean((batch - negative_visible) ** 2))

self.errors.append(np.mean(batch_errors))
# Reconstruct a batch of images from the training set
idx = np.random.choice(range(X.shape[0]), self.batch_size)
self.training_recon.append(self.reconstruct(X[idx]))

def _sample(self, X):
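        # Bernoulli sampling: each unit is set on with probability equal to its activation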
return X > np.random.random_sample(size=X.shape)

    def reconstruct(self, X):
positive_hidden = sigmoid(X.dot(self.W) + self.h0)
hidden_states = self._sample(positive_hidden)
negative_visible = sigmoid(hidden_states.dot(self.W.T) + self.v0)
return negative_visible
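For quick reference, a minimal usage sketch of the class above (not part of the commit; the toy data X_demo and its shape are made up for illustration, and the package is assumed importable as in the example script):

import numpy as np
from mlfromscratch.unsupervised_learning import RBM

# Made-up toy data: 100 binary vectors of length 64
X_demo = (np.random.rand(100, 64) > 0.5).astype(float)

rbm = RBM(n_hidden=16, learning_rate=0.1, batch_size=10, n_iterations=50)
rbm.fit(X_demo)

print(rbm.errors[-1])                 # mean reconstruction error of the last epoch
recon = rbm.reconstruct(X_demo[:10])  # visible unit probabilities, shape (10, 64)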
