Restricted Boltzmann Machine
eriklindernoren committed Sep 30, 2017
1 parent c75b1a3 commit 05c879a
Showing 8 changed files with 158 additions and 9 deletions.
15 changes: 14 additions & 1 deletion README.md
@@ -4,7 +4,7 @@
Python implementations of some of the fundamental Machine Learning models and algorithms from scratch.

The purpose of this project is not to produce as optimized and computationally efficient algorithms as possible
-but rather to present the inner workings of them in a transparent way.
+but rather to present the inner workings of them in a transparent and accessible way.

## Table of Contents
- [Machine Learning From Scratch](#machine-learning-from-scratch)
@@ -17,6 +17,7 @@ but rather to present the inner workings of them in a transparent way.
+ [Density-Based Clustering](#density-based-clustering)
+ [Generating Handwritten Digits](#generating-handwritten-digits)
+ [Deep Reinforcement Learning](#deep-reinforcement-learning)
+    + [Image Reconstruction With RBM](#image-reconstruction-with-rbm)
+ [Genetic Algorithm](#genetic-algorithm)
+ [Association Analysis](#association-analysis)
* [Implementations](#implementations)
@@ -164,6 +165,17 @@ but rather to present the inner workings of them in a transparent way.
Figure: Deep Q-Network solution to the CartPole-v1 environment in OpenAI gym.
</p>

### Image Reconstruction With RBM
$ python mlfromscratch/examples/restricted_boltzmann_machine.py

<p align="center">
<img src="http://eriklindernoren.se/images/rbm_digits.gif" width="640">
</p>
<p align="center">
Figure: How the network's reconstructions of the digit 2 <br>
from the MNIST training set improve during training.
</p>

### Genetic Algorithm
$ python mlfromscratch/examples/genetic_algorithm.py

@@ -247,6 +259,7 @@ but rather to present the inner workings of them in a transparent way.
- [K-Means](mlfromscratch/unsupervised_learning/k_means.py)
- [Partitioning Around Medoids](mlfromscratch/unsupervised_learning/partitioning_around_medoids.py)
- [Principal Component Analysis](mlfromscratch/unsupervised_learning/principal_component_analysis.py)
+- [Restricted Boltzmann Machine](mlfromscratch/unsupervised_learning/restricted_boltzmann_machine.py)

### Reinforcement Learning
- [Deep Q-Network](mlfromscratch/reinforcement_learning/deep_q_network.py)
48 changes: 48 additions & 0 deletions mlfromscratch/examples/restricted_boltzmann_machine.py
@@ -0,0 +1,48 @@
import logging

import numpy as np
from sklearn import datasets
from sklearn.datasets import fetch_mldata
import matplotlib.pyplot as plt

from mlfromscratch.unsupervised_learning import RBM

logging.basicConfig(level=logging.DEBUG)

def main():

mnist = fetch_mldata('MNIST original')

X = mnist.data / 255.0
y = mnist.target

# Select the samples of the digit 2
X = X[y == 2]

# Limit dataset to 500 samples
idx = np.random.choice(range(X.shape[0]), size=500, replace=False)
X = X[idx]

rbm = RBM(n_hidden=50, n_iterations=200, batch_size=25, learning_rate=0.001)
rbm.fit(X)

training_gen = rbm.training_recon
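    # training_recon stores one batch of reconstructions saved at the end of each training epoch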

# Plot images showing how the network progresses in getting better at
# reconstructing the digits in the training set
for epoch, batch in enumerate(training_gen):
fig, axs = plt.subplots(5, 5)
plt.suptitle("Restricted Boltzmann Machine")
cnt = 0
for i in range(5):
for j in range(5):
axs[i,j].imshow(batch[cnt].reshape((28, 28)), cmap='gray')
axs[i,j].axis('off')
cnt += 1
fig.savefig("rbm_%d.png" % epoch)
plt.close()



if __name__ == "__main__":
main()
3 changes: 2 additions & 1 deletion mlfromscratch/supervised_learning/k_nearest_neighbors.py
@@ -16,14 +16,15 @@ def __init__(self, k=5):
self.k = k

def _vote(self, neighbors):
""" Return the most label class among the neighbors """
""" Return the most common label among the neighbors """
counts = np.bincount(neighbors[:, 1].astype('int'))
return counts.argmax()

def predict(self, X_test, X_train, y_train):
y_pred = np.empty(X_test.shape[0])
# Determine the class of each sample
for i, test_sample in enumerate(X_test):
+            # Two columns [distance, label], for each observed sample
neighbors = np.empty((X_train.shape[0], 2))
# Calculate the distance from each observed sample to the
# sample we wish to predict
8 changes: 4 additions & 4 deletions mlfromscratch/supervised_learning/perceptron.py
@@ -29,7 +29,7 @@ def __init__(self, n_iterations=20000, activation_function=Sigmoid, loss=SquareL
self.n_iterations = n_iterations
self.learning_rate = learning_rate
self.loss = loss()
-        self.activation = activation_function()
+        self.activation_func = activation_function()

def fit(self, X, y):
n_samples, n_features = np.shape(X)
@@ -43,9 +43,9 @@ def fit(self, X, y):
for i in range(self.n_iterations):
# Calculate outputs
linear_output = X.dot(self.W) + self.w0
-            y_pred = self.activation(linear_output)
+            y_pred = self.activation_func(linear_output)
# Calculate the loss gradient w.r.t the input of the activation function
-            error_gradient = self.loss.gradient(y, y_pred) * self.activation.gradient(linear_output)
+            error_gradient = self.loss.gradient(y, y_pred) * self.activation_func.gradient(linear_output)
# Calculate the gradient of the loss with respect to each weight
grad_wrt_w = X.T.dot(error_gradient)
grad_wrt_w0 = np.sum(error_gradient, axis=0, keepdims=True)
@@ -55,5 +55,5 @@

# Use the trained model to predict labels of X
def predict(self, X):
-        y_pred = self.activation(X.dot(self.W) + self.w0)
+        y_pred = self.activation_func(X.dot(self.W) + self.w0)
return y_pred
1 change: 1 addition & 0 deletions mlfromscratch/unsupervised_learning/__init__.py
@@ -6,3 +6,4 @@
from .genetic_algorithm import GeneticAlgorithm
from .k_means import KMeans
from .partitioning_around_medoids import PAM
+from .restricted_boltzmann_machine import RBM
2 changes: 1 addition & 1 deletion mlfromscratch/unsupervised_learning/dbscan.py
@@ -18,7 +18,7 @@ class DBSCAN():
def __init__(self, eps=1, min_samples=5):
self.eps = eps
self.min_samples = min_samples
-        # List of arrays (clusters) containing sample indices
+        # List of lists (each is a cluster) containing sample indices
self.clusters = []
self.visited_samples = []
# Hashmap {"sample_index": [neighbor1, neighbor2, ...]}
5 changes: 3 additions & 2 deletions mlfromscratch/unsupervised_learning/k_means.py
@@ -22,7 +22,7 @@ def __init__(self, k=2, max_iterations=500):
self.max_iterations = max_iterations

def _init_random_centroids(self, X):
""" Initialize the centroids as random samples """
""" Initialize the centroids as k random samples of X"""
n_samples, n_features = np.shape(X)
centroids = np.zeros((self.k, n_features))
for i in range(self.k):
@@ -71,13 +71,14 @@ def _get_cluster_labels(self, clusters, X):
def predict(self, X):
""" Do K-Means clustering and return cluster indices """

-        # Initialize centroids
+        # Initialize centroids as k random samples from X
centroids = self._init_random_centroids(X)

# Iterate until convergence or for max iterations
for _ in range(self.max_iterations):
# Assign samples to closest centroids (create clusters)
clusters = self._create_clusters(centroids, X)
+            # Save current centroids for convergence check
prev_centroids = centroids
# Calculate new centroids from the clusters
centroids = self._calculate_centroids(clusters, X)
85 changes: 85 additions & 0 deletions mlfromscratch/unsupervised_learning/restricted_boltzmann_machine.py
@@ -0,0 +1,85 @@
import logging
import numpy as np
import progressbar

from mlfromscratch.utils.misc import bar_widgets
from mlfromscratch.utils import batch_iterator
from mlfromscratch.deep_learning.activation_functions import Sigmoid

"""
References:
A Practical Guide to Training Restricted Boltzmann Machines https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
"""

sigmoid = Sigmoid()

class RBM():
"""Bernoulli Restricted Boltzmann Machine (RBM)
Parameters:
-----------
    n_hidden: int
The number of processing nodes (neurons) in the hidden layer.
learning_rate: float
The step length that will be used when updating the weights.
batch_size: int
The size of the mini-batch used to calculate each weight update.
    n_iterations: int
The number of training iterations the algorithm will tune the weights for.
"""
def __init__(self, n_hidden=128, learning_rate=0.1, batch_size=10, n_iterations=100):
self.n_iterations = n_iterations
self.batch_size = batch_size
self.lr = learning_rate
self.n_hidden = n_hidden

self.progressbar = progressbar.ProgressBar(widgets=bar_widgets)

def _initialize_weights(self, X):
n_visible = X.shape[1]
self.W = np.random.normal(scale=0.1, size=(n_visible, self.n_hidden))

self.v0 = np.zeros(n_visible) # Bias visible
self.h0 = np.zeros(self.n_hidden) # Bias hidden

def fit(self, X, y=None):
'''Contrastive Divergence training procedure'''

self._initialize_weights(X)

self.errors = []
self.training_recon = []
for i in self.progressbar(range(self.n_iterations)):
batch_errors = []
for batch in batch_iterator(X, batch_size=self.batch_size):
# Positive phase
positive_hidden = sigmoid(batch.dot(self.W) + self.h0)
hidden_states = self._sample(positive_hidden)
positive_associations = batch.T.dot(positive_hidden)

# Negative phase
negative_visible = sigmoid(hidden_states.dot(self.W.T) + self.v0)
negative_visible = self._sample(negative_visible)
negative_hidden = sigmoid(negative_visible.dot(self.W) + self.h0)
negative_associations = negative_visible.T.dot(negative_hidden)

self.W += self.lr * (positive_associations - negative_associations)
self.h0 += self.lr * (positive_hidden.sum(axis=0) - negative_hidden.sum(axis=0))
self.v0 += self.lr * (batch.sum(axis=0) - negative_visible.sum(axis=0))

batch_errors.append(np.mean((batch - negative_visible) ** 2))

self.errors.append(np.mean(batch_errors))
# Reconstruct a batch of images from the training set
idx = np.random.choice(range(X.shape[0]), self.batch_size)
self.training_recon.append(self.reconstruct(X[idx]))

def _sample(self, X):
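        # Bernoulli sampling: each unit is set on with probability equal to its activation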
return X > np.random.random_sample(size=X.shape)

    def reconstruct(self, X):
positive_hidden = sigmoid(X.dot(self.W) + self.h0)
hidden_states = self._sample(positive_hidden)
negative_visible = sigmoid(hidden_states.dot(self.W.T) + self.v0)
return negative_visible
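For quick reference, a minimal usage sketch of the class above (not part of the commit; the toy data X_demo and its shape are made up for illustration, and the package is assumed importable as in the example script):

import numpy as np
from mlfromscratch.unsupervised_learning import RBM

# Made-up toy data: 100 binary vectors of length 64
X_demo = (np.random.rand(100, 64) > 0.5).astype(float)

rbm = RBM(n_hidden=16, learning_rate=0.1, batch_size=10, n_iterations=50)
rbm.fit(X_demo)

print(rbm.errors[-1])                 # mean reconstruction error of the last epoch
recon = rbm.reconstruct(X_demo[:10])  # visible unit probabilities, shape (10, 64)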
