Commit 2b0b1cf
Lorenzo Terenzi committed Oct 11, 2017
1 parent 5f4b768
Showing 13 changed files with 2,267 additions and 15 deletions.
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
"""Function used to compute the loss."""
import numpy as np


def compute_loss(y, tx, w, type='mse'):
    """Calculate the loss.

    You can calculate the loss using MSE or MAE.
    """
    N = tx.shape[0]
    e = y - tx @ w  # residuals
    if type == 'mse':
        cost = 1 / N * e.T @ e
    elif type == 'mae':
        cost = 1 / N * np.sum(np.abs(e))
    else:
        raise ValueError("type not recognised")
    return cost
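
A minimal usage sketch for compute_loss (the toy arrays below are invented for illustration and are not part of the commit):

# Toy usage of compute_loss; with w = [1, 1] the model fits y = 1 + x exactly.
import numpy as np
from costs import compute_loss

y = np.array([1.0, 2.0, 3.0])
tx = np.c_[np.ones(3), np.array([0.0, 1.0, 2.0])]  # bias column + one feature
w = np.array([1.0, 1.0])

print(compute_loss(y, tx, w, type='mse'))  # 0.0
print(compute_loss(y, tx, w, type='mae'))  # 0.0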
Large diffs are not rendered by default.
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
"""Gradient Descent"""
import datetime

import numpy as np

from costs import compute_loss


def compute_gradient(y, tx, w, type='mse'):
    """Compute the gradient."""
    if type == 'mse':
        gradient = -2 / np.shape(tx)[0] * tx.T @ (y - tx @ w)
    elif type == 'mae':
        gradient = -1 / np.shape(tx)[0] * tx.T @ np.sign(y - tx @ w)
    else:
        raise ValueError('type not implemented')
    assert np.shape(w) == np.shape(gradient)
    return gradient


def gradient_descent(y, tx, initial_w, loss_func, gradient_func, max_iters, gamma):
    """Gradient descent algorithm."""
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        loss = loss_func(y, tx, w)
        dw = gradient_func(y, tx, w)
        w = w - gamma * dw
        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws


def train():
    max_iters = 50
    gamma = 0.1

    # Build (y, tx); 'helpers' is an assumed module name for the helper file below.
    from helpers import load_data, standardize, build_model_data
    height, weight, gender = load_data()
    x, mean_x, std_x = standardize(height)
    y, tx = build_model_data(x, weight)

    # Initialization
    w_initial = np.array([0, 0])

    # Start gradient descent.
    start_time = datetime.datetime.now()
    gradient_losses, gradient_ws = gradient_descent(
        y, tx, w_initial, compute_loss, compute_gradient, max_iters, gamma)
    end_time = datetime.datetime.now()

    # Print result
    execution_time = (end_time - start_time).total_seconds()
    print("Gradient Descent: execution time={t:.3f} seconds".format(t=execution_time))
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
"""Grid Search"""

import numpy as np

import costs


def generate_w(num_intervals):
    """Generate a grid of values for w0 and w1."""
    w0 = np.linspace(-100, 200, num_intervals)
    w1 = np.linspace(-150, 150, num_intervals)
    return w0, w1


def get_best_parameters(w0, w1, losses):
    """Get the best w from the result of grid search."""
    min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape)
    return losses[min_row, min_col], w0[min_row], w1[min_col]


def grid_search(y, tx, w0, w1, loss_func):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    for idx_0, w0_elem in enumerate(w0):
        for idx_1, w1_elem in enumerate(w1):
            losses[idx_0, idx_1] = loss_func(y, tx, np.asarray([w0_elem, w1_elem]))
    return losses
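
Wiring the three functions together (a sketch with invented toy data; compute_loss comes from the costs file above):

import numpy as np
from costs import compute_loss

# Illustrative data: y = 2 + 3 * x plus a little noise.
np.random.seed(1)
x = np.random.randn(100)
y = 2 + 3 * x + 0.1 * np.random.randn(100)
tx = np.c_[np.ones(len(x)), x]

w0, w1 = generate_w(num_intervals=100)
losses = grid_search(y, tx, w0, w1, compute_loss)
best_loss, best_w0, best_w1 = get_best_parameters(w0, w1, losses)
print(best_loss, best_w0, best_w1)  # best grid point should land near (2, 3)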
@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
"""some helper functions."""
import numpy as np


def load_data(sub_sample=True, add_outlier=False):
    """Load data and convert it to the metric system."""
    path_dataset = "height_weight_genders.csv"
    data = np.genfromtxt(
        path_dataset, delimiter=",", skip_header=1, usecols=[1, 2])
    height = data[:, 0]
    weight = data[:, 1]
    gender = np.genfromtxt(
        path_dataset, delimiter=",", skip_header=1, usecols=[0],
        converters={0: lambda x: 0 if b"Male" in x else 1})
    # Convert to metric system (inches -> meters, pounds -> kilograms)
    height *= 0.025
    weight *= 0.454

    # sub-sample
    if sub_sample:
        height = height[::50]
        weight = weight[::50]

    if add_outlier:
        # outlier experiment
        height = np.concatenate([height, [1.1, 1.2]])
        weight = np.concatenate([weight, [51.5 / 0.454, 55.3 / 0.454]])

    return height, weight, gender


def standardize(x):
    """Standardize the original data set."""
    mean_x = np.mean(x)
    x = x - mean_x
    std_x = np.std(x)
    x = x / std_x
    return x, mean_x, std_x


def build_model_data(height, weight):
    """Form (y, tX) to get regression data in matrix form."""
    y = weight
    x = height
    num_samples = len(y)
    tx = np.c_[np.ones(num_samples), x]
    return y, tx


def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """Generate a minibatch iterator for a dataset.

    Takes as input two iterables (here the desired output values 'y' and the
    input data 'tx') and outputs an iterator that yields mini-batches of
    `batch_size` matching elements from `y` and `tx`. The data can be randomly
    shuffled so that ordering in the original data does not bias the
    mini-batches.

    Example of use:
        for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
            <DO-SOMETHING>
    """
    data_size = len(y)

    if shuffle:
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min((batch_num + 1) * batch_size, data_size)
        if start_index != end_index:
            yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]
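
The docstring's usage pattern, spelled out (toy shapes invented for illustration):

# batch_iter yields (y, tx) slices of size batch_size, optionally shuffled.
import numpy as np

y = np.arange(10, dtype=float)
tx = np.c_[np.ones(10), np.arange(10, dtype=float)]

for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size=4, num_batches=2):
    print(minibatch_y.shape, minibatch_tx.shape)  # (4,) (4, 2) per batch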
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
"""function for plot."""
import matplotlib.pyplot as plt
import numpy as np

from grid_search import get_best_parameters


def prediction(w0, w1, mean_x, std_x):
    """Get the regression line from the model."""
    x = np.arange(1.2, 2, 0.01)
    x_normalized = (x - mean_x) / std_x
    return x, w0 + w1 * x_normalized


def base_visualization(grid_losses, w0_list, w1_list,
                       mean_x, std_x, height, weight):
    """Base visualization for both models."""
    w0, w1 = np.meshgrid(w0_list, w1_list)

    fig = plt.figure()

    # plot contourf
    ax1 = fig.add_subplot(1, 2, 1)
    cp = ax1.contourf(w0, w1, grid_losses.T, cmap=plt.cm.jet)
    fig.colorbar(cp, ax=ax1)
    ax1.set_xlabel(r'$w_0$')
    ax1.set_ylabel(r'$w_1$')
    # put a marker at the minimum
    loss_star, w0_star, w1_star = get_best_parameters(
        w0_list, w1_list, grid_losses)
    ax1.plot(w0_star, w1_star, marker='*', color='r', markersize=20)

    # plot f(x)
    ax2 = fig.add_subplot(1, 2, 2)
    ax2.scatter(height, weight, marker=".", color='b', s=5)
    ax2.set_xlabel("x")
    ax2.set_ylabel("y")
    ax2.grid()

    return fig


def grid_visualization(grid_losses, w0_list, w1_list,
                       mean_x, std_x, height, weight):
    """Visualize what the trained model looks like under grid search."""
    fig = base_visualization(
        grid_losses, w0_list, w1_list, mean_x, std_x, height, weight)

    loss_star, w0_star, w1_star = get_best_parameters(
        w0_list, w1_list, grid_losses)
    # plot prediction
    x, f = prediction(w0_star, w1_star, mean_x, std_x)
    # index 2: after fig.colorbar(), the axes are [ax1, colorbar axes, ax2]
    ax2 = fig.get_axes()[2]
    ax2.plot(x, f, 'r')

    return fig


def gradient_descent_visualization(
        gradient_losses, gradient_ws,
        grid_losses, grid_w0, grid_w1,
        mean_x, std_x, height, weight, n_iter=None):
    """Visualize how the loss value changes until n_iter."""
    fig = base_visualization(
        grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight)

    ws_to_be_plotted = np.stack(gradient_ws)
    if n_iter is not None:
        ws_to_be_plotted = ws_to_be_plotted[:n_iter]

    ax1, ax2 = fig.get_axes()[0], fig.get_axes()[2]
    ax1.plot(
        ws_to_be_plotted[:, 0], ws_to_be_plotted[:, 1],
        marker='o', color='w', markersize=10)
    pred_x, pred_y = prediction(
        ws_to_be_plotted[-1, 0], ws_to_be_plotted[-1, 1],
        mean_x, std_x)
    ax2.plot(pred_x, pred_y, 'r')

    return fig
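
An end-to-end sketch producing the grid-search figure. The module names 'helpers' and 'plots' are assumptions for the helper and plotting files in this commit; 'costs' and 'grid_search' are confirmed by the imports above:

import matplotlib.pyplot as plt
from costs import compute_loss
from grid_search import generate_w, grid_search
# 'helpers' and 'plots' are assumed file names for the code shown above.
from helpers import load_data, standardize, build_model_data
from plots import grid_visualization

height, weight, gender = load_data()
x, mean_x, std_x = standardize(height)
y, tx = build_model_data(x, weight)

w0_list, w1_list = generate_w(num_intervals=100)
grid_losses = grid_search(y, tx, w0_list, w1_list, compute_loss)

fig = grid_visualization(grid_losses, w0_list, w1_list, mean_x, std_x, height, weight)
plt.show()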
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""Stochastic Gradient Descent"""
import numpy as np


def compute_stoch_gradient(y, tx, w):
    """Compute a stochastic gradient from just a few examples n and their corresponding y_n labels."""
    # Per the TODO in the original commit, this is the same MSE gradient as in
    # gradient descent, evaluated on the mini-batch only.
    return -2 / np.shape(tx)[0] * tx.T @ (y - tx @ w)


def stochastic_gradient_descent(y, tx, initial_w, grad_func, loss_func, batch_size, max_iters, gamma):
    """Stochastic gradient descent algorithm."""
    ws = [initial_w]
    losses = []
    w = initial_w
    num_batches = int(np.shape(tx)[0] / batch_size)
    for epoch_num in range(max_iters):
        for batch_idx in range(num_batches):
            tx_batch = tx[batch_idx * batch_size : (batch_idx + 1) * batch_size]
            y_batch = y[batch_idx * batch_size : (batch_idx + 1) * batch_size]
            dw = grad_func(y_batch, tx_batch, w)
            w = w - gamma * dw
        # store w and loss once per epoch
        loss = loss_func(y, tx, w)
        ws.append(w)
        losses.append(loss)
        print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=epoch_num, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws
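
A usage sketch with invented toy data ('gradient_descent' is an assumed module name for the gradient file above; costs is confirmed by the imports in this commit):

import numpy as np
from costs import compute_loss
from gradient_descent import compute_gradient  # assumed module name

# Illustrative data: y = 1.5 + 0.8 * x plus a little noise.
np.random.seed(2)
x = np.random.randn(200)
y = 1.5 + 0.8 * x + 0.05 * np.random.randn(200)
tx = np.c_[np.ones(len(x)), x]

losses, ws = stochastic_gradient_descent(
    y, tx, initial_w=np.array([0.0, 0.0]),
    grad_func=compute_gradient, loss_func=compute_loss,
    batch_size=20, max_iters=10, gamma=0.1)
print(ws[-1])  # expected to approach [1.5, 0.8]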