Commit 2b0b1cf
Lorenzo Terenzi committed Oct 11, 2017
1 parent 5f4b768
Showing 13 changed files with 2,267 additions and 15 deletions.
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
"""Function used to compute the loss."""
import numpy as np


def compute_loss(y, tx, w, type='mse'):
    """Calculate the loss.

    You can calculate the loss using MSE or MAE.
    """
    N = tx.shape[0]
    e = y - tx @ w  # residuals
    if type == 'mse':
        cost = 1 / N * e.T @ e
    elif type == 'mae':
        cost = 1 / N * np.sum(np.abs(e))
    else:
        raise ValueError("type not recognised")
    return cost
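
A minimal usage sketch for compute_loss (the toy arrays below are invented for illustration and are not part of the commit):

# Toy usage of compute_loss; with w = [1, 1] the model fits y = 1 + x exactly.
import numpy as np
from costs import compute_loss

y = np.array([1.0, 2.0, 3.0])
tx = np.c_[np.ones(3), np.array([0.0, 1.0, 2.0])]  # bias column + one feature
w = np.array([1.0, 1.0])

print(compute_loss(y, tx, w, type='mse'))  # 0.0
print(compute_loss(y, tx, w, type='mae'))  # 0.0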
Large diffs are not rendered by default.
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
"""Gradient Descent"""
import datetime

import numpy as np

from costs import compute_loss


def compute_gradient(y, tx, w, type='mse'):
    """Compute the gradient."""
    if type == 'mse':
        gradient = -2 / np.shape(tx)[0] * tx.T @ (y - tx @ w)
    elif type == 'mae':
        gradient = -1 / np.shape(tx)[0] * tx.T @ np.sign(y - tx @ w)
    else:
        raise ValueError('type not implemented')
    assert np.shape(w) == np.shape(gradient)
    return gradient


def gradient_descent(y, tx, initial_w, loss_func, gradient_func, max_iters, gamma):
    """Gradient descent algorithm."""
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        loss = loss_func(y, tx, w)
        dw = gradient_func(y, tx, w)
        w = w - gamma * dw
        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws


def train():
    max_iters = 50
    gamma = 0.1

    # Build (y, tx); 'helpers' is an assumed module name for the helper file below.
    from helpers import load_data, standardize, build_model_data
    height, weight, gender = load_data()
    x, mean_x, std_x = standardize(height)
    y, tx = build_model_data(x, weight)

    # Initialization
    w_initial = np.array([0, 0])

    # Start gradient descent.
    start_time = datetime.datetime.now()
    gradient_losses, gradient_ws = gradient_descent(
        y, tx, w_initial, compute_loss, compute_gradient, max_iters, gamma)
    end_time = datetime.datetime.now()

    # Print result
    execution_time = (end_time - start_time).total_seconds()
    print("Gradient Descent: execution time={t:.3f} seconds".format(t=execution_time))
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
"""Grid Search"""

import numpy as np

import costs


def generate_w(num_intervals):
    """Generate a grid of values for w0 and w1."""
    w0 = np.linspace(-100, 200, num_intervals)
    w1 = np.linspace(-150, 150, num_intervals)
    return w0, w1


def get_best_parameters(w0, w1, losses):
    """Get the best w from the result of grid search."""
    min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape)
    return losses[min_row, min_col], w0[min_row], w1[min_col]


def grid_search(y, tx, w0, w1, loss_func):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    for idx_0, w0_elem in enumerate(w0):
        for idx_1, w1_elem in enumerate(w1):
            losses[idx_0, idx_1] = loss_func(y, tx, np.asarray([w0_elem, w1_elem]))
    return losses
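
Wiring the three functions together (a sketch with invented toy data; compute_loss comes from the costs file above):

import numpy as np
from costs import compute_loss

# Illustrative data: y = 2 + 3 * x plus a little noise.
np.random.seed(1)
x = np.random.randn(100)
y = 2 + 3 * x + 0.1 * np.random.randn(100)
tx = np.c_[np.ones(len(x)), x]

w0, w1 = generate_w(num_intervals=100)
losses = grid_search(y, tx, w0, w1, compute_loss)
best_loss, best_w0, best_w1 = get_best_parameters(w0, w1, losses)
print(best_loss, best_w0, best_w1)  # best grid point should land near (2, 3)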
@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
"""some helper functions."""
import numpy as np


def load_data(sub_sample=True, add_outlier=False):
    """Load data and convert it to the metric system."""
    path_dataset = "height_weight_genders.csv"
    data = np.genfromtxt(
        path_dataset, delimiter=",", skip_header=1, usecols=[1, 2])
    height = data[:, 0]
    weight = data[:, 1]
    gender = np.genfromtxt(
        path_dataset, delimiter=",", skip_header=1, usecols=[0],
        converters={0: lambda x: 0 if b"Male" in x else 1})
    # Convert to metric system (inches -> meters, pounds -> kilograms)
    height *= 0.025
    weight *= 0.454

    # sub-sample
    if sub_sample:
        height = height[::50]
        weight = weight[::50]

    if add_outlier:
        # outlier experiment
        height = np.concatenate([height, [1.1, 1.2]])
        weight = np.concatenate([weight, [51.5 / 0.454, 55.3 / 0.454]])

    return height, weight, gender


def standardize(x):
    """Standardize the original data set."""
    mean_x = np.mean(x)
    x = x - mean_x
    std_x = np.std(x)
    x = x / std_x
    return x, mean_x, std_x


def build_model_data(height, weight):
    """Form (y, tX) to get regression data in matrix form."""
    y = weight
    x = height
    num_samples = len(y)
    tx = np.c_[np.ones(num_samples), x]
    return y, tx


def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """Generate a minibatch iterator for a dataset.

    Takes as input two iterables (here the desired output values 'y' and the
    input data 'tx') and outputs an iterator that yields mini-batches of
    `batch_size` matching elements from `y` and `tx`. The data can be randomly
    shuffled so that ordering in the original data does not bias the
    mini-batches.

    Example of use:
        for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
            <DO-SOMETHING>
    """
    data_size = len(y)

    if shuffle:
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min((batch_num + 1) * batch_size, data_size)
        if start_index != end_index:
            yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]
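
The docstring's usage pattern, spelled out (toy shapes invented for illustration):

# batch_iter yields (y, tx) slices of size batch_size, optionally shuffled.
import numpy as np

y = np.arange(10, dtype=float)
tx = np.c_[np.ones(10), np.arange(10, dtype=float)]

for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size=4, num_batches=2):
    print(minibatch_y.shape, minibatch_tx.shape)  # (4,) (4, 2) per batch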
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
"""function for plot."""
import matplotlib.pyplot as plt
import numpy as np

from grid_search import get_best_parameters


def prediction(w0, w1, mean_x, std_x):
    """Get the regression line from the model."""
    x = np.arange(1.2, 2, 0.01)
    x_normalized = (x - mean_x) / std_x
    return x, w0 + w1 * x_normalized


def base_visualization(grid_losses, w0_list, w1_list,
                       mean_x, std_x, height, weight):
    """Base visualization for both models."""
    w0, w1 = np.meshgrid(w0_list, w1_list)

    fig = plt.figure()

    # plot contourf
    ax1 = fig.add_subplot(1, 2, 1)
    cp = ax1.contourf(w0, w1, grid_losses.T, cmap=plt.cm.jet)
    fig.colorbar(cp, ax=ax1)
    ax1.set_xlabel(r'$w_0$')
    ax1.set_ylabel(r'$w_1$')
    # put a marker at the minimum
    loss_star, w0_star, w1_star = get_best_parameters(
        w0_list, w1_list, grid_losses)
    ax1.plot(w0_star, w1_star, marker='*', color='r', markersize=20)

    # plot f(x)
    ax2 = fig.add_subplot(1, 2, 2)
    ax2.scatter(height, weight, marker=".", color='b', s=5)
    ax2.set_xlabel("x")
    ax2.set_ylabel("y")
    ax2.grid()

    return fig


def grid_visualization(grid_losses, w0_list, w1_list,
                       mean_x, std_x, height, weight):
    """Visualize what the trained model looks like under grid search."""
    fig = base_visualization(
        grid_losses, w0_list, w1_list, mean_x, std_x, height, weight)

    loss_star, w0_star, w1_star = get_best_parameters(
        w0_list, w1_list, grid_losses)
    # plot prediction
    x, f = prediction(w0_star, w1_star, mean_x, std_x)
    # index 2: after fig.colorbar(), the axes are [ax1, colorbar axes, ax2]
    ax2 = fig.get_axes()[2]
    ax2.plot(x, f, 'r')

    return fig


def gradient_descent_visualization(
        gradient_losses, gradient_ws,
        grid_losses, grid_w0, grid_w1,
        mean_x, std_x, height, weight, n_iter=None):
    """Visualize how the loss value changes until n_iter."""
    fig = base_visualization(
        grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight)

    ws_to_be_plotted = np.stack(gradient_ws)
    if n_iter is not None:
        ws_to_be_plotted = ws_to_be_plotted[:n_iter]

    ax1, ax2 = fig.get_axes()[0], fig.get_axes()[2]
    ax1.plot(
        ws_to_be_plotted[:, 0], ws_to_be_plotted[:, 1],
        marker='o', color='w', markersize=10)
    pred_x, pred_y = prediction(
        ws_to_be_plotted[-1, 0], ws_to_be_plotted[-1, 1],
        mean_x, std_x)
    ax2.plot(pred_x, pred_y, 'r')

    return fig
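
An end-to-end sketch producing the grid-search figure. The module names 'helpers' and 'plots' are assumptions for the helper and plotting files in this commit; 'costs' and 'grid_search' are confirmed by the imports above:

import matplotlib.pyplot as plt
from costs import compute_loss
from grid_search import generate_w, grid_search
# 'helpers' and 'plots' are assumed file names for the code shown above.
from helpers import load_data, standardize, build_model_data
from plots import grid_visualization

height, weight, gender = load_data()
x, mean_x, std_x = standardize(height)
y, tx = build_model_data(x, weight)

w0_list, w1_list = generate_w(num_intervals=100)
grid_losses = grid_search(y, tx, w0_list, w1_list, compute_loss)

fig = grid_visualization(grid_losses, w0_list, w1_list, mean_x, std_x, height, weight)
plt.show()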
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""Stochastic Gradient Descent"""
import numpy as np


def compute_stoch_gradient(y, tx, w):
    """Compute a stochastic gradient from just a few examples n and their corresponding y_n labels."""
    # Per the TODO in the original commit, this is the same MSE gradient as in
    # gradient descent, evaluated on the mini-batch only.
    return -2 / np.shape(tx)[0] * tx.T @ (y - tx @ w)


def stochastic_gradient_descent(y, tx, initial_w, grad_func, loss_func, batch_size, max_iters, gamma):
    """Stochastic gradient descent algorithm."""
    ws = [initial_w]
    losses = []
    w = initial_w
    num_batches = int(np.shape(tx)[0] / batch_size)
    for epoch_num in range(max_iters):
        for batch_idx in range(num_batches):
            tx_batch = tx[batch_idx * batch_size : (batch_idx + 1) * batch_size]
            y_batch = y[batch_idx * batch_size : (batch_idx + 1) * batch_size]
            dw = grad_func(y_batch, tx_batch, w)
            w = w - gamma * dw
        # store w and loss once per epoch
        loss = loss_func(y, tx, w)
        ws.append(w)
        losses.append(loss)
        print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=epoch_num, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws
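
A usage sketch with invented toy data ('gradient_descent' is an assumed module name for the gradient file above; costs is confirmed by the imports in this commit):

import numpy as np
from costs import compute_loss
from gradient_descent import compute_gradient  # assumed module name

# Illustrative data: y = 1.5 + 0.8 * x plus a little noise.
np.random.seed(2)
x = np.random.randn(200)
y = 1.5 + 0.8 * x + 0.05 * np.random.randn(200)
tx = np.c_[np.ones(len(x)), x]

losses, ws = stochastic_gradient_descent(
    y, tx, initial_w=np.array([0.0, 0.0]),
    grad_func=compute_gradient, loss_func=compute_loss,
    batch_size=20, max_iters=10, gamma=0.1)
print(ws[-1])  # expected to approach [1.5, 0.8]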