Fix deep learning q4 error #17

Merged
113 changes: 58 additions & 55 deletions 10-deep-learning.Rmd
@@ -46,12 +46,12 @@ library(ISLR2)
library(neuralnet)
library(sigmoid)
set.seed(5)
train <- sample(seq_len(nrow(ISLR2::Boston)), nrow(ISLR2::Boston) * 2/3)
train <- sample(seq_len(nrow(ISLR2::Boston)), nrow(ISLR2::Boston) * 2 / 3)

net <- neuralnet(crim ~ lstat + medv + ptratio + rm,
data = ISLR2::Boston[train, ],
act.fct = relu,
hidden = c(2, 3)
data = ISLR2::Boston[train, ],
act.fct = relu,
hidden = c(2, 3)
)
plot(net)
```
@@ -201,10 +201,13 @@ When we take the negative of this, it is equivalent to 10.14 for two classes
knitr::include_graphics("images/nn2.png")
```

Note that, because there is no boundary padding, the output of each
convolution filter will be a 28x28 array.
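
A quick check of that dimension (a minimal sketch; a 5x5 filter applied with
stride 1 and no padding):

```{r}
# Size of a "valid" (no padding) convolution output: input size - filter size + 1
32 - 5 + 1
```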

> b. How many parameters are in this model?

There are 5 convolution matrices each with 5x5 weights (plus 5 bias terms) to
estimate, therefore 130 parameters
There are 3 convolution matrices to estimate, each with 5x5 weights, plus 3
bias terms, giving $3 \times 5 \times 5 + 3 = 78$ parameters.
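
As a quick sanity check, we can build the corresponding convolution layer in
keras and count its parameters (a sketch, assuming a single-channel 32x32
input and three 5x5 filters with no padding):

```{r}
library(keras)

# One convolution layer: 3 filters of size 5x5 over a single-channel 32x32 input.
check <- keras_model_sequential() |>
  layer_conv_2d(
    filters = 3,
    kernel_size = c(5, 5),
    padding = "valid",
    input_shape = c(32, 32, 1)
  )

# Each filter has 5 * 5 weights plus a bias term: 3 * (5 * 5) + 3 = 78.
count_params(check)
```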

> c. Explain how this model can be thought of as an ordinary feed-forward
> neural network with the individual pixels as inputs, and with constraints on
@@ -222,9 +225,11 @@ connections to all other output nodes.
> d. If there were no constraints, then how many weights would there be in the
> ordinary feed-forward neural network in (c)?

With no constraints, we would connect each output pixel in our 5x32x32
convolution layer to each node in the 32x32 original image (plus 5 bias terms),
giving a total of 5,242,885 weights to estimate.
With no constraints, we would connect each input pixel in our original 32x32
image to each output pixel in each of the three 28x28 convolution outputs,
with a bias term for each output pixel. Each output pixel would therefore
require $32 \times 32$ weights plus 1 bias term, giving a total of
$(32 \times 32 + 1) \times 28 \times 28 \times 3 = 2,410,800$ parameters.
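
The same count can be verified directly:

```{r}
# (32*32 weights + 1 bias) for each of the 28 * 28 * 3 output pixels.
(32 * 32 + 1) * 28 * 28 * 3
```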

### Question 5

@@ -246,7 +251,7 @@ absolute error.
> a. Draw a graph of this function over the range $\beta \in [−6, 6]$.

```{r}
r <- function(x) sin(x) + x/10
r <- function(x) sin(x) + x / 10
x <- seq(-6, 6, 0.1)
plot(x, r(x), type = "l")
```
@@ -270,11 +275,11 @@ x^{m+1} = x^m - \rho (cos(x^m) + 1/10)
$$

```{r}
iter <- function(x, rho) x - rho*(cos(x) + 1/10)
iter <- function(x, rho) x - rho * (cos(x) + 1 / 10)
gd <- function(start, rho = 0.1) {
b <- start
v <- b
while(abs(b - iter(b, 0.1)) > 1e-8) {
while (abs(b - iter(b, 0.1)) > 1e-8) {
b <- iter(b, 0.1)
v <- c(v, b)
}
@@ -306,7 +311,7 @@ points(res, r(res), col = "red", pch = 19)
### Question 7

> Fit a neural network to the `Default` data. Use a single hidden layer with 10
> units, and dropout regularization. Have a look at Labs 10.9.1-10.9.2 for
> units, and dropout regularization. Have a look at Labs 10.9.1--10.9.2 for
> guidance. Compare the classification performance of your model with that of
> linear logistic regression.

@@ -331,15 +336,16 @@ nn <- keras_model_sequential() |>
layer_dropout(rate = 0.4) |>
layer_dense(units = 1)

compile(nn, loss = "mse",
optimizer = optimizer_rmsprop(),
metrics = list("mean_absolute_error")
compile(nn,
loss = "mse",
optimizer = optimizer_rmsprop(),
metrics = list("mean_absolute_error")
)

history <- fit(nn,
x[-testid, ], y[-testid],
epochs = 100,
batch_size = 26,
x[-testid, ], y[-testid],
epochs = 100,
batch_size = 26,
validation_data = list(x[testid, ], y[testid]),
verbose = 0
)
@@ -381,15 +387,15 @@ images <- list.files("images/animals")
x <- array(dim = c(length(images), 224, 224, 3))
for (i in seq_len(length(images))) {
img <- image_load(paste0("images/animals/", images[i]), target_size = c(224, 224))
x[i,,,] <- image_to_array(img)
x[i, , , ] <- image_to_array(img)
}

model <- application_resnet50(weights = "imagenet")

pred <- model |>
predict(x) |>
imagenet_decode_predictions(top = 5)

names(pred) <- images
print(pred)
```
@@ -405,7 +411,7 @@ Fitting the model as described in the text.
```{r}
library(tidyverse)
library(ISLR2)
xdata <- data.matrix(NYSE[, c("DJ_return", "log_volume","log_volatility")])
xdata <- data.matrix(NYSE[, c("DJ_return", "log_volume", "log_volatility")])
istrain <- NYSE[, "train"]
xdata <- scale(xdata)

@@ -416,8 +422,8 @@ lagm <- function(x, k = 1) {
}

arframe <- data.frame(
log_volume = xdata[, "log_volume"],
L1 = lagm(xdata, 1),
log_volume = xdata[, "log_volume"],
L1 = lagm(xdata, 1),
L2 = lagm(xdata, 2),
L3 = lagm(xdata, 3),
L4 = lagm(xdata, 4),
Expand All @@ -436,7 +442,7 @@ V0 <- var(arframe[!istrain, "log_volume"])
Now we add month (and work with tidyverse).

```{r}
arframe$month = as.factor(str_match(NYSE$date, "-(\\d+)-")[,2])[-(1:5)]
arframe$month <- as.factor(str_match(NYSE$date, "-(\\d+)-")[, 2])[-(1:5)]
arfit2 <- lm(log_volume ~ ., data = arframe[istrain, ])
arpred2 <- predict(arfit2, arframe[!istrain, ])
V0 <- var(arframe[!istrain, "log_volume"])
@@ -498,16 +504,16 @@ model |>

history <- model |>
fit(
xrnn[istrain,, ],
xrnn[istrain, , ],
arframe[istrain, "log_volume"],
batch_size = 64,
epochs = 200,
validation_data = list(xrnn[!istrain,, ], arframe[!istrain, "log_volume"]),
validation_data = list(xrnn[!istrain, , ], arframe[!istrain, "log_volume"]),
verbose = 0
)

plot(history, smooth = FALSE)
kpred <- predict(model, xrnn[!istrain,, ])
kpred <- predict(model, xrnn[!istrain, , ])
1 - mean((kpred - arframe[!istrain, "log_volume"])^2) / V0
```

@@ -536,33 +542,31 @@ From the book:

```{r, c10q11}
xfun::cache_rds({

model <- keras_model_sequential() |>
model <- keras_model_sequential() |>
layer_flatten(input_shape = c(5, 3)) |>
layer_dense(units = 32, activation = "relu") |>
layer_dropout(rate = 0.4) |>
layer_dropout(rate = 0.4) |>
layer_dense(units = 1)

model |> compile(
loss = "mse",
optimizer = optimizer_rmsprop(),
loss = "mse",
optimizer = optimizer_rmsprop(),
metrics = "mse"
)

history <- model |>
fit(
xrnn[istrain,, ],
xrnn[istrain, , ],
arframe[istrain, "log_volume"],
batch_size = 64,
epochs = 200,
validation_data = list(xrnn[!istrain,, ], arframe[!istrain, "log_volume"]),
validation_data = list(xrnn[!istrain, , ], arframe[!istrain, "log_volume"]),
verbose = 0
)

plot(history, smooth = FALSE, metrics = "mse")
kpred <- predict(model, xrnn[!istrain,, ])
kpred <- predict(model, xrnn[!istrain, , ])
1 - mean((kpred - arframe[!istrain, "log_volume"])^2) / V0

})
```

@@ -581,16 +585,16 @@ in the RNN. Thus, our input for each observation will be 4 x 5 (rather than
```{r, c10q12}
xfun::cache_rds({
xdata <- data.matrix(
NYSE[, c("day_of_week", "DJ_return", "log_volume","log_volatility")]
NYSE[, c("day_of_week", "DJ_return", "log_volume", "log_volatility")]
)
istrain <- NYSE[, "train"]
xdata <- scale(xdata)

arframe <- data.frame(
log_volume = xdata[, "log_volume"],
log_volume = xdata[, "log_volume"],
L1 = lagm(xdata, 1),
L2 = lagm(xdata, 2),
L3 = lagm(xdata, 3),
L3 = lagm(xdata, 3),
L4 = lagm(xdata, 4),
L5 = lagm(xdata, 5)
)
@@ -600,33 +604,33 @@ xfun::cache_rds({
n <- nrow(arframe)
xrnn <- data.matrix(arframe[, -1])
xrnn <- array(xrnn, c(n, 4, 5))
xrnn <- xrnn[,, 5:1]
xrnn <- xrnn[, , 5:1]
xrnn <- aperm(xrnn, c(1, 3, 2))
dim(xrnn)

model <- keras_model_sequential() |>
layer_simple_rnn(units = 12,
layer_simple_rnn(
units = 12,
input_shape = list(5, 4),
dropout = 0.1,
dropout = 0.1,
recurrent_dropout = 0.1
) |>
layer_dense(units = 1)

model |> compile(optimizer = optimizer_rmsprop(), loss = "mse")

history <- model |>
history <- model |>
fit(
xrnn[istrain,, ],
xrnn[istrain, , ],
arframe[istrain, "log_volume"],
batch_size = 64,
epochs = 200,
validation_data = list(xrnn[!istrain,, ], arframe[!istrain, "log_volume"]),
validation_data = list(xrnn[!istrain, , ], arframe[!istrain, "log_volume"]),
verbose = 0
)
)

kpred <- predict(model, xrnn[!istrain,, ])
kpred <- predict(model, xrnn[!istrain, , ])
1 - mean((kpred - arframe[!istrain, "log_volume"])^2) / V0

})
```

@@ -641,7 +645,7 @@ xfun::cache_rds({
xfun::cache_rds({
library(knitr)
accuracy <- c()
for(max_features in c(1000, 3000, 5000, 10000)) {
for (max_features in c(1000, 3000, 5000, 10000)) {
imdb <- dataset_imdb(num_words = max_features)
c(c(x_train, y_train), c(x_test, y_test)) %<-% imdb

@@ -656,13 +660,13 @@

model |> compile(
optimizer = "rmsprop",
loss = "binary_crossentropy",
loss = "binary_crossentropy",
metrics = "acc"
)

history <- fit(model, x_train, y_train,
epochs = 10,
batch_size = 128,
history <- fit(model, x_train, y_train,
epochs = 10,
batch_size = 128,
validation_data = list(x_test, y_test),
verbose = 0
)
@@ -676,7 +680,6 @@ xfun::cache_rds({
"Accuracy" = accuracy
) |>
kable()

})
```

Binary file modified images/nn2.png