# Stochastic_Gradient_Descent_2.R
#' Stochastic gradient descent with the loss function presented in "An stochastic ..."
#'
#' @param df A numeric matrix whose last column holds the binary (0/1) targets.
#' @param init Initial weight vector; defaults to a vector of ones of length ncol(df) - 1.
#' @param eta Learning rate.
#' @param num_iter Number of iterations; one randomly sampled row of df is used per iteration.
#' @param exact Optional vector of true coefficients; when supplied, the L2 error of the
#'   current estimate is recorded at every iteration.
#' @return A data frame with one row per iteration holding the estimated thetas
#'   (plus an "error" column when exact is given).
sgd2 <- function(df, init = as.vector(rep(1, ncol(df) - 1)), eta = .001, num_iter = 1000, exact = NULL){
  # TODO: add a batch_num argument for mini-batches and check that it does not exceed nrow(df);
  #       currently a single row is sampled per iteration.
  thetas_prev <- init # initialize the weights
  epoch <- 1          # iteration counter (one sampled row per iteration)
  # initialize a data frame to store the thetas after every iteration
  if (!is.null(exact)){
    # one extra column for the error
    m <- matrix(NA, ncol = ncol(df), nrow = 1)
  }
  else{
    m <- matrix(NA, ncol = ncol(df) - 1, nrow = 1)
  }
  temp_df <- data.frame(m, check.names = FALSE)
  # build the column names: Theta.0 (intercept), Theta.1, ..., Theta.(ncol(df) - 2)
  names <- paste0("Theta.", 0:(ncol(df) - 2))
  if (!is.null(exact)){
    # add the error column
    names <- append(names, "error")
  }
  # assign the names vector to the column names of the temporary data frame
  colnames(temp_df) <- names
  # insert the initial thetas and drop the NA row used to create temp_df
  if (!is.null(exact)){
    # error of the initial estimate (norm_L2 is assumed to be defined elsewhere in the project)
    error <- norm_L2(thetas_prev - exact)
    temp_df <- na.omit(rbind(temp_df, c(thetas_prev, error)))
  }
  else{
    # overwrite the first row of NAs with the initial theta values
    temp_df[1, ] <- thetas_prev
  }
  while (epoch <= num_iter){
    batch <- df[sample(nrow(df), size = 1), ] # take one random row from the data
    X <- batch[1:(ncol(df) - 1)] # feature values of the sampled row, omitting the target
    Y <- batch[ncol(df)]         # target value of the sampled row
    PHI <- X
    exponent <- t(thetas_prev) %*% PHI        # linear predictor
    PI <- exp(exponent) / (1 + exp(exponent)) # logistic function
    D <- -log((PI^Y) * (1 - PI)^(1 - Y))      # per-sample negative log-likelihood (computed but not returned)
    Dh <- as.numeric(PI - Y) * PHI            # gradient of the loss w.r.t. thetas for this sample
    thetas_prev <- thetas_prev - eta * Dh     # gradient step
    epoch <- epoch + 1 # go to the next iteration
    # eta <- eta / 1.02 # optional learning-rate decay
    # append the new weights (and the error) to the output data frame
    if (!is.null(exact)){
      error <- norm_L2(thetas_prev - exact)
      temp_df <- rbind(temp_df, c(thetas_prev, error))
    }
    else{
      # drop the extra dimension before binding
      temp_df <- rbind(temp_df, drop(thetas_prev))
    }
  } # end while
  return(temp_df)
}
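# norm_L2() is called by sgd2() when `exact` is supplied, but it is not defined in this file;
# it presumably lives elsewhere in the project. A minimal sketch, assuming it is the plain
# Euclidean (L2) norm of a numeric vector, so that the test below can run stand-alone:
if (!exists("norm_L2")) {
  norm_L2 <- function(v) sqrt(sum(v^2))
}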
# Testing
p <- 5
n <- 10000
x <- matrix(rnorm(n * p), n, p)
x <- cbind(1, x) # add an intercept column
betas <- runif(p+1, -2, 2)
hc <- function(x) 1 /(1 + exp(-x)) # inverse canonical link
p.true <- hc(x %*% betas)
y <- rbinom(n, 1, p.true)
df <- cbind(x,y)
init <- betas + rnorm(p + 1, 0, 1) # perturbed starting point (not passed to sgd2() below, which falls back to its default)
# Normalize the data
# df <- min_max_norm(df)
# print(init)
# library(pracma)
print(head(df))
out_sgd2 <- sgd2(df, eta = .1, num_iter = 1000, exact = betas)
# Estimated values
print(tail(out_sgd2))
# Exact values
print(betas)
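# Optional sanity check (a sketch, not part of the original script): compare the final SGD
# estimate with the maximum-likelihood fit from glm() and plot the tracked L2 error over the
# iterations. Variable and column names ("error", "Theta.*", out_sgd2) follow the code above.
fit_glm <- glm(y ~ x - 1, family = binomial) # x already contains the intercept column
print(rbind(sgd  = unlist(tail(out_sgd2, 1)[1:(p + 1)]),
            glm  = as.numeric(coef(fit_glm)),
            true = betas))
plot(out_sgd2$error, type = "l",
     xlab = "Iteration", ylab = "L2 error vs. true betas",
     main = "SGD convergence")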