Batched_Stochastic_Gradient_Descent.R
#' Implementation of batched stochastic gradient descent with the loss function presented in "A stochastic ..."
#'
#' @param df A data frame (or matrix) whose last column holds the target values.
#' @param init Initial weight vector (defaults to a vector of ones, one entry per predictor column).
#' @param eta Learning rate.
#' @param num_iter Number of iterations (epochs).
#' @param batch_num Number of data rows in each batch (specific to SGD).
#' @param exact Optional vector of exact weights; if supplied, the L2 error of the thetas is tracked.
#' @return A data frame with one row of thetas per iteration (plus an error column when exact is given).
sgd_batched <- function(df, init = as.vector(rep(1, ncol(df)-1)), eta = .001, num_iter = 1000, batch_num = 30, exact = NULL){
  # TODO: check that batch_num does not exceed the number of rows in df
  thetas_prev <- init  # initialize weights
  epoch <- 1
  # initialize a data frame to store thetas after every iteration
  if (!is.null(exact)){
    # one extra column for the error
    m <- matrix(NA, ncol = ncol(df), nrow = 1)
  } else {
    m <- matrix(NA, ncol = ncol(df)-1, nrow = 1)
  }
  temp_df <- data.frame(m, check.names = FALSE)
  names <- c()
  # build a vector of column names: Theta.0 through Theta.(ncol(df)-2)
  for (i in 0:(ncol(df)-2)) {
    names <- append(names, paste0("Theta.", i))
  }
  if (!is.null(exact)){
    # add the error column name
    names <- append(names, "error")
  }
  # assign the names vector to the column names of the temp data frame
  colnames(temp_df) <- names
  # insert the initial thetas as the first row, removing the NA row used to set up temp_df
  if (!is.null(exact)){
    # add the error of the initial thetas
    error <- norm_L2(thetas_prev - exact)
    temp_df <- na.omit(rbind(temp_df, c(thetas_prev, error)))
  } else {
    # change the first row of NAs to the initial theta values
    temp_df[1,] <- thetas_prev
  }
  while (epoch <= num_iter){
    batch <- df[sample(nrow(df), size = batch_num, replace = FALSE), ]  # take a random batch from the data
    X <- batch[, 1:(ncol(df)-1)]  # matrix of data values, omitting targets
    Y <- batch[, ncol(df)]        # vector of target values
    # print(thetas_prev)
    for (i in 1:batch_num){  # iterate through each row of data in the batch
      PHI <- X[i, ]  # i-th row of predictors in the batch
      PI <- 1 / (1 + exp(-(PHI %*% thetas_prev)))  # logistic (sigmoid) prediction
      Dh <- as.numeric(PI - Y[i]) * PHI  # gradient of the log-loss w.r.t. thetas for this observation
      thetas_prev <- thetas_prev - eta * Dh  # recalculate weights using the updated gradient
    }
    epoch <- epoch + 1  # go to the next epoch
    # eta <- eta / 1.02  # optional learning-rate decay
    # append the current weights (and error, if tracked) to the results data frame
    if (!is.null(exact)){
      error <- norm_L2(thetas_prev - exact)
      temp_df <- rbind(temp_df, c(thetas_prev, error))
    } else {
      # drop() removes any extra matrix dimension before binding the row
      temp_df <- rbind(temp_df, drop(thetas_prev))
    }
  } # end while
  return(temp_df)
}
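# norm_L2 is called above when `exact` is supplied but is not defined in this file.
# A minimal sketch, assuming it is simply the Euclidean (L2) norm of a vector; the
# guard avoids masking a definition provided elsewhere in the project:
if (!exists("norm_L2")) norm_L2 <- function(v) sqrt(sum(v^2))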
# Testing
p <- 5
n <- 10000
x <- matrix(rnorm(n * p), n, p)
x <- cbind(1, x)  # add an intercept column
betas <- runif(p + 1, -2, 2)
hc <- function(x) 1 / (1 + exp(-x))  # inverse canonical link (logistic)
p.true <- hc(x %*% betas)
y <- rbinom(n, 1, p.true)
df <- cbind(x, y)
# print(df)
init <- betas + rnorm(p + 1, 0, 1)
# Normalize the data (see the min_max_norm sketch below)
# df <- min_max_norm(df)
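# min_max_norm is referenced above but not defined in this file. A minimal sketch,
# assuming column-wise min-max scaling of a numeric matrix; constant columns (such
# as the intercept column of ones) are left unchanged to avoid division by zero:
if (!exists("min_max_norm")) min_max_norm <- function(m){
  apply(m, 2, function(col){
    rng <- max(col) - min(col)
    if (rng == 0) col else (col - min(col)) / rng
  })
}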
# print(init)
library(pracma)
print(tail(sgd_batched(df, eta = .01, num_iter = 1000, batch_num = 10)))
# exact values (true betas)
print(betas)
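# Illustrative extra run (not part of the original test): supplying `exact = betas`
# makes the returned data frame also track the L2 error of the thetas per epoch,
# using the norm_L2 sketch above and the perturbed `init` defined earlier.
res <- sgd_batched(df, init = init, eta = .01, num_iter = 1000, batch_num = 10, exact = betas)
print(tail(res))  # last few rows: fitted thetas and their L2 distance from the true betas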