#Grid_vs_Bayes_XOR.jl
#Here we optimize the learning rate and sigmoid hyperparameter for the XOR neural network
include("XOR_MD.jl")
include("Kernals.jl")
include("gaussian_process.jl")
#Set the random seed for reproducibility:
srand(1234)
"""
We will compare the effect of randomly selecting a learning rate and sigmoid
hyperparemeter vs the use of Bayesian Optimization for finding the optimial LR on the MSE.
Suppose we have limited computing time of 100000 epochs and that we haveju N tries to
minimise the MSE. Let us say that the learning rate is between a and b
"""
#Initialise Layers and params ==========================================
Layer_1=uniform(0,1,2,2)
Layer_2=uniform(0,1,2,1)
epoch_vec=linspace(100,1000,10) #For the final report, increase this to 100 points and leave running for ~10 minutes to get a smoother graph
epoch_bayes_result=zeros(epoch_vec)
epoch_random_result=zeros(epoch_vec)
a=0.001 #Lower bound for the learning rate
b=1     #Upper bound for the learning rate
c=0.001 #Lower bound for the sigmoid hyperparameter
d=1     #Upper bound for the sigmoid hyperparameter
N=20    #Number of tries for each method
#Curry the sigmoid functions:
function hyper_curry(h)
return (x->sigmoid(x,h))
end
function hyper_curry_deriv(h)
return (x->sigmoid_deriv(x,h))
end
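#For example, hyper_curry(0.5) returns the function x -> sigmoid(x, 0.5), so
#each candidate hyperparameter value yields its own activation function.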
Random_Learning_Rates=uniform(a,b,N,1)
Random_Hyperparameters=uniform(c,d,N,1)
Random_Mat=cat(2,Random_Learning_Rates,Random_Hyperparameters)
Random_MSE=zeros(N)
timingsrand=zeros(length(epoch_vec))
timingsbays=zeros(length(epoch_vec))
convergence=zeros(length(epoch_vec))
for p=1:length(epoch_vec)
epochs=epoch_vec[p]
# Random Learning Rates Examples ========================================
#Random_Mat conjoins Random_Learning_Rates and Random_Hyperparameters:
#it is an Nx2 matrix where row i holds the pair (learning rate i, hyperparameter i),
#e.g. Random_Mat[1,:] is the first (LR, hyperparameter) pair.
tic()
for i=1:length(Random_Learning_Rates)
node_function=hyper_curry(Random_Mat[i,2])
node_deriv=hyper_curry_deriv(Random_Mat[i,2])
learning_rate=Random_Mat[i,1]
Random_MSE[i]=Train_Neural_Net_Loop(epochs,Layer_1,Layer_2,learning_rate,node_function,node_deriv)[3]
println("Epoch Complete")
end
println("Random Learning Rates Training Completed")
q=toc()
timingsrand[p]=q
#Bayesian Optimization Examples===================================================
#Here are the points we can pick from in the Optimization
LR_Test=linspace(a,b,40)
HP_Test=linspace(c,d,40)
#Here is the cartesian product of these, written as a vector of points
Test=gen_points([LR_Test,HP_Test])[1]
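#With 40 values per axis this gives a pool of 40*40 = 1600 candidate
#(learning rate, hyperparameter) pairs for the optimizer to choose from.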
#We first have to pick a point to begin Bayesian optimization:
#currently this starts with the midpoint of the grid; this could be randomised.
Bayesian_Points=[Test[Int(round(length(Test)/2))]]
#Bayesian_Points is a vector of arrays; in each array the first entry is the
#learning rate and the second entry is the hyperparameter.
#Define the hyperparameter functions:
node_function=hyper_curry(Bayesian_Points[1][2])
node_deriv=hyper_curry_deriv(Bayesian_Points[1][2])
#Define Learning Rate:
learning_rate=Bayesian_Points[1][1]
#Run first train before Bayesian Optimization:
Bayesian_MSE=[Train_Neural_Net_Loop(epochs,Layer_1,Layer_2,learning_rate,node_function,node_deriv)[3]]
#Begin Bayesian Optimization:
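#Each iteration fits a Gaussian process to the (point, MSE) pairs seen so far
#and then evaluates the candidate minimising the lower confidence bound
#mu(x) - sigma(x): a low mu favours points predicted to have small MSE, while
#subtracting sigma favours exploring points the model is uncertain about.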
tic()
for k=2:N
D=[(Bayesian_Points[i],Bayesian_MSE[i]) for i=1:length(Bayesian_Points)]
mu, sigma, D=gaussian_process_chol(std_exp_square_ker,D,1e-6,Test)
println("Gaussian Process Complete","\r")
mu=reshape(mu,length(mu));
sigma=reshape(sigma,length(sigma))
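#mu - sigma is the lower-confidence-bound acquisition; findmin returns a
#(value, index) tuple, so [2] selects the index of the most promising candidate.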
new_point=findmin(mu-sigma)[2]
#Append the chosen candidate to the list of evaluated points:
Bayesian_Points=cat(1,Bayesian_Points,[Test[new_point]])
learning_rate=Bayesian_Points[k][1]
node_function=hyper_curry(Bayesian_Points[k][2])
node_deriv=hyper_curry_deriv(Bayesian_Points[k][2])
value_to_be_appended=Train_Neural_Net_Loop(epochs,Layer_1,Layer_2,learning_rate,node_function,node_deriv)[3]
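#Stopping rule: if the new MSE exactly matches the previous one, assume the
#optimiser has settled on the same optimum and stop early.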
if value_to_be_appended != Bayesian_MSE[k-1]
Bayesian_MSE=cat(1,Bayesian_MSE,[value_to_be_appended])
println("Epoch Complete")
if k==N
convergence[p]=N
end
else
println("Found Optimum on the ", k-1, " iteration of ", N, " iterations")
Bayesian_Points=Bayesian_Points[1:length(Bayesian_Points)-1]
convergence[p]=k-1
break
end
end
q=toc()
timingsbays[p]=q
# Results ===================================================================
println("The optimum is located at ",Bayesian_Points[end])
epoch_random_result[p]=minimum(Random_MSE)
epoch_bayes_result[p]=minimum(Bayesian_MSE)
println("Bayesian_Learning_Rates Training Complete")
println("The minimum MSE by Bayesian Optimization was", minimum(Bayesian_MSE))
println("The mininmum MSE by Random Selection was", minimum(Random_MSE))
println("completed cycle ",p, " out of overall cycle", length(epoch_vec))
end
using PyPlot
# fig = figure("pyplot_subplot_mixed",figsize=(7,7))
# ax=axes()
plot(epoch_vec,epoch_bayes_result,label="Bayesian Optimization")
plot(epoch_vec,epoch_random_result,label="Random Grid Search",alpha=0.7)
title("MSE Plot for different epochs")
xlabel("Epochs")
ylabel("MSE")
legend(loc="upper right",fancybox=true)
grid("on")
show()
plot(epoch_vec,timingsrand,label="Random Timings")
plot(epoch_vec,timingsbays, label="Bayesian Op Timings")
title("Timings")
xlabel("Epochs")
ylabel("Time (s)")
legend(loc="upper right",fancybox=true)
grid("on")
show()
plot(epoch_vec,convergence)
title("convergance")
xlabel("Epochs")
ylabel("convergance val")
grid("on")
show()