# classifier_autoLoss.py
import tensorflow as tf
import numpy as np
# Helper modules from the DC-GAN codebase; linear() is used below
from ops import *
from utils import *


class autoLoss(object):
    """Controller that chooses which GAN loss to optimize at each step.

    Args:
        sess: TensorFlow session (shared with the DC-GAN).
        size_D, size_G: parameter counts of D and G, used to normalize
            the gradient-norm features.
        batch_size: number of accumulated batches per controller update.
        T: time after which the reward is calculated.
        k: lag used in the reward ratio.
        C: scaling constant for the reward term.
    """

    def __init__(self, sess, size_D, size_G, batch_size, T=10, k=1, C=1):
        # Link to the DC-GAN via the session
        self.sess = sess
        self.batch_size = batch_size
        # Hyperparameters
        self.T = T
        self.k = k
        self.C = C
        # Attributes of the controller
        self.X = []  # feature list
        self.size_G = size_G
        self.size_D = size_D
        self.y_pred = 1
        # Need the past k+1 rewards to calculate the reward at t+T
        self.reward_list = np.ones(k + 2).tolist()
        # Debug constants, surfaced through check_cond to verify which
        # tf.cond branch was taken
        self.x1 = tf.constant(2)
        self.y1 = tf.constant(5)
        self.reward_term = 1.0
        self.net_loss_D = 0
        self.net_loss_G = 0
        self.build_controller()
    ## The Controller NN ##
    def controller(self, feature, reuse=False):
        with tf.variable_scope("controller") as scope:
            if reuse:
                scope.reuse_variables()
            # A single linear layer maps the 9-d feature vector to one logit
            y = linear(tf.reshape(feature, [1, -1]), 1, 'controller_lin')
            return tf.nn.sigmoid(y), y
    ## The two branches for sampling y(t) ##
    def cond_1(self):
        # Taken with probability sigmoid(y): predict 0 and compute the
        # gradient of log sigmoid(y); the last element is a debug value
        # marking which branch ran
        return 0, self.optimizer.compute_gradients(tf.log(self.y_sigmoid), self.c_vars), tf.multiply(self.x1, 17)

    def cond_2(self):
        # Taken with probability 1 - sigmoid(y): predict 1 and compute the
        # gradient of log(1 - sigmoid(y))
        return 1, self.optimizer.compute_gradients(tf.log(1 - self.y_sigmoid), self.c_vars), tf.add(self.y1, 23)
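
    # A note on the estimator (a sketch, assuming the action a is the
    # Bernoulli sample drawn in build_controller with p = sigmoid(y)):
    #
    #     d/dtheta E[R] = E[ R * d/dtheta log p(a) ]
    #
    # so the branches accumulate d log p(a)/dtheta, and apply_gradients later
    # scales the accumulated gradient by the reward term.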
    ## Controller Ops Definition ##
    def build_controller(self):
        # Forward pass
        self.feat = tf.placeholder(tf.float32, [1, 9], name='feature_t')
        self.y_sigmoid, self.y = self.controller(self.feat)

        # Useful operations in the backward pass
        self.optimizer = tf.train.AdamOptimizer(0.0001, beta1=0.5)
        self.c_vars = tf.trainable_variables(scope="controller")

        # Variables that accumulate the gradients over each batch
        self.accumulated_gradients = [tf.Variable(tf.zeros_like(tv.initialized_value()),
                                                  trainable=False) for tv in self.c_vars]
        # Operation to reset the accumulated gradients to zero
        self.reset_gradients = [gradient.assign(tf.zeros_like(gradient))
                                for gradient in self.accumulated_gradients]

        # Backward pass: sample y(t). With probability sigmoid(y) take cond_1,
        # otherwise cond_2; each branch returns (prediction, gradients, debug).
        self.y_predt, self.gradients, self.check_cond = tf.cond(
            tf.less(tf.random_uniform([]), tf.reshape(self.y_sigmoid, [])),
            self.cond_1, self.cond_2)

        # gradients is a list of (gradient, variable) tuples, so gradient[0]
        # is the actual gradient. Divide by batch_size so the learning rate
        # refers to steps rather than batches.
        self.evaluate_batch = [accumulated_gradient.assign_add(gradient[0] / self.batch_size)
                               for accumulated_gradient, gradient
                               in zip(self.accumulated_gradients, self.gradients)]

        # The reward must enter as a placeholder: multiplying by the Python
        # float self.reward_term here would bake its construction-time value
        # (1.0) into the graph, so later updates would have no effect.
        self.reward_ph = tf.placeholder(tf.float32, [], name='reward_term')
        # Operation to apply the reward-scaled accumulated gradients
        self.apply_gradients = self.optimizer.apply_gradients(
            [(self.reward_ph * accumulated_gradient, c_var)
             for accumulated_gradient, c_var
             in zip(self.accumulated_gradients, self.c_vars)])
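
    # Intended driving order for these ops (a sketch; the actual schedule is
    # set by the training loop via step_controller / update_controller below):
    #   per batch:   sess.run(evaluate_batch, ...)   # accumulate gradients
    #   per update:  sess.run(apply_gradients, ...)  # reward-scaled step
    #                sess.run(reset_gradients)       # clear accumulators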
    ## Take one step ##
    def step_controller(self, t, grad_D, grad_G, loss_D_real, loss_D_fake, loss_G, perf_G, perf_D):
        # Gradients are scaled by 100 before featurization
        self.new_feature = self.add_feature(t, 100 * grad_D, 100 * grad_G,
                                            loss_D_real, loss_D_fake, loss_G, perf_G, perf_D)
        _, _, _, self.y_pred = self.sess.run(
            [self.gradients, self.evaluate_batch, self.check_cond, self.y_predt],
            feed_dict={self.feat: self.new_feature})
        return self.y_pred
    ## Update Parameters ##
    def update_controller(self, reward_term):
        # TODO: revisit once the reward definition is settled
        self.reward_list.append(reward_term)
        del self.reward_list[0]
        # Ratio of the latest reward improvement to the improvement k steps
        # earlier; the 1e-3 guards against division by zero
        self.reward_term = self.C * self.k * (self.reward_list[-1] - self.reward_list[-2]) \
            / (self.reward_list[-2] - self.reward_list[-2 - self.k] + 1e-3)
        print("Reward Term: ")
        print(self.reward_term)
        self.sess.run(self.apply_gradients, feed_dict={self.reward_ph: self.reward_term})
        self.reset_controller()
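
    # Worked example with C = 1 and k = 1 (hypothetical numbers): if
    # reward_list ends up as [0.8, 1.0, 1.3], then
    #   reward_term = (1.3 - 1.0) / (1.0 - 0.8 + 1e-3) ~= 1.49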
    ## Add Features to the feature list ##
    def add_feature(self, t, grad_D, grad_G, loss_D_real, loss_D_fake, loss_G, perf_G, perf_D):
        progress = (1.0 * t) / self.T
        # Gradient norms, normalized by the square root of the network size
        norm_grad_G = np.linalg.norm([np.linalg.norm(np.array(grads)) for grads in grad_G])
        norm_grad_G /= np.sqrt(self.size_G)
        norm_grad_D = np.linalg.norm([np.linalg.norm(np.array(grads)) for grads in grad_D])
        norm_grad_D /= np.sqrt(np.array(self.size_D))
        log_norm = np.log(norm_grad_G / norm_grad_D)
        # Losses and their ratio
        self.net_loss_G += loss_G
        loss_D = loss_D_fake + loss_D_real
        self.net_loss_D += loss_D
        loss_ratio = loss_G / loss_D
        # Feature groups: progress | gradient norms | losses | performance
        new_feature = np.array([[progress, norm_grad_G, norm_grad_D, log_norm,
                                 loss_G, loss_D, loss_ratio, perf_G, perf_D]])
        self.X.append(new_feature)
        return new_feature
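
    # Example row (hypothetical values): at t = 5 with T = 10, unit gradient
    # norms and loss_G = loss_D = 1.0 give
    #   [0.5, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, perf_G, perf_D]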
    ## Reset X, Grads on t == nT ##
    def reset_controller(self):
        self.X = []
        self.net_loss_D = 0
        self.net_loss_G = 0
        self.sess.run(self.reset_gradients)