-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtf_util.py
192 lines (170 loc) · 8.01 KB
/
tf_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
from utils_ import *
from defs_ import *
def apply_temporal_fusion(input_tensor, vector_dimension, temporal_dimension, fusion_method=defs.fusion_method.reshape, name="temporal_fusion", lstm_encoder=None):
    """
    Apply fusion over the temporal dimension (axis 1) of the input tensor.

    :param input_tensor: tensor to fuse. The `last` branch slices it with a
        3-element begin/size and the `avg` branch reduces axis 1, so a
        (rows, time, vector) layout is assumed -- TODO confirm against callers.
    :param vector_dimension: size of the last axis of input_tensor
    :param temporal_dimension: number of time steps; only used by the `last`
        method to locate the final step
    :param fusion_method: one of defs.fusion_method.last / avg / reshape
    :param name: currently unused by this function
    :param lstm_encoder: currently unused by this function
    :return: the fused tensor. `last` keeps only the final time step and `avg`
        averages over time (both drop axis 1); `reshape` flattens the input
        into rows of vector_dimension elements.
    :raises ValueError: if fusion_method is not one of the handled values and
        error() returns instead of aborting
    """
    if fusion_method == defs.fusion_method.last:
        # keep only the response at the last time step
        output = tf.slice(input_tensor, [0, temporal_dimension - 1, 0], [-1, 1, vector_dimension], name="lstm_output_reshape")
        # squeeze the singleton time axis to get one vector per row
        output = tf.squeeze(output, axis=1, name="lstm_output_squeeze")
        debug("Agreggated last-squeezed output : %s" % str(output.shape))
    elif fusion_method == defs.fusion_method.avg:
        # average per-timestep results
        output = tf.reduce_mean(input_tensor, axis=1)
        debug("Aggregated time-averaged output : %s" % str(output.shape))
    elif fusion_method == defs.fusion_method.reshape:
        # no reduction: just flatten to one vector per row
        output = tf.reshape(input_tensor, [-1, vector_dimension])
    else:
        message = "Undefined frame fusion type : %s" % str(fusion_method)
        error(message)
        # error() is a project helper whose behaviour is not visible here; if
        # it returns, fail explicitly instead of hitting an unbound `output`.
        raise ValueError(message)
    return output
def convert_dim_fc(input_tensor, output_dim, name="fc_convert", reuse = False):
    """
    Map the last axis of the input to output_dim with a fully-connected layer.

    If the last axis already has size output_dim the input is returned
    untouched and no variables are created.

    :param input_tensor: tensor whose last axis has a statically known size
    :param output_dim: desired size of the last axis
    :param name: prefix for the "<name>_w" / "<name>_b" variables; also the
        name given to the xw_plus_b op
    :param reuse: when falsy the variables are created with fresh
        initializers; otherwise they are fetched by name only -- presumably
        this needs an enclosing variable scope with reuse enabled, TODO confirm
    :return: the input tensor itself, or the output of the fc layer
    """
    source_dim = int(input_tensor.shape[-1])
    info("Converting dim %d to %d with fc layer" % (source_dim, output_dim))
    if source_dim == output_dim:
        # nothing to convert
        return input_tensor

    source_shape = input_tensor.shape
    weights_name = name + "_w"
    bias_name = name + "_b"

    if reuse:
        # look up the variables made by an earlier call with the same name
        weights = tf.get_variable(weights_name)
        bias = tf.get_variable(bias_name)
    else:
        # initial values: small truncated-normal weights, constant 0.1 bias
        weights_init = tf.truncated_normal((source_dim, output_dim), stddev=0.05, name=name + "_w_init")
        bias_init = tf.constant(0.1, shape=(output_dim,), name=name + "_b_init")
        # create the layer variables
        weights = tf.get_variable(initializer=weights_init, name=weights_name)
        bias = tf.get_variable(initializer=bias_init, name=bias_name)

    converted = tf.nn.xw_plus_b(input_tensor, weights, bias, name=name)
    debug("F [%s]: %s * %s + %s = %s" % (name,str(source_shape), str(weights.shape), str(bias.shape), str(converted.shape)))
    return converted
def vectorize(input_tensor, depth_dim):
    """
    Flatten the input tensor into rows of depth_dim elements.

    :param input_tensor: tensor to reshape
    :param depth_dim: number of elements per output row
    :return: tensor of shape [-1, depth_dim]; the row count is inferred
    """
    flat_shape = [-1, depth_dim]
    return tf.reshape(input_tensor, flat_shape)
# dcnn helpers
def make_fusion(input_tensor, window, strides, padding, name):
    """
    Max-pooling helper: pool the input and log the shape change.

    :param input_tensor: tensor to pool
    :param window: pooling window, forwarded as `ksize` to tf.nn.max_pool
    :param strides: pooling strides
    :param padding: padding scheme forwarded to tf.nn.max_pool
    :param name: name of the pooling op, also used in the debug message
    :return: the pooled tensor
    """
    pooled = tf.nn.max_pool(input_tensor, ksize=window, strides=strides, padding=padding, name=name)
    shape_before = str(input_tensor.shape)
    shape_after = str(pooled.shape)
    debug("P [%s]: %s -> %s" % (name, shape_before, shape_after))
    return pooled
def make_conv(input_tensor, kernel_params, strides, scopename, init_w=(0.0, 0.1), padding="SAME" ):
    """
    Convolution helper: conv2d + bias add + relu, with a debug shape trace.

    :param input_tensor: input to the convolution
    :param kernel_params: shape of the convolution kernel; its last entry is
        used as the length of the bias vector
    :param strides: strides forwarded to tf.nn.conv2d
    :param scopename: name given to the final relu op and used in the debug
        message (the weights variable itself is always named "weights")
    :param init_w: (mean, stddev) of the truncated-normal kernel initializer
    :param padding: padding scheme forwarded to tf.nn.conv2d
    :return: relu-activated convolution output
    """
    mean, stddev = init_w[0], init_w[1]
    kernel_init = tf.truncated_normal(kernel_params, mean=mean, stddev=stddev)
    weights = tf.Variable(initial_value=kernel_init, name="weights")
    # NOTE(review): the bias is a tf.constant of zeros rather than a variable,
    # so as written it is not a trainable parameter -- confirm this is intended.
    biases = tf.constant(0.0, dtype=tf.float32, shape=[kernel_params[-1]], name="biases")
    convolved = tf.nn.conv2d(input_tensor, weights, strides, padding=padding)
    pre_activation = tf.nn.bias_add(convolved, biases)
    debug("C [%s]: %s c %s + %s = %s" % (scopename,str(input_tensor.shape), str(weights.shape), str(biases.shape), str(pre_activation.shape)))
    activated = tf.nn.relu(pre_activation, name=scopename)
    return activated
def vec_seq_concat(seq_tensor, vec_tensor, sequence_length, order='vecfirst'):
    """
    Concatenate each vector in vec_tensor to each element of its sequence in
    seq_tensor, with respect to the sequence length.

    Every row of vec_tensor is repeated sequence_length times (consecutively)
    so that it lines up with seq_tensor, then the two are joined along axis 1.

    :param seq_tensor: sequence elements, one per row -- assumed to hold
        sequence_length rows per row of vec_tensor, TODO confirm with callers
    :param vec_tensor: one vector per sequence; its last axis must have a
        statically known size
    :param sequence_length: number of times each vector is repeated
    :param order: 'vecfirst' puts the vector columns before the sequence
        columns; any other value puts them after
    :return: the concatenated tensor
    """
    vec_dim = int(vec_tensor.shape[-1])
    # print_tensor is a project helper; its return value is used in place of
    # its input throughout, so it is expected to pass the tensor through.
    vec_tensor = print_tensor(vec_tensor, "original vec tensor")
    seq_tensor = print_tensor(seq_tensor, "seq tensor")
    # repeat the vec tensor to the sequence length (copies side by side on axis 1)
    vec_tensor = tf.tile(vec_tensor, [1, sequence_length])
    vec_tensor = print_tensor(vec_tensor, "tiled tensor")
    # restore to one vector per row, so the copies become consecutive rows
    vec_tensor = tf.reshape(vec_tensor, [-1, vec_dim])
    vec_tensor = print_tensor(vec_tensor, "reshaped tiled tensor")
    # horizontal concat with the seq_tensor
    if order == 'vecfirst':
        res = tf.concat([vec_tensor, seq_tensor], axis=1)
    else:
        res = tf.concat([seq_tensor, vec_tensor], axis=1)
    # trace the concatenated result itself; previously vec_tensor was traced
    # again and the traced tensor was discarded, so this message never fired
    res = print_tensor(res, "concatted tensor")
    return res
def aggregate_clip_vectors(encoded_frames, encoded_dim, fpc, fusion_method):
    """
    Group encoded vectors into chunks of fpc and fuse each chunk over time.

    :param encoded_frames: tensor of encoded vectors to aggregate
    :param encoded_dim: size of each encoded vector
    :param fpc: number of consecutive vectors per group (presumably
        frames-per-clip -- confirm against callers)
    :param fusion_method: fusion method handed to apply_temporal_fusion
    :return: the result of apply_temporal_fusion on the grouped tensor
    """
    debug("Aggregating clip vectors, fpc:%d, dim:%d, inputshape:%s" % (fpc, encoded_dim, str(encoded_frames.shape)))
    # regroup into (groups, fpc, encoded_dim) so that axis 1 is the temporal axis
    grouped = tf.reshape(encoded_frames, (-1, fpc, encoded_dim), name="aggregate_clips")
    grouped = print_tensor(grouped, "Reshaped vectors")
    return apply_temporal_fusion(grouped, encoded_dim, fpc, fusion_method)
def apply_tensor_list_fusion(inputs, fusion_method, dims, fpcs, cpvs):
    """
    Fuse a list of tensors into a single tensor.

    :param inputs: list of tensors to fuse. The concat and ibias methods treat
        it as a (main, auxiliary) pair and may overwrite inputs[1] in place
        with a replicated auxiliary tensor.
    :param fusion_method: one of defs.fusion_method.avg / maximum / concat / ibias
    :param dims: vector dimension of each input
    :param fpcs: per-input sequence lengths (presumably frames-per-clip --
        confirm against callers)
    :param cpvs: per-input counts whose ratio cpvs[0] / cpvs[1] drives the
        replication of the auxiliary input (presumably clips-per-video --
        confirm against callers)
    :return: tuple (fused tensor, vector dim, fpc, cpv) describing the result;
        nothing is returned explicitly for an unknown method if error() returns
    """
    # the ratio is only defined for a (main, auxiliary) pair
    cpv_ratio = int(cpvs[0] / cpvs[1]) if len(inputs) == 2 else None

    if fusion_method == defs.fusion_method.avg:
        # element-wise mean across the inputs
        return tf.reduce_mean(inputs, axis=0), dims[0], fpcs[0], cpvs[0]

    if fusion_method == defs.fusion_method.maximum:
        # element-wise max across the inputs
        return tf.reduce_max(inputs, axis=0), dims[0], fpcs[0], cpvs[0]

    if fusion_method == defs.fusion_method.concat:
        if cpv_ratio == 1:
            # inputs already line up: join them column-wise
            fused = tf.concat(inputs, axis=1)
        else:
            # replicate the auxiliary input, then attach it to every sequence element
            inputs[1] = replicate_auxilliary_tensor(inputs, dims, cpvs, fpcs)
            fused = vec_seq_concat(inputs[0], inputs[1], fpcs[0])
        return fused, sum(dims), fpcs[0], cpvs[0]

    if fusion_method == defs.fusion_method.ibias:
        if cpv_ratio != 1:
            inputs[1] = replicate_auxilliary_tensor(inputs, dims, cpvs, fpcs)
        # input-bias fusion: the auxiliary vector becomes an extra first timestep
        main_dim, aux_dim = dims
        main_fpc, _aux_fpc = fpcs
        main_cpv, _aux_cpv = cpvs
        main_seq, aux_vec = inputs
        # reshape the sequence vectors to numclips x fpc x dim
        main_seq = tf.reshape(main_seq, [-1, main_fpc, main_dim])
        main_seq = print_tensor(main_seq,"reshaped seq")
        # reshape the auxiliary vectors to rows x 1 x dim
        aux_vec = tf.reshape(aux_vec, [-1, 1, aux_dim])
        aux_vec = print_tensor(aux_vec,"reshaped bias")
        # insert the auxiliary vector as the first item of each sequence
        augmented = tf.concat([aux_vec, main_seq], axis=1)
        # one extra timestep per sequence for the input bias
        augmented_fpc = main_fpc + 1
        info("Input bias augmented fpc: %d + 1 = %d" % (main_fpc, augmented_fpc))
        # restore to one vector per row
        augmented = tf.reshape(augmented, [-1, main_dim])
        return augmented, main_dim, augmented_fpc, main_cpv

    error("Unknown fusion method: [%s]" % fusion_method)
def replicate_auxilliary_tensor(inputs, dims, cpvs, fpcs):
    """
    Replicate the auxiliary (second) input int(cpvs[0] / cpvs[1]) times.

    :param inputs: (main, auxiliary) pair of tensors; only the auxiliary one is used
    :param dims: (main dim, auxiliary dim) pair; only the auxiliary dim is used
    :param cpvs: pair of counts whose ratio gives the number of copies
    :param fpcs: unused by this function
    :return: the auxiliary tensor, tiled and reshaped to rows of the auxiliary
        dim when the ratio exceeds 1, otherwise returned unchanged
    """
    _main_input, aux_input = inputs
    tile_num = int(cpvs[0]/cpvs[1])
    _main_dim, aux_dim = dims
    debug("Tiling tensor [%s] %d times" %(str(aux_input.shape), tile_num))
    if tile_num <= 1:
        # nothing to replicate
        return aux_input
    # NOTE(review): flattening to a single row before tiling repeats the whole
    # batch tile_num times (a1..aN, a1..aN, ...), not each row consecutively
    # (a1, a1, ..., a2, a2, ...) -- confirm this ordering matches the main input.
    flattened = tf.reshape(aux_input, [1, -1])
    repeated = tf.tile(flattened, [tile_num, 1])
    return tf.reshape(repeated, [-1, aux_dim])