-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy paththumbnail_generartion.py
132 lines (102 loc) · 5.84 KB
/
thumbnail_generartion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import numpy as np
import tensorflow as tf
params = {'batch_size': 1,
'lr': 0.001,
'cell_type': 'LSTM',
'n_b_boxes': 3
}
# FUNCTION TO GENERATE RECURRENT CELLS
def Recurrent_Cell(output_size, cell_type='Basic'):
if cell_type == 'Basic':
return tf.contrib.rnn.BasicRNNCell(output_size)
if cell_type == 'LSTM':
return tf.contrib.rnn.BasicLSTMCell(output_size)
if cell_type == 'GRU':
return tf.contrib.rnn.GRUCell(output_size)
if cell_type == 'LayerNorm':
return tf.contrib.rnn.LayerNormBasicLSTMCell(output_size)
# FUNCTION TO PERFORM GLOBAL CONTEXT AGGREGATION - Recurrent Part
# 'inputs' is a 4D tensor from a convolution operation
# The output returned is also a 4D tensor
def GCA_RNN(inputs, direction='LR', result_op='Sum'):
shape_list = inputs.get_shape().as_list()
height, width, channels = shape_list[1], shape_list[2], shape_list[3]
# To scan Left-Right
if direction == 'LR':
time_step_size = width
inputs_reshaped = tf.reshape(inputs, (params['batch_size']*height, width, channels))
with tf.variable_scope('LR') as scope:
cell_fw = Recurrent_Cell(output_size=channels, cell_type=params['cell_type'])
cell_bw = Recurrent_Cell(output_size=channels, cell_type=params['cell_type'])
(output_fw, output_bw), last_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw,
inputs_reshaped, dtype=tf.float32)
output_fw = tf.reshape(output_fw, [-1, height, width, channels])
output_bw = tf.reshape(output_bw, [-1, height, width, channels])
# To scan Up-Down
if direction == 'UD':
time_step_size = height
input_transposed = tf.transpose(inputs, perm=[0, 2, 1, 3])
inputs_reshaped = tf.reshape(input_transposed, (params['batch_size']*width, height, channels))
with tf.variable_scope('UD') as scope:
cell_fw = Recurrent_Cell(output_size=channels, cell_type=params['cell_type'])
cell_bw = Recurrent_Cell(output_size=channels, cell_type=params['cell_type'])
(output_fw, output_bw), last_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw,
inputs_reshaped, dtype=tf.float32)
output_fw = tf.reshape(output_fw, [-1, width, height, channels])
output_bw = tf.reshape(output_bw, [-1, width, height, channels])
output_fw = tf.transpose(output_fw, perm=[0, 2, 1, 3])
output_bw = tf.transpose(output_bw, perm=[0, 2, 1, 3])
if result_op == 'Sum':
return output_fw + output_bw
if result_op == 'Concatenate':
return tf.concat([output_fw, output_bw], 3)
# FUNCTION TO PERFORM GLOBAL CONTEXT AGGREGATION - Attention Part
def GCA(inputs):
shape_list = inputs.get_shape().as_list()
height, width, channels = shape_list[1], shape_list[2], shape_list[3]
# Scan input Left-Right
output_lr = GCA_RNN(inputs, direction='LR', result_op='Sum')
# Scan input Up-Down
output_ud = GCA_RNN(output_lr, direction='UD', result_op='Sum')
conv_output = conv2d(output_ud, height*width, k_h=3, k_w=3, d_h=1, d_w=1)
# Apply softmax to Normalize the last dimension
conv_softmax = tf.nn.softmax(conv_output)
# Attend
conv_softmax_2d = tf.reshape(conv_softmax, (-1, conv_softmax.get_shape().as_list()[-1]))
inputs_2d = tf.reshape(inputs, (-1, channels))
# print("inputs \t", inputs.get_shape().as_list())
# print("inputs 2d \t", inputs_2d.get_shape().as_list())
# print("conv_softmax \t", conv_softmax.get_shape().as_list())
# print("conv_softmax 2d \t", conv_softmax_2d.get_shape().as_list())
# F_attn. if the final attended input
f_attn = tf.matmul(conv_softmax_2d, inputs_2d)
f_attn = tf.reshape(f_attn, shape_list)
return f_attn
# FUNCTION TO DYNAMICALLY GENERATE CONVOLUTION KERNEL AND BIAS FROM GIVEN INPUT ASPECT-RATIO AND SHAPE
# aspect-ratio is a tensor of shape [batch_size, 1] and shape is a list of 4 elements
def Gen_Adap_Weights(aspect_ratio, shape, name="adap_conv_layer"):
with tf.variable_scope(name):
# Number of units in the final layer (for convolution)
n_units = shape[0] * shape[1] * shape[2] * shape[3] + shape[3]
adap_weights = linear(aspect_ratio, 16, 'l_1')
adap_weights = linear(adap_weights, 32, 'l_2')
adap_weights = linear(adap_weights, n_units, 'l_3')
bias = adap_weights[:, -shape[3]:]
bias = tf.reshape(bias, [-1])
weights = tf.reshape(adap_weights[:, :-shape[3]], shape)
return weights, bias
# Modified Region Proposal Network with Adaptive Convolutions
def RPN(conv_features, aspect_ratio):
# The first convolution of RPN module
conv_op_1 = conv2d(conv_features, 512, k_h=3, k_w=3, d_h=1, d_w=1, name="conv_op_1")
# predict objectness-score (3 boxes per position)
w2, b2 = Gen_Adap_Weights(aspect_ratio, [1, 1, 512, params['n_b_boxes']], "ad_obj_scores")
objectness_op = tf.nn.conv2d(conv_op_1, w2, strides=[1, 1, 1, 1], padding='SAME')
objectness_op = tf.reshape(tf.nn.bias_add(objectness_op, b2), objectness_op.get_shape())
# predict bounding box
w3, b3 = Gen_Adap_Weights(aspect_ratio, [1, 1, 512, params['n_b_boxes']*4], "ad_bb_coord")
b_box_op = tf.nn.conv2d(conv_op_1, w3, strides=[1, 1, 1, 1], padding='SAME')
b_box_op = tf.reshape(tf.nn.bias_add(b_box_op, b3), b_box_op.get_shape())
bb_shape = b_box_op.get_shape().as_list()
b_box_op = tf.reshape(b_box_op, [bb_shape[0], bb_shape[1], bb_shape[2], params['n_b_boxes'], 4])
return b_box_op, objectness_op