SlicingWithLinearEquation.py
"""
Uses a linear equation to produce the output used for slicing, then loads that slice into the DCSPN.
"""
from __future__ import print_function
from tensorflow.keras import layers  # the Keras layers API bundled with TensorFlow
from dcspn.spn import SumProductNetwork
from dcspn.layers import SumLayer, ProductLayer, GaussianLeafLayer
from dcspn.utilities import Database, plot_cost_graph
import tensorflow as tf
import numpy as np
import argparse
import os
import gc
import datetime
import logging
import matplotlib.pyplot as plt
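# NOTE: this script mixes TF1-style APIs (tf.get_variable, tf.placeholder,
# tf.Session) with tf.keras, so it is expected to run under TensorFlow 1.x
# (or under TF2 with tf.compat.v1 substitutions).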
########################################TLNN#####################################################
#Variables
batch_size = 128
epochs = 2
weightSize1 = 31370  # size of the hidden linear layer (same value as n_x and n_h below)
weightSize2 = 10     # number of MNIST classes
# input image dimensions
img_rows, img_cols = 28, 28
# the data, split between train and test sets
(train_imgs, train_labels), (test_imgs, test_labels) = tf.keras.datasets.mnist.load_data()
#Reshape and normalize training data
train_imgs = train_imgs.reshape(train_imgs.shape[0], img_rows, img_cols, 1)
test_imgs = test_imgs.reshape(test_imgs.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)
train_imgs= train_imgs.astype('float32')
test_imgs = test_imgs.astype('float32')
train_imgs /= 255
test_imgs /= 255
#print('train_imgs shape:', train_imgs.shape)
print(train_imgs.shape[0], 'train samples')
print(test_imgs.shape[0], 'test samples')
# convert class vectors to one-hot class matrices; MNIST has 10 classes,
# so weightSize2 (10) gives the number of classes here
# e.g. label 3 -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
train_labels = tf.keras.utils.to_categorical(train_labels, weightSize2)
test_labels = tf.keras.utils.to_categorical(test_labels, weightSize2)
#Create the dense linear equation
n_x = 31370
n_h = 31370
n_y = 1
W1 = tf.get_variable(
    name="W1",
    shape=[n_h, n_x],
    initializer=tf.glorot_normal_initializer(),
    dtype=tf.float32
)
b1 = tf.get_variable(
    name="b1",
    shape=[n_h, 1],
    initializer=tf.glorot_normal_initializer(),
    dtype=tf.float32
)
W2 = tf.get_variable(
    name="W2",
    shape=[n_y, n_h],
    initializer=tf.glorot_normal_initializer(),
    dtype=tf.float32
)
b2 = tf.get_variable(
    name="b2",
    shape=[n_y, 1],
    initializer=tf.glorot_normal_initializer(),
    dtype=tf.float32
)
X = tf.placeholder(
    name="X",
    dtype=tf.float32,
    shape=[n_x, None]
)
Z1 = tf.nn.relu(tf.matmul(W1, X) + b1)   # shape [n_h, batch]
Z2 = tf.nn.relu(tf.matmul(W2, Z1) + b2)  # W2 maps n_h -> n_y, so shape is [n_y, batch]
print(Z2)
#running the computational graph
# Numpy below
X_feed = np.random.random([n_x, 1])
sess = tf.Session()
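# A minimal sanity check of the graph above (hypothetical, not part of the
# original flow): initialize the variables and evaluate Z2 on the random feed.
# Note that W1 alone holds 31370 x 31370 float32 values (~3.9 GB), so this
# needs ample memory.
#   sess.run(tf.global_variables_initializer())
#   print(sess.run(Z2, feed_dict={X: X_feed}).shape)  # expected: (n_y, 1)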
print("\n##########################################################################################################\n")
#############################################DCSPN#############################################################
#raise ValueError
#Variables for the DCSPN
reuse = 0
saveName = "testSave"
counter = 0 #For keeping track of where in the vector we are
SEED = 1234
img_height = 28 #the height in pixels of the input image
img_width = 28 #the width in pixels of the input image
img_channel = 1 #the number of channels of the input image.
#Note: for greyscale img_channel = 1, for RGB img_channel = 3
valid_amount = 50
#The leaf components is the number of channels you want your tensor to have
#each pixel will have leaf_components number of means and std calculated for it
leaf_components = 4
#Getting the leaf means and stds
params_shape = (train_imgs.shape[1], train_imgs.shape[2], leaf_components)
leaf_means = np.zeros(params_shape)
leaf_stds = np.zeros(params_shape)
#sort each pixel across the sample axis so the slices below form true quantiles
sorted_data = np.sort(train_imgs, axis=0)
quantile_size = train_imgs.shape[0] / leaf_components
for k in range(leaf_components):
    lower_idx = int(k * quantile_size)
    upper_idx = int((k + 1) * quantile_size)
    slice_data = sorted_data[lower_idx:upper_idx, :, :, :]
    leaf_means[:, :, k] = np.reshape(
        np.mean(slice_data, axis=0),
        (params_shape[0], params_shape[1]))
    _std = np.std(slice_data, axis=0)
    _std[_std == 0] = 1  # guard against zero std for constant pixels
    leaf_stds[:, :, k] = np.reshape(
        _std, (params_shape[0], params_shape[1]))
print("\nmeans shape")
print(leaf_means.shape)
print(leaf_stds.shape)
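# With 60000 MNIST training images and leaf_components = 4, each quantile
# slice holds 15000 images: component k's mean/std at pixel (i, j) is the
# mean/std of that pixel over images 15000*k .. 15000*(k+1).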
print("LOAD")
print("\n\n\n\n\n\n\n\n")
#sess = tf.compat.v1.Session()
initializer = "uniform" #for the layer parameters
#The share parameter is related to the weights.
#If share_parameters is False, then the weights tensor will be the same size as the sum layer tensor
#If share_parameters is True, then the weights tensor is 1x1xc, where c is the number of leaf components.
#In this case the weights are the same for all input pixels
share_parameters = False
#Determines whether the sum node performs a max or a sum operation
#if False it performs a sum; if True it takes the maximum child
hard_inference = False
#saves the shape of the input
input_shape = [img_height, img_width, img_channel]
#create an spn, this will hold all of the layers and be used for training and inferences
spn = SumProductNetwork(input_shape=input_shape, reuseValue=reuse)
# Build leaf layer
leaf_layer = GaussianLeafLayer(num_leaf_components=leaf_components)  # these leaves use a Gaussian distribution
#Each leaf (there are 4 per pixel, since leaf_components = 4) is a Gaussian distribution using the means and stds previously calculated
spn.add_layer(leaf_layer)#adds the leaf layer to the graph
spn.set_leaf_layer(leaf_layer)#sets it as the leaf layer
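# Each leaf component k at pixel (i, j) models the pixel with a Gaussian
# density (assumed from the Gaussian-leaf design; the means/stds come from
# the quantile slices computed above):
#   p_k(x_ij) = N(x_ij; leaf_means[i, j, k], leaf_stds[i, j, k]**2)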
#create a sum layer
#A sum layer, for each pixel, combines the channels of that pixel by either adding all the values together or choosing the maximum value
#the sum layer will shrink the number of channels but not change the width or height
#if oc is the number of out channels and c is the number of original channels then the sum layer changes the tensor from wxhxc to wxhxoc
sum_layer = SumLayer(out_channels=10,  # out_channels chooses how many output channels there will be
                     hard_inference=hard_inference,
                     share_parameters=share_parameters,
                     initializer=initializer)
#here out_channels=10, so the tensor is reduced from wxhxc to wxhx10
#The hard_inference and share_parameters options are described above
#connect the sum layer to the tree, the sum layer is the parent of the leaf layer
spn.add_forward_layer_edge(leaf_layer, sum_layer)
#this builds a tree upwards, so leaf_layer is the child of the sum_layer, building up towards the root
#the pool window is used by the product layer
#the size of the pooling window determines which pixels will be filtered together
#The size of the pooling window determines the width and height of the new tensor that is produced
#if pwxph is the size of the pooling window and the input tensor is wxhxc then the new tensor is (w/pw)x(h/ph)xc
pool_window = (img_width, img_height)
#Because this is a toy example and only one product layer is desired, the size of the pooling window is the same as the size of the input
sum_pooling = True  # take an average sum before pooling
#create a product layer
#A product layer is like a filter layer in a CNN. It reduces the height and width based on the size of the pooling window
#The number of channels remains the same
product_layer = ProductLayer(pooling_size=pool_window,
                             sum_pooling=sum_pooling)
#add the product layer to the graph. It will be the parent of the sum layer
spn.add_forward_layer_edge(sum_layer, product_layer)
root_layer = SumLayer(out_channels=1,  # out_channels chooses how many output channels there will be
                      hard_inference=hard_inference,
                      share_parameters=share_parameters,
                      initializer=initializer)
spn.add_forward_layer_edge(product_layer, root_layer)
#After pooling, the product layer output is 1x1x10; the root sum layer reduces it to 1x1x1, which can no longer be reduced
#This means root_layer is the root of the network and needs to be assigned as such
#You can create a special layer just to be the root if you wish but it is not necessary
spn.set_root_layer(root_layer)
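# Shape trace of the network built above (28x28 greyscale inputs):
#   leaf layer:      28 x 28 x 4   (leaf_components Gaussians per pixel)
#   sum layer:       28 x 28 x 10  (out_channels=10)
#   product layer:    1 x  1 x 10  (28x28 pooling window)
#   root sum layer:   1 x  1 x 1   (out_channels=1, the root value)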
print("The model is now created. Its name is spn.")
#This model has 4 layers: the leaf layer, a sum layer, a product layer and a root sum layer
#The leaf layer contains the values from the input and is a tensor of size wxhxc where w and h are the dimensions of the input and c is the number of leaf_components
#The next layer is a sum layer which combines the channels, taking the tensor from wxhxc to wxhx10 (out_channels=10)
#The third layer is the product layer. Because this is a toy example only one product layer is desired,
#so the pooling window is the same size as the input (wxh) and the tensor goes from wxhx10 to 1x1x10
#The final sum layer reduces 1x1x10 to 1x1x1. Because the size is 1x1x1 this layer cannot be reduced further, so it is also the root
#It must be set as the root so that the spn is aware of it
print("Starting to fit the model")
backward_masks = None #a mask for inferences, chooses which child is active
#All children of a product node are active. The max child of a sum node is active
forward = None
#print(external_weights[2][0])
#TODO: change this back if it is not working
#external_weights = tf.convert_to_tensor(external_weights[2][0])
#defining loss function and optimizer, calls build forward to build tensorflow graph
#Before this point the layers were established but have no values
forward = spn.compile(learning_rate=0.01,
                      optimizer="adam",
                      reuse=False,
                      external_weights=Z2)
print("################################")
X_feed = np.random.random([n_x, 1])
feed_means_stds = {
    spn.leaf_layer.means: leaf_means,
    spn.leaf_layer.stds: leaf_stds,
    X: X_feed}
#the fit function is what does the actual learning. It will perform a forward pass then perform gradient descent
#This loop will repeat within the function for the specified number of epochs
#The costs returned are the error over time, in other words, how different the produced images are from the correct images
#The cost is the Negative Log-Likelihood (NLL)
spn.fit(train_data=train_imgs, epochs=2, add_to_feed=feed_means_stds, minibatch_size=64)
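# The cost minimized here is the standard average negative log-likelihood:
#   NLL = -(1/N) * sum_n log p(x_n)
# where p(x_n) is the SPN root value for image x_n; lower is better.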
#raise ValueError
"""
I THINK THIS CODE IS WRONG!!!!!!!
Alright some new problem:
where is spn.inputs given and actual value. Because that is now the variable spn_inputs
"""
#MPE is Most Probable Explanation
print("\nMPE inference")
#TODO: should the marginalization take place above, before training? Does it matter?
#marginalize half of each image
#variables for storing the marginalized images
eval_data_marg = None
#marginalize the left side of all images; the algorithm's job is to complete them
"""
Potential Problem:
The marginalization might not be working, as it produces a split image.
One side is completely black and the other completely white
"""
#This is normal for the output here. I know that this works
eval_data_marg = Database.marginalize(
    test_imgs, ver_idxs=[[0, img_height]],
    hor_idxs=[[0, int(img_width / 2)]])
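# Presumably (based on the index arguments) this marks the region spanning all
# rows [0, 28) and the left-half columns [0, 14) of every test image as
# unobserved; MPE inference below will fill those pixels back in.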
print("hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhq")
#mpe inference
spn_input = spn.inputs_marg
#construct a backwards mask, choosing which child is activated
if backward_masks is None:
    print("\n\nbuilding backward masks")
    spn_input = spn.inputs
    backward_masks = spn.build_backward_masks(forward)
mpe_leaf = spn.build_mpe_leaf(backward_masks, replace_marg_vars=spn_input)
print(spn_input)
#print(spn.inputs.eval(session=sess))
#print(spn.inputs[forward_input]["output"])
#Form a holder variable for the leaf means and standard deviations
#NOTE: the forward inference below is the problem line
#raise ValueError
#perform the forward inference, getting the value that ends up in the root
root_values = spn.forward_inference(
    forward, eval_data_marg, add_to_feed=feed_means_stds,
    alt_input=spn_input)
#get the NLL value for the validation data
root_value = -1.0 * np.mean(root_values)
print('{"metric": "Val NLL", "value": %f}' % (root_value))
#BELOW HERE JUST OUTPUTTING IMAGE
#get the MPE
mpe_assignment = spn.mpe_inference(
    mpe_leaf, eval_data_marg, add_to_feed=feed_means_stds,
    alt_input=spn_input)
# De-normalize results: they are currently between 0 and 1 and must be denormalized to display properly and to calculate the MSE correctly
#mpe_assignment = mpe_assignment*255
#unorm_eval_data = test_imgs*255
unorm_eval_data = test_imgs
# MSE is Mean Squared Error
print("Computing Mean Square Error")
#where the images will be saved to; create the toy folder inside outputs if it does not exist
save_imgs_path = "outputs/toy"
os.makedirs(save_imgs_path, exist_ok=True)
#Get the mean squared error
#mse = Database.mean_square_error(
# mpe_assignment, unorm_eval_data, save_imgs_path=save_imgs_path)
#print("MSE: {}".format(mse))
#mpe_assignment, unorm_eval_data, save_imgs_path=save_imgs_path
#def mean_square_error(data, data_target, save_imgs_path=None):
# MSE needs data as 2D images
if len(mpe_assignment.shape) == 4:
    data = np.squeeze(mpe_assignment)
    unorm_eval_data = np.squeeze(unorm_eval_data)
else:
    data = mpe_assignment
# Simple MSE function
def _mse(d1, d2):
    # Shape of d1 and d2 should be [H, W]
    mse = np.mean(
        ((255 * d2).astype(dtype="int") -
         (255 * d1).astype(dtype="int")) ** 2)
    return mse
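# Quick illustration of _mse on toy arrays (hypothetical, not part of the
# pipeline): an all-zeros image versus an all-ones image differ by 255 at
# every pixel, so the MSE is 255**2 = 65025.
#   _mse(np.zeros((2, 2)), np.ones((2, 2)))  # -> 65025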
#map each image's MSE to its index (note: images with identical MSE values
#collide on the same key, so only the last such image is kept)
mse_img = {
    _mse(data[img_idx, :, :],
         unorm_eval_data[img_idx, :, :]): img_idx
    for img_idx in range(data.shape[0])
}
log_mse = open("{}/mse_log.txt".format(save_imgs_path), "w")
for idx, img_mse in enumerate(reversed(sorted(mse_img.keys()))):
    Database.save_image(
        "{}/{}.png".format(save_imgs_path, idx),
        data[mse_img[img_mse], :, :])
    log_mse.write("{},{}\n".format(idx, img_mse))
log_mse.close()
print("\n\n\nNOW THE DIMENSIONS")
print("layer 0- leaf layer")
print(spn.layers[0])
#print(spn.layers[0].shape)
#print(spn.layers[0].weights)
print("\nlayer 1- sum layer")
print(spn.layers[1])
#print(spn.layers[1].compute_output_shape(spn.layers[1]))
print(spn.layers[1].weights)
print("\nlayer 2- product layer")
print(spn.layers[2])
#print(spn.layers[2].shape)
#print(spn.layers[2].weights)
print("\nlayer 3- root layer")
print(spn.layers[3])
#print(spn.layers[3].shape)
print(spn.layers[3].weights)