Neural Network outputs same values for All Inputs - tensorflow

I have a ConvNet model. It is outputting the exact same values for all cases of forward propagation.
Originally, this happened only during evaluation and not during training, because the dropout rate was set to 1 and there was no learning rate. This led me to believe that I was restoring the model incorrectly. However, I decided to test it during training by removing the dropout and setting the learning rate to zero. When I output the softmax values, every one of them is constant.
I then analyzed the images and labels that were displayed through tensorboard, and every one of them seemed to be changing, there was definitely changing data.
So the problem is not with the inputs but definitely with the forward propagation itself, though I am not able to see where it went wrong.
def weight_variable(shape):
    """Create a trainable weight tensor of the given shape on ``/gpu:0``.

    Initial values are drawn from a zero-mean normal distribution with
    ``stddev=0.00125``.
    """
    # NOTE(review): with a stddev this small and biases starting at 0.1
    # (see bias_variable), the bias terms may dominate the activations of a
    # deep stack -- worth checking when every input yields the same output.
    with tf.device('/gpu:0'):
        initial = tf.random_normal(shape, stddev=0.00125)
        return tf.Variable(initial)
def bias_variable(shape):
    """Create a trainable bias tensor of ``shape`` filled with 0.1 on ``/cpu:0``."""
    with tf.device('/cpu:0'):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)
def conv(images, W):
    """Return the stride-1, SAME-padded 2-D convolution of ``images`` with ``W``."""
    return tf.nn.conv2d(images, W, strides=[1, 1, 1, 1], padding='SAME')
def _conv_pool_norm(inputs, kernel_shape, norm_name):
    """Run one conv -> ReLU -> 2x2 max-pool -> LRN stage and return its output."""
    kernel = weight_variable(kernel_shape)
    bias = bias_variable([kernel_shape[-1]])
    activated = tf.nn.relu(conv(inputs, kernel) + bias)
    _activation_summary(activated)
    pooled = tf.nn.max_pool(activated, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
    return tf.nn.lrn(pooled, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                     name=norm_name)


def _dense_relu(inputs, in_units, out_units):
    """Return ``relu(inputs @ W + b)`` for a new ``in_units x out_units`` layer."""
    weight = weight_variable([in_units, out_units])
    bias = bias_variable([out_units])
    return tf.nn.relu(tf.matmul(inputs, weight) + bias)


def forward_propagation(images, dropout_value2):
    """Build the ConvNet and return its un-normalised 4-way output (logits).

    Args:
        images: tensor reshaped here to ``[-1, 800, 800, 3]``.
        dropout_value2: forwarded as the second positional argument of
            ``tf.nn.dropout`` (the keep probability in TF 1.x -- TODO confirm
            against the TensorFlow version in use).

    Returns:
        A ``[batch, 4]`` tensor; no softmax is applied.
    """
    with tf.device('/gpu:0'):
        net = tf.reshape(images, [-1, 800, 800, 3])

        # Five stages; each 2x2/stride-2 pool halves height and width:
        # 800 -> 400 -> 200 -> 100 -> 50 -> 25.
        conv_stages = [
            ([8, 8, 3, 16], 'norm1'),
            ([3, 3, 16, 64], 'norm2'),
            ([3, 3, 64, 128], 'norm3'),
            ([3, 3, 128, 256], 'norm4'),
            ([3, 3, 256, 512], 'norm5'),
        ]
        for kernel_shape, norm_name in conv_stages:
            net = _conv_pool_norm(net, kernel_shape, norm_name)

        flat_units = 25 * 25 * 512
        flat = tf.reshape(net, [-1, flat_units])

        # Only the first dense layer gets an activation summary, as before.
        hidden = _dense_relu(flat, flat_units, 256)
        _activation_summary(hidden)
        for in_units, out_units in [(256, 256), (256, 100), (100, 50)]:
            hidden = _dense_relu(hidden, in_units, out_units)

        # Output layer: dropout on the last hidden activations, then a
        # linear projection to 4 classes.
        out_weight = weight_variable([50, 4])
        out_bias = bias_variable([4])
        dropped = tf.nn.dropout(hidden, dropout_value2)
        result1 = tf.matmul(dropped, out_weight) + out_bias
        _activation_summary(result1)
        return result1
def error(forward_propagation_results, labels):
    """Return the mean sparse softmax cross-entropy between logits and labels.

    ``labels`` is cast to int64 class indices. The resulting scalar is also
    added to the ``'losses'`` collection and logged as the ``'LOSS'`` summary.
    """
    with tf.device('/cpu:0'):
        labels = tf.cast(labels, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=forward_propagation_results, labels=labels)
        cost = tf.reduce_mean(cross_entropy)
        tf.add_to_collection('losses', cost)
        tf.summary.scalar('LOSS', cost)
        return cost
def train(cost):
    """Return the Adam (learning rate 0.01) op that minimises ``cost``."""
    with tf.device('/gpu:0'):
        train_loss = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)
        return train_loss

Related

Matrix size-incompatible for custom multi model

I am getting the following error:
Node: 'BGNet/dense/BiasAdd'
Matrix size-incompatible: In[0]: [1120,0], In[1]: [2048,1024]
[[{{node BGNet/dense/BiasAdd}}]] [Op:__inference_train_function_11676]
I found the root in this part of the model:
File "<ipython-input-14-3dcbdf5337b8>", line 69, in call
f = self.dense(f)
This is my custom multi model:
class BGNet(tf.keras.Model):
    """Multi-output CNN predicting objectness, bounding box and class.

    Seven ``BGBlock`` stages feed a shared dense layer; three heads then
    produce ``'prob'`` (1 sigmoid unit), ``'bbox'`` (4 linear units) and
    ``'class'`` (``len(classes)`` softmax units).
    """

    def __init__(self, img_h, img_w, img_c, batch_size, classes):
        """Store the input geometry and create every layer.

        ``img_h``, ``img_w``, ``img_c`` and ``batch_size`` are only stored;
        ``classes`` must support ``len()`` because it sizes the class head.
        """
        super(BGNet, self).__init__(name='BGNet')
        self.img_h = img_h
        self.img_w = img_w
        self.img_c = img_c
        self.batch_size = batch_size
        self.classes = classes
        # The shape comments below are the author's expected per-image
        # shapes entering each block; they are not checked anywhere here.
        # (224, 224, 3)
        self.bgblock0 = BGBlock(f=[32, 32, 32, 32],
                                k=[7, 5, 5, 5],
                                d=[1, 2, 2, 1],
                                stage=0)
        # (112, 112, 32)
        self.bgblock1 = BGBlock(f=[64, 64, 64, 64],
                                k=[5, 5, 5, 3],
                                d=[2, 1, 1, 2],
                                stage=1)
        # (56, 56, 64)
        self.bgblock2 = BGBlock(f=[128, 128, 128, 128],
                                k=[5, 5, 3, 3],
                                d=[2, 1, 2, 1],
                                stage=2)
        # (28, 28, 128)
        self.bgblock3 = BGBlock(f=[256, 256, 256, 256],
                                k=[5, 3, 3, 3],
                                d=[1, 2, 1, 2],
                                stage=3)
        # (14, 14, 256)
        self.bgblock4 = BGBlock(f=[512, 512, 512],
                                k=[3, 3, 3],
                                d=[1, 1, 2],
                                stage=4)
        # (7, 7, 512)
        self.bgblock5 = BGBlock(f=[1024, 1024, 1024],
                                k=[3, 3, 1],
                                d=[2, 1, 1],
                                stage=5)
        # (4, 4, 1024)
        self.bgblock6 = BGBlock(f=[2048, 2048],
                                k=[1, 1],
                                d=[1, 2],
                                stage=6)
        # (2, 2, 2048)
        self.flatten = tf.keras.layers.Flatten(name='flatten')
        self.dense = tf.keras.layers.Dense(1024, activation='tanh', name='dense')
        self.dropout = tf.keras.layers.Dropout(0.2, name='dropout')
        self.prob = tf.keras.layers.Dense(1, activation='sigmoid', name='prob')
        self.concat1 = tf.keras.layers.Concatenate(axis=-1, name='concat1')
        self.bbox1 = tf.keras.layers.Dense(512, activation='relu', name='bbox1')
        self.bbox2 = tf.keras.layers.Dropout(0.1, name='bbox2')
        self.bbox3 = tf.keras.layers.Dense(256, activation='sigmoid', name='bbox3')
        self.bbox = tf.keras.layers.Dense(4, name='bbox')
        self.concat2 = tf.keras.layers.Concatenate(axis=-1, name='concat2')
        self.cat = tf.keras.layers.Dense(len(self.classes), activation='softmax', name='cat')

    def call(self, input_tensor, training=True):
        """Run the forward pass.

        Args:
            input_tensor: batch of images fed to the first ``BGBlock``.
            training: forwarded to both Dropout layers so they are active
                only when the caller asks for training behaviour.

        Returns:
            dict with keys ``'prob'``, ``'bbox'`` and ``'class'``.
        """
        x = self.bgblock0(input_tensor)
        x = self.bgblock1(x)
        x = self.bgblock2(x)
        x = self.bgblock3(x)
        x = self.bgblock4(x)
        x = self.bgblock5(x)
        x = self.bgblock6(x)

        # Shared feature vector used by all three heads.
        f = self.flatten(x)
        f = self.dense(f)
        f = self.dropout(f, training=training)

        p = self.prob(f)

        # The box head sees the shared features plus the objectness score.
        b = self.concat1([f, p])
        b = self.bbox1(b)
        b = self.bbox2(b, training=training)
        b = self.bbox3(b)
        b = self.bbox(b)

        # The class head sees the shared features plus the predicted box.
        c = self.concat2([f, b])
        c = self.cat(c)
        return {'prob': p, 'bbox': b, 'class': c}
# Instantiate the model; H, W, C, B and N are defined outside this snippet.
model1 = BGNet(H, W, C, B, N)
# Build for a fixed batch of B inputs of shape (H, W, C).
model1.build(input_shape=(B, H, W, C))
# Invoke call() directly on a symbolic input -- presumably so that summary()
# can list output shapes; TODO confirm this is needed after build().
model1.call(tf.keras.layers.Input(shape=(H, W, C), batch_size=B))
model1.summary(print_fn=tf.print, expand_nested=True, show_trainable=True)
The custom (BGBlocks) blocks are not that important but if you are curious they are convolution blocks consisting of conv2d, batchnorm, activation and pooling layers
The model produces 3 output vectors of different sizes while sharing the first dense layers. The output layers first predict the confidence score (prob in loss) of an object being in the image. Next they predict the bounding box (bbox in loss) and finally the class (class in loss) of the bounded object.
The main issue is after the flatten layer. The model builds without errors with input images of (224, 224, 3). This is how the summary of the model looks: model.summary() image
I have even created a custom IOU (Intersection Over Union) for bounding boxes to be used as model metric. The losses are simple, inbuilt and as follows:
loss = {'prob': 'binary_crossentropy', 'bbox': 'mse', 'class': 'categorical_crossentropy'}
How can I resolve this error?

TensorFlow: ConvNet returning same output for all the examples

I have 200 images on a set, 100 identical squares and 100 identical circles. Images are 44x41 pixels and images are grayscale. I am trying to build a simple classifier to learn tensorflow.
The problem: the predictor vectors have always the same value regardless the input image.
Here's the code of my neural net:
import tensorflow as tf
import random as r
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
%matplotlib inline
# create pictures
# 100 identical squares, saved as rect0.png ... rect99.png
for i in range(100):
    fig1 = plt.figure(frameon=False, figsize=(1, 1), dpi=32)
    ax1 = fig1.add_subplot(111, aspect='equal')
    posx = 0.25
    posy = 0.25
    ax1.add_patch(
        patches.Rectangle(
            (posx, posy),  # (x,y)
            0.5,           # width
            0.5,           # height
        )
    )
    ax1.axis('off')
    fig1.savefig('rect' + str(i) + '.png', bbox_inches='tight')
    # Close the figure once saved; otherwise all 200 figures stay open.
    plt.close(fig1)

# 100 identical circles, saved as circ0.png ... circ99.png
for i in range(100):
    fig1 = plt.figure(frameon=False, figsize=(1, 1), dpi=32)
    ax1 = fig1.add_subplot(111, aspect='equal')
    posx = 0.5
    posy = 0.5
    ax1.add_patch(
        patches.Circle(
            (posx, posy),  # (x,y)
            0.3,           # radius
        )
    )
    ax1.axis('off')
    fig1.savefig('circ' + str(i) + '.png', bbox_inches='tight')
    plt.close(fig1)
# create vectors
# Rows 0..99 hold the squares (label [0, 1]); rows 100..199 hold the
# circles (label [1, 0]).
train_features = np.zeros((200, 44, 41, 1))
train_labels = np.zeros((200, 2))
# NOTE(review): PIL reports size as (width, height) and getdata() is
# row-major, so reshaping to (44, 41, 1) assumes 44 rows by 41 columns --
# confirm against the saved PNGs.
for i in range(100):
    # get rect
    with Image.open("rect" + str(i) + ".png") as im:
        gray = im.convert(mode="L")
    imdata = np.reshape(list(gray.getdata()), (44, 41, 1))
    train_features[i] = imdata
    train_labels[i] = np.array([0, 1])
    # get circle
    with Image.open("circ" + str(i) + ".png") as im:
        gray = im.convert(mode="L")
    imdata = np.reshape(list(gray.getdata()), (44, 41, 1))
    train_features[i + 100] = imdata
    train_labels[i + 100] = np.array([1, 0])
# Build the classifier graph: two conv/ReLU/pool stages, one hidden fully
# connected layer and a 2-way linear output trained with softmax
# cross-entropy.
tf.reset_default_graph()
# Inputs: grayscale 44x41 images and one-hot labels over two classes.
features = tf.placeholder(tf.float32,shape=[None,44,41, 1])
labels = tf.placeholder(tf.float32,shape=[None,2])
# 3x3 kernels: 1 -> 16 channels, then 16 -> 64 channels.
weights = tf.Variable(tf.truncated_normal([3,3, 1, 16], stddev=0.1))
biases = tf.Variable(tf.zeros(16))
weights2 = tf.Variable(tf.truncated_normal([3,3, 16, 64], stddev=0.1))
biases2 = tf.Variable(tf.zeros(64))
conv_layer = tf.nn.conv2d(features, weights, strides=[1, 1, 1, 1], padding='SAME')
conv_layer_b = tf.nn.bias_add(conv_layer, biases)
conv_layer_relu = tf.nn.relu(conv_layer_b)
# Pooling uses stride 1 with SAME padding, so the 44x41 spatial size is kept.
conv_layer_pool = tf.nn.max_pool(conv_layer_relu, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='SAME')
conv_layer2 = tf.nn.conv2d(conv_layer_pool, weights2, strides=[1, 1, 1, 1], padding='SAME')
conv_layer2_b = tf.nn.bias_add(conv_layer2, biases2)
conv_layer2_relu = tf.nn.relu(conv_layer2_b)
conv_layer2_pool = tf.nn.max_pool(conv_layer2_relu, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='SAME')
#fully connected layer
# Input width 44*41*64 matches the un-downsampled feature map above.
weights_fc = tf.Variable(tf.truncated_normal([44*41*64, 256], stddev=0.1))
biases_fc = tf.Variable(tf.zeros([256]))
# Flatten each example to the first dimension of weights_fc.
fc = tf.reshape(conv_layer2_pool, [-1, weights_fc.get_shape().as_list()[0]])
fc_logit = tf.add(tf.matmul(fc, weights_fc), biases_fc)
fc_relu = tf.nn.relu(fc_logit)
#fc_drop = tf.nn.dropout(fc_relu, 0.75)
# final layer
weights_out = tf.Variable(tf.truncated_normal([256, 2], stddev=0.1))
biases_out = tf.Variable(tf.zeros([2]))
# `out` holds raw logits; softmax is applied only inside the loss.
out = tf.add(tf.matmul(fc_relu, weights_out), biases_out)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)
# Train on the full set, then print the logits for every training image.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # 100 full-batch gradient-descent steps over all 200 images.
    for _ in range(100):
        sess.run(optimizer, feed_dict={
            features: train_features[:],
            labels: train_labels[:]})
    # Evaluate one image at a time and print logits next to the true label.
    for i in range(200):
        outx = sess.run(out, feed_dict={
            features: [train_features[i]],
            labels: [train_labels[i]]})
        print(outx)
        print(train_labels[i])
        print('---')
Try not to give the same name to two tensors. For example, you have conv_layer that is equal to tf.nn.conv2d(features, weights, strides=[1, 1, 1, 1], padding='SAME') then rewriten to tf.nn.bias_add(conv_layer, biases), then once more then its another shape and then ....
Use this naming for example:
conv_layer = tf.nn.conv2d(features, weights, strides=[1, 1, 1, 1], padding='SAME')
conv_layer_b = tf.nn.bias_add(conv_layer, biases)
conv_layer_relu = tf.nn.relu(conv_layer_b)
conv_layer_pool = tf.nn.max_pool(conv_layer_relu, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='SAME')
The algorithm learns one image at a time. Try to feed all the images in your set if your machine can handle it: sess.run(optimizer, feed_dict={features: train_features[:], labels: train_labels[:]}). If not, feed 100 images from both classes. Are the images shuffled, or do 100 circles come first and then 100 squares? The error may lie here: you update your weights 100 times with only squares in the last loop.
Can I see the complete program, including the part where you print the predicted vector? As a first stage I would take the dropout out; let it overfit. And then, maybe, use a smaller fc_layer (512 or 256), a smaller learning rate (0.01), and I prefer tf.get_variable('w1', shape=[3,3,1,16]) instead of tf.Variable(...); initialize the biases with value 0.1.

simple Recurrent Neural Net from scratch using tensorflow

I've built a simple recurrent neural net with one hidden layer with 4 nodes in it. This is my code:
import tensorflow as tf
# hyper parameters
learning_rate = 0.0001
number_of_epochs = 10000
# Computation Graph
# W1 maps the scalar input to 4 hidden units, WR maps the previous value to
# the same 4 units, W2 maps the 4 hidden units back to a scalar prediction.
W1 = tf.Variable([[1.0, 1.0, 1.0, 1.0]], dtype=tf.float32, name = 'W1')
W2 = tf.Variable([[1.0], [1.0], [1.0], [1.0]], dtype=tf.float32, name = 'W2')
WR = tf.Variable([[1.0, 1.0, 1.0, 1.0]], dtype=tf.float32, name = 'WR')
# b = tf.Variable([[0], [0], [0], [0]], dtype=tf.float32)
# Plain Python constant at the moment z is built below.
prev_val = [[0.0]]
X = tf.placeholder(tf.float32, [None, None], name = 'X')
labels = tf.placeholder(tf.float32, [None, 1], name = 'labels')
sess = tf.Session()
sess.run(tf.initialize_all_variables())
# NOTE(review): prev_val is still the constant [[0.0]] here, so the WR term
# contributes zero to z.
z = tf.matmul(X, W1) + tf.matmul(prev_val, WR)# - b
# NOTE(review): this only rebinds the Python name after z is already built;
# nothing below reads prev_val, so no state is carried between sess.run calls.
prev_val = z
predict = tf.matmul(z, W2)
# Mean squared error between labels and predictions.
error = tf.reduce_mean((labels - predict)**2)
train = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)
# Alternating input sequence and its per-step targets.
time_series = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
lbsx = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
# One gradient step per (input, label) pair, repeated for every epoch.
for i in range(number_of_epochs):
    for curr_X, lbs in zip(time_series, lbsx):
        sess.run(train, feed_dict={X: [[curr_X]], labels: [[lbs]]})
# After training, show the prediction for each of the two possible inputs.
print(sess.run(predict, feed_dict={X: [[0]]}))
print(sess.run(predict, feed_dict={X: [[1]]}))
I'm getting output:
[[ 0.]]
[[ 3.12420416e-05]]
With input 1, it should output 0 and vice versa. I'm also confused regarding the 'previous value'. Should it be a placeholder? I'd really appreciate your efforts to fix the code.

tf.reshape doesn't work as expected

Actually, I don't know how to describe this question. It's so strange.
import tensorflow as tf
import numpy as np
import pickle
def weight_and_bias(name, shape):
    """Create a Xavier-initialised weight ``"W" + name`` and bias ``"B" + name``.

    The weight has the full ``shape``; the bias length is ``shape[-1]``.
    Returns the pair ``(weight, bias)``.
    """
    weight = tf.get_variable("W" + name, shape=shape, initializer=tf.contrib.layers.xavier_initializer())
    bias = tf.get_variable("B" + name, shape=shape[-1], initializer=tf.contrib.layers.xavier_initializer())
    return weight, bias
def conv2d_2x2(x, W):
    """Return the SAME-padded 2-D convolution of ``x`` with ``W``.

    Despite the name, the spatial stride is 5 in both dimensions.
    """
    return tf.nn.conv2d(x, W, strides=[1, 5, 5, 1], padding='SAME')
def max_pool_2x2(x):
    """Return ``x`` after 2x2 max pooling with stride 2 and SAME padding."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# Graph: per-frame CNN features for a source and a target sequence, one LSTM
# per sequence, then a 2-way softmax over the concatenated final outputs.
sess = tf.InteractiveSession()
# Placeholders of shape [batch, step, 50, 50] plus per-example lengths.
source = tf.placeholder(tf.float32, [None, None, 50, 50])
source_len = tf.placeholder(tf.int32, [None])
source_max_step = tf.shape(source)[1]
target = tf.placeholder(tf.float32, [None, None, 50, 50])
target_len = tf.placeholder(tf.int32, [None])
target_max_step = tf.shape(target)[1]
W_conv, B_conv = weight_and_bias('conv1', [5, 5, 1, 32])
# NOTE(review): from here on the Python name `source` refers to a reshape op,
# no longer to the placeholder created above; any later feed_dict keyed on
# `source` therefore targets the last tensor bound to that name.
source = tf.reshape(source, [-1, 50, 50], "source_reshape")
source_tmp = tf.reshape(source, [-1, 50, 50 ,1])
source_conv = tf.nn.relu(conv2d_2x2(source_tmp, W_conv) + B_conv)
source_pool = max_pool_2x2(source_conv)
# 50 / 5 (conv stride) / 2 (pool stride) = 5, hence 5 * 5 * 32 features.
source_flat = tf.reshape(source_pool, [-1, 5 * 5 * 32], "source_pool_reshape")
source = tf.reshape(source_flat, [-1, source_max_step, 5*5*32], "source_flat_reshape")
W_conv, B_conv = weight_and_bias('conv2', [5, 5, 1, 32])
# NOTE(review): same rebinding as for `source` -- `target` stops being the
# placeholder on the next line.
target = tf.reshape(target, [-1, 50, 50], "target_reshape")
target_tmp = tf.reshape(target, [-1, 50, 50 ,1])
target_conv = tf.nn.relu(conv2d_2x2(target_tmp, W_conv) + B_conv)
target_pool = max_pool_2x2(target_conv)
target_flat = tf.reshape(target_pool, [-1, 5 * 5 * 32], "target_pool_reshape")
target = tf.reshape(target_flat, [-1, target_max_step, 5*5*32], "target_flat_reshape")
source_cell = tf.nn.rnn_cell.LSTMCell(500, initializer=tf.contrib.layers.xavier_initializer())
target_cell = tf.nn.rnn_cell.LSTMCell(500, initializer=tf.contrib.layers.xavier_initializer())
source_rnn_output, _ = tf.nn.dynamic_rnn(source_cell, source, source_len, dtype=tf.float32, scope = "source")
target_rnn_output, _ = tf.nn.dynamic_rnn(target_cell, target, target_len, dtype=tf.float32, scope = "target")
# Swap the first two axes so that gather(..., -1) indexes along the step axis.
source_output = tf.transpose(source_rnn_output, [1, 0, 2])
target_output = tf.transpose(target_rnn_output, [1, 0, 2])
source_final_output = tf.gather(source_output, -1)
target_final_output = tf.gather(target_output, -1)
# Two 500-unit outputs concatenated give the 1000 inputs of the softmax layer.
output = tf.concat(1, [source_final_output, target_final_output])
W_sf, B_sf = weight_and_bias('sf', [1000, 2])
predict = tf.nn.softmax(tf.matmul(output, W_sf) + B_sf)
y = tf.placeholder(tf.float32, [None, 2])
cross_entropy = -tf.reduce_sum(y * tf.log(predict))
train_step = tf.train.RMSPropOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.arg_max(predict, 1), tf.arg_max(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Load the pickled training data: [sequences, lengths, labels].
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('set', 'rb') as f:
    _set = pickle.load(f)
training_set = _set[0]
training_len = _set[1]
training_label = _set[2]
sess.run(tf.global_variables_initializer())
# The same full-batch feed is used at every step, so build it once.
feed = {source: training_set[0], target: training_set[1], source_len: training_len[0], target_len: training_len[1], y: training_label}
for i in range(20000):
    # Report training accuracy every 100 steps.
    if i % 100 == 0:
        train_accuacy = accuracy.eval(feed_dict=feed)
        print("step %d, training accuracy %g"%(i, train_accuacy))
    train_step.run(feed_dict=feed)
This is my whole code; I can't find any problem in it.
But a ValueError: Cannot feed value of shape (1077, 27, 50, 50) for Tensor 'source_flat_reshape:0', which has shape '(?, ?, 800)' was raised.
The error message is strange, because it seems to happen at source = tf.reshape(source_flat, [-1, source_max_step, 5*5*32], "source_flat_reshape"), but how could source_flat have a shape of (1077, 27, 50, 50)? It should be (1077*27, 800).
And, sometimes another ValueError: Cannot feed value of shape (1077, 27, 50, 50) for Tensor 'Reshape:0', which has shape '(?, 50, 50)' was raised.
It is also difficult to understand, why it happened?
Hope anyone could give me a hand.
Look what happens when you use feed_dict - you reference the variables source and target. However, the python variable no longer refers to the placeholders but rather the reshape ops - hence the op is 'skipped'.
The easiest fix is renaming the placeholders to something unique. Further down in the network it is OK to reuse the same name (you could just call every layer net), it doesn't matter as long as you no longer need to reference them.
Try giving this a go?
import tensorflow as tf
import numpy as np
import pickle
def weight_and_bias(name, shape):
    """Create a Xavier-initialised weight ``"W" + name`` and bias ``"B" + name``.

    The weight has the full ``shape``; the bias length is ``shape[-1]``.
    Returns the pair ``(weight, bias)``.
    """
    weight = tf.get_variable("W" + name, shape=shape, initializer=tf.contrib.layers.xavier_initializer())
    bias = tf.get_variable("B" + name, shape=shape[-1], initializer=tf.contrib.layers.xavier_initializer())
    return weight, bias
def conv2d_2x2(x, W):
    """Return the SAME-padded 2-D convolution of ``x`` with ``W``.

    Despite the name, the spatial stride is 5 in both dimensions.
    """
    return tf.nn.conv2d(x, W, strides=[1, 5, 5, 1], padding='SAME')
def max_pool_2x2(x):
    """Return ``x`` after 2x2 max pooling with stride 2 and SAME padding."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# Graph: per-frame CNN features for a source and a target sequence, one LSTM
# per sequence, then a 2-way softmax over the concatenated final outputs.
sess = tf.InteractiveSession()
# The placeholders keep their own names so that feed_dict can still reach
# them after `source` / `target` are reused for intermediate tensors.
source_placeholder = tf.placeholder(tf.float32, [None, None, 50, 50])
source_len = tf.placeholder(tf.int32, [None])
# Read the step count from the placeholder: `source` does not exist yet.
source_max_step = tf.shape(source_placeholder)[1]
target_placeholder = tf.placeholder(tf.float32, [None, None, 50, 50])
target_len = tf.placeholder(tf.int32, [None])
target_max_step = tf.shape(target_placeholder)[1]

W_conv, B_conv = weight_and_bias('conv1', [5, 5, 1, 32])
source = tf.reshape(source_placeholder, [-1, 50, 50], "source_reshape")
source_tmp = tf.reshape(source, [-1, 50, 50, 1])
source_conv = tf.nn.relu(conv2d_2x2(source_tmp, W_conv) + B_conv)
source_pool = max_pool_2x2(source_conv)
# 50 / 5 (conv stride) / 2 (pool stride) = 5, hence 5 * 5 * 32 features.
source_flat = tf.reshape(source_pool, [-1, 5 * 5 * 32], "source_pool_reshape")
source = tf.reshape(source_flat, [-1, source_max_step, 5*5*32], "source_flat_reshape")

W_conv, B_conv = weight_and_bias('conv2', [5, 5, 1, 32])
target = tf.reshape(target_placeholder, [-1, 50, 50], "target_reshape")
target_tmp = tf.reshape(target, [-1, 50, 50, 1])
target_conv = tf.nn.relu(conv2d_2x2(target_tmp, W_conv) + B_conv)
target_pool = max_pool_2x2(target_conv)
target_flat = tf.reshape(target_pool, [-1, 5 * 5 * 32], "target_pool_reshape")
target = tf.reshape(target_flat, [-1, target_max_step, 5*5*32], "target_flat_reshape")

source_cell = tf.nn.rnn_cell.LSTMCell(500, initializer=tf.contrib.layers.xavier_initializer())
target_cell = tf.nn.rnn_cell.LSTMCell(500, initializer=tf.contrib.layers.xavier_initializer())
source_rnn_output, _ = tf.nn.dynamic_rnn(source_cell, source, source_len, dtype=tf.float32, scope = "source")
target_rnn_output, _ = tf.nn.dynamic_rnn(target_cell, target, target_len, dtype=tf.float32, scope = "target")
# Swap the first two axes so that gather(..., -1) indexes along the step axis.
source_output = tf.transpose(source_rnn_output, [1, 0, 2])
target_output = tf.transpose(target_rnn_output, [1, 0, 2])
source_final_output = tf.gather(source_output, -1)
target_final_output = tf.gather(target_output, -1)
# Two 500-unit outputs concatenated give the 1000 inputs of the softmax layer.
output = tf.concat(1, [source_final_output, target_final_output])
W_sf, B_sf = weight_and_bias('sf', [1000, 2])
predict = tf.nn.softmax(tf.matmul(output, W_sf) + B_sf)
y = tf.placeholder(tf.float32, [None, 2])
cross_entropy = -tf.reduce_sum(y * tf.log(predict))
train_step = tf.train.RMSPropOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.arg_max(predict, 1), tf.arg_max(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Load the pickled training data: [sequences, lengths, labels].
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('set', 'rb') as f:
    _set = pickle.load(f)
training_set = _set[0]
training_len = _set[1]
training_label = _set[2]
sess.run(tf.global_variables_initializer())
# The same full-batch feed is used at every step, so build it once.
feed = {source_placeholder: training_set[0], target_placeholder: training_set[1], source_len: training_len[0], target_len: training_len[1], y: training_label}
for i in range(20000):
    # Report training accuracy every 100 steps.
    if i % 100 == 0:
        train_accuacy = accuracy.eval(feed_dict=feed)
        print("step %d, training accuracy %g"%(i, train_accuacy))
    train_step.run(feed_dict=feed)

Implement rotation invariant convolution layer using tensorflow

I am trying to implement rotation-invariant convolution layer using tensorflow as a part of udacity deep learning course.
I tried the approach below, but it doesn't work because I get the following error:
grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method)
424 raise LookupError(
425 "No gradient defined for operation '%s' (op type: %s)" %
--> 426 (op.name, op.type))
427 if grad_fn and any(out_grads):
428 # NOTE: If _AggregatedGrads didn't compute a value for the i'th
LookupError: No gradient defined for operation 'Reverse_1' (op type: Reverse)
Here I tried to implement the simplest approach by feeding an original and 90 degree rotated image to the same convolution kernel. After this i rotated back the output of the corresponding convolution layer.
Do you have any ideas how to fix this approach, or maybe you know a better way to implement at least a 90-degree rotation-invariant convolution layer?
my code:
# Hyperparameters for the rotation-invariant ConvNet experiment.
batch_size = 16
# Square convolution kernel sizes.
patch_size_1 = 3
patch_size_2 = 3
patch_size_3 = 2
# Number of output channels of every convolution.
depth = 32
# Width of the hidden fully connected layer.
num_hidden = 32
# Max-pool window sizes (also used as the pooling strides).
mx_pool_size_1 = 3
mx_pool_size_2 = 3
mx_pool_size_3 = 2
# Convolution stride.
stride = 1
starter_learning_rate = 0.05
num_steps = 1001
graph = tf.Graph()
with graph.as_default():
    # Input data.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    global_step = tf.Variable(0, trainable=False)

    # Variables.
    # layer1_* process the original image, layer12_* the rotated copy.
    layer1_weights = tf.Variable(tf.truncated_normal([patch_size_1, patch_size_1, num_channels, depth], stddev=0.1))
    layer1_biases = tf.Variable(tf.zeros([depth]))
    layer12_weights = tf.Variable(tf.truncated_normal([patch_size_1, patch_size_1, num_channels, depth], stddev=0.1))
    layer12_biases = tf.Variable(tf.zeros([depth]))
    layer2_weights = tf.Variable(tf.truncated_normal([patch_size_2, patch_size_2, depth, depth], stddev=0.1))
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
    # NOTE(review): layer3_* are created but not used by model() below.
    layer3_weights = tf.Variable(tf.truncated_normal([patch_size_3, patch_size_3, depth, depth], stddev=0.1))
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
    layer4_weights = tf.Variable(tf.truncated_normal(
        #[image_size * image_size * depth / (mx_pool_size_1 ** 2 * mx_pool_size_2 ** 2 * stride ** 4) , num_hidden],
        [512, num_hidden],
        stddev=0.1))
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    layer5_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
    layer5_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

    # Model.
    def model(data, train=False):
        """Return logits for ``data`` using an original and a rotated branch.

        The two branches are max-pooled against each other element-wise
        before the shared layers. ``train`` is accepted but not used.
        """
        # Branch 1: the image as given.
        conv1 = tf.nn.conv2d(data, layer1_weights, [1, stride, stride, 1], padding='SAME')
        conv1 = tf.nn.max_pool(conv1,
                               [1, mx_pool_size_1, mx_pool_size_1, 1],
                               [1, mx_pool_size_1, mx_pool_size_1, 1],
                               padding='SAME')
        hidden1 = tf.nn.relu(conv1 + layer1_biases)
        # Flatten the spatial grid to 100 positions (hard-coded 10x10 map).
        hidden1 = tf.reshape(hidden1, [-1, 100, 1, depth])

        # Branch 2: transpose H/W and reverse one axis to rotate the image,
        # convolve the rotated copy, then apply the same op to the features.
        data1 = tf.reverse(tf.transpose(data, [0, 2, 1, 3]), [False, True, False, False])
        # Convolve the rotated image; previously `data` was used here and
        # `data1` was never consumed, so both branches saw the same input.
        conv2 = tf.nn.conv2d(data1, layer12_weights, [1, stride, stride, 1], padding='SAME')
        conv2 = tf.nn.max_pool(conv2,
                               [1, mx_pool_size_1, mx_pool_size_1, 1],
                               [1, mx_pool_size_1, mx_pool_size_1, 1],
                               padding='SAME')
        hidden2 = tf.nn.relu(conv2 + layer12_biases)
        hidden2 = tf.reverse(tf.transpose(hidden2, [0, 2, 1, 3]), [False, True, False, False])
        hidden2 = tf.reshape(hidden2, [-1, 100, 1, depth])

        # Put the branches side by side on axis 2 and max-pool across them,
        # keeping the stronger response at each position and channel.
        hidden = tf.concat(2, [hidden1, hidden2])
        hidden = tf.nn.max_pool(hidden, [1, 1, 2, 1], [1, 1, 2, 1], padding='SAME')
        hidden = tf.reshape(hidden, [-1, 10, 10, depth])

        conv = tf.nn.conv2d(hidden,
                            layer2_weights,
                            [1, stride, stride, 1],
                            padding='SAME')
        conv = tf.nn.max_pool(conv,
                              [1, mx_pool_size_2, mx_pool_size_2, 1],
                              [1, mx_pool_size_2, mx_pool_size_2, 1],
                              padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)

        # Flatten to [batch, features] for the fully connected layers.
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, layer4_weights) + layer4_biases)
        return tf.matmul(hidden, layer5_weights) + layer5_biases

    # Training computation.
    logits = model(tf_train_dataset, True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
    learning_rate = starter_learning_rate #tf.train.exponential_decay(starter_learning_rate, global_step, batch_size , 0.999, staircase=True)

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))
The latest release of TensorFlow (0.6.0) was missing a gradient definition for tf.reverse(). It was added in a subsequent commit, and you can add the following code to the top level of your program to make use of it without upgrading:
@tf.RegisterGradient("Reverse")
def _ReverseGrad(op, grad):
    """Gradient for the ``Reverse`` op.

    Reverses the incoming gradient along the same dimensions the forward op
    reversed. The second input (the dimension mask) gets no gradient.
    """
    reverse_dims = op.inputs[1]
    return tf.reverse(grad, reverse_dims), None