conv2d does not work - tensorflow

I am training a CNN in TensorFlow, but the program seems to be stuck at the first tf.nn.conv2d step. I have imported the CIFAR-10 dataset from Keras and am running my code on FloydHub.
I haven't started the session yet, this is just the computation graph.
Link to the complete notebook
This is the part where it gets stuck:
# forward propagation
# convolution layer 1
c1 = tf.nn.conv2d(x_train, w1, strides = [1,1,1,1], padding = 'VALID')
# activation function for c1: relu
r1 = tf.nn.relu(c1)
# maxpooling
p1 = tf.nn.max_pool(r1, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME')
# convolution layer 2
c2 = tf.nn.conv2d(p1, w2, strides = [1,1,1,1], padding='VALID')
# activation function for c2: relu
r2 = tf.nn.relu(c2)
# maxpooling
p2 = tf.nn.max_pool(r2, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME')
# flattening the previous max pool layer
l1 = tf.contrib.layers.flatten(p2)
# fully connected layer
final = tf.contrib.layers.fully_connected(l1, 10, activation_fn = None)
EDIT:-
This is how I imported the dataset
# importing 50000 images of size 32x32x3
i = 0
# this list holds the images
img_base = []
for img in glob.glob("train\\*.png"):
    img_base.append(cv2.imread(img))
# x_train is a nx32x32x3 matrix of n no. of images
x_train = np.array(img_base[0:40000]).astype(np.float32)
placeholders for training images and labels
# creating placeholders
x = tf.placeholder(tf.float32, [None, 32, 32, 3])
y = tf.placeholder(tf.float32, [None, 10])
weight initialisation
tf.reset_default_graph()
# creating weights
w1 = tf.get_variable('w1', [4,4,3,10], initializer=tf.contrib.layers.xavier_initializer())
w2 = tf.get_variable('w2', [4,4,10,15], initializer=tf.contrib.layers.xavier_initializer())
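No answer is shown here, but two things stand out in the snippets as posted: tf.reset_default_graph() is called after the placeholders are created (discarding them), and tf.nn.conv2d is applied to the NumPy array x_train, which bakes all 40000 images into the graph as a constant, rather than to the placeholder x. Below is a minimal sketch of the usual wiring, under the assumption that the intent was to feed batches of x_train through the placeholder; it reuses x_train from the EDIT above.
import numpy as np
import tensorflow as tf

tf.reset_default_graph()  # reset first, then build the graph

# placeholder instead of the raw x_train array
x = tf.placeholder(tf.float32, [None, 32, 32, 3])
w1 = tf.get_variable('w1', [4, 4, 3, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
c1 = tf.nn.conv2d(x, w1, strides=[1, 1, 1, 1], padding='VALID')
r1 = tf.nn.relu(c1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # feed small batches at run time instead of embedding all 40000 images
    out = sess.run(r1, feed_dict={x: x_train[0:64]})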

Related

Roi pooling and backpropagation

I have implemented ROI pooling at my graph. The code is as follows.
def __init__(self, fatness, image_shape, vocab, r_vocab, num_classes, rnn_cells_num):
    CTCUtils.vocab = vocab
    CTCUtils.r_vocab = r_vocab

    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.input_labels = tf.placeholder(dtype=tf.string,
                                       shape=(config.train.input_labels_size,))
    self.input_dat = tf.placeholder(dtype=tf.float32,
                                    shape=(None, config.train.extracted_feature_height,
                                           config.train.extracted_feature_width, 512))
    self.in_boxes = tf.placeholder(dtype=tf.float32,
                                   shape=(config.train.input_labels_size, 5))
    self.num_classes = num_classes
    self.rnn_cells_num = rnn_cells_num
    self.saver = tf.train.Saver()
    self.poolheight = 1
    self.poolwidth = 32
    self.sess = tf.Session(graph=tf.get_default_graph())

    with slim.arg_scope([slim.conv2d, slim.max_pool2d]):
        ########################################################
        #########CONV layers before ROI pooling#################
        ########################################################
        net = slim.repeat(self.input_dat, 4, slim.conv2d, fatness, [3, 3],
                          padding='SAME', scope='conv6',
                          weights_regularizer=slim.l2_regularizer(config.weight_decay),
                          weights_initializer=tf.contrib.layers.xavier_initializer(),
                          biases_initializer=tf.zeros_initializer(),
                          activation_fn=tf.nn.relu)

        self.in_boxes = tf.dtypes.cast(self.in_boxes, tf.int32)

        ########################################################
        #######ROI pooling layer################################
        ########################################################
        rec_fmap_clone = roi_pooling(net, self.in_boxes,
                                     pool_height=self.poolheight,
                                     pool_width=self.poolwidth)  # shape is (1, 20, 256, 1, 32)

        decision = (rec_fmap_clone.get_shape() == None)
        if decision == False:
            self.rec_fmap = tf.identity(rec_fmap_clone)
            shape = np.shape(self.rec_fmap)
            self.rec_fmap = np.reshape(self.rec_fmap,
                                       (shape[1], shape[2], shape[3], shape[4]))
            self.rec_fmap = tf.transpose(self.rec_fmap, perm=[0, 2, 3, 1])
        else:
            self.rec_fmap = tf.ones([config.train.input_labels_size, 1, 32, 256],
                                    tf.float32)

        with slim.arg_scope([slim.conv2d],
                            normalizer_fn=slim.batch_norm,
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            weights_regularizer=slim.l2_regularizer(0.0005)):
            classes = slim.conv2d(self.rec_fmap, self.num_classes, [1, 13])
            pattern = slim.fully_connected(slim.flatten(classes),
                                           self.rnn_cells_num)  # patterns number

        width = int(self.rec_fmap.get_shape()[2])
        pattern = tf.reshape(pattern, (-1, 1, 1, self.rnn_cells_num))
        pattern = tf.tile(pattern, [1, 1, width, 1])

        inf = tf.concat(axis=3, values=[classes, pattern])  # skip connection over RNN
        inf = slim.conv2d(inf, self.num_classes, [1, 1],
                          normalizer_fn=None,
                          activation_fn=None)  # fully convolutional linear activation
        inf = tf.squeeze(inf, [1])

        prob = tf.transpose(inf, (1, 0, 2))  # prepare for CTC
        data_length = tf.fill([tf.shape(prob)[1]],
                              tf.shape(prob)[0])  # input seq length, batch size

        ctc = tf.py_func(CTCUtils.compute_ctc_from_labels, [self.input_labels],
                         [tf.int64, tf.int64, tf.int64])
        ctc_labels = tf.to_int32(tf.SparseTensor(ctc[0], ctc[1], ctc[2]))

        predictions = tf.to_int32(
            tf.nn.ctc_beam_search_decoder(prob, data_length, merge_repeated=False,
                                          beam_width=10)[0][0])
        tf.sparse_tensor_to_dense(predictions, default_value=-1, name='d_predictions')
        tf.reduce_mean(tf.edit_distance(predictions, ctc_labels, normalize=False),
                       name='error_rate')

        self.loss = tf.reduce_mean(
            tf.compat.v1.nn.ctc_loss(inputs=prob, labels=ctc_labels,
                                     sequence_length=data_length,
                                     ctc_merge_repeated=True), name='loss')
        self.learning_rate = tf.train.piecewise_constant(
            self.global_step, [150000, 200000],
            [config.train.learning_rate,
             0.1 * config.train.learning_rate,
             0.01 * config.train.learning_rate])
        self.opt_loss = tf.contrib.layers.optimize_loss(
            self.loss, self.global_step, self.learning_rate,
            config.train.opt_type, config.train.grad_noise_scale, name='train_step')

    self.sess.run(tf.global_variables_initializer())
The graph has a few convolution layers before ROI pooling, and CTC loss is used for optimization.
The concern is whether the convolution layers before ROI pooling are optimized during backpropagation.
According to the discussion here, the ROI pooling layer itself is differentiable.
But when the graph is plotted in TensorBoard, it is disconnected after the ROI pooling layer.
How can I check and make sure the conv layers before ROI pooling are updated during optimization?
The issue was solved by putting the conv layers after RoiPooling.
The first graph was used only for feature extraction via RoiPooling, with the RoiPooling output size set to larger dimensions. Those outputs were then used as inputs to a second graph, where the conv layers were placed, so that there were weights to optimize.
The modified graph is shown below.
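A self-contained toy version of that two-graph arrangement is sketched below. All names and shapes here are illustrative, and max_pool stands in for the custom roi_pooling op: graph 1 only extracts pooled features (so its weights never need gradients), and graph 2 holds the trainable conv layers.
import numpy as np
import tensorflow as tf

# Graph 1: feature extraction only (max_pool stands in for RoiPooling).
g1 = tf.Graph()
with g1.as_default():
    feat_in = tf.placeholder(tf.float32, [None, 8, 8, 4])
    pooled = tf.nn.max_pool(feat_in, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
sess1 = tf.Session(graph=g1)
feats = sess1.run(pooled, feed_dict={feat_in: np.ones((2, 8, 8, 4), np.float32)})

# Graph 2: trainable conv layers consuming the extracted features.
g2 = tf.Graph()
with g2.as_default():
    pooled_in = tf.placeholder(tf.float32, [None, 4, 4, 4])
    conv = tf.layers.conv2d(pooled_in, 8, [3, 3], padding='same')  # weights that get optimized
    loss = tf.reduce_mean(conv)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
    init_op = tf.global_variables_initializer()
sess2 = tf.Session(graph=g2)
sess2.run(init_op)
sess2.run(train_op, feed_dict={pooled_in: feats})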

Neural network output issue

I built a neural network with TensorFlow; here is the code:
class DQNetwork:
    def __init__(self, state_size, action_size, learning_rate, name='DQNetwork'):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate

        with tf.variable_scope(name):
            # We create the placeholders
            self.inputs_ = tf.placeholder(tf.float32,
                                          shape=[state_size[1], state_size[0]],
                                          name="inputs")
            self.actions_ = tf.placeholder(tf.float32, [None, self.action_size],
                                           name="actions_")

            # Remember that target_Q is the R(s,a) + y * max Qhat(s', a')
            self.target_Q = tf.placeholder(tf.float32, [None], name="target")

            self.fc = tf.layers.dense(inputs=self.inputs_,
                                      units=50,
                                      kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                      activation=tf.nn.elu)
            self.output = tf.layers.dense(inputs=self.fc,
                                          units=self.action_size,
                                          kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                          activation=None)

            # Q is our predicted Q value.
            self.Q = tf.reduce_sum(tf.multiply(self.output, self.actions_))

            # The loss is the difference between our predicted Q_values and the Q_target
            # Sum(Qtarget - Q)^2
            self.loss = tf.reduce_mean(tf.square(self.target_Q - self.Q))
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
But I have an issue with the output:
the output should normally be the same size as "action_size", and the action_size value is 3,
but I got an output of shape [5, 3] instead of just [3], and I really don't understand why...
This network has 2 dense layers, one with 50 perceptrons and the other with 3 perceptrons (= action_size).
state_size has the format [9, 5].
If someone knows why my output has two dimensions, I would be very thankful.
Your self.inputs_ placeholder has shape (5, 9). You perform the matmul(self.inputs_, fc1.w) operation in dense layer fc1 which has shape (9, 50) and it results in shape (5, 50). You then apply another dense layer with shape (50, 3) which results in output shape (5, 3).
The same schematically:
matmul(shape(5, 9), shape(9, 50)) ---> shape(5, 50) # output of 1st dense layer
matmul(shape(5, 50), shape(50, 3)) ---> shape(5, 3) # output of 2nd dense layer
Usually, the first dimension of the input placeholder represents the batch size and the second dimension is the dimension of the input feature vector. So for each sample in the batch (batch size is 5 in your case) you get an output of shape 3.
To get probabilities, use this:
import tensorflow as tf
import numpy as np

inputs_ = tf.placeholder(tf.float32, shape=(None, 9))
actions_ = tf.placeholder(tf.float32, shape=(None, 3))

fc = tf.layers.dense(inputs=inputs_, units=2)
output = tf.layers.dense(inputs=fc, units=3)
reduced = tf.reduce_mean(output, axis=0)
probs = tf.nn.softmax(reduced)  # <-- probabilities

inputs_vals = np.ones((5, 9))
actions_vals = np.ones((1, 3))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(probs.eval({inputs_: inputs_vals,
                      actions_: actions_vals}))
    # [0.01858923 0.01566187 0.9657489 ]
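As a side note: if the intent in the question's code was one Q-value per sample (the standard DQN formulation, which is an assumption here, not something stated in the original answer), the reduction would keep the batch dimension by reducing only over the action axis. A minimal sketch:
import tensorflow as tf

inputs_ = tf.placeholder(tf.float32, [None, 9])
actions_ = tf.placeholder(tf.float32, [None, 3])  # one-hot chosen actions
target_Q = tf.placeholder(tf.float32, [None])

fc = tf.layers.dense(inputs_, 50, activation=tf.nn.elu)
output = tf.layers.dense(fc, 3)

# Reduce only over the action axis, keeping one Q-value per batch sample.
Q = tf.reduce_sum(tf.multiply(output, actions_), axis=1)  # shape (batch,)
loss = tf.reduce_mean(tf.square(target_Q - Q))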

restoring error in tensorflow

I am a beginner with tensorflow and I was tinkering around with a convnet for image recognition. However, after I save my model, I get an error while restoring it.
These are my tensor graph components:
Y_train = to_categorical(y_train,num_classes=4)
Y_test = to_categorical(y_test,num_classes=4)
X = tf.placeholder(tf.float32, shape=(None, 64,64,3))
Y = tf.placeholder(tf.float32, shape=(None, 4))
w1 = tf.get_variable("w1", [4,4,3,8], initializer = tf.contrib.layers.xavier_initializer(seed = 0))
w2 = tf.get_variable("w2", [2,2,8,16], initializer = tf.contrib.layers.xavier_initializer(seed = 0))
Z1 = tf.nn.conv2d(X,w1, strides = [1,1,1,1], padding = 'SAME')
A1 = tf.nn.relu(Z1)
P1 = tf.nn.max_pool(A1, ksize = [1,8,8,1], strides = [1,8,8,1], padding = 'SAME')
Z2 = tf.nn.conv2d(P1,w2, strides = [1,1,1,1], padding = 'SAME')
A2 = tf.nn.relu(Z2)
P2 = tf.nn.max_pool(A2, ksize = [1,4,4,1], strides = [1,4,4,1], padding = 'SAME')
P2 = tf.contrib.layers.flatten(P2)
Z3 = tf.contrib.layers.fully_connected(P2,4,activation_fn=None)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = Z3, labels = Y))
optimizer = tf.train.AdamOptimizer(0.004).minimize(cost)
This is a basic convnet which I have successfully trained and tested.
However, the problem I am facing is: after all the training epochs have completed, how do I save this model so that I can use it again in some file, say predict.py, where I can just import it and make predictions?
So I read some blogs on save and restore and did that, but then I got the error mentioned below:
Attempting to use uninitialized value fully_connected/biases
[[Node: fully_connected/biases/read = Identity[T=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/device:CPU:0"]]]
So, given the convnet, what should I do to use that model? Can someone provide working code for both the model file and the predict file?
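No answer is shown here, but in TF1 that error typically means the variables were never restored into the session (for example, the graph was rebuilt and run without saver.restore, or an initializer was run into a fresh graph instead of restoring). Below is a condensed save/restore sketch: the checkpoint path is illustrative and the architecture is a simplified version of the question's graph, so treat it as a pattern rather than a drop-in answer.
import numpy as np
import tensorflow as tf

def build_graph():
    # Rebuilt identically at predict time; a condensed version of the question's graph.
    X = tf.placeholder(tf.float32, shape=(None, 64, 64, 3))
    w1 = tf.get_variable("w1", [4, 4, 3, 8],
                         initializer=tf.contrib.layers.xavier_initializer(seed=0))
    Z1 = tf.nn.relu(tf.nn.conv2d(X, w1, strides=[1, 1, 1, 1], padding='SAME'))
    P1 = tf.contrib.layers.flatten(Z1)
    # fully_connected creates the fully_connected/biases variable from the error message
    Z3 = tf.contrib.layers.fully_connected(P1, 4, activation_fn=None)
    return X, Z3

# --- training side ---
X, Z3 = build_graph()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # ... run the training loop here ...
    saver.save(sess, './convnet')

# --- prediction side (e.g. predict.py): rebuild, then restore, never re-initialize ---
tf.reset_default_graph()
X, Z3 = build_graph()
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, './convnet')  # restores fully_connected/biases etc.
    preds = sess.run(tf.argmax(Z3, 1),
                     feed_dict={X: np.zeros((1, 64, 64, 3), np.float32)})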

Implementing convolutional layers using Tensorflow

I am trying to implement convolutional layers for text classification from this blog post with some modification to suit my needs.
In the blog, there is only one convolution layer while I'd like mine to have two convolutional layers followed by ReLU and max-pooling.
The code so far is:
vocab_size = 2000
embedding_size = 100
filter_height = 5
filter_width = embedding_size
no_of_channels = 1
no_of_filters = 256
sequence_length = 50
filter_size = 3
no_of_classes = 26
input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
input_y = tf.placeholder(tf.float32, [None, no_of_classes], name="input_y")
# Defining the embedding layer:
with tf.device('/cpu:0'), tf.name_scope("embedding"):
    W = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name="W")
    embedded_chars = tf.nn.embedding_lookup(W, input_x)
    embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)
# Convolution block:
with tf.name_scope("convolution-block"):
    filter_shape = [filter_height, embedding_size, no_of_channels, no_of_filters]
    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
    b = tf.Variable(tf.constant(0.1, shape=[no_of_filters]), name="b")
    conv1 = tf.nn.conv2d(embedded_chars_expanded,
                         W,
                         strides=[1, 1, 1, 1],
                         padding="VALID",
                         name="conv1")
    conv2 = tf.nn.conv2d(conv1,
                         W,
                         strides=[1, 1, 1, 1],
                         padding="VALID",
                         name="conv2")
Here, W is the filter matrix.
However, this gives the error:
ValueError: Dimensions must be equal, but are 256 and 1 for 'convolution-block_16/conv2' (op: 'Conv2D') with input shapes: [?,46,1,256], [5,100,1,256].
I realise I have erred in the dimensions of the layer, but I am unable to fix it or put in the correct dimensions.
If anybody could provide any guidance/help, it'd be really helpful.
Thank you.
I can't quite understand what your code is meant to do, but the following change will fix your problem.
with tf.name_scope("convolution-block"):
    # change the output channel count to match the input channels
    filter_shape = [filter_height, embedding_size, no_of_channels, no_of_channels]
    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
    b = tf.Variable(tf.constant(0.1, shape=[no_of_filters]), name="b")
    conv1 = tf.nn.conv2d(embedded_chars_expanded,
                         W,
                         strides=[1, 1, 1, 1],
                         padding="SAME",  # change the padding scheme
                         name="conv1")
    conv2 = tf.nn.conv2d(conv1,
                         W,
                         strides=[1, 1, 1, 1],
                         padding="VALID",
                         name="conv2")
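Note that this fix still reuses the same W for both convolutions. If the intent is two genuinely distinct conv layers followed by ReLU and max-pooling, as the question describes, a sketch with separate filters might look like the following. It builds on the question's variable definitions (embedding_size, no_of_filters, filter_size, etc.); the second filter's shape and the pooling window are assumptions chosen so the shapes chain correctly, not code from the blog post.
with tf.name_scope("convolution-block"):
    # First conv: collapses the embedding width, producing no_of_filters channels.
    W1 = tf.Variable(tf.truncated_normal(
        [filter_height, embedding_size, no_of_channels, no_of_filters], stddev=0.1), name="W1")
    b1 = tf.Variable(tf.constant(0.1, shape=[no_of_filters]), name="b1")
    conv1 = tf.nn.conv2d(embedded_chars_expanded, W1,
                         strides=[1, 1, 1, 1], padding="VALID", name="conv1")
    relu1 = tf.nn.relu(tf.nn.bias_add(conv1, b1))  # shape [?, 46, 1, 256]

    # Second conv: its input-channel dim must equal conv1's output channels.
    W2 = tf.Variable(tf.truncated_normal(
        [filter_size, 1, no_of_filters, no_of_filters], stddev=0.1), name="W2")
    b2 = tf.Variable(tf.constant(0.1, shape=[no_of_filters]), name="b2")
    conv2 = tf.nn.conv2d(relu1, W2,
                         strides=[1, 1, 1, 1], padding="VALID", name="conv2")
    relu2 = tf.nn.relu(tf.nn.bias_add(conv2, b2))  # shape [?, 44, 1, 256]

    # Max-pool over the remaining sequence dimension -> [?, 1, 1, 256]
    pooled = tf.nn.max_pool(relu2, ksize=[1, 44, 1, 1],
                            strides=[1, 1, 1, 1], padding="VALID", name="pool")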

Fully Convolutional Network, Training Error

I apologize that I'm not good at English.
I'm trying to build my own Fully Convolutional Network using TensorFlow.
But I am having difficulties training this model with my own image data, whereas the MNIST data worked properly.
Here is my FCN model code (not using a pre-trained or pre-built model):
import tensorflow as tf
import numpy as np
Loading MNIST Data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
images_flatten = tf.placeholder(tf.float32, shape=[None, 784])
images = tf.reshape(images_flatten, [-1,28,28,1]) # CNN deals with 3 dimensions
labels = tf.placeholder(tf.float32, shape=[None, 10])
keep_prob = tf.placeholder(tf.float32) # Dropout Ratio
Convolutional Layers
# Conv. Layer #1
W1 = tf.Variable(tf.truncated_normal([3, 3, 1, 4], stddev = 0.1))
b1 = tf.Variable(tf.truncated_normal([4], stddev = 0.1))
FMA = tf.nn.conv2d(images, W1, strides=[1,1,1,1], padding='SAME')
# FMA stands for Fused Multiply Add, which means convolution
RELU = tf.nn.relu(tf.add(FMA, b1))
POOL = tf.nn.max_pool(RELU, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# Conv. Layer #2
W2 = tf.Variable(tf.truncated_normal([3, 3, 4, 8], stddev = 0.1))
b2 = tf.Variable(tf.truncated_normal([8], stddev = 0.1))
FMA = tf.nn.conv2d(POOL, W2, strides=[1,1,1,1], padding='SAME')
RELU = tf.nn.relu(tf.add(FMA, b2))
POOL = tf.nn.max_pool(RELU, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# Conv. Layer #3
W3 = tf.Variable(tf.truncated_normal([7, 7, 8, 16], stddev = 0.1))
b3 = tf.Variable(tf.truncated_normal([16], stddev = 0.1))
FMA = tf.nn.conv2d(POOL, W3, strides=[1,1,1,1], padding='VALID')
RELU = tf.nn.relu(tf.add(FMA, b3))
# Dropout
Dropout = tf.nn.dropout(RELU, keep_prob)
# Conv. Layer #4
W4 = tf.Variable(tf.truncated_normal([1, 1, 16, 10], stddev = 0.1))
b4 = tf.Variable(tf.truncated_normal([10], stddev = 0.1))
FMA = tf.nn.conv2d(Dropout, W4, strides=[1,1,1,1], padding='SAME')
LAST_RELU = tf.nn.relu(tf.add(FMA, b4))
Summary: [Conv-ReLU-Pool] - [Conv-ReLU-Pool] - [Conv-ReLU] - [Dropout] - [Conv-ReLU]
Define Loss, Accuracy
prediction = tf.squeeze(LAST_RELU)
# Because FCN returns (1 x 1 x class_num) in training
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=labels))
# softmax_cross_entropy_with_logits must be called with the named arguments logits= and labels=
optimizer = tf.train.AdamOptimizer(0.001)
train = optimizer.minimize(loss)
label_max = tf.argmax(labels, 1)
pred_max = tf.argmax(prediction, 1)
correct_pred = tf.equal(pred_max, label_max)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
Training Model
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(10000):
    image_batch, label_batch = mnist.train.next_batch(100)
    sess.run(train, feed_dict={images: image_batch, labels: label_batch, keep_prob: 0.8})
    if i % 10 == 0:
        tr = sess.run([loss, accuracy],
                      feed_dict={images: image_batch, labels: label_batch, keep_prob: 1.0})
        print("Step %d, Loss %g, Accuracy %g" % (i, tr[0], tr[1]))
Loss: 0.784 (Approximately)
Accuracy: 94.8% (Approximately)
The problem is that training this model with MNIST data worked very well, but with my own data the loss is always the same (0.6319) and the output layer is always 0.
There is no difference in the code except for the third convolutional layer's filter size. This filter size must have the same width and height as its input, which has been compressed by the previous pooling layers (for MNIST: 28 -> 14 -> 7 after two 2x2 poolings). That's why the filter size in this layer is [7,7].
What is wrong with my model?
The only different code between two cases (MNIST, my own data) is:
Placeholder
My own data is (128 x 64 x 1) and the labels are 'eyes' and 'not_eyes'.
images = tf.placeholder(tf.float32, [None, 128, 64, 1])
labels = tf.placeholder(tf.int32, [None, 2])
3rd Convolutional Layer
W3 = tf.Variable(tf.truncated_normal([32, 16, 8, 16], stddev = 0.1))
Feeding (Batch)
image_data, label_data = input_data.get_batch(TRAINING_FILE, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(10000):
    image_batch, label_batch = sess.run([image_data, label_data])
    sess.run(train, feed_dict={images: image_batch, labels: label_batch, keep_prob: 0.8})
    if i % 10 == 0: ...  # Validation part is almost the same, too...

coord.request_stop()
coord.join(threads)
Here "input_data" is an another python file in the same directory, and "get_batch(TRAINING_FILE, 10)" is the function that returns batch data. The code is:
def get_input_queue(txtfile_name):
    images = []
    labels = []
    # Each line of the txt file holds the image path, label, and label number
    for line in open(txtfile_name, 'r'):
        cols = re.split(',|\n', line)
        labels.append(int(cols[2]))
        images.append(tf.image.decode_jpeg(tf.read_file(cols[0]), channels=1))
    input_queue = tf.train.slice_input_producer([images, labels], shuffle=True)
    return input_queue

def get_batch(txtfile_name, batch_size):
    input_queue = get_input_queue(txtfile_name)
    image = input_queue[0]
    label = input_queue[1]
    image = tf.reshape(image, [128, 64, 1])
    batch_image, batch_label = tf.train.batch([image, label], batch_size)
    batch_label_one_hot = tf.one_hot(tf.to_int64(batch_label), 2,
                                     on_value=1.0, off_value=0.0)
    return batch_image, batch_label_one_hot
It doesn't seem to have any problem... :( Please help me!!
Are your inputs scaled appropriately? The JPEGs are in the [0, 255] range and need to be scaled to [-1, 1]. You can try:
image = tf.reshape(image, [128, 64, 1])
image = tf.scalar_mul((1.0/255), image)
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.0)
What accuracy are you getting with your model for MNIST? It would be helpful if you post the code. Are you using the trained model to evaluate the output for your own data?
A general suggestion on setting up the convolution model is provided here.
Here is the model suggestion according to the article:
INPUT -> [[CONV -> RELU]*N -> POOL?]*M -> [FC -> RELU]*K -> FC
Having more than one CONV->RELU pair before pooling improves the learning of complex features. Try N=2 instead of 1, as in the sketch below.
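A minimal sketch of that [CONV -> RELU]*2 -> POOL pattern (the filter counts and kernel sizes here are illustrative, not taken from the article):
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 28, 28, 1])
# [CONV -> RELU] * 2 -> POOL: two conv/relu pairs before each pooling step.
conv = tf.layers.conv2d(images, 8, [3, 3], padding='same', activation=tf.nn.relu)
conv = tf.layers.conv2d(conv, 8, [3, 3], padding='same', activation=tf.nn.relu)
pool = tf.layers.max_pooling2d(conv, pool_size=[2, 2], strides=2)  # 28x28 -> 14x14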
Some other suggestions:
While preparing your data, reduce it to a smaller size than 128x64. Try the same size as the MNIST data:
image = tf.reshape(image, [28, 28, 1])
If your eye/no-eye image is color, convert it to grayscale and normalize the values. You can do this using numpy or tf; here is how with numpy:
grayscale-->
img = np.dot(np.array(img, dtype='float32'), [[0.2989],[0.5870],[0.1140]])
normalize-->
mean = np.mean(img, dtype='float32')
std = np.std(img, dtype='float32', ddof=1)
if std < 1e-4: std = 1.
img = (img - mean) / std
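Putting the two snippets together on a single image (a usage sketch; the random array stands in for a real RGB image loaded from disk):
import numpy as np

# Stand-in for a loaded RGB image of shape (H, W, 3) in [0, 255]
img = np.random.randint(0, 256, (64, 64, 3)).astype('float32')

# Grayscale: weighted sum over the RGB channels -> shape (64, 64, 1)
img = np.dot(np.array(img, dtype='float32'), [[0.2989], [0.5870], [0.1140]])

# Normalize to zero mean / unit variance
mean = np.mean(img, dtype='float32')
std = np.std(img, dtype='float32', ddof=1)
if std < 1e-4:
    std = 1.
img = (img - mean) / std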