I want to implement a deconvolution layer in TensorFlow for an FCN model. I used tf.nn.conv2d_transpose for each of the 5 conv outputs; what I need is for the output shape of each of the 5 deconvs to be the same as the input image shape. So I set
# Request the dynamic shape of the input image as the deconvolution output shape.
deconv_shape = tf.shape(input)
# NOTE(review): `filter` is passed as a plain shape list and `strides` as a scalar;
# presumably conv2d_transpose needs the kernel tensor itself and a per-axis
# strides list such as [1, s, s, 1] -- confirm against the API reference.
tf.nn.conv2d_transpose(value=deconv5_1,
filter=[32, 32, 1, 1],
output_shape=deconv_shape,
strides=16,
padding="same",
name="deconv5_2")
Am I doing it right?
I think your implementation isn't correct; here are a few steps to get it right.
# Example hyper-parameters: a stride of 2 gives a 2x scale of the input
# height and width.
filter_size = 2
stride = 2

# Set output_height and output_width to stride * input_height and
# stride * input_width respectively.
output_shape = [batch_size, output_height, output_width, out_channels]

# The kernel variable is declared as
# [filter_size, filter_size, out_channels, in_channels].
in_channels = input.shape[-1]
shape = [filter_size, filter_size, out_channels, in_channels]
w = tf.get_variable(
    name='W',
    shape=shape,
    initializer=w_init,
    regularizer=w_regularizer,
    trainable=trainable,
)

# Strides are given per input axis; only the two middle axes are strided.
output = tf.nn.conv2d_transpose(
    input,
    w,
    output_shape=output_shape,
    strides=[1, stride, stride, 1],
)
Related
I have implemented ROI pooling at my graph. The code is as follows.
# Build the whole training graph in the constructor: conv layers, ROI pooling,
# a classification head, CTC loss, the training op, and variable initialisation.
#
# Parameters:
#   fatness       -- forwarded to slim.conv2d through slim.repeat (presumably
#                    the number of output channels -- confirm).
#   image_shape   -- not used in the lines shown here.
#   vocab, r_vocab -- stored on CTCUtils as class-level attributes.
#   num_classes   -- output channels of the two classification convolutions.
#   rnn_cells_num -- width of the fully connected "pattern" layer.
def __init__(self,fatness,image_shape, vocab, r_vocab, num_classes,rnn_cells_num):
CTCUtils.vocab = vocab
CTCUtils.r_vocab = r_vocab
self.global_step = tf.Variable(0, name='global_step', trainable=False)
# Placeholders for label strings, pre-extracted feature maps and ROI boxes.
self.input_labels=tf.placeholder(dtype=tf.string, shape=(config.train.input_labels_size,))
self.input_dat = tf.placeholder(dtype=tf.float32, shape=(None,config.train.extracted_feature_height,config.train.extracted_feature_width,512))
self.in_boxes = tf.placeholder(dtype=tf.float32, shape=(config.train.input_labels_size,5))
self.num_classes = num_classes
self.rnn_cells_num = rnn_cells_num
# NOTE(review): the Saver is created before any of the layers below exist;
# confirm that it ends up tracking the variables you intend to save.
self.saver = tf.train.Saver()
self.poolheight=1
self.poolwidth=32
self.sess = tf.Session(graph = tf.get_default_graph())
with slim.arg_scope([slim.conv2d, slim.max_pool2d]):
########################################################
#########CONV layers before ROI pooling#################
########################################################
# Four stacked 3x3 convolutions with ReLU, Xavier init and L2 regularisation.
net = slim.repeat(self.input_dat, 4, slim.conv2d, fatness, [3, 3], padding='SAME',scope='conv6',weights_regularizer=slim.l2_regularizer(config.weight_decay),weights_initializer=tf.contrib.layers.xavier_initializer(),biases_initializer = tf.zeros_initializer(),activation_fn=tf.nn.relu)
# The float box placeholder is cast to int32 before being given to roi_pooling.
self.in_boxes=tf.dtypes.cast(self.in_boxes,tf.int32)
########################################################
#######ROI pooling layer################################
########################################################
rec_fmap_clone = roi_pooling(net, self.in_boxes, pool_height=self.poolheight, pool_width=self.poolwidth) #shape is (1, 20, 256, 1, 32)
# Use the pooled features only when their static shape is known; otherwise
# fall back to a constant tensor of ones.
decision=(rec_fmap_clone.get_shape()==None)
if (decision==False):
self.rec_fmap = tf.identity(rec_fmap_clone)
# NOTE(review): np.shape / np.reshape are NumPy calls applied to a graph
# tensor; presumably tf.reshape is intended -- confirm whether this is what
# breaks the graph connection after ROI pooling.
shape=np.shape(self.rec_fmap)
self.rec_fmap=np.reshape(self.rec_fmap, (shape[1],shape[2],shape[3],shape[4]))
self.rec_fmap=tf.transpose(self.rec_fmap, perm=[0, 2, 3, 1])
else:
self.rec_fmap=tf.ones([config.train.input_labels_size, 1, 32, 256], tf.float32)
# Classification head: a [1, 13] convolution plus a fully connected "pattern"
# vector that is tiled along the width axis and concatenated with it.
with slim.arg_scope([slim.conv2d],normalizer_fn=slim.batch_norm,weights_initializer=tf.truncated_normal_initializer(stddev=0.01),weights_regularizer=slim.l2_regularizer(0.0005)):
classes = slim.conv2d(self.rec_fmap, self.num_classes, [1, 13])
pattern = slim.fully_connected(slim.flatten(classes), self.rnn_cells_num) # patterns number
width = int(self.rec_fmap.get_shape()[2])
pattern = tf.reshape(pattern, (-1, 1, 1, self.rnn_cells_num))
pattern = tf.tile(pattern, [1, 1, width, 1])
inf = tf.concat(axis=3, values=[classes, pattern]) # skip connection over RNN
inf = slim.conv2d(inf, self.num_classes, [1, 1], normalizer_fn=None,activation_fn=None) # fully convolutional linear activation
# Drop axis 1, then swap the first two axes before handing the tensor to CTC.
inf = tf.squeeze(inf, [1])
prob = tf.transpose(inf, (1, 0, 2)) # prepare for CTC
data_length = tf.fill([tf.shape(prob)[1]], tf.shape(prob)[0]) # input seq length, batch size
# The label strings are converted by a Python function into the three
# components of a SparseTensor.
ctc = tf.py_func(CTCUtils.compute_ctc_from_labels, [self.input_labels], [tf.int64, tf.int64, tf.int64])
ctc_labels = tf.to_int32(tf.SparseTensor(ctc[0], ctc[1], ctc[2]))
# Beam-search decoding; the next two ops are created only for their names
# ('d_predictions', 'error_rate') and are not stored on self.
predictions = tf.to_int32(tf.nn.ctc_beam_search_decoder(prob, data_length, merge_repeated=False, beam_width=10)[0][0])
tf.sparse_tensor_to_dense(predictions, default_value=-1, name='d_predictions')
tf.reduce_mean(tf.edit_distance(predictions, ctc_labels, normalize=False), name='error_rate')
# Mean CTC loss, a piecewise-constant learning rate that changes at global
# steps 150000 and 200000, and the resulting training op.
self.loss = tf.reduce_mean(tf.compat.v1.nn.ctc_loss(inputs=prob, labels=ctc_labels, sequence_length=data_length, ctc_merge_repeated=True), name='loss')
self.learning_rate = tf.train.piecewise_constant(self.global_step, [150000, 200000],[config.train.learning_rate, 0.1 * config.train.learning_rate,0.01 * config.train.learning_rate])
self.opt_loss = tf.contrib.layers.optimize_loss(self.loss, self.global_step, self.learning_rate, config.train.opt_type, config.train.grad_noise_scale, name='train_step')
# Initialise every variable created above in the session opened earlier.
self.sess.run(tf.global_variables_initializer())
The graph has a few convolution layers before ROI pooling and ctc loss is used for optimization.
The concern is whether convolution layers before ROI pooling are optimized in back propagation.
According to discussion here, ROI pooling layer itself is differentiable.
But when the graph is plotted in tensorboard, the graph is disconnected after ROI pooling layer.
How can I check and make sure the conv layers before ROI pooling are updated during optimization?
The issue was solved by putting conv layers after RoiPooling.
The first graph was used only for feature extraction using RoiPooling. The RoiPooling output size was set to bigger dimensions. Those outputs were then used as inputs to the second graph, where the conv layers were placed, so that I have weights to optimize.
The modified graph is shown below.
I tried a Convolutional Neural Network with TensorFlow.
However, the shape of my input causes an error.
First is part of the main function.
# Main loop: grab a screen region, display it, resize it, and pass it to
# Convolution().
while True:
with mss.mss() as sct:
# Keep only the first three channels of the grabbed frame.
Game_Scr = np.array(sct.grab(Game_Scr_pos))[:,:,:3]
cv2.imshow('Game_Src', Game_Scr)
# NOTE(review): waitKey(0) presumably blocks until a key is pressed -- confirm.
cv2.waitKey(0)
Game_Scr = cv2.resize(Game_Scr, dsize=(960, 540), interpolation=cv2.INTER_AREA)
# Print the resized frame's shape, then the result of the convolution.
print(Game_Scr.shape)
print(Convolution(Game_Scr))
Second is my called function.
# Convolve one image with a randomly initialised kernel and return the
# evaluated result.
def Convolution(img):
# NOTE(review): the kernel is created with shape=[4], i.e. rank 1; presumably
# tf.nn.conv2d needs a rank-4 filter -- confirm.
kernel = tf.Variable(tf.truncated_normal(shape=[4], stddev=0.1))
# NOTE(review): this session is rebound by the `with` statement on the next
# line and is never closed explicitly.
sess = tf.Session()
with tf.Session() as sess:
# NOTE(review): img is passed to conv2d without a leading batch axis.
img = img.astype('float32')
# Bias1 is created but only referenced in the trailing comment below.
Bias1 = tf.Variable(tf.truncated_normal(shape=[4],stddev=0.1))
conv2d = tf.nn.conv2d(img, kernel, strides=[1, 1, 1, 1], padding='SAME')# + Bias1
conv2d = sess.run(conv2d)
return conv2d
ValueError: Shape must be rank 4 but is rank 3 for 'Conv2D' (op: 'Conv2D') with input shapes: [540,960,3], [4].
I tried changing the shape many times, but I get the same error.
Try replacing
img = img.astype('float32')
with
img = tf.expand_dims(img.astype('float32'), 0)
The input of tf.nn.conv2d should have 4 dimensions: (batch_size, image_height, image_width, image_channels). You were missing the batch_size; tf.expand_dims just adds that dimension (with a batch_size of 1, since you only have one image).
As per the official documentation here, input tensor should be of shape [batch, in_height, in_width, in_channels] and a filter / kernel tensor should be of shape [filter_height, filter_width, in_channels, out_channels].
Try by changing your Convolution function to something like this:
def Convolution(img):
    """Convolve a single image with a randomly initialised kernel.

    The image is cast to float32 and given a leading batch axis of size 1,
    because tf.nn.conv2d expects a rank-4 input. The kernel has shape
    [200, 200, 3, 3] and is re-drawn at random on every call, so results
    differ between calls.

    Args:
        img: NumPy array holding one image (assumed to be
            (height, width, 3) to match the kernel's input channels).

    Returns:
        The evaluated convolution result as returned by ``sess.run``.
    """
    # Build the ops in a private graph so that repeated calls (e.g. from a
    # capture loop) do not keep adding variables to the default graph.
    graph = tf.Graph()
    with graph.as_default():
        kernel = tf.Variable(
            tf.truncated_normal(shape=[200, 200, 3, 3], stddev=0.1))
        batch = np.expand_dims(img.astype('float32'), 0)
        conv2d = tf.nn.conv2d(
            batch, kernel, strides=[1, 1, 1, 1], padding='SAME')
        init = tf.global_variables_initializer()

    # A single session, closed by the context manager; the original also
    # opened a second session that was never closed.
    with tf.Session(graph=graph) as sess:
        sess.run(init)
        return sess.run(conv2d)
I am trying to implement convolutional layers for text classification from this blog post with some modification to suit my needs.
In the blog, there is only one convolution layer while I'd like mine to have two convolutional layers followed by ReLU and max-pooling.
The code so far is:
# Hyper-parameters of the text-classification model.
vocab_size = 2000
embedding_size = 100
filter_height = 5
filter_width = embedding_size
no_of_channels = 1
no_of_filters = 256
sequence_length = 50
filter_size = 3
no_of_classes = 26
# Placeholders: integer inputs of width sequence_length and float targets of
# width no_of_classes.
input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
input_y = tf.placeholder(tf.float32, [None, no_of_classes], name="input_y")
# Defining the embedding layer:
with tf.device('/cpu:0'), tf.name_scope("embedding"):
W = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name="W")
embedded_chars = tf.nn.embedding_lookup(W, input_x)
# Append a trailing axis of size 1 to the looked-up embeddings.
embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)
# Convolution block:
with tf.name_scope("convolution-block"):
filter_shape = [filter_height, embedding_size, no_of_channels, no_of_filters]
W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
# NOTE(review): b is created but not used in the lines shown.
b = tf.Variable(tf.constant(0.1, shape=[no_of_filters]), name="b")
conv1 = tf.nn.conv2d(embedded_chars_expanded,
W,
strides = [1,1,1,1],
padding = "VALID",
name = "conv1")
# NOTE(review): conv2 reuses W, whose third dimension is no_of_channels (1),
# on conv1's output, which has no_of_filters (256) channels -- presumably a
# second filter variable is needed here; confirm.
conv2 = tf.nn.conv2d(conv1,
W,
strides = [1,1,1,1],
padding = "VALID",
name = "conv2")
Here, W is the filter matrix.
However, this gives the error:
ValueError: Dimensions must be equal, but are 256 and 1 for 'convolution-block_16/conv2' (op: 'Conv2D') with input shapes: [?,46,1,256], [5,100,1,256].
I realise I have erred in the dimensions of the layer, but I am unable to fix it or put in the correct dimensions.
If anybody could provide any guidance/help, it'd be really helpful.
Thank you.
I can't quite understand what your code is meant to do, but the following change will fix your problem.
with tf.name_scope("convolution-block"):
    # Change: the output channel count now equals the input channel count,
    # so the same filter W can be applied twice in a row.
    # (The original inline "#...#]" comment swallowed the closing bracket.)
    filter_shape = [filter_height, embedding_size,
                    no_of_channels, no_of_channels]
    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
    # NOTE(review): b is still sized for no_of_filters and is not used below.
    b = tf.Variable(tf.constant(0.1, shape=[no_of_filters]), name="b")
    # Change: "SAME" padding keeps conv1's width at embedding_size, which the
    # second application of W (full-width filter, "VALID" padding) requires.
    conv1 = tf.nn.conv2d(embedded_chars_expanded,
                         W,
                         strides=[1, 1, 1, 1],
                         padding="SAME",
                         name="conv1")
    conv2 = tf.nn.conv2d(conv1,
                         W,
                         strides=[1, 1, 1, 1],
                         padding="VALID",
                         name="conv2")
I'm trying to create a dice_loss function in Tensorflow.
I'm having trouble with TensorFlow. Executing the following code
import tensorflow as tf
import tensorlayer as tl
def conv3d(x, inChans, outChans, kernel_size, stride, padding):
    """Apply a 3-D convolution with a cubic kernel, then add a bias.

    Args:
        x: Input tensor handed to ``tf.nn.conv3d``.
        inChans: Number of input channels of the kernel.
        outChans: Number of output channels; also the bias length.
        kernel_size: Edge length used for all three kernel dimensions.
        stride: Stride used for the three middle axes.
        padding: Padding mode forwarded to ``tf.nn.conv3d``.

    Returns:
        The convolution output with the bias added.
    """
    # Same edge length on all three spatial axes, then in/out channels.
    kernel_shape = [kernel_size] * 3 + [inChans, outChans]
    kernel = weight_variable(kernel_shape)
    offsets = bias_variable([outChans])

    # The first and last axes are never strided.
    step = [1] + [stride] * 3 + [1]
    convolved = tf.nn.conv3d(x, kernel, strides=step, padding=padding)
    return tf.nn.bias_add(convolved, offsets)
def train(loss_val, var_list):
    """Create the Adam update op for ``loss_val`` over ``var_list``.

    Args:
        loss_val: Loss tensor to differentiate.
        var_list: Variables for which gradients are computed and applied.

    Returns:
        The op returned by ``apply_gradients``.
    """
    adam = tf.train.AdamOptimizer(FLAGS.learning_rate)
    # Gradients are computed and applied in two explicit steps.
    grads_and_vars = adam.compute_gradients(loss_val, var_list=var_list)
    update_op = adam.apply_gradients(grads_and_vars)
    return update_op
def main(argv=None):
    """Build the V-Net graph, its dice-based loss and the training op.

    Args:
        argv: Unused in the lines shown.
    """
    # The image and its annotation share one placeholder shape.
    volume_shape = [None, SLICE_SIZE, IMAGE_SIZE, IMAGE_SIZE, 1]
    image = tf.placeholder(tf.float32, shape=volume_shape, name="input_image")
    annotation = tf.placeholder(tf.float32, shape=volume_shape, name="annotation")

    logits, pred_annotation = vnet.VNet(image)

    # The loss is one minus the dice coefficient taken over axes 1 to 4.
    dice = tl.cost.dice_coe(
        output=pred_annotation, target=annotation, axis=[1, 2, 3, 4])
    loss = 1 - dice

    trainable_var = tf.trainable_variables()
    train_op = train(loss, trainable_var)
    sess = tf.Session()
    ...
    ...
def VNet(x):
    """Run the final V-Net layers and derive a per-voxel prediction.

    Args:
        x: Input tensor handed to the first convolution shown.

    Returns:
        A pair ``(out, prediction)``: the output of the last convolution, and
        the argmax over axis 4 cast to float with that axis restored.
    """
    ...
    features = conv3d(x, inChans, 2, kernel_size=5, stride=1, padding="SAME")
    out = tf.nn.elu(BatchNorm3d(features))
    out = conv3d(out, 2, 2, kernel_size=1, stride=1, padding="SAME")

    # argmax yields the index of the largest value along axis 4.
    best_index = tf.argmax(out, dimension=4, name='prediction')
    annotation_pred = tf.to_float(best_index)
    return out, tf.expand_dims(annotation_pred, dim=4)
I get the following error:
ValueError: No gradients provided for any variable: ...
Can someone help me?
When you do annotation_pred = tf.to_float(tf.argmax(out, dimension=4, name='prediction')), you get the index of the max value in your tensor. This index is not differentiable, so the gradient can't flow through this operation.
Since your loss is defined only by this value, and the gradient can't flow through it, no gradient can be calculated for your network.
I don't know specifically how the dice loss works, but maybe you wanted to use tf.reduce_max instead of tf.argmax, or you have to find a way to use an operation that lets the gradient flow.
Similarly to the Caffe framework, where it is possible to watch the learned filters during CNN training and their resulting convolutions with input images, I wonder whether it is possible to do the same with TensorFlow?
A Caffe example can be viewed in this link:
http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb
Grateful for your help!
To see just a few conv1 filters in Tensorboard, you can use this code (it works for cifar10)
# this should be a part of the inference(images) function in cifar10.py file
# conv1
# First convolution layer: a [5, 5, 3, 64] kernel, bias add, then ReLU.
with tf.variable_scope('conv1') as scope:
kernel = _variable_with_weight_decay('weights', shape=[5, 5, 3, 64],
stddev=1e-4, wd=0.0)
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv1)
# Export some of the conv1 filters as images for TensorBoard.
with tf.variable_scope('visualization'):
# scale weights to [0 1], type is still float
x_min = tf.reduce_min(kernel)
x_max = tf.reduce_max(kernel)
# NOTE(review): divides by (x_max - x_min), which would be zero if every
# weight had the same value.
kernel_0_to_1 = (kernel - x_min) / (x_max - x_min)
# to tf.image_summary format [batch_size, height, width, channels]
# (the last kernel axis is moved to the front, one image per filter)
kernel_transposed = tf.transpose (kernel_0_to_1, [3, 0, 1, 2])
# this will display random 3 filters from the 64 in conv1
tf.image_summary('conv1/filters', kernel_transposed, max_images=3)
I also wrote a simple gist to display all 64 conv1 filters in a grid.