TensorFlow size of convolution layers

I am attempting to recreate a CNN from a research paper, but I am still new to deep learning.
I am given a 3D patch of size 32x32x7. I first want to perform a convolution of size 3x3 with 32 features and a stride of 2. Then, from that result, I need to perform a 3x3x4 convolution with 64 features and a stride of 1. I do not want to pool or apply an activation function between the two convolutions. Why can't I just feed the results of my first convolution into the second one?
import tensorflow as tf
sess = tf.InteractiveSession()

def conv3d(tempX, tempW):
    # batch and channel strides must be 1 for tf.nn.conv3d
    return tf.nn.conv3d(tempX, tempW, strides=[1, 2, 2, 2, 1],
                        padding='SAME')

def conv3d_s1(tempX, tempW):
    return tf.nn.conv3d(tempX, tempW, strides=[1, 1, 1, 1, 1],
                        padding='SAME')

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

x = tf.placeholder(tf.float32, shape=[None, 7168])
y_ = tf.placeholder(tf.float32, shape=[None, 3])
W = tf.Variable(tf.zeros([7168, 3]))

# first convolution
W_conv1 = weight_variable([3, 3, 1, 1, 32])
x_image = tf.reshape(x, [-1, 32, 32, 7, 1])
h_conv1 = conv3d(x_image, W_conv1)

# second convolution
W_conv2 = weight_variable([3, 3, 4, 1, 64])
h_conv2 = conv3d_s1(h_conv1, W_conv2)
Thank you!

After the first conv3d you have a tensor of shape [None, 16, 16, 4, 32] (each spatial dimension of the 32x32x7 patch is halved by the stride of 2, and there are 32 output channels), therefore you have to use a kernel of shape [3, 3, 4, 32, 64] in the second conv3d_s1: its fourth entry has to match the 32 input channels.
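A minimal sketch of the fix, keeping everything else from the question's code:

# second convolution, corrected: the fourth kernel dimension matches
# the 32 channels coming out of h_conv1
W_conv2 = weight_variable([3, 3, 4, 32, 64])
h_conv2 = conv3d_s1(h_conv1, W_conv2)  # -> shape [None, 16, 16, 4, 64]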

Related

What is the meaning of the parameters of max_pool2d?

In the 8th line of the code shown below, I don't know the meaning of the second [2, 2].
Is it the stride size? (The API of max_pool2d is also shown below.) By the way, where did the number 1024 in the 15th line of code come from?
def build_graph(top_k):
    # with tf.device('/cpu:0'):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    conv_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv1')
    max_pool_1 = slim.max_pool2d(conv_1, [2, 2], [2, 2], padding='SAME')
    conv_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv2')
    max_pool_2 = slim.max_pool2d(conv_2, [2, 2], [2, 2], padding='SAME')
    conv_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3')
    max_pool_3 = slim.max_pool2d(conv_3, [2, 2], [2, 2], padding='SAME')
    flatten = slim.flatten(max_pool_3)
    fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024, activation_fn=tf.nn.tanh, scope='fc1')
    logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size, activation_fn=None, scope='fc2')
    # logits = slim.fully_connected(flatten, FLAGS.charset_size, activation_fn=None, reuse=reuse, scope='fc')
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))
Here is the API of max_pool2d:
@add_arg_scope
def max_pool2d(inputs,
               kernel_size,
               stride=2,
               padding='VALID',
               data_format=DATA_FORMAT_NHWC,
               outputs_collections=None,
               scope=None):
The second [2, 2] in max_pool2d specifies the strides of the pooling window along the x-axis and the y-axis respectively.
The number 1024 in the fully_connected call specifies the number of neurons in that layer.
To clarify: the number of neurons in a fully_connected layer is not related to the strides of the max_pool2d layer. The size of a fully_connected layer can be set arbitrarily, without regard to its input size. What should typically be considered when sizing the last fully connected layers of a network is the output layer size, i.e. the number of classes in the problem, rather than the layer's input size.
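As a quick check of what the second [2, 2] does, here is a minimal sketch (assuming slim is tf.contrib.slim, as in the question's code):

import tensorflow as tf
slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [None, 64, 64, 1])
# kernel size [2, 2], stride [2, 2]: the window moves 2 pixels at a time,
# so each spatial dimension is halved
pooled = slim.max_pool2d(images, [2, 2], [2, 2], padding='SAME')
print(pooled.shape)  # (?, 32, 32, 1)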

How to build a neural network in tensorflow with custom activation functions?

I'm new to tensorflow. I'm building a 3-layer neural network (just one hidden layer) using tensorflow, and I want to apply a custom activation function to its hidden layer.
I implemented it using the numpy (np) library:
def my_network(input_layer, centers, beta, weights):
    layer_1 = input_layer
    gaussian = np.array([[sum([i*i for i in vec]) for vec in layer_1 - center] for center in centers])
    a = beta.reshape(len(beta), 1) * gaussian
    layer_2 = np.array([[np.exp(i) for i in vec] for vec in a])
    output = tf.matmul(np.transpose(layer_2).astype(np.float32), weights['w'])
    return output
I want to convert this into code that works with tensorflow and its gradients. How should I do this?
Try this small snippet for multiple convolution layers:
# placeholders
X = tf.placeholder(tf.float32, [None, 28, 28, 1], name="input_X")
y = tf.placeholder(tf.float32, [None, 14, 14, 1], name="Output_y")

# C1
with tf.name_scope("layer1"):
    W1 = tf.get_variable("W1", shape=[3, 3, 1, 32],
                         initializer=tf.contrib.layers.xavier_initializer())
    b1 = tf.get_variable("b1", shape=[32], initializer=tf.contrib.layers.xavier_initializer())
    layer1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME') + b1
    layer1_act = tf.nn.relu(layer1)  # here you can change to another activation function

# C2
with tf.name_scope("layer2"):
    W2 = tf.get_variable("W2", shape=[3, 3, 32, 64],
                         initializer=tf.contrib.layers.xavier_initializer())
    b2 = tf.get_variable("b2", shape=[64], initializer=tf.contrib.layers.xavier_initializer())
    layer2 = tf.nn.conv2d(layer1_act, W2, strides=[1, 1, 1, 1], padding='SAME') + b2
    layer2_act = tf.nn.relu(layer2)  # here you can change to another activation function

# max pool
with tf.name_scope("maxpool"):
    maxpool = tf.nn.max_pool(layer2_act, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')  # just to show how to use maxpool

# C3: one output channel so the prediction shape matches y ([None, 14, 14, 1])
with tf.name_scope("layer3"):
    W3 = tf.get_variable("W3", shape=[3, 3, 64, 1],
                         initializer=tf.contrib.layers.xavier_initializer())
    b3 = tf.get_variable("b3", shape=[1], initializer=tf.contrib.layers.xavier_initializer())
    layer3 = tf.nn.conv2d(maxpool, W3, strides=[1, 1, 1, 1], padding='SAME') + b3
    layer3_act = tf.nn.relu(layer3)  # here you can change to another activation function

# draw graph of the train operation (name scopes cannot contain spaces)
with tf.name_scope('loss_and_train_operation'):
    loss = tf.reduce_mean(tf.losses.mean_squared_error(
        labels=y, predictions=layer3_act))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.00001)
    train_op = optimizer.minimize(loss)
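As for the Gaussian activation itself, it can be written directly with TensorFlow ops so that gradients flow through it automatically. A minimal sketch mirroring the numpy version above (assuming input_layer, centers, beta and weights['w'] are float32 tensors of shapes (n_inputs, dim), (n_centers, dim), (n_centers,) and (n_centers, n_out) respectively):

def my_network_tf(input_layer, centers, beta, weights):
    # squared Euclidean distance from every center to every input row
    diff = tf.expand_dims(centers, 1) - tf.expand_dims(input_layer, 0)
    gaussian = tf.reduce_sum(tf.square(diff), axis=2)  # (n_centers, n_inputs)
    a = tf.reshape(beta, [-1, 1]) * gaussian
    layer_2 = tf.exp(a)  # note: a Gaussian RBF would usually be tf.exp(-a)
    return tf.matmul(tf.transpose(layer_2), weights['w'])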

How to convert the CIFAR10 tutorial to NCHW

I'm trying to convert the Tensorflow CIFAR10 tutorial from NHWC to NCHW, but can't figure out how to do so. I have only found answers such as this, which is a couple of lines of code without an explanation of how it works or where to use it. Here are a couple of unsuccessful attempts I have made using this approach:
def inference(images):
    with tf.variable_scope('conv1') as scope:
        kernel = _variable_with_weight_decay('weights',
                                             shape=[5, 5, 3, 64],
                                             stddev=5e-2,
                                             wd=0.0)
        # ****************************************************************** #
        ### Original
        conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
        ### Attempt 1
        imgs = tf.transpose(images, [0, 3, 1, 2])  # NHWC -> NCHW
        conv = tf.nn.conv2d(imgs, kernel, [1, 1, 1, 1], padding='SAME')
        conv = tf.transpose(conv, [0, 2, 3, 1])  # NCHW -> NHWC
        ### Attempt 2
        kern = tf.transpose(kernel, [0, 3, 1, 2])  # NHWC -> NCHW
        conv = tf.nn.conv2d(images, kern, [1, 1, 1, 1], padding='SAME')
        conv = tf.transpose(conv, [0, 2, 3, 1])  # NCHW -> NHWC
        # ****************************************************************** #
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv1)
        ...
which give the following errors (respectively):
ValueError: Dimensions must be equal, but are 24 and 3 for 'conv1/Conv2D' (op: 'Conv2D') with input shapes: [64,3,24,24], [5,5,3,64].
ValueError: Dimensions must be equal, but are 3 and 5 for 'conv1/Conv2D' (op: 'Conv2D') with input shapes: [64,24,24,3], [5,64,5,3].
Can someone please provide a set of steps I can follow to convert this example to NCHW successfully?
In your attempt #1, try the following:
conv = tf.nn.conv2d(imgs, kernel, [1, 1, 1, 1], padding='SAME', data_format='NCHW')
(i.e. add data_format='NCHW' to the parameters; the kernel keeps its [height, width, in_channels, out_channels] shape regardless of data_format, and note that NCHW convolutions generally require a GPU build of TensorFlow)
e.g. as follows:
import tensorflow as tf

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

with tf.Session(config=config) as session:
    kernel = tf.ones(shape=[5, 5, 3, 64])
    images = tf.ones(shape=[64, 24, 24, 3])
    imgs = tf.transpose(images, [0, 3, 1, 2])  # NHWC -> NCHW
    conv = tf.nn.conv2d(imgs, kernel, [1, 1, 1, 1], padding='SAME', data_format='NCHW')
    conv = tf.transpose(conv, [0, 2, 3, 1])  # NCHW -> NHWC
    print("conv=", conv.eval())

Keras Fully-Connected Dense Output M x N?

I am looking at the examples/image_ocr.py example in Keras. When I run it, I see something like:
____________________________________________________________________________________________________
max2 (MaxPooling2D)              (None, 32, 16, 16)    0           conv2[0][0]
____________________________________________________________________________________________________
reshape (Reshape)                (None, 32, 256)       0           max2[0][0]
____________________________________________________________________________________________________
dense1 (Dense)                   (None, 32, 32)        8224        reshape[0][0]
____________________________________________________________________________________________________
The Dense layer outputs a 32x32 tensor (per sample). I am trying to replicate this in pure TensorFlow, where tf.matmul would be used, but how can I output 32x32 using matmul?
Addition: I am not trying to replicate the Keras example exactly; here is my code:
import numpy as np

w = 128; h = 64
# junk image, only one
dataset = np.zeros((1, w, h, 1))

import tensorflow as tf

pool_size = 1
num_filters = 16

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

inputs = tf.placeholder(tf.float32, [None, w, h, 1])
W_conv1 = weight_variable([3, 3, 1, num_filters])
b_conv1 = bias_variable([num_filters])
h_conv1 = tf.nn.relu(conv2d(inputs, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([3, 3, num_filters, num_filters])
b_conv2 = bias_variable([num_filters])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
h_pool2_flat = tf.reshape(h_pool2, [-1, 32, 256])
W_fc1 = weight_variable([256, 32])
b_fc1 = bias_variable([32])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
print(inputs.shape)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = sess.run(h_pool2_flat, feed_dict={inputs: dataset})
    print('output', output.shape)
And I get
ValueError: Shape must be rank 2 but is rank 3 for 'MatMul_5' (op: 'MatMul') with input shapes: [?,32,256], [256,32].
A smaller example:
import numpy as np
import tensorflow as tf

dataset = np.zeros((3, 2, 4))
inputs = tf.placeholder(tf.float32, [None, 2, 4])
print(inputs)
W = tf.zeros((4, 5))
print(W)
W2 = tf.matmul(inputs, W)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = sess.run(W2, feed_dict={inputs: dataset})
    print('output', output.shape)
This also gives a similar error:
ValueError: Shape must be rank 2 but is rank 3 for 'MatMul_12' (op: 'MatMul') with input shapes: [?,2,4], [4,5].
Any ideas?
Thanks,
That 32 is there because it was in the previous layer; it stays unchanged.
tf.matmul multiplies over the two last dimensions, as stated here (see the examples taking more than two dimensions).
I see you've got a Dense(32) there, with input size 256.
This means that the weights matrix is (256, 32). In Keras, the multiplication, as seen here, is inputs x kernel.
So, if you have the input tensor shaped as (?, any, 256) and the weights matrix shaped as (256, 32), all you need is:
output = tf.matmul(input, weights)
This will output a shape of (?, any, 32); the any is untouched because it was simply there before.
You may also want to add the biases, which follow the same principle: you need a bias vector of shape (32,).
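If your TensorFlow version rejects the rank-3 by rank-2 matmul (as in the errors above), an equivalent formulation is to contract the last axis explicitly with tf.tensordot, or to reshape to 2-D around the multiply. A sketch assuming h_pool2_flat has shape (?, 32, 256) and W_fc1 has shape (256, 32), as in the question's code:

# contract the last axis of the input with the first axis of the weights
h_fc1 = tf.nn.relu(tf.tensordot(h_pool2_flat, W_fc1, axes=[[2], [0]]) + b_fc1)

# or, equivalently, via an explicit reshape:
flat = tf.reshape(h_pool2_flat, [-1, 256])  # (? * 32, 256)
h_fc1 = tf.nn.relu(tf.reshape(tf.matmul(flat, W_fc1), [-1, 32, 32]) + b_fc1)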

Running a Multi-Scale network with TensorFlow

I want to try to build a multi-scale CNN with TensorFlow, starting from the cifar10 code.
From what I understand, I should take the output of the first conv layer and merge it with the output of the second conv layer to feed the first fully connected layer. Is that right? If yes, how do I actually do this?
I have almost the same first layers as in cifar10, except that the norm1 and pool1 layers are swapped:
# conv1
with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay('weights', shape=[5, 5, 3, 64],
                                         stddev=1e-4, wd=0.0)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    bias = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(bias, name=scope.name)
    _activation_summary(conv1)

# norm1
norm1 = tf.nn.lrn(conv1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                  name='norm1')

# pool1
pool1 = tf.nn.max_pool(norm1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                       padding='SAME', name='pool1')

# conv2
with tf.variable_scope('conv2') as scope:
    kernel = _variable_with_weight_decay('weights', shape=[5, 5, 64, 64],
                                         stddev=1e-4, wd=0.0)
    conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
    bias = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(bias, name=scope.name)
    _activation_summary(conv2)

# norm2
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                  name='norm2')

# pool2
pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                       strides=[1, 2, 2, 1], padding='SAME', name='pool2')
Then I try to merge the pool1 layer with the pool2 layer using tf.concat.
Here's how I do this:
# local3
with tf.variable_scope('local3') as scope:
    # concatenate tensors along axis 2
    # (in TF >= 1.0 the argument order is tf.concat([pool1, pool2], 2))
    concat = tf.concat(2, [pool1, pool2])
    # Move everything into depth so we can perform a single matrix multiply.
    dim = 1
    for d in concat.get_shape()[1:].as_list():
        dim *= d
    reshape = tf.reshape(concat, [FLAGS.batch_size, dim])
    weights = _variable_with_weight_decay('weights', shape=[dim, 384],
                                          stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu_layer(reshape, weights, biases, name=scope.name)
    _activation_summary(local3)
I'm not even sure that this is the right procedure, because the loss is now around 17, whereas in the single-scale case the initial loss was around 3.
Is this common?
Thanks in advance.