I'm trying to convert the Tensorflow CIFAR10 tutorial from NHWC to NCHW, but can't figure out how to do so. I have only found answers such as this, which is a couple of lines of code without an explanation of how it works and where to use it. Here are a couple of unsuccessful attempts I have made using this approach:
def inference(images):
with tf.variable_scope('conv1') as scope:
kernel = _variable_with_weight_decay('weights',
shape=[5, 5, 3, 64],
stddev=5e-2,
wd=0.0)
# ****************************************************************** #
### Original
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
### Attempt 1
imgs = tf.transpose(images, [0, 3, 1, 2]) # NHWC -> NCHW
conv = tf.nn.conv2d(imgs, kernel, [1, 1, 1, 1], padding='SAME')
conv = tf.transpose(conv, [0, 2, 3, 1]) # NCHW -> NHWC
### Attempt 2
kern = tf.transpose(kernel, [0, 3, 1, 2]) # NHWC -> NCHW
conv = tf.nn.conv2d(images, kern, [1, 1, 1, 1], padding='SAME')
conv = tf.transpose(conv, [0, 2, 3, 1]) # NCHW -> NHWC
# ****************************************************************** #
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
pre_activation = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv1)
...
Which get the errors (respectively):
ValueError: Dimensions must be equal, but are 24 and 3 for 'conv1/Conv2D' (op: 'Conv2D') with input shapes: [64,3,24,24], [5,5,3,64].
ValueError: Dimensions must be equal, but are 3 and 5 for 'conv1/Conv2D' (op: 'Conv2D') with input shapes: [64,24,24,3], [5,64,5,3].
Can someone please provide a set of steps I can follow to convert this example to NCHW successfully.
In your attempt #1 , try the following:
conv = tf.nn.conv2d(imgs, kernel, [1, 1, 1, 1], padding='SAME', data_format = 'NCHW')
(i.e. add data_format = 'NCHW' to the parameters)
e.g. as follows:
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as session:
kernel = tf.ones(shape=[5, 5, 3, 64])
images = tf.ones(shape=[64,24,24,3])
imgs = tf.transpose(images, [0, 3, 1, 2]) # NHWC -> NCHW
conv = tf.nn.conv2d(imgs, kernel, [1, 1, 1, 1], padding='SAME', data_format = 'NCHW')
conv = tf.transpose(conv, [0, 2, 3, 1]) # NCHW -> NHWC
print("conv=",conv.eval())
Related
In the 8th line of code shown below, I don't know the meaning of the second [2,2].
Is it the size of stride? (the API of max_pool2d is also shown below) By the way, how did the number 1024 in the 15th line of code come from?
def build_graph(top_k):
# with tf.device('/cpu:0'):
keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
conv_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv1')
max_pool_1 = slim.max_pool2d(conv_1, [2, 2], [2, 2], padding='SAME')
conv_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv2')
max_pool_2 = slim.max_pool2d(conv_2, [2, 2], [2, 2], padding='SAME')
conv_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3')
max_pool_3 = slim.max_pool2d(conv_3, [2, 2], [2, 2], padding='SAME')
flatten = slim.flatten(max_pool_3)
fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024, activation_fn=tf.nn.tanh, scope='fc1')
logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size, activation_fn=None, scope='fc2')
# logits = slim.fully_connected(flatten, FLAGS.charset_size, activation_fn=None, reuse=reuse, scope='fc')
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))
Here is the API of max_pool2d:
#add_arg_scope
def max_pool2d(inputs,
kernel_size,
stride=2,
padding='VALID',
data_format=DATA_FORMAT_NHWC,
outputs_collections=None,
scope=None):
The second [2,2] in max_pool2d specifies the strides of the layer along the x-axis and the y-axis respectively.
Also, the 1024 number in the fully_connected function specifies the number of the neurons in that layer.
I would clarify that the number of the neurons of the fully_connected layer is not related to the strides of the max_pool2d layer. The size of the fully_connected layer can be set arbitrarily without considering its input size. Typically, the output layer size or the number of the classes in the problem should be considered while deciding about the number of the last fully_connected layers in the networks rather than the layer's input size.
I'm new to tensorflow. I'm building a 3-layer neural network (just one hidden layer ) using tensorflow and I want to apply a custom activation function to its hidden layer.
I implemented it using np library:
def my_network(input_layer,centers,beta, weights):
layer_1 = input_layer
gaussian = np.array([[sum([i*i for i in vec]) for vec in layer_1-center] for center in centers])
a = beta.reshape(len(beta),1)* gaussian
layer_2 = np.array([[np.exp(i) for i in vec] for vec in a])
output = tf.matmul(np.transpose(layer_2).astype(np.float32), weights['w'])
return output
I want to convert it to some code that is suitable with tensorflow and its gradients. How should I do this?
Try this small snippet for multiple convolution layers:
# placeholders
X = tf.placeholder(tf.float32, [None, 28, 28, 1], name="input_X")
y = tf.placeholder(tf.float32, [None, 14, 14, 1], name="Output_y")
# C1
with tf.name_scope("layer1"):
W1 = tf.get_variable("W1", shape=[3, 3, 1, 32],
initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.get_variable("b1", shape=[32], initializer=tf.contrib.layers.xavier_initializer())
layer1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME') + b1
layer1_act = tf.nn.relu(layer1) # here you can change to other activation function
# C2
with tf.name_scope("layer2"):
W2 = tf.get_variable("W2", shape=[3, 3, 32, 64],
initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.get_variable("b2", shape=[64], initializer=tf.contrib.layers.xavier_initializer())
layer2 = tf.nn.conv2d(layer1_act, W2, strides=[1, 1, 1, 1], padding='SAME') + b2
layer2_act = tf.nn.relu(layer2) # here you can change to other activation function
# max pool
with tf.name_scope("maxpool"):
maxpool = tf.nn.max_pool(layer2_act, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME') #just to show how to use maxpool
# C3
with tf.name_scope("layer3"):
W3 = tf.get_variable("W3", shape=[3, 3, 64, 32],
initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.get_variable("b3", shape=[32], initializer=tf.contrib.layers.xavier_initializer())
layer3 = tf.nn.conv2d(maxpool, W3, strides=[1, 1, 1, 1], padding='SAME') + b3
layer3_act = tf.nn.relu(layer3) # here you can change to other activation function
#draw graph of train operation
with tf.name_scope('loss and train operation'):
loss = tf.reduce_mean(tf.losses.mean_squared_error(
labels=tf.cast(y, tf.int32),
predictions=layer3_act))
optimizer = tf.train.AdamOptimizer(learning_rate=0.00001)
train_op = optimizer.minimize(loss)
I am attempting to recreate a cnn from a research paper, but I am still new to deep learning.
I am given a 3d patch of size 32x32x7. I first want to perform a convolution of size 3x3 with 32 features and a stride of 2. Then from that result, I need to perform a 3x3x4 convolution with 64 features and a stride of 1. I do not want to pool or have an activation function between the two convolutions. Why can't I just feed the results of my first convolution into the second one?
import tensorflow as tf
sess = tf.InteractiveSession()
def conv3d(tempX, tempW):
return tf.nn.conv3d(tempX, tempW, strides=[2, 2, 2, 2, 2],
padding='SAME')
def conv3d_s1(tempX, tempW):
return tf.nn.conv3d(tempX, tempW, strides=[1, 1, 1, 1, 1],
padding='SAME')
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
x = tf.placeholder(tf.float32, shape=[None, 7168])
y_ = tf.placeholder(tf.float32, shape=[None, 3])
W = tf.Variable(tf.zeros([7168,3]))
#first convolution
W_conv1 = weight_variable([3, 3, 1, 1, 32])
x_image = tf.reshape(x, [-1, 32, 32, 7, 1])
h_conv1 = conv3d(x_image, W_conv1)
#second convolution
W_conv2 = weight_variable([3, 3, 4, 1, 64])
h_conv2 = conv3d_s1(h_conv1, W_conv2)
Thank you!
After first conv3d you have tensor with shape [None, 16, 16, 4, 32], therefore you have to use kernel with shape [3, 3, 4, 32, 64] in the second conv3d_s1.
I am trying to use Tensorflow to classify some object representations. I used the same architecture as in the Tensorflow Cifar-10 example, with the last layer defined as:
with tf.variable_scope('sigmoid_linear') as scope:
weights = _variable_with_weight_decay('weights', [192, num_classes],
stddev=1 / 192.0, wd=0.0)
biases = _variable_on_cpu('biases', [num_classes],
initializer)
sigmoid_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
_activation_summary(sigmoid_linear)
return sigmoid_linear
In my case, num_classes is 2, and the amount of channels in the representation fed to the neural network is 8. Furthermore, I'm currently debugging with only 5 examples. The output of the last layer has a shape of[40,2]. I expect the first dimension is due to 5 examples * 8 channels and the second due to the number of classes.
In order to use compare the logits and the labels using e.g. tensorflow.nn.SparseSoftmaxCrossEntropyWithLogits I need them to have a common shape. How can I interpret the current content of the logits in the current shape, and how can I reduce the first dimension of the logits to be the same as num_classes?
Edit: the shape of the input to the inference function has a shape of [5,101,1008,8]. The inference function is defined as:
def inference(representations):
"""Build the model.
Args:
STFT spectra: spectra returned from distorted_inputs() or inputs().
Returns:
Logits.
"""
# conv1
with tf.variable_scope('conv1') as scope:
kernel = _variable_with_weight_decay('weights',
shape=[5, 5, nChannels, 64],
stddev=5e-2,
wd=0.0)
conv = tf.nn.conv2d(representations, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], initializer,
)
pre_activation = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv1)
# pool1
pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
padding='SAME', name='pool1')
# norm1
norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
name='norm1')
# conv2
with tf.variable_scope('conv2') as scope:
kernel = _variable_with_weight_decay('weights',
shape=[5, 5, 64, 64],
stddev=5e-2,
wd=0.0)
conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], initializer)
pre_activation = tf.nn.bias_add(conv, biases)
conv2 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv2)
# norm2
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
name='norm2')
# pool2
pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool2')
# local3
with tf.variable_scope('local3') as scope:
# Move everything into depth so we can perform a single matrix multiply.
reshape = tf.reshape(pool2, [batch_size, -1])
dim = reshape.get_shape()[1].value
weights = _variable_with_weight_decay('weights', shape=[dim, 384],
stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [384], initializer)
local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
_activation_summary(local3)
# local4
with tf.variable_scope('local4') as scope:
weights = _variable_with_weight_decay('weights', shape=[384, 192],
stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [192], initializer)
local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
_activation_summary(local4)
with tf.variable_scope('sigmoid_linear') as scope:
weights = _variable_with_weight_decay('weights', [192, num_classes],
stddev=1 / 192.0, wd=0.0)
biases = _variable_on_cpu('biases', [num_classes],
initializer)
sigmoid_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
_activation_summary(sigmoid_linear)
return sigmoid_linear
After more debugging I could find the problem. The posted code with the layers, originally from the Tensorflow tutorial, works well (of course it does). I printed all shapes, after each layer, and found out that the number 40 was not due to 5 examples * 8 channels, but that I had previously set batch_size = 40, and thus also higher than the amount of training examples. The mismatch began after the reshaping in the local layer 3. The question can now be closed.
I want to try to build a multi-scale CNN using tensorflow from the cifar10 code.
For what I understood I should take the output of the first conv layer and merge it with the output of the second conv layer to feed the first fully connected layer. Is that right? If yes, how to actually do this?
I have almost the same first layers as for the cifar10 except for the norm1 and the pool1 layers that are switched
# conv1
with tf.variable_scope('conv1') as scope:
kernel = _variable_with_weight_decay('weights', shape=[5, 5, 3, 64],
stddev=1e-4, wd=0.0)
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv1)
# norm1
norm1 = tf.nn.lrn(conv1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
name='norm1')
# pool1
pool1 = tf.nn.max_pool(norm1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
padding='SAME', name='pool1')
# conv2
with tf.variable_scope('conv2') as scope:
kernel = _variable_with_weight_decay('weights', shape=[5, 5, 64, 64],
stddev=1e-4, wd=0.0)
conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv, biases)
conv2 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv2)
# norm2
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
name='norm2')
# pool2
pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool2')
Then I try to merge the norm1 layer with the pool2 layer using concat.
Here's how I do this
# local3
with tf.variable_scope('local3') as scope:
#concatenate tensors
concat = tf.concat(2,[pool1,pool2])
# Move everything into depth so we can perform a single matrix multiply.
dim=1
for d in concat.get_shape()[1:].as_list():
dim *= d
reshape = tf.reshape(concat, [FLAGS.batch_size, dim])
weights = _variable_with_weight_decay('weights', shape=[dim, 384],
stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
local3 = tf.nn.relu_layer(reshape, weights, biases, name=scope.name)
_activation_summary(local3)
I'm not even sure that this is the right procedure because the loss is now 17 when in the single scale case I had the initial loss set around 3.
Is this common?
Thanks in advance.