I try to implement a CNN model based on MNIST tutorial on TF website.
Here is my code
import tensorflow as tf
import numpy as np
from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
def cnn_model_fn(features, labels, mode):
"""Model function for CNN."""
# Input Layer
# Reshape X to 4-D tensor: [batch_size, width, height, channels]
# breaKHis images are 32x32 pixels, and have three color channel
input_layer = tf.reshape(features, [-1, 32, 32, 3])
# Convolutional Layer #1
# Computes 32 features using a 5x5 filter with ReLU activation.
# Padding is added to preserve width and height.
# Input Tensor Shape: [batch_size, 32, 32, 1]
# Output Tensor Shape: [batch_size, 32, 32, 32]
conv1 = tf.layers.conv2d(
inputs=input_layer,
filters=32,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
#print conv1.get_shape().as_list()
# Pooling Layer #1
# First max pooling layer with a 2x2 filter and stride of 2
# Input Tensor Shape: [batch_size, 32, 32, 32]
# Output Tensor Shape: [batch_size, 16, 16, 32]
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
#print pool1.get_shape().as_list()
# Convolutional Layer #2
# Computes 32 features using a 5x5 filter.
# Padding is added to preserve width and height.
# Input Tensor Shape: [batch_size, 16, 16, 32]
# Output Tensor Shape: [batch_size, 16, 16, 32]
conv2 = tf.layers.conv2d(
inputs=pool1,
filters=32,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
#print conv2.get_shape().as_list()
# Pooling Layer #2
# Second max pooling layer with a 3x3 filter and stride of 2
# Input Tensor Shape: [batch_size, 16, 16, 32]
# Output Tensor Shape: [batch_size, 8, 8, 32]
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
#print pool2.get_shape().as_list()
# Convolutional Layer #3
# Computes 64 features using a 5x5 filter.
# Padding is added to preserve width and height.
# Input Tensor Shape: [batch_size, 8, 8, 32]
# Output Tensor Shape: [batch_size, 8, 8, 64]
conv3 = tf.layers.conv2d(
inputs=pool2,
filters=64,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
#print conv3.get_shape().as_list()
# Pooling Layer #3
# Second max pooling layer with a 3x3 filter and stride of 2
# Input Tensor Shape: [batch_size, 8, 8, 64]
# Output Tensor Shape: [batch_size, 4, 4, 64]
pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)
# Flatten tensor into a batch of vectors
# Input Tensor Shape: [batch_size, 4, 4, 64]
# Output Tensor Shape: [batch_size, 4 * 4 * 64]
pool3Shape = pool3.get_shape().as_list()
#print pool3Shape
pool2_flat = tf.reshape(pool2, [-1, 4*4*64])
# Dense Layer
# Densely connected layer with 64 neurons
# Input Tensor Shape: [batch_size, 4 * 4 * 64]
# Output Tensor Shape: [batch_size, 64]
dense = tf.layers.dense(inputs=pool2_flat, units=64, activation=tf.nn.relu)
# Add dropout operation; 0.6 probability that element will be kept
dropout = tf.layers.dropout(
inputs=dense, rate=0.4, training=mode == learn.ModeKeys.TRAIN)
# Logits layer
# Input Tensor Shape: [batch_size, 64]
# Output Tensor Shape: [batch_size, 2]
logits = tf.layers.dense(inputs=dropout, units=2)
loss = None
train_op = None
# Calculate Loss (for both TRAIN and EVAL modes)
if mode != learn.ModeKeys.INFER:
onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=2)
loss = tf.losses.softmax_cross_entropy(
onehot_labels=onehot_labels, logits=logits)
# Configure the Training Op (for TRAIN mode)
if mode == learn.ModeKeys.TRAIN:
train_op = tf.contrib.layers.optimize_loss(
loss=loss,
global_step=tf.contrib.framework.get_global_step(),
learning_rate=0.001,
optimizer="SGD")
# Generate Predictions
predictions = {
"classes": tf.argmax(
input=logits, axis=1),
"probabilities": tf.nn.softmax(
logits, name="softmax_tensor")
}
# Return a ModelFnOps object
return model_fn_lib.ModelFnOps(
mode=mode, predictions=predictions, loss=loss, train_op=train_op)
And it throws me the error: InvalidArgumentError (see above for traceback): Assign requires shapes of both tensors to match. lhs shape= [1024,64] rhs shape= [2048,64]
I think there should be something wrong in the last FC layer but don't know where it is.
You can check this answer,
Tensorflow Assign requires shapes of both tensors to match. lhs shape= [20] rhs shape= [48]
Maybe you can install the previous version and try it again.
Related
I am training a DCGAN model with tensorflow.keras, and I added BatchNormalization layers in both generator and discriminator.
I train gan with following steps:
1. train discriminator with real images and images from generator(using generator.predict)
2. train adversarial network(compiled with discriminator.trainable=False)
Then I found that after a few rounds the training loss returned by train_on_batch() of both generator and discriminator goes to zero. But when I use test_on_batch() the loss is still huge for generator. And the generated images are all mess.
At first I thought that it is because in the 2.'s step mentioned above when training the adversarial network, the discriminator's input only containing fake images makes the batch normalization layers get different distribution as the 1.'s step when both fake & real images were fed.
But even if I removed all batch normalization layers in discriminator, the same problem still exists. Only when all batch normalization layers were removed the problem will disappear. Also I found out that existences of Dropout layers don't make a difference. I wondering why batch normalization can cause such problem, even if in generator fed with noises with same distribution.
# Model definition
class DCGAN_128:
def __init__(self, hidden_dim):
generator = M.Sequential()
generator.add(L.Dense(128 * 8 * 8, input_shape=[hidden_dim]))
generator.add(L.Reshape([8, 8, 128]))
generator.add(L.UpSampling2D()) # [8, 8, 128]
generator.add(L.Conv2D(128, kernel_size=3, padding="same")) # [16, 16, 128]
generator.add(L.LayerNormalization()) # 4
generator.add(L.ReLU())
generator.add(L.UpSampling2D()) # [32, 32, 128]
generator.add(L.Conv2D(64, kernel_size=5, padding="same")) # [32, 32, 64]
generator.add(L.LayerNormalization()) # 8
generator.add(L.ReLU())
generator.add(L.UpSampling2D()) # [64, 64, 128]
generator.add(L.Conv2D(32, kernel_size=7, padding="same")) # [64, 64, 32]
generator.add(L.LayerNormalization()) # 12
generator.add(L.ReLU())
generator.add(L.UpSampling2D()) # [128, 128, 32]
generator.add(L.Conv2D(3, kernel_size=3, padding="same", activation=A.sigmoid)) # [128, 128, 3]
discriminator = M.Sequential()
discriminator.add(L.Conv2D(32, kernel_size=5, strides=2, padding="same", input_shape=[128, 128, 3]))
discriminator.add(L.LeakyReLU())
# discriminator.add(L.Dropout(0.25)) # [64, 64, 32]
discriminator.add(L.Conv2D(64, kernel_size=3, strides=2, padding="same"))
# discriminator.add(L.BatchNormalization(epsilon=1e-5)) # 4
discriminator.add(L.LeakyReLU())
# discriminator.add(L.Dropout(0.25)) # [32, 32, 64]
discriminator.add(L.Conv2D(128, kernel_size=3, strides=2, padding="same"))
discriminator.add(L.LayerNormalization()) # 8
discriminator.add(L.LeakyReLU()) # [16, 16, 128]
discriminator.add(L.Dropout(0.25))
discriminator.add(L.Conv2D(256, kernel_size=3, strides=2, padding="same"))
discriminator.add(L.LayerNormalization()) # 12
discriminator.add(L.LeakyReLU()) # [8, 8, 256]
discriminator.add(L.Dropout(0.25))
discriminator.add(L.Conv2D(512, kernel_size=3, strides=2, padding="same"))
discriminator.add(L.LeakyReLU()) # [4, 4, 512]
discriminator.add(L.Flatten())
discriminator.add(L.Dense(1, activation=A.sigmoid))
self.model_gen = generator
self.model_dis = discriminator
self.adv_input = L.Input([hidden_dim])
self.adv_output = discriminator(generator(self.adv_input))
self.model_adversarial = M.Model(self.adv_input, self.adv_output)
# Training
dcgan = hidden_dim = 100
DCGAN_128(hidden_dim)
data_loader = AnimeFacesLoader([128, 128])
batch_size = 32
n_rounds = 40000
dis_model = dcgan.model_dis
gen_model = dcgan.model_gen
adv_model = dcgan.model_adversarial
gen_model.summary()
adv_model.summary()
dis_model.compile(Opt.Adam(0.0002), Lo.binary_crossentropy)
dis_model.trainable = False
adv_model.compile(Opt.Adam(0.0002), Lo.binary_crossentropy)
layer_outputs = [layer.output for layer in dis_model.layers]
visual_model = tf.keras.Model(dis_model.input, layer_outputs)
for rounds in range(n_rounds):
# Get output images
if rounds % 100 == 0 and rounds > 0:
noise = np.random.uniform(-1, 1, [16, hidden_dim])
tiled_images = np.zeros([4*128, 4*128, 3]).astype(np.uint8)
generated_imgs = gen_model.predict(noise)
generated_imgs *= 256
generated_imgs = generated_imgs.astype(np.uint8)
for i in range(16):
tiled_images[int(i / 4)*128: int(i / 4)*128 + 128,
int(i % 4)*128: int(i % 4)*128 + 128, :] = generated_imgs[i, :, :, :]
Image.fromarray(tiled_images).save("Output/DCGAN/" + "rounds_{0}.jpg".format(rounds))
'''
layer_visualization = visual_model.predict(generated_imgs[:1])
for i in range(len(layer_visualization)):
plt.imshow(layer_visualization[i][0, :, :, 0])
plt.show()
'''
# train discriminator on real & fake images
real_imgs = data_loader.get_batch(batch_size)
real_ys = np.ones([batch_size, 1])
noise = np.random.uniform(-1, 1, [batch_size, hidden_dim])
fake_ys = np.zeros([batch_size, 1])
fake_imgs = gen_model.predict(noise)
imgs = np.concatenate([real_imgs, fake_imgs], axis=0)
ys = np.concatenate([real_ys, fake_ys], axis=0)
loss_dis = dis_model.train_on_batch(imgs, ys)
print("Round {}, Loss dis:{:.4f}".format(rounds, loss_dis))
loss_dis_test = dis_model.test_on_batch(imgs, ys)
print(loss_dis_test)
noise = np.random.uniform(-1, 1, [batch_size, hidden_dim])
fake_ys = np.ones([batch_size, 1])
loss_gen = adv_model.train_on_batch(noise, fake_ys)
print("Round {}, Loss gen:{:.4f}".format(rounds, loss_gen))
loss_gen_test = adv_model.test_on_batch(noise, fake_ys)
print(loss_gen_test)
I have a TF CNN model and now I want to use the weight pruning API of tensor flow with this but all the examples I checked online, it works only with Keras model
I want to prune wights of my existing model
def mnist_cnn(inputs):
input_layer = tf.reshape(inputs, [-1, 28, 28, 3])
# Convolutional Layer #1
conv1 = tf.layers.conv2d(
inputs=input_layer,
filters=32,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
# Pooling Layer #1
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
# Convolutional Layer #2 and Pooling Layer #2
conv2 = tf.layers.conv2d(
inputs=pool1,
filters=64,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
# Dense Layer
pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
dropout = tf.layers.dropout(inputs=dense, rate=0.4)
# Logits Layer
outputs = tf.layers.dense(inputs=dropout, units=10)
return outputs
This tutorial defines weight pruning
https://www.tensorflow.org/model_optimization/guide/pruning/pruning_with_keras
but I can't figure out how to use Pruning APIs with my exiting model ( without upgrading it to keras sequential model )
I am trying to use Tensorflow to classify some object representations. I used the same architecture as in the Tensorflow Cifar-10 example, with the last layer defined as:
with tf.variable_scope('sigmoid_linear') as scope:
weights = _variable_with_weight_decay('weights', [192, num_classes],
stddev=1 / 192.0, wd=0.0)
biases = _variable_on_cpu('biases', [num_classes],
initializer)
sigmoid_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
_activation_summary(sigmoid_linear)
return sigmoid_linear
In my case, num_classes is 2, and the amount of channels in the representation fed to the neural network is 8. Furthermore, I'm currently debugging with only 5 examples. The output of the last layer has a shape of[40,2]. I expect the first dimension is due to 5 examples * 8 channels and the second due to the number of classes.
In order to use compare the logits and the labels using e.g. tensorflow.nn.SparseSoftmaxCrossEntropyWithLogits I need them to have a common shape. How can I interpret the current content of the logits in the current shape, and how can I reduce the first dimension of the logits to be the same as num_classes?
Edit: the shape of the input to the inference function has a shape of [5,101,1008,8]. The inference function is defined as:
def inference(representations):
"""Build the model.
Args:
STFT spectra: spectra returned from distorted_inputs() or inputs().
Returns:
Logits.
"""
# conv1
with tf.variable_scope('conv1') as scope:
kernel = _variable_with_weight_decay('weights',
shape=[5, 5, nChannels, 64],
stddev=5e-2,
wd=0.0)
conv = tf.nn.conv2d(representations, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], initializer,
)
pre_activation = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv1)
# pool1
pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
padding='SAME', name='pool1')
# norm1
norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
name='norm1')
# conv2
with tf.variable_scope('conv2') as scope:
kernel = _variable_with_weight_decay('weights',
shape=[5, 5, 64, 64],
stddev=5e-2,
wd=0.0)
conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], initializer)
pre_activation = tf.nn.bias_add(conv, biases)
conv2 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv2)
# norm2
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
name='norm2')
# pool2
pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool2')
# local3
with tf.variable_scope('local3') as scope:
# Move everything into depth so we can perform a single matrix multiply.
reshape = tf.reshape(pool2, [batch_size, -1])
dim = reshape.get_shape()[1].value
weights = _variable_with_weight_decay('weights', shape=[dim, 384],
stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [384], initializer)
local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
_activation_summary(local3)
# local4
with tf.variable_scope('local4') as scope:
weights = _variable_with_weight_decay('weights', shape=[384, 192],
stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [192], initializer)
local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
_activation_summary(local4)
with tf.variable_scope('sigmoid_linear') as scope:
weights = _variable_with_weight_decay('weights', [192, num_classes],
stddev=1 / 192.0, wd=0.0)
biases = _variable_on_cpu('biases', [num_classes],
initializer)
sigmoid_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
_activation_summary(sigmoid_linear)
return sigmoid_linear
After more debugging I could find the problem. The posted code with the layers, originally from the Tensorflow tutorial, works well (of course it does). I printed all shapes, after each layer, and found out that the number 40 was not due to 5 examples * 8 channels, but that I had previously set batch_size = 40, and thus also higher than the amount of training examples. The mismatch began after the reshaping in the local layer 3. The question can now be closed.
So I have been looking a bit into Tensorflow and trying to get my head around one thing, i do not know what I am missing. I am checking out the tutorial from https://www.tensorflow.org/versions/master/tutorials/layers and more particularly, from the model building step:
"""Model function for CNN."""
# Input Layer
input_layer = tf.reshape(features, [-1, 28, 28, 1])
# Convolutional Layer #1
conv1 = tf.layers.conv2d(
inputs=input_layer,
filters=32,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
# Pooling Layer #1
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
# Convolutional Layer #2 and Pooling Layer #2
conv2 = tf.layers.conv2d(
inputs=pool1,
filters=64,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
# Dense Layer
pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
If I understand it correctly, input convulated with 32 # 5x5 filters creates an output of 32 feature maps # 28x28. Then max-pooled reduces the feature maps to width and height = 14x14.
So here is the problem for me to understand, we have 32 # 14x14 feature maps and we convolute with another set of 64 filters # 5x5. Should it produce 32*64=2048 feature maps of size 14x14? So when reshaping it, we should use [-1,7*7*2048] after the last max-pooling step? Or is this convulation in conv2 done with a depth of 32, the filters are sized like [5,5,32]? Maybe I am missing some crucial theory or just blind ;>
Would appriacte if someone could help me understand this!
Cheers!
You are using padding "same".
If padding == "same"
output_shape[i] = ceil(input_shape[i]/stides[i])
If padding == "valid":
output_spatial_shape[i] = ceil((input_spatial_shape[i] - (spatial_filter_shape[i]-1) * dilation_rate[i]) / strides[i]).
To get the behavior you're expecting use padding "valid".
Look at API here : https://www.tensorflow.org/api_docs/python/tf/nn/convolution
I hope this helps.
I want to mimic this paper where they use fully connected upsampling layers. I'm using the contributed conv3d_transpose but the concept should be the same as 2D version.
I have an output from a convolutional layer [6,6,6,256] being fed into an upsampling layer that is supposed to output [13,13,13,128]. Since the layer should be fully connected, the filter should be [13,13,13,128] right? (reducing feature map size)
Furthermore, the stride should be 1 correct?
Maybe I am thinking of this backwards, let me explain. The filter defines that size of the inverted receptive field (totally made that up) -- the size of the weight matrix located on the output layer (thus the full [13,13,13,128]). EDIT INCORRECT [ The strides are the length of strides the single window moves on the input image.] --> I now understand that the strides are also in relation to the output layer. For example a filter size 2 with a stride 2, will double the output dimension.
This means that for a fully connected layer, the stride should be 0, but that isn't possible...
The code for my upsampling is here:
temp_batch_size = tf.shape(x)[0] #batch_size shape
with tf.name_scope("deconv6") as scope:
output_shape = [temp_batch_size, (n_input_z / 4), n_input_x / 4, n_input_y / 4, 128]
strides = [1,1,1,1,1]
conv7 = deconv3d(conv6, weights['wdc1'], biases['bdc1'], output_shape, strides, padding=1)
conv7 = tf.reshape(conv7, [-1, n_input_x / 4, n_input_y / 4, (n_input_z / 4) * 128])
conv7 = tf.contrib.layers.batch_norm(conv7)
conv7 = tf.reshape(conv7, [-1, (n_input_z / 4), n_input_x / 4, n_input_y / 4, 128])
The deconv function looks like this:
def deconv3d(prev_layer, w, b, output_shape, strides, padding=0):
# Deconv layer
if padding == 0:
deconv = tf.nn.conv3d_transpose(prev_layer, w, output_shape=output_shape, strides=strides, padding="SAME")
else:
deconv = tf.nn.conv3d_transpose(prev_layer, w, output_shape=output_shape, strides=strides, padding="VALID")
deconv = tf.nn.bias_add(deconv, b)
deconv = tf.nn.relu(deconv)
return deconv
The weights and biases are here:
'wdc1' : tf.get_variable("weights_7", shape=[13, 13, 13, 128, 256],
initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32),
...
'bdc1': tf.Variable(tf.zeros([128], dtype=tf.float32), name="biases_7", dtype=tf.float32),
From debugging I can verify input and output dimensions:
(Pdb) conv6
<tf.Tensor 'conv5_1/Reshape_1:0' shape=(?, 6, 6, 6, 256) dtype=float32>
(Pdb) output_shape
[<tf.Tensor 'strided_slice:0' shape=() dtype=int32>, 13, 13, 13, 128]
When I run this code, I get the following error:
tensorflow.python.framework.errors.InvalidArgumentError: Conv3DBackpropInput: Number of planes of out_backprop doesn't match computed: actual = 6, computed = 1
[[Node: deconv6/conv3d_transpose = Conv3DBackpropInputV2[T=DT_FLOAT, padding="VALID", strides=[1, 1, 1, 1, 1], _device="/job:localhost/replica:0/task:0/gpu:0"](deconv6/conv3d_transpose/output_shape, weights_7/read, conv5_1/Reshape_1)]]
[[Node: deconv8/BatchNorm/moments/sufficient_statistics/Shape/_39 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_3797_deconv8/BatchNorm/moments/sufficient_statistics/Shape", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
I assumed from the first Node line the problem was in deconv6, but I'll post the code if you think it's actually in deconv8.
It seems that based on the description from tensorflow official documents, this is not really a deconvolution but a gradient calculation