Roi pooling and backpropagation - tensorflow

I have implemented ROI pooling at my graph. The code is as follows.
def __init__(self,fatness,image_shape, vocab, r_vocab, num_classes,rnn_cells_num):
CTCUtils.vocab = vocab
CTCUtils.r_vocab = r_vocab
self.global_step = tf.Variable(0, name='global_step', trainable=False)
self.input_labels=tf.placeholder(dtype=tf.string, shape=(config.train.input_labels_size,))
self.input_dat = tf.placeholder(dtype=tf.float32, shape=(None,config.train.extracted_feature_height,config.train.extracted_feature_width,512))
self.in_boxes = tf.placeholder(dtype=tf.float32, shape=(config.train.input_labels_size,5))
self.num_classes = num_classes
self.rnn_cells_num = rnn_cells_num
self.saver = tf.train.Saver()
self.poolheight=1
self.poolwidth=32
self.sess = tf.Session(graph = tf.get_default_graph())
with slim.arg_scope([slim.conv2d, slim.max_pool2d]):
########################################################
#########CONV layers before ROI pooling#################
########################################################
net = slim.repeat(self.input_dat, 4, slim.conv2d, fatness, [3, 3], padding='SAME',scope='conv6',weights_regularizer=slim.l2_regularizer(config.weight_decay),weights_initializer=tf.contrib.layers.xavier_initializer(),biases_initializer = tf.zeros_initializer(),activation_fn=tf.nn.relu)
self.in_boxes=tf.dtypes.cast(self.in_boxes,tf.int32)
########################################################
#######ROI pooling layer################################
########################################################
rec_fmap_clone = roi_pooling(net, self.in_boxes, pool_height=self.poolheight, pool_width=self.poolwidth) #shape is (1, 20, 256, 1, 32)
decision=(rec_fmap_clone.get_shape()==None)
if (decision==False):
self.rec_fmap = tf.identity(rec_fmap_clone)
shape=np.shape(self.rec_fmap)
self.rec_fmap=np.reshape(self.rec_fmap, (shape[1],shape[2],shape[3],shape[4]))
self.rec_fmap=tf.transpose(self.rec_fmap, perm=[0, 2, 3, 1])
else:
self.rec_fmap=tf.ones([config.train.input_labels_size, 1, 32, 256], tf.float32)
with slim.arg_scope([slim.conv2d],normalizer_fn=slim.batch_norm,weights_initializer=tf.truncated_normal_initializer(stddev=0.01),weights_regularizer=slim.l2_regularizer(0.0005)):
classes = slim.conv2d(self.rec_fmap, self.num_classes, [1, 13])
pattern = slim.fully_connected(slim.flatten(classes), self.rnn_cells_num) # patterns number
width = int(self.rec_fmap.get_shape()[2])
pattern = tf.reshape(pattern, (-1, 1, 1, self.rnn_cells_num))
pattern = tf.tile(pattern, [1, 1, width, 1])
inf = tf.concat(axis=3, values=[classes, pattern]) # skip connection over RNN
inf = slim.conv2d(inf, self.num_classes, [1, 1], normalizer_fn=None,activation_fn=None) # fully convolutional linear activation
inf = tf.squeeze(inf, [1])
prob = tf.transpose(inf, (1, 0, 2)) # prepare for CTC
data_length = tf.fill([tf.shape(prob)[1]], tf.shape(prob)[0]) # input seq length, batch size
ctc = tf.py_func(CTCUtils.compute_ctc_from_labels, [self.input_labels], [tf.int64, tf.int64, tf.int64])
ctc_labels = tf.to_int32(tf.SparseTensor(ctc[0], ctc[1], ctc[2]))
predictions = tf.to_int32(tf.nn.ctc_beam_search_decoder(prob, data_length, merge_repeated=False, beam_width=10)[0][0])
tf.sparse_tensor_to_dense(predictions, default_value=-1, name='d_predictions')
tf.reduce_mean(tf.edit_distance(predictions, ctc_labels, normalize=False), name='error_rate')
self.loss = tf.reduce_mean(tf.compat.v1.nn.ctc_loss(inputs=prob, labels=ctc_labels, sequence_length=data_length, ctc_merge_repeated=True), name='loss')
self.learning_rate = tf.train.piecewise_constant(self.global_step, [150000, 200000],[config.train.learning_rate, 0.1 * config.train.learning_rate,0.01 * config.train.learning_rate])
self.opt_loss = tf.contrib.layers.optimize_loss(self.loss, self.global_step, self.learning_rate, config.train.opt_type, config.train.grad_noise_scale, name='train_step')
self.sess.run(tf.global_variables_initializer())
The graph has a few convolution layers before ROI pooling and ctc loss is used for optimization.
The concern is whether convolution layers before ROI pooling are optimized in back propagation.
According to discussion here, ROI pooling layer itself is differentiable.
But when the graph is plotted in tensorboard, the graph is disconnected after ROI pooling layer.
How can I check and make sure the conv layers before ROI pooling are update in optimization?

The issue was solved by putting conv layers after RoiPooling.
The first graph was used only for feature extraction using RoiPooling. RoiPooling output size was set bigger dimensions. Then those outputs were used as inputs to the second graph. There conv layers were placed. So that I have weights to optimize.
The modified graph is shown below.

Related

Training MobilNetV3 SSDLite and poor performance

Hi guys I'm a newbie in deep learning and I'd tried to make my own object detection model MobileNetV3 SSDlite training pascal voc 2007 dataset in tensorflow datasets(tfds).
However, It's performance is poor(mAP 0.3~0.4). And I wonder the reason why it has poor mAP.
The reasons I thought of it:
Training data problem
#I use tfds pascal voc data
#train set
(train_dataset, train_dataset2), ds_info = tfds.load(name="voc/2007", split=["train", "validation"], with_info=True)
train_dataset = train_dataset.concatenate(train_dataset2)
#test set
val_dataset = tfds.load(name="voc/2007", split="test", with_info=False)
Is there any difference between tfds voc2007 and official voc2007 dataset?
Detection model problem
I use imagenet pretrained mobilenetv3 large as a backbone and extract feature from ("multiply_11", "multiply_17") layers which resolution is 19x19 and 10x10
input_tensor = Input((300, 300, 3))
backbone = tf.keras.applications.MobileNetV3Large(include_top=False, alpha=0.75, input_tensor = input_tensor, input_shape = (300, 300, 3))
And extract extra feature map as:
def InvertedResidualBlock(filters, kernel_size, strides, padding):
f1 = Conv2D(filters=filters//2, kernel_size=1, strides = 1, padding=padding, kernel_regularizer=l2(4e-5))
f2 = BatchNormalization()
f3 = ReLU(6.)
f4 = SeparableConv2D(filters = filters,
kernel_size =
kernel_size,
strides = strides,
padding=padding,
#depthwise_regularizer = l2(4e-5),
#pointwise_regularizer = l2(4e-5)
)
f5 = BatchNormalization()
f6 = ReLU(6.)
return reduce(lambda f, g: lambda *args, **kwargs: g(f(*args, **kwargs)), (f1, f2, f3, f4, f5, f6))
class HFPNeckBuilder():
def __init__(self, config) -> None:
self.isLite = config["model_config"]["neck"]["isLite"]
if self.isLite:
self.baseConvBlock = SeparableConvBlock
self.baseConv = SeparableConv
else:
self.baseConvBlock = ConvBlock
self.baseConv = Conv
def __call__(self, ex_stage_output):
Feature_map1 = ex_stage_output[0]
Feature_map2 = ex_stage_output[-1]
Feature_map3 = InvertedResidualBlock(filters= 512, strides = 2, kernel_size = 3, padding="same")(Feature_map2)
Feature_map4 = InvertedResidualBlock(filters= 256, strides = 2, kernel_size = 3, padding="same")(Feature_map3)
Feature_map5 = InvertedResidualBlock(filters= 256, strides = 2, kernel_size = 3, padding="same")(Feature_map4)
Feature_map6 = InvertedResidualBlock(filters= 128, strides = 2, kernel_size = 3, padding="same")(Feature_map5)
return [Feature_map1, Feature_map2, Feature_map3, Feature_map4, Feature_map5, Feature_map6]
Is there any problem in backbone and neck(feature extractor which was mentioned in MobileNetV2 paper)
3.optimizer and lr schedule problem
I had experimented all combination of below setting:
Batch Size:32
Optimizer: SGD(momentum 0.9), Adam, RAdam, RAdam+LookAhead, RAdam+LookAhead+GradientCentralize
Lr Schedule: Cosine decay, Cosine restart, Cosine Warmup and decay with initial learning rate 0.1 ~1e-4
epochs: 150epochs ~ 5000epochs
But all experiments shows poor mAP(0.3~0.4).
Loss Function Problem
I tried two loss function Hard Negative mining + Smooth L1 loss and Focal loss + Smooth L1 loss. I conferd I referred keras official example code for focal loss and pierluigiferrari github for HardNegative mining.
Here is my focal loss code
class SSDLoss(tf.losses.Loss):
'''
Loss with FocalLoss rather than Hard Negative Mining. It is refered from keras reference.
Gamma makes clear the difference between Good detection and Bad detection, if gamma==0 -> crossEntrophy
alpha is weighting factor, if alpha = 0.25 ->BackGround: 0.25, ForeGround: 0.75
'''
def __init__(self, num_classes=80, alpha=0.25, gamma=2.0, config = None):
super(SSDLoss, self).__init__(reduction="auto", name="SSDLoss")
self.alpha = alpha
self.gamma = gamma
self._num_classes = num_classes
self.anchor_boxes = AnchorBox(config).get_anchors()[None, ...]
self._box_variance = [0.1, 0.1, 0.2, 0.2]
def call(self, y_true, y_pred):
y_pred = tf.cast(y_pred, dtype=tf.float32)
box_labels = y_true[:, :, :4]
box_predictions = y_pred[:, :, :4]
cls_labels = tf.one_hot(
tf.cast(y_true[:, :, 4], dtype=tf.int32),
depth=self._num_classes,
dtype=tf.float32,
)
cls_predictions = y_pred[:, :, 4:]
positive_mask = tf.cast(tf.greater(y_true[:, :, 4], -1.0), dtype=tf.float32)
ignore_mask = tf.cast(tf.equal(y_true[:, :, 4], -2.0), dtype=tf.float32)
clf_loss = self.Focal_ClassificationLoss(cls_labels, cls_predictions)
box_loss = self.SmoothL1_BoxLoss(box_labels, box_predictions)
clf_loss = tf.where(tf.equal(ignore_mask, 1.0), 0.0, clf_loss)
box_loss = tf.where(tf.equal(positive_mask, 1.0), box_loss, 0.0)
normalizer = tf.reduce_sum(positive_mask, axis=-1)
clf_loss = tf.math.divide_no_nan(tf.reduce_sum(clf_loss, axis=-1), normalizer)
box_loss = tf.math.divide_no_nan(tf.reduce_sum(box_loss, axis=-1), normalizer)
loss = clf_loss + box_loss
return loss
def SmoothL1_BoxLoss(self, y_true_Box, y_pred_box):
difference = y_true_Box - y_pred_box
absolute_difference = tf.abs(difference) - 0.5
squared_difference = 0.5 * difference ** 2
loss = tf.where(
tf.less(absolute_difference, 1.0),
squared_difference,
absolute_difference)
return tf.reduce_sum(loss, axis=-1)
def Focal_ClassificationLoss(self, y_true_Cls, y_pred_Cls):
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true_Cls, logits=y_pred_Cls)
probs = tf.nn.sigmoid(y_pred_Cls)
alpha = tf.where(tf.equal(y_true_Cls, 1.0), self.alpha, (1.0 - self.alpha))
pt = tf.where(tf.equal(y_true_Cls, 1.0), probs, 1 - probs)
loss = alpha * tf.pow(1.0 - pt, self.gamma) * cross_entropy
return tf.reduce_sum(loss, axis=-1)
I changed it a little bit from the keras official website example code in "Object Detection with RetinaNet".
Question
Is there any advice for improving mAP up to 0.6~0.7? thank you for reading my question.

why if we use "tf.make_template()" in training stage, we must use tf.make_template() again in testing stage

I defined a model function which named "drrn_model". While I was training my model, I use model by:
shared_model = tf.make_template('shared_model', drrn_model)
train_output = shared_model(train_input, is_training=True)
It begin training step by step, and I can restore .ckpt file to the model when I want to continue to train the model from an old point.
But there is a problem when I test my trained model.
I use the code below directly without using tf.make_template:
train_output = drrn_model(train_input, is_training=False)
Then the terminal gave me a lots of NotFoundError like "Key LastLayer/Variable_2 not found in checkpoint".
But when I use
shared_model = tf.make_template('shared_model', drrn_model)
output_tensor = shared_model(input_tensor,is_training=False)
It can test normally.
So why we must use tf.make_template() again in testing stage. What is the difference between drrn_model and make_template when we construct our model.
And there is another question: the BN layer in tensorflow.
I have tried many ways but the outputs is always wrong(always worse then the version without BN layer).
There is my newest version of model with BN layer:
tensor = None
def drrn_model(input_tensor, is_training):
with tf.device("/gpu:0"):
with tf.variable_scope("FirstLayer"):
conv_0_w = tf.get_variable("conv_w", [3, 3, 1, 128], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
tensor = tf.nn.conv2d(tf.nn.relu(batchnorm(input_tensor, is_training= is_training)), conv_0_w, strides=[1,1,1,1], padding="SAME")
first_layer = tensor
### recursion ###
with tf.variable_scope("recycle", reuse=False):
tensor = drrnblock(first_layer, tensor, is_training)
for i in range(1,10):
with tf.variable_scope("recycle", reuse=True):
tensor = drrnblock(first_layer, tensor, is_training)
### end layer ###
with tf.variable_scope("LastLayer"):
conv_end_w = tf.get_variable("conv_w", [3, 3, 128, 1], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
conv_end_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(tensor, is_training= is_training)), conv_end_w, strides=[1, 1, 1, 1], padding='SAME')
tensor = tf.add(input_tensor,conv_end_layer)
return tensor
def drrnblock(first_layer, input_layer, is_training):
conv1_w = tf.get_variable("conv1__w", [3, 3, 128, 128], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
conv1_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(input_layer, is_training= is_training)), conv1_w, strides=[1,1,1,1], padding= "SAME")
conv2_w = tf.get_variable("conv2__w", [3, 3, 128, 128], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
conv2_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(conv1_layer, is_training=is_training)), conv2_w, strides=[1, 1, 1, 1], padding="SAME")
tensor = tf.add(first_layer, conv2_layer)
return tensor
def batchnorm(inputs, is_training, decay = 0.999):# there is my BN layer
scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)
if is_training:
batch_mean, batch_var = tf.nn.moments(inputs,[0,1,2])
print("batch_mean.shape: ", batch_mean.shape)
train_mean = tf.assign(pop_mean, pop_mean*decay+batch_mean*(1-decay))
train_var = tf.assign(pop_var, pop_var*decay+batch_var*(1-decay))
with tf.control_dependencies([train_mean, train_var]):
return tf.nn.batch_normalization(inputs,batch_mean,batch_var,beta,scale,variance_epsilon=1e-3)
else:
return tf.nn.batch_normalization(inputs,pop_mean,pop_var,beta,scale,variance_epsilon=1e-3)
Please tell me where is wrong in my code.
Thanks a lot!!

Fully Convolutional Network, Training Error

I apologize that I'm not good at English.
I'm trying to build my own Fully Convolutional Network using TensorFlow.
But I have difficulties on training this model with my own image data, whereas the MNIST data worked properly.
Here is my FCN model code: (Not using pre-trained or pre-bulit model)
import tensorflow as tf
import numpy as np
Loading MNIST Data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
images_flatten = tf.placeholder(tf.float32, shape=[None, 784])
images = tf.reshape(images_flatten, [-1,28,28,1]) # CNN deals with 3 dimensions
labels = tf.placeholder(tf.float32, shape=[None, 10])
keep_prob = tf.placeholder(tf.float32) # Dropout Ratio
Convolutional Layers
# Conv. Layer #1
W1 = tf.Variable(tf.truncated_normal([3, 3, 1, 4], stddev = 0.1))
b1 = tf.Variable(tf.truncated_normal([4], stddev = 0.1))
FMA = tf.nn.conv2d(images, W1, strides=[1,1,1,1], padding='SAME')
# FMA stands for Fused Multiply Add, which means convolution
RELU = tf.nn.relu(tf.add(FMA, b1))
POOL = tf.nn.max_pool(RELU, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# Conv. Layer #2
W2 = tf.Variable(tf.truncated_normal([3, 3, 4, 8], stddev = 0.1))
b2 = tf.Variable(tf.truncated_normal([8], stddev = 0.1))
FMA = tf.nn.conv2d(POOL, W2, strides=[1,1,1,1], padding='SAME')
RELU = tf.nn.relu(tf.add(FMA, b2))
POOL = tf.nn.max_pool(RELU, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# Conv. Layer #3
W3 = tf.Variable(tf.truncated_normal([7, 7, 8, 16], stddev = 0.1))
b3 = tf.Variable(tf.truncated_normal([16], stddev = 0.1))
FMA = tf.nn.conv2d(POOL, W3, strides=[1,1,1,1], padding='VALID')
RELU = tf.nn.relu(tf.add(FMA, b3))
# Dropout
Dropout = tf.nn.dropout(RELU, keep_prob)
# Conv. Layer #4
W4 = tf.Variable(tf.truncated_normal([1, 1, 16, 10], stddev = 0.1))
b4 = tf.Variable(tf.truncated_normal([10], stddev = 0.1))
FMA = tf.nn.conv2d(Dropout, W4, strides=[1,1,1,1], padding='SAME')
LAST_RELU = tf.nn.relu(tf.add(FMA, b4))
Summary: [Conv-ReLU-Pool] - [Conv-ReLU-Pool] - [Conv-ReLU] - [Dropout] - [Conv-ReLU]
Define Loss, Accuracy
prediction = tf.squeeze(LAST_RELU)
# Because FCN returns (1 x 1 x class_num) in training
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(prediction, labels))
# First arg is 'logits=' and the other one is 'labels='
optimizer = tf.train.AdamOptimizer(0.001)
train = optimizer.minimize(loss)
label_max = tf.argmax(labels, 1)
pred_max = tf.argmax(prediction, 1)
correct_pred = tf.equal(pred_max, label_max)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
Training Model
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(10000):
image_batch, label_batch = mnist.train.next_batch(100)
sess.run(train, feed_dict={images: image_batch, labels: label_batch, keep_prob: 0.8})
if i % 10 == 0:
tr = sess.run([loss, accuracy], feed_dict={images: image_batch, labels: label_batch, keep_prob: 1.0})
print("Step %d, Loss %g, Accuracy %g" % (i, tr[0], tr[1]))
Loss: 0.784 (Approximately)
Accuracy: 94.8% (Approximately)
The problem is that, training this model with MNIST data worked very well, but with my own data, loss is always same(0.6319), and the output layer is always 0.
There is no difference with the code, excepting for the third convolutional layer's filter size. This filter size and input size which is compressed by previous pooling layers, must have same width & height. That's why the filter size in this layer is [7,7].
What is wrong with my model?..
The only different code between two cases (MNIST, my own data) is:
Placeholder
My own data has (128 x 64 x 1) and the label is 'eyes', 'not_eyes'
images = tf.placeholder(tf.float32, [None, 128, 64, 1])
labels = tf.placeholder(tf.int32, [None, 2])
3rd Convolutional Layer
W3 = tf.Variable(tf.truncated_normal([32, 16, 8, 16], stddev = 0.1))
Feeding (Batch)
image_data, label_data = input_data.get_batch(TRAINING_FILE, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(10000):
image_batch, label_batch = sess.run([image_data, label_data])
sess.run(train, feed_dict={images: image_batch, labels: label_batch, keep_prob: 0.8})
if i % 10 == 0: ... # Validation part is almost same, too...
coord.request_stop()
coord.join(threads)
Here "input_data" is an another python file in the same directory, and "get_batch(TRAINING_FILE, 10)" is the function that returns batch data. The code is:
def get_input_queue(txtfile_name):
images = []
labels = []
for line in open(txtfile_name, 'r'): # Here txt file has data's path, label, label number
cols = re.split(',|\n', line)
labels.append(int(cols[2]))
images.append(tf.image.decode_jpeg(tf.read_file(cols[0]), channels = 1))
input_queue = tf.train.slice_input_producer([images, labels], shuffle = True)
return input_queue
def get_batch(txtfile_name, batch_size):
input_queue = get_input_queue(txtfile_name)
image = input_queue[0]
label = input_queue[1]
image = tf.reshape(image, [128, 64, 1])
batch_image, batch_label = tf.train.batch([image, label], batch_size)
batch_label_one_hot = tf.one_hot(tf.to_int64(batch_label), 2, on_value=1.0, off_value=0.0)
return batch_image, batch_label_one_hot
It seems not to have any problem .... :( Please Help me..!!
Are your inputs scaled appropriately?. The jpegs are in [0-255] range and it needs to be scaled to [-1 - 1]. You can try:
image = tf.reshape(image, [128, 64, 1])
image = tf.scalar_mul((1.0/255), image)
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.0)
What is the accuracy you are getting with your model for MNIST? It would be helpful if you post the code. Are you using the trained model to evaluate the output for your own data.
A general suggestion on setting up the convolution model is provided here.
Here is the model suggestion according to the article :-
INPUT -> [[CONV -> RELU]*N -> POOL?]*M -> [FC -> RELU]*K -> FC
Having more than one layers of CONV->RELU pair before pooling improves learning complex features. Try with N=2 instead of 1.
Some other suggestions:
While you are preparing your data reduce it to smaller size than 128x64. Try same size as the MNIST data ..
image = tf.reshape(image, [28, 28, 1])
If your eye/noeye image is color, then convert it to greyscale and normalize the values to unity range. You can do this using numpy or tf, here is how using numpy
grayscale-->
img = np.dot(np.array(img, dtype='float32'), [[0.2989],[0.5870],[0.1140]])
normalize-->
mean = np.mean(img, dtype='float32')
std = np.std(img, dtype='float32', ddof=1)
if std < 1e-4: std = 1.
img = (img - mean) / std

Problems with reshape in GAN's discriminator (Tensorflow)

I was trying to implement various GANs in Tensorflow (after doing it successfully in PyTorch), and I am having some problems while coding the discriminator part.
The code of the discriminator (very similar to the MNIST CNN tutorial) is:
def discriminator(x):
"""Compute discriminator score for a batch of input images.
Inputs:
- x: TensorFlow Tensor of flattened input images, shape [batch_size, 784]
Returns:
TensorFlow Tensor with shape [batch_size, 1], containing the score
for an image being real for each input image.
"""
with tf.variable_scope("discriminator"):
x = tf.reshape(x, [tf.shape(x)[0], 28, 28, 1])
h_1 = leaky_relu(tf.layers.conv2d(x, 32, 5))
m_1 = tf.layers.max_pooling2d(h_1, 2, 2)
h_2 = leaky_relu(tf.layers.conv2d(m_1, 64, 5))
m_2 = tf.layers.max_pooling2d(h_2, 2, 2)
m_2 = tf.contrib.layers.flatten(m_2)
h_3 = leaky_relu(tf.layers.dense(m_2, 4*4*64))
logits = tf.layers.dense(h_3, 1)
return logits
while the code for the generator (architecture of InfoGAN paper) is:
def generator(z):
"""Generate images from a random noise vector.
Inputs:
- z: TensorFlow Tensor of random noise with shape [batch_size, noise_dim]
Returns:
TensorFlow Tensor of generated images, with shape [batch_size, 784].
"""
with tf.variable_scope("generator"):
batch_size = tf.shape(z)[0]
fc = tf.nn.relu(tf.layers.dense(z, 1024))
bn_1 = tf.layers.batch_normalization(fc)
fc_2 = tf.nn.relu(tf.layers.dense(bn_1, 7*7*128))
bn_2 = tf.layers.batch_normalization(fc_2)
bn_2 = tf.reshape(bn_2, [batch_size, 7, 7, 128])
c_1 = tf.nn.relu(tf.contrib.layers.convolution2d_transpose(bn_2, 64, 4, 2, padding='valid'))
bn_3 = tf.layers.batch_normalization(c_1)
c_2 = tf.tanh(tf.contrib.layers.convolution2d_transpose(bn_3, 1, 4, 2, padding='valid'))
So far, so good. The number of parameters is correct (checked it). However, I am having some problems in the next block of code:
tf.reset_default_graph()
# number of images for each batch
batch_size = 128
# our noise dimension
noise_dim = 96
# placeholder for images from the training dataset
x = tf.placeholder(tf.float32, [None, 784])
# random noise fed into our generator
z = sample_noise(batch_size, noise_dim)
# generated images
G_sample = generator(z)
with tf.variable_scope("") as scope:
#scale images to be -1 to 1
logits_real = discriminator(preprocess_img(x))
# Re-use discriminator weights on new inputs
scope.reuse_variables()
logits_fake = discriminator(G_sample)
# Get the list of variables for the discriminator and generator
D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')
# get our solver
D_solver, G_solver = get_solvers()
# get our loss
D_loss, G_loss = gan_loss(logits_real, logits_fake)
# setup training steps
D_train_step = D_solver.minimize(D_loss, var_list=D_vars)
G_train_step = G_solver.minimize(G_loss, var_list=G_vars)
D_extra_step = tf.get_collection(tf.GraphKeys.UPDATE_OPS, 'discriminator')
G_extra_step = tf.get_collection(tf.GraphKeys.UPDATE_OPS, 'generator')
The problem I am getting is where I am doing the reshape in the discriminator, and the error says:
ValueError: None values not supported.
Sure, the value for the batch_size is None (btw, the same error I am getting even where I am changing it to some number), but shape function (as far as I understand) should get the dynamic shape, not the static one. I think that I am a bit lost here.
For what is worth, I am giving here the link to the entire notebook I am working: https://github.com/TheRevanchist/GANs/blob/master/GANs-TensorFlow.ipynb if someone wants to look at it.
NB: The code here is part of the Stanford CS231n assignment. I have no affiliation with Stanford though, so it isn't homework cheating (proof: the course is finished months ago).
The generator seems to be the problem. The output size should match the discriminator. And the other issues are batch norm should be applied before the activation unit. I have modified the code:
with tf.variable_scope("generator"):
fc = tf.layers.dense(z, 4*4*128)
bn_1 = leaky_relu(tf.layers.batch_normalization(fc))
bn_1 = tf.reshape(bn_1, [-1, 4, 4, 128])
c_1 = tf.layers.conv2d_transpose(bn_1, 64, 5, strides=2, padding='same')
bn_2 = leaky_relu(tf.layers.batch_normalization(c_1))
c_2 = tf.layers.conv2d_transpose(bn_2, 32, 5, strides=2, padding='same')
bn_3 = leaky_relu(tf.layers.batch_normalization(c_2))
c_3 = tf.layers.conv2d_transpose(bn_3, 1, 5, strides=2, padding='same')
c_3 = tf.layers.batch_normalization(c_3)
c_3 = tf.image.resize_images(c_3, (28, 28))
c_3 = tf.contrib.layers.flatten(c_3)
c_3 = tf.tanh(c_3)
return c_3
Your code gives the below output when run with the above changes
Instead of passing None to reshape you must pass -1.
So this:
x = tf.reshape(x, [tf.shape(x)[0], 28, 28, 1])
becomes
x = tf.reshape(x, [-1, 28, 28, 1])
and this:
bn_2 = tf.reshape(bn_2, [batch_size, 7, 7, 128])
becomes:
bn_2 = tf.reshape(bn_2, [-1, 7, 7, 128])
It will infer the batch size from the rest of the shape you provided.

how to visualize feature map of CNN for tensorflow? [duplicate]

Similarly to the Caffe framework, where it is possible to watch the learned filters during CNNs training and it's resulting convolution with input images, I wonder if is it possible to do the same with TensorFlow?
A Caffe example can be viewed in this link:
http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb
Grateful for your help!
To see just a few conv1 filters in Tensorboard, you can use this code (it works for cifar10)
# this should be a part of the inference(images) function in cifar10.py file
# conv1
with tf.variable_scope('conv1') as scope:
kernel = _variable_with_weight_decay('weights', shape=[5, 5, 3, 64],
stddev=1e-4, wd=0.0)
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv1)
with tf.variable_scope('visualization'):
# scale weights to [0 1], type is still float
x_min = tf.reduce_min(kernel)
x_max = tf.reduce_max(kernel)
kernel_0_to_1 = (kernel - x_min) / (x_max - x_min)
# to tf.image_summary format [batch_size, height, width, channels]
kernel_transposed = tf.transpose (kernel_0_to_1, [3, 0, 1, 2])
# this will display random 3 filters from the 64 in conv1
tf.image_summary('conv1/filters', kernel_transposed, max_images=3)
I also wrote a simple gist to display all 64 conv1 filters in a grid.