tf.multinomial outputs numbers outside the expected range - tensorflow

I am working with the OpenAI gym environment (using policy gradient). My network is outputting an action which is higher than the possible action range.
# Policy network: two conv/max-pool stages and fully connected layers that map
# a batch of 50x70 single-channel observations to one logit per action.
n_outputs = 9  # number of discrete actions; sampled indices are 0..n_outputs-1
learning_rate = 0.01
initializer = tf.variance_scaling_initializer()
X = tf.placeholder(tf.float32, shape=[None, 50, 70, 1])
network = tflearn.conv_2d(X, 32, 5, strides=2, activation='relu')
network = tflearn.max_pool_2d(network, 2)
network = tflearn.conv_2d(network, 32, 5, strides=2, activation='relu')
network = tflearn.max_pool_2d(network, 2)
network = tflearn.fully_connected(network, 256, activation='relu')
hidden = tf.layers.dense(network, 64, activation=tf.nn.relu, kernel_initializer=initializer)
logits = tf.layers.dense(hidden, n_outputs)
# Normalized action probabilities, kept for inspection.
outputs = tf.nn.softmax(logits)
# tf.multinomial expects unnormalized log-probabilities, so sample from the
# logits. Feeding it the softmax output would apply softmax a second time and
# draw from a much flatter distribution than the policy actually predicts.
action = tf.multinomial(logits, num_samples=1)
It outputs 9, which creates an error in the gym environment.
The full code.

tf.multinomial will sample outside of the valid range if it encounters a numerical error; in other words, you have NaNs in your graph.

Related

Tensorflow CNN model incompatible shape error

I have an array with a shape of [274 documents, 439 equal length sentences per document, 384-dimensional sbert embeddings per sentence]. I'm trying to fit a CNN model that predicts a binary value per document.
Below is the model architecture:
# Multi-branch CNN over a (439, 384, 1) grid: three parallel convolutions with
# window heights 5, 4 and 3, each max-pooled, then concatenated and classified.
# NOTE(review): the accompanying description says the data is already embedded
# with shape (274, 439, 384), while this Embedding layer and the (439,) Input
# expect integer ids -- that is consistent with the reported "incompatible shape
# (None, 439, 384)" warning. Confirm the intended input before reusing this.
embedding_layer = Embedding(274, 384, input_length=439)
sequence_input = Input(shape=(439,))
embedded_sequences = embedding_layer(sequence_input)
# first conv branch: kernel spans 5 rows and the full 384-wide embedding
# add a trailing channel axis so Conv2D can be applied
embedded_sequences = Reshape((439, 384, 1))(embedded_sequences)
x = Conv2D(100, (5, 384), activation='relu')(embedded_sequences)
# pool window of 439 - 5 + 1 rows
x = MaxPooling2D((439 - 5 + 1, 1))(x)
# second conv branch: kernel spans 4 rows
y = Conv2D(100, (4, 384), activation='relu')(embedded_sequences)
y = MaxPooling2D((439 - 4 + 1, 1))(y)
# third conv branch: kernel spans 3 rows
z = Conv2D(100, (3, 384), activation='relu')(embedded_sequences)
z = MaxPooling2D((439 - 3 + 1, 1))(z)
# concatenate the outputs of the three branches
alpha = concatenate([x,y,z])
# flatten the concatenated values into one feature vector per sample
alpha = Flatten()(alpha)
# add dropout (rate 0.5)
alpha = Dropout(0.5)(alpha)
# make predictions
# NOTE(review): this head has 274 softmax outputs, but the description asks for
# one binary value per document -- confirm the intended output layer and loss.
preds = Dense(274, activation='softmax')(alpha)
# build model
model = Model(sequence_input, preds)
adadelta = optimizers.Adadelta()
model.compile(loss='categorical_crossentropy',
optimizer=adadelta,
metrics=['acc'])
# train for 25 epochs, validating on the held-out arrays
model.fit(x=X_train_sent_emb_3m, y=y_train_sent_emb_3m, epochs=25 , validation_data=(X_test_sent_emb_3m, y_test_sent_emb_3m))
The model compiles but when I run the fit call I'm getting the following error message:
Epoch 1/25
WARNING:tensorflow:Model was constructed with shape (None, 439) for input KerasTensor(type_spec=TensorSpec(shape=(None, 439), dtype=tf.float32, name='input_15'), name='input_15', description="created by layer 'input_15'"), but it was called on an input with incompatible shape (None, 439, 384).
...
ValueError: total size of new array must be unchanged, input_shape = [439, 384, 384], output_shape = [439, 384, 1]
Any suggestions on what I need to change to make the model work for the shape of the data?

Roi pooling and backpropagation

I have implemented ROI pooling at my graph. The code is as follows.
def __init__(self, fatness, image_shape, vocab, r_vocab, num_classes, rnn_cells_num):
    """Build the graph: conv stack -> ROI pooling -> classification head -> CTC loss.

    Args:
        fatness: number of output channels of each 3x3 convolution placed
            before ROI pooling.
        image_shape: not used by this constructor.
        vocab: stored on ``CTCUtils.vocab``.
        r_vocab: stored on ``CTCUtils.r_vocab``.
        num_classes: number of output channels of the classification
            convolutions.
        rnn_cells_num: number of units of the fully connected "pattern" layer.
    """
    CTCUtils.vocab = vocab
    CTCUtils.r_vocab = r_vocab
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.input_labels = tf.placeholder(dtype=tf.string, shape=(config.train.input_labels_size,))
    self.input_dat = tf.placeholder(
        dtype=tf.float32,
        shape=(None, config.train.extracted_feature_height, config.train.extracted_feature_width, 512))
    self.in_boxes = tf.placeholder(dtype=tf.float32, shape=(config.train.input_labels_size, 5))
    self.num_classes = num_classes
    self.rnn_cells_num = rnn_cells_num
    self.poolheight = 1
    self.poolwidth = 32
    self.sess = tf.Session(graph=tf.get_default_graph())

    ########################################################
    #########CONV layers before ROI pooling#################
    ########################################################
    with slim.arg_scope([slim.conv2d, slim.max_pool2d]):
        net = slim.repeat(
            self.input_dat, 4, slim.conv2d, fatness, [3, 3],
            padding='SAME',
            scope='conv6',
            weights_regularizer=slim.l2_regularizer(config.weight_decay),
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),
            activation_fn=tf.nn.relu)
    self.in_boxes = tf.dtypes.cast(self.in_boxes, tf.int32)

    ########################################################
    #######ROI pooling layer################################
    ########################################################
    rec_fmap_clone = roi_pooling(net, self.in_boxes, pool_height=self.poolheight,
                                 pool_width=self.poolwidth)  # shape is (1, 20, 256, 1, 32)
    if rec_fmap_clone.get_shape().ndims is not None:
        # Drop the leading axis with a TensorFlow op. NumPy functions must not
        # be applied to a symbolic tensor: they cannot produce a graph node, so
        # the layers below would not be connected to the ROI pooling output.
        pooled = tf.squeeze(rec_fmap_clone, axis=[0])
        self.rec_fmap = tf.transpose(pooled, perm=[0, 2, 3, 1])
    else:
        # NOTE(review): this fallback feeds a constant tensor to the head, so
        # nothing before this point receives gradients when it is taken.
        # Confirm whether roi_pooling can really return an unknown shape.
        self.rec_fmap = tf.ones([config.train.input_labels_size, 1, 32, 256], tf.float32)

    with slim.arg_scope([slim.conv2d],
                        normalizer_fn=slim.batch_norm,
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        classes = slim.conv2d(self.rec_fmap, self.num_classes, [1, 13])
        pattern = slim.fully_connected(slim.flatten(classes), self.rnn_cells_num)  # patterns number
        width = int(self.rec_fmap.get_shape()[2])
        pattern = tf.reshape(pattern, (-1, 1, 1, self.rnn_cells_num))
        pattern = tf.tile(pattern, [1, 1, width, 1])
        inf = tf.concat(axis=3, values=[classes, pattern])  # skip connection over RNN
        inf = slim.conv2d(inf, self.num_classes, [1, 1], normalizer_fn=None,
                          activation_fn=None)  # fully convolutional linear activation
        inf = tf.squeeze(inf, [1])

    prob = tf.transpose(inf, (1, 0, 2))  # prepare for CTC
    data_length = tf.fill([tf.shape(prob)[1]], tf.shape(prob)[0])  # input seq length, batch size
    ctc = tf.py_func(CTCUtils.compute_ctc_from_labels, [self.input_labels], [tf.int64, tf.int64, tf.int64])
    ctc_labels = tf.to_int32(tf.SparseTensor(ctc[0], ctc[1], ctc[2]))
    predictions = tf.to_int32(
        tf.nn.ctc_beam_search_decoder(prob, data_length, merge_repeated=False, beam_width=10)[0][0])
    # These two ops are created only for their names so they can be fetched later.
    tf.sparse_tensor_to_dense(predictions, default_value=-1, name='d_predictions')
    tf.reduce_mean(tf.edit_distance(predictions, ctc_labels, normalize=False), name='error_rate')
    self.loss = tf.reduce_mean(
        tf.compat.v1.nn.ctc_loss(inputs=prob, labels=ctc_labels, sequence_length=data_length,
                                 ctc_merge_repeated=True),
        name='loss')
    self.learning_rate = tf.train.piecewise_constant(
        self.global_step, [150000, 200000],
        [config.train.learning_rate, 0.1 * config.train.learning_rate, 0.01 * config.train.learning_rate])
    self.opt_loss = tf.contrib.layers.optimize_loss(
        self.loss, self.global_step, self.learning_rate, config.train.opt_type,
        config.train.grad_noise_scale, name='train_step')

    # A Saver built without var_list captures the variables that exist when it
    # is constructed, so it is created only after the whole graph (including
    # the optimizer's variables) has been built.
    self.saver = tf.train.Saver()
    self.sess.run(tf.global_variables_initializer())
The graph has a few convolution layers before ROI pooling and ctc loss is used for optimization.
The concern is whether convolution layers before ROI pooling are optimized in back propagation.
According to discussion here, ROI pooling layer itself is differentiable.
But when the graph is plotted in tensorboard, the graph is disconnected after ROI pooling layer.
How can I check and make sure the conv layers before ROI pooling are updated during optimization?
The issue was solved by putting the conv layers after RoiPooling.
The first graph was used only for feature extraction using RoiPooling. The RoiPooling output size was set to larger dimensions. Those outputs were then used as inputs to the second graph, where the conv layers were placed, so that I have weights to optimize.
The modified graph is shown below.

Why, if we use "tf.make_template()" in the training stage, must we use tf.make_template() again in the testing stage?

I defined a model function named "drrn_model". While training my model, I use the model like this:
shared_model = tf.make_template('shared_model', drrn_model)
train_output = shared_model(train_input, is_training=True)
Training begins and proceeds step by step, and I can restore the .ckpt file into the model when I want to continue training from an earlier point.
But there is a problem when I test my trained model.
I use the code below directly without using tf.make_template:
train_output = drrn_model(train_input, is_training=False)
Then the terminal gave me a lot of NotFoundError messages like "Key LastLayer/Variable_2 not found in checkpoint".
But when I use
shared_model = tf.make_template('shared_model', drrn_model)
output_tensor = shared_model(input_tensor,is_training=False)
It can test normally.
So why must we use tf.make_template() again in the testing stage? What is the difference between calling drrn_model directly and calling it through make_template when we construct the model?
I also have another question, about the BN layer in TensorFlow.
I have tried many approaches, but the outputs are always wrong (always worse than the version without the BN layer).
Here is my newest version of the model with the BN layer:
tensor = None
def drrn_model(input_tensor, is_training):
    """Build the network: first conv, ten passes of ``drrnblock``, last conv, residual add.

    Args:
        input_tensor: input of the first layer; it is also added to the output
            of the last convolution.
        is_training: forwarded to every ``batchnorm`` call.

    Returns:
        The sum of ``input_tensor`` and the output of the "LastLayer" convolution.
    """
    unit_strides = [1, 1, 1, 1]
    with tf.device("/gpu:0"):
        with tf.variable_scope("FirstLayer"):
            first_kernel = tf.get_variable(
                "conv_w", [3, 3, 1, 128],
                initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
            normalized_input = batchnorm(input_tensor, is_training=is_training)
            first_layer = tf.nn.conv2d(tf.nn.relu(normalized_input), first_kernel,
                                       strides=unit_strides, padding="SAME")

        ### recursion ###
        # The first pass enters the "recycle" scope with reuse=False; the
        # remaining nine passes re-enter it with reuse=True.
        current = first_layer
        for step in range(10):
            with tf.variable_scope("recycle", reuse=(step > 0)):
                current = drrnblock(first_layer, current, is_training)

        ### end layer ###
        with tf.variable_scope("LastLayer"):
            last_kernel = tf.get_variable(
                "conv_w", [3, 3, 128, 1],
                initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
            normalized_features = batchnorm(current, is_training=is_training)
            residual = tf.nn.conv2d(tf.nn.relu(normalized_features), last_kernel,
                                    strides=unit_strides, padding='SAME')

        return tf.add(input_tensor, residual)
def drrnblock(first_layer, input_layer, is_training):
    """Apply two (batchnorm -> ReLU -> 3x3 conv) stages and add ``first_layer``.

    Args:
        first_layer: tensor added to the output of the second convolution.
        input_layer: tensor fed to the first stage.
        is_training: forwarded to both ``batchnorm`` calls.

    Returns:
        ``first_layer`` plus the output of the second convolution.
    """
    unit_strides = [1, 1, 1, 1]

    # First stage: 128 -> 128 channels.
    kernel_a = tf.get_variable(
        "conv1__w", [3, 3, 128, 128],
        initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
    stage_a_in = tf.nn.relu(batchnorm(input_layer, is_training=is_training))
    stage_a_out = tf.nn.conv2d(stage_a_in, kernel_a, strides=unit_strides, padding="SAME")

    # Second stage: 128 -> 128 channels.
    kernel_b = tf.get_variable(
        "conv2__w", [3, 3, 128, 128],
        initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
    stage_b_in = tf.nn.relu(batchnorm(stage_a_out, is_training=is_training))
    stage_b_out = tf.nn.conv2d(stage_b_in, kernel_b, strides=unit_strides, padding="SAME")

    return tf.add(first_layer, stage_b_out)
def batchnorm(inputs, is_training, decay=0.999):
    """Batch-normalize ``inputs`` over axes 0, 1 and 2.

    Four new variables sized by the last dimension of ``inputs`` are created on
    every call: a scale, an offset, and non-trainable population mean/variance.

    Args:
        inputs: tensor to normalize.
        is_training: Python truth value. When true, the statistics of the
            current batch are used and the population statistics are updated
            as ``pop * decay + batch * (1 - decay)``; otherwise the stored
            population statistics are used.
        decay: weight of the previous population value in that update.

    Returns:
        The normalized tensor.
    """
    channels = inputs.get_shape()[-1]
    gamma = tf.Variable(tf.ones([channels]))
    offset = tf.Variable(tf.zeros([channels]))
    running_mean = tf.Variable(tf.zeros([channels]), trainable=False)
    running_var = tf.Variable(tf.ones([channels]), trainable=False)

    if not is_training:
        return tf.nn.batch_normalization(inputs, running_mean, running_var, offset, gamma,
                                         variance_epsilon=1e-3)

    batch_mean, batch_var = tf.nn.moments(inputs, [0, 1, 2])
    print("batch_mean.shape: ", batch_mean.shape)
    update_mean = tf.assign(running_mean, running_mean * decay + batch_mean * (1 - decay))
    update_var = tf.assign(running_var, running_var * decay + batch_var * (1 - decay))
    # Tie the population updates to the output so they run whenever it is evaluated.
    with tf.control_dependencies([update_mean, update_var]):
        return tf.nn.batch_normalization(inputs, batch_mean, batch_var, offset, gamma,
                                         variance_epsilon=1e-3)
Please tell me what is wrong in my code.
Thanks a lot!!

Problems with reshape in GAN's discriminator (Tensorflow)

I was trying to implement various GANs in Tensorflow (after doing it successfully in PyTorch), and I am having some problems while coding the discriminator part.
The code of the discriminator (very similar to the MNIST CNN tutorial) is:
def discriminator(x):
    """Score a batch of flattened images as real or generated.

    Args:
        x: TensorFlow Tensor of flattened input images, shape [batch_size, 784].

    Returns:
        TensorFlow Tensor with shape [batch_size, 1], containing for each input
        image the score for that image being real.
    """
    with tf.variable_scope("discriminator"):
        # Restore the 28x28x1 image layout, keeping the (dynamic) batch size.
        num_images = tf.shape(x)[0]
        images = tf.reshape(x, [num_images, 28, 28, 1])

        # Two conv (5x5) + leaky ReLU + 2x2 max-pool stages.
        conv_a = leaky_relu(tf.layers.conv2d(images, 32, 5))
        pool_a = tf.layers.max_pooling2d(conv_a, 2, 2)
        conv_b = leaky_relu(tf.layers.conv2d(pool_a, 64, 5))
        pool_b = tf.layers.max_pooling2d(conv_b, 2, 2)

        # Dense head producing one logit per image.
        flat = tf.contrib.layers.flatten(pool_b)
        hidden = leaky_relu(tf.layers.dense(flat, 4*4*64))
        return tf.layers.dense(hidden, 1)
while the code for the generator (architecture of InfoGAN paper) is:
def generator(z):
    """Generate images from a random noise vector.

    Args:
        z: TensorFlow Tensor of random noise with shape [batch_size, noise_dim].

    Returns:
        TensorFlow Tensor of generated images, with shape [batch_size, 784].
    """
    with tf.variable_scope("generator"):
        batch_size = tf.shape(z)[0]
        fc = tf.nn.relu(tf.layers.dense(z, 1024))
        # NOTE(review): batch_normalization is called without training=True, so
        # it runs in inference mode (moving statistics) -- confirm this is intended.
        bn_1 = tf.layers.batch_normalization(fc)
        fc_2 = tf.nn.relu(tf.layers.dense(bn_1, 7*7*128))
        bn_2 = tf.layers.batch_normalization(fc_2)
        bn_2 = tf.reshape(bn_2, [batch_size, 7, 7, 128])
        # 'same' padding with stride 2 doubles the spatial size (7 -> 14 -> 28).
        # 'valid' padding would give 16 and then 34, which cannot be flattened
        # to the 784 values the discriminator expects.
        c_1 = tf.contrib.layers.convolution2d_transpose(
            bn_2, 64, 4, 2, padding='same', activation_fn=tf.nn.relu)
        bn_3 = tf.layers.batch_normalization(c_1)
        # The contrib layer applies ReLU unless told otherwise; pass tanh as its
        # activation so the output covers (-1, 1) instead of only [0, 1).
        c_2 = tf.contrib.layers.convolution2d_transpose(
            bn_3, 1, 4, 2, padding='same', activation_fn=tf.tanh)
        # Without an explicit return the function yields None, which makes any
        # later tf op on the generated sample fail.
        return tf.reshape(c_2, [batch_size, 784])
So far, so good. The number of parameters is correct (checked it). However, I am having some problems in the next block of code:
# Assemble the GAN training graph: inputs, generator sample, discriminator on
# real and generated images, losses, and one train step per network.
tf.reset_default_graph()
# number of images in each batch
batch_size = 128
# dimension of the noise vector
noise_dim = 96
# placeholder for flattened images from the training dataset
x = tf.placeholder(tf.float32, [None, 784])
# random noise fed into the generator
z = sample_noise(batch_size, noise_dim)
# generated images
# NOTE(review): generator() must return a tensor; if it returns None the
# discriminator call on G_sample below fails -- confirm against its definition.
G_sample = generator(z)
with tf.variable_scope("") as scope:
# preprocess_img is expected to scale images to the range -1 to 1
logits_real = discriminator(preprocess_img(x))
# re-use the discriminator weights on the generated inputs
scope.reuse_variables()
logits_fake = discriminator(G_sample)
# trainable variables of the discriminator and the generator, selected by scope name
D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')
# get the two solvers (discriminator, generator)
D_solver, G_solver = get_solvers()
# get the two losses from the real/fake logits
D_loss, G_loss = gan_loss(logits_real, logits_fake)
# training steps: each solver only updates its own network's variables
D_train_step = D_solver.minimize(D_loss, var_list=D_vars)
G_train_step = G_solver.minimize(G_loss, var_list=G_vars)
# ops registered in the UPDATE_OPS collection under each scope
D_extra_step = tf.get_collection(tf.GraphKeys.UPDATE_OPS, 'discriminator')
G_extra_step = tf.get_collection(tf.GraphKeys.UPDATE_OPS, 'generator')
The problem I am getting is where I am doing the reshape in the discriminator, and the error says:
ValueError: None values not supported.
Sure, the value for the batch_size is None (by the way, I get the same error even when I change it to a fixed number), but the shape function (as far as I understand) should return the dynamic shape, not the static one. I think that I am a bit lost here.
For what it's worth, here is the link to the entire notebook I am working on: https://github.com/TheRevanchist/GANs/blob/master/GANs-TensorFlow.ipynb if someone wants to look at it.
NB: The code here is part of the Stanford CS231n assignment. I have no affiliation with Stanford though, so it isn't homework cheating (proof: the course is finished months ago).
The generator seems to be the problem. Its output size should match what the discriminator expects. Another issue is that batch norm should be applied before the activation unit. I have modified the code:
# Revised generator body: dense projection, three stride-2 transposed
# convolutions, then resize to 28x28, flatten and squash with tanh.
with tf.variable_scope("generator"):
# project the noise to 4*4*128 values
fc = tf.layers.dense(z, 4*4*128)
# batch norm is applied before the activation here
bn_1 = leaky_relu(tf.layers.batch_normalization(fc))
# -1 lets reshape infer the batch size
bn_1 = tf.reshape(bn_1, [-1, 4, 4, 128])
c_1 = tf.layers.conv2d_transpose(bn_1, 64, 5, strides=2, padding='same')
bn_2 = leaky_relu(tf.layers.batch_normalization(c_1))
c_2 = tf.layers.conv2d_transpose(bn_2, 32, 5, strides=2, padding='same')
bn_3 = leaky_relu(tf.layers.batch_normalization(c_2))
c_3 = tf.layers.conv2d_transpose(bn_3, 1, 5, strides=2, padding='same')
c_3 = tf.layers.batch_normalization(c_3)
# resize to the 28x28 image size the discriminator reshapes to
c_3 = tf.image.resize_images(c_3, (28, 28))
c_3 = tf.contrib.layers.flatten(c_3)
c_3 = tf.tanh(c_3)
return c_3
Your code gives the below output when run with the above changes
Instead of passing None to reshape you must pass -1.
So this:
x = tf.reshape(x, [tf.shape(x)[0], 28, 28, 1])
becomes
x = tf.reshape(x, [-1, 28, 28, 1])
and this:
bn_2 = tf.reshape(bn_2, [batch_size, 7, 7, 128])
becomes:
bn_2 = tf.reshape(bn_2, [-1, 7, 7, 128])
It will infer the batch size from the rest of the shape you provided.

How to get CNN kernel values in Tensorflow

I am using the code below to create CNN layers.
conv1 = tf.layers.conv2d(inputs = input, filters = 20, kernel_size = [3,3],
padding = "same", activation = tf.nn.relu)
and I want to get the values of all kernels after training. It does not work if I simply do
kernels = conv1.kernel
So how should I retrieve the values of these kernels? I am also not sure what variables and methods conv2d has, since TensorFlow doesn't really document them in the conv2d class.
You can find all the variables in the list returned by tf.global_variables() and easily look up the variable you need.
If you wish to get these variables by name, declare a layer as:
conv_layer_1 = tf.layers.conv2d(activation=tf.nn.relu,
filters=10,
inputs=input_placeholder,
kernel_size=(3, 3),
name="conv1", # NOTE THE NAME
padding="same",
strides=(1, 1))
Recover the graph as:
gr = tf.get_default_graph()
Recover the kernel values as:
conv1_kernel_val = gr.get_tensor_by_name('conv1/kernel:0').eval()
Recover the bias values as:
conv1_bias_val = gr.get_tensor_by_name('conv1/bias:0').eval()
You mean you want to get the value of the weights for the conv1 layer.
You haven't actually defined the weights with conv2d; you need to do that. When I create a convolutional layer I use a function that performs all the necessary steps. Here's a copy/paste of the function I use to create each of my convolutional layers:
def _conv_layer(self, name, in_channels, filters, kernel, input_tensor, strides, dtype=tf.float32):
    """Create a SAME-padded convolution + bias + ReLU inside variable scope ``name``.

    Args:
        name: variable scope name; also used as prefix of the op names.
        in_channels: number of channels of ``input_tensor``.
        filters: number of output channels.
        kernel: side length of the square kernel.
        input_tensor: tensor to convolve.
        strides: strides passed to ``tf.nn.conv2d``.
        dtype: dtype of the weight and bias variables.

    Returns:
        A tuple ``(a, w_shape)``: the activation tensor and the kernel shape
        ``[kernel, kernel, in_channels, filters]`` as a list.
    """
    with tf.variable_scope(name):
        w = tf.get_variable("w", shape=[kernel, kernel, in_channels, filters],
                            initializer=tf.contrib.layers.xavier_initializer_conv2d(), dtype=dtype)
        b = tf.get_variable("b", shape=[filters], initializer=tf.constant_initializer(0.0), dtype=dtype)
        # The op name (name + "c", without underscore) is kept as-is so that
        # lookups by tensor name keep working.
        c = tf.nn.conv2d(input_tensor, w, strides, padding='SAME', name=name + "c")
        a = tf.nn.relu(c + b, name=name + "_a")
        w_shape = w.get_shape().as_list()
        # Parameter count = every kernel weight plus one bias per filter; all
        # four kernel dimensions must be multiplied.
        n_params = int(np.prod(w_shape)) + filters
        # A single pre-formatted string prints identically on Python 2 and 3.
        print("%s_a %s %s_w %s params %s" % (name, a.get_shape().as_list(), name, w_shape, n_params))
        return a, w_shape
This is what I use to define 5 convolutional layers. This example is straight out of my code, so note that it's 5 convolutional layers stacked without using max pooling or anything, with strides of 2 and 5x5 kernels.
conv1_a, _ = self._conv_layer("conv1", 3, 24, 5, self.imgs4d, [1, 2, 2, 1]) # 24.8 MiB/feature -> 540 x 960
conv2_a, _ = self._conv_layer("conv2", 24, 80, 5, conv1_a, [1, 2, 2, 1]) # 6.2 MiB -> 270 x 480
conv3_a, _ = self._conv_layer("conv3", 80, 256, 5, conv2_a, [1, 2, 2, 1]) # 1.5 MiB -> 135 x 240
conv4_a, _ = self._conv_layer("conv4", 256, 750, 5, conv3_a, [1, 2, 2, 1]) # 0.4 MiB -> 68 x 120
conv5_a, _ = self._conv_layer("conv5", 750, 2048, 5, conv4_a, [1, 2, 2, 1]) # 0.1 MiB -> 34 x 60
There's also a good tutorial on the tensorflow website on how to set up a convolutional network:
https://www.tensorflow.org/tutorials/deep_cnn
The direct answer to your question is that the weights for the convolutional layer are defined there as w, that's the tensor you're asking about if I understand you correctly.