I have implemented roi_pooling from this library.
After running roi_pooling as follows:
conv5_3 = net.end_points['conv5_3']
#implement ROI Pooling
input_boxes = tf.dtypes.cast(input_boxes, tf.int32)
pooled_features = roi_pooling(conv5_3, input_boxes, pool_height=5, pool_width=30)
pooled_features ends up with an unknown shape:
(Pdb) p pooled_features
<tf.Tensor 'RoiPooling:0' shape=<unknown> dtype=float32>
My network still needs to pass the pooled features through another recognition net after roi_pooling.
The recognition net is:
def recognitionnet(inputs, fatness = 64, dilation = True):
    """
    backbone net of vgg16
    """
    # End_points collect relevant activations for external use.
    end_points = {}
    # Original VGG-16 blocks.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d], padding='SAME'):
        # Block1
        net = slim.repeat(inputs, 2, slim.conv2d, fatness, [3, 3], scope='conv1')
        end_points['conv1_2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        end_points['pool1'] = net
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, fatness * 2, [3, 3], scope='conv2')
        end_points['conv2_2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        end_points['pool2'] = net
        # fc7 as conv
        net = slim.conv2d(net, fatness * 16, [1, 1], scope='fc7')
        end_points['fc7'] = net
    #model_summary()
    #from keras.utils.visualize_util import plot
    #plot(model, to_file='model.png')
    return net, end_points
But when pooled_features passes through the first layer of recognitionnet, I get an error:
TypeError: TypeErro...neType',)
What could be wrong when pooled_features is passed through net = slim.repeat(inputs, 2, slim.conv2d, fatness, [3, 3], scope='conv1')?
How can I solve the issue?
What I did was split the model into two graphs.
The first graph has the roi_pooling implementation and the second graph has recognitionnet.
Since recognitionnet is in a separate graph, we can use a placeholder as the input to the recognitionnet graph.
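A minimal sketch of that two-graph setup (names and shapes here are illustrative, assuming the pooled output is laid out as [num_rois, pool_height, pool_width, channels], with 512 channels for VGG-16 conv5_3 and pool_height=5, pool_width=30 as above):

import tensorflow as tf

# Graph 2: the recognition net driven by a placeholder instead of the RoiPooling op.
recog_graph = tf.Graph()
with recog_graph.as_default():
    # shape is an assumption: [num_rois, pool_height=5, pool_width=30, channels=512]
    pooled_input = tf.placeholder(tf.float32, shape=(None, 5, 30, 512), name='pooled_input')
    rec_net, rec_end_points = recognitionnet(pooled_input)  # recognitionnet as defined above

# At run time, evaluate the pooled features in the first graph's session,
# then feed the resulting numpy array into the recognition graph:
# pooled_value = sess1.run(pooled_features, feed_dict={...})
# with tf.Session(graph=recog_graph) as sess2:
#     sess2.run(tf.global_variables_initializer())
#     rec_out = sess2.run(rec_net, feed_dict={pooled_input: pooled_value})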
Related
I defined a model function named "drrn_model". While training my model, I use the model like this:
shared_model = tf.make_template('shared_model', drrn_model)
train_output = shared_model(train_input, is_training=True)
It begins training step by step, and I can restore a .ckpt file into the model when I want to continue training from an old checkpoint.
But there is a problem when I test my trained model.
I use the code below directly without using tf.make_template:
train_output = drrn_model(train_input, is_training=False)
Then the terminal gave me a lot of NotFoundError messages like "Key LastLayer/Variable_2 not found in checkpoint".
But when I use
shared_model = tf.make_template('shared_model', drrn_model)
output_tensor = shared_model(input_tensor,is_training=False)
it tests normally.
So why must we use tf.make_template() again at the testing stage? What is the difference between calling drrn_model directly and calling it through make_template when we construct the model?
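For reference, a small sketch (with a hypothetical stand-in for drrn_model, only to inspect naming) that shows the difference: tf.make_template wraps every variable it creates in a 'shared_model/' variable scope, while a direct call creates the variables without that prefix, which would explain the "Key LastLayer/Variable_2 not found in checkpoint" error when the checkpoint was written from the template-built graph:

import tensorflow as tf

def tiny_model(x):
    # hypothetical stand-in for drrn_model, just to inspect variable names
    w = tf.get_variable("w", shape=[1], initializer=tf.zeros_initializer())
    return x * w

x = tf.placeholder(tf.float32, shape=[1])

y_direct = tiny_model(x)                               # creates variable "w"

shared = tf.make_template("shared_model", tiny_model)
y_template = shared(x)                                 # creates variable "shared_model/w"

for v in tf.global_variables():
    print(v.name)   # prints "w:0" and "shared_model/w:0"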
And there is another question about the BN layer in TensorFlow.
I have tried many ways, but the outputs are always wrong (always worse than the version without the BN layer).
Here is my newest version of the model with the BN layer:
tensor = None
def drrn_model(input_tensor, is_training):
    with tf.device("/gpu:0"):
        with tf.variable_scope("FirstLayer"):
            conv_0_w = tf.get_variable("conv_w", [3, 3, 1, 128], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
            tensor = tf.nn.conv2d(tf.nn.relu(batchnorm(input_tensor, is_training=is_training)), conv_0_w, strides=[1, 1, 1, 1], padding="SAME")
            first_layer = tensor

        ### recursion ###
        with tf.variable_scope("recycle", reuse=False):
            tensor = drrnblock(first_layer, tensor, is_training)
        for i in range(1, 10):
            with tf.variable_scope("recycle", reuse=True):
                tensor = drrnblock(first_layer, tensor, is_training)

        ### end layer ###
        with tf.variable_scope("LastLayer"):
            conv_end_w = tf.get_variable("conv_w", [3, 3, 128, 1], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
            conv_end_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(tensor, is_training=is_training)), conv_end_w, strides=[1, 1, 1, 1], padding='SAME')

        tensor = tf.add(input_tensor, conv_end_layer)
        return tensor

def drrnblock(first_layer, input_layer, is_training):
    conv1_w = tf.get_variable("conv1__w", [3, 3, 128, 128], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
    conv1_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(input_layer, is_training=is_training)), conv1_w, strides=[1, 1, 1, 1], padding="SAME")

    conv2_w = tf.get_variable("conv2__w", [3, 3, 128, 128], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
    conv2_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(conv1_layer, is_training=is_training)), conv2_w, strides=[1, 1, 1, 1], padding="SAME")

    tensor = tf.add(first_layer, conv2_layer)
    return tensor

def batchnorm(inputs, is_training, decay=0.999):  # there is my BN layer
    scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
    beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
    pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
    pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)

    if is_training:
        batch_mean, batch_var = tf.nn.moments(inputs, [0, 1, 2])
        print("batch_mean.shape: ", batch_mean.shape)

        train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
        train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
        with tf.control_dependencies([train_mean, train_var]):
            return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, scale, variance_epsilon=1e-3)
    else:
        return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, scale, variance_epsilon=1e-3)
Please tell me what is wrong in my code.
Thanks a lot!!
I am trying to save and restore an AlexNet slim model, but I always get an error when I run the code saver.restore(sess, tf.train.latest_checkpoint('I:/model/mnist/')),
which throws the error:
NotFoundError (see above for traceback): Key alexnet_v2/conv1/biases not found in checkpoint.
When I run tf.global_variables(), I only get the weights of the conv2d layers, and there are no biases in the result.
I don't understand what the problem is. Here is my code:
Here is my AlexNet model:
def alexnet_v2_arg_scope(weight_decay=0.0005,
                         stddev=0.1,
                         batch_norm_var_collection='moving_vars',
                         use_fused_batchnorm=True):
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.9997,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # collection containing update_ops.
        'updates_collections': ops.GraphKeys.UPDATE_OPS,
        # Use fused batch norm if possible.
        'fused': use_fused_batchnorm,
        # collection containing the moving mean and moving variance.
        'variables_collections': {
            'beta': None,
            'gamma': None,
            'moving_mean': [batch_norm_var_collection],
            'moving_variance': [batch_norm_var_collection],
        }
    }
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        biases_initializer=tf.constant_initializer,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params,
                        weights_regularizer=slim.l2_regularizer(weight_decay)):
        with slim.arg_scope([slim.conv2d], padding='SAME'):
            with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
                return arg_sc
def alex_net(inputs,
             num_classes=10,
             is_training=True,
             droupout_keep_prob=0.5,
             spatial_squeze=True,
             scope='alexnet_v2'):
    with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope([slim.conv2d],
                            weights_initializer=trunc_norm(0.1),
                            biases_initializer=tf.constant_initializer(0.1),
                            outputs_collections=end_points_collection):
            inputs = tf.reshape(inputs, [-1, 28, 28, 1])
            net = slim.conv2d(inputs, 64, [3, 3], 1, padding='VALID', scope='conv1')
            net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
            net = slim.conv2d(net, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
            # net = slim.conv2d(net, 384, [3, 3], scope='conv3')
            # net = slim.conv2d(net, 384, [3, 3], scope='conv4')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3')
            # net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
            net = slim.conv2d(net, 512, [3, 3], scope='conv4')
            with slim.arg_scope([slim.conv2d],
                                weights_initializer=trunc_norm(0.1),
                                biases_initializer=tf.constant_initializer(0.1)):
                # net = slim.conv2d(net, 1028, [6, 6], padding='VALID', scope='fc6')
                net = slim.avg_pool2d(net, [6, 6], stride=1, padding='VALID', scope='avg_pool5')
                net = slim.dropout(net, droupout_keep_prob, is_training=is_training, scope='dropout6')
                # net = slim.conv2d(net, 512, [1, 1], scope='fc7')
                # net = slim.dropout(net, droupout_keep_prob, is_training=is_training, scope='dropout7')
                net = slim.conv2d(net, num_classes, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  biases_initializer=tf.zeros_initializer(),
                                  scope='fc7x')
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if spatial_squeze:
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
Save the model:
varialbes_to_restore = slim.get_variables_to_restore()
saver = tf.train.Saver(varialbes_to_restore)
saver.save(sess,'I:/model/mnist/')
Restore the model:
with tf.Session() as sess:
    logits, _ = _alex_slim.alex_net(teX[:200])
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('I:/model/mnist/'))
    logits_var = sess.run(logits)
    print(logits_var)
Perhaps TensorFlow expects you to save it in a checkpoint because you put it in a scope.
You will get the same error if you give a name to a tensor which isn't being saved.
If you don't want to save it, don't give it a name or scope.
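One quick way to see the mismatch (a sketch, reusing the checkpoint path from the question) is to list the keys actually stored in the checkpoint and compare them with the variables the freshly built graph expects to restore:

import tensorflow as tf

# List what the checkpoint actually contains.
ckpt_path = tf.train.latest_checkpoint('I:/model/mnist/')
reader = tf.train.NewCheckpointReader(ckpt_path)
for key in sorted(reader.get_variable_to_shape_map()):
    print('checkpoint:', key)

# Compare with the variables the current graph wants to restore.
for v in tf.global_variables():
    print('graph:', v.op.name)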
I'm trying to set up CIFAR10's tf-slim model to take inputs with dynamic batch, height, and width and a single channel, i.e. monochromatic images of different sizes. Given that all dimensions except the channel size are dynamic, the output shape of slim.flatten is (?, ?). Is there any way to circumvent this? I'm trying to adapt CIFAR10 to TF's DeepDream tutorial, which uses InceptionV3 with an unspecified input shape.
I'm assuming this happens because CIFAR10 is not fully convolutional
import tensorflow as tf
slim = tf.contrib.slim
images = tf.placeholder(tf.float32, shape=(None, None, None, 1))
NUM_CLASSES = 18
scope = 'CifarNet'
with tf.variable_scope(scope, 'CifarNet', [images, NUM_CLASSES]):
    net = slim.conv2d(images, 64, [5, 5], scope='conv1')
    net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
    net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')
    net = slim.conv2d(net, 64, [5, 5], scope='conv2')
    net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2')
    net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
    net = slim.flatten(net)
    net = slim.fully_connected(net, 384, scope='fc3')
ValueError: The last dimension of the inputs to Dense should be defined. Found None.
In the API of tf.contrib.rnn.DropoutWrapper, I am trying to set variational_recurrent=True, in which case input_size is mandatory. As explained, input_size is a TensorShape object containing the depth(s) of the input tensors.
depth(s) is confusing; what is it, please? Is it just the shape of the tensor, as we can get with tf.shape()? Or the number of channels, as in the special case of images? But my input tensor is not an image.
And I don't understand why dtype is demanded when variational_recurrent=True.
Thanks!
input_size for tf.TensorShape([200, None, 300]) is just 300.
Play with this example.
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see TF issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import tensorflow as tf
import numpy as np
n_steps = 2
n_inputs = 3
n_neurons = 5
keep_prob = 0.5
learning_rate = 0.001
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))
basic_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)
basic_cell_drop = tf.contrib.rnn.DropoutWrapper(
    basic_cell,
    input_keep_prob=keep_prob,
    variational_recurrent=True,
    dtype=tf.float32,
    input_size=n_inputs)

output_seqs, states = tf.contrib.rnn.static_rnn(
    basic_cell_drop,
    X_seqs,
    dtype=tf.float32)
outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])
init = tf.global_variables_initializer()
X_batch = np.array([
    # t = 0       t = 1
    [[0, 1, 2], [9, 8, 7]],  # instance 1
    [[3, 4, 5], [0, 0, 0]],  # instance 2
    [[6, 7, 8], [6, 5, 4]],  # instance 3
    [[9, 0, 1], [3, 2, 1]],  # instance 4
])
with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})
    print(outputs_val)
See this for more details: https://github.com/tensorflow/tensorflow/issues/7927
Similarly to the Caffe framework, where it is possible to watch the learned filters during CNN training and their resulting convolutions with the input images, I wonder whether it is possible to do the same with TensorFlow?
A Caffe example can be viewed in this link:
http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb
Grateful for your help!
To see just a few conv1 filters in TensorBoard, you can use this code (it works for cifar10):
# this should be a part of the inference(images) function in cifar10.py file

# conv1
with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay('weights', shape=[5, 5, 3, 64],
                                         stddev=1e-4, wd=0.0)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    bias = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(bias, name=scope.name)
    _activation_summary(conv1)

with tf.variable_scope('visualization'):
    # scale weights to [0 1], type is still float
    x_min = tf.reduce_min(kernel)
    x_max = tf.reduce_max(kernel)
    kernel_0_to_1 = (kernel - x_min) / (x_max - x_min)

    # to tf.image_summary format [batch_size, height, width, channels]
    kernel_transposed = tf.transpose(kernel_0_to_1, [3, 0, 1, 2])

    # this will display random 3 filters from the 64 in conv1
    tf.image_summary('conv1/filters', kernel_transposed, max_images=3)
I also wrote a simple gist to display all 64 conv1 filters in a grid.
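In case the gist link goes missing, here is a rough sketch of the same idea (my own reconstruction, not the original gist): rearrange the [5, 5, 3, 64] conv1 kernel into an 8x8 grid of filters and log it as a single image summary.

def kernels_to_grid(kernel, grid_y=8, grid_x=8):
    # kernel: [h, w, c, n] with n == grid_y * grid_x; returns [1, grid_y*h, grid_x*w, c]
    k_min = tf.reduce_min(kernel)
    k_max = tf.reduce_max(kernel)
    k = (kernel - k_min) / (k_max - k_min)           # scale to [0, 1] for display

    h, w, c, n = k.get_shape().as_list()
    k = tf.transpose(k, [3, 0, 1, 2])                # [n, h, w, c]
    k = tf.reshape(k, [grid_y, grid_x, h, w, c])     # split the filters into a grid
    k = tf.transpose(k, [0, 2, 1, 3, 4])             # [grid_y, h, grid_x, w, c]
    k = tf.reshape(k, [1, grid_y * h, grid_x * w, c])
    return k

# e.g. inside the 'visualization' scope above:
# grid = kernels_to_grid(kernel, grid_y=8, grid_x=8)
# tf.image_summary('conv1/filters_grid', grid, max_images=1)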