ValueError: No gradients provided for any variable in Tensorflow - tensorflow

I'm trying to create a dice_loss function in TensorFlow.
I'm having trouble with TensorFlow. Executing the following code
import tensorflow as tf
import tensorlayer as tl
def conv3d(x, inChans, outChans, kernel_size, stride, padding):
    """Apply a cubic 3-D convolution to ``x`` and add a per-channel bias.

    Args:
        x: input tensor handed to ``tf.nn.conv3d``.
        inChans: input-channel count of the filter.
        outChans: output-channel count of the filter (also the bias length).
        kernel_size: edge length used for all three spatial filter dimensions.
        stride: stride used for all three spatial dimensions.
        padding: padding mode forwarded to ``tf.nn.conv3d``.

    Returns:
        The result of ``tf.nn.bias_add`` on the convolution output.
    """
    # The filter is created before the bias, matching the original order.
    filter_shape = [kernel_size] * 3 + [inChans, outChans]
    kernel = weight_variable(filter_shape)
    bias = bias_variable([outChans])

    # Batch and channel strides stay at 1; only the spatial ones vary.
    strides = [1] + [stride] * 3 + [1]
    convolved = tf.nn.conv3d(x, kernel, strides=strides, padding=padding)
    return tf.nn.bias_add(convolved, bias)
def train(loss_val, var_list):
    """Build the Adam update op that minimises ``loss_val`` over ``var_list``.

    Args:
        loss_val: scalar loss tensor to differentiate.
        var_list: variables for which gradients are computed and applied.

    Returns:
        The op returned by ``apply_gradients``.
    """
    adam = tf.train.AdamOptimizer(FLAGS.learning_rate)
    # Kept as two explicit steps (rather than minimize) so the
    # gradient/variable pairs remain available for inspection.
    grads_and_vars = adam.compute_gradients(loss_val, var_list=var_list)
    return adam.apply_gradients(grads_and_vars)
def main(argv=None):
image = tf.placeholder(tf.float32, shape=[None, SLICE_SIZE, IMAGE_SIZE, IMAGE_SIZE, 1], name="input_image")
annotation = tf.placeholder(tf.float32, shape=[None, SLICE_SIZE, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")
logits, pred_annotation = vnet.VNet(image)
loss = 1 - tl.cost.dice_coe(output=pred_annotation, target=annotation, axis=[1,2,3,4])
trainable_var = tf.trainable_variables()
train_op = train(loss, trainable_var)
sess = tf.Session()
...
...
def VNet(x):
...
out = tf.nn.elu(BatchNorm3d(conv3d(x, inChans, 2, kernel_size=5, stride=1, padding="SAME")))
out = conv3d(out, 2, 2, kernel_size=1, stride=1, padding="SAME")
annotation_pred = tf.to_float(tf.argmax(out, dimension=4, name='prediction'))
return out, tf.expand_dims(annotation_pred, dim=4)
I get the following error:
ValueError: No gradients provided for any variable: ...
Can someone help me?

When you do annotation_pred = tf.to_float(tf.argmax(out, dimension=4, name='prediction')), you get the index of the max value in your tensor. This index is not differentiable, so the gradient can't flow through this operation.
Since your loss is defined only by this value, and the gradient can't flow through it, no gradient can be calculated for your network.
I don't know specifically how the dice loss works, but maybe you wanted to use tf.reduce_max instead of tf.argmax, or you have to find an operation that lets the gradient flow (for example, computing the loss from a softmax over out rather than from the argmax indices).

Related

how to get an array of predictions from tensor flow classification model

I have the following classification model.
I would like to get a NumPy array similar to y_t, which holds the one-hot encoded test labels. However, I keep getting a variable error.
# Construct placeholders
with graph.as_default():
inputs_ = tf.placeholder(tf.float32, [None, seq_len, n_channels], name = 'inputs')
labels_ = tf.placeholder(tf.float32, [None, n_classes], name = 'labels')
keep_prob_ = tf.placeholder(tf.float32, name = 'keep')
learning_rate_ = tf.placeholder(tf.float32, name = 'learning_rate')
with graph.as_default():
# (batch, 100, 3) --> (batch, 50, 6)
conv1 = tf.layers.conv1d(inputs=inputs_, filters=6, kernel_size=2, strides=1,
padding='same', activation = tf.nn.relu)
max_pool_1 = tf.layers.max_pooling1d(inputs=conv1, pool_size=2, strides=2, padding='same')
with graph.as_default():
# Flatten and add dropout
flat = tf.reshape(max_pool_1, (-1, 6*6))
flat = tf.nn.dropout(flat, keep_prob=keep_prob_)
# Predictions
logits = tf.layers.dense(flat, n_classes)
# Cost function and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_))
optimizer = tf.train.AdamOptimizer(learning_rate_).minimize(cost)
# Accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')
Then I use the test set
with tf.Session(graph=graph) as sess:
# Restore
saver.restore(sess, tf.train.latest_checkpoint('bschkpnt-cnn'))
for x_t, y_t in get_batches(X_test, y_test, batch_size):
feed = {inputs_: x_t,
labels_: y_t,
keep_prob_: 1}
batch_acc = sess.run(accuracy, feed_dict=feed)
test_acc.append(batch_acc)
print("Test accuracy: {:.6f}".format(np.mean(test_acc)))
y_t is an n×3 NumPy array.
I want to get y_pred in a similar format.
Thanks
soft = tf.nn.softmax(logits)
This will be your probability distribution, such that each row of soft sums to 1. Each value in this array indicates how confident the model is about the corresponding class.
pred = sess.run(soft, feed_dict=feed)
print(pred)
So basically all I do is add an extra softmax: the one you already have is built into the loss you calculate, so you have to apply it again to get predictions. Then I ask for the output prediction and simply pass the same feed_dict again.
Hope this helped!

How to call a customer tensorflow op. in Keras?

I have a custom TensorFlow op that was written in C++ and built successfully; I call it in TensorFlow code as
from libs.customer_op import customer_op
output = customer_op(x, filter=w, rates=[1, 1, rate, rate], padding="SAME", strides=[1, 1, stride, stride])
Now I am using Keras with the TensorFlow backend. Is it possible to call the above function in Keras? Do we need to do some extra registration step?
Update: Thanks Matias Valdenegro for your suggestion. I have tried it. This is my full code in tensorflow and what I have done in Keras.
-Tensorflow code
def my_conv(input,num_o,kernel_size, stride):
num_x = input.shape[3].value
offset = slim.conv2d(input, 18, [kernel_size, kernel_size], stride=stride, activation_fn=None, scope='offset', normalizer_fn=None)
w = tf.get_variable('weights', shape=[num_o, num_x, kernel_size, kernel_size],
initializer=tf.contrib.layers.xavier_initializer())
output = customer_conv(x, filter=w, offset=offset,padding="SAME")
-Keras code:
def my_conv(input, num_o, kernel_size, stride):
num_x = input.shape[3].value
offset = KL.Conv2D(18, (kernel_size, kernel_size), strides=(stride,stride))(input)
w = KI.TruncatedNormal(mean=0.0, stddev=0.05, seed=None)
output = Lambda(lambda x: deform_conv_op(x, filter=w, offset=offset, padding="SAME"))(input)
return output
So, this is the place that I will call the function
class CustomerCNN():
def __init__(self, mode):
self.mode = mode
def build(self, mode):
# Inputs
input_image = KL.Input(
shape=config.IMAGE_SHAPE.tolist(), name="input_image")
f1 = Lambda(lambda x: my_conv(x, 256, 3, 1))(input_image)
With the above solution, I still have these issues:
How do I initialize a weight with shape=[num_o, num_x, kernel_size, kernel_size] in Keras?
How do I call my custom conv my_conv in the class CustomerCNN? Do we need one more Lambda function, as I did?
You can just call it with a lambda layer:
output = Lambda(lambda x: customer_op(x, filter=w, rates=[1, 1, rate, rate],
padding="SAME", strides=[1, 1, stride, stride]))(input)

why if we use "tf.make_template()" in training stage, we must use tf.make_template() again in testing stage

I defined a model function named "drrn_model". While training my model, I used the model like this:
shared_model = tf.make_template('shared_model', drrn_model)
train_output = shared_model(train_input, is_training=True)
Training proceeds step by step, and I can restore the .ckpt file into the model when I want to continue training from an earlier checkpoint.
But there is a problem when I test my trained model.
I use the code below directly, without using tf.make_template:
train_output = drrn_model(train_input, is_training=False)
Then the terminal gave me lots of NotFoundError messages like "Key LastLayer/Variable_2 not found in checkpoint".
But when I use
shared_model = tf.make_template('shared_model', drrn_model)
output_tensor = shared_model(input_tensor,is_training=False)
It can test normally.
So why must we use tf.make_template() again in the testing stage? What is the difference between calling drrn_model directly and calling it through make_template when we construct our model?
And there is another question, about the BN layer in TensorFlow.
I have tried many ways, but the output is always wrong (always worse than the version without the BN layer).
There is my newest version of model with BN layer:
tensor = None
def drrn_model(input_tensor, is_training):
    """Build the recursive residual network and return its output tensor.

    The graph is: BN -> ReLU -> conv ("FirstLayer"), ten applications of
    ``drrnblock`` under the "recycle" scope, BN -> ReLU -> conv ("LastLayer"),
    and finally a global skip connection adding ``input_tensor``.

    Args:
        input_tensor: network input, also used for the final residual add.
        is_training: Python bool forwarded to every ``batchnorm`` call.

    Returns:
        ``input_tensor`` plus the output of the last convolution.
    """
    # NOTE(review): the listing lost its indentation; the nesting below
    # (everything under the device scope, final add outside "LastLayer")
    # is the natural reading but should be confirmed.
    weight_init = tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9))
    unit_strides = [1, 1, 1, 1]

    with tf.device("/gpu:0"):
        with tf.variable_scope("FirstLayer"):
            first_w = tf.get_variable("conv_w", [3, 3, 1, 128], initializer=weight_init)
            activated = tf.nn.relu(batchnorm(input_tensor, is_training=is_training))
            first_layer = tf.nn.conv2d(activated, first_w, strides=unit_strides, padding="SAME")

        # Recursion: the first pass creates the "recycle" variables, the
        # remaining nine passes re-enter the scope with reuse=True.
        with tf.variable_scope("recycle", reuse=False):
            features = drrnblock(first_layer, first_layer, is_training)
        for _ in range(1, 10):
            with tf.variable_scope("recycle", reuse=True):
                features = drrnblock(first_layer, features, is_training)

        # End layer: project the 128 feature maps back to one channel.
        with tf.variable_scope("LastLayer"):
            last_w = tf.get_variable("conv_w", [3, 3, 128, 1], initializer=weight_init)
            activated = tf.nn.relu(batchnorm(features, is_training=is_training))
            residual = tf.nn.conv2d(activated, last_w, strides=unit_strides, padding="SAME")

        return tf.add(input_tensor, residual)
def drrnblock(first_layer, input_layer, is_training):
    """Run one residual unit: two BN -> ReLU -> 3x3 conv stages plus a skip.

    Args:
        first_layer: tensor added to the unit's output (the skip branch).
        input_layer: tensor fed into the first stage.
        is_training: Python bool forwarded to ``batchnorm``.

    Returns:
        ``first_layer`` plus the output of the second convolution.
    """
    weight_init = tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9))
    current = input_layer
    # Both stages are identical apart from the variable name; each kernel is
    # fetched before its batch norm so creation order matches the original.
    for kernel_name in ("conv1__w", "conv2__w"):
        kernel = tf.get_variable(kernel_name, [3, 3, 128, 128], initializer=weight_init)
        pre_activation = tf.nn.relu(batchnorm(current, is_training=is_training))
        current = tf.nn.conv2d(pre_activation, kernel, strides=[1, 1, 1, 1], padding="SAME")
    return tf.add(first_layer, current)
def batchnorm(inputs, is_training, decay=0.999):
    """Batch-normalise ``inputs`` while tracking population statistics.

    Args:
        inputs: tensor to normalise; statistics are taken over axes 0, 1, 2.
        is_training: Python bool, evaluated when the graph is built. True
            normalises with batch statistics and updates the running ones;
            False normalises with the running ones.
        decay: weight kept from the previous running statistic per update.

    Returns:
        The output of ``tf.nn.batch_normalization``.
    """
    # NOTE(review): these are plain tf.Variable objects, so every call
    # creates a fresh set regardless of the enclosing variable_scope's reuse.
    depth = inputs.get_shape()[-1]
    scale = tf.Variable(tf.ones([depth]))
    beta = tf.Variable(tf.zeros([depth]))
    pop_mean = tf.Variable(tf.zeros([depth]), trainable=False)
    pop_var = tf.Variable(tf.ones([depth]), trainable=False)
    epsilon = 1e-3

    if not is_training:
        return tf.nn.batch_normalization(
            inputs, pop_mean, pop_var, beta, scale, variance_epsilon=epsilon)

    batch_mean, batch_var = tf.nn.moments(inputs, [0, 1, 2])
    print("batch_mean.shape: ", batch_mean.shape)
    update_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
    update_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
    # Tie the running-average updates to the normalised output so they run
    # whenever that output is evaluated.
    with tf.control_dependencies([update_mean, update_var]):
        return tf.nn.batch_normalization(
            inputs, batch_mean, batch_var, beta, scale, variance_epsilon=epsilon)
Please tell me what is wrong in my code.
Thanks a lot!!

Output of loss is None

I have to finetune VGG.There are five convolutional layers and then three fully connected layers. Output from the last fully connected layer is the input of the loss function. Following is my code:
class vgg16:
def __init__(self, imgs1,imgs2, weights=None, sess=None):
self.imgs1 = imgs1
self.imgs2 = imgs2
with tf.variable_scope("siamese") as scope:
self.o1 = self.convlayers(imgs1)
self.fc_layers()
self.loss()
if weights is not None and sess is not None:
self.load_weights(weights, sess)
scope.reuse_variables()
self.o2 = self.convlayers(imgs2)
self.fc_layers()
self.loss()
if weights is not None and sess is not None:
self.load_weights(weights, sess)
#create loss function
def convlayers(self,imgs):
....
# conv1_2
with tf.name_scope('conv1_2') as scope:
......
# pool1
..
)
.....
# pool5
self.pool5 = tf.nn.max_pool(self.conv5_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool4')
def fc_layers(self):
# fc1
with tf.name_scope('fc1') as scope:
....
# fc2
with tf.name_scope('fc2') as scope:
...
# fc3
with tf.name_scope('fc3') as scope:
fc3w = tf.Variable(tf.truncated_normal([4096, 1000],
dtype=tf.float32,
stddev=1e-1), name='weights')
fc3b = tf.Variable(tf.constant(1.0, shape=[1000], dtype=tf.float32),
trainable=True, name='biases')
self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)
def load_weights(self, weight_file, sess):
weights = np.load(weight_file)
keys = sorted(weights.keys())
for i, k in enumerate(keys):
print i, k, np.shape(weights[k])
sess.run(self.parameters[i].assign(weights[k]))
def loss(self):
loss=tf.nn.l2_loss(self.fc3l)
self.train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
if __name__ == '__main__':
sess = tf.Session()
imgs1 = tf.placeholder(tf.float32, [None, 224, 224, 3])#jis size ka bhi imaeg hai usko 224x224 may kar diya or RGB chaeay hmay
imgs2 = tf.placeholder(tf.float32, [None, 224, 224, 3])
vgg = vgg16(imgs1,imgs2, 'vgg16_weights.npz', sess)
img1 = imread('laska.png', mode='RGB')
img1 = imresize(img1, (224, 224))
img2 = imread('laska2.jpg', mode='RGB')
img2 = imresize(img2,(224, 224))
prob = sess.run(vgg.train_step, feed_dict={vgg.imgs1: [img1],vgg.imgs2: [img2]})
print('loss is:')
print(prob)
The problem is that the output of prob is None. Kindly indicate what I am doing wrong.
PS: I am following siamese architecture. Input to both branches are different images here.
The op self.train_step does not return anything; it just calculates gradients and updates variables. See here.
What you need to do is save a reference to the loss tensor in your vgg16 class like this:
self.loss=tf.nn.l2_loss(self.fc3l)
and then execute both train_step and loss operations in single sess.run:
_, loss_value = sess.run([vgg.train_step, vgg.loss], feed_dict=...)
print('loss is:')
print(loss_value)

How does the reuse option in tf.variable_scope work?

I have the following problem: I am writing simple code to learn how TensorFlow works, and I am defining the variables for convolution with the help of tf.variable_scope. However, every time I try to run this script I get a ValueError telling me to set either reuse=None or reuse=True.
Can somebody explain why it doesn't just run the function without this option being defined, or what a solution would be?
My code is:
import re
import tensorflow as tf
import numpy as np
data = np.load('/home/joanna/tensorflow-master/tensorflow/models/image/cifar10/konsensop/data.npy')
labels = np.load('/home/joanna/tensorflow-master/tensorflow/models/image/cifar10/konsensop/labels.npy')
labels = np.zeros((16400,))
labels[10001:16400]=1
labels = labels.astype(int)
data = data.astype(np.float32)
#labels = tf.cast(labels,tf.int64)
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.1 # Initial learning rate.
NUM_CLASSES=2
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN= 1000
batch_size=300
def _variable_on_cpu(name, shape, initializer):
    """Fetch or create a float32 variable through ``tf.get_variable``.

    Args:
        name: variable name within the current variable scope.
        shape: variable shape.
        initializer: initializer used if the variable is created.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    # NOTE(review): despite the name, no device scope is opened here, so
    # placement is whatever the caller's context dictates.
    return tf.get_variable(name, shape, initializer=initializer, dtype=tf.float32)
def _add_loss_summaries(loss):
    """Attach scalar summaries and moving averages to every loss term.

    Every tensor in the 'losses' collection, plus ``loss`` itself, gets an
    exponential moving average (decay 0.9) and two scalar summaries: the raw
    value and the averaged value.

    Args:
        loss: the total loss tensor.

    Returns:
        The op that updates the moving averages.
    """
    averager = tf.train.ExponentialMovingAverage(0.9, name='avg')
    tracked = tf.get_collection('losses') + [loss]
    loss_averages_op = averager.apply(tracked)

    for term in tracked:
        # The raw value gets the ' (raw)' suffix; the smoothed value keeps
        # the original op name.
        tf.scalar_summary(term.op.name +' (raw)', term)
        tf.scalar_summary(term.op.name, averager.average(term))

    return loss_averages_op
def _variable_with_weight_decay(name, shape, stddev, wd):
    """Create a truncated-normal variable, optionally with an L2 penalty.

    Args:
        name: variable name within the current variable scope.
        shape: variable shape.
        stddev: standard deviation of the truncated normal initializer.
        wd: L2 weight-decay factor. When not None, ``l2_loss(var) * wd`` is
            added to the 'losses' collection (a factor of 0.0 still adds a
            term).

    Returns:
        The created variable.
    """
    init = tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32)
    var = _variable_on_cpu(name, shape, init)
    if wd is None:
        return var

    penalty = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', penalty)
    return var
def _activation_summary(x):
    """Record a histogram and a sparsity scalar summary for tensor ``x``.

    Args:
        x: tensor whose activations are summarised.
    """
    # Drop any '_<digits>/' fragment from the op name before using it as
    # the summary tag prefix.
    tag_prefix = re.sub('_[0-9]*/', '', x.op.name)
    tf.histogram_summary(tag_prefix + '/activations', x)
    zero_share = tf.nn.zero_fraction(x)
    tf.scalar_summary(tag_prefix + '/sparsity', zero_share)
def iterate_batches(data, labels, batch_size, num_epochs):
    """Yield consecutive ``(data, labels)`` mini-batches for several epochs.

    Batches are cut along the first axis only, so ``data`` may have any
    number of trailing dimensions (the original 4-D input still works).
    Only full batches are produced: trailing examples that do not fill a
    whole batch are dropped in every epoch. No shuffling is performed.

    Args:
        data: array-like supporting slice indexing along its first axis.
        labels: array with a ``shape`` attribute; ``labels.shape[0]`` is the
            number of examples.
        batch_size: number of examples per batch.
        num_epochs: number of passes over the data.

    Yields:
        Tuples ``(data[start:stop], labels[start:stop])``.
    """
    num_examples = int(labels.shape[0])
    # Floor division keeps the count exact and avoids a float round trip.
    batches_per_epoch = num_examples // batch_size
    for _ in range(num_epochs):
        for batch_index in range(batches_per_epoch):
            start = batch_index * batch_size
            stop = start + batch_size
            # Slice the leading axis only so inputs of any rank are accepted.
            yield data[start:stop], labels[start:stop]
# Builds a three-conv / two-fc classifier graph with summaries and a decayed
# learning rate, then evaluates it on batches from iterate_batches.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the statements below (in particular what sits inside the for loop) cannot
# be read off the text.
def train():
with tf.Graph().as_default():
global_step = tf.Variable(0)
x_tensor = tf.placeholder(tf.float32, shape=(batch_size, 3000,1,1))
y_tensor = tf.placeholder(tf.int64, shape=(batch_size,))
# NOTE(review): the graph-building code that follows appears to be inside
# this loop, i.e. it would be rebuilt for every batch -- confirm.
for x,y in iterate_batches(data,labels, 300,1):
print('yey!')
# NOTE(review): every scope below except 'softmax_linear' is opened with
# reuse=True, which makes tf.get_variable look up existing variables only;
# nothing visible here creates them beforehand.
with tf.variable_scope('conv1',reuse=True) as scope:
kernel = _variable_with_weight_decay('weights',
shape=[100,1,1,64],
stddev=5e-2,
wd=0.0)
conv = tf.nn.conv2d(x_tensor, kernel, [1,3,1,1], padding = 'SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv1)
pool1 = tf.nn.max_pool(conv1, ksize=[1,20,1,1], strides=[1,2,1,1], padding='SAME', name='pool1')
norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')
# Second conv stage: here normalisation comes before pooling (the first
# stage pools first).
with tf.variable_scope('conv2',reuse=True) as scope:
kernel = _variable_with_weight_decay('weights', [50,1,64,64], stddev = 5e-2, wd=0.0)
conv = tf.nn.conv2d(norm1, kernel, [1,3,1,1], padding='SAME')
biases = _variable_on_cpu('biases',[64], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv,biases)
conv2 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv2)
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9.0, beta = 0.75, name='norm2')
pool2 = tf.nn.max_pool(norm2, ksize=[1,10,1,1], strides=[1,2,1,1], padding='SAME', name='pool2')
# Third conv stage.
with tf.variable_scope('conv3',reuse=True) as scope:
kernel = _variable_with_weight_decay('weights', [30,1,64,64], stddev = 5e-2, wd=0.0)
conv = tf.nn.conv2d(pool2, kernel, [1,10,1,1], padding='SAME')
biases = _variable_on_cpu('biases',[64], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv,biases)
conv3 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv3)
norm3 = tf.nn.lrn(conv3, 4, bias=1.0, alpha=0.001/9.0, beta = 0.75, name='norm3')
pool3 = tf.nn.max_pool(norm3, ksize=[1,9,1,1], strides=[1,9,1,1], padding='SAME', name='pool3')
# First fully connected layer; the only layers with a non-zero weight
# decay (wd=0.004) are fc4 and fc5.
with tf.variable_scope('fc4',reuse=True) as scope:
# Move everything into depth so we can perform a single matrix multiply.
reshape = tf.reshape(pool3, [batch_size, -1])
dim = reshape.get_shape()[1].value
weights = _variable_with_weight_decay('weights', shape=[dim, 64], stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
fc4 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
_activation_summary(fc4)
with tf.variable_scope('fc5',reuse=True) as scope:
weights = _variable_with_weight_decay('weights', shape=[64, 64],
stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
fc5 = tf.nn.relu(tf.matmul(fc4, weights) + biases, name=scope.name)
_activation_summary(fc5)
# Output layer producing NUM_CLASSES logits; this is the one scope opened
# without reuse=True.
with tf.variable_scope('softmax_linear',) as scope:
weights = _variable_with_weight_decay('weights', [64, NUM_CLASSES],
stddev=1/64.0, wd=0.0)
biases = _variable_on_cpu('biases', [NUM_CLASSES],
tf.constant_initializer(0.0))
softmax_linear = tf.add(tf.matmul(fc5, weights), biases, name=scope.name)
_activation_summary(softmax_linear)
# Total loss = mean cross-entropy plus everything else already stored in
# the 'losses' collection (the weight-decay terms).
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(softmax_linear, y_tensor, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
# tf.add_to_collection returns None, so 'kupa' carries no value.
kupa = tf.add_to_collection('losses', cross_entropy_mean)
loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
#new
# Whether this is true or floor division depends on the Python version.
num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /batch_size
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step, decay_steps, LEARNING_RATE_DECAY_FACTOR, staircase=True)
loss_averages_op = _add_loss_summaries(loss)
summary_op = tf.merge_all_summaries()
#new
init = tf.initialize_all_variables()
sess = tf.Session(config = tf.ConfigProto(log_device_placement=False))
sess.run(init)
# No optimizer or train op is created in this function: the runs below only
# evaluate forward tensors, the loss, the learning rate, the loss-average
# update and the merged summaries.
sess.run([conv, bias, conv1, pool1, norm1, conv2,norm2, pool2, conv3, norm3, pool3,fc4,fc5], feed_dict={x_tensor:x, y_tensor:y})
sess.run([softmax_linear,loss], feed_dict={x_tensor:x, y_tensor:y})
sess.run([lr, loss_averages_op, summary_op], feed_dict={x_tensor:x, y_tensor:y})
The problem is with this line here:
for x,y in iterate_batches(data,labels, 300,1):
This will recreate the graph on every iteration, which is a bad thing to do as it'll take up more memory each time (this isn't always the case but it can happen).
The reuse=True comes in something like this example below when defining the graph.
# First call creates one set of variables.
result1 = my_image_filter(image1)
# Another set of variables is created in the second call.
result2 = my_image_filter(image2)
TensorFlow doesn't know whether or not you want to "reuse" the variables, i.e. whether they should share the same parameters.
In your specific case, by looping you're recreating the parameters each time and telling TensorFlow to simply reuse the variables.
It would be better to move the for loop to after the graph has been created; then you could get rid of reuse=True everywhere.