How to use tensorflow tf.metrics.mean_iou? - tensorflow

I am trying to use the inbuilt mean_iou function of tensorflow to compute the IoU score for semantic segmentation.
My code is:
#y_mask.shape == [batch_size, h * w, n_classes]
#mask_.shape == [batch_size, h * w, n_classes]
iou = tf.metrics.mean_iou(tf.argmax(y_mask,2), tf.argmax(mask_,2), n_classes)
However I am getting the following error trace:
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value mean_iou/total_confusion_matrix
[[Node: mean_iou/AssignAdd = AssignAdd[T=DT_DOUBLE, _class=["loc:@mean_iou/total_confusion_matrix"], use_locking=false, _device="/job:localhost/replica:0/task:0/cpu:0"](mean_iou/total_confusion_matrix, mean_iou/confusion_matrix/SparseTensorDenseAdd)]]
Caused by op u'mean_iou/AssignAdd', defined at:
File "sample_tf_ynet.py", line 207, in <module>
trainSeg()
File "sample_tf_ynet.py", line 166, in trainSeg
iou, cm_op = tf.metrics.mean_iou(tf.argmax(y_mask,2), tf.argmax(mask_,2), n_classes)
File "/home/meetshah1995/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/metrics_impl.py", line 782, in mean_iou
update_op = state_ops.assign_add(total_cm, current_cm)
File "/home/meetshah1995/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_state_ops.py", line 75, in assign_add
use_locking=use_locking, name=name)
File "/home/meetshah1995/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/home/meetshah1995/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/meetshah1995/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value mean_iou/total_confusion_matrix
[[Node: mean_iou/AssignAdd = AssignAdd[T=DT_DOUBLE, _class=["loc:@mean_iou/total_confusion_matrix"], use_locking=false, _device="/job:localhost/replica:0/task:0/cpu:0"](mean_iou/total_confusion_matrix, mean_iou/confusion_matrix/SparseTensorDenseAdd)]]
Please guide me on the correct usage of this for semantic segmentation.

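I solved it by also initializing the local variables (tf.metrics.mean_iou keeps its confusion matrix in local variables, which tf.global_variables_initializer() does not cover):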
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())

Simplest form I could come up with (3 classes):
# y_pred and y_true are np.arrays of shape [1, size, channels]
with tf.Session() as sess:
ypredT = tf.constant(np.argmax(y_pred, axis=-1))
ytrueT = tf.constant(np.argmax(y_true, axis=-1))
iou,conf_mat = tf.metrics.mean_iou(ytrueT, ypredT, num_classes=3)
sess.run(tf.local_variables_initializer())
sess.run([conf_mat])
miou = sess.run([iou])
print(miou)
prints:
[0.6127908]

Related

InvalidArgumentError : ConcatOp : Dimensions of inputs should match

I am using TensorFlow 1.7 with dynamic_rnn. It runs fine at first, but at around the 32nd step (the exact step changes each time I run the code) the error appears. When I use a smaller batch size the code seems to run longer, but the error still pops up. I just can't figure out what's wrong.
from mapping import *
def my_input_fn(features, targets, batch_size=20, shuffle=True, num_epochs=None, sequece_lenth=None):
ds = tf.data.Dataset.from_tensor_slices(
(features, targets, sequece_lenth)) # warning: 2GB limit
ds = ds.batch(batch_size).repeat(num_epochs)
if shuffle:
ds = ds.shuffle(10000)
features, labels, sequence = ds.make_one_shot_iterator().get_next()
return features, labels, sequence
def lstm_cell(lstm_size=50):
return tf.contrib.rnn.BasicLSTMCell(lstm_size)
class RnnModel:
def __init__(self,
batch_size,
hidden_units,
time_steps,
num_features
):
self.batch_size = batch_size
self.hidden_units = hidden_units
stacked_lstm = tf.contrib.rnn.MultiRNNCell(
[lstm_cell(i) for i in self.hidden_units])
self.initial_state = stacked_lstm.zero_state(batch_size, tf.float32)
self.model = stacked_lstm
self.state = self.initial_state
self.time_steps = time_steps
self.num_features = num_features
def loss_mean_squre(self, outputs, targets):
pos = tf.add(outputs, tf.ones(self.batch_size))
eve = tf.div(pos, 2)
error = tf.subtract(eve,
targets)
return tf.reduce_mean(tf.square(error))
def train(self,
num_steps,
learningRate,
input_fn,
inputs,
targets,
sequenceLenth):
periods = 10
step_per_periods = int(num_steps / periods)
input, target, sequence = input_fn(inputs, targets, self.batch_size, shuffle=True, sequece_lenth=sequenceLenth)
initial_state = self.model.zero_state(self.batch_size, tf.float32)
outputs, state = tf.nn.dynamic_rnn(self.model, input, initial_state=initial_state)
loss = self.loss_mean_squre(tf.reshape(outputs, [self.time_steps, self.batch_size])[-1], target)
optimizer = tf.train.AdamOptimizer(learning_rate=learningRate)
grads_and_vars = optimizer.compute_gradients(loss, self.model.variables)
optimizer.apply_gradients(grads_and_vars)
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
for i in range(num_steps):
sess.run(init_op)
state2, current_loss= sess.run([state, loss])
if i % step_per_periods == 0:
print("period " + str(int(i / step_per_periods)) + ":" + str(current_loss))
return self.model, self.state
def processFeature(df):
df = df.drop('class', 1)
features = []
for i in range(len(df["vecs"])):
features.append(df["vecs"][i])
aa = pd.Series(features).tolist() # tramsform into list
featuresList = []
for i in features:
p1 = []
for k in i:
p1.append(list(k))
featuresList.append(p1)
return featuresList
def processTargets(df):
selected_features = df[
"class"]
processed_features = selected_features.copy()
return tf.convert_to_tensor(processed_features.astype(float).tolist())
if __name__ == '__main__':
dividNumber = 30
"""
some code here to modify my data to input
it looks like this:
inputs before use input function : [fullLenth, charactorLenth, embeddinglenth]
"""
model = RnnModel(15, [100, 80, 80, 1], time_steps=dividNumber, num_features=25)
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
And the error is shown below:
Traceback (most recent call last):
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1330, in _do_call
return fn(*args)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1315, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1423, in _call_tf_sessionrun
status, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 516, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:/programming/mlwords/dnn_gragh.py", line 198, in <module>
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
File "D:/programming/mlwords/dnn_gragh.py", line 124, in train
state2, current_loss, nowAccuracy = sess.run([state, loss, accuracy])
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 908, in run
run_metadata_ptr)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1143, in _run
feed_dict_tensor, options, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1324, in _do_run
run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1343, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
Caused by op 'rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat', defined at:
File "D:/programming/mlwords/dnn_gragh.py", line 198, in <module>
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
File "D:/programming/mlwords/dnn_gragh.py", line 95, in train
outputs, state = tf.nn.dynamic_rnn(self.model, input, initial_state=initial_state)#,sequence_length=sequence
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 627, in dynamic_rnn
dtype=dtype)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 824, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3205, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2943, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2880, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3181, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv))
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 795, in _time_step
(output, new_state) = call_cell()
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 781, in <lambda>
call_cell = lambda: cell(input_t, state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 232, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\layers\base.py", line 714, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1283, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 339, in __call__
*args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\layers\base.py", line 714, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 620, in call
array_ops.concat([inputs, h], 1), self._kernel)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1181, in concat
return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1101, in concat_v2
"ConcatV2", values=values, axis=axis, name=name)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\ops.py", line 3309, in create_op
op_def=op_def)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1669, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
This is the code I used to check my input:
def checkData(inputs, targets, sequencelence):
batch_size = 20
features, target, sequece = my_input_fn(inputs, targets, batch_size=batch_size, shuffle=True, num_epochs=None,
sequece_lenth=sequencelence)
with tf.Session() as sess:
for i in range(1000):
features1, target1, sequece1 = sess.run([features, target, sequece])
assert len(features1) == batch_size
for sentence in features1 :
assert len(sentence) == 30
for word in sentence:
assert len(word) == 25
assert len(target1) == batch_size
assert len(sequece1) == batch_size
print(target1)
print("OK")
The error is coming from the LSTMCell.call method. There we are trying to do tf.concat([inputs, h], 1), meaning that we want to concatenate the next input with the current hidden state before matmul'ing with the kernel variables matrix. The error is saying that you can't do it because the batch (0th) dimensions don't match up: your input is shaped [20,25] and your hidden state is shaped [30,100].
For some reason, on your 32nd iteration (or whenever you see the error), the input is batched to only 20 instead of 30. This usually happens at the end of your training data, when the total number of training examples is not evenly divisible by your batch size. This hypothesis is also consistent with your statement "When i used smaller batch , it seems the code can run longer".
I had the same issue. When I corrected the image input size to match the input shape, it ran without errors.

Tensorflow batch training OutOfRangeError

Saving variables
Variables saved in 0.88 seconds
Saving metagraph
Metagraph saved in 35.81 seconds
Saving variables
Variables saved in 0.95 seconds
Saving metagraph
Metagraph saved in 33.20 seconds
Traceback (most recent call last):
Caused by op u'batch', defined at:
File "ava_train.py", line 155, in <module>
image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, allow_smaller_final_batch=True)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 872, in batch
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 665, in _batch
dequeued = queue.dequeue_up_to(batch_size, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 510, in dequeue_up_to
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 1402, in _queue_dequeue_up_to_v2
timeout_ms=timeout_ms, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
self._traceback = _extract_stack()
OutOfRangeError (see above for traceback): FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements (requested 100, current size 0)
[[Node: batch = QueueDequeueUpToV2[component_types=[DT_FLOAT, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
my code is here
with tf.Graph().as_default():
global_step = tf.Variable(0, trainable=False)
# process same as cifar10.distorted_inputs
log_dir = '../log'
model_dir = '../model'
max_num_epoch = 80
if not os.path.exists(log_dir):
os.makedirs(log_dir)
if not os.path.exists(model_dir):
os.makedirs(model_dir)
num_train_example = len(os.listdir('../images/'))
# Reads pfathes of images together with their labels
image_list, label_list = read_labeled_image_list('../raw.txt')
images = ops.convert_to_tensor(image_list, dtype=dtypes.string)
labels = ops.convert_to_tensor(label_list, dtype=dtypes.int32)
# Makes an input queue
# input_queue = tf.train.slice_input_producer([images, labels], num_epochs=max_num_epoch, shuffle=True)
input_queue = tf.train.slice_input_producer([images, labels], shuffle=True)
image, label = read_images_from_disk(input_queue)
image_size = 240
keep_probability = 0.8
weight_decay = 5e-5
image = preprocess(image, image_size, image_size, None)
batch_size = 100
epoch_size = 1000
embedding_size = 128
# Optional Image and Label Batching
image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, allow_smaller_final_batch=True)
This is the output of training an image classification model on about 200,000 ("20w") images. I set allow_smaller_final_batch=True in tf.train.batch. After some epochs the OutOfRangeError occurred.
I don't know the reason and thanks for the help.
Since you get an OutOfRangeError, it could be that you are training for more epochs than max_num_epoch, which will result in the slice_input_producer throwing this exception.
One possible workaround would be to remove num_epochs=max_num_epoch from your slice_input_producer call, since this will allow it to keep producing even after the maximum number of epochs has been reached.
I battled with this particular error for days before I finally found the cause. You are getting this error because one of your files is corrupted somewhere. Try running this code on a different set of training and test data.

Tensor Object is not Iterable with BasicLSTMCell

I have the following code:
def dense_layers(pool3):
with tf.variable_scope('local1') as scope:
# Move everything into depth so we can perform a single matrix multiply.
shape_d = pool3.get_shape()
shape = shape_d[1] * shape_d[2] * shape_d[3]
# tf_shape = tf.stack(shape)
tf_shape = 1024
print("shape:", shape, shape_d[1], shape_d[2], shape_d[3])
# So note that tf_shape = 1024, this means that we have 1024 features are fed into the network. And
# the batch size = 1024. Therefore, the aim is to divide the batch_size into num_steps so that
reshape = tf.reshape(pool3, [-1, tf_shape])
# Now we need to reshape/divide the batch_size into num_steps so that we would be feeding a sequence
# And note that most importantly is to have batch_partition_length followed by step_size in the parameter list.
lstm_inputs = tf.reshape(reshape, [batch_partition_length, step_size, tf_shape])
# print('RNN inputs shape: ', lstm_inputs.get_shape()) # -> (128, 8, 1024).
# Note that the state_size is the number of neurons.
lstm = tf.contrib.rnn.BasicLSTMCell(state_size)
lstm_outputs, final_state = tf.nn.dynamic_rnn(cell=lstm, inputs=lstm_inputs, initial_state=init_state)
tf.assign(init_state, final_state)
So, I am taking the output of the pool layer and trying to feed it into the LSTM in the network.
Initially I have declared the following:
state_size = 16
step_size = 8
batch_partition_length = int(batch_size / step_size)
init_state = tf.Variable(tf.zeros([batch_partition_length, state_size])) # -> [128, 16].
However, I am getting an error on this line:
lstm_outputs, final_state = tf.nn.dynamic_rnn(cell=lstm, inputs=lstm_inputs, initial_state=init_state)
As follows:
Traceback (most recent call last):
File "C:/Users/user/PycharmProjects/AffectiveComputing/Brady_with_LSTM.py", line 197, in <module>
predictions = dense_layers(conv_nets_output)
File "C:/Users/user/PycharmProjects/AffectiveComputing/Brady_with_LSTM.py", line 162, in dense_layers
lstm_outputs, final_state = tf.nn.dynamic_rnn(cell=lstm, inputs=lstm_inputs, initial_state=init_state)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 553, in dynamic_rnn
dtype=dtype)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 720, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2623, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2456, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2406, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 705, in _time_step
(output, new_state) = call_cell()
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 691, in <lambda>
call_cell = lambda: cell(input_t, state)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\contrib\rnn\python\ops\core_rnn_cell_impl.py", line 238, in __call__
c, h = state
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 504, in __iter__
raise TypeError("'Tensor' object is not iterable.")
TypeError: 'Tensor' object is not iterable.
Any help is much appreciated!!
The state for LSTMs really consists of two parts:
- State for the cell(s)
- Previous outputs
This is alluded to in the docs for BasicLSTMCell. This paper has a good explanation of how LSTMs work which will help you understand why you need to keep two sets of states in an LSTM implementation. The reason an error is being thrown is because you need to supply a tuple of tensors for the initial state.
That said you have two options:
1. Supply an initial state that consists of two tensors.
2. Let the RNN cell generate its own initial state.
You would usually only do 1. if you wanted to override default behavior. In this case you are using the default (zero) initial state so you can do 2.
lstm_outputs, final_state = tf.nn.dynamic_rnn(cell=lstm, inputs=lstm_inputs, dtype=tf.float32)

Prettytensor: Attempting to use uninitialized value

I'm following these tutorials:
https://www.youtube.com/watch?v=wuo4JdG3SvU&list=PL9Hr9sNUjfsmEu1ZniY0XpHSzl5uihcXZ
and prettytensor is introduced in tutorial 4.
Following the tutorial, I wrote this code to run a small neural network:
import tensorflow as tf
# Use PrettyTensor to simplify Neural Network construction.
import prettytensor as pt
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets('../data/MNIST/', one_hot=True)
# We know that MNIST images are 28 pixels in each dimension.
img_size = 28
# Images are stored in one-dimensional arrays of this length.
img_size_flat = img_size * img_size
# Tuple with height and width of images used to reshape arrays.
img_shape = (img_size, img_size)
# Number of colour channels for the images: 1 channel for gray-scale.
num_channels = 1
# Number of classes, one class for each of 10 digits.
num_classes = 10
# the placeholders
x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='x')
x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])
y_true = tf.placeholder(tf.float32, shape=[None, 10], name='y_true')
# use prettyTensor to build the model
# this will give us the predictions and the loss functions
x_pretty = pt.wrap(x_image)
with pt.defaults_scope(activation_fn=tf.nn.relu):
y_pred, loss = x_pretty.\
conv2d(kernel=5, depth=16, name='layer_conv1').\
max_pool(kernel=2, stride=2).\
conv2d(kernel=5, depth=36, name='layer_conv2').\
max_pool(kernel=2, stride=2).\
flatten().\
fully_connected(size=128, name='layer_fc1').\
softmax_classifier(class_count=10, labels=y_true)
# the model optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)
# the model testing
correct_prediction = tf.equal(tf.argmax(y_pred,1), tf.argmax(y_true,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# start the session
session = tf.InteractiveSession()
# Start the training
tf.global_variables_initializer().run(session = session)
train_batch_size = 64
for i in range(1000):
print("training batch ",i)
x_batch, y_true_batch = data.train.next_batch(train_batch_size)
session.run(optimizer, feed_dict={x:x_batch, y_true:y_true_batch})
When I tried to run it, I got the following error:
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value layer_conv1/bias
[[Node: layer_conv1/bias/read = Identity[T=DT_FLOAT, _class=["loc:@layer_conv1/bias"], _device="/job:localhost/replica:0/task:0/cpu:0"](layer_conv1/bias)]]
Caused by op u'layer_conv1/bias/read', defined at:
File "/home/gal/Documents/Workspace/EclipseWorkspace/Melanoma Classification!/tutorial4/tutorial4Test.py", line 31, in <module>
the full error trace:
Traceback (most recent call last):
File "/home/gal/Documents/Workspace/EclipseWorkspace/Melanoma Classification!/tutorial4/tutorial4Test.py", line 55, in <module>
session.run(optimizer, feed_dict={x:x_batch, y_true:y_true_batch})
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 766, in run
run_metadata_ptr)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 964, in _run
feed_dict_string, options, run_metadata)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1014, in _do_run
target_list, options, run_metadata)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1034, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value layer_conv1/bias
[[Node: layer_conv1/bias/read = Identity[T=DT_FLOAT, _class=["loc:@layer_conv1/bias"], _device="/job:localhost/replica:0/task:0/cpu:0"](layer_conv1/bias)]]
Caused by op u'layer_conv1/bias/read', defined at:
File "/home/gal/Documents/Workspace/EclipseWorkspace/Melanoma Classification!/tutorial4/tutorial4Test.py", line 31, in <module>
conv2d(kernel=5, depth=16, name='layer_conv1').\
File "/home/gal/anaconda2/lib/python2.7/site-packages/prettytensor/pretty_tensor_class.py", line 1981, in method
result = func(non_seq_layer, *args, **kwargs)
File "/home/gal/anaconda2/lib/python2.7/site-packages/prettytensor/pretty_tensor_image_methods.py", line 163, in __call__
y += self.variable('bias', [size[-1]], bias_init, dt=dtype)
File "/home/gal/anaconda2/lib/python2.7/site-packages/prettytensor/pretty_tensor_class.py", line 1695, in variable
collections=variable_collections)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 1024, in get_variable
custom_getter=custom_getter)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 850, in get_variable
custom_getter=custom_getter)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 346, in get_variable
validate_shape=validate_shape)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 331, in _true_getter
caching_device=caching_device, validate_shape=validate_shape)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 677, in _get_single_variable
expected_shape=shape)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 224, in __init__
expected_shape=expected_shape)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 370, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1424, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
op_def=op_def)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value layer_conv1/bias
[[Node: layer_conv1/bias/read = Identity[T=DT_FLOAT, _class=["loc:@layer_conv1/bias"], _device="/job:localhost/replica:0/task:0/cpu:0"](layer_conv1/bias)]]
So my question is: how can I solve this error?
This problem is caused by a bug in the 0.12rc0 release candidate of TensorFlow, and the fact that Pretty Tensor uses a deprecated TensorFlow API (for which I've opened an issue).
Until this bug is fixed, the best workaround I can think of is a hack. Add the following line at the top of your program, after import tensorflow as tf:
tf.GraphKeys.VARIABLES = tf.GraphKeys.GLOBAL_VARIABLES

FailedPreconditionError while trying to use RMSPropOptimizer on tensorflow

I am trying to use the RMSPropOptimizer for minimizing loss. Here's the part of the code that is relevant:
import tensorflow as tf
#build large convnet...
#...
opt = tf.train.RMSPropOptimizer(learning_rate=0.0025, decay=0.95)
#do stuff to get targets and loss...
#...
grads_and_vars = opt.compute_gradients(loss)
capped_grads_and_vars = [(tf.clip_by_value(g, -1, 1), v) for g, v in grads_and_vars]
opt_op = self.opt.apply_gradients(capped_grads_and_vars)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
while(1):
sess.run(opt_op)
Problem is as soon as I run this I get the following error:
W tensorflow/core/common_runtime/executor.cc:1091] 0x10a0bba40 Compute status: Failed precondition: Attempting to use uninitialized value train/output/bias/RMSProp
[[Node: RMSProp/update_train/output/bias/ApplyRMSProp = ApplyRMSProp[T=DT_FLOAT, use_locking=false, _device="/job:localhost/replica:0/task:0/cpu:0"](train/output/bias, train/output/bias/RMSProp, train/output/bias/RMSProp_1, RMSProp/learning_rate, RMSProp/decay, RMSProp/momentum, RMSProp/epsilon, clip_by_value_9)]]
[[Node: _send_MergeSummary/MergeSummary_0 = _Send[T=DT_STRING, client_terminated=true, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=-6901001318975381332, tensor_name="MergeSummary/MergeSummary:0", _device="/job:localhost/replica:0/task:0/cpu:0"](MergeSummary/MergeSummary)]]
Traceback (most recent call last):
File "dqn.py", line 213, in <module>
result = sess.run(opt_op)
File "/Users/home/miniconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 385, in run
results = self._do_run(target_list, unique_fetch_targets, feed_dict_string)
File "/Users/home/miniconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 461, in _do_run
e.code)
tensorflow.python.framework.errors.FailedPreconditionError: Attempting to use uninitialized value train/output/bias/RMSProp
[[Node: RMSProp/update_train/output/bias/ApplyRMSProp = ApplyRMSProp[T=DT_FLOAT, use_locking=false, _device="/job:localhost/replica:0/task:0/cpu:0"](train/output/bias, train/output/bias/RMSProp, train/output/bias/RMSProp_1, RMSProp/learning_rate, RMSProp/decay, RMSProp/momentum, RMSProp/epsilon, clip_by_value_9)]]
Caused by op u'RMSProp/update_train/output/bias/ApplyRMSProp', defined at:
File "dqn.py", line 159, in qLearnMinibatch
opt_op = self.opt.apply_gradients(capped_grads_and_vars)
File "/Users/home/miniconda2/lib/python2.7/site-packages/tensorflow/python/training/optimizer.py", line 288, in apply_gradients
update_ops.append(self._apply_dense(grad, var))
File "/Users/home/miniconda2/lib/python2.7/site-packages/tensorflow/python/training/rmsprop.py", line 103, in _apply_dense
grad, use_locking=self._use_locking).op
File "/Users/home/miniconda2/lib/python2.7/site-packages/tensorflow/python/training/gen_training_ops.py", line 171, in apply_rms_prop
grad=grad, use_locking=use_locking, name=name)
File "/Users/home/miniconda2/lib/python2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 659, in apply_op
op_def=op_def)
File "/Users/home/miniconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1904, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/Users/home/miniconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1083, in __init__
self._traceback = _extract_stack()
Note that I don't get this error if I am using the usual GradientDescentOptimizer. I am initializing my variables, as you can see above, but I don't know what 'train/output/bias/RMSProp' is, because I don't create any such variable. I only have 'train/output/bias/', which does get initialized above.
Thanks!
So for people from the future running into similar trouble, I found this post helpful:
Tensorflow: Using Adam optimizer
Basically, I was running
sess.run(tf.initialize_all_variables())
before I had defined my loss minimization op
loss = tf.square(targets)
#create the gradient descent op
grads_and_vars = opt.compute_gradients(loss)
capped_grads_and_vars = [(tf.clip_by_value(g, -self.clip_delta, self.clip_delta), v) for g, v in grads_and_vars] #gradient capping
self.opt_op = self.opt.apply_gradients(capped_grads_and_vars)
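These optimizer ops need to be defined before running the initialization op, because apply_gradients creates the RMSProp slot variables (such as 'train/output/bias/RMSProp') that must also be initialized!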