InvalidArgumentError: Retval[0] does not have value is Thrown when Using tflearn Trainer - tensorflow

I followed this example on using tflearn trainer and coded this:
image_paths, labels = dataset_utils.read_dataset_list('../test/dummy_labels_file.txt')
data_dir = "../test/dummy_data/"
images = dataset_utils.read_images(data_dir=data_dir, image_paths=image_paths, image_extension='png')
print('Done reading images')
images = dataset_utils.resize(images, (1596, 48))
images = dataset_utils.transpose(images)
labels = dataset_utils.encode(labels)
x_train, x_test, y_train, y_test = dataset_utils.split(features=images, test_size=0.5, labels=labels)
... # parameters initiailized here
with tf.Graph().as_default():
X = tf.placeholder(tf.float32, [None, None, num_features])
Y = tf.placeholder(tf.int32)
sparse_Y = network_utils.dense_to_sparse(Y, num_classes)
seq_lens = tf.placeholder(tf.int32, [None])
def dnn(x):
layer = network_utils.bidirectional_grid_lstm(inputs=x, num_hidden=num_hidden_units)
layer = network_utils.get_time_major(inputs=layer, batch_size=network_utils.get_shape(x)[0],
num_classes=num_classes, num_hidden_units=num_hidden_units * 2)
return layer
net = dnn(X)
cost = network_utils.cost(network_utils.ctc_loss(inputs=net, labels=sparse_Y, sequence_length=seq_lens))
optimizer = network_utils.get_optimizer(learning_rate=learning_rate, optimizer_name=optimizer_name)
train_op = tflearn.TrainOp(loss=cost, optimizer=optimizer)
trainer = tflearn.Trainer(train_ops=train_op)
trainer.fit(feed_dicts={X: x_train, Y: y_train, seq_lens: dataset_utils.get_seq_lens(x_train)},
val_feed_dicts={X: x_test, Y: y_test, seq_lens: dataset_utils.get_seq_lens(x_test)},
n_epoch=1) #error happens here
The training starts when I run it but I encounter this error:
Traceback (most recent call last):
File ".../Optimized_OCR/main/train_using_tflearn_trainer.py", line 53, in <module>
tf.app.run(main=main)
File "...\tensorflow\python\platform\app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File ".../Optimized_OCR/main/train_using_tflearn_trainer.py", line 49, in main
n_epoch=1)
File "...\tflearn\helpers\trainer.py", line 338, in fit
show_metric)
File "...\tflearn\helpers\trainer.py", line 817, in _train
feed_batch)
File "...\tensorflow\python\client\session.py", line 889, in run
run_metadata_ptr)
File "...\tensorflow\python\client\session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "...\tensorflow\python\client\session.py", line 1317, in _do_run
options, run_metadata)
File "...\tensorflow\python\client\session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Retval[0] does not have value
Has anyone else encountered this? How do fix this? I do want to use tflearn trainer to have an easier time training and testing my ocr models and I think this is the only thing I need to fix to be able to use it.

Related

Feeding example to tf predictor.from_saved_model() for estimator trained with tf hub module

I try to export the model for text classification with tf hub modules, and then infer a prediction from it for a single string example using predictor.from_saved_model(). I saw some examples of similar ideas, but still couldn't make it work for the case when using tf hub modules to build features. Here is what I do:
train_input_fn = tf.estimator.inputs.pandas_input_fn(
train_df, train_df['label_ids'], num_epochs= None, shuffle=True)
# Prediction on the whole training set.
predict_train_input_fn = tf.estimator.inputs.pandas_input_fn(
train_df, train_df['label_ids'], shuffle=False)
embedded_text_feature_column = hub.text_embedding_column(
key='sentence',
module_spec='https://tfhub.dev/google/nnlm-de-dim128/1')
#Estimator
estimator = tf.estimator.DNNClassifier(
hidden_units=[500, 100],
feature_columns=[embedded_text_feature_column],
n_classes=num_of_class,
optimizer=tf.train.AdagradOptimizer(learning_rate=0.003) )
# Training
estimator.train(input_fn=train_input_fn, steps=1000)
#prediction on training set
train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
print('Training set accuracy: {accuracy}'.format(**train_eval_result))
feature_spec = tf.feature_column.make_parse_example_spec([embedded_text_feature_column])
serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
export_dir_base = self.cfg['model_path']
servable_model_path = estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn)
# Example message for inference
message = "Was ist denn los"
saved_model_predictor = predictor.from_saved_model(export_dir=servable_model_path)
content_tf_list = tf.train.BytesList(value=[str.encode(message)])
example = tf.train.Example(
features=tf.train.Features(
feature={
'sentence': tf.train.Feature(
bytes_list=content_tf_list
)
}
)
)
with tf.python_io.TFRecordWriter('the_message.tfrecords') as writer:
writer.write(example.SerializeToString())
reader = tf.TFRecordReader()
data_path = 'the_message.tfrecords'
filename_queue = tf.train.string_input_producer([data_path], num_epochs=1)
_, serialized_example = reader.read(filename_queue)
output_dict = saved_model_predictor({'inputs': [serialized_example]})
And the output:
Traceback (most recent call last):
File "/Users/dimitrs/component-pythia/src/pythia.py", line 321, in _train
model = algo.generate_model(samples, generation_id)
File "/Users/dimitrs/component-pythia/src/algorithm_layer/algorithm.py", line 56, in generate_model
model = self._process_training(samples, generation)
File "/Users/dimitrs/component-pythia/src/algorithm_layer/tf_hub_classifier.py", line 91, in _process_training
output_dict = saved_model_predictor({'inputs': [serialized_example]})
File "/Users/dimitrs/anaconda3/envs/pythia/lib/python3.6/site-packages/tensorflow/contrib/predictor/predictor.py", line 77, in __call__
return self._session.run(fetches=self.fetch_tensors, feed_dict=feed_dict)
File "/Users/dimitrs/anaconda3/envs/pythia/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
run_metadata_ptr)
File "/Users/dimitrs/anaconda3/envs/pythia/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "/Users/dimitrs/anaconda3/envs/pythia/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
run_metadata)
File "/Users/dimitrs/anaconda3/envs/pythia/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: Unable to get element as bytes.
Isn't serialized_example the right input that is suggested by serving_input_receiver_fn ?
So, all I need was serialized_example = example.SerializeToString()
Writing the example on a file requires to start a session before reading it back. Simply serialising is enough:
# Example message for inference
message = "Was ist denn los"
saved_model_predictor = predictor.from_saved_model(export_dir=servable_model_path)
content_tf_list = tf.train.BytesList(value=[message.encode('utf-8')])
sentence = tf.train.Feature(bytes_list=content_tf_list)
sentence_dict = {'sentence': sentence}
features = tf.train.Features(feature=sentence_dict)
example = tf.train.Example(features=features)
serialized_example = example.SerializeToString()
output_dict = saved_model_predictor({'inputs': [serialized_example]})

InvalidArgumentError : ConcatOp : Dimensions of inputs should match

Tensorflow 1.7 when using dynamic_rnn.It runs fine at first , but at the 32th(it changes when i run the code) step , the error appears. When i used smaller batch , it seems the code can run longer , however the error still poped up .Just cannt figure out what's wrong.
from mapping import *
def my_input_fn(features, targets, batch_size=20, shuffle=True, num_epochs=None, sequece_lenth=None):
ds = tf.data.Dataset.from_tensor_slices(
(features, targets, sequece_lenth)) # warning: 2GB limit
ds = ds.batch(batch_size).repeat(num_epochs)
if shuffle:
ds = ds.shuffle(10000)
features, labels, sequence = ds.make_one_shot_iterator().get_next()
return features, labels, sequence
def lstm_cell(lstm_size=50):
return tf.contrib.rnn.BasicLSTMCell(lstm_size)
class RnnModel:
def __init__(self,
batch_size,
hidden_units,
time_steps,
num_features
):
self.batch_size = batch_size
self.hidden_units = hidden_units
stacked_lstm = tf.contrib.rnn.MultiRNNCell(
[lstm_cell(i) for i in self.hidden_units])
self.initial_state = stacked_lstm.zero_state(batch_size, tf.float32)
self.model = stacked_lstm
self.state = self.initial_state
self.time_steps = time_steps
self.num_features = num_features
def loss_mean_squre(self, outputs, targets):
pos = tf.add(outputs, tf.ones(self.batch_size))
eve = tf.div(pos, 2)
error = tf.subtract(eve,
targets)
return tf.reduce_mean(tf.square(error))
def train(self,
num_steps,
learningRate,
input_fn,
inputs,
targets,
sequenceLenth):
periods = 10
step_per_periods = int(num_steps / periods)
input, target, sequence = input_fn(inputs, targets, self.batch_size, shuffle=True, sequece_lenth=sequenceLenth)
initial_state = self.model.zero_state(self.batch_size, tf.float32)
outputs, state = tf.nn.dynamic_rnn(self.model, input, initial_state=initial_state)
loss = self.loss_mean_squre(tf.reshape(outputs, [self.time_steps, self.batch_size])[-1], target)
optimizer = tf.train.AdamOptimizer(learning_rate=learningRate)
grads_and_vars = optimizer.compute_gradients(loss, self.model.variables)
optimizer.apply_gradients(grads_and_vars)
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
for i in range(num_steps):
sess.run(init_op)
state2, current_loss= sess.run([state, loss])
if i % step_per_periods == 0:
print("period " + str(int(i / step_per_periods)) + ":" + str(current_loss))
return self.model, self.state
def processFeature(df):
df = df.drop('class', 1)
features = []
for i in range(len(df["vecs"])):
features.append(df["vecs"][i])
aa = pd.Series(features).tolist() # tramsform into list
featuresList = []
for i in features:
p1 = []
for k in i:
p1.append(list(k))
featuresList.append(p1)
return featuresList
def processTargets(df):
selected_features = df[
"class"]
processed_features = selected_features.copy()
return tf.convert_to_tensor(processed_features.astype(float).tolist())
if __name__ == '__main__':
dividNumber = 30
"""
some code here to modify my data to input
it looks like this:
inputs before use input function : [fullLenth, charactorLenth, embeddinglenth]
"""
model = RnnModel(15, [100, 80, 80, 1], time_steps=dividNumber, num_features=25)
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
And error is under here
Traceback (most recent call last):
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1330, in _do_call
return fn(*args)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1315, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1423, in _call_tf_sessionrun
status, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 516, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:/programming/mlwords/dnn_gragh.py", line 198, in <module>
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
File "D:/programming/mlwords/dnn_gragh.py", line 124, in train
state2, current_loss, nowAccuracy = sess.run([state, loss, accuracy])
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 908, in run
run_metadata_ptr)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1143, in _run
feed_dict_tensor, options, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1324, in _do_run
run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1343, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
Caused by op 'rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat', defined at:
File "D:/programming/mlwords/dnn_gragh.py", line 198, in <module>
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
File "D:/programming/mlwords/dnn_gragh.py", line 95, in train
outputs, state = tf.nn.dynamic_rnn(self.model, input, initial_state=initial_state)#,sequence_length=sequence
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 627, in dynamic_rnn
dtype=dtype)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 824, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3205, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2943, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2880, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3181, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv))
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 795, in _time_step
(output, new_state) = call_cell()
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 781, in <lambda>
call_cell = lambda: cell(input_t, state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 232, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\layers\base.py", line 714, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1283, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 339, in __call__
*args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\layers\base.py", line 714, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 620, in call
array_ops.concat([inputs, h], 1), self._kernel)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1181, in concat
return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1101, in concat_v2
"ConcatV2", values=values, axis=axis, name=name)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\ops.py", line 3309, in create_op
op_def=op_def)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1669, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
this is my code used to check my input
def checkData(inputs, targets, sequencelence):
batch_size = 20
features, target, sequece = my_input_fn(inputs, targets, batch_size=batch_size, shuffle=True, num_epochs=None,
sequece_lenth=sequencelence)
with tf.Session() as sess:
for i in range(1000):
features1, target1, sequece1 = sess.run([features, target, sequece])
assert len(features1) == batch_size
for sentence in features1 :
assert len(sentence) == 30
for word in sentence:
assert len(word) == 25
assert len(target1) == batch_size
assert len(sequece1) == batch_size
print(target1)
print("OK")
The error is coming from LSTMCell.call call method. There we are trying to tf.concat([inputs, h], 1) meaning that we want to concatenate the next input with the current hidden state before matmul'ing with the kernel variables matrix. The error is saying that you can't do it because the batch (0th) dimensions don't match up - your input is shaped [20,25] and your hidden state is shaped [30,100].
For some reason on your 32nd iteration, or whenever you see the error, the input is not batched to 30, but only to 20. This usually happens at the end of your training data when the total number of training examples does not evenly divide your batch size. This hypothesis is also consistent with "When i used smaller batch , it seems the code can run longer" statement.
I had the same issue. When I corrected the image input size to match the input shape, it ran without errors.

String input to categorical column via dataset

I'm trying to learn how to use the Estimator API, using input_fn to provide Dataset backed input to a feature_column generated input layer.
My code looks like
import tensorflow as tf import random
tf.logging.set_verbosity(tf.logging.DEBUG)
def input_fn():
def gen():
for i in range(100000):
for j in range(10):
yield {"in": str(j)}, [str(j+1)]
data = tf.data.Dataset.from_generator(gen, ({"in": tf.string}, tf.string))
data = data.batch(10)
iterator = data.make_one_shot_iterator()
return iterator.get_next()
vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
key="in",
vocabulary_list=map(lambda i: str(i), range(11)))
embedding_column = tf.feature_column.embedding_column(
categorical_column=vocabulary_feature_column,
dimension=2)
with tf.Session() as sess:
print(sess.run(input_fn()))
classifier = tf.estimator.DNNClassifier(
feature_columns = [embedding_column],
hidden_units = [5,5],
n_classes = 11,
model_dir = '/tmp/predict/snap')
classifier.train(
input_fn=input_fn)
but running it I get
Traceback (most recent call last):
File "predict.py", line 33, in
input_fn=input_fn)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 302, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 711, in _train_model
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 694, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 334, in _model_fn
config=config)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 190, in _dnn_model_fn
logits = logit_fn(features=features, mode=mode)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 89, in dnn_logit_fn
features=features, feature_columns=feature_columns)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 230, in input_layer
trainable=trainable)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 1837, in _get_dense_tensor
inputs, weight_collections=weight_collections, trainable=trainable)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 2123, in _get_sparse_tensors
return _CategoricalColumn.IdWeightPair(inputs.get(self), None)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 1533, in get
transformed = column._transform_feature(self) # pylint: disable=protected-access
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 2091, in _transform_feature
input_tensor = _to_sparse_input(inputs.get(self.key))
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 1631, in _to_sparse_input
raise ValueError('Undefined input_tensor shape.')
ValueError: Undefined input_tensor shape.
Looking at the tf sources I get the impression I that the categorical_column_with_vocabulary_list expects a tensor as output instead of a string, but I have a hard time understanding how to make my input_fn provide that the right way.
Does anyone have any idea what I'm doing wrong here?
As a comparison, the following code works perfectly fine: https://pastebin.com/28QUNAjA
EDIT
I noticed that replacing tf.data.Dataset.from_generator with tf.data.Dataset.from_tensor_slices makes the code run.
I.e. the following actually works:
import tensorflow as tf
import random
tf.logging.set_verbosity(tf.logging.DEBUG)
def input_fn():
data = tf.data.Dataset.from_tensor_slices(({"in": map(lambda i: str(i), range(10))}, range(1,11)))
data = data.repeat(1000)
data = data.batch(10)
iterator = data.make_one_shot_iterator()
return iterator.get_next()
vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
key="in",
vocabulary_list=map(lambda i: str(i), range(11)))
embedding_column = tf.feature_column.embedding_column(
categorical_column=vocabulary_feature_column,
dimension=2)
with tf.Session() as sess:
print(sess.run(input_fn()))
classifier = tf.estimator.DNNClassifier(
feature_columns = [embedding_column],
hidden_units = [5,5],
n_classes = 11,
model_dir = '/usr/local/google/home/zond/tmp/predict/snap')
classifier.train(
input_fn=input_fn)
This ought to be a bug, so I created https://github.com/tensorflow/tensorflow/issues/15178.

MomentumOptimizer error: Attempting to use uninitialized value Variable_2/Momentum

I'm learning TensorFlow. I was trying tf.train.MomentumOptimizer but I got the following error:
Traceback (most recent call last):
File "relu.py", line 98, in <module>
learner.run(stop=0.01, print_epoch=True)
File "relu.py", line 70, in run
self.sess.run(train_step, feed_dict={self.x: batch_xs, self.y_: batch_ys})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 767, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 965, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1015, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1035, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value Variable_2/Momentum
[[Node: Momentum/update_Variable_2/ApplyMomentum = ApplyMomentum[T=DT_FLOAT, _class=["loc:#Variable_2"], use_locking=false, use_nesterov=false, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable_2, Variable_2/Momentum, Momentum/learning_rate, gradients/add_1_grad/tuple/control_dependency_1, Momentum/momentum)]]
Caused by op u'Momentum/update_Variable_2/ApplyMomentum', defined at:
File "relu.py", line 98, in <module>
learner.run(stop=0.01, print_epoch=True)
File "relu.py", line 55, in run
train_step = self.optimizer.minimize(self.cross_entropy)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 289, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 413, in apply_gradients
update_ops.append(processor.update_op(self, grad))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 61, in update_op
return optimizer._apply_dense(g, self._v) # pylint: disable=protected-access
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/momentum.py", line 69, in _apply_dense
use_nesterov=self._use_nesterov).op
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/gen_training_ops.py", line 348, in apply_momentum
use_nesterov=use_nesterov, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2327, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1226, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value Variable_2/Momentum
[[Node: Momentum/update_Variable_2/ApplyMomentum = ApplyMomentum[T=DT_FLOAT, _class=["loc:#Variable_2"], use_locking=false, use_nesterov=false, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable_2, Variable_2/Momentum, Momentum/learning_rate, gradients/add_1_grad/tuple/control_dependency_1, Momentum/momentum)]]
And following is my code:
import time
import numpy as np
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
class ReluMnistNet:
def __init__(self, optimizer=None):
self.varlist = []
self.optimizer = optimizer or tf.train.GradientDescentOptimizer(0.01)
# fetch dataset
self.mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# prepare environment
layers = [ 100 ]
input_layer = 784
output_layer = 10
self.x = tf.placeholder(tf.float32, [None, input_layer])
last_layer = input_layer
y = self.x
for layer in layers:
b = tf.Variable(tf.zeros([layer]))
self.varlist.append(b)
W = tf.Variable(tf.random_normal([last_layer,layer], stddev=0.01))
self.varlist.append(W)
y = tf.nn.relu( tf.matmul(y,W) ) + b
last_layer = layer
b = tf.Variable(tf.zeros([output_layer]))
self.varlist.append(b)
W = tf.Variable(tf.random_normal([last_layer,output_layer], stddev=0.01))
self.varlist.append(W)
self.y = tf.matmul(y,W) + b
self.y_ = tf.placeholder(tf.float32, [None, 10])
self.cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=self.y, labels=self.y_) )
def prepare(self):
# init = tf.initialize_variables(self.varlist)
init = tf.initialize_all_variables()
self.sess = tf.Session()
self.sess.run(init)
def run(self, batch_size=100, stop=0.001, print_epoch=False):
mnist = self.mnist
data_size = mnist.train.images.shape[0]
last_accuracy = 0
accuracy_history = []
train_step = self.optimizer.minimize(self.cross_entropy)
time1 = time.time()
for i in range(10000):
for j in range(data_size/batch_size):
# random batch
batch_idx = np.arange(data_size)
np.random.shuffle(batch_idx)
batch_idx = batch_idx[0:batch_size]
batch_xs = mnist.train.images[batch_idx]
batch_ys = mnist.train.labels[batch_idx]
# ordered batch
# start = j * batch_size
# end = (j+1) * batch_size
# batch_xs, batch_ys = mnist.train.images[start:end], mnist.train.labels[start:end]
self.sess.run(train_step, feed_dict={self.x: batch_xs, self.y_: batch_ys})
# test the accuracy
correct_prediction = tf.equal( tf.argmax(self.y,1), tf.argmax(self.y_,1) )
accuracy = tf.reduce_mean( tf.cast(correct_prediction, tf.float32) )
accuracy = self.sess.run(accuracy, feed_dict = {self.x: mnist.test.images, self.y_: mnist.test.labels})
accuracy_history.append(accuracy)
if print_epoch:
print i, accuracy
if last_accuracy != 0 and abs(last_accuracy-accuracy) < stop:
break
last_accuracy = accuracy
time2 = time.time()
return accuracy_history, (time2-time1)
def close(self):
if not (self.sess is None):
self.sess.close()
self.sess = None
if __name__ == '__main__':
learner = ReluMnistNet()
# learner.optimizer = tf.train.GradientDescentOptimizer(0.01)
learner.optimizer = tf.train.MomentumOptimizer(0.01, momentum=0.9)
for i in range(10):
learner.prepare()
learner.run(stop=0.01, print_epoch=True)
learner.close()
It seems like a variable named Momentum is uninitialized? However, by calling learner.prepare(), I have called tf.initialize_all_variables(). Even more, I have no variable named Momentum. Why does this happens?
In your code you are calling minimize after initializing global variables
instead you have to do:
self.cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=self.y, labels=self.y_) )
self.optimize = self.optimizer.minimize(self.cross_entropy)
and in run function instead of
train_step = self.optimizer.minimize(self.cross_entropy)
you should call
train_step = self.optimize
P.S
Momentun is the default name for the MomentumOptimizer

Prettytensor: Attempting to use uninitialized value

I'm following these tutorials:
https://www.youtube.com/watch?v=wuo4JdG3SvU&list=PL9Hr9sNUjfsmEu1ZniY0XpHSzl5uihcXZ
and prettytensor is introduced in tutorial 4.
Following the tutorial, i wrote this code to run a small neural network:
import tensorflow as tf
# Use PrettyTensor to simplify Neural Network construction.
import prettytensor as pt
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets('../data/MNIST/', one_hot=True)
# We know that MNIST images are 28 pixels in each dimension.
img_size = 28
# Images are stored in one-dimensional arrays of this length.
img_size_flat = img_size * img_size
# Tuple with height and width of images used to reshape arrays.
img_shape = (img_size, img_size)
# Number of colour channels for the images: 1 channel for gray-scale.
num_channels = 1
# Number of classes, one class for each of 10 digits.
num_classes = 10
# the placeholders
x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='x')
x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])
y_true = tf.placeholder(tf.float32, shape=[None, 10], name='y_true')
# use prettyTensor to build the model
# this will give us the predictions and the loss functions
x_pretty = pt.wrap(x_image)
with pt.defaults_scope(activation_fn=tf.nn.relu):
y_pred, loss = x_pretty.\
conv2d(kernel=5, depth=16, name='layer_conv1').\
max_pool(kernel=2, stride=2).\
conv2d(kernel=5, depth=36, name='layer_conv2').\
max_pool(kernel=2, stride=2).\
flatten().\
fully_connected(size=128, name='layer_fc1').\
softmax_classifier(class_count=10, labels=y_true)
# the model optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)
# the model testing
correct_prediction = tf.equal(tf.argmax(y_pred,1), tf.argmax(y_true,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# start the session
session = tf.InteractiveSession()
# Start the training
tf.global_variables_initializer().run(session = session)
train_batch_size = 64
for i in range(1000):
print("training batch ",i)
x_batch, y_true_batch = data.train.next_batch(train_batch_size)
session.run(optimizer, feed_dict={x:x_batch, y_true:y_true_batch})
When i tried to run it, I got the following error:
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value layer_conv1/bias
[[Node: layer_conv1/bias/read = Identity[T=DT_FLOAT, _class=["loc:#layer_conv1/bias"], _device="/job:localhost/replica:0/task:0/cpu:0"](layer_conv1/bias)]]
Caused by op u'layer_conv1/bias/read', defined at:
File "/home/gal/Documents/Workspace/EclipseWorkspace/Melanoma Classification!/tutorial4/tutorial4Test.py", line 31, in <module>
the full error trace:
Traceback (most recent call last):
File "/home/gal/Documents/Workspace/EclipseWorkspace/Melanoma Classification!/tutorial4/tutorial4Test.py", line 55, in <module>
session.run(optimizer, feed_dict={x:x_batch, y_true:y_true_batch})
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 766, in run
run_metadata_ptr)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 964, in _run
feed_dict_string, options, run_metadata)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1014, in _do_run
target_list, options, run_metadata)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1034, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value layer_conv1/bias
[[Node: layer_conv1/bias/read = Identity[T=DT_FLOAT, _class=["loc:#layer_conv1/bias"], _device="/job:localhost/replica:0/task:0/cpu:0"](layer_conv1/bias)]]
Caused by op u'layer_conv1/bias/read', defined at:
File "/home/gal/Documents/Workspace/EclipseWorkspace/Melanoma Classification!/tutorial4/tutorial4Test.py", line 31, in <module>
conv2d(kernel=5, depth=16, name='layer_conv1').\
File "/home/gal/anaconda2/lib/python2.7/site-packages/prettytensor/pretty_tensor_class.py", line 1981, in method
result = func(non_seq_layer, *args, **kwargs)
File "/home/gal/anaconda2/lib/python2.7/site-packages/prettytensor/pretty_tensor_image_methods.py", line 163, in __call__
y += self.variable('bias', [size[-1]], bias_init, dt=dtype)
File "/home/gal/anaconda2/lib/python2.7/site-packages/prettytensor/pretty_tensor_class.py", line 1695, in variable
collections=variable_collections)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 1024, in get_variable
custom_getter=custom_getter)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 850, in get_variable
custom_getter=custom_getter)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 346, in get_variable
validate_shape=validate_shape)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 331, in _true_getter
caching_device=caching_device, validate_shape=validate_shape)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 677, in _get_single_variable
expected_shape=shape)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 224, in __init__
expected_shape=expected_shape)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 370, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1424, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
op_def=op_def)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/gal/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value layer_conv1/bias
[[Node: layer_conv1/bias/read = Identity[T=DT_FLOAT, _class=["loc:#layer_conv1/bias"], _device="/job:localhost/replica:0/task:0/cpu:0"](layer_conv1/bias)]]
So my question is, How can i solve this error?
This problem is caused by a bug in the 0.12rc0 release candidate of TensorFlow, and the fact that Pretty Tensor uses a deprecated TensorFlow API (for which I've opened an issue).
Until this bug is fixed, the best workaround I can think of is a hack. Add the following line at the top of your program, after import tensorflow as tf:
tf.GraphKeys.VARIABLES = tf.GraphKeys.GLOBAL_VARIABLES