Can't restore model with tf.contrib.layers.convolution2d_transpose applied - tensorflow

I'm trying to restore a model using following code:
# Import the graph definition stored next to the checkpoint; the returned
# object is then used to restore the saved values.
new_saver = tf.train.import_meta_graph(model_path+'.meta')
# Load the checkpoint at model_path into the existing session `sess`.
new_saver.restore(sess, model_path)
# Handle to the default graph, used afterwards to look tensors up by name.
g=tf.get_default_graph()
And for each weight or bias in the original graph, I called g.get_tensor_by_name().
But when I tried to do this on a deconv2d layer, which is something like below:
def deconv2d(self,inputs, num_outputs, kernel_shape, g,scope,strides=[1, 1]):
# Adds a tf.contrib.layers.convolution2d_transpose layer on `inputs` inside
# the variable scope `scope`, with 'SAME' padding.
# `g` is the graph in which the saved weights/biases tensors are looked up by name.
# NOTE(review): the tensors fetched below are passed directly as
# weights_initializer / biases_initializer; the ValueError in the traceback
# that follows is raised from this call.
with tf.variable_scope(scope) as scope:
weights_initializer = g.get_tensor_by_name("prsr/conditioning/deconv/Conv2d_transpose/weights:0")
biases_initializer = g.get_tensor_by_name("prsr/conditioning/deconv/Conv2d_transpose/biases:0")
return tf.contrib.layers.convolution2d_transpose(inputs=inputs, num_outputs=num_outputs,kernel_size=kernel_shape,stride=strides, \
padding='SAME', weights_initializer=weights_initializer,biases_initializer=biases_initializer)
it failed and showed following error:
File "restore.py", line 41, in deconv2d
padding='SAME', weights_initializer=weights_initializer,biases_initializer=biases_initializer)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 177, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1126, in convolution2d_transpose
outputs = layer.apply(inputs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 323, in apply
return self.__call__(inputs, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 289, in __call__
self.build(input_shapes[0])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/convolutional.py", line 1043, in build
dtype=self.dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1033, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 932, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 349, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 278, in variable_getter
variable_getter=functools.partial(getter, **kwargs))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 228, in _add_variable
trainable=trainable and self.trainable)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1327, in layer_variable_getter
return _model_variable_getter(getter, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1316, in _model_variable_getter
custom_getter=getter, use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 177, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 259, in model_variable
use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 177, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 214, in variable
use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 341, in _true_getter
use_resource=use_resource)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 638, in _get_single_variable
raise ValueError("If initializer is a constant, do not specify shape.")
ValueError: If initializer is a constant, do not specify shape.
I don't know which shape this refers to, and I don't think weights_initializer and biases_initializer are constants; they are tensors, right? By the way, I'm very sure that those two tensors, prsr/conditioning/deconv/Conv2d_transpose/weights and prsr/conditioning/deconv/Conv2d_transpose/biases, exist in the original graph, since I checked this using print_tensors_in_checkpoint_file, and I can actually see the values.
So how can I restore model which applies this tf.contrib.layers.convolution2d_transpose() layer? I searched a lot on both stackoverflow and github, but nothing worked. Any help would be appreciated.

weights_initializer and biases_initializer are not what you think they are. You probably think of those two tensors as the initial values for the weights used in the deconvolution, right? However, the initializer argument is a function, not a tensor, and it should look something like this:
def my_initializer(shape, dtype=tf.float32, partition_info=None):
# Template for an initializer function: it receives the requested `shape`
# (plus `dtype` and `partition_info`) and returns the tensor to use as the
# initial value.
# do some computation to build up a tensor of the given shape
# (`that_tensor` below is a placeholder for the result of that computation)
return that_tensor
You could then use this initializer like so:
tf.contrib.layers.convolution2d_transpose(inputs=inputs, ..., weights_initializer=my_initializer)
So, as a solution to your problem, I think the following should work:
def weights_initializer(shape, dtype=tf.float32, partition_info=None):
    """Initializer function that returns the restored deconv weights tensor.

    Uses the ``(shape, dtype, partition_info)`` initializer-function
    signature so it can be passed as ``weights_initializer``.  All three
    arguments are accepted only for signature compatibility and are ignored:
    the result is whatever tensor is stored under the fixed name below in the
    default graph, so its shape must already match the layer being built.
    """
    # Looked up at call time; assumes the saved graph has already been
    # imported into the default graph.
    return tf.get_default_graph().get_tensor_by_name(
        "prsr/conditioning/deconv/Conv2d_transpose/weights:0")
This feels a bit hacky in my opinion, though. Why do you want to load a graph and then use pre-trained weights in a new operation? Why are those weights not associated with this operation before already when you set up the initial model?
PS: When dealing with variables you might find tf.get_variable come in handy. If you create variables using tf.get_variable you can later retrieve those variables with tf.get_variable again, without having to call the cumbersome get_tensor_by_name. Check this for more info.

Related

How to finalize a model in Keras

I have a straightforward model developed in Keras:
....
model = Model(input, output)
model.compile(optimizer='adam', loss='categorical_crossentropy')
# Finalize the default graph so that it can no longer be modified.
graph = tf.compat.v1.get_default_graph()
graph.finalize()
# NOTE(review): per the traceback that follows, fit() still creates ops (the
# Keras learning-phase placeholder, while building the train function), which
# is what raises "Graph is finalized and cannot be modified." at this point.
history = model.fit(X, y, epochs=30)
Since I'm dealing with some memory leak problems, it seemed like a good idea to finalize the graph to prevent the mentioned issue. But when I do, I get an exception RuntimeError: Graph is finalized and cannot be modified.:
Traceback (most recent call last):
File "./train.py", line 43, in <module>
history = model.fit(X, y, epochs=30)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py", line 780, in fit
steps_name='steps_per_epoch')
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_arrays.py", line 157, in model_iteration
f = _make_execution_function(model, mode)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_arrays.py", line 532, in _make_execution_function
return model._make_execution_function(mode)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py", line 2276, in _make_execution_function
self._make_train_function()
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py", line 2212, in _make_train_function
if not isinstance(K.symbolic_learning_phase(), int):
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py", line 299, in symbolic_learning_phase
False, shape=(), name='keras_learning_phase')
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/array_ops.py", line 2159, in placeholder_with_default
return gen_array_ops.placeholder_with_default(input, shape, name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 6406, in placeholder_with_default
"PlaceholderWithDefault", input=input, shape=shape, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 527, in _apply_op_helper
preferred_dtype=default_dtype)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 1224, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py", line 305, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py", line 246, in constant
allow_broadcast=True)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py", line 290, in _constant_impl
name=name).outputs[0]
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3588, in create_op
self._check_not_finalized()
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3225, in _check_not_finalized
raise RuntimeError("Graph is finalized and cannot be modified.")
RuntimeError: Graph is finalized and cannot be modified.
There's nothing custom in this model, all the layers are from Keras library. And I'm using Tensorflow 1.14 and the Keras that comes with it (tensorflow.keras).
My question is, what are my options here? How can I pinpoint the reason for graph change? Or maybe I'm finalizing the graph wrong!?
[UPDATE]
To make sure that the problem is not with my setup and model, I followed the example provided in Tensorflow docs (follow the Colab link). I just added the two lines of code to finalize the graph just before calling the fit method. And I faced the same error. So my question stands, how do you finalize a model in Keras?

Save Model for Serving but "ValueError: Both labels and logits must be provided." when trying to export model

I wanted to save a model to do some predictions on specific pictures. Here is my serving function:
def _serving_input_receiver_fn():
# Builds the serving-time input: a single float32 placeholder that is used
# both as the model's features and as the receiver tensor.
# Note: only handles one image at a time
# NOTE(review): the leading dimension is None, so the placeholder itself does
# not restrict the batch to one image - confirm what the note above means.
feat = tf.placeholder(tf.float32, shape=[None, 120, 50, 1])
return tf.estimator.export.TensorServingInputReceiver(features=feat, receiver_tensors=feat)
and here is where I export the model:
# Export the estimator under <FLAGS.model_dir>/export, passing
# _serving_input_receiver_fn to describe the serving inputs.
export_dir_base = os.path.join(FLAGS.model_dir, 'export')
export_dir = estimator.export_savedmodel(
export_dir_base, _serving_input_receiver_fn)
But I get the following error:
ValueError: Both labels and logits must be provided.
Now, I don't understand this error, since the serving function should just create a placeholder so that I can later feed some images through it to make predictions with the saved model.
Here is the whole traceback:
Traceback (most recent call last):
File "/home/cezary/models/official/mnist/mnist_tpu.py", line 222, in <module>
tf.app.run()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "/home/cezary/models/official/mnist/mnist_tpu.py", line 206, in main
export_dir_base, _serving_input_receiver_fn)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 650, in export_savedmodel
mode=model_fn_lib.ModeKeys.PREDICT)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 703, in _export_saved_model_for_mode
strip_default_attrs=strip_default_attrs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 811, in _export_all_saved_models
mode=model_fn_lib.ModeKeys.PREDICT)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1971, in _add_meta_graph_for_mode
mode=mode)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 879, in _add_meta_graph_for_mode
config=self.config)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1992, in _call_model_fn
features, labels, mode, config)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1107, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2203, in _model_fn
features, labels, is_export_mode=is_export_mode)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1131, in call_without_tpu
return self._call_model_fn(features, labels, is_export_mode=is_export_mode)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1337, in _call_model_fn
estimator_spec = self._model_fn(features=features, **kwargs)
File "/home/cezary/models/official/mnist/mnist_tpu.py", line 95, in model_fn
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_impl.py", line 156, in sigmoid_cross_entropy_with_logits
labels, logits)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 1777, in _ensure_xent_args
raise ValueError("Both labels and logits must be provided.")
ValueError: Both labels and logits must be provided.
Nevermind the mnist naming, I just used the structure of the code, but didn't rename it.
Thanks for any help!
(I can't comment with a brand new account.) I was able to replicate your error by setting features and receiver_tensors to have the same value, but I don't think that your _serving_input_receiver_fn is implemented correctly. Can you follow the example here?

Google cloud TPU: NotImplementedError: Non-resource Variables are not supported inside TPU computations

I am trying to train my model using google cloud's TPUs. The model works fine on CPU and GPU, and I can run the TPU tutorials without any problems (so it is not a problem of connecting to TPUs). However, when I run my program on the TPU cloud I get an error. The most important line is probably the following:
NotImplementedError: Non-resource Variables are not supported inside TPU computations (operator name: training_op/update_2nd_caps/primary_to_first_fc/W/ApplyAdam/RefEnter)
And here is the full error in case there is something important there:
Traceback (most recent call last):
File "TPU_playground.py", line 85, in <module>
capser.train(input_fn=train_input_fn_tpu, steps=n_steps)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 366, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1119, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1132, in _train_model_default
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1992, in _call_model_fn
features, labels, mode, config)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1107, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2223, in _model_fn
_train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2537, in _train_on_tpu_system
device_assignment=ctx.device_assignment)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu.py", line 733, in shard
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu.py", line 394, in replicate
device_assignment, name)[1]
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu.py", line 546, in split_compile_and_replicate
outputs = computation(*computation_inputs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2530, in multi_tpu_train_steps_on_single_shard
single_tpu_train_step, [_INITIAL_LOSS])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/training_loop.py", line 207, in repeat
cond, body_wrapper, inputs=inputs, infeed_queue=infeed_queue, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/training_loop.py", line 169, in while_loop
name="")
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 3209, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2941, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2878, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/training_loop.py", line 120, in body_wrapper
outputs = body(*(inputs + dequeue_ops))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/training_loop.py", line 203, in body_wrapper
return [i + 1] + _convert_to_list(body(*args))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1166, in train_step
self._call_model_fn(features, labels))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1337, in _call_model_fn
estimator_spec = self._model_fn(features=features, **kwargs)
File "/home/adrien_doerig/capser/capser_7_model_fn.py", line 100, in model_fn_tpu
**output_decoder_deconv_params)
File "/home/adrien_doerig/capser/capser_model.py", line 341, in capser_model
loss_training_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step(), name="training_op")
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 409, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py", line 114, in apply_gradients
return self._opt.apply_gradients(summed_grads_and_vars, global_step, name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 602, in apply_gradients
update_ops.append(processor.update_op(self, grad))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 113, in update_op
update_op = optimizer._apply_dense(g, self._v) # pylint: disable=protected-access
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/adam.py", line 148, in _apply_dense
grad, use_locking=self._use_locking).op
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/gen_training_ops.py", line 293, in apply_adam
use_nesterov=use_nesterov, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3414, in create_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1782, in __init__
self._control_flow_post_processing()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1793, in _control_flow_post_processing
self._control_flow_context.AddOp(self)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2430, in AddOp
self._AddOpInternal(op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2451, in _AddOpInternal
real_x = self.AddValue(x)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2398, in AddValue
self._outer_context.AddInnerOp(enter.op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu.py", line 310, in AddInnerOp
self._AddOpInternal(op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu.py", line 287, in _AddOpInternal
"(operator name: %s)" % op.name)
NotImplementedError: Non-resource Variables are not supported inside TPU computations (operator name: training_op/update_2nd_caps/primary_to_first_fc/W/ApplyAdam/RefEnter)
It seems that the forward pass of the graph is built fine, but the backprop using AdamOptimizer is not supported by the TPUs in this case. I tried using more standard optimizers (GradientDescentOptimizer and MomentumOptimizer) but it doesn't help. All the tensors in the feedforward pass are in formats compatible with TPUs (i.e. tf.float32).
Does anyone have suggestions as to what I should try?
Thank you!
I have found a way to use the TPUs without using the ctpu up command, which solves the problem. I simply do everything exactly as I would do it to run my code on cloud GPUs:
-- see documentation here: https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction
-- a simple explanatory video here: https://www.youtube.com/watch?v=J_d4bEKUG2Q
BUT, the ONLY DIFFERENCE is that I use --scale-tier 'BASIC_TPU' instead of --scale-tier 'STANDARD_1' when I run my job. So the command to run the job is
gcloud ml-engine jobs submit training $JOB_NAME --module-name capser.capser_7_multi_gpu --package-path ./capser --job-dir=gs://capser-data/$JOB_NAME --scale-tier 'BASIC_TPU' --stream-logs --runtime-version 1.9
--region us-central1
(I previously define the variable $JOB_NAME: export JOB_NAME=<input your job name>)
Also, make sure you choose a region which has TPUs! us-central1 works for example.
So maybe it is a small bug when using ctpu up, but it seems not to be a problem when using the above method. I hope that helps!

Tensorflow error "feed a value for placeholder tensor" occurs on the second RBM in a deep belief net

I first constructed an RBM and tested it on a set of data, it worked well. Then I wrote a DBN with stacked RBM and trained it with the same set of data. The program stopped with the following error when it tried to train the second RBM.
Traceback (most recent call last):
File "D:\Python\DL_DG\analysis\debug\debug_01_ppi.py", line 44, in <module>
ppi_dbn.fit(ppi_in)
File "D:/Python/DL_DG/Model\dbn_test.py", line 95, in fit
rbm.fit(input_data)
File "D:/Python/DL_DG/Model\rbm_test.py", line 295, in fit
self.partial_fit(batch_x, b, e)
File "D:/Python/DL_DG/Model\rbm_test.py", line 188, in partial_fit
feed_dict={self.x: batch_x})
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 895, in run
run_metadata_ptr)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1321, in _do_run
options, run_metadata)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'input/x' with dtype float and shape [?,128]
[[Node: input/x = Placeholder[dtype=DT_FLOAT, shape=[?,128], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'input/x', defined at:
File "<string>", line 1, in <module>
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\idlelib\run.py", line 142, in main
ret = method(*args, **kwargs)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\idlelib\run.py", line 460, in runcode
exec(code, self.locals)
File "D:\Python\DL_DG\analysis\debug\debug_01_ppi.py", line 42, in <module>
learning_rate_rbm=[0.001,0.01],rbm_gauss_visible=True)
File "D:/Python/DL_DG/Model\dbn_test.py", line 52, in __init__
sample_gauss_visible=self.sample_gauss_visible, sigma=self.sigma))
File "D:/Python/DL_DG/Model\rbm_test.py", line 358, in __init__
xavier_const,err_function,use_tqdm,tqdm)
File "D:/Python/DL_DG/Model\rbm_test.py", line 46, in __init__
self.x = tf.placeholder(tf.float32, [None, self.n_visible],name='x')
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1548, in placeholder
return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 2094, in _placeholder
name=name)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input/x' with dtype float and shape [?,128]
[[Node: input/x = Placeholder[dtype=DT_FLOAT, shape=[?,128], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
The error occurs at the following function:
def partial_fit(self, batch_x, k, j):
# Runs one training step on `batch_x`: evaluates the merged summaries together
# with the weight/delta update ops, then writes the summary at step
# k*self.batch_size+j.
# Debug output: dtype and shape of the batch being fed.
print(batch_x.dtype, batch_x.shape)
# NOTE(review): only self.x is fed here. If self.merged collects summaries
# from the whole graph (e.g. via tf.summary.merge_all()), it may also depend
# on placeholders belonging to other RBMs such as 'input/x' - confirm how
# self.merged is built.
summary, _ = self.sess.run([self.merged, self.update_weights + self.update_deltas],
feed_dict={self.x: batch_x})
self.train_writer.add_summary(summary, k*self.batch_size+j)
I output the type and shape of batch_x. The shape is the same during the whole training process. The type is float64 when training the first RBM, and float32 when training the second RBM. That's where it stopped and threw the error.
The DBN worked well when I didn't compute the summary and just used the following code:
self.sess.run(self.update_weights + self.update_deltas,feed_dict={self.x: batch_x})
It also worked well if I only train a single RBM (with or without the summary).
The batch_x used to train the second RBM is probabilities of the hidden layer in the first RBM.
Could somebody help me solve this problem? I'm not sure if the float64 is the problem.
I guess it's hard for anyone to solve the problem only with the two pieces of code I give. lol. The full code is too long to post here.
I saved the output of the first RBM and used it as input to train another RBM, and that works well. Thus, I think the problem is not the type or shape of the fed batch_x, but the structure of the DBN, or the way I collected summaries.
Hope my situation can help others with similar problems.

Unimplemented Error: TensorArray has size zero

I am getting this weird error when trying to train a sequence to sequence model in tensorflow. The sequence to sequence model is a video captioning system. I have encoded the frames of the videos in sequence features of the SequenceExampleProto. After I prefetch the features containing the list of jpeg encoded strings, I decode them using the following function:
video = tf.map_fn(lambda x: tf.image.decode_jpeg(x, channels=3), encoded_video, dtype=tf.uint8)
The model compiles, but at training time I get the following error, which is caused by this code. The error says that the TensorArray has size zero, whereas here the TensorArray should not be empty. Any help is appreciated:
tensorflow.python.framework.errors_impl.UnimplementedError: TensorArray has size zero, but element shape [?,?,3] is not fully defined. Currently only static shapes are supported when packing zero-size TensorArrays.
[[Node: input_fn/decode/map/TensorArrayStack/TensorArrayGatherV3 = TensorArrayGatherV3[_class=["loc:#input_fn/decode/map/TensorArray_1"], dtype=DT_UINT8, element_shape=[?,?,3], _device="/job:localhost/replica:0/task:0/cpu:0"](input_fn/decode/map/TensorArray_1, input_fn/decode/map/TensorArrayStack/range, input_fn/decode/map/while/Exit_1/_479)]]
Caused by op u'input_fn/decode/map/TensorArrayStack/TensorArrayGatherV3', defined at:
File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main
"__main__", fname, loader, pkg_name)
File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/home/ubuntu/ASLNet/seq2seq/bin/train.py", line 277, in <module>
tf.app.run()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "/home/ubuntu/ASLNet/seq2seq/bin/train.py", line 272, in main
schedule=FLAGS.schedule)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 111, in run
return _execute_schedule(experiment, schedule)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 46, in _execute_schedule
return task()
File "seq2seq/contrib/experiment.py", line 104, in continuous_train_and_eval
monitors=self._train_monitors)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/deprecation.py", line 281, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 430, in fit
loss = self._train_model(input_fn=input_fn, hooks=hooks)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 925, in _train_model
features, labels = input_fn()
File "seq2seq/training/utils.py", line 274, in input_fn
frame_format="jpeg")
File "seq2seq/training/utils.py", line 365, in process_video
video = tf.map_fn(lambda x: tf.image.decode_jpeg(x, channels=3), encoded_video, dtype=tf.uint8)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/functional_ops.py", line 390, in map_fn
results_flat = [r.stack() for r in r_a]
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/tensor_array_ops.py", line 301, in stack
return self.gather(math_ops.range(0, self.size()), name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/tensor_array_ops.py", line 328, in gather
element_shape=element_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 2244, in _tensor_array_gather_v3
element_shape=element_shape, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
UnimplementedError (see above for traceback): TensorArray has size zero, but element shape [?,?,3] is not fully defined. Currently only static shapes are supported when packing zero-size TensorArrays.
[[Node: input_fn/decode/map/TensorArrayStack/TensorArrayGatherV3 = TensorArrayGatherV3[_class=["loc:#input_fn/decode/map/TensorArray_1"], dtype=DT_UINT8, element_shape=[?,?,3], _device="/job:localhost/replica:0/task:0/cpu:0"](input_fn/decode/map/TensorArray_1, input_fn/decode/map/TensorArrayStack/range, input_fn/decode/map/while/Exit_1/_479)]]
Fixed. I followed the suggestion from tensorflow map_fn TensorArray has inconsistent shapes and implemented the following:
with tf.name_scope("decode", values=[encoded_video]):
input_jpeg_strings = tf.TensorArray(tf.string, video_length)
input_jpeg_strings = input_jpeg_strings.unstack(encoded_video)
init_array = tf.TensorArray(tf.float32, size=video_length)
def cond(i, ta):
return tf.less(i, video_length)
def body(i, ta):
image = input_jpeg_strings.read(i)
image = tf.image.decode_jpeg(image, 3, name='decode_image')
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
assert (resize_height > 0) == (resize_width > 0)
image = tf.image.resize_images(image, size=[resize_height, resize_width], method=tf.image.ResizeMethod.BILINEAR)
return i + 1, ta.write(i, image)
_, input_image = tf.while_loop(cond, body, [0, init_array])