tf.clip_by_norm(grad, 1.0) throws InvalidArgumentError Shapes must be equal rank, but are 2 and 1 - tensorflow

Can someone explain why TensorFlow raises an error when I run the following code?
import tensorflow as tf
# Minimal TF1 graph-mode repro: one Dense unit whose output is compared
# against its own input with a mean-squared-error loss.
x = tf.keras.layers.Input(shape=(1,))
y = tf.keras.layers.Dense(1, activation=tf.nn.relu)(x)
loss = tf.losses.mean_squared_error(x, y)
# tf.gradients returns a Python list of gradient tensors, one per variable.
grad = tf.gradients(loss, tf.trainable_variables())
# !!! GIVES ME TROUBLE !!!
# The whole list is passed to tf.clip_by_norm, which converts its argument
# to a single tensor. Per the traceback below, packing the [1,1] and [1]
# shaped gradients is what raises "Shapes must be equal rank".
clipped_grad = tf.clip_by_norm(grad, 1.0)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(y, feed_dict={x: [[1.0], [2.0], [3.0]]})
The error I get:
Traceback (most recent call last):
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\framework\ops.py", line 1589, in _create_c_op
c_op = c_api.TF_FinishOperation(op_desc)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shapes must be equal rank, but are 2 and 1
From merging shape 0 with other shapes. for 'clip_by_norm/t' (op: 'Pack') with input shapes: [1,1], [1].
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/TObs/.PyCharmCE2018.1/config/scratches/scratch.py", line 11, in <module>
clipped_grad = tf.clip_by_norm(grad, 1.0)
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\ops\clip_ops.py", line 140, in clip_by_norm
t = ops.convert_to_tensor(t, name="t")
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\framework\ops.py", line 1011, in convert_to_tensor
as_ref=False)
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\framework\ops.py", line 1107, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\ops\array_ops.py", line 960, in _autopacking_conversion_function
return _autopacking_helper(v, inferred_dtype, name or "packed")
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\ops\array_ops.py", line 923, in _autopacking_helper
return gen_array_ops.pack(elems_as_tensors, name=scope)
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 5532, in pack
"Pack", values=values, axis=axis, name=name)
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\framework\ops.py", line 3414, in create_op
op_def=op_def)
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\framework\ops.py", line 1756, in __init__
control_input_ops)
File "D:\Program Files\Python\Python_3_6_2\lib\site-packages\tensorflow\python\framework\ops.py", line 1592, in _create_c_op
raise ValueError(str(e))
ValueError: Shapes must be equal rank, but are 2 and 1
From merging shape 0 with other shapes. for 'clip_by_norm/t' (op: 'Pack') with input shapes: [1,1], [1].
Any thoughts? I'm running on a Windows 10 machine with tensorflow-gpu 1.9.0 and an NVIDIA GTX 1080.
Help would be much appreciated :)
Cheers,
Tobs.

After tinkering around, I found out that tf.clip_by_norm has to be applied separately to each tensor in the list of gradients (tf.gradients returns a list of tensors, not a single tensor), like so:
clipped_gradients = [tf.clip_by_norm(g, grad_norm_clip) for g in tf.gradients(loss, tf.trainable_variables())]
I guess that's the right way to do it, right?
Cheers,
Tobs.

Related

tensorflow v1 GradientTape: AttributeError: 'NoneType' object has no attribute 'eval'

I want to compute the gradient of the distance between the NSynth WaveNet encoding of two sine waves.
This is tensorflow v1.
I am working with code based upon https://github.com/magenta/magenta/blob/master/magenta/models/nsynth/wavenet/fastgen.py
A minimal example of my bug is in this colab notebook: https://colab.research.google.com/drive/1oTEU8QAaOs0K1A0KHrAdt7kA7MkadNDr?usp=sharing
Here is the code:
# Commented out IPython magic to ensure Python compatibility.
# %tensorflow_version 1.x
!pip3 install -q magenta
!wget -c http://download.magenta.tensorflow.org/models/nsynth/wavenet-ckpt.tar && tar xvf wavenet-ckpt.tar
checkpoint_path = './wavenet-ckpt/model.ckpt-200000'
import math
from magenta.models.nsynth.wavenet import fastgen
import tensorflow as tf
session_config = tf.ConfigProto(allow_soft_placement=True)
session_config.gpu_options.allow_growth = True
sess = tf.Session(config=session_config)
pi = 3.1415926535897
SR = 16000
sample_length = 64000
DURATION_SECONDS = sample_length / SR
def sine(hz):
    """Build a half-amplitude cosine wave tensor at frequency `hz`.

    Despite the name, the waveform is computed with `tf.cos`. The time axis
    is `sample_length` evenly spaced points from 0 to `DURATION_SECONDS`
    (both are module-level values defined above).

    Args:
        hz: Frequency factor applied to the time axis. The surrounding code
            passes either a Python number (`sine(440)`) or a scalar tensor
            (`sine(fp)`).

    Returns:
        A tensor of `sample_length` samples equal to
        `0.5 * cos(2 * pi * t * hz)`.
    """
    time = tf.linspace(0.0, DURATION_SECONDS, sample_length)
    return tf.constant(0.5) * tf.cos(2.0 * pi * time * hz)
net = fastgen.load_nsynth(batch_size=2, sample_length=sample_length)
saver = tf.train.Saver()
saver.restore(sess, checkpoint_path)
"""We have two sine waves at 440 and 660 Hz. We use the encoder to generate two (125, 16) encodings:"""
twosines = tf.stack([sine(440), sine(660)]).eval(session=sess)
print(sess.run(net["encoding"], feed_dict={net["X"]: twosines}).shape)
"""Compute the distance between the two sine waves"""
distencode = tf.reduce_mean(tf.abs(net["encoding"][0] - net["encoding"][1]))
print(sess.run(distencode, feed_dict={net["X"]: twosines}))
"""I don't know why the following code doesn't work, but if I did I could solve the real task....
"""
net["X"] = twosines
distencode.eval(session=sess)
"""Here is the code that I need to work. I want to compute the gradient of the distance between the NSynth encoding of two sine waves:"""
fp = tf.constant(660.0)
newsines = tf.stack([sine(440), sine(fp)])
with tf.GradientTape() as g:
g.watch(fp)
dd_dfp = g.gradient(distencode, fp)
print(dd_dfp.eval(session=sess))
The last block, which I want to evaluate, gets the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-12-b5b8cdd00b24> in <module>()
4 g.watch(fp)
5 dd_dfp = g.gradient(distencode, fp)
----> 6 print(dd_dfp.eval(session=sess))
AttributeError: 'NoneType' object has no attribute 'eval'
I believe I need to define the operations to be executed within this block. However, I am using a pretrained model that I am just computing the distance over, so I am not sure how to define execution in that block.
The second-to-last block, which would help me fix the last block, gives the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-10-c3411dcbfa2c> in <module>()
3 with tf.GradientTape() as g:
4 g.watch(fp)
----> 5 dd_dfp = g.gradient(distencode, g)
6 print(dd_dfp.eval(session=sess))
/tensorflow-1.15.2/python3.6/tensorflow_core/python/eager/backprop.py in gradient(self, target, sources, output_gradients, unconnected_gradients)
997 flat_sources = [_handle_or_self(x) for x in flat_sources]
998 for t in flat_sources_raw:
--> 999 if not t.dtype.is_floating:
1000 logging.vlog(
1001 logging.WARN, "The dtype of the source tensor must be "
AttributeError: 'GradientTape' object has no attribute 'dtype'
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/tensorflow-1.15.2/python3.6/tensorflow_core/python/client/session.py in _do_call(self, fn, *args)
1364 try:
-> 1365 return fn(*args)
1366 except errors.OpError as e:
8 frames
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [2,64000]
[[{{node Placeholder}}]]
[[Mean/_759]]
(1) Invalid argument: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [2,64000]
[[{{node Placeholder}}]]
0 successful operations.
0 derived errors ignored.
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
/tensorflow-1.15.2/python3.6/tensorflow_core/python/client/session.py in _do_call(self, fn, *args)
1382 '\nsession_config.graph_options.rewrite_options.'
1383 'disable_meta_optimizer = True')
-> 1384 raise type(e)(node_def, op, message)
1385
1386 def _extend_graph(self):
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [2,64000]
[[node Placeholder (defined at /tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py:1748) ]]
[[Mean/_759]]
(1) Invalid argument: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [2,64000]
[[node Placeholder (defined at /tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py:1748) ]]
0 successful operations.
0 derived errors ignored.
Original stack trace for 'Placeholder':
File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.6/dist-packages/traitlets/config/application.py", line 664, in launch_instance
app.start()
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.6/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
self._run_once()
File "/usr/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
handle._run()
File "/usr/lib/python3.6/asyncio/events.py", line 145, in _run
self._callback(*self._args)
File "/usr/local/lib/python3.6/dist-packages/tornado/ioloop.py", line 758, in _run_callback
ret = callback()
File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 548, in <lambda>
self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 462, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 492, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 444, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-5-5120c8282e75>", line 1, in <module>
net = fastgen.load_nsynth(batch_size=2, sample_length=sample_length)
File "/tensorflow-1.15.2/python3.6/magenta/models/nsynth/wavenet/fastgen.py", line 64, in load_nsynth
x = tf.placeholder(tf.float32, shape=[batch_size, sample_length])
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/array_ops.py", line 2619, in placeholder
return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/gen_array_ops.py", line 6669, in placeholder
"Placeholder", dtype=dtype, shape=shape, name=name)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
Thank you.

no kernel image is available for execution on the device

I am training Mask R-CNN. Training works with tf-1.2, but with tf-1.5 it fails.
The error is as follows:
Caused by op u'pyramid_1/AssignGTBoxes/Where_6', defined at:
File "/home/zhouzd2/letrain/applications/letrain.py", line 349, in <module>
tf.app.run()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 124, in run
_sys.exit(main(argv))
File "/home/zhouzd2/letrain/applications/letrain.py", line 346, in main
LeTrain().model_train(user_mode)
File "/home/zhouzd2/letrain/platform/base_train.py", line 1228, in model_train
cluster=self.cluster_spec)
File "/home/zhouzd2/letrain/platform/deployment/model_deploy.py", line 226, in create_clones
outputs, feed_ops,verify_model_loss = model_fn(*args, **kwargs)
File "/home/zhouzd2/letrain/platform/base_train.py", line 1195, in clone_fn
model_loss, end_points, feed_ops = network_fn(data_direct, data_batch, int_network_fn)
File "/home/zhouzd2/letrain/applications/letrain.py", line 214, in get_loss
FLAGS.batch_size)
File "/home/zhouzd2/letrain/applications/fmrcnn/get_fmrcnn_loss.py", line 23, in model_fn
loss_weights=[0.2, 0.2, 1.0, 0.2, 1.0])
File "/home/zhouzd2/letrain/applications/fmrcnn/libs/nets/pyramid_network.py", line 580, in build
is_training=is_training, gt_boxes=gt_boxes)
File "/home/zhouzd2/letrain/applications/fmrcnn/libs/nets/pyramid_network.py", line 263, in build_heads
assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
File "/home/zhouzd2/letrain/applications/fmrcnn/libs/layers/wrapper.py", line 173, in assign_boxes
inds = tf.where(tf.equal(assigned_layers, l))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 2538, in where
return gen_array_ops.where(condition=condition, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 6087, in where
"Where", input=condition, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InternalError (see above for traceback): WhereOp: Could not launch cub::DeviceReduce::Sum to count number of true / nonzero indices. temp_storage_bytes: 1, status: no kernel image is available for execution on the device
[[Node: pyramid_1/AssignGTBoxes/Where_6 = Where[T=DT_BOOL, _device="/job:worker/replica:0/task:0/device:GPU:0"](pyramid_1/AssignGTBoxes/Equal_6_S9493)]]
[[Node: pyramid_1/AssignGTBoxes/Reshape_8_G1028 = _Recv[client_terminated=false, recv_device="/job:worker/replica:0/task:0/device:CPU:0", send_device="/job:worker/replica:0/task:0/device:GPU:0", send_device_incarnation=5407481677180697062, tensor_name="edge_1349_pyramid_1/AssignGTBoxes/Reshape_8", tensor_type=DT_INT64, _device="/job:worker/replica:0/task:0/device:CPU:0"]()]]
There is no problem when building the computation graph; the error is reported in sess.run().
Does anyone know how to solve this problem? Or does anyone know what function can replace tf.where?
Thank you!
If you are using Visual Studio:
Right click on the project > Properties > CUDA C/C++ > Device
and add the following to Code Generation field
compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;

Tensorflow error "feed a value for placeholder tensor" occurs on the second RBM in a deep belief net

I first constructed an RBM and tested it on a set of data, it worked well. Then I wrote a DBN with stacked RBM and trained it with the same set of data. The program stopped with the following error when it tried to train the second RBM.
Traceback (most recent call last):
File "D:\Python\DL_DG\analysis\debug\debug_01_ppi.py", line 44, in <module>
ppi_dbn.fit(ppi_in)
File "D:/Python/DL_DG/Model\dbn_test.py", line 95, in fit
rbm.fit(input_data)
File "D:/Python/DL_DG/Model\rbm_test.py", line 295, in fit
self.partial_fit(batch_x, b, e)
File "D:/Python/DL_DG/Model\rbm_test.py", line 188, in partial_fit
feed_dict={self.x: batch_x})
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 895, in run
run_metadata_ptr)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1321, in _do_run
options, run_metadata)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'input/x' with dtype float and shape [?,128]
[[Node: input/x = Placeholder[dtype=DT_FLOAT, shape=[?,128], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'input/x', defined at:
File "<string>", line 1, in <module>
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\idlelib\run.py", line 142, in main
ret = method(*args, **kwargs)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\idlelib\run.py", line 460, in runcode
exec(code, self.locals)
File "D:\Python\DL_DG\analysis\debug\debug_01_ppi.py", line 42, in <module>
learning_rate_rbm=[0.001,0.01],rbm_gauss_visible=True)
File "D:/Python/DL_DG/Model\dbn_test.py", line 52, in __init__
sample_gauss_visible=self.sample_gauss_visible, sigma=self.sigma))
File "D:/Python/DL_DG/Model\rbm_test.py", line 358, in __init__
xavier_const,err_function,use_tqdm,tqdm)
File "D:/Python/DL_DG/Model\rbm_test.py", line 46, in __init__
self.x = tf.placeholder(tf.float32, [None, self.n_visible],name='x')
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1548, in placeholder
return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 2094, in _placeholder
name=name)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\pil562\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input/x' with dtype float and shape [?,128]
[[Node: input/x = Placeholder[dtype=DT_FLOAT, shape=[?,128], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
The error occurs at the following function:
def partial_fit(self, batch_x, k, j):
    """Run one training update on `batch_x` and record its summary.

    Args:
        batch_x: Batch fed to the `self.x` placeholder.
        k: Combined with `j` as `k * self.batch_size + j` to form the step
            index passed to the summary writer.
        j: See `k`.
    """
    # Debug output: dtype and shape of the batch being fed.
    print(batch_x.dtype, batch_x.shape)
    # Fetch the merged summaries together with the update ops in a single
    # session run, feeding only `self.x`.
    summary, _ = self.sess.run(
        [self.merged, self.update_weights + self.update_deltas],
        feed_dict={self.x: batch_x})
    self.train_writer.add_summary(summary, k * self.batch_size + j)
I printed the dtype and shape of batch_x. The shape is the same during the whole training process. The dtype is float64 when training the first RBM and float32 when training the second RBM, which is where training stopped and the error was thrown.
The DBN worked well when I didn't compute the summary and just used the following code:
self.sess.run(self.update_weights + self.update_deltas,feed_dict={self.x: batch_x})
It also worked well if I only train a single RBM (with or without the summary).
The batch_x used to train the second RBM is probabilities of the hidden layer in the first RBM.
Could somebody help me solve this problem? I'm not sure if the float64 is the problem.
I guess it's hard for anyone to solve the problem only with the two pieces of code I give. lol. The full code is too long to post here.
I saved the output of the first RBM and used it as input to train another RBM, and that works well. Thus, I think the problem is not the type or shape of the fed batch_x, but the structure of the DBN or the way I collect summaries.
Hope my situation can help others with similar problems.

questions about python3.6 and tensorflow1.2

When I run my program, the error "Shape (10, ?) must have rank at least 3" appears.
My code is below; the error seems to come from the line "output,_=tf.nn.dynamic_rnn(cell,x_,dtype=tf.float32)",
but I don't know how to fix it.
Can you help me?
Traceback (most recent call last):
File "C:/Users/yyb/PycharmProjects/untitled1/myLSTM.py", line 49, in <module>
regressor.fit(train_X,train_y,batch_size=BATCH_SIZE,steps=TRAINING_STEPS)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\deprecation.py", line 289, in new_func
return func(*args, **kwargs)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 439, in fit
SKCompat(self).fit(x, y, batch_size, steps, max_steps, monitors)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 1350, in fit
monitors=all_monitors)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\deprecation.py", line 289, in new_func
return func(*args, **kwargs)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 455, in fit
loss = self._train_model(input_fn=input_fn, hooks=hooks)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 955, in _train_model
model_fn_ops = self._get_train_ops(features, labels)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 1162, in _get_train_ops
return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 1133, in _call_model_fn
model_fn_results = self._model_fn(features, labels, **kwargs)
File "C:/Users/yyb/PycharmProjects/untitled1/myLSTM.py", line 35, in lstm_model
output,_=tf.nn.dynamic_rnn(cell,x_,dtype=tf.float32)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn.py", line 574, in dynamic_rnn
dtype=dtype)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn.py", line 637, in _dynamic_rnn_loop
for input_ in flat_input)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn.py", line 637, in <genexpr>
for input_ in flat_input)
File "C:\Users\yyb\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_shape.py", line 649, in with_rank_at_least
raise ValueError("Shape %s must have rank at least %d" % (self, rank))
ValueError: Shape (10, ?) must have rank at least 3
#coding=utf-8
import numpy as np
import tensorflow as tf
import matplotlib as mpl
mpl.use('Agg')
from matplotlib import pyplot as plt
learn=tf.contrib.learn
HIDDEN_SIZE=30
NUM_LAYERS=2
TIMESTEPS=10
TRAINING_STEPS=10000
BATCH_SIZE=32
TRAINING_EXAMPLES=10000
TESTING_EXAMPLES=1000
SAMPLE_GAP=0.01
def generate_data(seq):
    """Slice `seq` into sliding-window inputs and next-value targets.

    For each start index `i`, the input is the `TIMESTEPS` values
    `seq[i:i + TIMESTEPS]` and the target is the value that follows them,
    `seq[i + TIMESTEPS]`.

    Args:
        seq: One-dimensional sequence of numbers.

    Returns:
        A pair of float32 arrays `(X, y)`. With
        `n = len(seq) - TIMESTEPS - 1` and `n > 0`, their shapes are
        `(n, 1, TIMESTEPS)` and `(n, 1)`.
    """
    X = []
    y = []
    for i in range(len(seq) - TIMESTEPS - 1):
        # Each window and each target is wrapped in a list, which adds the
        # length-1 axis seen in the returned shapes.
        X.append([seq[i:i + TIMESTEPS]])
        y.append([seq[i + TIMESTEPS]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
def lstm_model(X, y):
    """Model function passed to `learn.Estimator`: stacked LSTM + linear regression.

    Args:
        X: Input features tensor; it is unstacked along axis 1 before being
            handed to the RNN.
        y: Regression targets.

    Returns:
        A `(prediction, loss, train_op)` tuple.
    """
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
    # The same cell object is repeated NUM_LAYERS times.
    cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * NUM_LAYERS)
    x_ = tf.unstack(X, axis=1)
    # This is the call reported in the traceback above
    # ("Shape (10, ?) must have rank at least 3").
    output, _ = tf.nn.dynamic_rnn(cell, x_, dtype=tf.float32)
    output = output[-1]
    prediction, loss = learn.models.linear_regression(output, y)
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.contrib.framework.get_global_step(),
        optimizer="Adagrad", learning_rate=0.1)
    return prediction, loss, train_op
regressor=learn.Estimator(model_fn=lstm_model)
test_start=TRAINING_EXAMPLES*SAMPLE_GAP
test_end=(TRAINING_EXAMPLES+TESTING_EXAMPLES)*SAMPLE_GAP
train_X,train_y=generate_data(np.sin(np.linspace(0,test_start,TRAINING_EXAMPLES,dtype=np.float32)))
test_X,test_y=generate_data(np.sin(np.linspace(test_start,test_end,TESTING_EXAMPLES,dtype=np.float32)))
regressor.fit(train_X,train_y,batch_size=BATCH_SIZE,steps=TRAINING_STEPS)
predicted=[[pred] for pred in regressor.predict(test_X)]
rmse=np.sqrt(((predicted-test_y)**2).mean(axis=0))
print('Mean square error is: %f'%rmse[0])
fig=plt.figure()
plot_predicted=plt.plot(predicted,label='predicted')
plot_test=plt.plot(test_y,label='real_sin')
plt.legend([plot_predicted,plot_test],['predicted','real_sin'])
fig.savefig('sin.png')

Unimplemented Error: TensorArray has size zero

I am getting this weird error when trying to train a sequence to sequence model in tensorflow. The sequence to sequence model is a video captioning system. I have encoded the frames of the videos in sequence features of the SequenceExampleProto. After I prefetch the features containing the list of jpeg encoded strings, I decode them using the following function:
video = tf.map_fn(lambda x: tf.image.decode_jpeg(x, channels=3), encoded_video, dtype=tf.uint8)
The model compiles but during training time, I'm getting the following error which is caused by this code. The error says that the TensorArray is zero, whereas here the TensorArray should not be zero. Any help is appreciated:
tensorflow.python.framework.errors_impl.UnimplementedError: TensorArray has size zero, but element shape [?,?,3] is not fully defined. Currently only static shapes are supported when packing zero-size TensorArrays.
[[Node: input_fn/decode/map/TensorArrayStack/TensorArrayGatherV3 = TensorArrayGatherV3[_class=["loc:#input_fn/decode/map/TensorArray_1"], dtype=DT_UINT8, element_shape=[?,?,3], _device="/job:localhost/replica:0/task:0/cpu:0"](input_fn/decode/map/TensorArray_1, input_fn/decode/map/TensorArrayStack/range, input_fn/decode/map/while/Exit_1/_479)]]
Caused by op u'input_fn/decode/map/TensorArrayStack/TensorArrayGatherV3', defined at:
File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main
"__main__", fname, loader, pkg_name)
File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/home/ubuntu/ASLNet/seq2seq/bin/train.py", line 277, in <module>
tf.app.run()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "/home/ubuntu/ASLNet/seq2seq/bin/train.py", line 272, in main
schedule=FLAGS.schedule)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 111, in run
return _execute_schedule(experiment, schedule)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 46, in _execute_schedule
return task()
File "seq2seq/contrib/experiment.py", line 104, in continuous_train_and_eval
monitors=self._train_monitors)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/deprecation.py", line 281, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 430, in fit
loss = self._train_model(input_fn=input_fn, hooks=hooks)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 925, in _train_model
features, labels = input_fn()
File "seq2seq/training/utils.py", line 274, in input_fn
frame_format="jpeg")
File "seq2seq/training/utils.py", line 365, in process_video
video = tf.map_fn(lambda x: tf.image.decode_jpeg(x, channels=3), encoded_video, dtype=tf.uint8)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/functional_ops.py", line 390, in map_fn
results_flat = [r.stack() for r in r_a]
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/tensor_array_ops.py", line 301, in stack
return self.gather(math_ops.range(0, self.size()), name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/tensor_array_ops.py", line 328, in gather
element_shape=element_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 2244, in _tensor_array_gather_v3
element_shape=element_shape, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
UnimplementedError (see above for traceback): TensorArray has size zero, but element shape [?,?,3] is not fully defined. Currently only static shapes are supported when packing zero-size TensorArrays.
[[Node: input_fn/decode/map/TensorArrayStack/TensorArrayGatherV3 = TensorArrayGatherV3[_class=["loc:#input_fn/decode/map/TensorArray_1"], dtype=DT_UINT8, element_shape=[?,?,3], _device="/job:localhost/replica:0/task:0/cpu:0"](input_fn/decode/map/TensorArray_1, input_fn/decode/map/TensorArrayStack/range, input_fn/decode/map/while/Exit_1/_479)]]
Fixed. I followed the suggestion from the question "tensorflow map_fn TensorArray has inconsistent shapes" and implemented the following:
with tf.name_scope("decode", values=[encoded_video]):
    # Load the encoded JPEG strings into a TensorArray so that individual
    # frames can be read by index inside the loop.
    input_jpeg_strings = tf.TensorArray(tf.string, video_length)
    input_jpeg_strings = input_jpeg_strings.unstack(encoded_video)
    # Output array that receives the decoded, resized float32 frames.
    init_array = tf.TensorArray(tf.float32, size=video_length)

    def cond(i, ta):
        """Keep looping while the frame index is below `video_length`."""
        return tf.less(i, video_length)

    def body(i, ta):
        """Decode and resize frame `i`, write it to `ta`, and advance the index."""
        image = input_jpeg_strings.read(i)
        image = tf.image.decode_jpeg(image, 3, name='decode_image')
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # Either both resize dimensions are positive or neither is.
        assert (resize_height > 0) == (resize_width > 0)
        image = tf.image.resize_images(
            image, size=[resize_height, resize_width],
            method=tf.image.ResizeMethod.BILINEAR)
        return i + 1, ta.write(i, image)

    # Iterate from index 0; the second loop result is the filled TensorArray.
    _, input_image = tf.while_loop(cond, body, [0, init_array])