The original size of my images is 3900 x 6000 x 3. I make overlapping patches of shape (232024, 28, 28, 3) and then make batches of size 1000. I have a CNN model for semantic segmentation as follows:
def conv_layer(inputs, filters, kernel_size, strides = 1, padding = "SAME", bias_constant = 0.0, name = "conv"):
with tf.name_scope(name):
input_shape = inputs.shape.as_list()
filter_tensor = tf.truncated_normal([kernel_size[0], kernel_size[1], input_shape[3], filters], dtype = tf.float32)
filter = tf.Variable(initial_value = filter_tensor, trainable = True, name = "kernel")
bias = tf.Variable(tf.constant(bias_constant, shape=[filters]), name="bias")
conv2d = tf.nn.conv2d(input = tf.cast(inputs, dtype = tf.float32), filter = filter, strides = [1, strides, strides, 1], padding = padding)
activation = tf.nn.relu(conv2d + bias)
tf.summary.histogram("weights", filter)
tf.summary.histogram("biases", bias)
tf.summary.histogram("activations", activation)
return tf.cast(activation, dtype = tf.float16)
def deconv_layer(inputs, filters, kernel_size, output_size, strides = 1, padding = "SAME", bias_constant = 0.0, name = "deconv"):
with tf.name_scope(name):
input_shape = inputs.shape.as_list()
deconv_shape = tf.stack([tf.shape(inputs)[0], output_size[0], output_size[1],filters])
filter_tensor = tf.truncated_normal([kernel_size[0], kernel_size[1], filters, input_shape[3]], dtype = tf.float32)
filter = tf.Variable(initial_value = filter_tensor, trainable = True, name = "kernel")
bias = tf.Variable(tf.constant(bias_constant, shape=[filters]), name="bias")
print("bias:")
print(bias)
conv2d_transpose = tf.nn.conv2d_transpose(value = tf.cast(inputs, dtype = tf.float32),
filter = filter,
strides = [1, strides, strides, 1],
output_shape=deconv_shape,
padding = padding)
activation = tf.nn.relu(conv2d_transpose + bias)
tf.summary.histogram("weights", filter)
tf.summary.histogram("biases", bias)
tf.summary.histogram("activations", activation)
return tf.cast(activation, dtype = tf.float16)
def semantic_seg_model(features, mode, batch_size):
bias_constant = 0.1
conv_filters = [20, 50, 90]
conv_sizes = []
tf.summary.image('input', features, batch_size)
"""Model function for CNN."""
# Encoding starts here.
# Convolutional Layer 1
# Input: 100 x 100
conv = conv_layer(inputs=features,
filters=conv_filters[0],
kernel_size=[5, 5],
bias_constant = bias_constant,
name = "conv1")
conv_sizes.append(conv.shape.as_list())
print(conv.shape)
# Convolutional Layer 2
# Input: 100 x 100
conv = conv_layer(inputs = conv,
filters = conv_filters[1],
kernel_size = [5, 5],
strides = 2,
bias_constant = bias_constant,
name = "conv2")
conv_sizes.append(conv.shape.as_list())
print(conv.shape)
# Convolutional Layer 3
# Input: 100 x 100
conv = conv_layer(inputs = conv,
filters = conv_filters[2],
kernel_size = [5, 5],
bias_constant = bias_constant,
strides = 2,
name = "conv3")
conv_sizes.append(conv.shape.as_list())
print(conv.shape)
# Deconvolution Layer 3
# Input: 100 x 100
deconv = deconv_layer(inputs = conv,
filters = conv_filters[1],
kernel_size = [5, 5],
bias_constant = bias_constant,
strides = 2,
output_size = [conv_sizes[1][1], conv_sizes[1][2]],
name = "deconv3")
print(deconv.shape)
# Deconvolution Layer 2
# Input: 100 x 100
deconv = deconv_layer(inputs = deconv,
filters = conv_filters[0],
kernel_size = [5, 5],
bias_constant = bias_constant,
strides = 2,
output_size = [conv_sizes[0][1], conv_sizes[0][2]],
name = "deconv2")
print(deconv.shape)
deconv = deconv_layer(inputs = deconv,
filters = 3,
kernel_size = [5, 5],
output_size = [features.shape.as_list()[1], features.shape.as_list()[2]],
bias_constant = bias_constant,
name = "deconv1")
print(deconv.shape)
return deconv
epochs = 1000
learning_rate = 1e-50
image, label = tf.train.slice_input_producer([features, labels], shuffle = False)
BATCH_SIZE = 1000
THREAD_NUM = 5
MIN_AFTER_DEQUEUE = 10000
queue_capacity = MIN_AFTER_DEQUEUE + THREAD_NUM * BATCH_SIZE
image_batch, label_batch = tf.train.batch(tensors = [image, label],
batch_size = BATCH_SIZE,
capacity = queue_capacity,
num_threads = THREAD_NUM,
allow_smaller_final_batch = True)
output = semantic_seg_model(image_batch, tf.estimator.ModeKeys.TRAIN, BATCH_SIZE)
#cost
with tf.name_scope("cross_entropy"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits = output, labels = label_batch)
cost = tf.reduce_mean( cross_entropy )
# return cost, optimizer, accr
tf.summary.scalar("xent", cost)
#optimizer
with tf.name_scope("optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(tf.argmax(label_batch, 1), tf.argmax(output, 1))
accr = tf.reduce_mean(tf.cast(correct_prediction, tf.float16))
tf.summary.scalar("accuracy", accr)
merged_summary = tf.summary.merge_all()
# Session configs
config = tf.ConfigProto()
config.log_device_placement = True
config.gpu_options.allow_growth = True
# config.gpu_options.per_process_gpu_memory_fraction=0.8
# Initialize session
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
enqueue_threads = tf.train.start_queue_runners(sess = sess, coord = coord)
try:
for epoch in range(epochs):
if coord.should_stop():
break
epoch_loss = 0
train_loss = []; train_accuracy = []
s = sess.run(merged_summary)
writer.add_summary(s, epoch)
for batch in range(math.ceil(features.shape.as_list()[0]/BATCH_SIZE)):
_, sess_cost, sess_accuracy = sess.run([optimizer, cost, accr])
train_loss.append(sess_cost)
train_accuracy.append(sess_accuracy)
train_loss = np.mean(train_loss)
train_accuracy = np.mean(train_accuracy)
saver.save(sess, "./semantic_seg_model_1", global_step=epoch)
print ("[%02d/%02d] trainLoss: %.4f trainAcc: %.2f"
% (epoch + 1, epochs, sess_cost, sess_accuracy))
except Exception as e:
# Report exceptions to the coordinator.
coord.request_stop(e)
finally:
# Terminate as usual. It is safe to call `coord.request_stop()` twice.
coord.request_stop()
coord.join(enqueue_threads)
sess.close()
I get an error when I start the training session. The error is as follows:
[01/1000] trainLoss: 0.0000 trainAcc: 1.00
INFO:tensorflow:Error reported to Coordinator: , Nan
in summary histogram for: deconv2/biases [[Node: deconv2/biases =
HistogramSummary[T=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/device:CPU:0"](deconv2/biases/tag,
deconv2/bias/read/_105)]] [[Node: batch/fifo_queue_Size/_91 =
_Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0",
send_device="/job:localhost/replica:0/task:0/device:CPU:0",
send_device_incarnation=1,
tensor_name="edge_37_batch/fifo_queue_Size", tensor_type=DT_INT32,
_device="/job:localhost/replica:0/task:0/device:GPU:0"]]
Caused by op 'deconv2/biases', defined at: File "c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\runpy.py", line 193,
in _run_module_as_main
"main", mod_spec) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\runpy.py", line 85,
in _run_code
exec(code, run_globals) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel_launcher.py",
line 16, in
app.launch_new_instance() File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\traitlets\config\application.py",
line 658, in launch_instance
app.start() File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelapp.py",
line 478, in start
self.io_loop.start() File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\ioloop.py",
line 177, in start
super(ZMQIOLoop, self).start() File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tornado\ioloop.py",
line 888, in start
handler_func(fd_obj, events) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tornado\stack_context.py",
line 277, in null_wrapper
return fn(*args, **kwargs) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\zmqstream.py",
line 440, in _handle_events
self._handle_recv() File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\zmqstream.py",
line 472, in _handle_recv
self._run_callback(callback, msg) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\zmqstream.py",
line 414, in _run_callback
callback(*args, **kwargs) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tornado\stack_context.py",
line 277, in null_wrapper
return fn(*args, **kwargs) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelbase.py",
line 281, in dispatcher
return self.dispatch_shell(stream, msg) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelbase.py",
line 232, in dispatch_shell
handler(stream, idents, msg) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelbase.py",
line 397, in execute_request
user_expressions, allow_stdin) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\ipkernel.py",
line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent) File "c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\zmqshell.py",
line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs) File "c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\IPython\core\interactiveshell.py",
line 2728, in run_cell
interactivity=interactivity, compiler=compiler, result=result) File "c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\IPython\core\interactiveshell.py",
line 2850, in run_ast_nodes
if self.run_code(code, result): File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\IPython\core\interactiveshell.py",
line 2910, in run_code
exec(code_obj, self.user_global_ns, self.user_ns) File "", line 1, in
output = semantic_seg_model(image_batch, tf.estimator.ModeKeys.TRAIN, BATCH_SIZE) File
"", line 107, in semantic_seg_model
name = "deconv2") File "", line 78, in deconv_layer
tf.summary.histogram("biases", bias) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\summary\summary.py",
line 192, in histogram
tag=tag, values=values, name=scope) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\ops\gen_logging_ops.py",
line 187, in _histogram_summary
"HistogramSummary", tag=tag, values=values, name=name) File "c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\op_def_library.py",
line 787, in _apply_op_helper
op_def=op_def) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\ops.py",
line 2956, in create_op
op_def=op_def) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\ops.py",
line 1470, in init
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Nan in summary
histogram for: deconv2/biases [[Node: deconv2/biases =
HistogramSummary[T=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/device:CPU:0"](deconv2/biases/tag,
deconv2/bias/read/_105)]] [[Node: batch/fifo_queue_Size/_91 =
_Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0",
send_device="/job:localhost/replica:0/task:0/device:CPU:0",
send_device_incarnation=1,
tensor_name="edge_37_batch/fifo_queue_Size", tensor_type=DT_INT32,
_device="/job:localhost/replica:0/task:0/device:GPU:0"]]
Number of iterations completed this epoch: 0
--------------------------------------------------------------------------- InvalidArgumentError Traceback (most recent call
last) c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py
in _do_call(self, fn, *args) 1322 try:
-> 1323 return fn(*args) 1324 except errors.OpError as e:
c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py
in _run_fn(session, feed_dict, fetch_list, target_list, options,
run_metadata) 1301 feed_dict,
fetch_list, target_list,
-> 1302 status, run_metadata) 1303
c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\errors_impl.py
in exit(self, type_arg, value_arg, traceback_arg)
472 compat.as_text(c_api.TF_Message(self.status.status)),
--> 473 c_api.TF_GetCode(self.status.status))
474 # Delete the underlying status object from memory otherwise it stays alive
InvalidArgumentError: Nan in summary histogram for: deconv2/biases
[[Node: deconv2/biases = HistogramSummary[T=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/device:CPU:0"](deconv2/biases/tag,
deconv2/bias/read/_105)]] [[Node: batch/fifo_queue_Size/_91 =
_Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0",
send_device="/job:localhost/replica:0/task:0/device:CPU:0",
send_device_incarnation=1,
tensor_name="edge_37_batch/fifo_queue_Size", tensor_type=DT_INT32,
_device="/job:localhost/replica:0/task:0/device:GPU:0"]]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call
last) in ()
40 # Terminate as usual. It is safe to call coord.request_stop() twice.
41 coord.request_stop()
---> 42 coord.join(enqueue_threads)
43
44 sess.close()
c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\training\coordinator.py
in join(self, threads, stop_grace_period_secs, ignore_live_threads)
387 self._registered_threads = set()
388 if self._exc_info_to_raise:
--> 389 six.reraise(*self._exc_info_to_raise)
390 elif stragglers:
391 if ignore_live_threads:
c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\six.py
in reraise(tp, value, tb)
691 if value.traceback is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
in ()
13 train_loss = []; train_accuracy = []
14
---> 15 s = sess.run(merged_summary)
16 writer.add_summary(s, epoch)
17
c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py
in run(self, fetches, feed_dict, options, run_metadata)
887 try:
888 result = self._run(None, fetches, feed_dict, options_ptr,
--> 889 run_metadata_ptr)
890 if run_metadata:
891 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py
in _run(self, handle, fetches, feed_dict, options, run_metadata)
1118 if final_fetches or final_targets or (handle and
feed_dict_tensor): 1119 results = self._do_run(handle,
final_targets, final_fetches,
-> 1120 feed_dict_tensor, options, run_metadata) 1121 else: 1122 results = []
c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py
in _do_run(self, handle, target_list, fetch_list, feed_dict, options,
run_metadata) 1315 if handle is None: 1316 return
self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1317 options, run_metadata) 1318 else: 1319 return self._do_call(_prun_fn, self._session,
handle, feeds, fetches)
c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py
in _do_call(self, fn, *args) 1334 except KeyError: 1335
pass
-> 1336 raise type(e)(node_def, op, message) 1337 1338 def _extend_graph(self):
InvalidArgumentError: Nan in summary histogram for: deconv2/biases
[[Node: deconv2/biases = HistogramSummary[T=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/device:CPU:0"](deconv2/biases/tag,
deconv2/bias/read/_105)]] [[Node: batch/fifo_queue_Size/_91 =
_Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0",
send_device="/job:localhost/replica:0/task:0/device:CPU:0",
send_device_incarnation=1,
tensor_name="edge_37_batch/fifo_queue_Size", tensor_type=DT_INT32,
_device="/job:localhost/replica:0/task:0/device:GPU:0"]]
Caused by op 'deconv2/biases', defined at: File "c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\runpy.py", line 193,
in _run_module_as_main
"main", mod_spec) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\runpy.py", line 85,
in _run_code
exec(code, run_globals) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel_launcher.py",
line 16, in
app.launch_new_instance() File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\traitlets\config\application.py",
line 658, in launch_instance
app.start() File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelapp.py",
line 478, in start
self.io_loop.start() File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\ioloop.py",
line 177, in start
super(ZMQIOLoop, self).start() File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tornado\ioloop.py",
line 888, in start
handler_func(fd_obj, events) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tornado\stack_context.py",
line 277, in null_wrapper
return fn(*args, **kwargs) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\zmqstream.py",
line 440, in _handle_events
self._handle_recv() File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\zmqstream.py",
line 472, in _handle_recv
self._run_callback(callback, msg) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\zmqstream.py",
line 414, in _run_callback
callback(*args, **kwargs) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tornado\stack_context.py",
line 277, in null_wrapper
return fn(*args, **kwargs) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelbase.py",
line 281, in dispatcher
return self.dispatch_shell(stream, msg) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelbase.py",
line 232, in dispatch_shell
handler(stream, idents, msg) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelbase.py",
line 397, in execute_request
user_expressions, allow_stdin) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\ipkernel.py",
line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent) File "c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\zmqshell.py",
line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs) File "c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\IPython\core\interactiveshell.py",
line 2728, in run_cell
interactivity=interactivity, compiler=compiler, result=result) File "c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\IPython\core\interactiveshell.py",
line 2850, in run_ast_nodes
if self.run_code(code, result): File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\IPython\core\interactiveshell.py",
line 2910, in run_code
exec(code_obj, self.user_global_ns, self.user_ns) File "", line 1, in
output = semantic_seg_model(image_batch, tf.estimator.ModeKeys.TRAIN, BATCH_SIZE) File
"", line 107, in semantic_seg_model
name = "deconv2") File "", line 78, in deconv_layer
tf.summary.histogram("biases", bias) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\summary\summary.py",
line 192, in histogram
tag=tag, values=values, name=scope) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\ops\gen_logging_ops.py",
line 187, in _histogram_summary
"HistogramSummary", tag=tag, values=values, name=name) File "c:\users\fawad
khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\op_def_library.py",
line 787, in _apply_op_helper
op_def=op_def) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\ops.py",
line 2956, in create_op
op_def=op_def) File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\ops.py",
line 1470, in init
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Nan in summary
histogram for: deconv2/biases [[Node: deconv2/biases =
HistogramSummary[T=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/device:CPU:0"](deconv2/biases/tag,
deconv2/bias/read/_105)]] [[Node: batch/fifo_queue_Size/_91 =
_Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0",
send_device="/job:localhost/replica:0/task:0/device:CPU:0",
send_device_incarnation=1,
tensor_name="edge_37_batch/fifo_queue_Size", tensor_type=DT_INT32,
_device="/job:localhost/replica:0/task:0/device:GPU:0"]]
Someone at github tensorflow issues suggested trying reduce the learning rate as the model diverged, but that didn't help. Another one suggested that dtype should be changed to float32 from float16 as float16 is problematic. When I change the dtype of data to float32 then I get the following error in the python log console:
[libprotobuf ERROR
C:\tf_jenkins\home\workspace\rel-win\M\windows-gpu\PY\36\cmake_build\protobuf\src\protobuf\src\google\protobuf\message_lite.cc:297]
Exceeded maximum protobuf size of 2GB. [libprotobuf ERROR
C:\tf_jenkins\home\workspace\rel-win\M\windows-gpu\PY\36\cmake_build\protobuf\src\protobuf\src\google\protobuf\message_lite.cc:297]
Exceeded maximum protobuf size of 2GB.
This very same error occurs when I try to increase the width and hieght of the overlapping patches of image. I have also tried reducing BATCH_SIZE but didn't help.
I have 4GB NVIDIA GeForce GTX 960M dedicated graphics card and 16GB RAM with Intel Core i7-6700HQ CPU # 2.60 GHz 2.60 GHz. Python version is 3.6.4 and Tensorflow version is 1.4 with GPU.
Update 1:
Updated model:
def semantic_seg_model(features, mode, batch_size):
bias_constant = 0.1
conv_filters = [10, 25, 90]
conv_sizes = []
tf.summary.image('input', features, batch_size)
"""Model function for CNN."""
# Encoding starts here.
# Convolutional Layer 1
# Input: 100 x 100
conv = conv_layer(inputs=features,
filters=conv_filters[0],
kernel_size=[2, 2],
bias_constant = bias_constant,
name = "conv1")
conv_sizes.append(conv.shape.as_list())
print(conv.shape)
# Convolutional Layer 2
# Input: 100 x 100
conv = conv_layer(inputs = conv,
filters = conv_filters[1],
kernel_size = [2, 2],
bias_constant = bias_constant,
name = "conv2")
conv_sizes.append(conv.shape.as_list())
print(conv.shape)
# Deconvolution Layer 2
# Input: 100 x 100
deconv = deconv_layer(inputs = conv,
filters = conv_filters[0],
kernel_size = [2, 2],
bias_constant = bias_constant,
output_size = [conv_sizes[0][1], conv_sizes[0][2]],
name = "deconv2")
print(deconv.shape)
deconv = deconv_layer(inputs = deconv,
filters = 3,
kernel_size = [2, 2],
output_size = [features.shape.as_list()[1], features.shape.as_list()[2]],
bias_constant = bias_constant,
name = "deconv1")
print(deconv.shape)
return tf.cast(deconv, dtype = tf.float16)
I suspect that the problem is that you have significantly overfit; the real evidence here is:
[01/1000] trainLoss: 0.0000 trainAcc: 1.00
This says that after only one epoch you have perfectly fit to the training data; a sure sign of overfitting. Thus the resulting NaN is probably an unsurprising effect of this problem, since you have now almost certainly have learned weights that will return 0 or inf on data or batches that it hasn't seen (since it is so badly overfit).
To resolve this issue, I recommend simplifying your model substantially until you get something that doesn't overfit so quickly; for example, fewer and smaller conv and deconv layers. Then you can start to build back in that complexity. You will then also find that you will likely want to build in some dropout and/or batch normalization to deal with this overfitting (note: while it is tempting to just start adding this complexity to your existing model, I recommend against it; get something simple working first, then add complexity from there...).
Final note: if you simplify the problem as suggested above you will likely have a better minimal example to share; that should let us get to the bottom of your problem more quickly.
Related
I use BERT to do binary classifier,the batch size is 8, but when I calculate the loss value, always get the following errors:
Traceback (most recent call last): File
"C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py",
line 1356, in _do_call
return fn(*args) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py",
line 1341, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py",
line 1429, in _call_tf_sessionrun
run_metadata) tensorflow.python.framework.errors_impl.InvalidArgumentError:
Incompatible shapes: [3] vs. [8] [[{{node
gradients/sub_grad/BroadcastGradientArgs}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File
"E:/project_chris/aad_bert_version/run.py", line 81, in
input_y: y_train}) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py",
line 950, in run
run_metadata_ptr) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py",
line 1173, in _run
feed_dict_tensor, options, run_metadata) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py",
line 1350, in _do_run
run_metadata) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py",
line 1370, in _do_call
raise type(e)(node_def, op, message) tensorflow.python.framework.errors_impl.InvalidArgumentError:
Incompatible shapes: [3] vs. [8] [[node
gradients/sub_grad/BroadcastGradientArgs (defined at
E:/project_chris/aad_bert_version/run.py:57) ]]
Original stack trace for 'gradients/sub_grad/BroadcastGradientArgs':
File "E:/project_chris/aad_bert_version/run.py", line 57, in
train_op = tf.train.AdamOptimizer(lr).minimize(loss) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\training\optimizer.py",
line 403, in minimize
grad_loss=grad_loss) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\training\optimizer.py",
line 512, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gradients_impl.py",
line 158, in gradients
unconnected_gradients) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gradients_util.py",
line 731, in _GradientsHelper
lambda: grad_fn(op, *out_grads)) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gradients_util.py",
line 403, in _MaybeCompile
return grad_fn() # Exit early File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gradients_util.py",
line 731, in
lambda: grad_fn(op, *out_grads)) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\math_grad.py",
line 1027, in _SubGrad
rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gen_array_ops.py",
line 1004, in broadcast_gradient_args
"BroadcastGradientArgs", s0=s0, s1=s1, name=name) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\op_def_library.py",
line 788, in _apply_op_helper
op_def=op_def) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\util\deprecation.py",
line 507, in new_func
return func(*args, **kwargs) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\ops.py",
line 3616, in create_op
op_def=op_def) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\ops.py",
line 2005, in init
self._traceback = tf_stack.extract_stack()
...which was originally created as op 'sub', defined at: File
"E:/project_chris/aad_bert_version/run.py", line 56, in
loss = tf.reduce_mean(tf.square(tf.reshape(pred, [-1]) - tf.reshape(input_y, [-1]))) File
"C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\math_ops.py",
line 884, in binary_op_wrapper
return func(x, y, name=name) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gen_math_ops.py",
line 11574, in sub
"Sub", x=x, y=y, name=name) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\op_def_library.py",
line 788, in _apply_op_helper
op_def=op_def) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\util\deprecation.py",
line 507, in new_func
return func(*args, **kwargs) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\ops.py",
line 3616, in create_op
op_def=op_def) File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\ops.py",
line 2005, in init
self._traceback = tf_stack.extract_stack()
lr = 0.0006 # 学习率
# 配置文件
data_root = './bert_model_chinese'
bert_config_file = os.path.join(data_root, 'bert_config.json')
bert_config = modeling.BertConfig.from_json_file(bert_config_file)
init_checkpoint = os.path.join(data_root, 'bert_model.ckpt')
bert_vocab_file = os.path.join(data_root, 'vocab.txt')
token = tokenization.CharTokenizer(vocab_file=bert_vocab_file)
input_ids = tf.placeholder(tf.int32, shape=[None, None], name='input_ids')
input_mask = tf.placeholder(tf.int32, shape=[None, None], name='input_masks')
segment_ids = tf.placeholder(tf.int32, shape=[None, None], name='segment_ids')
input_y = tf.placeholder(tf.float32, shape=[None, 1], name="input_y")
weights = {
'out': tf.Variable(tf.random_normal([768, 1]))
}
biases = {
'out': tf.Variable(tf.constant(0.1, shape=[1, ]))
}
model = modeling.BertModel(
config=bert_config,
is_training=False,
input_ids=input_ids,
input_mask=input_mask,
token_type_ids=segment_ids,
use_one_hot_embeddings=False)
tvars = tf.trainable_variables()
(assignment, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
tf.train.init_from_checkpoint(init_checkpoint, assignment)
output_layer_pooled = model.get_pooled_output() # 这个获取句子的output
output_layer_pooled = tf.nn.dropout(output_layer_pooled, keep_prob=0.9)
w_out = weights['out']
b_out = biases['out']
pred = tf.add(tf.matmul(output_layer_pooled, w_out), b_out, name="pre1")
pred = tf.reshape(pred, shape=[-1, 1], name="pre")
loss = tf.reduce_mean(tf.square(tf.reshape(pred, [-1]) - tf.reshape(input_y, [-1])))
train_op = tf.train.AdamOptimizer(lr).minimize(loss)
EPOCHS = 5
max_sentence_length = 512
batch_size = 8
data_path = './data'
train_input,predict_input =fffffuck(data_path,bert_vocab_file,True,True,
'./temp',max_sentence_length,batch_size,batch_size,batch_size)
data_loader = TextLoader(train_input,batch_size)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(EPOCHS):
data_loader.shuff()
for j in range(data_loader.num_batches):
x_train, y_train = data_loader.next_batch(j)
print(y_train)
print(y_train.shape)
x_input_ids = x_train[0]
x_input_mask = x_train[1]
x_segment_ids = x_train[2]
loss_, _ = sess.run([loss, train_op],
feed_dict={input_ids: x_input_ids, input_mask: x_input_mask, segment_ids: x_segment_ids,
input_y: y_train})
print('loss:', loss_)
class TextLoader(object):
def __init__(self, dataSet,batch_size):
self.data = dataSet
self.batch_size = batch_size
self.shuff()
def shuff(self):
self.num_batches = int(len(self.data) // self.batch_size)
if self.num_batches == 0:
assert False, 'Not enough data, make batch_size small.'
np.random.shuffle(self.data)
def next_batch(self,k):
x = []
y = []
for i in range(self.batch_size):
tmp = list(self.data)[k*self.batch_size + i][:3]
x.append(tmp)
y_ = list(self.data)[k*self.batch_size + i][3]
y.append(y_)
x = np.array(x)
return x,np.array(y).reshape([self.batch_size,1])
Hello there I hope you guys are in good health. I am working on tensorflow and traning my neural network on cifar 10 dataset I am working on kaggle. I am new to this field and have many complicated problems. seeking for your help thank you.
Here is my code
import tensorflow as tf
import pandas as pd
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt
from six.moves import cPickle as pickle
import os
import platform
from subprocess import check_output
classes = ('plane', 'car', 'bird', 'cat',
'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
x=tf.placeholder('float',[None,3072])
y=tf.placeholder('float')
%matplotlib inline
def load_pickle(f):
version = platform.python_version_tuple()
if version[0] == '2':
return pickle.load(f)
elif version[0] == '3':
return pickle.load(f, encoding='latin1')
raise ValueError("invalid python version: {}".format(version))
def load_CIFAR_batch(filename):
""" load single batch of cifar """
with open(filename, 'rb') as f:
datadict = load_pickle(f)
X = datadict['data']
Y = datadict['labels']
X = X.reshape(10000,3072)
Y = np.array(Y)
return X, Y
def load_CIFAR10(ROOT):
""" load all of cifar """
xs = []
ys = []
for b in range(1,6):
f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
X, Y = load_CIFAR_batch(f)
xs.append(X)
ys.append(Y)
Xtr = np.concatenate(xs)
Ytr = np.concatenate(ys)
del X, Y
Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
return Xtr, Ytr, Xte, Yte
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
# Load the raw CIFAR-10 data
cifar10_dir = '../input/cifar-10-batches-py/'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
# Subsample the data
mask = range(num_training, num_training + num_validation)
X_val = X_train[mask]
y_val = y_train[mask]
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]
# Normalize the data: subtract the mean image
return X_train, y_train, X_val, y_val, X_test, y_test
# Invoke the above function to get our data.
x_train, y_train, x_val, y_val, x_test, y_test = get_CIFAR10_data()
print('Train data shape: ', x_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', x_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', x_test.shape)
print('Test labels shape: ', y_test.shape)
layer1_neuron=500
layer2_neuron=500
layer3_neuron=500
number_of_class=10
batch_size=200
#my neural network
def neural_network(x_train):
hidden_layer_1={
'weights':tf.Variable(tf.random_normal([3072,layer1_neuron])),
'biases': tf.Variable(tf.random_normal([layer1_neuron]))
}
hidden_layer_2={
'weights':tf.Variable(tf.random_normal([layer1_neuron,layer2_neuron])),
'biases':tf.Variable(tf.random_normal([layer2_neuron]))
}
hidden_layer_3={
'weights':tf.Variable(tf.random_normal([layer2_neuron,layer3_neuron])),
'biases':tf.Variable(tf.random_normal([layer3_neuron]))
}
output={
'weights':tf.Variable(tf.random_normal([layer3_neuron,number_of_class])),
'biases':tf.Variable(tf.random_normal([number_of_class]))
}
l1=tf.add(tf.matmul(x_train,hidden_layer_1['weights']),hidden_layer_1['biases'])
l1=tf.nn.relu(l1)
l2=tf.add(tf.matmul(l1,hidden_layer_2['weights']),hidden_layer_2['biases'])
l2=tf.nn.relu(l2)
l3=tf.add(tf.matmul(l2,hidden_layer_3['weights']),hidden_layer_3['biases'])
l3=tf.nn.relu(l3)
output=tf.add(tf.matmul(l3,output['weights']),output['biases'])
return output
# for splitting out batches of data
def next_batch(num, data, labels):
idx = np.arange(0 , len(data))
np.random.shuffle(idx)
idx = idx[:num]
data_shuffle = [data[ i] for i in idx]
labels_shuffle = [labels[ i] for i in idx]
return np.asarray(data_shuffle), np.asarray(labels_shuffle)
def traning_neuralNetwork(x_train,y_train):
total_epochs=10
total_loss=0
epoch_loss=0
batch_size=200
num_batch = int(np.ceil(49000/batch_size))
prediction=neural_network(x)
cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
optimizer=tf.train.AdamOptimizer().minimize(cost)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range (total_epochs):
total_loss=0
for _ in range (num_batch):
x_train,y_train=next_batch(batch_size,x_train,y_train)
_,epoch_loss=sess.run([optimizer,cost],feed_dict={x:x_train,y:y_train})
total_loss+=epoch_loss
print('Epoch ',epoch, " loss = ",total_loss)
print("Traning Complete!")
correct=tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
accuracy=tf.reduce_mean(tf.cast(correct,'float'))
print('accuracy',accuracy.eval({x:x_test,y :y_test}))
traning_neuralNetwork(x_train,y_train)
**I am facing error that logits and labels of cost function are not of same shape **
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1329 try:
-> 1330 return fn(*args)
1331 except errors.OpError as e:
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1314 return self._call_tf_sessionrun(
-> 1315 options, feed_dict, fetch_list, target_list, run_metadata)
1316
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1422 self._session, options, feed_dict, fetch_list, target_list,
-> 1423 status, run_metadata)
1424
/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
515 compat.as_text(c_api.TF_Message(self.status.status)),
--> 516 c_api.TF_GetCode(self.status.status))
517 # Delete the underlying status object from memory otherwise it stays alive
InvalidArgumentError: logits and labels must be same size: logits_size=[200,10] labels_size=[1,200]
[[Node: softmax_cross_entropy_with_logits_sg_3 = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](softmax_cross_entropy_with_logits_sg_3/Reshape, softmax_cross_entropy_with_logits_sg_3/Reshape_1)]]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-17-aeb4ef85487e> in <module>()
----> 1 traning_neuralNetwork(x_train,y_train)
<ipython-input-16-a54e1136abe5> in traning_neuralNetwork(x_train, y_train)
67 for _ in range (num_batch):
68 x_train,y_train=next_batch(batch_size,x_train,y_train)
---> 69 _,epoch_loss=sess.run([optimizer,cost],feed_dict={x:x_train,y:y_train})
70 total_loss+=epoch_loss
71 print('Epoch ',epoch, " loss = ",total_loss)
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
906 try:
907 result = self._run(None, fetches, feed_dict, options_ptr,
--> 908 run_metadata_ptr)
909 if run_metadata:
910 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1141 if final_fetches or final_targets or (handle and feed_dict_tensor):
1142 results = self._do_run(handle, final_targets, final_fetches,
-> 1143 feed_dict_tensor, options, run_metadata)
1144 else:
1145 results = []
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1322 if handle is None:
1323 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1324 run_metadata)
1325 else:
1326 return self._do_call(_prun_fn, handle, feeds, fetches)
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1341 except KeyError:
1342 pass
-> 1343 raise type(e)(node_def, op, message)
1344
1345 def _extend_graph(self):
InvalidArgumentError: logits and labels must be same size: logits_size=[200,10] labels_size=[1,200]
[[Node: softmax_cross_entropy_with_logits_sg_3 = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](softmax_cross_entropy_with_logits_sg_3/Reshape, softmax_cross_entropy_with_logits_sg_3/Reshape_1)]]
Caused by op 'softmax_cross_entropy_with_logits_sg_3', defined at:
File "/opt/conda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/opt/conda/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/opt/conda/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
ioloop.IOLoop.instance().start()
File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/opt/conda/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "/opt/conda/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
handler(stream, idents, msg)
File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/opt/conda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/opt/conda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2808, in run_ast_nodes
if self.run_code(code, result):
File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-17-aeb4ef85487e>", line 1, in <module>
traning_neuralNetwork(x_train,y_train)
File "<ipython-input-16-a54e1136abe5>", line 59, in traning_neuralNetwork
cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 250, in new_func
return func(*args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 1957, in softmax_cross_entropy_with_logits
labels=labels, logits=logits, dim=dim, name=name)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 1871, in softmax_cross_entropy_with_logits_v2
precise_logits, labels, name=name)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 7142, in softmax_cross_entropy_with_logits
name=name)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3306, in create_op
op_def=op_def)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1669, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[200,10] labels_size=[1,200]
[[Node: softmax_cross_entropy_with_logits_sg_3 = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](softmax_cross_entropy_with_logits_sg_3/Reshape, softmax_cross_entropy_with_logits_sg_3/Reshape_1)]]
The labels y_train need to be one-hot encoded:
y_train = tf.one_hot(indices=y_train, depth=10)
See my comment
I raised an issue in github at: https://github.com/tensorflow/tensorflow/issues/14924. Here is the details.
This is OK:
import tensorflow as tf
sess = tf.InteractiveSession()
xx = tf.constant(1, shape=[32,1,4,4,1], dtype=tf.float32)
yy = tf.constant(1, shape=[1,32,1,4,4], dtype=tf.float32)
zz = xx * yy
sess.run([zz])
However:
x2 = tf.constant(1, shape=[10,32,1,4,4,1])
y2 = tf.constant(1, shape=[10,1,32,1,4,4])
z2 = x2 * y2
sess.run(z2)
Gives an error:
UnimplementedError (see above for traceback): Broadcast between [10,32,1,4,4,1] and [10,1,32,1,4,4] is not supported yet. [[Node: mul_1 = Mul[T=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Const_2, Const_3)]]
Log:
---------------------------------------------------------------------------
UnimplementedError Traceback (most recent call last)
<ipython-input-2-eef82717f8d8> in <module>()
2 y2 = tf.constant(1, shape=[10,1,32,1,4,4])
3 z2 = x2 * y2
----> 4 sess.run(z2)
/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
887 try:
888 result = self._run(None, fetches, feed_dict, options_ptr,
--> 889 run_metadata_ptr)
890 if run_metadata:
891 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
1118 if final_fetches or final_targets or (handle and feed_dict_tensor):
1119 results = self._do_run(handle, final_targets, final_fetches,
-> 1120 feed_dict_tensor, options, run_metadata)
1121 else:
1122 results = []
/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1315 if handle is None:
1316 return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1317 options, run_metadata)
1318 else:
1319 return self._do_call(_prun_fn, self._session, handle, feeds, fetches)
/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
1334 except KeyError:
1335 pass
-> 1336 raise type(e)(node_def, op, message)
1337
1338 def _extend_graph(self):
UnimplementedError: Broadcast between [10,32,1,4,4,1] and [10,1,32,1,4,4] is not supported yet.
[[Node: mul_1 = Mul[T=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Const_2, Const_3)]]
Caused by op u'mul_1', defined at:
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/runpy.py", line 174, in _run_module_as_main
"__main__", fname, loader, pkg_name)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
app.launch_new_instance()
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 474, in start
ioloop.IOLoop.instance().start()
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tornado/ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
user_expressions, allow_stdin)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
if self.run_code(code, result):
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-eef82717f8d8>", line 3, in <module>
z2 = x2 * y2
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 894, in binary_op_wrapper
return func(x, y, name=name)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 1117, in _mul_dispatch
return gen_math_ops._mul(x, y, name=name)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2726, in _mul
"Mul", x=x, y=y, name=name)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/home/jetadmin/anaconda2/envs/ygtf/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
UnimplementedError (see above for traceback): Broadcast between [10,32,1,4,4,1] and [10,1,32,1,4,4] is not supported yet.
[[Node: mul_1 = Mul[T=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Const_2, Const_3)]]
An update:
I assume the reason is related to how the dimensions are matching, instead of the total number of dimensions, or the number of mis-match. Because the following script runs OK, where x3 has the 2nd to last dimension changes from 4 to 1, adding one more places of mismatch.
x3 = tf.constant(1, shape=[10,32,1,4,1,1])
y3 = tf.constant(1, shape=[10,1,32,1,4,4])
z3 = x3 * y3
sess.run(z3)
As you may have already observed, at the moment Tensorflow has restricted the number of dimensions mismatch which it will correct to broadcast.
For that purpose, I have written my own broadcasting function which will broadcast the variable number of tensors to one common shape. However note that this function will not work if the shape of the tensor is not defined or contains None in its shape.
def broadcast_tensors(tensors):
shapes = [t.get_shape().as_list() for t in tensors]
max_rank = max([len(s) for s in shapes])
# Rank equalize all the tensors
for index in range(len(shapes)):
shape = shapes[index]
if len(shape) == max_rank:
continue
tensor = tensors[index]
for _ in range(max_rank - len(shape)):
shape.insert(0, 1)
tensor = tf.expand_dims(tensor, axis = 0)
tensors[index] = tensor
# Ensure if broadcasting is possible
from collections import Counter
broadcast_shape = []
for index in range(max_rank):
dimensions = [s[index] for s in shapes]
repeats = Counter(dimensions)
if len(repeats) > 2 or (len(repeats) == 2 and \
1 not in list(repeats.keys())):
raise Exception("Broadcasting not possible")
broadcast_shape.append(max(repeats.keys()))
# Broadcast the tensors
for axis, dimension in enumerate(broadcast_shape):
tensors = [tf.concat([t] * dimension, axis = axis) \
if t.get_shape()[axis] == 1 else t for t in tensors]
return tensors
Output:
x = tf.constant(1, shape = [10, 32, 1, 4, 4, 1])
y = tf.constant(1, shape = [1, 32, 1, 4, 1])
z = tf.constant(1, shape = [32, 4, 1, 1])
x, y, z = broadcast_tensors([x, y, z])
print(x.get_shape(), y.get_shape(), z.get_shape())
# (10, 32, 32, 4, 4, 1) (10, 32, 32, 4, 4, 1) (10, 32, 32, 4, 4, 1)
x = tf.constant(1, shape = [10, 32, 1, 4, 4, 1])
y = tf.constant(1, shape = [1, 32, 3, 4, 2])
z = tf.constant(1, shape = [32, 3, 1, 3])
x, y, z = broadcast_tensors([x, y, z])
# Exception: Broadcasting not possible
I am writing a program to do some handwriting recognition using Tensorflow.
I followed the tutorial on the TF website, but there is some problem with my code.
Here is my code:
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev = 0.05)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape = shape)
return tf.Variable(initial)
#x
x = tf.placeholder(tf.float32, shape = [None, 2500])
x_image = tf.reshape(x, [-1, 50, 50, 1])
#y
y_ = tf.placeholder(tf.float32, shape = [None, 10])
#useful layers
def conv2dsame(x, W, strides):
return tf.nn.conv2d(x, W, strides = strides, padding = 'SAME')
def conv2dvalid(x, W, strides):
return tf.nn.conv2d(x, W, strides = strides, padding = 'VALID')
def max_pool_2x2(x, strides):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides = strides, padding = 'SAME')
#hidden layer 1: conv layer
W_conv1 = weight_variable([12, 12, 1, 25])
b_conv1 = bias_variable([25])
h_conv1 = tf.nn.relu(conv2dvalid(x_image, W_conv1, [1,2,2,1])+b_conv1)
#hidden layer 2: conv layer
W_conv2 = weight_variable([5, 5, 25, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2dvalid(h_conv1, W_conv2, [1,1,1,1])+b_conv2)
#hidden layer 3: 2x2 max pool
h_pool2 = max_pool_2x2(h_conv2, [1,2,2,1])
#hidden layer 4: fully-connected layer
W_fc1 = weight_variable([8*8*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 8*8*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#hidden layer 5: fully-connected layer
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2)+b_fc2
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = y_conv, labels = y_))
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
iteration = 3000
for i in range(iteration):
batch = get_mini_batch(50)
if i%100 == 0:
train_accuracy = sess.run(accuracy, feed_dict={x:batch[0], y_:batch[1], keep_prob: 1.})
print ('step %d, training accuracy %g'%(i, train_accuracy))
sess.run(train_step, feed_dict={x:batch[0], y_:batch[1], keep_prob: 0.75})
val_set = get_mini_batch(50)
print ('test accuracy %g'%accuracy.eval(feed_dict={x:val_set[0], y:val_set[1], keep_prob: 1.0}, session=sess))
where get_mini_batch is a function which returns some mini-batch.
It returns the following error message:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1038 try:
-> 1039 return fn(*args)
1040 except errors.OpError as e:
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
1020 feed_dict, fetch_list, target_list,
-> 1021 status, run_metadata)
1022
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/contextlib.py in __exit__(self, type, value, traceback)
65 try:
---> 66 next(self.gen)
67 except StopIteration:
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status()
465 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466 pywrap_tensorflow.TF_GetCode(status))
467 finally:
InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder_2' with dtype float
[[Node: Placeholder_2 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-38-1b652368caea> in <module>()
6 batch = get_mini_batch(50)
7 if i%100 == 0:
----> 8 train_accuracy = sess.run(accuracy, feed_dict={x:batch[0], y_:batch[1], keep_prob: 1.})
9 print ('step %d, training accuracy %g'%(i, train_accuracy))
10 sess.run(train_step, feed_dict={x:batch[0], y_:batch[1], keep_prob: 0.75})
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
776 try:
777 result = self._run(None, fetches, feed_dict, options_ptr,
--> 778 run_metadata_ptr)
779 if run_metadata:
780 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
980 if final_fetches or final_targets:
981 results = self._do_run(handle, final_targets, final_fetches,
--> 982 feed_dict_string, options, run_metadata)
983 else:
984 results = []
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1030 if handle is None:
1031 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1032 target_list, options, run_metadata)
1033 else:
1034 return self._do_call(_prun_fn, self._session, handle, feed_dict,
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1050 except KeyError:
1051 pass
-> 1052 raise type(e)(node_def, op, message)
1053
1054 def _extend_graph(self):
InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder_2' with dtype float
[[Node: Placeholder_2 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'Placeholder_2', defined at:
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/runpy.py", line 184, in _run_module_as_main
"__main__", mod_spec)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
app.launch_new_instance()
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/traitlets/config/application.py", line 596, in launch_instance
app.start()
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 474, in start
ioloop.IOLoop.instance().start()
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 162, in start
super(ZMQIOLoop, self).start()
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tornado/ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
user_expressions, allow_stdin)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 498, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
if self.run_code(code, result):
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-5-e240ef8f1b26>", line 22, in <module>
keep_prob = tf.placeholder(tf.float32)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py", line 1507, in placeholder
name=name)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1997, in _placeholder
name=name)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder_2' with dtype float
[[Node: Placeholder_2 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
I googled this message and nothing is helpful. I think it might because of the dropout layer, but I have already put keep_prob in feed_dict.
What does 'Placeholder_2' actually mean?
Can anyone help me?
In your last row, replace y with y_. That is, you need to have:
print ('test accuracy %g'%accuracy.eval(feed_dict={x:val_set[0], y_:val_set[1], keep_prob: 1.0}, session=sess))
This is because you defined the associated placeholder as follows:
y_ = tf.placeholder(tf.float32, shape = [None, 10])
so the variable y does not exist in your code.
I had made CNN model for my dataset.
I had used batch for feed data.
when I used batch size is one, It is working.
but if I used batch size is not one (ex :128)
it make error.
this is my code.
I attach all my code.
there is 1623 columns data.
import tensorflow as tf
import numpy as np
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.01))
def model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden):
l1a = tf.nn.relu(tf.nn.conv2d(X, w, strides=[1, 1, 1, 1], padding='SAME')) # l1a shape=(?, 24, 60, 32)
l1 = tf.nn.avg_pool(l1a, ksize=[1, 4, 4, 1],strides=[1, 2, 2, 1], padding='SAME')# l1 shape=(?, 6, 30, 32)
l1 = tf.nn.dropout(l1, p_keep_conv)
l2a = tf.nn.relu(tf.nn.conv2d(l1, w2, strides=[1, 1, 1, 1], padding='SAME')) # l2a shape=(?, 6, 30, 64)
l2 = tf.nn.avg_pool(l2a, ksize=[1, 2, 3, 1], strides=[1, 2, 3, 1], padding='SAME') # l2 shape=(?, 3, 10, 64)
l2 = tf.nn.dropout(l2, p_keep_conv)
l3a = tf.nn.relu(tf.nn.conv2d(l2, w3, strides=[1, 1, 1, 1], padding='SAME')) # l3a shape=(?, 3, 10, 128)
l3 = tf.nn.max_pool(l3a, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='SAME') # l3 shape=(?, 3, 5, 128)
l3 = tf.reshape(l3, [-1, w4.get_shape().as_list()[0]]) # reshape to (?, 1920)
l3 = tf.nn.dropout(l3, p_keep_conv)
l4 = tf.nn.relu(tf.matmul(l3, w4))
l4 = tf.nn.dropout(l4, p_keep_hidden)
pyx = tf.matmul(l4, w_o)
return pyx
X = tf.placeholder(tf.float32, [None, 24,60,1])
Y = tf.placeholder(tf.float32, [None, 1])
w = init_weights([4, 4, 1, 32]) # 4x4x1 conv, 32 outputs
w2 = init_weights([2, 3, 32, 64]) # 2x3x32 conv, 64 outputs
w3 = init_weights([1, 2, 64, 128]) # 1x2x64 conv, 128 outputs
w4 = init_weights([128 * 5 * 3, 625]) # FC 128 * 5 * 3 inputs, 625 outputs
w_o = init_weights([625, 1]) # FC 625 inputs, 1 outputs (labels)
#B = tf.Variable(tf.random_normal([625]))
print ("W shape:", w.get_shape())
print ("W2 shape:", w2.get_shape())
print ("W3 shape:", w3.get_shape())
print ("W4 shape:", w4.get_shape())
print ("Wo shape:", w_o.get_shape())
p_keep_conv = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
py_x = model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden)
squared_deltas1 = tf.square(Y - py_x)
squared_deltas = tf.sqrt(squared_deltas1)
cost = tf.reduce_mean(squared_deltas)
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
cost_sum = tf.summary.scalar("cost",cost)
def read_my_file_format(filename_queue):
reader = tf.TextLineReader(skip_header_lines=1)
_, value = reader.read(filename_queue)
record_defaults = [[1],[1],[1],.........[1],[1],[1]]
#1623
record_defaults = [tf.constant([1], dtype=tf.float32),
tf.constant([1], dtype=tf.float32),
..................
tf.constant([1], dtype=tf.float32),
tf.constant([1], dtype=tf.float32),
]
Col1,Col2,Col3,......,Col1621,Col1622,Col1623=tf.decode_csv(value, record_defaults=record_defaults)
features = tf.pack([Col4,Col5,Col6, ....... Col1618,Col1619,Col1620])
label = tf.pack([Col29])
return features, label
def input_pipeline(batch_size, num_epochs):
min_after_dequeue = 10000
capacity = min_after_dequeue + 3 * batch_size
'''
filename_queue = tf.train.string_input_producer(["G:\CNN\1999.csv","G:\CNN\2000.csv","G:\CNN\2001.csv","G:\CNN\2002.csv",
"G:\CNN\2003.csv","G:\CNN\2004.csv","G:\CNN\2005.csv","G:\CNN\2006.csv",
"G:\CNN\2007.csv","G:\CNN\2008.csv"], num_epochs=num_epochs, shuffle=True)
'''
filename_queue = tf.train.string_input_producer(["test_1000.csv"], num_epochs=num_epochs, shuffle=True)
example, label = read_my_file_format(filename_queue)
example_batch, label_batch = tf.train.shuffle_batch([example, label],
batch_size=batch_size,
capacity=capacity,
min_after_dequeue=min_after_dequeue)
return example_batch, label_batch
examples, labels = input_pipeline(128,1)
print (examples)
examples = tf.reshape(examples, [-1,24,60,1])
print (examples)
#examples = examples.reshape(-1, 24, 60, 1) # 28x28x1 input img
i = 0
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess = tf.Session()
merged = tf.summary.merge_all()
trainwriter =tf.summary.FileWriter("./board/custom", sess.graph)
sess.run(init_op)
print(w.eval(session = sess))
print(w2.eval(session = sess))
print(w3.eval(session = sess))
print(w4.eval(session = sess))
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
while not coord.should_stop():
i = i + 1
example_batch, label_batch = sess.run([examples, labels])
sess.run(train_op , feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5})
if i % 1 == 0:
summary = sess.run(merged, feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 1, p_keep_hidden: 1})
trainwriter.add_summary(summary,i)
print(cost.eval(feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 1, p_keep_hidden: 1}, session = sess))
'''
loss = tf.abs(y-y_)
accuracy = tf.reduce_mean(loss)
print(cross_entropy.eval(feed_dict={x: example_batch, y_: label_batch}, session = sess))
'''
except tf.errors.OutOfRangeError:
print('Done training -- epoch limit reached')
finally:
# When done, ask the threads to stop.
coord.request_stop()
# Wait for threads to finish.
coord.join(threads)
sess.close()
this is code for select batch size.
examples, labels = input_pipeline(128,1)
if I write batch size to bigger than one, It makes this error
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1020 try:
-> 1021 return fn(*args)
1022 except errors.OpError as e:
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
1002 feed_dict, fetch_list, target_list,
-> 1003 status, run_metadata)
1004
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\contextlib.py in __exit__(self, type, value, traceback)
65 try:
---> 66 next(self.gen)
67 except StopIteration:
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\errors_impl.py in raise_exception_on_not_ok_status()
468 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 469 pywrap_tensorflow.TF_GetCode(status))
470 finally:
InvalidArgumentError: Incompatible shapes: [128,1] vs. [256,1]
[[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-1-d05205b7cce1> in <module>()
1866 i = i + 1
1867 example_batch, label_batch = sess.run([examples, labels])
-> 1868 sess.run(train_op , feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5})
1869
1870 if i % 1 == 0:
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
764 try:
765 result = self._run(None, fetches, feed_dict, options_ptr,
--> 766 run_metadata_ptr)
767 if run_metadata:
768 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
962 if final_fetches or final_targets:
963 results = self._do_run(handle, final_targets, final_fetches,
--> 964 feed_dict_string, options, run_metadata)
965 else:
966 results = []
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1012 if handle is None:
1013 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1014 target_list, options, run_metadata)
1015 else:
1016 return self._do_call(_prun_fn, self._session, handle, feed_dict,
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1032 except KeyError:
1033 pass
-> 1034 raise type(e)(node_def, op, message)
1035
1036 def _extend_graph(self):
InvalidArgumentError: Incompatible shapes: [128,1] vs. [256,1]
[[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]
Caused by op 'gradients/sub_grad/BroadcastGradientArgs', defined at:
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 184, in _run_module_as_main
"__main__", mod_spec)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\__main__.py", line 3, in <module>
app.launch_new_instance()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
app.start()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelapp.py", line 474, in start
ioloop.IOLoop.instance().start()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 390, in execute_request
user_expressions, allow_stdin)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\zmqshell.py", line 501, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2821, in run_ast_nodes
if self.run_code(code, result):
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-d05205b7cce1>", line 51, in <module>
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\training\optimizer.py", line 269, in minimize
grad_loss=grad_loss)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\training\optimizer.py", line 335, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 482, in gradients
in_grads = grad_fn(op, *out_grads)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\math_grad.py", line 594, in _SubGrad
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 390, in _broadcast_gradient_args
name=name)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 759, in apply_op
op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1128, in __init__
self._traceback = _extract_stack()
...which was originally created as op 'sub', defined at:
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 184, in _run_module_as_main
"__main__", mod_spec)
[elided 18 identical lines from previous traceback]
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-d05205b7cce1>", line 48, in <module>
squared_deltas1 = tf.square(Y - py_x)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\math_ops.py", line 814, in binary_op_wrapper
return func(x, y, name=name)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 2758, in sub
result = _op_def_lib.apply_op("Sub", x=x, y=y, name=name)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 759, in apply_op
op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1128, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [128,1] vs. [256,1]
[[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]
I want use batch function but in this case I can not use this.
how can I solve this problem?