Adding dropout (tf.nn.dropout) results in NaN - TensorFlow

As a beginner to TensorFlow and CNNs, I'm working on emotion recognition to learn both.
The following code works when the dropout layer is removed, but produces NaN as soon as it is added. I've googled around and come across suggestions such as reducing the learning rate, but none of them has worked for me.
The net:
def cnn(self, data):
    # conv -> relu -> pool -> local response norm, twice
    conv = tf.nn.conv2d(data, self.w_1, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.relu(conv + self.b_1)
    pool = tf.nn.max_pool(hidden, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
    norm = tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    conv = tf.nn.conv2d(norm, self.w_2, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.relu(conv + self.b_2)
    pool = tf.nn.max_pool(hidden, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
    norm = tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    # flatten (note: this reshapes `pool`, not `norm`; both have the same shape)
    list_shape = norm.get_shape().as_list()
    reshape = tf.reshape(pool, [list_shape[0], list_shape[1] * list_shape[2] * list_shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, self.w_3) + self.b_3)
    hidden = tf.nn.relu(tf.matmul(hidden, self.w_4) + self.b_4)
    dropout = tf.nn.dropout(hidden, self.dropout_prob)  # in TF1, this argument is the *keep* probability
    return tf.matmul(dropout, self.w_5) + self.b_5
The model:
self.tf_x = tf.placeholder(tf.float32, shape=(self.batch_size, self.image_size, self.image_size, 1))
self.tf_y = tf.placeholder(tf.float32, shape=(self.batch_size, self.num_labels))
self.dropout_prob = tf.placeholder(tf.float32)
self.w_1 = tf.Variable(tf.truncated_normal([5, 5, 1, 64], stddev=0.1))
self.b_1 = tf.Variable(tf.zeros([64]))
self.w_2 = tf.Variable(tf.truncated_normal([9, 9, 64, 128], stddev=0.04))
self.b_2 = tf.Variable(tf.constant(1.0, shape=[128]))
self.w_3 = tf.Variable(tf.truncated_normal([self.image_size//4 * self.image_size//4 * 128, 392], stddev=0.1))
self.b_3 = tf.Variable(tf.constant(1.0, shape=(392,)))
self.w_4 = tf.Variable(tf.truncated_normal([392, 196], stddev=0.1))
self.b_4 = tf.Variable(tf.constant(1.0, shape=(196,)))
self.w_5 = tf.Variable(tf.truncated_normal([196, self.num_labels], stddev=0.04))
self.b_5 = tf.Variable(tf.constant(1.0, shape=[self.num_labels]))
self.logits = self.cnn(self.tf_x)
self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.tf_y, logits=self.logits))
self.optimizer = tf.train.AdamOptimizer(1e-6).minimize(self.loss)
self.train_pred = tf.nn.softmax(self.logits)
tf.summary.histogram('weights_1', self.w_1)
tf.summary.histogram('weights_2', self.w_2)
tf.summary.histogram('weights_3', self.w_3)
tf.summary.histogram('weights_4', self.w_4)
tf.summary.scalar('loss', self.loss)
self.merged = tf.summary.merge_all()
The error:
Traceback (most recent call last):
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\main.py", line 75, in <module>
main()
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\main.py", line 64, in main
emotion_cnn.train_test_validate()
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\Emotion.py", line 127, in train_test_validate
_,summary, l1, predictions1 = self.session.run([self.optimizer, self.merged, self.loss, self.train_pred], feed_dict=feed_dict1)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\client\session.py", line 767, in run
run_metadata_ptr)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\client\session.py", line 965, in _run
feed_dict_string, options, run_metadata)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\client\session.py", line 1015, in _do_run
target_list, options, run_metadata)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\client\session.py", line 1035, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Nan in summary histogram for: weights_1
[[Node: weights_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](weights_1/tag, Variable/read/_81)]]
Caused by op 'weights_1', defined at:
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\main.py", line 75, in <module>
main()
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\main.py", line 64, in main
emotion_cnn.train_test_validate()
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\Emotion.py", line 104, in train_test_validate
self.model()
File "C:\Users\joyte\Documents\GitHub\Emotion-recognizer\Emotion.py", line 82, in model
tf.summary.histogram('weights_1', self.w_1)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\summary\summary.py", line 203, in histogram
tag=scope.rstrip('/'), values=values, name=scope)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\ops\gen_logging_ops.py", line 139, in _histogram_summary
name=name)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 763, in apply_op
op_def=op_def)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\framework\ops.py", line 2327, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\joyte\Anaconda3\envs\ML\lib\site-packages\tensorflow\python\framework\ops.py", line 1226, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Nan in summary histogram for: weights_1
[[Node: weights_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](weights_1/tag, Variable/read/_81)]]
E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_gpu_executor.cc:637] Deallocating stream with pending work
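An editorial debugging note, not part of the original question: in TF1 the second argument of tf.nn.dropout is the keep probability, so it should be fed as e.g. 0.5 while training and 1.0 while evaluating; a value near 0 scales activations by 1/keep_prob and can blow them up. To find which op produces the first NaN, instead of failing much later at the weight histogram, tf.add_check_numerics_ops() can be run alongside the train step. A sketch using the names from the question:

check_op = tf.add_check_numerics_ops()  # one CheckNumerics op per float tensor in the graph
# run it together with the training step; the error then names the first op producing NaN/Inf
_, _ = self.session.run([self.optimizer, check_op], feed_dict=feed_dict1)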

Related

tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [3] vs. [8]?

I'm using BERT for a binary classifier. The batch size is 8, but when I calculate the loss value I always get the following error:
Traceback (most recent call last):
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py", line 1356, in _do_call
    return fn(*args)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py", line 1341, in _run_fn
    options, feed_dict, fetch_list, target_list, run_metadata)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py", line 1429, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [3] vs. [8]
  [[{{node gradients/sub_grad/BroadcastGradientArgs}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "E:/project_chris/aad_bert_version/run.py", line 81, in <module>
    input_y: y_train})
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py", line 950, in run
    run_metadata_ptr)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py", line 1173, in _run
    feed_dict_tensor, options, run_metadata)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py", line 1350, in _do_run
    run_metadata)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\client\session.py", line 1370, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [3] vs. [8]
  [[node gradients/sub_grad/BroadcastGradientArgs (defined at E:/project_chris/aad_bert_version/run.py:57) ]]
Original stack trace for 'gradients/sub_grad/BroadcastGradientArgs':
  File "E:/project_chris/aad_bert_version/run.py", line 57, in <module>
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\training\optimizer.py", line 403, in minimize
    grad_loss=grad_loss)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\training\optimizer.py", line 512, in compute_gradients
    colocate_gradients_with_ops=colocate_gradients_with_ops)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 158, in gradients
    unconnected_gradients)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gradients_util.py", line 731, in _GradientsHelper
    lambda: grad_fn(op, *out_grads))
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gradients_util.py", line 403, in _MaybeCompile
    return grad_fn()  # Exit early
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gradients_util.py", line 731, in <lambda>
    lambda: grad_fn(op, *out_grads))
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\math_grad.py", line 1027, in _SubGrad
    rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1004, in broadcast_gradient_args
    "BroadcastGradientArgs", s0=s0, s1=s1, name=name)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\ops.py", line 3616, in create_op
    op_def=op_def)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()
...which was originally created as op 'sub', defined at:
  File "E:/project_chris/aad_bert_version/run.py", line 56, in <module>
    loss = tf.reduce_mean(tf.square(tf.reshape(pred, [-1]) - tf.reshape(input_y, [-1])))
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\math_ops.py", line 884, in binary_op_wrapper
    return func(x, y, name=name)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 11574, in sub
    "Sub", x=x, y=y, name=name)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\ops.py", line 3616, in create_op
    op_def=op_def)
  File "C:\Users\Meiwei\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\framework\ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()
lr = 0.0006  # learning rate
# configuration files
data_root = './bert_model_chinese'
bert_config_file = os.path.join(data_root, 'bert_config.json')
bert_config = modeling.BertConfig.from_json_file(bert_config_file)
init_checkpoint = os.path.join(data_root, 'bert_model.ckpt')
bert_vocab_file = os.path.join(data_root, 'vocab.txt')
token = tokenization.CharTokenizer(vocab_file=bert_vocab_file)
input_ids = tf.placeholder(tf.int32, shape=[None, None], name='input_ids')
input_mask = tf.placeholder(tf.int32, shape=[None, None], name='input_masks')
segment_ids = tf.placeholder(tf.int32, shape=[None, None], name='segment_ids')
input_y = tf.placeholder(tf.float32, shape=[None, 1], name="input_y")
weights = {
    'out': tf.Variable(tf.random_normal([768, 1]))
}
biases = {
    'out': tf.Variable(tf.constant(0.1, shape=[1, ]))
}
model = modeling.BertModel(
    config=bert_config,
    is_training=False,
    input_ids=input_ids,
    input_mask=input_mask,
    token_type_ids=segment_ids,
    use_one_hot_embeddings=False)
tvars = tf.trainable_variables()
(assignment, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
tf.train.init_from_checkpoint(init_checkpoint, assignment)
output_layer_pooled = model.get_pooled_output()  # the pooled sentence-level output
output_layer_pooled = tf.nn.dropout(output_layer_pooled, keep_prob=0.9)
w_out = weights['out']
b_out = biases['out']
pred = tf.add(tf.matmul(output_layer_pooled, w_out), b_out, name="pre1")
pred = tf.reshape(pred, shape=[-1, 1], name="pre")
loss = tf.reduce_mean(tf.square(tf.reshape(pred, [-1]) - tf.reshape(input_y, [-1])))
train_op = tf.train.AdamOptimizer(lr).minimize(loss)
EPOCHS = 5
max_sentence_length = 512
batch_size = 8
data_path = './data'
train_input, predict_input = fffffuck(data_path, bert_vocab_file, True, True,
                                      './temp', max_sentence_length, batch_size, batch_size, batch_size)
data_loader = TextLoader(train_input, batch_size)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(EPOCHS):
        data_loader.shuff()
        for j in range(data_loader.num_batches):
            x_train, y_train = data_loader.next_batch(j)
            print(y_train)
            print(y_train.shape)
            x_input_ids = x_train[0]
            x_input_mask = x_train[1]
            x_segment_ids = x_train[2]
            loss_, _ = sess.run([loss, train_op],
                                feed_dict={input_ids: x_input_ids, input_mask: x_input_mask,
                                           segment_ids: x_segment_ids, input_y: y_train})
            print('loss:', loss_)

class TextLoader(object):
    def __init__(self, dataSet, batch_size):
        self.data = dataSet
        self.batch_size = batch_size
        self.shuff()

    def shuff(self):
        self.num_batches = int(len(self.data) // self.batch_size)
        if self.num_batches == 0:
            assert False, 'Not enough data, make batch_size small.'
        np.random.shuffle(self.data)

    def next_batch(self, k):
        x = []
        y = []
        for i in range(self.batch_size):
            tmp = list(self.data)[k * self.batch_size + i][:3]
            x.append(tmp)
            y_ = list(self.data)[k * self.batch_size + i][3]
            y.append(y_)
        x = np.array(x)
        return x, np.array(y).reshape([self.batch_size, 1])
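An editorial guess based only on the loader code above, not part of the original post: np.array(x) in next_batch has shape (batch_size, 3, seq_len), so x_train[0] picks out the three arrays of the first example rather than the batch of input IDs; feeding that gives the inputs a batch dimension of 3 against labels with a batch of 8, which matches the [3] vs. [8] in the error. Splitting per field across the batch instead might look like this:

import numpy as np

# x_train: shape (batch_size, 3, seq_len) -- three BERT inputs per example.
# Take one field across the whole batch, not one example's three fields.
x_input_ids = np.stack([ex[0] for ex in x_train])    # (batch_size, seq_len)
x_input_mask = np.stack([ex[1] for ex in x_train])
x_segment_ids = np.stack([ex[2] for ex in x_train])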

TensorFlow giving a strange error about variable reuse, stating a kernel already exists

Fantastic news: I figured this out and will keep the solution here for posterity.
I needed to begin my script with tf.reset_default_graph()
I am in the beginning stages of writing a GAN in TensorFlow, and I am getting a weird error message about whether or not I intend to be reusing a variable. It is basically saying (I think) that I am trying to define a kernel twice for one of my convolutions. Code and error are attached. Thank you!
import tensorflow as tf
import numpy as np
import os
from definitions import *

"""
HYPERPARAMETERS
"""
BATCH_SIZE = 10  # number of slices in the batches fed to Discrim
NUM_STEPS = 100  # number of iterations before we save
GEN_LR = 1e-5
DIS_LR = 1e-5
EPS = 1e-10
KERNEL = 3

x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 256, 256, 1], name='GenInput')
y = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 256, 256, 1], name='GenOutput')
#label = tf.placeholder(tf.int32, name='IsReal')  # 1=real 0=generated
#whole_dataset = Dataset2D('/Users/Karl/Inputs/training set/DEC-MRI_training', '/Users/Karl/Inputs/training set/ROI_Liu_modified/')

def gen(x):
    with tf.variable_scope('GenBlk1'):
        with tf.variable_scope('conv1'):
            conv1 = tf.layers.conv2d(x, 32, (KERNEL, KERNEL), strides=(1, 1), padding="same")
            conv1 = tf.nn.relu(conv1)
        with tf.variable_scope('conv2'):
            conv2 = tf.layers.conv2d(conv1, 32, (KERNEL, KERNEL), strides=(1, 1), padding="same")
            conv2 = tf.nn.relu(conv2)
        with tf.variable_scope('conv3'):
            conv3 = tf.layers.conv2d(conv2, 5, (KERNEL, KERNEL), strides=(1, 1), padding="same")
            conv3 = tf.nn.relu(conv3)
        #xp = tf.layers.max_pooling2d(inputs, pool_size, strides, padding='valid')
        return conv3

def discriminator(y):
    with tf.variable_scope('DisBlk1'):
        y = tf.layers.conv2d(y, 32, (KERNEL, KERNEL), strides=(1, 1), padding="same")
        y = tf.nn.relu(y)
        y = tf.layers.conv2d(y, 32, (KERNEL, KERNEL), strides=(1, 1), padding="same")
        y = tf.nn.relu(y)
        y = tf.layers.conv2d(y, 32, (KERNEL, KERNEL), strides=(1, 1), padding="same")
        y = tf.nn.relu(y)
        y = tf.layers.dense(y, 2)
        #xp = tf.layers.max_pooling2d(inputs, pool_size, strides, padding='valid')
        return y

def main(x, whole_dataset):
    #ops
    pred = gen(x)
    discrim_fake = discriminator(pred)  # was `discriminator(predict)`; `pred` is the name defined above
    #discrim_real = discriminator(y)
    #gLoss =
    #summaries
    with tf.name_scope("generator_output"):
        tf.summary.image("outputs", pred)
    tf.summary.scalar("discriminator_loss", dLoss)
    tf.summary.scalar("generator_loss_GAN", gLoss)
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name + "/values", var)
    saver = tf.train.Saver(max_to_keep=10)
    GLOBAL_STEP = 0
    #with tf.Session() as sess:
    #    while True:  # main loop

main(x, whole_dataset)
This is the error:
runfile('/Users/Karl/Research/NNStuff/GAN_breast/main.py', wdir='/Users/Karl/Research/NNStuff/GAN_breast')
Reloaded modules: definitions
Traceback (most recent call last):
File "<ipython-input-74-b7a187cb0f1a>", line 1, in <module>
runfile('/Users/Karl/Research/NNStuff/GAN_breast/main.py', wdir='/Users/Karl/Research/NNStuff/GAN_breast')
File "/Users/Karl/anaconda/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "/Users/Karl/anaconda/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/Users/Karl/Research/NNStuff/GAN_breast/main.py", line 77, in <module>
main(x,whole_dataset)
File "/Users/Karl/Research/NNStuff/GAN_breast/main.py", line 63, in main
tf.summary.image("outputs", gen(x))
File "/Users/Karl/Research/NNStuff/GAN_breast/main.py", line 31, in gen
conv1=tf.layers.conv2d(x, 32, (KERNEL, KERNEL), strides=(1, 1), padding="same")
File "/Users/Karl/anaconda/lib/python3.6/site-packages/tensorflow/python/layers/convolutional.py", line 551, in conv2d
return layer.apply(inputs)
File "/Users/Karl/anaconda/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 503, in apply
return self.__call__(inputs, *args, **kwargs)
File "/Users/Karl/anaconda/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 443, in __call__
self.build(input_shapes[0])
File "/Users/Karl/anaconda/lib/python3.6/site-packages/tensorflow/python/layers/convolutional.py", line 137, in build
dtype=self.dtype)
File "/Users/Karl/anaconda/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 383, in add_variable
trainable=trainable and self.trainable)
File "/Users/Karl/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/Users/Karl/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/Users/Karl/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 367, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/Users/Karl/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/Users/Karl/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 664, in _get_single_variable
name, "".join(traceback.format_list(tb))))
ValueError: Variable GenBlk1/conv1/conv2d/kernel already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
File "/Users/Karl/Research/NNStuff/GAN_breast/main.py", line 30, in generator
with tf.variable_scope('conv1'):
File "/Users/Karl/Research/NNStuff/GAN_breast/main.py", line 55, in main
#ops
File "/Users/Karl/Research/NNStuff/GAN_breast/main.py", line 77, in <module>
main(x,whole_dataset)
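For posterity, a minimal sketch (editorial, not from the original post) of the pattern that avoids the duplicate-kernel error when a network function is called more than once, assuming TF >= 1.4 where tf.AUTO_REUSE is available; tf.reset_default_graph() additionally clears stale graph state left over from re-running the script in Spyder/IPython:

import tensorflow as tf

tf.reset_default_graph()  # drop any graph left over from a previous run of the script

def discriminator(y):
    # AUTO_REUSE creates the variables on the first call and reuses them afterwards,
    # instead of raising "Variable DisBlk1/... already exists"
    with tf.variable_scope('DisBlk1', reuse=tf.AUTO_REUSE):
        y = tf.layers.conv2d(y, 32, (3, 3), padding="same", activation=tf.nn.relu)
        return tf.layers.dense(y, 2)

x = tf.placeholder(tf.float32, [10, 256, 256, 1])
d_fake = discriminator(x)  # creates the DisBlk1/... variables
d_real = discriminator(x)  # reuses them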

Nan in summary histogram for: deconv2/biases

The original size of my images is 3900 x 6000 x 3. I make overlapping patches of shape (232024, 28, 28, 3) and then make batches of size 1000. I have a CNN model for semantic segmentation as follows:
def conv_layer(inputs, filters, kernel_size, strides=1, padding="SAME", bias_constant=0.0, name="conv"):
    with tf.name_scope(name):
        input_shape = inputs.shape.as_list()
        filter_tensor = tf.truncated_normal([kernel_size[0], kernel_size[1], input_shape[3], filters], dtype=tf.float32)
        filter = tf.Variable(initial_value=filter_tensor, trainable=True, name="kernel")
        bias = tf.Variable(tf.constant(bias_constant, shape=[filters]), name="bias")
        conv2d = tf.nn.conv2d(input=tf.cast(inputs, dtype=tf.float32), filter=filter, strides=[1, strides, strides, 1], padding=padding)
        activation = tf.nn.relu(conv2d + bias)
        tf.summary.histogram("weights", filter)
        tf.summary.histogram("biases", bias)
        tf.summary.histogram("activations", activation)
        return tf.cast(activation, dtype=tf.float16)

def deconv_layer(inputs, filters, kernel_size, output_size, strides=1, padding="SAME", bias_constant=0.0, name="deconv"):
    with tf.name_scope(name):
        input_shape = inputs.shape.as_list()
        deconv_shape = tf.stack([tf.shape(inputs)[0], output_size[0], output_size[1], filters])
        filter_tensor = tf.truncated_normal([kernel_size[0], kernel_size[1], filters, input_shape[3]], dtype=tf.float32)
        filter = tf.Variable(initial_value=filter_tensor, trainable=True, name="kernel")
        bias = tf.Variable(tf.constant(bias_constant, shape=[filters]), name="bias")
        print("bias:")
        print(bias)
        conv2d_transpose = tf.nn.conv2d_transpose(value=tf.cast(inputs, dtype=tf.float32),
                                                  filter=filter,
                                                  strides=[1, strides, strides, 1],
                                                  output_shape=deconv_shape,
                                                  padding=padding)
        activation = tf.nn.relu(conv2d_transpose + bias)
        tf.summary.histogram("weights", filter)
        tf.summary.histogram("biases", bias)
        tf.summary.histogram("activations", activation)
        return tf.cast(activation, dtype=tf.float16)
def semantic_seg_model(features, mode, batch_size):
    bias_constant = 0.1
    conv_filters = [20, 50, 90]
    conv_sizes = []
    tf.summary.image('input', features, batch_size)
    """Model function for CNN."""
    # Encoding starts here.
    # Convolutional Layer 1
    # Input: 100 x 100
    conv = conv_layer(inputs=features,
                      filters=conv_filters[0],
                      kernel_size=[5, 5],
                      bias_constant=bias_constant,
                      name="conv1")
    conv_sizes.append(conv.shape.as_list())
    print(conv.shape)
    # Convolutional Layer 2
    # Input: 100 x 100
    conv = conv_layer(inputs=conv,
                      filters=conv_filters[1],
                      kernel_size=[5, 5],
                      strides=2,
                      bias_constant=bias_constant,
                      name="conv2")
    conv_sizes.append(conv.shape.as_list())
    print(conv.shape)
    # Convolutional Layer 3
    # Input: 100 x 100
    conv = conv_layer(inputs=conv,
                      filters=conv_filters[2],
                      kernel_size=[5, 5],
                      bias_constant=bias_constant,
                      strides=2,
                      name="conv3")
    conv_sizes.append(conv.shape.as_list())
    print(conv.shape)
    # Deconvolution Layer 3
    # Input: 100 x 100
    deconv = deconv_layer(inputs=conv,
                          filters=conv_filters[1],
                          kernel_size=[5, 5],
                          bias_constant=bias_constant,
                          strides=2,
                          output_size=[conv_sizes[1][1], conv_sizes[1][2]],
                          name="deconv3")
    print(deconv.shape)
    # Deconvolution Layer 2
    # Input: 100 x 100
    deconv = deconv_layer(inputs=deconv,
                          filters=conv_filters[0],
                          kernel_size=[5, 5],
                          bias_constant=bias_constant,
                          strides=2,
                          output_size=[conv_sizes[0][1], conv_sizes[0][2]],
                          name="deconv2")
    print(deconv.shape)
    deconv = deconv_layer(inputs=deconv,
                          filters=3,
                          kernel_size=[5, 5],
                          output_size=[features.shape.as_list()[1], features.shape.as_list()[2]],
                          bias_constant=bias_constant,
                          name="deconv1")
    print(deconv.shape)
    return deconv
epochs = 1000
learning_rate = 1e-50
image, label = tf.train.slice_input_producer([features, labels], shuffle=False)
BATCH_SIZE = 1000
THREAD_NUM = 5
MIN_AFTER_DEQUEUE = 10000
queue_capacity = MIN_AFTER_DEQUEUE + THREAD_NUM * BATCH_SIZE
image_batch, label_batch = tf.train.batch(tensors=[image, label],
                                          batch_size=BATCH_SIZE,
                                          capacity=queue_capacity,
                                          num_threads=THREAD_NUM,
                                          allow_smaller_final_batch=True)
output = semantic_seg_model(image_batch, tf.estimator.ModeKeys.TRAIN, BATCH_SIZE)
# cost
with tf.name_scope("cross_entropy"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=label_batch)
    cost = tf.reduce_mean(cross_entropy)
    # return cost, optimizer, accr
    tf.summary.scalar("xent", cost)
# optimizer
with tf.name_scope("optimizer"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.argmax(label_batch, 1), tf.argmax(output, 1))
    accr = tf.reduce_mean(tf.cast(correct_prediction, tf.float16))
    tf.summary.scalar("accuracy", accr)
merged_summary = tf.summary.merge_all()
# Session configs
config = tf.ConfigProto()
config.log_device_placement = True
config.gpu_options.allow_growth = True
# config.gpu_options.per_process_gpu_memory_fraction = 0.8
# Initialize session
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
# `saver` and `writer` are used below but were not defined in the excerpt; something like:
saver = tf.train.Saver()
writer = tf.summary.FileWriter("./board", sess.graph)
coord = tf.train.Coordinator()
enqueue_threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    for epoch in range(epochs):
        if coord.should_stop():
            break
        epoch_loss = 0
        train_loss = []; train_accuracy = []
        s = sess.run(merged_summary)
        writer.add_summary(s, epoch)
        for batch in range(math.ceil(features.shape.as_list()[0] / BATCH_SIZE)):
            _, sess_cost, sess_accuracy = sess.run([optimizer, cost, accr])
            train_loss.append(sess_cost)
            train_accuracy.append(sess_accuracy)
        train_loss = np.mean(train_loss)
        train_accuracy = np.mean(train_accuracy)
        saver.save(sess, "./semantic_seg_model_1", global_step=epoch)
        print("[%02d/%02d] trainLoss: %.4f trainAcc: %.2f"
              % (epoch + 1, epochs, sess_cost, sess_accuracy))
except Exception as e:
    # Report exceptions to the coordinator.
    coord.request_stop(e)
finally:
    # Terminate as usual. It is safe to call `coord.request_stop()` twice.
    coord.request_stop()
    coord.join(enqueue_threads)
sess.close()
I get an error when I start the training session. The error is as follows:
[01/1000] trainLoss: 0.0000 trainAcc: 1.00
INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.InvalidArgumentError'>, Nan in summary histogram for: deconv2/biases
  [[Node: deconv2/biases = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](deconv2/biases/tag, deconv2/bias/read/_105)]]
  [[Node: batch/fifo_queue_Size/_91 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_37_batch/fifo_queue_Size", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"]]
Caused by op 'deconv2/biases', defined at:
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelapp.py", line 478, in start
    self.io_loop.start()
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelbase.py", line 281, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelbase.py", line 232, in dispatch_shell
    handler(stream, idents, msg)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\kernelbase.py", line 397, in execute_request
    user_expressions, allow_stdin)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\IPython\core\interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\IPython\core\interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "", line 1, in <module>
    output = semantic_seg_model(image_batch, tf.estimator.ModeKeys.TRAIN, BATCH_SIZE)
  File "", line 107, in semantic_seg_model
    name = "deconv2")
  File "", line 78, in deconv_layer
    tf.summary.histogram("biases", bias)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\summary\summary.py", line 192, in histogram
    tag=tag, values=values, name=scope)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\ops\gen_logging_ops.py", line 187, in _histogram_summary
    "HistogramSummary", tag=tag, values=values, name=name)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\ops.py", line 2956, in create_op
    op_def=op_def)
  File "c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Nan in summary histogram for: deconv2/biases
  [[Node: deconv2/biases = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](deconv2/biases/tag, deconv2/bias/read/_105)]]
  [[Node: batch/fifo_queue_Size/_91 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_37_batch/fifo_queue_Size", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"]]
Number of iterations completed this epoch: 0
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1322     try:
-> 1323       return fn(*args)
   1324     except errors.OpError as e:

c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1301                          feed_dict, fetch_list, target_list,
-> 1302                          status, run_metadata)
   1303

c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
    472           compat.as_text(c_api.TF_Message(self.status.status)),
--> 473           c_api.TF_GetCode(self.status.status))
    474     # Delete the underlying status object from memory otherwise it stays alive

InvalidArgumentError: Nan in summary histogram for: deconv2/biases
  [[Node: deconv2/biases = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](deconv2/biases/tag, deconv2/bias/read/_105)]]
  [[Node: batch/fifo_queue_Size/_91 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_37_batch/fifo_queue_Size", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"]]
During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-...> in <module>()
     40 # Terminate as usual. It is safe to call coord.request_stop() twice.
     41 coord.request_stop()
---> 42 coord.join(enqueue_threads)
     43
     44 sess.close()

c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\training\coordinator.py in join(self, threads, stop_grace_period_secs, ignore_live_threads)
    387       self._registered_threads = set()
    388       if self._exc_info_to_raise:
--> 389         six.reraise(*self._exc_info_to_raise)
    390       elif stragglers:
    391         if ignore_live_threads:

c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\six.py in reraise(tp, value, tb)
    691             if value.__traceback__ is not tb:
    692                 raise value.with_traceback(tb)
--> 693             raise value
    694         finally:
    695             value = None

<ipython-input-...> in <module>()
     13 train_loss = []; train_accuracy = []
     14
---> 15 s = sess.run(merged_summary)
     16 writer.add_summary(s, epoch)
     17

c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
    887     try:
    888       result = self._run(None, fetches, feed_dict, options_ptr,
--> 889                          run_metadata_ptr)
    890       if run_metadata:
    891         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1118     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1119       results = self._do_run(handle, final_targets, final_fetches,
-> 1120                              feed_dict_tensor, options, run_metadata)
   1121     else:
   1122       results = []

c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1315     if handle is None:
   1316       return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1317                            options, run_metadata)
   1318     else:
   1319       return self._do_call(_prun_fn, self._session, handle, feeds, fetches)

c:\users\fawad khalil\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1334     except KeyError:
   1335       pass
-> 1336     raise type(e)(node_def, op, message)
   1337
   1338   def _extend_graph(self):

InvalidArgumentError: Nan in summary histogram for: deconv2/biases
  [[Node: deconv2/biases = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](deconv2/biases/tag, deconv2/bias/read/_105)]]
  [[Node: batch/fifo_queue_Size/_91 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_37_batch/fifo_queue_Size", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"]]
Caused by op 'deconv2/biases', defined at: (the same stack trace shown above, repeated verbatim)
InvalidArgumentError (see above for traceback): Nan in summary histogram for: deconv2/biases (node details as above)
Someone on the TensorFlow GitHub issues suggested reducing the learning rate since the model had diverged, but that didn't help. Another suggested changing the dtype from float16 to float32, since float16 is problematic. When I change the dtype of the data to float32, I get the following error in the Python log console:
[libprotobuf ERROR C:\tf_jenkins\home\workspace\rel-win\M\windows-gpu\PY\36\cmake_build\protobuf\src\protobuf\src\google\protobuf\message_lite.cc:297] Exceeded maximum protobuf size of 2GB.
[libprotobuf ERROR C:\tf_jenkins\home\workspace\rel-win\M\windows-gpu\PY\36\cmake_build\protobuf\src\protobuf\src\google\protobuf\message_lite.cc:297] Exceeded maximum protobuf size of 2GB.
The very same error occurs when I try to increase the width and height of the overlapping image patches. I have also tried reducing BATCH_SIZE, but that didn't help.
I have a 4GB NVIDIA GeForce GTX 960M dedicated graphics card and 16GB RAM with an Intel Core i7-6700HQ CPU @ 2.60 GHz. Python version is 3.6.4 and TensorFlow version is 1.4 with GPU.
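An editorial aside, not from the original thread: the 2GB protobuf limit is usually hit here because tf.train.slice_input_producer([features, labels], ...) bakes the full numpy arrays into the GraphDef as constants. The classic TF1 workaround is to route the data through an initializable variable fed from a placeholder, so the arrays never enter the graph definition (a sketch; features_np and labels_np are hypothetical stand-ins for the arrays in the question):

import numpy as np
import tensorflow as tf

features_np = np.zeros((232024, 28, 28, 3), np.float32)  # hypothetical data
labels_np = np.zeros((232024, 28, 28, 3), np.float32)

features_ph = tf.placeholder(tf.float32, shape=features_np.shape)
features_var = tf.Variable(features_ph, trainable=False, collections=[])
labels_ph = tf.placeholder(tf.float32, shape=labels_np.shape)
labels_var = tf.Variable(labels_ph, trainable=False, collections=[])
image, label = tf.train.slice_input_producer([features_var, labels_var], shuffle=False)

sess = tf.Session()
# initialize the data variables once; the arrays stay out of the GraphDef
sess.run([features_var.initializer, labels_var.initializer],
         feed_dict={features_ph: features_np, labels_ph: labels_np})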
Update 1:
Updated model:
def semantic_seg_model(features, mode, batch_size):
    bias_constant = 0.1
    conv_filters = [10, 25, 90]
    conv_sizes = []
    tf.summary.image('input', features, batch_size)
    """Model function for CNN."""
    # Encoding starts here.
    # Convolutional Layer 1
    # Input: 100 x 100
    conv = conv_layer(inputs=features,
                      filters=conv_filters[0],
                      kernel_size=[2, 2],
                      bias_constant=bias_constant,
                      name="conv1")
    conv_sizes.append(conv.shape.as_list())
    print(conv.shape)
    # Convolutional Layer 2
    # Input: 100 x 100
    conv = conv_layer(inputs=conv,
                      filters=conv_filters[1],
                      kernel_size=[2, 2],
                      bias_constant=bias_constant,
                      name="conv2")
    conv_sizes.append(conv.shape.as_list())
    print(conv.shape)
    # Deconvolution Layer 2
    # Input: 100 x 100
    deconv = deconv_layer(inputs=conv,
                          filters=conv_filters[0],
                          kernel_size=[2, 2],
                          bias_constant=bias_constant,
                          output_size=[conv_sizes[0][1], conv_sizes[0][2]],
                          name="deconv2")
    print(deconv.shape)
    deconv = deconv_layer(inputs=deconv,
                          filters=3,
                          kernel_size=[2, 2],
                          output_size=[features.shape.as_list()[1], features.shape.as_list()[2]],
                          bias_constant=bias_constant,
                          name="deconv1")
    print(deconv.shape)
    return tf.cast(deconv, dtype=tf.float16)
I suspect the problem is that you have significantly overfit; the real evidence here is:
[01/1000] trainLoss: 0.0000 trainAcc: 1.00
This says that after only one epoch you have fit the training data perfectly: a sure sign of overfitting. The resulting NaN is then probably an unsurprising effect, since you have now almost certainly learned weights that will return 0 or inf on data or batches the model hasn't seen (because it is so badly overfit).
To resolve this issue, I recommend simplifying your model substantially until you get something that doesn't overfit so quickly; for example, fewer and smaller conv and deconv layers. Then you can start building the complexity back in. You will likely also want to add some dropout and/or batch normalization to deal with the overfitting (a sketch follows below). Note: while it is tempting to just start adding this complexity to your existing model, I recommend against it; get something simple working first, then add complexity from there...
Final note: if you simplify the problem as suggested above, you will likely have a better minimal example to share; that should let us get to the bottom of your problem more quickly.
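A minimal sketch of that regularization in TF1 (editorial addition; is_training is a hypothetical placeholder, and cost stands for the loss already defined in the question):

is_training = tf.placeholder(tf.bool, name="is_training")

def conv_block(x, filters, training):
    # conv -> batch norm -> relu -> dropout; dropout and BN updates are active only while training
    x = tf.layers.conv2d(x, filters, (5, 5), padding="same")
    x = tf.layers.batch_normalization(x, training=training)
    x = tf.nn.relu(x)
    return tf.layers.dropout(x, rate=0.5, training=training)

# batch norm stores its moving-average updates in a collection;
# they must run together with the train step
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)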

TensorFlow CNN batch size error

I made a CNN model for my dataset and use batches to feed the data.
When the batch size is one, it works; but if the batch size is not one (e.g. 128), it raises an error.
This is my code; I attach all of it. There are 1623 columns of data.
import tensorflow as tf
import numpy as np

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden):
    l1a = tf.nn.relu(tf.nn.conv2d(X, w, strides=[1, 1, 1, 1], padding='SAME'))           # l1a shape=(?, 24, 60, 32)
    l1 = tf.nn.avg_pool(l1a, ksize=[1, 4, 4, 1], strides=[1, 2, 2, 1], padding='SAME')   # l1 shape=(?, 6, 30, 32)
    l1 = tf.nn.dropout(l1, p_keep_conv)
    l2a = tf.nn.relu(tf.nn.conv2d(l1, w2, strides=[1, 1, 1, 1], padding='SAME'))         # l2a shape=(?, 6, 30, 64)
    l2 = tf.nn.avg_pool(l2a, ksize=[1, 2, 3, 1], strides=[1, 2, 3, 1], padding='SAME')   # l2 shape=(?, 3, 10, 64)
    l2 = tf.nn.dropout(l2, p_keep_conv)
    l3a = tf.nn.relu(tf.nn.conv2d(l2, w3, strides=[1, 1, 1, 1], padding='SAME'))         # l3a shape=(?, 3, 10, 128)
    l3 = tf.nn.max_pool(l3a, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='SAME')   # l3 shape=(?, 3, 5, 128)
    l3 = tf.reshape(l3, [-1, w4.get_shape().as_list()[0]])                               # reshape to (?, 1920)
    l3 = tf.nn.dropout(l3, p_keep_conv)
    l4 = tf.nn.relu(tf.matmul(l3, w4))
    l4 = tf.nn.dropout(l4, p_keep_hidden)
    pyx = tf.matmul(l4, w_o)
    return pyx

X = tf.placeholder(tf.float32, [None, 24, 60, 1])
Y = tf.placeholder(tf.float32, [None, 1])
w = init_weights([4, 4, 1, 32])        # 4x4x1 conv, 32 outputs
w2 = init_weights([2, 3, 32, 64])      # 2x3x32 conv, 64 outputs
w3 = init_weights([1, 2, 64, 128])     # 1x2x64 conv, 128 outputs
w4 = init_weights([128 * 5 * 3, 625])  # FC 128 * 5 * 3 inputs, 625 outputs
w_o = init_weights([625, 1])           # FC 625 inputs, 1 output (labels)
#B = tf.Variable(tf.random_normal([625]))
print("W shape:", w.get_shape())
print("W2 shape:", w2.get_shape())
print("W3 shape:", w3.get_shape())
print("W4 shape:", w4.get_shape())
print("Wo shape:", w_o.get_shape())
p_keep_conv = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
py_x = model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden)
squared_deltas1 = tf.square(Y - py_x)
squared_deltas = tf.sqrt(squared_deltas1)
cost = tf.reduce_mean(squared_deltas)
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
cost_sum = tf.summary.scalar("cost", cost)

def read_my_file_format(filename_queue):
    reader = tf.TextLineReader(skip_header_lines=1)
    _, value = reader.read(filename_queue)
    record_defaults = [[1], [1], [1], .........[1], [1], [1]]
    # 1623
    record_defaults = [tf.constant([1], dtype=tf.float32),
                       tf.constant([1], dtype=tf.float32),
                       ..................
                       tf.constant([1], dtype=tf.float32),
                       tf.constant([1], dtype=tf.float32),
                       ]
    Col1, Col2, Col3, ......, Col1621, Col1622, Col1623 = tf.decode_csv(value, record_defaults=record_defaults)
    features = tf.pack([Col4, Col5, Col6, ....... Col1618, Col1619, Col1620])
    label = tf.pack([Col29])
    return features, label

def input_pipeline(batch_size, num_epochs):
    min_after_dequeue = 10000
    capacity = min_after_dequeue + 3 * batch_size
    '''
    filename_queue = tf.train.string_input_producer(["G:\CNN\1999.csv", "G:\CNN\2000.csv", "G:\CNN\2001.csv", "G:\CNN\2002.csv",
                                                     "G:\CNN\2003.csv", "G:\CNN\2004.csv", "G:\CNN\2005.csv", "G:\CNN\2006.csv",
                                                     "G:\CNN\2007.csv", "G:\CNN\2008.csv"], num_epochs=num_epochs, shuffle=True)
    '''
    filename_queue = tf.train.string_input_producer(["test_1000.csv"], num_epochs=num_epochs, shuffle=True)
    example, label = read_my_file_format(filename_queue)
    example_batch, label_batch = tf.train.shuffle_batch([example, label],
                                                        batch_size=batch_size,
                                                        capacity=capacity,
                                                        min_after_dequeue=min_after_dequeue)
    return example_batch, label_batch

examples, labels = input_pipeline(128, 1)
print(examples)
examples = tf.reshape(examples, [-1, 24, 60, 1])
print(examples)
#examples = examples.reshape(-1, 24, 60, 1)  # 28x28x1 input img
i = 0
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess = tf.Session()
merged = tf.summary.merge_all()
trainwriter = tf.summary.FileWriter("./board/custom", sess.graph)
sess.run(init_op)
print(w.eval(session=sess))
print(w2.eval(session=sess))
print(w3.eval(session=sess))
print(w4.eval(session=sess))
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    while not coord.should_stop():
        i = i + 1
        example_batch, label_batch = sess.run([examples, labels])
        sess.run(train_op, feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5})
        if i % 1 == 0:
            summary = sess.run(merged, feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 1, p_keep_hidden: 1})
            trainwriter.add_summary(summary, i)
            print(cost.eval(feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 1, p_keep_hidden: 1}, session=sess))
            '''
            loss = tf.abs(y - y_)
            accuracy = tf.reduce_mean(loss)
            print(cross_entropy.eval(feed_dict={x: example_batch, y_: label_batch}, session=sess))
            '''
except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')
finally:
    # When done, ask the threads to stop.
    coord.request_stop()
# Wait for threads to finish.
coord.join(threads)
sess.close()
This is the code that selects the batch size:
examples, labels = input_pipeline(128, 1)
If I set the batch size to anything bigger than one, it produces this error:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1020 try:
-> 1021 return fn(*args)
1022 except errors.OpError as e:
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
1002 feed_dict, fetch_list, target_list,
-> 1003 status, run_metadata)
1004
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\contextlib.py in __exit__(self, type, value, traceback)
65 try:
---> 66 next(self.gen)
67 except StopIteration:
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\errors_impl.py in raise_exception_on_not_ok_status()
468 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 469 pywrap_tensorflow.TF_GetCode(status))
470 finally:
InvalidArgumentError: Incompatible shapes: [128,1] vs. [256,1]
[[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-1-d05205b7cce1> in <module>()
1866 i = i + 1
1867 example_batch, label_batch = sess.run([examples, labels])
-> 1868 sess.run(train_op , feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5})
1869
1870 if i % 1 == 0:
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
764 try:
765 result = self._run(None, fetches, feed_dict, options_ptr,
--> 766 run_metadata_ptr)
767 if run_metadata:
768 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
962 if final_fetches or final_targets:
963 results = self._do_run(handle, final_targets, final_fetches,
--> 964 feed_dict_string, options, run_metadata)
965 else:
966 results = []
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1012 if handle is None:
1013 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1014 target_list, options, run_metadata)
1015 else:
1016 return self._do_call(_prun_fn, self._session, handle, feed_dict,
C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1032 except KeyError:
1033 pass
-> 1034 raise type(e)(node_def, op, message)
1035
1036 def _extend_graph(self):
InvalidArgumentError: Incompatible shapes: [128,1] vs. [256,1]
[[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]
Caused by op 'gradients/sub_grad/BroadcastGradientArgs', defined at:
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 184, in _run_module_as_main
"__main__", mod_spec)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\__main__.py", line 3, in <module>
app.launch_new_instance()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
app.start()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelapp.py", line 474, in start
ioloop.IOLoop.instance().start()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\kernelbase.py", line 390, in execute_request
user_expressions, allow_stdin)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\ipykernel\zmqshell.py", line 501, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2821, in run_ast_nodes
if self.run_code(code, result):
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-d05205b7cce1>", line 51, in <module>
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\training\optimizer.py", line 269, in minimize
grad_loss=grad_loss)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\training\optimizer.py", line 335, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 482, in gradients
in_grads = grad_fn(op, *out_grads)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\math_grad.py", line 594, in _SubGrad
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 390, in _broadcast_gradient_args
name=name)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 759, in apply_op
op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1128, in __init__
self._traceback = _extract_stack()
...which was originally created as op 'sub', defined at:
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\runpy.py", line 184, in _run_module_as_main
"__main__", mod_spec)
[elided 18 identical lines from previous traceback]
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-d05205b7cce1>", line 48, in <module>
squared_deltas1 = tf.square(Y - py_x)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\math_ops.py", line 814, in binary_op_wrapper
return func(x, y, name=name)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 2758, in sub
result = _op_def_lib.apply_op("Sub", x=x, y=y, name=name)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 759, in apply_op
op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Program Files\Anaconda3\envs\tensorflow_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1128, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [128,1] vs. [256,1]
[[Node: gradients/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_grad/Shape, gradients/sub_grad/Shape_1)]]
I want to use a batch function, but in this case I cannot use it. How can I solve this problem?
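The shapes [128,1] vs. [256,1] in the message indicate that Y and py_x come from batches of different sizes when tf.square(Y - py_x) is evaluated. A common remedy is to declare the batch dimension of every placeholder as None and to feed X and Y slices of the same length. Below is a minimal sketch of that pattern with hypothetical shapes (784 inputs, 1 output), not the asker's actual network:
import numpy as np
import tensorflow as tf

# Batch dimension left as None: the same graph accepts any batch size,
# so Y and py_x always agree in shape as long as the feeds are aligned.
X = tf.placeholder(tf.float32, shape=[None, 784])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.truncated_normal([784, 1], stddev=0.1))
b = tf.Variable(tf.zeros([1]))
py_x = tf.matmul(X, W) + b          # shape [batch, 1]

cost = tf.reduce_mean(tf.square(Y - py_x))
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    xs = np.random.rand(256, 784).astype(np.float32)
    ys = np.random.rand(256, 1).astype(np.float32)
    for start in range(0, 256, 128):
        # Slice features and labels with the same indices so each
        # feed_dict carries batches of identical length.
        sess.run(train_op, feed_dict={X: xs[start:start + 128],
                                      Y: ys[start:start + 128]})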

Error: Tensorflow CNN dimension

Hi. I'm new to TensorFlow and I'm trying to train a CNN on the CIFAR-10 dataset.
My network consists of three layers:
Convolution + Max Pooling
Fully Connected Layer
Softmax Layer
Below is my TensorFlow code for the model.
15 def model(X, w, w2, w_o, p_keep_conv, p_keep_hidden):
16
17 layer1 = tf.nn.relu(tf.nn.conv2d(X, w,strides=[1, 1, 1, 1], padding='SAME'))
18 layer1 = tf.nn.max_pool(l1, ksize=[1, 2, 2, 1],strides=[1, 2, 2, 1], padding='SAME')
19
20 layer1 = tf.reshape(l1,[-1,w2.get_shape().as_list()[0]])
21 layer1 = tf.nn.dropout(l1, p_keep_conv)
22
23 layer2 = tf.nn.relu(tf.matmul(layer1, w2))
24 layer2 = tf.nn.dropout(l4, p_keep_hidden)
25
26 pyx = tf.matmul(layer2, w_o)
27 return pyx
28
The input image has shape [-1, 32, 32, 3] (32×32 pixels, RGB).
Since the max-pooling filter is [1,2,2,1] with stride [1,2,2,1], and the convolution has 5 output channels,
I think the weight between the max-pooling layer and the fully connected layer (w2 in the code below) needs to have shape [5*16*16*3, 125].
(5: channels, 16: 32/2 pixels, 3: RGB, 125: number of output neurons)
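A quick way to check this kind of arithmetic is to ask TensorFlow for the pooled shape instead of computing it by hand; a minimal standalone sketch (with a stand-in for the undefined init_weights helper):
import tensorflow as tf

X = tf.placeholder("float", [None, 32, 32, 3])
w = tf.Variable(tf.truncated_normal([5, 5, 3, 5], stddev=0.01))  # stand-in for init_weights
conv = tf.nn.relu(tf.nn.conv2d(X, w, strides=[1, 1, 1, 1], padding='SAME'))
pool = tf.nn.max_pool(conv, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# Prints (?, 16, 16, 5): the 5 conv output channels replace the 3 RGB
# channels, so each image flattens to 16*16*5 = 1280 values.
print(pool.get_shape())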
Below is my TensorFlow code for the parameters.
60 trX = trX.reshape(-1, 32, 32, 3) # 32x32x3 input img
61 teX = teX.reshape(-1, 32, 32, 3) # 32x32x3 input img
62
63 X = tf.placeholder("float", [None, 32, 32, 3])
64 Y = tf.placeholder("float", [None, 10])
65
66 w = init_weights([5, 5, 3, 5])
67 w2 = init_weights([5*16*16*3, 125])
68 w_o = init_weights([125, 10])
69
70 p_keep_conv = tf.placeholder("float")
71 p_keep_hidden = tf.placeholder("float")
72
73 py_x = model(X, w, w2, w_o, p_keep_conv, p_keep_hidden)
74
75 cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, Y))
76 #train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
77 train_op = tf.train.AdamOptimizer(1e-4).minimize(cost)
78 predict_op = tf.argmax(py_x, 1)
79
However, it shows me the error below.
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/client/session.py", line 715, in _do_call
return fn(*args)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/client/session.py", line 697, in _run_fn
status, run_metadata)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/contextlib.py", line 66, in __exit__
next(self.gen)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/framework/errors.py", line 450, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors.InvalidArgumentError: Input to reshape is a tensor with 6400 values, but the requested shape requires a multiple of 3840
[[Node: Reshape = Reshape[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](MaxPool, Reshape/shape)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "convCifar.py", line 99, in <module>
p_keep_conv: 0.8, p_keep_hidden: 0.5})
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.InvalidArgumentError: Input to reshape is a tensor with 6400 values, but the requested shape requires a multiple of 3840
[[Node: Reshape = Reshape[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](MaxPool, Reshape/shape)]]
Caused by op 'Reshape', defined at:
File "convCifar.py", line 82, in <module>
py_x = model(X, w, w4, w_o, p_keep_conv, p_keep_hidden)
File "convCifar.py", line 27, in model
l1 = tf.reshape(l1,[-1,w4.get_shape().as_list()[0]])
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1383, in reshape
name=name)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()
I think the problem is the dimension of "w2" (the weight between the max-pooling layer and the fully connected layer). Also, I cannot understand how the 6400 arises.
How can I fix the error?
Please let me know if more information is needed.
Thank you!
The error tensorflow.python.framework.errors.InvalidArgumentError: Input to reshape is a tensor with 6400 values, but the requested shape requires a multiple of 3840 says that the input tensor of the tf.reshape() call in line 20 holds a number of values that is not a multiple of 3840.
That's because the tensor l1 isn't defined within the function model (you might have defined it earlier in your script, and that earlier tensor might have 6400 values). You probably want to set l1 = layer1. Note that the tensor l4 isn't defined in function model either.
Please let me know if my answer doesn't solve your error.
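For concreteness, here is the model function with the renaming suggested above applied consistently; this is a sketch of the suggested fix, not tested against the full script. Note also that the 6400 in the message is consistent with a batch of 5 images, each flattening to 16*16*5 = 1280 values after pooling, which would mean the first dimension of w2 should be 16*16*5 rather than 5*16*16*3:
def model(X, w, w2, w_o, p_keep_conv, p_keep_hidden):
    # Convolution + max pooling: (?, 32, 32, 3) -> (?, 16, 16, 5).
    layer1 = tf.nn.relu(tf.nn.conv2d(X, w, strides=[1, 1, 1, 1], padding='SAME'))
    layer1 = tf.nn.max_pool(layer1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Flatten; this only succeeds if w2's first dimension equals 16*16*5 = 1280.
    layer1 = tf.reshape(layer1, [-1, w2.get_shape().as_list()[0]])
    layer1 = tf.nn.dropout(layer1, p_keep_conv)

    # Fully connected layer.
    layer2 = tf.nn.relu(tf.matmul(layer1, w2))
    layer2 = tf.nn.dropout(layer2, p_keep_hidden)

    # Output logits.
    return tf.matmul(layer2, w_o)
with, correspondingly, w2 = init_weights([16*16*5, 125]) instead of init_weights([5*16*16*3, 125]).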