Related
I am trying to build machine leaning program to compare between the images of Cat and dogs and have created TFRecords file successfully and now when i am trying to read the file for training, i am getting an error which is as given below.This is my code:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
data_path = 'train.tfrecords'
with tf.Session() as sess:
feature = {'train/image': tf.FixedLenFeature([],tf.string),
'train/label': tf.FixedLenFeature([],tf.int64)}
filename_queue = tf.train.string_input_producer([data_path],num_epochs=1000)
reader = tf.TFRecordReader()
serialized_example = reader.read(queue=filename_queue,name=None)
features = tf.parse_single_example(serialized_example,features=feature)
image = tf.decode_raw(features['train/image'], tf.float32)
label = tf.cast(features['train/label'], tf.int32)
image = tf.reshape(image, [224, 224, 3])
images, labels = tf.train.shuffle_batch([image, label], batch_size=10, capacity=30, num_threads=1,
min_after_dequeue=10)
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess.run(init_op)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
for batch_index in range(5):
img, lbl = sess.run([images, labels])
img = img.astype(np.uint8)
for j in range(6):
plt.subplot(2, 3, j + 1)
plt.imshow(img[j, ...])
plt.title('cat' if lbl[j] == 0 else 'dog')
pl t.show()
coord.request_stop()
coord.join(threads)
sess.close()
I am getting this error
C:\Users\snklp\Anaconda3\envs\untitled\python.exe C/Users/snklp/PycharmProjects/untitled/read_tfrecords.py
2018-07-24 14:58:44.870802: I tensorflow/core/platform/cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX AVX2
Traceback (most recent call last):
File "C:/Users/snklp/PycharmProjects/untitled/read_tfrecords.py", line 18, in <module>
serialized_example = tf.TFRecordReader.read(queue=filename_queue,name=None)
TypeError: read() missing 1 required positional argument: 'self'
Process finished with exit code 1
I tried to create a class Read with self argument in the read() function but nothing happened. I m am not getting this error. Can anybody help me in this???
The code is as below and runs perfectly:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
xData = np.array([[5, 3, 7], [1, 2, 6], [8, 7, 6]], dtype=np.float32)
yTrainData = np.array([[1], [0], [1]], dtype=np.float32)
model = Sequential()
model.add(Dense(64, input_dim=3, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.fit(xData, yTrainData, epochs=10, batch_size=128, verbose=2)
xTestData = np.array([[2, 8, 1], [3, 1, 9]], dtype=np.float32)
resultAry = model.predict(xTestData)
print("Cal result: %s" % resultAry)
I can't work out the code in TensowFlow, something I've written is like this:
import tensorflow as tf
import numpy as np
xData = np.array([[5, 3, 7], [1, 2, 6], [8, 7, 6]], dtype=np.float32)
yTrainData = np.array([[1], [0], [1]], dtype=np.float32)
x = tf.placeholder(tf.float32)
yTrain = tf.placeholder(tf.float32)
w = tf.Variable(tf.ones([64]), dtype=tf.float32)
b = tf.Variable(tf.zeros([1]), dtype=tf.float32)
y = tf.nn.relu(w * x + b)
w1 = tf.Variable(tf.ones([3]), dtype=tf.float32)
b1 = tf.Variable(0, dtype=tf.float32)
y1 = tf.reduce_mean(tf.nn.sigmoid(w1 * y + b1))
loss = tf.abs(y1 - tf.reduce_mean(yTrain))
optimizer = tf.train.AdadeltaOptimizer(0.1)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(10):
for j in range(3):
result = sess.run([loss, y1, yTrain, x, w, b, train], feed_dict={x: xData[j], yTrain: yTrainData[j]})
if i % 10 == 0:
print("i: %d, j: %d, loss: %10.10f, y1: %f, yTrain: %s, x: %s" % (i, j, float(result[0]), float(result[1]), yTrainData[j], xData[j]))
result = sess.run([y1, loss], feed_dict={x: [1, 6, 0], yTrain: 0})
print(result)
But I will got the following error while running,
Traceback (most recent call last):
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1327, in _do_call
return fn(*args)
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1306, in _run_fn
status, run_metadata)
File "C:\Python36\lib\contextlib.py", line 88, in __exit__
next(self.gen)
File "C:\Python36\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [64] vs. [3]
[[Node: mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable/read, _arg_Placeholder_0_0)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "testidc.py", line 36, in <module>
result = sess.run([loss, y1, yTrain, x, w, b, train], feed_dict={x: xData[j], yTrain: yTrainData[j]})
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 895, in run
run_metadata_ptr)
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1321, in _do_run
options, run_metadata)
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [64] vs. [3]
[[Node: mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable/read, _arg_Placeholder_0_0)]]
Caused by op 'mul', defined at:
File "testidc.py", line 15, in <module>
y = tf.nn.relu(w * x + b)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\variables.py", line 705, in _run_op
return getattr(ops.Tensor, operator)(a._AsTensor(), *args)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\math_ops.py", line 865, in binary_op_wrapper
return func(x, y, name=name)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1088, in _mul_dispatch
return gen_math_ops._mul(x, y, name=name)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1449, in _mul
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
File "C:\Python36\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Incompatible shapes: [64] vs. [3]
[[Node: mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable/read, _arg_Placeholder_0_0)]]
The main reason is the shape of W, must be the same as x in TensowFlow, but in Keras, the hidden Dense layer could have more nodes than the input(such as 64 in the example).
I need help for the equivalent TensorFlow code instead of the Keras one. Thanks.
This is an example that uses the tf.estimator.Estimator framework:
import tensorflow as tf
import numpy as np
# The model
def model(features):
dense = tf.layers.dense(inputs=features['x'], units=64, activation=tf.nn.relu)
dropout = tf.layers.dropout(dense, 0.2)
logits = tf.layers.dense(inputs=dropout, units=1, activation=tf.nn.sigmoid)
return logits
# Stuff needed to use the tf.estimator.Estimator framework
def model_fn(features, labels, mode):
logits = model(features)
predictions = {
'classes': tf.argmax(input=logits, axis=1),
'probabilities': tf.nn.softmax(logits)
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
# Configure the training op
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-4)
train_op = optimizer.minimize(loss, tf.train.get_or_create_global_step())
else:
train_op = None
accuracy = tf.metrics.accuracy(
tf.argmax(labels, axis=1), predictions['classes'])
metrics = {'accuracy': accuracy}
# Create a tensor named train_accuracy for logging purposes
tf.identity(accuracy[1], name='train_accuracy')
tf.summary.scalar('train_accuracy', accuracy[1])
return tf.estimator.EstimatorSpec(
mode=mode,
predictions=predictions,
loss=loss,
train_op=train_op,
eval_metric_ops=metrics)
# Setting up input for the model
def input_fn(mode, batch_size):
# function that processes your input and returns two tensors "samples" and "labels"
# that the estimator will use to fetch input batches.
# See https://www.tensorflow.org/get_started/input_fn for how to write this function.
return samples, labels
# Using the model
def main():
# Create the Estimator
classifier = tf.estimator.Estimator(
model_fn=model_fn, model_dir='some_dir')
# Train the model
# NOTE: I use this to make it compatible with your example, but you should
# defnitely set up your own input_fn above
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": np.array([[5, 3, 7], [1, 2, 6], [8, 7, 6]], dtype=np.float32)},
y=np.array([[1], [0], [1]]),
num_epochs=10,
batch_size=128,
shuffle=False)
classifier.train(
input_fn=train_input_fn,
steps=20000, # change as needed
)
# Predict on new data
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": np.array([[5, 3, 7], [1, 2, 6], [8, 7, 6]], dtype=np.float32)},
num_epochs=1,
batch_size=1,
shuffle=False)
predictions_iterator = classifier.predict(
input_fn=predict_input_fn)
print('Predictions results:')
for pred in predictions_iterator:
print(pred)
There is quite bit going on here, so I'll try to explain the blocks one by one.
The model
The model is defined as a composition of tf.layers in a separate model function. This is done to keep the actual model_fn (which is required by the Estimator framework) independent of the model architecture.
The function takes a features parameter, which is the output of a call to input_fn (see below). In this example, since we're using tf.estimator.inputs.numpy_input_fn, features is a dictionary with item x:input_tensor. We use the input tensor as input for our model graph.
model_fn
This function is required by the framework and is used to generate a specification for your Estimator that is dependent on the mode the estimato is being used for. Typically, an estimator used for prediction will have less operations than when it's used for training (you don't have the loss, optimizer, etc). This function takes care of adding all that is necessary to your model graph for the three possible modes of operation (prediction, evaluation, training).
Breaking it down to logical pieces, we have:
Prediction: we only need the model graph, the predictions and the corresponding predicted labels (we could skip the labels, but having it here is handy).
Evaluation: we need everything for prediction plus: a loss function, some metric to evaluate on and optionally some summaries to visualize the metrics in Tensorboard.
Training: we need everything for evaluation plus: a training operation from an optimizer (in your sample, RMSProp)
input_fn
This is where we provide the input to our estimator.
Have a look at Building Input Functions with tf.estimator for a guide on how your custom input_fn should look like. For the example, we'll use the numpy_input_fn function from the framework.
Note that usually one input_fn handles all operation modes according to a mode parameter. Since we're using numpy_input_fn, we need two different instances of it for training and prediction to provide the data as needed.
main
Here we actually train and use the estimator.
Firstly, we get an Estimator instance with the model_fn we specified, then we call train() and wait for the training to be over.
Once that is done, calling predict() returns an iterable that you can use to get the prediction results for all the samples in the dataset you're predicting.
This is a couple of months old but it's worth noting that there is absolutely no reason to not use keras with tensorflow. It's even part of the tensorflow library now!
So if you want full control of your tensors but still want to use keras' layers, you can easily achieve that by using keras as-is:
x = tf.placeholder(tf.float32, [None, 1024])
y = keras.layers.Dense(512, activation='relu')(x)
For more on that, keras' creator made a pretty cool post about it.
I'm getting an incompatible shape error when trying trying to add a CNN to a ready siamese code that I got from github : here is the link :
https://github.com/ywpkwon/siamese_tf_mnist
here is the code for running the session:
""" Siamese implementation using Tensorflow with MNIST example.
This siamese network embeds a 28x28 image (a point in 784D)
into a point in 2D.
By Youngwook Paul Kwon (young at berkeley.edu)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
#import system things
from tensorflow.examples.tutorials.mnist import input_data # for data
import tensorflow as tf
import numpy as np
import os
#import helpers
import inference
import visualize
# prepare data and tf.session
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
sess = tf.InteractiveSession()
# setup siamese network
siamese = inference.siamese();
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(siamese.loss)
saver = tf.train.Saver()
tf.initialize_all_variables().run()
# start training
if new:
for step in range(1000):
batch_x1, batch_y1 = mnist.train.next_batch(128)
batch_x2, batch_y2 = mnist.train.next_batch(128)
batch_y = (batch_y1 == batch_y2).astype('float')
_, loss_v = sess.run([train_step, siamese.loss], feed_dict={
siamese.x1: batch_x1,
siamese.x2: batch_x2,
siamese.y_: batch_y})
if step % 10 == 0:
print ('step %d: loss' % (step))
print (loss_v)
here is the code for creating the Siamese model.
import tensorflow as tf
class siamese:
# Create model
def __init__(self):
self.x1 = tf.placeholder(tf.float32, [None, 784])
self.x2 = tf.placeholder(tf.float32, [None, 784])
with tf.variable_scope("siamese") as scope:
self.o1 = self.network(self.x1)
scope.reuse_variables()
self.o2 = self.network(self.x2)
# Create loss
self.y_ = tf.placeholder(tf.float32, [None])
self.loss = self.loss_with_step()
def network(self, x):
weights = []
fc1 = self.fc_layer(x, 1024, "fc1" , [5, 5, 1, 32])
return fc1
def fc_layer(self, bottom, n_weight, name,kernel_shape ): #[5, 5, 1, 32]
assert len(bottom.get_shape()) == 2
#n_prev_weight = bottom.get_shape()[1]
initer = tf.truncated_normal_initializer(stddev=0.01)
weights_for_convolution = tf.get_variable(name+"weights_for_convolution", kernel_shape,
initializer=tf.random_normal_initializer())
bias_shape = kernel_shape[-1]
biases_for_convolution = tf.get_variable(name+"biases_for_convolution", [bias_shape],
initializer=tf.constant_initializer(0.1))
biases_for_connected_layer = tf.get_variable(name+"biases_for_connected_layer", [1024],
initializer=tf.constant_initializer(0.1))
weights_for_connected_layer = tf.get_variable(name+"weights_for_connected_layer", [7*7*64,1024],
initializer=tf.random_normal_initializer())
W = tf.get_variable(name+'W', dtype=tf.float32, shape=[1024,2], initializer=initer)
b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.01, shape=[2], dtype=tf.float32))
#weights_for_readout_layer = tf.get_variable("weights_for_readout_layer", [1024,2],
#initializer=tf.random_normal_initializer())
#biases_for_readout_layer = tf.get_variable("biases_for_readout_layer", [2],
#initializer=tf.constant_initializer(0.1))
bottom1 = tf.reshape(bottom,[-1,28,28,1]) ##
c2 = tf.nn.conv2d(bottom1, weights_for_convolution, strides=[1, 1, 1, 1], padding='SAME')
conv = tf.nn.bias_add(c2, biases_for_convolution)
relu = tf.nn.relu(conv)
out = tf.nn.max_pool(relu, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
#print tf.shape(out)
h_out_flat = tf.reshape(out ,[-1,7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_out_flat, weights_for_connected_layer) + biases_for_connected_layer)
#compute model output
final_output = tf.matmul(h_fc1,W) + b
#fc = tf.nn.bias_add(tf.matmul(bottom, W), b)
return final_output
def loss_with_spring(self):
margin = 5.0
labels_t = self.y_
labels_f = tf.subtract(1.0, self.y_, name="1-yi") # labels_ = !labels;
eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
print tf.shape(eucd2)
eucd2 = tf.reduce_sum(eucd2, 1)
eucd = tf.sqrt(eucd2+1e-6, name="eucd")
C = tf.constant(margin, name="C")
# yi*||CNN(p1i)-CNN(p2i)||^2 + (1-yi)*max(0, C-||CNN(p1i)-CNN(p2i)||^2)
pos = tf.multiply(labels_t, eucd2, name="yi_x_eucd2")
# neg = tf.multiply(labels_f, tf.subtract(0.0,eucd2), name="yi_x_eucd2")
# neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C,eucd2)), name="Nyi_x_C-eucd_xx_2")
neg = tf.multiply(labels_f, tf.pow(tf.maximum(tf.subtract(C, eucd), 0), 2), name="Nyi_x_C-eucd_xx_2")
losses = tf.add(pos, neg, name="losses")
loss = tf.reduce_mean(losses, name="loss")
return loss
def loss_with_step(self):
margin = 5.0
labels_t = self.y_ #128
labels_f = tf.subtract(1.0, self.y_, name="1-yi") # labels_ = !labels;
eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
eucd2 = tf.reduce_sum(eucd2, 1)
eucd = tf.sqrt(eucd2+1e-6, name="eucd")
C = tf.constant(margin, name="C")
pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C, eucd)), name="Ny_C-eucd")
losses = tf.add(pos, neg, name="losses")
loss = tf.reduce_mean(losses, name="loss")
return loss
Actually as the batch size is 128 label-t is 128,
the problem here is that the euclidean distance in the loss_with_step function,
as well as in the loss_with_spring function is of size 256 and not 128 I don't really know why!
here is the error I get.
Traceback (most recent call last):
File "run1.py", line 56, in <module>
siamese.y_: batch_y})
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 789, in run
run_metadata_ptr)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [128] vs. [256]
[[Node: y_x_eucd = Mul[T=DT_FLOAT, _device="/job:localhost/ replica:0/task:0/cpu:0"](_arg_Placeholder_2_0_2, eucd)]]
Caused by op u'y_x_eucd', defined at:
File "run1.py", line 28, in <module>
siamese = inference1.siamese();
File "/home/sudonuma/Documents/siamese for mnist/siamese_tf_mnist-master /inference1.py", line 18, in __init__
self.loss = self.loss_with_step()
File "/home/sudonuma/Documents/siamese for mnist/siamese_tf_mnist-master /inference1.py", line 110, in loss_with_step
pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/ops/math_ops.py", line 286, in multiply
return gen_math_ops._mul(x, y, name)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/ops/gen_math_ops.py", line 1377, in _mul
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/framework/op_def_library.py", line 767, in apply
_op
op_def=op_def)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/framework/ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/framework/ops.py", line 1269, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [128] vs. [256]
[[Node: y_x_eucd = Mul[T=DT_FLOAT, _device="/job:localhost /replica:0/task:0/cpu:0"](_arg_Placeholder_2_0_2, eucd)]]
can anyone help?
Looks like your reshaping after the convolution is wrong. The output of the convolution layer would be 14x14x32 for a 28x28x1 input passed through conv(stride=1)-maxpool(stride 2). So you need to change the flatten layer to :
h_out_flat = tf.reshape(out ,[-1,14*14*32])
and also the weights_for_connected_layer appropriately.
I'm trying to inference single image using tensorflow cifar10 example:
https://www.tensorflow.org/versions/r0.8/tutorials/deep_cnn/index.html#convolutional-neural-networks
def restore_vars(saver, sess):
""" Restore saved net, global score and step, and epsilons OR
create checkpoint directory for later storage. """
#sess.run(tf.initialize_all_variables())
ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
# Restores from checkpoint
saver.restore(sess, ckpt.model_checkpoint_path)
return True
else:
print('No checkpoint file found')
return False
def eval_single_img():
input_img = tf.image.decode_jpeg(tf.read_file("test.jpg"), channels=3)
input_img =
input_img = tf.reshape(input_img, [3, 32, 32])
input_img = tf.transpose(input_img, [1, 2, 0])
reshaped_image = tf.cast(input_img, tf.float32)
resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image, 24, 24)
float_image = tf.image.per_image_whitening(resized_image)
image = tf.expand_dims(float_image, 0) # create a fake batch of images (batch_size = 1)
logits = cifar10.inference(image)
_, top_k_pred = tf.nn.top_k(logits, k=5)
# Restore the moving average version of the learned variables for eval.
variable_averages = tf.train.ExponentialMovingAverage(
cifar10.MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
with tf.Session() as sess:
restored = restore_vars(saver, sess)
top_indices = sess.run([top_k_pred])
print ("Predicted ", top_indices[0], " for your input image.")
**ERROR MESSAGE:
tensorflow.python.framework.errors.InvalidArgumentError: Assign requires shapes of both tensors to match. lhs shape= [18,384] rhs shape= [2304,384]
[[Node: save/Assign_5 = Assign[T=DT_FLOAT, _class=["loc:#local3/weights"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/cpu:0"](local3/weights, save/restore_slice_5)]]
Caused by op u'save/Assign_5', defined at:
What might be causing this?**
I have modified existing cifar10 example to work as a siamese network.
But I am facing some difficulties in training it.
Changes Made :
placeholder instead of queue
custom loss function
Here is my modified cifar10_train.py :
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
import os.path
import time
import input_data
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import cifar10
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', 'tmp/cifar10_train',
"""Directory where to write event logs """
"""and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 1000000,
"""Number of batches to run.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
def train():
"""Train CIFAR-10 for a number of steps."""
dataset = input_data.read()
image, image_p, label = dataset.train_dataset
image_size = dataset.image_size
batch_size = 28
with tf.Graph().as_default():
global_step = tf.Variable(0, trainable=False)
# Get images and labels for CIFAR-10.
images = tf.placeholder(tf.float32, shape=(batch_size, image_size[0], image_size[1], image_size[2]))
images2 = tf.placeholder(tf.float32, shape=(batch_size, image_size[0], image_size[1], image_size[2]))
labels = tf.placeholder(tf.float32, shape=(batch_size))
tf.image_summary('images', images)
tf.image_summary('images2', images)
# Build a Graph that computes the logits predictions from the
# inference model.
with tf.variable_scope('inference') as scope:
logits = cifar10.inference(images)
scope.reuse_variables()
logits2 = cifar10.inference(images2)
# Calculate loss.
loss = cifar10.loss(logits, logits2, labels)
# Build a Graph that trains the model with one batch of examples and
# updates the model parameters.
train_op = cifar10.train(loss, global_step)
# Create a saver.
saver = tf.train.Saver(tf.all_variables())
# Build the summary operation based on the TF collection of Summaries.
summary_op = tf.merge_all_summaries()
# Build an initialization operation to run below.
init = tf.initialize_all_variables()
# Start running operations on the Graph.
sess = tf.Session(config=tf.ConfigProto(
log_device_placement=FLAGS.log_device_placement))
sess.run(init)
# Start the queue runners.
tf.train.start_queue_runners(sess=sess)
summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
graph_def=sess.graph_def)
for step in xrange(FLAGS.max_steps):
start_time = time.time()
offset = (step * batch_size) % (dataset.train_samples - batch_size)
_, loss_value = sess.run([train_op, loss], feed_dict={images: image[offset:(offset + batch_size)], images2: image_p[offset:(offset + batch_size)], labels: 1.0*label[offset:(offset + batch_size)]})
duration = time.time() - start_time
print(loss_value)
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
num_examples_per_step = FLAGS.batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print (format_str % (datetime.now(), step, loss_value,
examples_per_sec, sec_per_batch))
if step % 100 == 0:
summary_str = sess.run(summary_op)
summary_writer.add_summary(summary_str, step)
# Save the model checkpoint periodically.
if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)
def main(argv=None):
# pylint: disable=unused-argument
train()
if __name__ == '__main__':
tf.app.run()
Modified cifar10.py
"""Builds the CIFAR-10 network.
Summary of available functions:
# Compute input images and labels for training. If you would like to run
# evaluations, use inputs() instead.
inputs, labels = distorted_inputs()
# Compute inference on the model inputs to make a prediction.
predictions = inference(inputs)
# Compute the total loss of the prediction with respect to the labels.
loss = loss(predictions, labels)
# Create a graph to run one step of training with respect to the loss.
train_op = train(loss, global_step)
"""
# pylint: disable=missing-docstring
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gzip
import os
import re
import sys
import tarfile
from six.moves import urllib
import tensorflow as tf
import input_data
FLAGS = tf.app.flags.FLAGS
# Basic model parameters.
tf.app.flags.DEFINE_integer('batch_size', 28,
"""Number of images to process in a batch.""")
tf.app.flags.DEFINE_string('data_dir_p', '/tmp/cifar10_data',
"""Path to the CIFAR-10 data directory.""")
# Global constants describing the CIFAR-10 data set.
# IMAGE_SIZE = cifar10_input.IMAGE_SIZE
# NUM_CLASSES = cifar10_input.NUM_CLASSES
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = input_data.train_samples
# NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.001 # Initial learning rate.
Q = 360.6244
# If a model is trained with multiple GPU's prefix all Op names with tower_name
# to differentiate the operations. Note that this prefix is removed from the
# names of the summaries when visualizing a model.
TOWER_NAME = 'tower'
DATA_URL = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
def _activation_summary(x):
# Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
# session. This helps the clarity of presentation on tensorboard.
tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
tf.histogram_summary(tensor_name + '/activations', x)
tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
def _variable_on_cpu(name, shape, initializer):
with tf.device('/cpu:0'):
var = tf.get_variable(name, shape, initializer=initializer)
return var
def _variable_with_weight_decay(name, shape, stddev, wd):
var = _variable_on_cpu(name, shape, tf.truncated_normal_initializer(stddev=stddev))
if wd:
weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
return var
def inference(data):
# We instantiate all variables using tf.get_variable() instead of
# tf.Variable() in order to share variables across multiple GPU training runs.
# If we only ran this model on a single GPU, we could simplify this function
# by replacing all instances of tf.get_variable() with tf.Variable().
#
# conv1
with tf.variable_scope('conv1') as scope:
kernel = _variable_with_weight_decay('weights', shape=[5, 5, 1, 20],
stddev=0.1, wd=0.0)
conv = tf.nn.conv2d(data, kernel, [1, 1, 1, 1], padding='VALID')
biases = _variable_on_cpu('biases', [20], tf.constant_initializer(0.0))
conv1 = tf.nn.bias_add(conv, biases)
_activation_summary(conv1)
# pool1
pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='VALID', name='pool1')
# conv2
with tf.variable_scope('conv2') as scope:
kernel = _variable_with_weight_decay('weights', shape=[5, 5, 20, 50],
stddev=0.1, wd=0.0)
conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='VALID')
biases = _variable_on_cpu('biases', [50], tf.constant_initializer(0.0))
conv2 = tf.nn.bias_add(conv, biases)
_activation_summary(conv2)
# pool2
pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='VALID', name='pool2')
# local3
with tf.variable_scope('local3') as scope:
# Move everything into depth so we can perform a single matrix multiply.
dim = 1
for d in pool2.get_shape()[1:].as_list():
dim *= d
reshape = tf.reshape(pool2, [pool2.get_shape()[0:].as_list()[0], dim])
weights = _variable_with_weight_decay('weights', shape=[dim, 500],
stddev=0.1, wd=0.0)
biases = _variable_on_cpu('biases', [500], tf.constant_initializer(0.10))
local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
_activation_summary(local3)
# local4
with tf.variable_scope('local4') as scope:
weights = _variable_with_weight_decay('weights', shape=[500, 10],
stddev=0.1, wd=0.0)
biases = _variable_on_cpu('biases', [10], tf.constant_initializer(0.0))
local4 = tf.add(tf.matmul(local3, weights), biases, name=scope.name)
_activation_summary(local4)
#local5
with tf.variable_scope('local5') as scope:
weights = _variable_with_weight_decay('weights', [10, 10],
stddev=0.1, wd=0.0)
biases = _variable_on_cpu('biases', [10],
tf.constant_initializer(0.0))
local5 = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
_activation_summary(local5)
return local5
def loss(features1, features2, labels):
energy_square = (tf.reduce_sum(tf.pow(tf.sub(features1, features2), 2),1))
loss = tf.add(tf.mul(tf.pow(tf.sub(labels,1),2),energy_square),tf.mul(labels,tf.maximum(tf.sub(1.0,energy_square),0)))
loss = tf.reduce_sum(loss) / features1.get_shape()[0:].as_list()[0] / 2
# Calculate the average cross entropy loss across the batch.
# labels = tf.cast(labels, tf.int64)
# cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
# logits, labels, name='cross_entropy_per_example')
# cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', loss)
# The total loss is defined as the cross entropy loss plus all of the weight
# decay terms (L2 loss).
return tf.add_n(tf.get_collection('losses'), name='total_loss')
def _add_loss_summaries(total_loss):
# Compute the moving average of all individual losses and the total loss.
loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
losses = tf.get_collection('losses')
loss_averages_op = loss_averages.apply(losses + [total_loss])
# Attach a scalar summary to all individual losses and the total loss; do the
# same for the averaged version of the losses.
for l in losses + [total_loss]:
# Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name.
tf.scalar_summary(l.op.name +' (raw)', l)
tf.scalar_summary(l.op.name, loss_averages.average(l))
return loss_averages_op
def train(total_loss, global_step):
loss_averages_op = _add_loss_summaries(total_loss)
num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
# Decay the learning rate exponentially based on the number of steps.
lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
global_step,
decay_steps,
LEARNING_RATE_DECAY_FACTOR,
staircase=True)
tf.scalar_summary('learning_rate', lr)
# Generate moving averages of all losses and associated summaries.
loss_averages_op = _add_loss_summaries(total_loss)
# Compute gradients.
with tf.control_dependencies([loss_averages_op]):
opt = tf.train.GradientDescentOptimizer(lr)
grads = opt.compute_gradients(total_loss)
# Apply gradients.
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
# Add histograms for trainable variables.
for var in tf.trainable_variables():
tf.histogram_summary(var.op.name, var)
# Add histograms for gradients.
for grad, var in grads:
if grad:
tf.histogram_summary(var.op.name + '/gradients', grad)
# Track the moving averages of all trainable variables.
variable_averages = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train')
return train_op
Error I am getting :
2016-03-01 15:56:59.483682: step 0, loss = 0.22 (9.7 examples/sec; 2.896 sec/batch)
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Invalid argument: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [28,112,92,1]
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[28,112,92,1], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Out of range: Nan in summary histogram for: HistogramSummary
[[Node: HistogramSummary = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary/tag, inference/conv1/weights/read)]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Out of range: Nan in summary histogram for: HistogramSummary_1
[[Node: HistogramSummary_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary_1/tag, inference/conv1/biases/read)]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Invalid argument: You must feed a value for placeholder tensor 'Placeholder_2' with dtype float and shape [28]
[[Node: Placeholder_2 = Placeholder[dtype=DT_FLOAT, shape=[28], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Out of range: Nan in summary histogram for: HistogramSummary_3
[[Node: HistogramSummary_3 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary_3/tag, inference/conv2/biases/read)]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Out of range: Nan in summary histogram for: HistogramSummary_2
[[Node: HistogramSummary_2 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary_2/tag, inference/conv2/weights/read)]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Out of range: Nan in summary histogram for: HistogramSummary_4
[[Node: HistogramSummary_4 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary_4/tag, inference/local3/weights/read)]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Out of range: Nan in summary histogram for: HistogramSummary_5
[[Node: HistogramSummary_5 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary_5/tag, inference/local3/biases/read)]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Out of range: Nan in summary histogram for: HistogramSummary_6
[[Node: HistogramSummary_6 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary_6/tag, inference/local4/weights/read)]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Out of range: Nan in summary histogram for: HistogramSummary_7
[[Node: HistogramSummary_7 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary_7/tag, inference/local4/biases/read)]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Out of range: Nan in summary histogram for: HistogramSummary_8
[[Node: HistogramSummary_8 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary_8/tag, inference/local5/weights/read)]]
W tensorflow/core/common_runtime/executor.cc:1102] 0x7fd2340e8b60 Compute status: Out of range: Nan in summary histogram for: HistogramSummary_9
[[Node: HistogramSummary_9 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary_9/tag, inference/local5/biases/read)]]
Traceback (most recent call last):
File "cifar10_train.py", line 110, in <module>
tf.app.run()
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/platform/default/_app.py", line 30, in run
sys.exit(main(sys.argv))
File "cifar10_train.py", line 106, in main
train()
File "cifar10_train.py", line 95, in train
summary_str = sess.run(summary_op)
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 315, in run
return self._run(None, fetches, feed_dict)
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 511, in _run
feed_dict_string)
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 564, in _do_run
target_list)
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 586, in _do_call
e.code)
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [28,112,92,1]
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[28,112,92,1], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'Placeholder', defined at:
File "cifar10_train.py", line 110, in <module>
tf.app.run()
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/platform/default/_app.py", line 30, in run
sys.exit(main(sys.argv))
File "cifar10_train.py", line 106, in main
train()
File "cifar10_train.py", line 36, in train
images = tf.placeholder(tf.float32, shape=(batch_size, image_size[0], image_size[1], image_size[2]))
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 742, in placeholder
name=name)
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 583, in _placeholder
name=name)
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 655, in apply_op
op_def=op_def)
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2040, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/Users/Macbull/Desktop/GITHUB/tensorflow/venv/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1087, in __init__
self._traceback = _extract_stack()
Also, when I comment out merge_all_summaries(), the model diverges with loss= NaN
The problem here is that some of the summaries in your graph—collected by tf.merge_all_summaries()— depend on your placeholders. For example, the code in cifar10.py creates summaries for various activations at each step, which depend on the training example used.
The solution is to feed the same training batch when you evaluate summary_op:
if step % 100 == 0:
summary_str = sess.run(summary_op, feed_dict={
images: image[offset:(offset + batch_size)],
images2: image_p[offset:(offset + batch_size)],
labels: 1.0 * label[offset:(offset + batch_size)]})
While this gives the smallest modification to your original code, it is slightly inefficient, because it will re-execute the training step every 100 steps. The best way to address this (although it will require some restructuring of your training loop) is to fetch the summaries in the same call to sess.run() that performs a training step:
if step % 100 == 0:
_, loss_value, summary_str = sess.run([train_op, loss, summary_op], feed_dict={
images: image[offset:(offset + batch_size)],
images2: image_p[offset:(offset + batch_size)],
labels: 1.0 * label[offset:(offset + batch_size)]})