i get Tensor Tensor("Placeholder:0", shape=(128, 784), dtype=float32) is not an element of this graph. when i want to train & test my graph. The stack-trace is not really helpful, I can't get more information out of it. I really don't understand how this error happens, every data set should be automatically added to graphFully through with graphFully.as_default():and I later call it with with tf.Session(graph=graph) as session:.
advice on how to simplify the graph would be welcome. I want to define several graphs and compare them, therefore the "complicated" structure.
my graph:
##fully connected with hidden layer
def createFullyConnected():
graphFully = tf.Graph()
with graphFully.as_default():
def constructGraph(dataset, weights1, biases1, weights2, biases2):
logits1 = tf.matmul(dataset, weights1) + biases1
hiddenl = tf.nn.relu(logits1)
logits2 = tf.matmul(hiddenl, weights2) + biases2
return logits2
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.01)
return tf.Variable(initial, name='weights')
def bias_variable(shape):
initial = tf.constant(0.0, shape=shape)
return tf.Variable(initial, name='biases')
# Input data. For the training data, we use a placeholder that will be fed
# at run time with a training minibatch.
tf_train_dataset = tf.placeholder(tf.float32,
shape=(batch_size, image_size * image_size), name='train_data')
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels), name='train_labels')
tf_valid_dataset = tf.constant(validation[0], name='valid_labels')
tf_test_dataset = tf.constant(test[0], name='test_labels')
# Variables.
with tf.name_scope('hidden') as scope:
weights1 = weight_variable([image_size * image_size, 1024])
biases1 = bias_variable([1024])
weights2 = weight_variable([1024, num_labels])
biases2 = bias_variable([num_labels])
# Training computation.
logits = constructGraph(tf_train_dataset, weights1, biases1, weights2, biases2)
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
# Optimizer.
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
# Predictions for the training, validation, and test data.
train_prediction = tf.nn.softmax(logits)
valid_prediction = tf.nn.softmax(constructGraph(tf_valid_dataset, weights1, biases1, weights2, biases2))
test_prediction = tf.nn.softmax(constructGraph(tf_test_dataset, weights1, biases1, weights2, biases2))
# We write the graph out to the `logs` directory
tf.summary.FileWriter("logs", graphFully).close()
return (graphFully, optimizer, train_prediction, valid_prediction, test_prediction)
and the evaluation:
def evaluate(graph, optimizer, train_prediction, valid_prediction, test_prediction):
num_steps = 3001
train_dataset = train[0]
train_labels = train[1]
valid_labels = validation[1]
test_labels = test[1]
outlier_labels = outlier[1]
with tf.Session(graph=graph) as session:
tf.global_variables_initializer().run()
print("Initialized")
for step in range(num_steps):
# Pick an offset within the training data, which has been randomized.
# Note: we could use better randomization across epochs.
offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
# Generate a minibatch.
batch_data = train_dataset[offset:(offset + batch_size), :]
batch_labels = train_labels[offset:(offset + batch_size), :]
# Prepare a dictionary telling the session where to feed the minibatch.
# The key of the dictionary is the placeholder node of the graph to be fed,
# and the value is the numpy array to feed to it.
feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
_, l, predictions = session.run(
[optimizer, loss, train_prediction], feed_dict=feed_dict)
if (step % 500 == 0):
print("Minibatch loss at step %d: %f" % (step, l))
print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
print("Validation accuracy: %.1f%%" % accuracy(
valid_prediction.eval(), valid_labels))
print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
Here the errors occur due to out of scope of the two placeholder tf_train_dataset and tf_train_labels. You need to access these two tensors from th graph inside evaluate function.
def evaluate(...):
...
tf_train_dataset = graph.get_tensor_by_name('train_data:0')
tf_train_labels = graph.get_tensor_by_name('train_labels:0')
with tf.Session(graph=graph) as session:
...
Related
Here are my code:
# Model parameters: W and b
# tf.reset_default_graph()
W = tf.get_variable("weight",shape = [784, 10], dtype=tf.float32)
b = tf.get_variable("b", shape=(784,10), dtype= tf.float32)
input_X = tf.placeholder('float32', shape = (None,10))
input_y = tf.placeholder('float32', [784, 10])
ogits = W*input_X + b
probas = tf.nn.softmax(logits)
classes = tf.argmax(probas)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = input_y, logits = logits))
step = tf.train.AdamOptimizer(loss)
s.run(tf.global_variables_initializer())
BATCH_SIZE = 512
EPOCHS = 40
# for logging the progress right here in Jupyter (for those who don't have TensorBoard)
simpleTrainingCurves = matplotlib_utils.SimpleTrainingCurves("cross-entropy", "accuracy")
for epoch in range(EPOCHS): # we finish an epoch when we've looked at all training samples
batch_losses = []
for batch_start in range(0, X_train_flat.shape[0], BATCH_SIZE): # data is already shuffled
_, batch_loss = s.run([step, loss], {input_X: X_train_flat[batch_start:batch_start+BATCH_SIZE],
input_y: y_train_oh[batch_start:batch_start+BATCH_SIZE]})
# collect batch losses, this is almost free as we need a forward pass for backprop anyway
batch_losses.append(batch_loss)
train_loss = np.mean(batch_losses)
val_loss = s.run(loss, {input_X: X_val_flat, input_y: y_val_oh}) # this part is usually small
train_accuracy = accuracy_score(y_train, s.run(classes, {input_X: X_train_flat})) # this is slow and usually skipped
valid_accuracy = accuracy_score(y_val, s.run(classes, {input_X: X_val_flat}))
simpleTrainingCurves.add(train_loss, val_loss, train_accuracy, valid_accuracy)
the error is:
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py
in _run(self, handle, fetches, feed_dict, options, run_metadata)
973 'Cannot feed value of shape %r for Tensor %r, '
974 'which has shape %r'
--> 975 % (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
976 if not self.graph.is_feedable(subfeed_t):
977 raise ValueError('Tensor %s may not be fed.' % subfeed_t)
ValueError: Cannot feed value of shape (512, 784) for Tensor
'Placeholder_2:0', which has shape '(?, 10)'
I am new at tensorflow and coursena is what I am learning. Please help me.
The issue is with the weights, Bias, input_x and input_y placeholder size.
Below is the modified code that should resolve your issue.
W = tf.get_variable('Weight', shape=(784,10), dtype=tf.float32)
b = tf.get_variable('bias', shape=(10,), dtype=tf.float32)
input_X = tf.placeholder(shape=(None, 784), dtype=tf.float32) #None is batch size and 784 is the input.
input_y = tf.placeholder(shape=(None, 10), dtype=tf.float32) # None is batch size, 10 is number of classess.
logits = tf.matmul(input_X, W) + b
probas = tf.nn.softmax(logits)
classes = tf.argmax(probas, 1)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=input_y))
step = tf.train.RMSPropOptimizer(0.001).minimize(loss)
s.run(tf.global_variables_initializer())
BATCH_SIZE = 512
EPOCHS = 50
simpleTrainingCurves = matplotlib_utils.SimpleTrainingCurves("cross-entropy", "accuracy")
for epoch in range(EPOCHS):
batch_losses = []
for batch_start in range(0, X_train_flat.shape[0], BATCH_SIZE): # data is already shuffled
_, batch_loss = s.run([step, loss], {input_X: X_train_flat[batch_start:batch_start+BATCH_SIZE],
input_y: y_train_oh[batch_start:batch_start+BATCH_SIZE]})
batch_losses.append(batch_loss)
train_loss = np.mean(batch_losses)
val_loss = s.run(loss, {input_X: X_val_flat, input_y: y_val_oh})
train_accuracy = accuracy_score(y_train, s.run(classes, {input_X: X_train_flat}))
valid_accuracy = accuracy_score(y_val, s.run(classes, {input_X: X_val_flat}))
simpleTrainingCurves.add(train_loss, val_loss, train_accuracy, valid_accuracy)
I'm sorry if this is a naive question. This is my first attempt at using tensorflow. I'm using this after trying numpy on a non-MNIST dataset as a part of the Udacity course. Now, this is the code that I've written. However, this is giving me just 87-88% accuracy. Kindly suggest me what is the error that is there which I should take care of and what are the probable errors in the code:
batch_size = 256
node_dimensions = 1024
graph = tf.Graph()
with graph.as_default():
tf_train_dataset = tf.placeholder(tf.float32,
shape=(batch_size, image_size *image_size))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size,num_labels))
tf_valid_dataset = tf.constant(valid_dataset)
tf_test_dataset = tf.constant(test_dataset)
# Variables.
#Layer 1
w1 = tf.Variable(
tf.truncated_normal([image_size * image_size, node_dimensions]))
b1 = tf.Variable(tf.zeros([node_dimensions]))
#Layer 2
w2 = tf.Variable(tf.random_uniform([node_dimensions,
node_dimensions], -0.01, 0.01))
b2 = tf.Variable(tf.zeros([node_dimensions]))
#Layer 3
w3 = tf.Variable(
tf.truncated_normal([node_dimensions, num_labels]))
b3 = tf.Variable(tf.zeros([num_labels]))
# Training computation.
# Layer 1.
y1 = tf.nn.softmax(tf.matmul(tf_train_dataset, w1) + b1)
# Layer 2.
y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
# Layer 3.
logits = tf.matmul(y2, w3) + b3
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels,logits=logits))
# Valid computation.
# Layer 1.
y1 = tf.nn.softmax(tf.matmul(tf_valid_dataset, w1) + b1)
# Layer 2.
y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
# Layer 3.
logits_valid = tf.matmul(y2, w3) + b3
# Test computation.
# Layer 1.
y1 = tf.nn.softmax(tf.matmul(tf_test_dataset, w1) + b1)
# Layer 2.
y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
# Layer 3.
logits_test = tf.matmul(y2, w3) + b3
# Optimizer.
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
# Predictions for the training, validation, and test data.
train_prediction = tf.nn.softmax(logits)
valid_prediction = tf.nn.softmax(logits_valid)
test_prediction = tf.nn.softmax(logits_test)
num_steps = 10001
with tf.Session(graph=graph) as session:
tf.global_variables_initializer().run()
print("Initialized")
for step in range(num_steps):
offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
# Generate a minibatch.
batch_data = train_dataset[offset:(offset + batch_size), :]
batch_labels = train_labels[offset:(offset + batch_size), :]
# Prepare a dictionary telling the session where to feed the minibatch.
# The key of the dictionary is the placeholder node of the graph to be fed,
# and the value is the numpy array to feed to it.
feed_dict = {tf_train_dataset : batch_data, tf_train_labels :batch_labels}
_, l, predictions = session.run(
[optimizer, loss, train_prediction], feed_dict=feed_dict)
if (step % 500 == 0):
print("Minibatch loss at step %d: %f" % (step, l))
print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
print("Validation accuracy: %.1f%%" % accuracy(
valid_prediction.eval(), valid_labels))
print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(),
test_labels))
You need to mention the dataset you are using. If this is Mnist I would agree that 88% is low. But on any other dataset is pretty hard to say.
Also, why did you set num_steps = 1000. In general, should be trained like so
num_steps = len(train_data) // batch_size
num_epochs = 10
for _ in num_epochs:
for i in num_steps:
/* Train Code*/
I am trying to create an end-to-end trainable offline English Handwriting Recognition Model (without segmenting individual character). I am using the word dataset from IAM Handwriting Database for training.
I tried decreasing the learning rate, increasing batch size, etc. but the loss keeps on fluctuating with no/significant overall decrease - TensorBoard visualization for cost at each step
I am new to TensorFlow so could have made some naive error. The code used:
class CRNN(object):
def __init__(self, config):
self.config = config
tf.reset_default_graph()
def read_and_decode(self, filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
# Define how to parse the example
context_features = {
'length': tf.FixedLenFeature([], dtype=tf.int64),
'out_length': tf.FixedLenFeature([], dtype=tf.int64)
}
sequence_features = {
'token': tf.FixedLenSequenceFeature([], dtype=tf.float32),
'labels': tf.FixedLenSequenceFeature([], dtype=tf.int64)
}
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=serialized_example,
context_features=context_features,
sequence_features=sequence_features)
image = sequence_parsed['token']
label = tf.cast(sequence_parsed['labels'], tf.int32)
length = tf.cast(context_parsed['length'], tf.int32)
lab_length = tf.cast(context_parsed['out_length'], tf.int32)
image_shape = tf.cast(tf.stack([self.config.im_height,
length/self.config.im_height]), tf.int32)
image = tf.reshape(image, image_shape)
# Updating length to represent image width
length = tf.shape(image)[1]
# Batch the variable length tensor with dynamic padding
self.images, self.labels, self.lengths, self.lab_lengths = tf.train.batch(
tensors=[image, label, length, lab_length],
batch_size=self.config.batch_size, dynamic_pad=True)
def net(self):
batch_lab_length = tf.reduce_max(self.lab_lengths)
batch_im_length = tf.reduce_max(self.lengths)
# Reshape to time major
sequences = tf.reshape(self.images, [batch_im_length, self.config.batch_size,
self.config.im_height])
# Feed sequences into RNN
with tf.name_scope('RNN'):
self.cell_fw = tf.nn.rnn_cell.LSTMCell(num_units=self.config.rnn_num_hidden,
state_is_tuple=True)
self.cell_bw = tf.nn.rnn_cell.LSTMCell(num_units=self.config.rnn_num_hidden,
state_is_tuple=True)
self.output, self.state = tf.nn.bidirectional_dynamic_rnn(
cell_fw=self.cell_fw,
cell_bw=self.cell_bw,
inputs=sequences,
dtype=tf.float32,
sequence_length=self.lengths,
time_major=True,
scope='RNN'
)
# Reshaping to apply the same weights over the timesteps
self.output = tf.reshape(self.output, [-1, self.config.rnn_num_hidden])
self.out_W = tf.Variable(tf.truncated_normal([self.config.rnn_num_hidden,
self.config.num_classes],
stddev=0.1), name='out_W')
self.out_b = tf.Variable(tf.constant(0., shape=[self.config.num_classes]), name='out_b')
# Doing the affine projection
logits = tf.matmul(self.output, self.out_W) + self.out_b
# Reshaping back to the original shape
logits = tf.reshape(logits, [self.config.batch_size, -1, self.config.num_classes])
# Time major
logits = tf.transpose(logits, (1, 0, 2))
# Training computation
# Prepare sparse tensor for CTC loss
labs = tf.reshape(self.labels, (self.config.batch_size, batch_lab_length))
sparse_tensor_indices = tf.where(tf.less(tf.cast(0, tf.int32), labs))
labels_vals = tf.reshape(self.labels, [batch_lab_length*self.config.batch_size])
mask = tf.cast(tf.sign(labels_vals), dtype=tf.bool)
labels_vals = tf.boolean_mask(labels_vals,mask)
labels_sparse = tf.SparseTensor(indices=sparse_tensor_indices, values=labels_vals,
dense_shape=[self.config.batch_size,
tf.cast(batch_lab_length, tf.int64)])
self.loss = tf.nn.ctc_loss(labels_sparse, logits, sequence_length=self.lab_lengths,
preprocess_collapse_repeated=False, ctc_merge_repeated=False,
time_major=True)
self.cost = tf.reduce_mean(self.loss)
# Optimizer
self.optimizer = tf.train.MomentumOptimizer(learning_rate=0.01,
momentum=0.9, use_nesterov=True).minimize(self.cost)
# Predictions for the training, validation, and test data.
self.train_prediction = tf.nn.ctc_beam_search_decoder(logits,
sequence_length=self.lab_lengths)
def train(self):
num_steps = int((self.config.num_epochs*self.config.sample_size)/self.config.batch_size)
tf.reset_default_graph()
filename_queue = tf.train.string_input_producer(
[self.config.tfrecord_filename], num_epochs=self.config.num_epochs)
self.read_and_decode(filename_queue)
self.net()
# The op for initializing the variables.
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
saver = tf.train.Saver()
with tf.Session() as sess:
training_summary = tf.summary.scalar("training_cost", self.cost)
writer = tf.summary.FileWriter("./TensorBoard/graph", sess.graph)
sess.run(init_op)
print('Initialized')
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
start = time.time()
steps_time = start
epoch = 1
for step in range(num_steps):
_, c, predictions, actual_labels, train_summ = sess.run([self.optimizer, self.cost,
self.train_prediction,
self.labels, training_summary])
writer.add_summary(train_summ, step)
if (step % 10000 == 0):
preds = np.zeros((predictions[0][0].dense_shape))
i = 0
for idx in predictions[0][0].indices:
preds[idx[0]][idx[1]] = predictions[0][0].values[i]
i+=1
print(time.time() - steps_time)
steps_time = time.time()
print('Minibatch cost at step %d: %f' % (step, c))
print('Label =', [''.join([char_map_inv[j] for j in i]) for i in actual_labels],
'Prediction =', [''.join([char_map_inv[j] for j in i]) for i in preds])
if (step!=0 and step % int(self.config.sample_size/self.config.batch_size) == 0):
print('Epoch', epoch, 'Completed')
epoch+=1
last_step = step
saver.save(sess, "model_BLSTM", global_step=last_step)
writer.close()
print(time.time() - start)
After trying a lot of things unsuccessfully, I found that an incorrect argument was provided to the sequence_length argument of tf.nn.ctc_loss. It should be set to 'length of input sequence' but I had set it to 'length of output sequence(labels - number of character)'
More details can be found in comments under the selected answer to this question - CTC Loss InvalidArgumentError: sequence_length(b) <= time
Also, if one has a GPU it would be better to use Baidu's CTC GPU implementation (https://github.com/baidu-research/warp-ctc) as it can speed up the training a lot.
The problem is that you are feeding raw images in the LSTM, so it is very difficult for it to extract any useful information. The CRNN paper first uses a series of convolutional layers to extract features from the images, and then these are fed into the LSTM.
I have built a system that leverages Google ML Engine to train various text classifiers using a simple flat CNN architecture (borrowed from the excellent WildML post). I've also leveraged heavily the ML Engine trainer template which exists here - specifically using the Tensorflow core functions.
My issue is that while the model trains and learns parameters correctly, I cannot get the serialized export in the binary SavedModel format (i.e. - the .pb files) to maintain the learned weights. I can tell this by using the gcloud predict local API on the model export folder and each time it makes randomized predictions - leading me to believe that while the graph structure is being saved to the proto-buf format, the associated weights in the checkpoint file are not being carried over.
Here's the code for my run function:
def run(...):
# ... code to load and transform train/test data
with train_graph.as_default():
with tf.Session(graph=train_graph).as_default() as session:
# Features and label tensors as read using filename queue
features, labels = model.input_fn(
x_train,
y_train,
num_epochs=num_epochs,
batch_size=train_batch_size
)
# Returns the training graph and global step tensor
tf.logging.info("Train vocab size: {:d}".format(vocab_size))
train_op, global_step_tensor, cnn, train_summaries = model.model_fn(
model.TRAIN,
sequence_length,
num_classes,
label_values,
vocab_size,
embedding_size,
filter_sizes,
num_filters
)
tf.logging.info("Created simple training CNN with ({}) filter types".format(filter_sizes))
# Setup writers
train_summary_op = tf.summary.merge(train_summaries)
train_summary_dir = os.path.join(job_dir, "summaries", "train")
# Generate writer
train_summary_writer = tf.summary.FileWriter(train_summary_dir, session.graph)
# Initialize all variables
session.run(tf.global_variables_initializer())
session.run(tf.local_variables_initializer())
model_dir = os.path.abspath(os.path.join(job_dir, "model"))
if not os.path.exists(model_dir):
os.makedirs(model_dir)
saver = tf.train.Saver()
def train_step(x_batch, y_batch):
"""
A single training step
"""
feed_dict = {
cnn.input_x: x_batch,
cnn.input_y: y_batch,
cnn.dropout_keep_prob: 0.5
}
step, _, loss, accuracy = session.run([global_step_tensor, train_op, cnn.loss, cnn.accuracy],
feed_dict=feed_dict)
time_str = datetime.datetime.now().isoformat()
if step % 10 == 0:
tf.logging.info("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
# Return current step
return step
def eval_step(x_batch, y_batch, train_step, total_steps):
"""
Evaluates model on a dev set
"""
feed_dict = {
cnn.input_x: x_batch,
cnn.input_y: y_batch,
cnn.dropout_keep_prob: 1.0
}
step, loss, accuracy, scores, predictions = session.run([global_step_tensor, cnn.loss, cnn.accuracy, cnn.scores, cnn.predictions],
feed_dict=feed_dict)
# Get metrics
y_actual = np.argmax(y_batch, 1)
model_metrics = precision_recall_fscore_support(y_actual, predictions)
#print(scores)
time_str = datetime.datetime.now().isoformat()
print("\n---- EVAULATION ----")
avg_precision = np.mean(model_metrics[0], axis=0)
avg_recall = np.mean(model_metrics[1], axis=0)
avg_f1 = np.mean(model_metrics[2], axis=0)
print("{}: step {}, loss {:g}, acc {:g}, prec {:g}, rec {:g}, f1 {:g}".format(time_str, step, loss, accuracy, avg_precision, avg_recall, avg_f1))
print("Model metrics: ", model_metrics)
print("---- EVALUATION ----\n")
# Generate batches
batches = data_helpers.batch_iter(
list(zip(features, labels)), train_batch_size, num_epochs)
# Training loop. For each batch...
for batch in batches:
x_batch, y_batch = zip(*batch)
current_step = train_step(x_batch, y_batch)
if current_step % 20 == 0 or current_step == 1:
eval_step(x_eval, y_eval, current_step, total_steps)
# Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
print(model_dir)
trained_model = saver.save(session, os.path.join(job_dir, 'model') + "/model.ckpt", global_step=current_step)
print(trained_model)
print("Saved final model checkpoint to {}".format(trained_model))
# Only perform this if chief
if is_chief:
build_and_run_exports(trained_model, job_dir,
model.SERVING_INPUT_FUNCTIONS[model.TEXT],
sequence_length, num_classes, label_values,
vocab_size, embedding_size, filter_sizes,
num_filters, vocab_processor)
And my build_and_run_exports function:
def build_and_run_exports(...):
# Check if we export already exists - if so delete
export_dir = os.path.join(job_dir, 'export')
if os.path.exists(export_dir):
print("Export currently exists - going to delete:", export_dir)
shutil.rmtree(export_dir)
# Create exporter
exporter = tf.saved_model.builder.SavedModelBuilder(export_dir)
# Restore prediction graph
prediction_graph = tf.Graph()
with prediction_graph.as_default():
with tf.Session(graph=prediction_graph) as session:
# Get training data
features, inputs_dict = serving_input_fn()
# Setup inputs
inputs_info = {
name: tf.saved_model.utils.build_tensor_info(tensor)
for name, tensor in inputs_dict.iteritems()
}
# Load model
cnn = TextCNN(
sequence_length=sequence_length,
num_classes=num_classes,
vocab_size=vocab_size,
embedding_size=embedding_size,
filter_sizes=list(map(int, filter_sizes.split(","))),
num_filters=num_filters,
input_tensor=features)
# Restore model
saver = tf.train.Saver()
saver.restore(session, latest_checkpoint)
# Setup outputs
outputs = {
'logits': cnn.scores,
'probabilities': cnn.probabilities,
'predicted_indices': cnn.predictions
}
# Create output info
output_info = {
name: tf.saved_model.utils.build_tensor_info(tensor)
for name, tensor in outputs.iteritems()
}
# Setup signature definition
signature_def = tf.saved_model.signature_def_utils.build_signature_def(
inputs=inputs_info,
outputs=output_info,
method_name=sig_constants.PREDICT_METHOD_NAME
)
# Create graph export
exporter.add_meta_graph_and_variables(
session,
tags=[tf.saved_model.tag_constants.SERVING],
signature_def_map={
sig_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
},
legacy_init_op=tf.saved_model.main_op.main_op()
)
# Export model
exporter.save()
And last, but not least, the TextCNN model:
class TextCNN(object):
"""
A CNN for text classification.
Uses an embedding layer, followed by a convolutional, max-pooling and softmax layer.
"""
def __init__(
self, sequence_length, num_classes, vocab_size,
embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0,
dropout_keep_prob=0.5, input_tensor=None):
# Setup input
if input_tensor != None:
self.input_x = input_tensor
self.dropout_keep_prob = tf.constant(1.0)
else:
self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
# Placeholders for input, output and dropout
self.input_y = tf.placeholder(tf.int32, [None, num_classes], name="input_y")
# Keeping track of l2 regularization loss (optional)
l2_loss = tf.constant(0.0)
# Embedding layer
with tf.device('/cpu:0'), tf.name_scope("embedding"):
self.W = tf.Variable(
tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
name="W")
self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
# Create a convolution + maxpool layer for each filter size
pooled_outputs = []
for i, filter_size in enumerate(filter_sizes):
with tf.name_scope("conv-maxpool-%s" % filter_size):
# Convolution Layer
filter_shape = [filter_size, embedding_size, 1, num_filters]
W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
conv = tf.nn.conv2d(
self.embedded_chars_expanded,
W,
strides=[1, 1, 1, 1],
padding="VALID",
name="conv")
# Apply nonlinearity
h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
# Maxpooling over the outputs
pooled = tf.nn.max_pool(
h,
ksize=[1, sequence_length - filter_size + 1, 1, 1],
strides=[1, 1, 1, 1],
padding='VALID',
name="pool")
pooled_outputs.append(pooled)
# Combine all the pooled features
num_filters_total = num_filters * len(filter_sizes)
self.h_pool = tf.concat(pooled_outputs, 3)
self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])
# Add dropout
with tf.name_scope("dropout"):
self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)
# Final (unnormalized) scores and predictions
with tf.name_scope("output"):
W = tf.get_variable(
"W",
shape=[num_filters_total, num_classes],
initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
l2_loss += tf.nn.l2_loss(W)
l2_loss += tf.nn.l2_loss(b)
self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
self.predictions = tf.argmax(self.scores, 1, name="predictions")
# CalculateMean cross-entropy loss
with tf.name_scope("loss"):
losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss
with tf.name_scope("probabilities"):
self.probabilities = tf.nn.softmax(logits=self.scores)
# Accuracy
with tf.name_scope("accuracy"):
correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
I'm hoping I'm just missing something simple in how I'm creating the TF graph / session and restoring stats.
Thank you in advance for your help!
This behavior is caused due to the behavior of tf.saved_model.main_op.main_op() which randomly initializes all of the variables in the graph (code). However, legacy_init_op happens after the variables are restored from the checkpoint (restore happens here followed by legacy_init_op here).
The solution is simply to not re-initialize all of the variables, for example, in your code:
from tensorflow.python.ops import variables
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import control_flow_ops
def my_main_op():
init_local = variables.local_variables_initializer()
init_tables = lookup_ops.tables_initializer()
return control_flow_ops.group(init_local, init_tables)
def build_and_run_exports(...):
...
# Create graph export
exporter.add_meta_graph_and_variables(
session,
tags=[tf.saved_model.tag_constants.SERVING],
signature_def_map={
sig_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
},
legacy_init_op=my_main_op()
)
# Export model
exporter.save()
I am creating a deep learning fully connected NN for the MNIST input. I have a function (it takes placeholder input)
# Create model
def multilayer_perceptron(x, activation_fn, weights, biases, dbg=False):
layerDatas = OrderedDict()
# get each layer data
prev = x
for i in range(len(weights)-1):
weight = weights.items()[i][1]
bias = biases.items()[i][1]
var = 'layer_' + str(i+1)
layerData = tf.add(tf.matmul(prev, weight), bias)
layerData = activation_fn(layerData)
prev = layerData
layerDatas[var] = layerData
# output layer with linear function, using the last layer output value
val = tf.matmul(prev, weights['out'])
out_layer = tf.matmul(prev, weights['out']) + biases['out']
print x.eval() # debug the data
return out_layer
which takes multiple layers in weights and biases. I call the main program with
sess = tf.InteractiveSession() # start a session
print 'Data', n_input, n_classes
print 'Train', train_set_x.shape, train_set_y.shape
(weights, biases) = createWeightsBiases(layers, n_input, n_classes, dbg)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
# Construct model
pred = multilayer_perceptron(x, activation_fn, weights, biases, dbg)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
done_looping = False
display_step = 1
# Add ops to save and restore all the variables.
saver = tf.train.Saver()
# Launch the graph
sess.run(init)
# Training cycle
epochs = 1000
for epoch in range(epochs):
avg_cost = 0.
total_batch = int(len(train_set_x)/batch_size)
print 'Batch', total_batch, batch_size
# Loop over all batches
for i in range(total_batch):
batch_x = train_set_x[i * batch_size: (i + 1) * batch_size]
batch_y = train_set_y[i * batch_size: (i + 1) * batch_size]
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# Display logs per epoch step
if epoch % display_step == 0:
print "Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.9f}".format(avg_cost)
print "Optimization Finished!"
# Test model
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print "Accuracy:", accuracy.eval({x: valid_set_x, y: valid_set_y})
When I try to print the tensor in my multilayer_perceptron function, I get a crash with
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
I would appreciate help in getting around this.
You can't eval a placeholder. Instead, you can feed the graph with a proper value for the placeholder and only then extract the content (that's the value you fed the graph with).
So, remove the print x.eval() # debug the data line from the multilayer_perceptron function.
To inspect the value of the placeholder you have to fed it and extract the value you just feed it (side note: it's useless).
If you really want to do this, that's how:
placeholder_value = sess.run(x, feed_dict={x: [1,2,3,4]})
print placeholder_value
It will print the value [1,2,3,4]