How to load a graph checkpoint (.ckpt) and use SavedModelBuilder to save it as a protobuf without declaring any tf.Variables? - tensorflow

Currently I have resnet_v2_50.ckpt from TensorFlow's open-sourced pretrained models. I am trying to serve this model in Go, because the backend for my web app is going to be in Go. If I create my own model, train it, and then save it, I have no trouble serving it in Go, but here I am trying to use a pre-trained model to save time on my end.
Here's a simple example of how I save my model:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)
# Recall that each image is 28x28
x = tf.placeholder(tf.float32, [None, 784], name='imageinput')
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.add(tf.matmul(x, W), b)
labels = tf.placeholder(tf.float32, [None, 10])
cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy_loss)
with tf.Session() as sess:
    with tf.device("/cpu:0"):
        sess.run(tf.global_variables_initializer())
        for i in range(1000):
            batch_x, batch_label = mnist.train.next_batch(100)
            loss, _ = sess.run([cross_entropy_loss, train_step],
                               feed_dict={x: batch_x, labels: batch_label})
            print '%d: %f' % (i + 1, loss)
        infer = tf.argmax(y, axis=1, name='infer')
        truth = tf.argmax(labels, axis=1)
        correct_prediction = tf.equal(infer, truth)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        print sess.run(accuracy, feed_dict={x: mnist.test.images, labels: mnist.test.labels})
        print 'Time to save the graph!'
        builder = tf.saved_model.builder.SavedModelBuilder('mnist_model')
        builder.add_meta_graph_and_variables(sess, ['serve'])
        builder.save()
I can load it using TensorFlow in Go:
model, err := tf.LoadSavedModel("./tf_mnist_py/mnist_model", []string{"serve"}, nil)
if err != nil {
    fmt.Printf("Error loading saved model: %s\n", err.Error())
    return
}
defer model.Session.Close()
But now, when it comes to the pretrained model, I am dealing with .ckpt files. One solution I have is to load it up in Python and then save it as a protobuf:
import tensorflow as tf
from tensorflow.python.tools import inspect_checkpoint as ckpt

ckpt.print_tensors_in_checkpoint_file('./resnet50/resnet_v2_50.ckpt',
                                      tensor_name='',
                                      all_tensors=False,
                                      all_tensor_names=False)

tf.reset_default_graph()
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, './resnet50/resnet_v2_50.ckpt')
    print 'Model is restored'
    print 'Time to save the graph!'
    builder = tf.saved_model.builder.SavedModelBuilder('resnet_50_model')
    builder.add_meta_graph_and_variables(sess, ['serve'])
    builder.save()
However, this gives me an error: ValueError: No variables to save. I can work around it by declaring a variable:
v1 = tf.get_variable('total_loss/ExponentialMovingAverage', shape=[])
But here comes my question: does that mean I must declare EVERY variable in ResNet-50 and have TensorFlow load the values from the .ckpt file into those variables before I perform the save? Is there a shortcut?

You should import the variables as well from the .meta file, if you have it available with the pretrained model:
saver = tf.train.import_meta_graph('./resnet50/resnet_v2_50.meta')
Detailed tutorial here.
If you don't have the .meta file available, but you do have the network-generating code, as with resnet_v2_50 in tensorflow/models/blob/master/research/slim/nets/resnet_v2.py, then you should import that file and call the resnet_v2_50 function, which will define all the variables for you. Then restore the checkpoint.
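A minimal sketch of that second approach (assumptions on my part: the slim nets package is on your PYTHONPATH, and num_classes=1001 matches the slim ImageNet checkpoints; adjust both to your setup):
import tensorflow as tf
from nets import resnet_v2  # assumes tensorflow/models/research/slim is on PYTHONPATH
slim = tf.contrib.slim

tf.reset_default_graph()
# Building the graph defines every variable the checkpoint expects,
# so tf.train.Saver() has something to restore into.
images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='imageinput')
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
    net, end_points = resnet_v2.resnet_v2_50(images, num_classes=1001,
                                             is_training=False)
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, './resnet50/resnet_v2_50.ckpt')
    builder = tf.saved_model.builder.SavedModelBuilder('resnet_50_model')
    builder.add_meta_graph_and_variables(sess, ['serve'])
    builder.save()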

Related

How to restore a saved BiRNN model in tensorflow so that all output neurons are correctly bundled to the corresponding output classes

I faced a problem with properly restoring a saved model in tensorflow. I created a Bidirectional RNN model in tensorflow with the following code:
batchX_placeholder = tf.placeholder(tf.float32, [None, timesteps, 1],
                                    name="batchX_placeholder")
batchY_placeholder = tf.placeholder(tf.float32, [None, num_classes],
                                    name="batchY_placeholder")
weights = tf.Variable(np.random.rand(2 * STATE_SIZE, num_classes),
                      dtype=tf.float32, name="weights")
biases = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32,
                     name="biases")
logits = BiRNN(batchX_placeholder, weights, biases)
with tf.name_scope("prediction"):
    prediction = tf.nn.softmax(logits)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=batchY_placeholder))
lr = tf.Variable(learning_rate, trainable=False, dtype=tf.float32,
                 name='lr')
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
train_op = optimizer.minimize(loss_op)
init_op = tf.initialize_all_variables()
saver = tf.train.Saver()
The BiRNN architecture is created with the following function:
def BiRNN(x, weights, biases):
    # Unstack to get a list of 'time_steps' tensors of shape
    # (batch_size, num_input)
    x = tf.unstack(x, time_steps, 1)
    # Forward and backward direction cells
    lstm_fw_cell = rnn.BasicLSTMCell(STATE_SIZE, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(STATE_SIZE, forget_bias=1.0)
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell,
                                                 lstm_bw_cell, x,
                                                 dtype=tf.float32)
    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights) + biases
Then I train the model and save it every 200 steps:
with tf.Session() as sess:
    sess.run(init_op)
    current_step = 0
    for batch_x, batch_y in get_minibatch():
        sess.run(train_op, feed_dict={batchX_placeholder: batch_x,
                                      batchY_placeholder: batch_y})
        current_step += 1
        if current_step % 200 == 0:
            saver.save(sess, os.path.join(model_dir, "model"))
To run the saved model in inference mode, I use the saved tensorflow graph from the "model.meta" file:
graph = tf.get_default_graph()
saver = tf.train.import_meta_graph(os.path.join(model_dir, "model.meta"))
sess = tf.Session()
saver.restore(sess, tf.train.latest_checkpoint(model_dir))
weights = graph.get_tensor_by_name("weights:0")
biases = graph.get_tensor_by_name("biases:0")
batchX_placeholder = graph.get_tensor_by_name("batchX_placeholder:0")
batchY_placeholder = graph.get_tensor_by_name("batchY_placeholder:0")
logits = BiRNN(batchX_placeholder, weights, biases)
prediction = graph.get_operation_by_name("prediction/Softmax")
argmax_pred = tf.argmax(prediction, 1)
init = tf.global_variables_initializer()
sess.run(init)
for x_seq, y_gt in get_sequence():
    _, y_pred = sess.run([prediction, argmax_pred],
                         feed_dict={batchX_placeholder: [x_seq],
                                    batchY_placeholder: [[0.0, 0.0]]})
    print("Y ground true: " + str(y_gt) + ", Y pred: " + str(y_pred[0]))
And when I run the code in inference mode, I get different results each time I launch it. It seems that the output neurons of the softmax layer are randomly bundled with different output classes.
So my question is: how can I save and then correctly restore the model in tensorflow, so that all neurons are properly bundled with their corresponding output classes?
There is no need to call tf.global_variables_initializer(); I think that is your problem.
I removed some operations: logits, weights and biases. You don't need to redefine them; all of them are already loaded, so use graph.get_tensor_by_name to get them.
For the prediction, get the tensor instead of the operation (see this answer).
This is the code:
graph = tf.get_default_graph()
saver = tf.train.import_meta_graph(os.path.join(model_dir, "model.meta"))
sess = tf.Session()
saver.restore(sess, tf.train.latest_checkpoint(model_dir))
batchX_placeholder = graph.get_tensor_by_name("batchX_placeholder:0")
batchY_placeholder = graph.get_tensor_by_name("batchY_placeholder:0")
prediction = graph.get_tensor_by_name("prediction/Softmax:0")
argmax_pred = tf.argmax(prediction, 1)
Edit 1: I noticed that I wasn't clear on why you got different results.
And when I run the code in inference mode, I get different results
each time I launch it.
Notice that although you use the weights from the loaded model, you are creating the BiRNN again, and BasicLSTMCell also has weights and other variables that you do not set from your loaded model; hence they need to be initialized (with new random values), resulting in an untrained model again.
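For completeness, a sketch of the inference loop with the restored tensors above (get_sequence() as in the question); since argmax_pred depends only on batchX_placeholder, no dummy label needs to be fed:
for x_seq, y_gt in get_sequence():
    # Runs only restored ops; no freshly initialized variables involved.
    y_pred = sess.run(argmax_pred,
                      feed_dict={batchX_placeholder: [x_seq]})
    print("Y ground truth: " + str(y_gt) + ", Y pred: " + str(y_pred[0]))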

Restoring a pre-trained model

I am restoring a pre-trained model in TensorFlow and would like to know whether my code works as desired. It looks like it does not make use of the pre-trained model, because the loss from the code below is way higher than the training error, e.g., around 200. Please let me know how I can modify it. Thank you in advance.
def main(_):
    with tf.Graph().as_default() as g:
        x, img, rows, cols = load_image(FLAGS.input, FLAGS.image_size)
        with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=0.0005)):
            logits, endpoints = resnet_v2.resnet_v2_152(x, nb_classes, is_training=False)
        predictions = tf.argmax(logits, 1)
        saver = tf.train.Saver()
        with tf.Session(graph=g) as sess:
            sess.run(tf.global_variables_initializer())
            # Load a pretrained model.
            print("\nLoading a model...")
            saver.restore(sess, checkpoint_path)
            print("\nFeedforwarding...")
            pred, loss_out, output, grads_val = sess.run([predictions, loss, conv_layer, norm_grads])

Restoring a model trained with variable input length in tensorflow results in InvalidArgumentError

I am rather new to tensorflow and am currently experimenting with models of varying complexity. I have a problem with the save and restore functionality of the package. As far as I understood the tutorials, I should be able to restore a trained graph and run it with some new input at some later point. However, I get the following error when I try to do just that:
InvalidArgumentError (see above for traceback): Shape [-1,10] has negative dimensions
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[?,10], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
My understanding of the message is that the restored graph does not like one dimension being left arbitrary, which in turn is necessary for practical cases where I don't know beforehand how large my input will be. A minimal code snippet producing the error above can be found below. I know how to restore each tensor individually, but this gets impractical pretty quickly as the models grow in complexity. I am thankful for any help I get and apologize in case my question is stupid.
import numpy as np
import tensorflow as tf

def generate_random_input():
    alist = []
    for _ in range(10):
        alist.append(np.random.uniform(-1, 1, 100))
    return np.array(alist).T

def generate_random_target():
    return np.random.uniform(-1, 1, 100)

x = tf.placeholder('float', [None, 10])
y = tf.placeholder('float')
# the model
w1 = tf.get_variable('w1', [10, 1], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer(seed=1))
b1 = tf.get_variable('b1', [1], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer(seed=1))
result = tf.add(tf.matmul(x, w1), b1, name='result')
loss = tf.reduce_mean(tf.losses.mean_squared_error(predictions=result, labels=y))
optimizer = tf.train.AdamOptimizer(0.03).minimize(loss)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run([optimizer, loss], feed_dict={x: generate_random_input(), y: generate_random_target()})
    saver.save(sess, 'file_name')

# now load the model in another session:
sess2 = tf.Session()
saver = tf.train.import_meta_graph('file_name.meta')
saver.restore(sess2, tf.train.latest_checkpoint('./'))
graph = tf.get_default_graph()
pred = graph.get_operation_by_name('result')
test_result = sess2.run(pred, feed_dict={x: generate_random_input()})
In the last line, you don't feed the label placeholder with data via feed_dict. So in that placeholder, the [-1] dimension is still -1, rather than the batch size. That's the cause.
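Under that reading, a minimal sketch of the fix is to feed the label placeholder as well, reusing the question's own generate_random_target() as a stand-in target:
# Feed both placeholders so no dimension is left undetermined.
test_result = sess2.run(pred, feed_dict={x: generate_random_input(),
                                         y: generate_random_target()})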
I'm having the exact same problem as you. I'm importing and testing a bunch of different CNNs with different layer sizes on various datasets. You can stick your model creation in a function like the one below and recreate it in your other code:
def create_model():
    x = tf.placeholder('float', [None, 10])
    y = tf.placeholder('float')
    w1 = tf.get_variable('w1', [10, 1], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b1 = tf.get_variable('b1', [1], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer(seed=1))
    result = tf.add(tf.matmul(x, w1), b1, name='result')
    return x, y, result

x, y, result = create_model()
loss = tf.reduce_mean(tf.losses.mean_squared_error(predictions=result, labels=y))
optimizer = tf.train.AdamOptimizer(0.03).minimize(loss)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run([optimizer, loss], feed_dict={x: generate_random_input(), y: generate_random_target()})
    saver.save(sess, 'file_name')

# now load the model in another session:
tf.reset_default_graph()  # start from a clean graph so create_model() can redefine w1/b1
sess2 = tf.Session()
# This stuff is optional if everything is in the same scope
x, y, result = create_model()
saver = tf.train.Saver()
# loss = ... if you want loss
# Now just restore the weights and run
saver.restore(sess2, 'file_name')
test_result = sess2.run(result, feed_dict={x: generate_random_input()})
This is a bit tedious if I want to import many complex architectures with different dimensions. For our situation, I don't know of any other way to restore an entire model than to recreate the architecture first in the second session.

How to use a pretrained model with Tensorflow?

I know that the following is an already-answered question, but even though I tried and tried all the proposed solutions, none of them solved my problem.
I made this net for training over the MNIST dataset. At the beginning it was deeper, but in order to focus on the problem I simplified it.
mnist = mnist_data.read_data_sets('MNIST_data', one_hot=True)

# train the net
def train():
    for i in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
        print("accuracy", sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
        if i % 100 == 0:
            save_path = saver.save(sess, "./tmp/model.ckpt", global_step=i, write_meta_graph=True)
            print("Model saved in file: %s" % save_path)

# evaluate the net
def test(image, label):
    true_value = tf.argmax(label, 1)
    prediction = tf.argmax(y, 1)
    print("true value:", sess.run(true_value))
    print("predictions", sess.run(prediction, feed_dict={x: image}))

sess = tf.InteractiveSession()
x = tf.placeholder("float", shape=[None, 784])
W = tf.Variable(tf.zeros([784, 10]), name="W1")
b = tf.Variable(tf.zeros([10]), name="B1")
y = tf.nn.softmax(tf.matmul(x, W) + b, name="Y")
y_ = tf.placeholder("float", shape=[None, 10])
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
saver = tf.train.Saver()
model_to_restore = "./tmp/model.ckpt-100.meta"
if os.path.isfile(model_to_restore):
    pass  # what do I have to do here?????
else:
    # this part works!
    print("Model does not exist: training")
    train()
Thanks everybody for the answers!
Regards,
Silvio
UPDATE
I tried both
saver.restore(sess, model_to_restore)
and
saver = tf.train.import_meta_graph(model_to_restore)
saver.restore(sess, model_to_restore)
but in both cases I had this error from the terminal:
DataLossError (see above for traceback): Unable to open table file ./tmp/model.ckpt.meta: Data loss: not an sstable (bad magic number): perhaps your file is in a different file format and you need to use a different restore operator?
[[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
I think your path to the model might be wrong, and I would advise you to give the following workflow a try.
Since the saved models comprise several files, I usually save them to a folder after training:
modelPath = "myMNIST/model"
saved_path = saver.save(sess, os.path.join(modelPath, "model.ckpt"))
print("Model saved in file: ", saved_path)
This will also tell you the exact location where it has been saved.
Then I can start my predictor inside the saved location (cd into myMNIST) and restore the model with:
ckpt = tf.train.get_checkpoint_state("./model")
if ckpt and ckpt.model_checkpoint_path:
    print("Restored Model")
    saver.restore(sess, ckpt.model_checkpoint_path)
else:
    print("Could not restore model!")

Add a summary of accuracy of the whole train/test dataset in Tensorflow

I am trying to use Tensorboard to visualize my training procedure. My purpose is that, when every epoch completes, I would like to test the network's accuracy using the whole validation dataset, and store this accuracy result in a summary file, so that I can visualize it in Tensorboard.
I know Tensorflow has a summary_op to do it; however, it seems to only work for one batch when running sess.run(summary_op). I need to calculate the accuracy for the whole dataset. How?
Is there any example to do it?
Define a tf.scalar_summary that accepts a placeholder:
accuracy_value_ = tf.placeholder(tf.float32, shape=())
accuracy_summary = tf.scalar_summary('accuracy', accuracy_value_)
Then calculate the accuracy for the whole dataset (define a routine that calculates the accuracy for every batch in the dataset and takes the mean value) and save it into a Python variable; let's call it va.
Once you have the value of va, just run the accuracy_summary op, feeding the accuracy_value_ placeholder:
sess.run(accuracy_summary, feed_dict={accuracy_value_: va})
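For instance, a sketch of that routine (assumptions: accuracy is a per-batch accuracy op on placeholders x and labels, validation_batches() is a hypothetical batch iterator, and summary_writer/step are whatever you already use for logging):
import numpy as np

batch_accuracies = []
for batch_x, batch_y in validation_batches():  # hypothetical batch iterator
    batch_accuracies.append(
        sess.run(accuracy, feed_dict={x: batch_x, labels: batch_y}))
# Mean of per-batch accuracies (exact if all batches have equal size).
va = float(np.mean(batch_accuracies))
# Evaluate the summary op with the aggregated value and log it.
summary = sess.run(accuracy_summary, feed_dict={accuracy_value_: va})
summary_writer.add_summary(summary, step)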
I implemented a naive one-layer model as an example to classify the MNIST dataset and visualize the validation accuracy in Tensorboard; it works for me.
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import os

# number of epochs
num_epoch = 1000
model_dir = '/tmp/tf/onelayer_model/accu_info'
# mnist dataset location, change if you need
data_dir = '../data/mnist'

# load MNIST dataset without one hot
dataset = read_data_sets(data_dir, one_hot=False)

# Create placeholders for input images X and labels y
X = tf.placeholder(tf.float32, [None, 784])
# one_hot = False
y = tf.placeholder(tf.int32)

# One layer model graph
W = tf.Variable(tf.truncated_normal([784, 10], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[10]))
logits = tf.nn.relu(tf.matmul(X, W) + b)

init = tf.initialize_all_variables()

cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)
# loss function
loss = tf.reduce_mean(cross_entropy)
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

_, top_1_op = tf.nn.top_k(logits)
top_1 = tf.reshape(top_1_op, shape=[-1])
correct_classification = tf.cast(tf.equal(top_1, y), tf.float32)
# accuracy function
acc = tf.reduce_mean(correct_classification)

# define info that is used in SummaryWriter
acc_summary = tf.scalar_summary('valid_accuracy', acc)
valid_summary_op = tf.merge_summary([acc_summary])

with tf.Session() as sess:
    # initialize all the variables
    sess.run(init)
    print("Writing Summaries to %s" % model_dir)
    train_summary_writer = tf.train.SummaryWriter(model_dir, sess.graph)
    # load validation dataset
    valid_x = dataset.validation.images
    valid_y = dataset.validation.labels
    for epoch in xrange(num_epoch):
        batch_x, batch_y = dataset.train.next_batch(100)
        feed_dict = {X: batch_x, y: batch_y}
        _, acc_value, loss_value = sess.run(
            [train_op, acc, loss], feed_dict=feed_dict)
        vsummary = sess.run(valid_summary_op,
                            feed_dict={X: valid_x,
                                       y: valid_y})
        # Write validation accuracy summary
        train_summary_writer.add_summary(vsummary, epoch)
Using batching with your validation set is possible in case you are using tf.metrics ops, which use internal counters. Here is a simplified example:
model = create_model()
tf.summary.scalar('cost', model.cost_op)
acc_value_op, acc_update_op = tf.metrics.accuracy(labels, predictions)
summary_common = tf.summary.merge_all()
summary_valid = tf.summary.merge([
    tf.summary.scalar('accuracy', acc_value_op),
    # other metrics here...
])

with tf.Session() as sess:
    train_writer = tf.summary.FileWriter(logs_path + '/train',
                                         sess.graph)
    valid_writer = tf.summary.FileWriter(logs_path + '/valid')
While training, only write the common summary using your train-writer:
summary = sess.run(summary_common)
train_writer.add_summary(summary, tf.train.global_step(sess, gstep_op))
train_writer.flush()
After every validation, write both summaries using the valid-writer:
gstep, summaryc, summaryv = sess.run([gstep_op, summary_common, summary_valid])
valid_writer.add_summary(summaryc, gstep)
valid_writer.add_summary(summaryv, gstep)
valid_writer.flush()
When using tf.metrics, don't forget to reset the internal counters (local variables) before every validation step.
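For instance, a sketch of one validation pass under those assumptions (validation_batches() and the feed placeholders x/y are hypothetical stand-ins for your input pipeline):
# Reset the metric's internal counters, which live in local variables.
sess.run(tf.local_variables_initializer())
for batch_x, batch_y in validation_batches():  # hypothetical batch iterator
    sess.run(acc_update_op, feed_dict={x: batch_x, y: batch_y})
# acc_value_op now reflects the accuracy accumulated over all validation
# batches, so summary_valid can be evaluated and written as shown above.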