Tensorflow, use import_graph_def() to load model error - tensorflow

I attempt to read an RNN network through import_graph_def() and do inference.
But I cannot use tf.trainable_variables() to get any variables.
In the following code, tf.trainable_variables() returns [] (a list with nothing)
Also, when I use saver = tf.train.Saver(), tensorflow reports "no variables to save"
def eval_on_test(graph_path):
batch_size = 80
train_begin = 0
train_end = 3000
with tf.Graph().as_default() as graph:
with open(graph_path, 'rb') as f:
tf_graph = tf.GraphDef()
print("Loading graph_def from {}".format(graph_path))
tf_graph.ParseFromString(f.read())
return_elements = tf.import_graph_def(tf_graph, name="", return_elements=['input_x:0', 'output_y:0', 'pred:0', 'loss:0'])
X = return_elements[0]
Y = return_elements[1]
pred = return_elements[2]
loss = return_elements[3]
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
print("graph loaded, start testing")
with tf.Session(config=tf_config) as sess:
init_op = sess.graph.get_operation_by_name('init')
sess.run(init_op)
print(tf.trainable_variables())
batch_index,train_x,train_y=get_train_data(batch_size,time_step,train_begin,train_end)
for batch in range(len(batch_index)-1):
loss_ = sess.run(loss, feed_dict={X:train_x[batch_index[batch]:batch_index[batch+1]],Y:train_y[batch_index[batch]:batch_index[batch+1]]})
print(batch, loss_)
Any help would be appreciated.

import_graph_def will only restore graph but will not restore collections such as GLOBAL_VARIABLES, that's why Saver can't find any variables in graph, to solve this, you can try tf.train.import_meta_graph which will also restore all collections.

Related

Unable to export saved model using simple_save tensorflow

I am trying to use simple_save for tensorflow, but it isn't working :(
Here is my code:
def export_model(saved_model_dir, final_tensor_name):
with tf.Session() as sess:
with sess.graph.as_default() as graph:
tf.saved_model.simple_save(
sess,
saved_model_dir,
inputs={'image': tf.placeholder(tf.float32)},
outputs={'prediction': graph.get_tensor_by_name(final_tensor_name + ":0")}
)
I get the following error:
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value final_training_ops/biases/final_biases
[[{{node save/SaveV2}}]]
I am working with the following tutorial: https://github.com/BartyzalRadek/Multi-label-Inception-net
I've spent so many hours trying to find solutions online and I know it can't be that tough. I already have a graph that is being exported and all I need now is that saved_model.pb. Any help is appreciated! Thank you!
NEW UPDATE - CODE BELOW
def export_model(saved_model_dir, final_tensor_name):
with tf.Session() as sess:
init = tf.global_variables_initializer()
sess.run(init)
with sess.graph.as_default() as graph:
tf.saved_model.simple_save(
sess,
saved_model_dir,
inputs={'image': tf.placeholder(tf.string)},
outputs={'prediction': graph.get_tensor_by_name(final_tensor_name + ":0")}
)
The code runs now, but when I test the saved model, I always get the same result.
IMAGE_LABELING_CODE
import tensorflow as tf
import sys
image_path = sys.argv[1]
image_data = tf.gfile.FastGFile(image_path, 'rb').read()
label_lines = [line.rstrip() for line
in tf.gfile.GFile("labels.txt")]
with tf.gfile.FastGFile("retrained_graph.pb", 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
_ = tf.import_graph_def(graph_def, name='')
with tf.Session() as sess:
softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
predictions = sess.run(softmax_tensor, \
{'DecodeJpeg/contents:0': image_data})
As #giser_yugang said maybe you should put at the end of the construction part of the graph: init = tf.global_variables_initializer() and then at execution, after beginning the session: sess.run(init)
Nevertheless, if it were a local variable you would have to add the variable to some collection, establish the initializer and the run it. For example:
a = tf.Variable(..., collections=[tf.GRAPH_KEYS.LOCAL_VARIABLES])
local_init = tf.local_variable_initializer()
...
with tf.Session() as sess:
sess.run(local_init)
nevertheless, some implementations from tensorflow library go directly to local variables, for example, tf.metrics (if they have not changed this) and you just have to define and run local_init = tf.local_variables_initializer() and sess.run(local_init)

Tensorflow results inconsistent between each freeze graph

When freezing a graph and then running it elsewhere (mobile device), the output is of low quality compared to the inference on the server on my semantic segmentation model. It is basically a messy version of what would run on the server. It is executing successfully, but it appears as though something was not initialized prior to freezing, even though the method to load the model between the export script and inference scripts is nearly identical.
The exported model can be run on the same images over and over and produce the same results for a given set of images, as expected.
However, each time the model is frozen, using the exact same script and checkpoint, it creates a different output for a given set of images.
def main():
args = get_arguments()
if args.dataset == 'cityscapes':
num_classes = cityscapes_class
else:
num_classes = ADE20k_class
shape = [320, 320]
x = tf.placeholder(dtype=tf.float32, shape=(shape[0], shape[1], 3), name="input")
img_tf = preprocess(x)
model = model_config[args.model]
net = model({'data': img_tf}, num_classes=num_classes, filter_scale=args.filter_scale)
raw_output = net.layers['conv6_cls']
raw_output_up = tf.image.resize_bilinear(raw_output, size=shape, align_corners=True)
raw_output_maxed = tf.argmax(raw_output_up, axis=3, name="output")
# Init tf Session
config = tf.ConfigProto()
sess = tf.Session(config=config)
init = tf.global_variables_initializer()
sess.run(init)
model_path = model_paths[args.model]
ckpt = tf.train.get_checkpoint_state(model_path)
if ckpt and ckpt.model_checkpoint_path:
input_checkpoint = ckpt.model_checkpoint_path
loader = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=True)
load(loader, sess, ckpt.model_checkpoint_path)
else:
print('No checkpoint file found at %s.' % model_path)
exit()
print("Loaded Model")
# We retrieve the protobuf graph definition
graph = tf.get_default_graph()
input_graph_def = graph.as_graph_def()
# We use a built-in TF helper to export variables to constants
output_graph_def = graph_util.convert_variables_to_constants(
sess, # The session is used to retrieve the weights
input_graph_def, # The graph_def is used to retrieve the nodes
output_node_names.split(",") # The output node names are used to select the usefull nodes
)
# Finally we serialize and dump the output graph to the filesystem
with tf.gfile.GFile("model/output_graph.pb", "wb") as f:
f.write(output_graph_def.SerializeToString())
print("%d ops in the final graph." % len(output_graph_def.node))

How to save and restore Tensorflow model made with Keras

I am currently working on a project where I build a Network in Keras like so:
inputstuff = Input(shape=(32,), name='main_input')
encoded = Dense(16, activation='relu',init='he_normal', activity_regularizer=regularizers.l1(0.01))(inputstuff)
[other layers...]
decoded = Dense(32, activation='relu',init='he_normal', name='main_output')(decoded)
autoencoder = Model(input=inputstuff, output=decoded)
Next to save this file as a tensorflow model, I do the following:
sessK=K.get_session()
saver = tf.train.Saver()
model_path = "/tmp/KerasGlobalAEModel.ckpt"
save_path = saver.save(sessK, model_path)
Next I want to load the file from another program:
model_path = "/tmp/KerasGlobalAEModel.ckpt"
tf.train.NewCheckpointReader(model_path)
with tf.Session() as sess:
# Initialize variables
init = tf.initialize_all_variables()
sess.run(init)
saver = tf.train.import_meta_graph('/tmp/KerasGlobalAEModel.ckpt.meta')
# Restore model weights from previously saved model
saver.restore(sess, model_path)
test = [some data]
graph = tf.get_default_graph()
MyAnswers = sess.run(Y*, feed_dict={X*: test})
If this was the same file I would be able to use:
X* = model.input
Y* = model.output
However, this and all other things I have tried failed.
Here is a list of what DOES NOT work.
Y = graph.get_tensor_by_name("main_output")
Y = model.output
Y = autoencoder.output
I realize I am a noobie at this, and this might be a dumb question but I would appreciate any guidance.
Thank you,

Cannot load int variable from previous session in tensorflow 1.1

I have read many similar questions and just cannot get this to work properly.
I have my model being trained well and checkpoint files are being made every epoch. I want to have it so the program can continue from epoch x once reloaded and also for it to print that is on that epoch with every iteration. I could simply save the data outside of the checkpoint file, however I was also wanting to do this to give me confidence everything else is also being stored properly.
Unfortunately the value in the epoch/global_step variable is always still 0 when I restart.
import tensorflow as tf
import numpy as np
import tensorflow as tf
import numpy as np
# more imports
def extract_number(f): # used to get latest checkpint file
s = re.findall("epoch(\d+).ckpt",f)
return (int(s[0]) if s else -1,f)
def restore(init_op, sess, saver): # called to restore or just initialise model
list = glob(os.path.join("./params/e*"))
if list:
file = max(list,key=extract_number)
saver.restore(sess, file[:-5])
sess.run(init_op)
return
with tf.Graph().as_default() as g:
# build models
total_batch = data.train.num_examples / batch_size
epochLimit = 51
saver = tf.train.Saver()
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
saver = tf.train.Saver()
init_op = tf.global_variables_initializer()
restore(init_op, sess, saver)
epoch = global_step.eval()
while epoch < epochLimit:
total_batch = data.train.num_examples / batch_size
for i in range(int(total_batch)):
sys.stdout.flush()
voxels = newData.eval()
batch_z = np.random.uniform(-1, 1, [batch_size, z_size]).astype(np.float32)
sess.run(opt_G, feed_dict={z:batch_z, train:True})
sess.run(opt_D, feed_dict={input:voxels, z:batch_z, train:True})
with open("out/loss.csv", 'a') as f:
batch_loss_G = sess.run(loss_G, feed_dict={z:batch_z, train:False})
batch_loss_D = sess.run(loss_D, feed_dict={input:voxels, z:batch_z, train:False})
msgOut = "Epoch: [{0}], i: [{1}], G_Loss[{2:.8f}], D_Loss[{3:.8f}]".format(epoch, i, batch_loss_G, batch_loss_D)
print(msgOut)
epoch=epoch+1
sess.run(global_step.assign(epoch))
saver.save(sess, "params/epoch{0}.ckpt".format(epoch))
batch_z = np.random.uniform(-1, 1, [batch_size, z_size]).astype(np.float32)
voxels = sess.run(x_, feed_dict={z:batch_z})
v = voxels[0].reshape([32, 32, 32]) > 0
util.save_binvox(v, "out/epoch{0}.vox".format(epoch), 32)
I also update the global step variable using assign at the bottom. Any ideas? Any help would be greatly appreciated.
When you call sess.run(init_op) after restoring this resets all variables to their initial values. Comment that line out and things should work.
My original code was wrong for several reasons because I was trying so many things. The first responder Alexandre Passos gives a valid point, but I believe what changed the game was also the use of scopes (maybe?).
Below is the working updated code if it helps anyone:
import tensorflow as tf
import numpy as np
# more imports
def extract_number(f): # used to get latest checkpint file
s = re.findall("epoch(\d+).ckpt",f)
return (int(s[0]) if s else -1,f)
def restore(sess, saver): # called to restore or just initialise model
list = glob(os.path.join("./params/e*"))
if list:
file = max(list,key=extract_number)
saver.restore(sess, file[:-5])
return saver, True, sess
saver = tf.train.Saver()
init_op = tf.global_variables_initializer()
sess.run(init_op)
return saver, False , sess
batch_size = 100
learning_rate = 0.0001
beta1 = 0.5
z_size = 100
save_interval = 1
data = dataset.read()
total_batch = data.train.num_examples / batch_size
def fill_queue():
for i in range(int(total_batch*epochLimit)):
sess.run(enqueue_op, feed_dict={batch: data.train.next_batch(batch_size)}) # runnig in seperate thread to feed a FIFOqueue
with tf.variable_scope("glob"):
global_step = tf.get_variable(name='global_step', initializer=0,trainable=False)
# build models
epochLimit = 51
saver = tf.train.Saver()
with tf.Session() as sess:
saver,rstr,sess = restore(sess, saver)
with tf.variable_scope("glob", reuse=True):
epocht = tf.get_variable(name='global_step', trainable=False, dtype=tf.int32)
epoch = epocht.eval()
while epoch < epochLimit:
total_batch = data.train.num_examples / batch_size
for i in range(int(total_batch)):
sys.stdout.flush()
voxels = newData.eval()
batch_z = np.random.uniform(-1, 1, [batch_size, z_size]).astype(np.float32)
sess.run(opt_G, feed_dict={z:batch_z, train:True})
sess.run(opt_D, feed_dict={input:voxels, z:batch_z, train:True})
with open("out/loss.csv", 'a') as f:
batch_loss_G = sess.run(loss_G, feed_dict={z:batch_z, train:False})
batch_loss_D = sess.run(loss_D, feed_dict={input:voxels, z:batch_z, train:False})
msgOut = "Epoch: [{0}], i: [{1}], G_Loss[{2:.8f}], D_Loss[{3:.8f}]".format(epoch, i, batch_loss_G, batch_loss_D)
print(msgOut)
epoch=epoch+1
sess.run(global_step.assign(epoch))
saver.save(sess, "params/epoch{0}.ckpt".format(epoch))
batch_z = np.random.uniform(-1, 1, [batch_size, z_size]).astype(np.float32)
voxels = sess.run(x_, feed_dict={z:batch_z})
v = voxels[0].reshape([32, 32, 32]) > 0
util.save_binvox(v, "out/epoch{0}.vox".format(epoch), 32)

multiple runs of inference model using saved checkpoint produces stochastic errors - Tensorflow

I am running Tensorflow 0.12.1 on a GPU. I have a trained Deep CNN model whose weights I've saved using a checkpoint file. During inference, I reload the saved checkpoint using restorer.restore(sess, tf.train.latest_checkpoint(FLAGS.train_dir)). The code seems to run without issues, but everytime I re-run the script, I'm getting screwed up outputs. AFAIK, I do not shuffle my test set inputs. The inputs are being loaded and fed to the network properly. It is just the output of different runs of the CNN on the same test set using the same order is producing very different outputs. I'm perplexed! Also, how do I execute a graph loaded with saved checkpoint without running an init_op during inference? It seems my code requires all global and local variables to be initialized before execution. (I initialize first, and then only restore the checkpoint!).Here's a snippet of my code:
import tensorflow as tf
import numpy as np
import os
import os.path
from datetime import datetime
import time
import random
import json
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from modelFCNN3 import model
def read_input(inp_queue,height=224,width=224,channels=3, mask=False):
value = tf.read_file(inp_queue)
image = tf.image.decode_png(value)
image = tf.image.resize_images(image, [height, width],method=2)
image = tf.cast(image, tf.uint8)
image.set_shape([height,width,channels])
image = tf.reshape(image,[height,width,channels])
if mask:
image = tf.to_float(tf.greater_equal(image,128))
image = tf.cast(image,tf.float32)
else:
image = tf.image.per_image_standardization(image)
image = tf.cast(image,tf.float32)
return image
if __name__ == '__main__':
tf.reset_default_graph()
with open('X_test.json', 'r') as infile:
X_test = json.load(infile)
with open('y_test.json', 'r') as infile:
y_test = json.load(infile)
imagelist = ops.convert_to_tensor(X_test, dtype=dtypes.string)
labellist = ops.convert_to_tensor(y_test, dtype=dtypes.string)
input_queue = tf.train.slice_input_producer([imagelist, labellist],
num_epochs=1,
shuffle=False)
image = read_input(input_queue[0],height=224,width=224,channels=3, mask=False)
label = read_input(input_queue[1],height=224,width=224,channels=1, mask=True)
images_batch, labels_batch = tf.train.batch([image, label], batch_size=FLAGS.batch_size,
enqueue_many=False,shapes=None, allow_smaller_final_batch=True)
global_step = tf.Variable(0, trainable=False)
images = tf.placeholder_with_default(images_batch, shape=[None, 224,224,3])
labels = tf.placeholder_with_default(labels_batch, shape=[None, 224,224,1])
restorer = tf.train.Saver()
logits = model(images).logits
labels = tf.cast(labels,tf.int32)
labels.set_shape([FLAGS.batch_size,224,224,1])
valid_prediction = tf.argmax(tf.nn.softmax(logits), dimension=3)
valid_prediction.set_shape([FLAGS.batch_size,224,224])
meanIOU,update_op_mIOU= tf.contrib.metrics.streaming_mean_iou(tf.cast(valid_prediction,tf.int32), tf.squeeze(labels),FLAGS.num_classes)
init = tf.global_variables_initializer()
init_locals = tf.local_variables_initializer()
with tf.Session() as sess:
sess.run([init, init_locals])
restorer.restore(sess, tf.train.latest_checkpoint(FLAGS.train_dir))
print("Model restored.")
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord,sess=sess)
summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
try:
step = 0
avg = []
while not coord.should_stop():
myimg, predimg, mylbl= sess.run([images,valid_prediction,labels])
mIOU,_ = sess.run([meanIOU,update_op_mIOU])
avg.append(mIOU)
step += 1
except tf.errors.OutOfRangeError:
print('Done training -- epoch limit reached')
finally:
coord.request_stop()
coord.join(threads)
sess.close()
Are you running on same machine or different machine
#saver = tf.train.Saver()
The following comment is in tensorflow docs
#NOTE: Restarting training from saved meta_graph only works if the device assignments have not changed.
#saver = tf.train.import_meta_graph(metafile)