When freezing a graph with a local variable, freeze_graph has an error stating "Attempting to use uninitialized value...". The local variable in question was initialized via:
with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    # Constant that serves as the initial value of `b`.
    b_init = tf.constant(10.0, shape=[2, 1], dtype="float32", name='bi')
    # `b` is placed only in the LOCAL_VARIABLES collection.
    b = tf.get_variable('b', initializer=b_init,
                        collections=[tf.GraphKeys.LOCAL_VARIABLES])
I'm able to create a saved model and run the saved model. However, I'm trying to freeze another graph for optimization. This error will go away if I remove the 'LOCAL_VARIABLES' flag. However, this variable then becomes global, which causes an issue with reloading my checkpoint (Tensorflow is unable to find the variable in the checkpoint).
Normally, I'd expect freeze_graph to initialize 'b' using 'b_init'.
Code to reproduce the issue:
import os, sys, json
import tensorflow as tf
from tensorflow.python.lib.io import file_io
from tensorflow.core.framework import variable_pb2
from tensorflow.python.framework import ops
from tensorflow.python.ops import variables
from tensorflow.python.framework.ops import register_proto_function
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.tools import freeze_graph
from tensorflow.python import ops
from tensorflow.tools.graph_transforms import TransformGraph
# Command-line flags: which model version to export and where to put it.
tf.app.flags.DEFINE_integer(
    'model_version',
    1,
    'Models version number.')
tf.app.flags.DEFINE_string(
    'export_model_dir',
    '../model_batch/versions',
    'Directory where model will be exported to')
FLAGS = tf.app.flags.FLAGS
def main(_):
    """Export a toy graph with a local variable as a SavedModel, then freeze it.

    The graph assigns the placeholder `a` to the local variable `b`, scales it
    by 1.1 five times and stacks the intermediate results under the node
    name 'c'. The SavedModel is written to
    ``<FLAGS.export_model_dir>/<FLAGS.model_version>`` and ``freeze_graph`` is
    then run on that directory.

    Args:
        _: unused positional argument (the function is started via
            ``tf.app.run()``).
    """
    a = tf.placeholder(dtype=tf.float32, shape=[2, 1])
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        b_init = tf.constant(10.0, shape=[2, 1], dtype="float32", name='bi')
        # `b` is registered only in LOCAL_VARIABLES; this is the variable the
        # freezing step reports as uninitialized.
        b = tf.get_variable('b', initializer=b_init,
                            collections=[tf.GraphKeys.LOCAL_VARIABLES])
    b = tf.assign(b, a)
    c = []
    for _step in range(5):
        b = b * 1.1
        c.append(b)
    c = tf.identity(c, name='c')

    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())

    with tf.Session() as sess:
        # init
        sess.run(init)
        print(tf.get_default_graph().get_collection(tf.GraphKeys.LOCAL_VARIABLES))

        # create saved model builder class
        export_path_base = FLAGS.export_model_dir
        export_path = os.path.join(
            tf.compat.as_bytes(export_path_base),
            tf.compat.as_bytes(str(FLAGS.model_version)))
        if tf.gfile.Exists(export_path):
            print('Removing previous artifacts')
            tf.gfile.DeleteRecursively(export_path)

        # inputs
        tensor_info_a = tf.saved_model.utils.build_tensor_info(a)
        # outputs
        tensor_info_c = tf.saved_model.utils.build_tensor_info(c)

        print('Exporting trained model to', export_path)
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)

        # define signatures
        prediction_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={'cameras': tensor_info_a},
                outputs={'depthmap': tensor_info_c},
                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={'predict_batch': prediction_signature})

        # export model
        builder.save(as_text=True)

        # Also dump the graph for inspection.
        writer = tf.summary.FileWriter("output_batch", sess.graph)
        writer.close()

    # load graph from saved model
    print('Freezing graph')
    # No initializer nodes are requested, so nothing re-initializes `b` here.
    initializer_nodes = ''
    output_node_names = 'c'
    saved_model_dir = os.path.join(FLAGS.export_model_dir, str(FLAGS.model_version))
    output_graph_filename = os.path.join(saved_model_dir, 'frozen_graph.pb')
    freeze_graph.freeze_graph(
        input_saved_model_dir=saved_model_dir,
        output_graph=output_graph_filename,
        saved_model_tags=tag_constants.SERVING,
        output_node_names=output_node_names,
        initializer_nodes=initializer_nodes,
        input_graph=None,
        input_saver=False,
        input_binary=False,
        input_checkpoint=None,
        restore_op_name=None,
        filename_tensor_name=None,
        clear_devices=False)
# Script entry point: hand control to TensorFlow's app runner.
if __name__ == '__main__':
    tf.app.run()
I wasn't able to include local_variables in my frozen graph, but I did come up with a workaround.
The initial problem was that my checkpoint was created from a graph that contained local_variables. Unfortunately, freezing the graph produced the error:
Attempting to use uninitialized value...
What I did to work-around the issue was to change the local variables to untrainable global variables. I then filtered out the global variables not in my checkpoint using the following solution:
https://stackoverflow.com/a/39142780/6693924
I'm able to create a savedModel and freeze its graph.
Related
I followed the website: https://leimao.github.io/blog/Save-Load-Inference-From-TF2-Frozen-Graph/
However, I still do not know how to run inference with frozen_func (see my code below).
Please advise how to run inference using pb file in TensorFlow 2.2. Thanks.
import tensorflow as tf
def wrap_frozen_graph(graph_def, inputs, outputs, print_graph=False):
    """Import a GraphDef into a wrapped function and prune it to inputs/outputs.

    Args:
        graph_def: graph definition handed to ``tf.compat.v1.import_graph_def``.
        inputs: structure of graph element names (e.g. ``["x:0"]``) used as the
            inputs of the pruned function.
        outputs: structure of graph element names used as its outputs.
        print_graph: when true, print the name of every operation in the
            imported graph between the two separator lines.

    Returns:
        The result of ``wrapped_import.prune(...)`` for the resolved inputs
        and outputs.
    """
    def _imports_graph_def():
        # name="" imports the nodes without adding a name prefix.
        tf.compat.v1.import_graph_def(graph_def, name="")

    wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, [])
    import_graph = wrapped_import.graph

    print("-" * 50)
    print("Frozen model layers: ")
    layers = [op.name for op in import_graph.get_operations()]
    if print_graph:
        for layer in layers:
            print(layer)
    print("-" * 50)

    # Resolve the element names to graph elements before pruning.
    return wrapped_import.prune(
        tf.nest.map_structure(import_graph.as_graph_element, inputs),
        tf.nest.map_structure(import_graph.as_graph_element, outputs))
# Load frozen graph using TensorFlow 1.x functions
with tf.io.gfile.GFile("/content/drive/My Drive/Model_file/froze_graph.pb", "rb") as f:
    graph_def = tf.compat.v1.GraphDef()
    # The parsed content ends up in `graph_def`; `loaded` only holds the
    # return value of ParseFromString.
    loaded = graph_def.ParseFromString(f.read())

# Wrap frozen graph to ConcreteFunctions
frozen_func = wrap_frozen_graph(graph_def=graph_def,
                                inputs=["wav_data:0"],
                                outputs=["labels_softmax:0"],
                                print_graph=True)
You can use tf.graph_util.import_graph_def inside a tf.function to do that. For example, suppose you make a test GraphDef file my_func.pb like this:
import tensorflow as tf
# Test function to make into a GraphDef file
@tf.function
def my_func(x):
    """Return the element-wise square of `x`; the output op is named 'y'."""
    return tf.square(x, name='y')


# Get graph: trace the function for a float32 tensor of unspecified shape.
g = my_func.get_concrete_function(tf.TensorSpec(None, tf.float32)).graph
# Write to file (binary, not text, because as_text=False)
tf.io.write_graph(g, '.', 'my_func.pb', as_text=False)
You can then load it and use it like this:
import tensorflow as tf
from tensorflow.core.framework.graph_pb2 import GraphDef
# Load GraphDef
with open('my_func.pb', 'rb') as f:
    gd = GraphDef()
    gd.ParseFromString(f.read())


@tf.function
def my_func2(x):
    """Run the graph stored in `gd` on `x` and return its 'y:0' output."""
    # Ensure the input is a tensor of the right type
    x = tf.convert_to_tensor(x, tf.float32)
    # Import the graph giving x as input and getting the output y
    y = tf.graph_util.import_graph_def(
        gd, input_map={'x:0': x}, return_elements=['y:0'])[0]
    return y


tf.print(my_func2(2))
# 4
I have a code running Keras with TensorFlow 1. The code modifies the loss function in order to do deep reinforcement learning:
import os
import gym
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Environment setup: CartPole, with the number of actions and the observation shape.
env = gym.make("CartPole-v0").env
env.reset()
n_actions = env.action_space.n
state_dim = env.observation_space.shape
from tensorflow import keras
import random
from tensorflow.keras import layers as L
import tensorflow as tf
from tensorflow.python.keras.backend import set_session
# Session and default-graph handles.
sess = tf.compat.v1.Session()
graph = tf.compat.v1.get_default_graph()
# NOTE: the initializer op is created and run here, before the network and
# the optimizer below have been defined. Variables created afterwards are not
# covered by it; this ordering is what the FailedPreconditionError reported
# for this script is attributed to.
init = tf.global_variables_initializer()
sess.run(init)
network = keras.models.Sequential()
network.add(L.InputLayer(state_dim))
# let's create a network for approximate q-learning following guidelines above
network.add(L.Dense(5, activation='elu'))
network.add(L.Dense(5, activation='relu'))
network.add(L.Dense(n_actions, activation='linear'))
# Initial state, fed to the training step at the end of the script.
s = env.reset()
# Create placeholders for the <s, a, r, s'> tuple and a special indicator for game end (is_done = True)
states_ph = keras.backend.placeholder(dtype='float32', shape=(None,) + state_dim)
actions_ph = keras.backend.placeholder(dtype='int32', shape=[None])
rewards_ph = keras.backend.placeholder(dtype='float32', shape=[None])
next_states_ph = keras.backend.placeholder(dtype='float32', shape=(None,) + state_dim)
is_done_ph = keras.backend.placeholder(dtype='bool', shape=[None])
#get q-values for all actions in current states
predicted_qvalues = network(states_ph)
#select q-values for chosen actions
# (the one-hot mask zeroes every action except the chosen one before summing)
predicted_qvalues_for_actions = tf.reduce_sum(predicted_qvalues * tf.one_hot(actions_ph, n_actions),
axis=1)
# discount factor
gamma = 0.99
# compute q-values for all actions in next states
predicted_next_qvalues = network(next_states_ph)
# compute V*(next_states) using predicted next q-values
next_state_values = tf.math.reduce_max(predicted_next_qvalues, axis=1)
# compute "target q-values" for loss - it's what's inside square parentheses in the above formula.
target_qvalues_for_actions = rewards_ph + tf.constant(gamma) * next_state_values
# at the last state we shall use simplified formula: Q(s,a) = r(s,a) since s' doesn't exist
target_qvalues_for_actions = tf.where(is_done_ph, rewards_ph, target_qvalues_for_actions)
#mean squared error loss to minimize
# (stop_gradient keeps the target from contributing gradients)
loss = (predicted_qvalues_for_actions - tf.stop_gradient(target_qvalues_for_actions)) ** 2
loss = tf.reduce_mean(loss)
# training function that resembles agent.update(state, action, reward, next_state) from tabular agent
train_step = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(loss)
# Take one environment step with action 0 and run a single training step on it.
a = 0
next_s, r, done, _ = env.step(a)
sess.run(train_step, {
states_ph: [s], actions_ph: [a], rewards_ph: [r],
next_states_ph: [next_s], is_done_ph: [done]
})
When I run a sess.run() training step, I get the following error:
tensorflow.python.framework.errors_impl.FailedPreconditionError: Error while reading resource variable beta1_power from Container: localhost. This could mean that the variable was uninitialized. Not found: Container localhost does not exist. (Could not find resource: localhost/beta1_power)
Any ideas on what might be the problem?
The initialization operation should be fetched and run (only one time) after the variables (i.e. model) have been created or the computation graph has been defined. Therefore, they should be put right before running the training step:
# Define and create the computation graph/model
# (everything that creates variables, including the optimizer's training op)
# ...
# Initialize variables in the graph/model: run once, after all of the above
init = tf.global_variables_initializer()
sess.run(init)
# Start training
sess.run(train_step, ...)
In my training file(train.py), I write:
def deep_part(self):
    """Build the deep part: fully connected layers stacked on the flattened embeddings.

    Reads ``self.embeddings``, ``self.field_size``, ``self.factor_size``,
    ``self.deep_layers`` and ``self.deep_layers_activation``.

    Returns:
        The output tensor of the last fully connected layer.
    """
    with tf.variable_scope("deep-part"):
        # Flatten the embeddings to None * (F*K).
        y_deep = tf.reshape(self.embeddings,
                            shape=[-1, self.field_size * self.factor_size])
        # self.deep_layers is presumably a sequence of layer widths, one entry
        # per layer (TODO confirm). Layer i is built under scope "fc<i>" inside
        # "deep-part", so its variables should be listed under names starting
        # with "deep-part/fc<i>/" (verify with tf.global_variables()).
        for i, units in enumerate(self.deep_layers):
            y_deep = tf.contrib.layers.fully_connected(
                y_deep, units,
                activation_fn=self.deep_layers_activation,
                scope='fc%d' % i)
    return y_deep
Now, in the prediction file (predict.py), I restore the checkpoint, but I don't know how to reload the "deep-part" network's weights and biases, because I think the "fully_connected" function might hide the weights and biases.
I wrote a lengthy explanation here. A short summary:
By saver.save(sess, '/tmp/my_model') Tensorflow produces multiple files:
checkpoint
my_model.data-00000-of-00001
my_model.index
my_model.meta
The checkpoint file checkpoint is just a pointer to the latest version of our model-weights and it is simply a plain text file containing
$ !cat /tmp/checkpoint
model_checkpoint_path: "/tmp/my_model"
all_model_checkpoint_paths: "/tmp/my_model"
The others are binary files containing the graph (.meta) and weights (.data*).
You can help yourself by running
import tensorflow as tf
import numpy as np
# Build a tiny graph: a [1, 9] placeholder feeding one dense layer.
data = np.arange(9 * 1).reshape(1, 9).astype(np.float32)
plhdr = tf.placeholder(tf.float32, shape=[1, 9], name='input')
print(plhdr.name)
activation = tf.layers.dense(plhdr, 10, name='fc')
print(activation.name)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Reference output computed with the weights that are about to be saved.
    expected = sess.run(activation, {plhdr: data})
    print(expected)
    saver = tf.train.Saver(tf.global_variables())
    saver.save(sess, '/tmp/my_model')

# Start from an empty graph so that everything below comes from the saved files.
tf.reset_default_graph()

with tf.Session() as sess:
    # load the computation graph (the fully connected + placeholder)
    loader = tf.train.import_meta_graph('/tmp/my_model.meta')
    sess.run(tf.global_variables_initializer())
    plhdr = tf.get_default_graph().get_tensor_by_name('input:0')
    activation = tf.get_default_graph().get_tensor_by_name('fc/BiasAdd:0')
    # Freshly initialized weights: the output must differ from the saved run.
    actual = sess.run(activation, {plhdr: data})
    assert not np.allclose(actual, expected)
    # now load the weights
    loader.restore(sess, '/tmp/my_model')
    actual = sess.run(activation, {plhdr: data})
    assert np.allclose(actual, expected)
I am trying to create an image classifier that utilizes the pre-trained ResNet V2 model provided in the slim documentation.
Here is the code so far:
import tensorflow as tf
slim = tf.contrib.slim
from PIL import Image
from inception_resnet_v2 import *
import numpy as np
checkpoint_file = 'inception_resnet_v2_2016_08_30.ckpt'
sample_images = ['carrot.jpg']

# Input pipeline: scale pixel values by 1/255, then shift and stretch to [-1, 1].
input_tensor = tf.placeholder(tf.float32, shape=(None, 299, 299, 3), name='input_image')
scaled_input_tensor = tf.scalar_mul((1.0 / 255), input_tensor)
scaled_input_tensor = tf.subtract(scaled_input_tensor, 0.5)
scaled_input_tensor = tf.multiply(scaled_input_tensor, 2.0)

# NOTE: the variables are queried and the restore function is built here,
# before inception_resnet_v2(...) is called further down, so no model
# variables exist yet. This ordering is what produces the
# "ValueError: No variables to save" reported for this script.
variables_to_restore = slim.get_model_variables()
print(variables_to_restore)
init_fn = slim.assign_from_checkpoint_fn(
    checkpoint_file,
    slim.get_model_variables('InceptionResnetV2'))

sess = tf.Session()
init_fn(sess)

arg_scope = inception_resnet_v2_arg_scope()
with slim.arg_scope(arg_scope):
    logits, end_points = inception_resnet_v2(scaled_input_tensor, is_training=False)

for image in sample_images:
    # Resize to the 299x299 input size and add a leading batch dimension.
    im = Image.open(image).resize((299, 299))
    im = np.array(im)
    im = im.reshape(-1, 299, 299, 3)
    predict_values, logit_values = sess.run(
        [end_points['Predictions'], logits], feed_dict={input_tensor: im})
    print(np.max(predict_values), np.max(logit_values))
    print(np.argmax(predict_values), np.argmax(logit_values))
The problem is I keep getting this error:
Traceback (most recent call last):
File "./classify.py", line 21, in <module>
slim.get_model_variables('InceptionResnetV2'))
File "/home/ubuntu/tensorflow/local/lib/python2.7/site-packages/tensorflow/contrib/framework/python/ops/variables.py", line 584, in assign_from_checkpoint_fn
saver = tf_saver.Saver(var_list, reshape=reshape_variables)
File "/home/ubuntu/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1040, in __init__
self.build()
File "/home/ubuntu/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1061, in build
raise ValueError("No variables to save")
ValueError: No variables to save
So it seems TF/Slim is unable to find any variables and this is made clear when I call:
variables_to_restore = slim.get_model_variables()
print(variables_to_restore)
As it outputs an empty array.
How can I go about using the pre-trained model?
This happens because you haven't constructed the model in your graph yet to have any variables starting with the name "InceptionResnetV2" to be captured and restored by the saver.
I believe you should put the model construction before using slim.get_variables_to_restore().
For instance:
# Build the model first so that its variables exist in the graph ...
with slim.arg_scope(arg_scope):
    logits, end_points = inception_resnet_v2(scaled_input_tensor, is_training=False)
# ... and only then ask for the variables to restore.
variables_to_restore = slim.get_model_variables()
This way, the Tensor variables will be constructed and you should see variables_to_restore is no longer empty.
You need to manually add the model variables.
Try this
with slim.arg_scope(arg_scope):
    logits, end_points = inception_resnet_v2(scaled_input_tensor, is_training=False)
# Add model variables
# The scope filter uses the 'InceptionResnetV2' name prefix, the same prefix
# the restore call in the question passes to slim.get_model_variables().
for var in tf.global_variables(scope='InceptionResnetV2'):
    slim.add_model_variable(var)
I am running Tensorflow 0.12.1 on a GPU. I have a trained Deep CNN model whose weights I've saved using a checkpoint file. During inference, I reload the saved checkpoint using restorer.restore(sess, tf.train.latest_checkpoint(FLAGS.train_dir)). The code seems to run without issues, but every time I re-run the script, I'm getting screwed up outputs. AFAIK, I do not shuffle my test set inputs. The inputs are being loaded and fed to the network properly. It is just that different runs of the CNN on the same test set, in the same order, produce very different outputs. I'm perplexed! Also, how do I execute a graph loaded with a saved checkpoint without running an init_op during inference? It seems my code requires all global and local variables to be initialized before execution. (I initialize first, and only then restore the checkpoint!) Here's a snippet of my code:
import tensorflow as tf
import numpy as np
import os
import os.path
from datetime import datetime
import time
import random
import json
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from modelFCNN3 import model
def read_input(inp_queue, height=224, width=224, channels=3, mask=False):
    """Read a PNG file and return it as a fixed-size float32 image tensor.

    Args:
        inp_queue: value passed straight to ``tf.read_file``; the caller in
            this script passes one element of a
            ``tf.train.slice_input_producer`` output.
        height: target height the decoded image is resized to.
        width: target width the decoded image is resized to.
        channels: channel count declared via ``set_shape`` / ``reshape``.
        mask: if true, binarize the image (values >= 128 become 1.0, all
            others 0.0); otherwise apply
            ``tf.image.per_image_standardization``.

    Returns:
        A float32 tensor of shape ``[height, width, channels]``.
    """
    value = tf.read_file(inp_queue)
    image = tf.image.decode_png(value)
    # BICUBIC is the named form of the literal method=2 used previously.
    image = tf.image.resize_images(image, [height, width],
                                   method=tf.image.ResizeMethod.BICUBIC)
    image = tf.cast(image, tf.uint8)
    # Pin the static shape so downstream batching knows the dimensions.
    image.set_shape([height, width, channels])
    image = tf.reshape(image, [height, width, channels])
    if mask:
        image = tf.to_float(tf.greater_equal(image, 128))
        image = tf.cast(image, tf.float32)
    else:
        image = tf.image.per_image_standardization(image)
        image = tf.cast(image, tf.float32)
    return image
# Inference script: read the test file lists, build the input queue and the
# model, restore the latest checkpoint and accumulate mean IoU over one epoch.
if __name__ == '__main__':
    tf.reset_default_graph()

    with open('X_test.json', 'r') as infile:
        X_test = json.load(infile)
    with open('y_test.json', 'r') as infile:
        y_test = json.load(infile)

    imagelist = ops.convert_to_tensor(X_test, dtype=dtypes.string)
    labellist = ops.convert_to_tensor(y_test, dtype=dtypes.string)
    # One pass over the data, in order.
    input_queue = tf.train.slice_input_producer([imagelist, labellist],
                                                num_epochs=1,
                                                shuffle=False)
    image = read_input(input_queue[0], height=224, width=224, channels=3, mask=False)
    label = read_input(input_queue[1], height=224, width=224, channels=1, mask=True)
    images_batch, labels_batch = tf.train.batch(
        [image, label], batch_size=FLAGS.batch_size,
        enqueue_many=False, shapes=None, allow_smaller_final_batch=True)

    global_step = tf.Variable(0, trainable=False)
    images = tf.placeholder_with_default(images_batch, shape=[None, 224, 224, 3])
    labels = tf.placeholder_with_default(labels_batch, shape=[None, 224, 224, 1])

    # NOTE(review): the Saver is constructed before model(images) is called on
    # the next line. A Saver created without an explicit variable list
    # presumably only covers variables that already exist at this point, so
    # the model weights may never be restored and would keep the random values
    # from the initializer below. That would explain outputs that change from
    # run to run. Confirm, and if so build the Saver after the model.
    restorer = tf.train.Saver()
    logits = model(images).logits
    labels = tf.cast(labels, tf.int32)
    labels.set_shape([FLAGS.batch_size, 224, 224, 1])
    valid_prediction = tf.argmax(tf.nn.softmax(logits), dimension=3)
    valid_prediction.set_shape([FLAGS.batch_size, 224, 224])
    meanIOU, update_op_mIOU = tf.contrib.metrics.streaming_mean_iou(
        tf.cast(valid_prediction, tf.int32), tf.squeeze(labels), FLAGS.num_classes)

    init = tf.global_variables_initializer()
    init_locals = tf.local_variables_initializer()

    with tf.Session() as sess:
        # Initialize everything first, then overwrite with checkpoint values.
        sess.run([init, init_locals])
        restorer.restore(sess, tf.train.latest_checkpoint(FLAGS.train_dir))
        print("Model restored.")

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
        try:
            step = 0
            avg = []
            while not coord.should_stop():
                # NOTE(review): these are two separate sess.run calls; each one
                # presumably pulls its own batch from the queue, so the metric
                # update would not see the batch fetched on the line above.
                # Confirm whether they should be merged into one call.
                myimg, predimg, mylbl = sess.run([images, valid_prediction, labels])
                mIOU, _ = sess.run([meanIOU, update_op_mIOU])
                avg.append(mIOU)
                step += 1
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
        sess.close()
Are you running on the same machine or on a different machine?
#saver = tf.train.Saver()
The following comment is in tensorflow docs
#NOTE: Restarting training from saved meta_graph only works if the device assignments have not changed.
#saver = tf.train.import_meta_graph(metafile)