Splitting a TensorFlow protobuf into two separate models

As part of my team's security measures, I need to split a TensorFlow protobuf model into two parts. The idea is that both protobuf splits can be stored separately; when the end user needs the model, the original model can be restored from the two splits.
My current approach is to load the .pb file, split the model into two graphs, and then save each graph.
import copy

import six
import tensorflow as tf
from tensorflow.core.framework import graph_pb2
# Private helpers reused from TensorFlow's graph_util_impl (see note below).
from tensorflow.python.framework.graph_util_impl import (
    _assert_nodes_are_present, _bfs_for_reachable_nodes, _extract_graph_summary)

def extract_sub_graph(graph_def, dest_nodes):
    if not isinstance(graph_def, graph_pb2.GraphDef):
        raise TypeError("graph_def must be a graph_pb2.GraphDef proto.")
    if isinstance(dest_nodes, six.string_types):
        raise TypeError("dest_nodes must be a list.")
    name_to_input_name, name_to_node, name_to_seq_num = _extract_graph_summary(graph_def)
    _assert_nodes_are_present(name_to_node, dest_nodes)
    nodes_to_keep = _bfs_for_reachable_nodes(dest_nodes, name_to_input_name)
    # Keep only the requested nodes, not their full reachable closure.
    nodes_to_keep_copy = copy.deepcopy(nodes_to_keep)
    for node in nodes_to_keep_copy:
        if node not in dest_nodes:
            nodes_to_keep.remove(node)
    nodes_to_keep_list = sorted(
        list(nodes_to_keep), key=lambda n: name_to_seq_num[n])
    # Now construct the output GraphDef
    out = graph_pb2.GraphDef()
    for n in nodes_to_keep_list:
        out.node.extend([copy.deepcopy(name_to_node[n])])
    out.library.CopyFrom(graph_def.library)
    out.versions.CopyFrom(graph_def.versions)
    return out

def split_model(graph_def):
    graph_nodes = [n for n in graph_def.node]
    node_names = []
    for t in graph_nodes:
        node_names.append(t.name)
    middle_node_index = int(len(graph_nodes) / 2)
    subgraph_1_nodes = []
    subgraph_2_nodes = []
    for i in range(middle_node_index, len(graph_nodes)):
        subgraph_1_nodes.append(node_names[i])
    for i in range(0, middle_node_index):
        subgraph_2_nodes.append(node_names[i])
    subgraph_1 = extract_sub_graph(graph_def, subgraph_1_nodes)
    subgraph_2 = extract_sub_graph(graph_def, subgraph_2_nodes)
    subgraphs = [subgraph_1, subgraph_2]
    return subgraphs

if __name__ == "__main__":
    weights_path = "model.pb"
    pbtxt_path = "protobuf_text.pbtxt"
    with tf.gfile.FastGFile(weights_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')
    subgraphs = split_model(graph_def)
extract_sub_graph() above is a slightly modified version of the function in tensorflow.python.framework.graph_util_impl.
I am struggling to save the graphs as protobuf files. I tried tf.io.write_file() and tf.keras.models.save_model(), but neither of them worked. What is the proper way to save a graph_pb2.GraphDef proto to a file?
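For reference, a GraphDef is an ordinary protocol buffer message, so one common way to persist it is to serialize it to bytes yourself (tf.io.write_graph is another option). The snippet below is a minimal sketch of that idea, together with a naive way the two halves could later be stitched back into a single GraphDef; the file names are placeholders, the helper functions are mine, and none of this has been validated against this particular model.

def save_graph_def(graph_def, path):
    # A GraphDef is a protobuf message, so it can be written out as raw bytes.
    with tf.gfile.GFile(path, 'wb') as f:
        f.write(graph_def.SerializeToString())

def load_graph_def(path):
    graph_def = graph_pb2.GraphDef()
    with tf.gfile.GFile(path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    return graph_def

# split_model() returns [second_half, first_half], so index accordingly.
save_graph_def(subgraphs[1], "model_first_half.pb")    # placeholder file names
save_graph_def(subgraphs[0], "model_second_half.pb")

# Naive restore: concatenate the node lists of both halves into one GraphDef.
first_half = load_graph_def("model_first_half.pb")
second_half = load_graph_def("model_second_half.pb")
restored = graph_pb2.GraphDef()
restored.node.extend(first_half.node)
restored.node.extend(second_half.node)
restored.versions.CopyFrom(first_half.versions)
restored.library.CopyFrom(first_half.library)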

Related

Two models of the same architecture with same weights giving different results

Problem
After copying weights from a pretrained model, I do not get the same output.
Description
The tf2cv repository provides pretrained models in TF2 for various backbones. Unfortunately, the codebase is of limited use to me because it uses subclassing via tf.keras.Model, which makes it very hard to extract intermediate outputs and gradients at will. I therefore embarked upon rewriting the code for the backbones using the functional API. After rewriting the ResNet architecture code, I copied their weights into my model and saved it in the SavedModel format. To test whether this was done correctly, I gave the same input to my model instance and theirs, and the results were different.
My approaches to debugging the problem
I checked the number of trainable and non-trainable parameters and they are the same between my model instance and theirs.
I checked if all trainable weights have been copied, which they have.
My present line of thinking
I think it might be possible that the weights have not been copied to the correct layers. For example: Layer X and Layer Y might have weights of the same shape, but during weight copying, the weights of Layer Y might have gone into Layer X and vice versa. This is only possible if I have not mapped the layer names between the two models properly.
However, I have checked exhaustively and have not found any error so far.
The Code
My code is attached below. Their (tfcv) code for resnet can be found here
Please note that resnet_orig in the following snippet is the same as here
My converted code can be found here
from vision.image import resnet as myresnet
from glob import glob
from loguru import logger
import tensorflow as tf
import resnet_orig
import re
import os
import numpy as np
from time import time
from copy import deepcopy

tf.random.set_seed(time())

models = [
    'resnet10',
    'resnet12',
    'resnet14',
    'resnetbc14b',
    'resnet16',
    'resnet18_wd4',
    'resnet18_wd2',
    'resnet18_w3d4',
    'resnet18',
    'resnet26',
    'resnetbc26b',
    'resnet34',
    'resnetbc38b',
    'resnet50',
    'resnet50b',
    'resnet101',
    'resnet101b',
    'resnet152',
    'resnet152b',
    'resnet200',
    'resnet200b',
]

def zipdir(path, ziph):
    # ziph is zipfile handle
    for root, dirs, files in os.walk(path):
        for file in files:
            ziph.write(os.path.join(root, file),
                       os.path.relpath(os.path.join(root, file),
                                       os.path.join(path, '..')))

def find_model_file(model_type):
    model_files = glob('*.h5')
    for m in model_files:
        if '{}-'.format(model_type) in m:
            return m
    return None

def remap_our_model_variables(our_variables, model_name):
    remapped = list()
    reg = re.compile(r'(stage\d+)')
    for var in our_variables:
        newvar = var.replace(model_name, 'features/features')
        stage_search = re.search(reg, newvar)
        if stage_search is not None:
            stage_search = stage_search[0]
            newvar = newvar.replace(stage_search, '{}/{}'.format(stage_search, stage_search))
        newvar = newvar.replace('conv_preact', 'conv/conv')
        newvar = newvar.replace('conv_bn', 'bn')
        newvar = newvar.replace('logits', 'output1')
        remapped.append(newvar)
    remap_dict = dict([(x, y) for x, y in zip(our_variables, remapped)])
    return remap_dict

def get_correct_variable(variable_name, trainable_variable_names):
    for i, var in enumerate(trainable_variable_names):
        if variable_name == var:
            return i
    logger.info('Uffff.....')
    return None

layer_regexp_compiled = re.compile(r'(.*)\/.*')
model_files = glob('*.h5')
a = np.ones(shape=(1, 224, 224, 3), dtype=np.float32)
inp = tf.constant(a, dtype=tf.float32)

for model_type in models:
    logger.info('Model is {}.'.format(model_type))
    model = eval('myresnet.{}(input_height=224,input_width=224,'
                 'num_classes=1000,data_format="channels_last")'.format(model_type))
    model2 = eval('resnet_orig.{}(data_format="channels_last")'.format(model_type))
    model2.build(input_shape=(None, 224, 224, 3))
    model_name = find_model_file(model_type)
    logger.info('Model file is {}.'.format(model_name))
    original_weights = deepcopy(model2.weights)
    if model_name is not None:
        e = model2.load_weights(model_name, by_name=True, skip_mismatch=False)
        print(e)
        loaded_weights = deepcopy(model2.weights)
    else:
        logger.info('Pretrained model is not available for {}.'.format(model_type))
        continue
    diff = [np.mean(x.numpy() - y.numpy()) for x, y in zip(original_weights, loaded_weights)]
    our_model_weights = model.weights
    their_model_weights = model2.weights
    assert (len(our_model_weights) == len(their_model_weights))
    our_variable_names = [x.name for x in model.weights]
    their_variable_names = [x.name for x in model2.weights]
    remap_dict = remap_our_model_variables(our_variable_names, model_type)
    new_weights = list()
    for i in range(len(our_model_weights)):
        our_name = model.weights[i].name
        remapped_name = remap_dict[our_name]
        source_index = get_correct_variable(remapped_name, their_variable_names)
        new_weights.append(model2.weights[source_index].value())
        logger.debug('Copying from {} ({}) to {} ({}).'.format(
            model2.weights[source_index].name,
            model2.weights[source_index].value().shape,
            model.weights[i].name,
            model.weights[i].value().shape))
    logger.info(len(new_weights))
    logger.info('Setting new weights')
    model.set_weights(new_weights)
    logger.info('Finished setting new weights.')
    their_output = model2(inp)
    our_output = model(inp)
    logger.info(np.max(their_output.numpy() - our_output.numpy()))
    logger.info(diff)  # This must be 0.0
    break

Retrain Frozen Graph in Tensorflow 2.x

I have managed to implement retraining of a frozen graph in TensorFlow 1 according to this wonderfully detailed topic. Basically, the methodology is:
Load the frozen model.
Replace each constant frozen node with a variable node.
Redirect the newly created variable node to the corresponding output of the frozen node.
This works in TensorFlow 1.x, which I verified by checking tf.compat.v1.trainable_variables. However, in TensorFlow 2.x it no longer works.
Below is the code snippet:
1/ Load frozen model
frozen_path = '...'
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.compat.v1.GraphDef()
    with tf.compat.v1.io.gfile.GFile(frozen_path, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.graph_util.import_graph_def(od_graph_def, name='')
2/ Create a clone
with detection_graph.as_default():
    const_var_name_pairs = {}
    probable_variables = [op for op in detection_graph.get_operations() if op.type == "Const"]
    available_names = [op.name for op in detection_graph.get_operations()]
    for op in probable_variables:
        name = op.name
        if name + '/read' not in available_names:
            continue
        tensor = detection_graph.get_tensor_by_name('{}:0'.format(name))
        with tf.compat.v1.Session() as s:
            tensor_as_numpy_array = s.run(tensor)
        var_shape = tensor.get_shape()
        # Give each variable a name that doesn't already exist in the graph
        var_name = '{}_turned_var'.format(name)
        var = tf.Variable(name=var_name, dtype=op.outputs[0].dtype,
                          initial_value=tensor_as_numpy_array, trainable=True, shape=var_shape)
        const_var_name_pairs[name] = var_name
3/ Replace frozen node by Graph Editor
import graph_def_editor as ge

ge_graph = ge.Graph(detection_graph.as_graph_def())
name_to_op = dict([(n.name, n) for n in ge_graph.nodes])
for const_name, var_name in const_var_name_pairs.items():
    const_op = name_to_op[const_name + '/read']
    var_reader_op = name_to_op[var_name + '/Read/ReadVariableOp']
    ge.swap_outputs(ge.sgv(const_op), ge.sgv(var_reader_op))
detection_training_graph = ge_graph.to_tf_graph()
with detection_training_graph.as_default():
    writer = tf.compat.v1.summary.FileWriter('remap', detection_training_graph)
    writer.close()
The problem was in my use of the Graph Editor: I imported the tf.GraphDef instead of the original tf.Graph that holds the Variables.
This is quickly solved by fixing step 3.
Sol1: Using Graph Editor
ge_graph = ge.Graph(detection_graph)
for const_name, var_name in const_var_name_pairs.items():
    const_op = ge_graph._node_name_to_node[const_name + '/read']
    var_reader_op = ge_graph._node_name_to_node[var_name + '/Read/ReadVariableOp']
    ge.swap_outputs(ge.sgv(const_op), ge.sgv(var_reader_op))
However, this requires disabling eager execution (tf.compat.v1.disable_eager_execution()). To keep eager execution enabled, you should instead attach the MetaGraphDef to the Graph Editor as below:
with detection_graph.as_default():
    meta_saver = tf.compat.v1.train.Saver()
    meta = meta_saver.export_meta_graph()
ge_graph = ge.Graph(detection_graph, collections=ge.graph._extract_collection_defs(meta))
However, the trickiest part is making the model trainable in TF 2.x.
Instead of letting the Graph Editor export the graph directly, we should export it ourselves. The reason is that the Graph Editor turns the Variables' data type into resources. Therefore, we should export the graph as a GraphDef and import the VariableDefs into the new graph:
from tensorflow.core.framework import variable_pb2

test_graph = tf.Graph()
with test_graph.as_default():
    tf.import_graph_def(ge_graph.to_graph_def(), name="")
    for var_name in ge_graph.variable_names:
        var = ge_graph.get_variable_by_name(var_name)
        ret = variable_pb2.VariableDef()
        ret.variable_name = var._variable_name
        ret.initial_value_name = var._initial_value_name
        ret.initializer_name = var._initializer_name
        ret.snapshot_name = var._snapshot_name
        ret.trainable = var._trainable
        ret.is_resource = True
        tf_var = tf.Variable(variable_def=ret, dtype=tf.float32)
        test_graph.add_to_collections(var.collection_names, tf_var)
Sol2: Manually map by Graphdef
with detection_graph.as_default() as graph:
    training_graph_def = remap_input_node(detection_graph.as_graph_def(), const_var_name_pairs)
    current_var = tf.compat.v1.trainable_variables()
    assert len(current_var) > 0, "no training variables"
detection_training_graph = tf.Graph()
with detection_training_graph.as_default():
    tf.graph_util.import_graph_def(training_graph_def, name='')
    for var in current_var:
        ret = variable_pb2.VariableDef()
        ret.variable_name = var.name
        ret.initial_value_name = var.name[:-2] + '/Initializer/initial_value:0'
        ret.initializer_name = var.name[:-2] + '/Assign'
        ret.snapshot_name = var.name[:-2] + '/Read/ReadVariableOp:0'
        ret.trainable = True
        ret.is_resource = True
        tf_var = tf.Variable(variable_def=ret, dtype=tf.float32)
        detection_training_graph.add_to_collections({'trainable_variables', 'variables'}, tf_var)
    current_var = tf.compat.v1.trainable_variables()
    assert len(current_var) > 0, "no training variables"

In tensorflow, for custom layers that need arguments at instantiation, does the get_config method need overriding?

Ubuntu - 20.04,
Tensorflow - 2.2.0,
Tensorboard - 2.2.1
I have read that one needs to reimplement the get_config method in order for a custom layer to be serializable.
I have a custom layer that accepts arguments in its __init__. It uses another custom layer, which consumes arguments in its __init__ as well. I can:
Without Tensorboard callbacks:
Use them in a model both in eager mode and in graph form
Run tf.saved_model.save, and it executes without a glitch
Load the thus-saved model using tf.saved_model.load, and it loads the model saved in 2 above
Call model(input) on the loaded model. I can also call call_and_return_all_conditional_losses(input), and they run right as well
With Tensorboard callbacks:
All of the above (can .fit, save, load, predict from the loaded model, etc.), except that while running fit I get:
WARNING:tensorflow:Model failed to serialize as JSON. Ignoring... Layer PREPROCESS_MONSOON has arguments in `__init__` and therefore must override `get_config`.
I'm pasting the entire code here so it can be run end to end; you just need TensorFlow 2 installed. Please remove/add the callbacks (only the TensorBoard callback is there) in .fit to see the two behaviors mentioned above.
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers as l
from tensorflow import keras as k
import numpy as np

##making empty directories
import os
os.makedirs('r_data', exist_ok=True)
os.makedirs('r_savedir', exist_ok=True)

#Preparing the dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train_ = pd.DataFrame(x_train.reshape(60000, -1), columns=['col_' + str(i) for i in range(28 * 28)])
x_test_ = pd.DataFrame(x_test.reshape(10000, -1), columns=['col_' + str(i) for i in range(28 * 28)])
x_train_['col_cat1'] = [np.random.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']) for i in range(x_train_.shape[0])]
x_test_['col_cat1'] = [np.random.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']) for i in range(x_test_.shape[0])]
x_train_['col_cat2'] = [np.random.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']) for i in range(x_train_.shape[0])]
x_test_['col_cat2'] = [np.random.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']) for i in range(x_test_.shape[0])]
x_train_[np.random.choice([True, False], size=x_train_.shape, p=[0.05, 0.95]).reshape(x_train_.shape)] = np.nan
x_test_[np.random.choice([True, False], size=x_test_.shape, p=[0.05, 0.95]).reshape(x_test_.shape)] = np.nan
x_train_.to_csv('r_data/x_train.csv', index=False)
x_test_.to_csv('r_data/x_test.csv', index=False)
pd.DataFrame(y_train).to_csv('r_data/y_train.csv', index=False)
pd.DataFrame(y_test).to_csv('r_data/y_test.csv', index=False)

#**THE MAIN LAYER THAT WE ARE TALKING ABOUT**
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import feature_column
import os

class NUM_TO_DENSE(layers.Layer):
    def __init__(self, num_cols):
        super().__init__()
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]
    # def get_config(self):
    #     config = super().get_config().copy()
    #     config.update({
    #         'keys': self.keys,
    #         'keys_all': self.keys_all,
    #     })
    #     return config
    def build(self, input_shape):
        def create_moving_mean_vars():
            return tf.Variable(initial_value=0., shape=(), dtype=tf.float32, trainable=False)
        self.moving_means_total = {t: create_moving_mean_vars() for t in self.keys}
        self.layer_global_counter = tf.Variable(initial_value=0., shape=(), dtype=tf.float32, trainable=False)

    def call(self, inputs, training=True):
        null_cols = {k: tf.math.is_finite(inputs[k]) for k in self.keys}
        current_means = {}

        def compute_update_current_means(t):
            current_mean = tf.math.divide_no_nan(
                tf.reduce_sum(tf.where(null_cols[t], inputs[t], 0.), axis=0),
                tf.reduce_sum(tf.cast(tf.math.is_finite(inputs[t]), tf.float32), axis=0))
            self.moving_means_total[t].assign_add(current_mean)
            return current_mean

        if training:
            current_means = {t: compute_update_current_means(t) for t in self.keys}
            outputs = {t: tf.where(null_cols[t], inputs[t], current_means[t]) for t in self.keys}
            outputs.update({str(k) + '__nullcol': tf.cast(null_cols[k], tf.float32) for k in self.keys})
            self.layer_global_counter.assign_add(1.)
        else:
            outputs = {t: tf.where(null_cols[t], inputs[t], (self.moving_means_total[t] / self.layer_global_counter))
                       for t in self.keys}
            outputs.update({str(k) + '__nullcol': tf.cast(null_cols[k], tf.float32) for k in self.keys})
        return outputs

class PREPROCESS_MONSOON(layers.Layer):
    def __init__(self, cat_cols_with_unique_values, num_cols):
        '''cat_cols_with_unique_values: (dict) {'col_cat':[unique_values_list]}
        num_cols: (list) [num_cols_name_list]'''
        super().__init__()
        self.cat_cols = cat_cols_with_unique_values
        self.num_cols = num_cols
    # def get_config(self):
    #     config = super().get_config().copy()
    #     config.update({
    #         'cat_cols': self.cat_cols,
    #         'num_cols': self.num_cols,
    #     })
    #     return config
    def build(self, input_shape):
        self.ntd = NUM_TO_DENSE(self.num_cols)
        self.num_colnames = self.ntd.keys_all
        self.ctd = {k: layers.DenseFeatures(
                        feature_column.embedding_column(
                            feature_column.categorical_column_with_vocabulary_list(k, v),
                            tf.cast(tf.math.ceil(tf.math.log(tf.cast(len(self.cat_cols[k]), tf.float32))), tf.int32).numpy()))
                    for k, v in self.cat_cols.items()}
        self.cat_colnames = [i for i in self.cat_cols]
        self.dense_colnames = self.num_colnames + self.cat_colnames

    def call(self, inputs, training=True):
        dense_num_d = self.ntd(inputs, training=training)
        dense_cat_d = {k: self.ctd[k](inputs) for k in self.cat_colnames}
        dense_num = tf.stack([dense_num_d[k] for k in self.num_colnames], axis=1)
        dense_cat = tf.concat([dense_cat_d[k] for k in self.cat_colnames], axis=1)
        dense_all = tf.concat([dense_num, dense_cat], axis=1)
        return dense_all

##Inputs
label_path = 'r_data/y_train.csv'
data_path = 'r_data/x_train.csv'
max_epochs = 100
batch_size = 32
shuffle_seed = 42

##Creating layer inputs
dfs = pd.read_csv(data_path, nrows=1)
cdtypes_x = dfs.dtypes
nc = list(dfs.select_dtypes(include=[int, float]).columns)
oc = list(dfs.select_dtypes(exclude=[int, float]).columns)
cdtypes_y = pd.read_csv(label_path, nrows=1).dtypes
dfc = pd.read_csv(data_path, usecols=oc)
ccwuv = {i: list(pd.Series(dfc[i].unique()).dropna()) for i in dfc.columns}
preds_name = pd.read_csv(label_path, nrows=1).columns

##creating datasets
dataset = tf.data.experimental.make_csv_dataset(
    'r_data/x_train.csv', batch_size, column_names=cdtypes_x.index, prefetch_buffer_size=1,
    shuffle=True, shuffle_buffer_size=10000, shuffle_seed=shuffle_seed)
labels = tf.data.experimental.make_csv_dataset(
    'r_data/y_train.csv', batch_size, column_names=cdtypes_y.index, prefetch_buffer_size=1,
    shuffle=True, shuffle_buffer_size=10000, shuffle_seed=shuffle_seed)
dataset = tf.data.Dataset.zip((dataset, labels))

##CREATING NETWORK
p = PREPROCESS_MONSOON(cat_cols_with_unique_values=ccwuv, num_cols=nc)
indict = {}
for i in nc:
    indict[i] = k.Input(shape=(), name=i, dtype=tf.float32)
for i in ccwuv:
    indict[i] = k.Input(shape=(), name=i, dtype=tf.string)
x = p(indict)
x = l.BatchNormalization()(x)
x = l.Dense(10, activation='relu', name='dense_1')(x)
predictions = l.Dense(10, activation=None, name=preds_name[0])(x)
model = k.Model(inputs=indict, outputs=predictions)

##Compiling model
model.compile(optimizer=k.optimizers.Adam(),
              loss=k.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['sparse_categorical_accuracy'])

##callbacks
log_dir = './tensorboard_dir/no_config'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

## Fit model on training data
history = model.fit(dataset,
                    batch_size=64,
                    epochs=30,
                    steps_per_epoch=5,
                    validation_split=0.,
                    callbacks=[tensorboard_callback])

#saving the model
tf.saved_model.save(model, 'r_savedir')

#loading the model
model = tf.saved_model.load('r_savedir')

##Predicting on loaded model
for i in dataset:
    print(model(i[0], training=False))
    break
I have commented out the part of the code where I override the get_config method in my custom layers; if you comment it back in, the warning about the layers not being serializable goes away.
Question:
Do I or do I not need to override the get_config method in order to make a custom layer that accepts arguments in __init__ serializable?
Thank you in advance for the help.
You must add get_config to your code:
def get_config(self):
    config = super().get_config()
    return config
The NUM_TO_DENSE class must look like this:
class NUM_TO_DENSE(layers.Layer):
    def __init__(self, num_cols):
        super().__init__()
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]

    def get_config(self):
        config = super().get_config()
        return config
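That minimal get_config is enough to silence the warning, but note that if the layer ever has to be rebuilt from its config (for example via model.to_json or tf.keras.models.load_model with custom_objects), the config should also carry the constructor arguments. A sketch of that fuller version for NUM_TO_DENSE, assuming num_cols is a plain list of column-name strings:

class NUM_TO_DENSE(layers.Layer):
    def __init__(self, num_cols, **kwargs):
        super().__init__(**kwargs)
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]

    def get_config(self):
        config = super().get_config()
        # Include the constructor argument so cls.from_config(config) can recreate the layer.
        config.update({'num_cols': self.keys})
        return config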

Modify and combine two different frozen graphs generated using tensorflow object detection API for inference

I am working with TensorFlow object detection API, I have trained two different(SSD-mobilenet and FRCNN-inception-v2) models for my use case. Currently, my workflow is like this:
Take an input image, detect one particular object using SSD-mobilenet.
Crop the input image with the bounding box generated from step 1 and then resize it to a fixed size (e.g. 200 x 300).
Feed this cropped and resized image to FRCNN-inception-V2 for detecting smaller objects inside the ROI.
Currently, at inference time, when I load the two separate frozen graphs and follow these steps, I get my desired results. But I need a single frozen graph because of my deployment requirements. I am new to TensorFlow and want to combine both graphs, with the crop-and-resize step in between them.
Thanks @matt and @Vedanshu for responding. Here is the updated code that works fine for my requirement. Please give suggestions if it needs any improvement, as I am still learning.
# Dependencies
import tensorflow as tf
import numpy as np

# load graphs using pb file path
def load_graph(pb_file):
    graph = tf.Graph()
    with graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pb_file, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    return graph

# returns tensor dictionaries from graph
def get_inference(graph, count=0):
    with graph.as_default():
        ops = tf.get_default_graph().get_operations()
        all_tensor_names = {output.name for op in ops for output in op.outputs}
        tensor_dict = {}
        for key in ['num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks', 'image_tensor']:
            # tensors imported from an additional graph carry a numeric suffix
            tensor_name = key + ':0' if count == 0 else key + '_{}:0'.format(count)
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph().\
                    get_tensor_by_name(tensor_name)
        return tensor_dict

# renames while_context because there is one while function for every graph
# open issue at https://github.com/tensorflow/tensorflow/issues/22162
def rename_frame_name(graphdef, suffix):
    for n in graphdef.node:
        if "while" in n.name:
            if "frame_name" in n.attr:
                n.attr["frame_name"].s = str(n.attr["frame_name"]).replace("while_context",
                                                                           "while_context" + suffix).encode('utf-8')

if __name__ == '__main__':
    # your pb file paths
    frozenGraphPath1 = '...replace_with_your_path/some_frozen_graph.pb'
    frozenGraphPath2 = '...replace_with_your_path/some_frozen_graph.pb'
    # new file name to save combined model
    combinedFrozenGraph = 'combined_frozen_inference_graph.pb'

    # loads both graphs
    graph1 = load_graph(frozenGraphPath1)
    graph2 = load_graph(frozenGraphPath2)

    # get tensor names from first graph
    tensor_dict1 = get_inference(graph1)

    with graph1.as_default():
        # getting tensors to add crop and resize step
        image_tensor = tensor_dict1['image_tensor']
        scores = tensor_dict1['detection_scores'][0]
        num_detections = tf.cast(tensor_dict1['num_detections'][0], tf.int32)
        detection_boxes = tensor_dict1['detection_boxes'][0]

        # I had to add NMS because my ssd model outputs 100 detections and hence it runs out of memory because of the huge tensor shape
        selected_indices = tf.image.non_max_suppression(detection_boxes, scores, 5, iou_threshold=0.5)
        selected_boxes = tf.gather(detection_boxes, selected_indices)

        # intermediate crop and resize step, which will be input for second model(FRCNN)
        cropped_img = tf.image.crop_and_resize(image_tensor,
                                               selected_boxes,
                                               tf.zeros(tf.shape(selected_indices), dtype=tf.int32),
                                               [300, 60]  # resize to 300 X 60
                                               )
        cropped_img = tf.cast(cropped_img, tf.uint8, name='cropped_img')

    gdef1 = graph1.as_graph_def()
    gdef2 = graph2.as_graph_def()

    g1name = "graph1"
    g2name = "graph2"

    # renaming while_context in both graphs
    rename_frame_name(gdef1, g1name)
    rename_frame_name(gdef2, g2name)

    # This combines both models and saves them as one
    with tf.Graph().as_default() as g_combined:
        x, y = tf.import_graph_def(gdef1, return_elements=['image_tensor:0', 'cropped_img:0'])
        z, = tf.import_graph_def(gdef2, input_map={"image_tensor:0": y}, return_elements=['detection_boxes:0'])
        tf.train.write_graph(g_combined, "./", combinedFrozenGraph, as_text=False)
You can feed the output of one graph into another using input_map in import_graph_def. You also have to rename the while_context, because there is one while context per graph. Something like this:
def get_frozen_graph(graph_file):
    """Read Frozen Graph file from disk."""
    with tf.gfile.GFile(graph_file, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    return graph_def

def rename_frame_name(graphdef, suffix):
    # Bug reported at https://github.com/tensorflow/tensorflow/issues/22162#issuecomment-428091121
    for n in graphdef.node:
        if "while" in n.name:
            if "frame_name" in n.attr:
                n.attr["frame_name"].s = str(n.attr["frame_name"]).replace("while_context",
                                                                           "while_context" + suffix).encode('utf-8')

...

l1_graph = tf.Graph()
with l1_graph.as_default():
    trt_graph1 = get_frozen_graph(pb_fname1)
    [tf_input1, tf_scores1, tf_boxes1, tf_classes1, tf_num_detections1] = tf.import_graph_def(trt_graph1,
        return_elements=['image_tensor:0', 'detection_scores:0', 'detection_boxes:0', 'detection_classes:0', 'num_detections:0'])

    input1 = tf.identity(tf_input1, name="l1_input")
    boxes1 = tf.identity(tf_boxes1[0], name="l1_boxes")  # index by 0 to remove batch dimension
    scores1 = tf.identity(tf_scores1[0], name="l1_scores")
    classes1 = tf.identity(tf_classes1[0], name="l1_classes")
    num_detections1 = tf.identity(tf.dtypes.cast(tf_num_detections1[0], tf.int32), name="l1_num_detections")

    ...
    # Make your output tensor
    tf_out = # your output tensor (here, crop the input image with the bounding box generated from step 1 and then resize it to a fixed size (e.g. 200 X 300).)
    ...

connected_graph = tf.Graph()
with connected_graph.as_default():
    l1_graph_def = l1_graph.as_graph_def()
    g1name = 'ved'
    rename_frame_name(l1_graph_def, g1name)
    tf.import_graph_def(l1_graph_def, name=g1name)

    ...

    trt_graph2 = get_frozen_graph(pb_fname2)
    g2name = 'level2'
    rename_frame_name(trt_graph2, g2name)
    [tf_scores, tf_boxes, tf_classes, tf_num_detections] = tf.import_graph_def(trt_graph2,
        input_map={'image_tensor': tf_out},
        return_elements=['detection_scores:0', 'detection_boxes:0', 'detection_classes:0', 'num_detections:0'])

#######
# Export the graph

with connected_graph.as_default():
    print('\nSaving...')
    cwd = os.getcwd()
    path = os.path.join(cwd, 'saved_model')
    shutil.rmtree(path, ignore_errors=True)
    inputs_dict = {
        "image_tensor": tf_input
    }
    outputs_dict = {
        "detection_boxes_l1": tf_boxes_l1,
        "detection_scores_l1": tf_scores_l1,
        "detection_classes_l1": tf_classes_l1,
        "max_num_detection": tf_max_num_detection,
        "detection_boxes_l2": tf_boxes_l2,
        "detection_scores_l2": tf_scores_l2,
        "detection_classes_l2": tf_classes_l2
    }
    tf.saved_model.simple_save(
        tf_sess_main, path, inputs_dict, outputs_dict
    )
    print('Ok')

Why am I getting shape errors when trying to pass a batch from the Tensorflow Dataset API to my session operations?

I am dealing with an issue in my conversion over to the Dataset API, and I guess I just don't have enough experience with the API yet to know how to handle the situation below. We currently perform image augmentation using queueing and batching. I was tasked with checking out the new Dataset API and converting our existing implementation to use it rather than queues.
What we would like to do is get a reference to all the paths and handle all operations from just that reference. As you can see in the dataset initialization, I have mapped the parse_fn to the dataset itself, which then goes about reading the file and extracting the initial values from the filenames. However, when I then call the iterator's next_batch method and pass those values to get_summary, I get an error about shape. I have been trying a number of things, which just keeps changing the error, so I felt I should see if anyone on SO can tell whether I am going about this all wrong and should be taking a different route. Does anything jump out as absolutely wrong in my use of the Dataset API?
Should I not be calling the ops this way any longer? I noticed that in the majority of the examples I saw, they would get the batch, pass the tensors to the op, capture that in a variable, and pass that to sess.run; however, I haven't yet found an easy way of doing that with our setup that wasn't erroring, so this is the approach I took instead (but it's still erroring). I'll continue trying to trace down the problem and will post here should I find anything, but if anyone sees something, please advise. Thanks!
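(For reference, the pattern I mean looks roughly like the sketch below: the iterator's output tensors are used to build the graph directly, and sess.run is called without a feed_dict. build_network here is just a stand-in name for our model-building code, not something that actually exists in our codebase.)

# Rough sketch of wiring the iterator output straight into the graph (no placeholders).
# 'build_network' is a hypothetical stand-in for the real model-building function.
next_images, next_labels, next_weights = iterator.get_next()   # reuse the iterator from preproc_image_fn
logits = build_network(next_images)
loss = tf.losses.sparse_softmax_cross_entropy(labels=next_labels, logits=logits)
train_op = tf.train.AdamOptimizer().minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    sess.run(iterator.initializer)
    _, loss_val = sess.run([train_op, loss])   # no feed_dict needed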
Current Error:
... in get_summary
    summary, acc = sess.run([self._summary_op, self._accuracy], feed_dict=feed_dict)
ValueError: Cannot feed value of shape (32,) for Tensor 'ph_input_labels:0', which has shape '(?, 1)'
Below is the block where the get_summary method is called and the error is raised:
def perform_train():
    if __name__ == '__main__':
        # Get all our image paths
        filenames = data_layer_train.get_image_paths()
        next_batch, iterator = preproc_image_fn(filenames=filenames)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            with sess.graph.as_default():
                # Set the random seed for tensorflow
                tf.set_random_seed(cfg.RNG_SEED)
                classifier_network = c_common.create_model(len(products_to_class_dict), is_training=True)
                optimizer, global_step_var = c_common.create_optimizer(classifier_network)
                sess.run(tf.local_variables_initializer())
                sess.run(tf.global_variables_initializer())
                # Init tables and dataset iterator
                sess.run(tf.tables_initializer())
                sess.run(iterator.initializer)
                cur_epoch = 0
                blobs = None
                try:
                    epoch_size = data_layer_train.get_steps_per_epoch()
                    num_steps = num_epochs * epoch_size
                    for step in range(num_steps):
                        timer_summary.tic()
                        if blobs is None:
                            # Now populate from our training dataset
                            blobs = sess.run(next_batch)
                        # *************** Below is where it is erroring *****************
                        summary_train, acc = classifier_network.get_summary(sess, blobs["images"], blobs["labels"], blobs["weights"])
    ...
I believe the error is in preproc_image_fn:
def preproc_image_fn(filenames, images=None, labels=None, image_paths=None, cells=None, weights=None):
    def _parse_fn(filename, label, weight):
        augment_instance = False
        paths = []
        selected_cells = []
        if vals.FIRST_ITER:
            # Perform our check of the path to see if _data_augmentation is within it
            # If so set augment_instance to true and replace the substring with an empty string
            new_filename = tf.regex_replace(filename, "_data_augmentation", "")
            contains = tf.equal(tf.size(tf.string_split([filename], "")), tf.size(tf.string_split([new_filename])))
            filename = new_filename
            if contains is True:
                augment_instance = True
        core_file = tf.string_split([filename], '\\').values[-1]
        product_id = tf.string_split([core_file], ".").values[0]
        label = search_tf_table_for_entry(product_id)
        weight = data_layer_train.get_weights(product_id)
        image_string = tf.read_file(filename)
        img = tf.image.decode_image(image_string, channels=data_layer_train._channels)
        img.set_shape([None, None, None])
        img = tf.image.resize_images(img, [data_layer_train._target_height, data_layer_train._target_width])
        # Previously I was returning the below, but I was getting an error from the op when assigning feed_dict stating that it didnt like the dictionary
        # retval = dict(zip([filename], [img])), label, weight
        retval = img, label, weight
        return retval

    num_files = len(filenames)
    filenames = tf.constant(filenames)

    # *********** Setup dataset below ************
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels, weights))
    dataset = dataset.map(_parse_fn)
    dataset = dataset.repeat()
    dataset = dataset.batch(32)
    iterator = dataset.make_initializable_iterator()
    batch_features, batch_labels, batch_weights = iterator.get_next()
    return {'images': batch_features, 'labels': batch_labels, 'weights': batch_weights}, iterator

def search_tf_table_for_entry(self, product_id):
    '''Looks up keys in the table and outputs the values. Will return -1 if not found '''
    if product_id is not None:
        return self._products_to_class_table.lookup(product_id)
    else:
        if not self._real_eval:
            logger().info("class not found in training {} ".format(product_id))
        return -1
Here is where I create the model and the placeholders used previously:
...
def create_model(self):
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)
    biases_regularizer = weights_regularizer

    # Input data.
    self._input_images = tf.placeholder(
        tf.float32, shape=(None, self._image_height, self._image_width, self._num_channels), name="ph_input_images")
    self._input_labels = tf.placeholder(tf.int64, shape=(None, 1), name="ph_input_labels")
    self._input_weights = tf.placeholder(tf.float32, shape=(None, 1), name="ph_input_weights")
    self._is_training = tf.placeholder(tf.bool, name='ph_is_training')
    self._keep_prob = tf.placeholder(tf.float32, name="ph_keep_prob")
    self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction, tf.float32))
    ...
    self.create_summaries()

def create_summaries(self):
    val_summaries = []
    with tf.device("/cpu:0"):
        for var in self._act_summaries:
            self._add_act_summary(var)
        for var in self._train_summaries:
            self._add_train_summary(var)
    self._summary_op = tf.summary.merge_all()
    self._summary_op_val = tf.summary.merge(val_summaries)

def get_summary(self, sess, images, labels, weights):
    feed_dict = {self._input_images: images, self._input_labels: labels,
                 self._input_weights: weights, self._is_training: False}
    summary, acc = sess.run([self._summary_op, self._accuracy], feed_dict=feed_dict)
    return summary, acc
Since the error says:
Cannot feed value of shape (32,) for Tensor 'ph_input_labels:0', which has shape '(?, 1)'
My guess is that your labels in get_summary have the shape (32,). Can you just reshape them to (32, 1)? Or maybe reshape the label earlier, in _parse_fn?
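For example (a sketch based on the snippets above; the exact call sites may need adapting), either reshape the numpy batch right before feeding it, or give the label and weight an explicit trailing dimension inside _parse_fn so each batch comes out with shape (batch, 1):

# Option 1: reshape the numpy arrays returned by sess.run(next_batch) before feeding them
summary_train, acc = classifier_network.get_summary(
    sess, blobs["images"],
    blobs["labels"].reshape(-1, 1),
    blobs["weights"].reshape(-1, 1))

# Option 2: add the trailing dimension inside _parse_fn, before batching
label = tf.reshape(label, [1])
weight = tf.reshape(weight, [1])
retval = img, label, weight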