How to only restore variables that are in the checkpoint in TensorFlow?

In TensorFlow, my model is based on a pre-trained model; I added a few variables and removed some from the pre-trained model. When I restore the variables from the checkpoint file, I have to explicitly specify all the variables I added to the graph that need to be excluded. For example, I did
exclude = [...]  # explicitly list all variables to exclude
variables_to_restore = slim.get_variables_to_restore(exclude=exclude)
saver = tf.train.Saver(variables_to_restore)
Is there a simpler way to do this? Namely, if a variable is not in the checkpoint, simply don't try to restore it.

You should first find out which variables are useful (meaning present in your graph as well), and then restore the intersection of the two sets from the checkpoint rather than everything in it.
ckpt_var_names = {name for name, _ in tf.train.list_variables(checkpoint_dir)}
variables_can_be_restored = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
                             if v.op.name in ckpt_var_names]
Then restore them after defining a saver like this:
temp_saver = tf.train.Saver(variables_can_be_restored)
ckpt_state = tf.train.get_checkpoint_state(checkpoint_dir, latest_filename)
print('Loading checkpoint %s' % ckpt_state.model_checkpoint_path)
temp_saver.restore(sess, ckpt_state.model_checkpoint_path)
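A hedged follow-up check (using the ckpt_var_names set from above): print which graph variables were skipped because they are absent from the checkpoint, since those are the ones you must initialize yourself.
skipped = [v.op.name for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
           if v.op.name not in ckpt_var_names]
print('Not restored (needs explicit init):', skipped)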

The only thing you can do is to first build the same model as in the checkpoint and restore the checkpoint values into it. After restoring the variables of that model, you can add new layers, delete existing layers, or change the weights of the layers.
But there is an important point you need to be careful about: after adding new layers you need to initialize them. If you use tf.global_variables_initializer(), you will lose the values of the reloaded layers. So you should initialize only the uninitialized weights; you can use the following function for this.
def initialize_uninitialized(sess):
    global_vars = tf.global_variables()
    is_not_initialized = sess.run([tf.is_variable_initialized(var) for var in global_vars])
    not_initialized_vars = [v for (v, f) in zip(global_vars, is_not_initialized) if not f]
    # for i in not_initialized_vars:  # only for testing
    #     print(i.name)
    if len(not_initialized_vars):
        sess.run(tf.variables_initializer(not_initialized_vars))
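A minimal usage sketch (hedged; checkpoint_path and saver are assumed to come from your own restore setup, as in the question above):
saver.restore(sess, checkpoint_path)  # restore the pre-trained variables first
initialize_uninitialized(sess)        # then initialize only the newly added ones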

This is a fuller answer that works for a non-distributed setting:
from tensorflow.contrib.framework.python.framework import checkpoint_utils

slim = tf.contrib.slim

def scan_checkpoint_for_vars(checkpoint_path, vars_to_check):
    check_var_list = checkpoint_utils.list_variables(checkpoint_path)
    check_var_set = set(x[0] for x in check_var_list)
    vars_in_checkpoint = [x for x in vars_to_check if x.name[:x.name.index(":")] in check_var_set]
    vars_not_in_checkpoint = [x for x in vars_to_check if x.name[:x.name.index(":")] not in check_var_set]
    return vars_in_checkpoint, vars_not_in_checkpoint

def create_easy_going_scaffold(vars_in_checkpoint, vars_not_in_checkpoint):
    model_ready_for_local_init_op = tf.report_uninitialized_variables(var_list=vars_in_checkpoint)
    model_init_vars_not_in_checkpoint = tf.variables_initializer(vars_not_in_checkpoint)
    restoration_saver = tf.train.Saver(vars_in_checkpoint)
    eg_scaffold = tf.train.Scaffold(saver=restoration_saver,
                                    ready_for_local_init_op=model_ready_for_local_init_op,
                                    local_init_op=model_init_vars_not_in_checkpoint)
    return eg_scaffold

all_vars = slim.get_variables()
ckpoint_file = tf.train.latest_checkpoint(output_chkpt_dir)
vars_in_checkpoint, vars_not_in_checkpoint = scan_checkpoint_for_vars(ckpoint_file, all_vars)
is_checkpoint_complete = len(vars_not_in_checkpoint) == 0

# Create a session that can handle the current checkpoint
if is_checkpoint_complete:
    # Checkpoint is full - all variables can be found there
    print('Using normal session')
    sess = tf.train.MonitoredTrainingSession(checkpoint_dir=output_chkpt_dir,
                                             save_checkpoint_secs=save_checkpoint_secs,
                                             save_summaries_secs=save_summaries_secs)
else:
    # Checkpoint is partial - some variables need to be initialized
    print('Using easy going session')
    eg_scaffold = create_easy_going_scaffold(vars_in_checkpoint, vars_not_in_checkpoint)
    # Save all variables to the next checkpoint
    saver = tf.train.Saver()
    hooks = [tf.train.CheckpointSaverHook(checkpoint_dir=output_chkpt_dir,
                                          save_secs=save_checkpoint_secs,
                                          saver=saver)]
    # Such a session is a little slower during the first iteration
    sess = tf.train.MonitoredTrainingSession(checkpoint_dir=output_chkpt_dir,
                                             scaffold=eg_scaffold,
                                             hooks=hooks,
                                             save_summaries_secs=save_summaries_secs,
                                             save_checkpoint_secs=None)

with sess:
    ...

Related

Retrain Frozen Graph in Tensorflow 2.x

I managed to implement retraining of a frozen graph in TensorFlow 1 according to this wonderful, detailed topic. Basically, the methodology is:
Load the frozen model.
Replace each constant frozen node with a variable node.
Redirect the newly created variable node to the corresponding output of the frozen node.
This works in TensorFlow 1.x, which can be verified by checking tf.compat.v1.trainable_variables. However, in TensorFlow 2.x it doesn't work anymore.
Below is the code snippet:
1/ Load frozen model
frozen_path = '...'
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.compat.v1.GraphDef()
    with tf.compat.v1.io.gfile.GFile(frozen_path, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.graph_util.import_graph_def(od_graph_def, name='')
2/ Create a clone
with detection_graph.as_default():
    const_var_name_pairs = {}
    probable_variables = [op for op in detection_graph.get_operations() if op.type == "Const"]
    available_names = [op.name for op in detection_graph.get_operations()]
    for op in probable_variables:
        name = op.name
        if name + '/read' not in available_names:
            continue
        tensor = detection_graph.get_tensor_by_name('{}:0'.format(name))
        with tf.compat.v1.Session() as s:
            tensor_as_numpy_array = s.run(tensor)
        var_shape = tensor.get_shape()
        # Give each variable a name that doesn't already exist in the graph
        var_name = '{}_turned_var'.format(name)
        var = tf.Variable(name=var_name, dtype=op.outputs[0].dtype,
                          initial_value=tensor_as_numpy_array, trainable=True, shape=var_shape)
        const_var_name_pairs[name] = var_name
3/ Replace the frozen nodes using Graph Editor
import graph_def_editor as ge

ge_graph = ge.Graph(detection_graph.as_graph_def())
name_to_op = dict([(n.name, n) for n in ge_graph.nodes])
for const_name, var_name in const_var_name_pairs.items():
    const_op = name_to_op[const_name + '/read']
    var_reader_op = name_to_op[var_name + '/Read/ReadVariableOp']
    ge.swap_outputs(ge.sgv(const_op), ge.sgv(var_reader_op))
detection_training_graph = ge_graph.to_tf_graph()
with detection_training_graph.as_default():
    writer = tf.compat.v1.summary.FileWriter('remap', detection_training_graph)
    writer.close()
The problem was in my use of Graph Editor: I imported the tf.GraphDef instead of the original tf.Graph, which holds the Variables.
This can be quickly solved by fixing step 3.
Sol1: Using Graph Editor
ge_graph = ge.Graph(detection_graph)
for const_name, var_name in const_var_name_pairs.items():
    const_op = ge_graph._node_name_to_node[const_name + '/read']
    var_reader_op = ge_graph._node_name_to_node[var_name + '/Read/ReadVariableOp']
    ge.swap_outputs(ge.sgv(const_op), ge.sgv(var_reader_op))
However, this requires disabling eager execution. To work with eager execution enabled, you should attach the MetaGraphDef to Graph Editor as below:
with detection_graph.as_default():
    meta_saver = tf.compat.v1.train.Saver()
    meta = meta_saver.export_meta_graph()
ge_graph = ge.Graph(detection_graph, collections=ge.graph._extract_collection_defs(meta))
However, the trickiest part is making the model trainable in TF 2.x.
Instead of using Graph Editor to export the graph directly, we should export it ourselves, because Graph Editor turns the Variables' data type into resources. Therefore, we export the graph as a GraphDef and import each VariableDef into the new graph:
from tensorflow.core.framework import variable_pb2

test_graph = tf.Graph()
with test_graph.as_default():
    tf.import_graph_def(ge_graph.to_graph_def(), name="")
    for var_name in ge_graph.variable_names:
        var = ge_graph.get_variable_by_name(var_name)
        ret = variable_pb2.VariableDef()
        ret.variable_name = var._variable_name
        ret.initial_value_name = var._initial_value_name
        ret.initializer_name = var._initializer_name
        ret.snapshot_name = var._snapshot_name
        ret.trainable = var._trainable
        ret.is_resource = True
        tf_var = tf.Variable(variable_def=ret, dtype=tf.float32)
        test_graph.add_to_collections(var.collection_names, tf_var)
Sol2: Manually map by Graphdef
with detection_graph.as_default() as graph:
    training_graph_def = remap_input_node(detection_graph.as_graph_def(), const_var_name_pairs)
    current_var = tf.compat.v1.trainable_variables()
    assert len(current_var) > 0, "no training variables"

detection_training_graph = tf.Graph()
with detection_training_graph.as_default():
    tf.graph_util.import_graph_def(training_graph_def, name='')
    for var in current_var:
        ret = variable_pb2.VariableDef()
        ret.variable_name = var.name
        ret.initial_value_name = var.name[:-2] + '/Initializer/initial_value:0'
        ret.initializer_name = var.name[:-2] + '/Assign'
        ret.snapshot_name = var.name[:-2] + '/Read/ReadVariableOp:0'
        ret.trainable = True
        ret.is_resource = True
        tf_var = tf.Variable(variable_def=ret, dtype=tf.float32)
        detection_training_graph.add_to_collections({'trainable_variables', 'variables'}, tf_var)
    current_var = tf.compat.v1.trainable_variables()
    assert len(current_var) > 0, "no training variables"

Modify and combine two different frozen graphs generated using tensorflow object detection API for inference

I am working with the TensorFlow Object Detection API and have trained two different models (SSD-mobilenet and FRCNN-inception-v2) for my use case. Currently, my workflow is:
Take an input image and detect one particular object using SSD-mobilenet.
Crop the input image with the bounding box generated in step 1 and resize it to a fixed size (e.g. 200 x 300).
Feed this cropped and resized image to FRCNN-inception-v2 to detect smaller objects inside the ROI.
Currently, at inference time, when I load the two separate frozen graphs and follow these steps, I get my desired results. But I need a single frozen graph because of my deployment requirement. I am new to TensorFlow and want to combine both graphs, with the crop-and-resize step in between them.
Thanks @matt and @Vedanshu for responding. Here is the updated code that works fine for my requirement. Please give suggestions if it needs any improvement, as I am still learning.
# Dependencies
import tensorflow as tf
import numpy as np

# load graphs using pb file path
def load_graph(pb_file):
    graph = tf.Graph()
    with graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pb_file, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    return graph

# returns tensor dictionaries from graph
def get_inference(graph, count=0):
    with graph.as_default():
        ops = tf.get_default_graph().get_operations()
        all_tensor_names = {output.name for op in ops for output in op.outputs}
        tensor_dict = {}
        for key in ['num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks', 'image_tensor']:
            tensor_name = key + ':0' if count == 0 else key + '_{}:0'.format(count)
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph().\
                    get_tensor_by_name(tensor_name)
        return tensor_dict

# renames while_context because there is one while function for every graph
# open issue at https://github.com/tensorflow/tensorflow/issues/22162
def rename_frame_name(graphdef, suffix):
    for n in graphdef.node:
        if "while" in n.name:
            if "frame_name" in n.attr:
                n.attr["frame_name"].s = str(n.attr["frame_name"]).replace(
                    "while_context", "while_context" + suffix).encode('utf-8')

if __name__ == '__main__':
    # your pb file paths
    frozenGraphPath1 = '...replace_with_your_path/some_frozen_graph.pb'
    frozenGraphPath2 = '...replace_with_your_path/some_frozen_graph.pb'
    # new file name to save combined model
    combinedFrozenGraph = 'combined_frozen_inference_graph.pb'

    # load both graphs
    graph1 = load_graph(frozenGraphPath1)
    graph2 = load_graph(frozenGraphPath2)

    # get tensor names from first graph
    tensor_dict1 = get_inference(graph1)

    with graph1.as_default():
        # getting tensors to add crop and resize step
        image_tensor = tensor_dict1['image_tensor']
        scores = tensor_dict1['detection_scores'][0]
        num_detections = tf.cast(tensor_dict1['num_detections'][0], tf.int32)
        detection_boxes = tensor_dict1['detection_boxes'][0]

        # I had to add NMS because my SSD model outputs 100 detections and hence
        # it runs out of memory because of the huge tensor shape
        selected_indices = tf.image.non_max_suppression(detection_boxes, scores, 5, iou_threshold=0.5)
        selected_boxes = tf.gather(detection_boxes, selected_indices)

        # intermediate crop and resize step, whose output is the input for the second model (FRCNN)
        cropped_img = tf.image.crop_and_resize(image_tensor,
                                               selected_boxes,
                                               tf.zeros(tf.shape(selected_indices), dtype=tf.int32),
                                               [300, 60]  # resize to 300 x 60
                                               )
        cropped_img = tf.cast(cropped_img, tf.uint8, name='cropped_img')

    gdef1 = graph1.as_graph_def()
    gdef2 = graph2.as_graph_def()

    g1name = "graph1"
    g2name = "graph2"

    # renaming while_context in both graphs
    rename_frame_name(gdef1, g1name)
    rename_frame_name(gdef2, g2name)

    # This combines both models and saves them as one
    with tf.Graph().as_default() as g_combined:
        x, y = tf.import_graph_def(gdef1, return_elements=['image_tensor:0', 'cropped_img:0'])
        z, = tf.import_graph_def(gdef2, input_map={"image_tensor:0": y}, return_elements=['detection_boxes:0'])
        tf.train.write_graph(g_combined, "./", combinedFrozenGraph, as_text=False)
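A hedged sanity check, not part of the original answer: reload the combined graph and feed a dummy image. The two tf.import_graph_def calls above used the default name scope, so the stored node names should carry 'import/' and 'import_1/' prefixes.
combined_graph = load_graph(combinedFrozenGraph)
with tf.Session(graph=combined_graph) as sess:
    image_in = combined_graph.get_tensor_by_name('import/image_tensor:0')
    boxes_out = combined_graph.get_tensor_by_name('import_1/detection_boxes:0')
    dummy = np.zeros((1, 300, 300, 3), dtype=np.uint8)  # hypothetical input size
    print(sess.run(boxes_out, feed_dict={image_in: dummy}).shape)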
You can load the output of one graph into another using input_map in import_graph_def. You also have to rename the while_context, because there is one while function for every graph. Something like this:
import os
import shutil
import tensorflow as tf

def get_frozen_graph(graph_file):
    """Read Frozen Graph file from disk."""
    with tf.gfile.GFile(graph_file, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    return graph_def

def rename_frame_name(graphdef, suffix):
    # Bug reported at https://github.com/tensorflow/tensorflow/issues/22162#issuecomment-428091121
    for n in graphdef.node:
        if "while" in n.name:
            if "frame_name" in n.attr:
                n.attr["frame_name"].s = str(n.attr["frame_name"]).replace(
                    "while_context", "while_context" + suffix).encode('utf-8')

...

l1_graph = tf.Graph()
with l1_graph.as_default():
    trt_graph1 = get_frozen_graph(pb_fname1)
    [tf_input1, tf_scores1, tf_boxes1, tf_classes1, tf_num_detections1] = tf.import_graph_def(trt_graph1,
        return_elements=['image_tensor:0', 'detection_scores:0', 'detection_boxes:0', 'detection_classes:0', 'num_detections:0'])

    input1 = tf.identity(tf_input1, name="l1_input")
    boxes1 = tf.identity(tf_boxes1[0], name="l1_boxes")  # index by 0 to remove batch dimension
    scores1 = tf.identity(tf_scores1[0], name="l1_scores")
    classes1 = tf.identity(tf_classes1[0], name="l1_classes")
    num_detections1 = tf.identity(tf.dtypes.cast(tf_num_detections1[0], tf.int32), name="l1_num_detections")

    ...
    # Make your output tensor
    tf_out = ...  # your output tensor (here, crop the input image with the bounding box
                  # generated from step 1 and then resize it to a fixed size, e.g. 200 x 300)
    ...

connected_graph = tf.Graph()
with connected_graph.as_default():
    l1_graph_def = l1_graph.as_graph_def()
    g1name = 'ved'
    rename_frame_name(l1_graph_def, g1name)
    tf.import_graph_def(l1_graph_def, name=g1name)

    ...

    trt_graph2 = get_frozen_graph(pb_fname2)
    g2name = 'level2'
    rename_frame_name(trt_graph2, g2name)
    [tf_scores, tf_boxes, tf_classes, tf_num_detections] = tf.import_graph_def(trt_graph2,
        input_map={'image_tensor': tf_out},
        return_elements=['detection_scores:0', 'detection_boxes:0', 'detection_classes:0', 'num_detections:0'])

#######
# Export the graph

with connected_graph.as_default():
    print('\nSaving...')
    cwd = os.getcwd()
    path = os.path.join(cwd, 'saved_model')
    shutil.rmtree(path, ignore_errors=True)
    inputs_dict = {
        "image_tensor": tf_input
    }
    outputs_dict = {
        "detection_boxes_l1": tf_boxes_l1,
        "detection_scores_l1": tf_scores_l1,
        "detection_classes_l1": tf_classes_l1,
        "max_num_detection": tf_max_num_detection,
        "detection_boxes_l2": tf_boxes_l2,
        "detection_scores_l2": tf_scores_l2,
        "detection_classes_l2": tf_classes_l2
    }
    tf.saved_model.simple_save(
        tf_sess_main, path, inputs_dict, outputs_dict
    )
    print('Ok')
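As a hedged follow-up (assuming the export above succeeded and path is the SavedModel directory): the exported model can be reloaded in a fresh session to verify it. simple_save tags the graph with 'serve'.
# Reload the exported SavedModel in a clean session to sanity-check it.
with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(sess, ['serve'], path)
    # Inspect a few imported op names to confirm both stages are present.
    print([op.name for op in sess.graph.get_operations()][:10])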

FailedPreconditionError: FailedPr...onError()

I get a FailedPreconditionError when running the session.
My network has two parts: a pre-trained network and a newly added recognition network.
The pre-trained model is used to extract features, and those features are then used to train the recognition network.
In my code, the pre-trained model is loaded first.
graph = tf.Graph()
with graph.as_default():
    input_data, input_labels, input_boxes = input_train_data.input_fn()
    input_boxes = tf.reshape(input_boxes, [input_boxes.shape[0] * 2, -1])  # convert from Nx8 to 2Nx4
    # build model and loss
    net = Net(input_data, is_training=False)
    f_saver = tf.train.Saver(max_to_keep=1000, write_version=tf.train.SaverDef.V2, save_relative_paths=True)
    sess_config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    if FLAGS.gpu_memory_fraction < 0:
        sess_config.gpu_options.allow_growth = True
    elif FLAGS.gpu_memory_fraction > 0:
        sess_config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction
    session = tf.Session(graph=graph, config=sess_config)
    tf.logging.info('Initialize from: ' + config.train.init_checkpoint)
    f_saver.restore(session, config.train.init_checkpoint)
f_saver restores the pre-trained model. Then the conv5_3 feature is extracted and fed into the recognition network.
conv5_3 = net.end_points['conv5_3']
with tf.variable_scope("Recognition"):
    global_step_rec = tf.Variable(0, name='global_step_rec', trainable=False)
    # Pass through recognition net
    r_net = regnet.ConstructRecNet(conv5_3)
    conv7_7 = r_net.end_points['pool7']
    # implement ROI Pooling
    # input boxes must be in x1, y1, x2, y2 format
    h_fmap = tf.dtypes.cast(tf.shape(conv7_7)[1], tf.float32)
    w_fmap = tf.dtypes.cast(tf.shape(conv7_7)[2], tf.float32)
    # remap boxes at input images to feature maps
    #input_boxes = input_boxes / tf.constant([config.train.input_shape[0], config.train.input_shape[0],
    #                                         config.train.input_shape[0], config.train.input_shape[0]],
    #                                        dtype=tf.float32)  # normalize with image size first
    remap_boxes = tf.matmul(input_boxes, tf.diag([w_fmap, h_fmap, w_fmap, h_fmap]))
    # put image indexes in the first column
    rows = tf.expand_dims(tf.range(remap_boxes.shape[0]), 1) / 2
    add_index = tf.concat([tf.cast(rows, tf.float32), remap_boxes], -1)
    index = tf.not_equal(tf.reduce_sum(add_index[:, 4:], axis=1), 0)
    remap_boxes = tf.gather_nd(add_index, tf.where(index))
    remap_boxes = tf.dtypes.cast(remap_boxes, tf.int32)
    prob = roi_pooling(conv7_7, remap_boxes, pool_height=1, pool_width=28)
    # Get features for CTC training
    prob = tf.transpose(prob, (1, 0, 2))  # prepare for CTC
    data_length = tf.fill([tf.shape(prob)[1]], tf.shape(prob)[0])  # input seq length, batch size
    ctc = tf.py_func(CTCUtils.compute_ctc_from_labels, [input_labels], [tf.int64, tf.int64, tf.int64])
    ctc_labels = tf.to_int32(tf.SparseTensor(ctc[0], ctc[1], ctc[2]))
    predictions = tf.to_int32(tf.nn.ctc_beam_search_decoder(prob, data_length, merge_repeated=False, beam_width=10)[0][0])
    tf.sparse_tensor_to_dense(predictions, default_value=-1, name='d_predictions')
    tf.reduce_mean(tf.edit_distance(predictions, ctc_labels, normalize=False), name='error_rate')
    loss = tf.reduce_mean(tf.compat.v1.nn.ctc_loss(inputs=prob, labels=ctc_labels, sequence_length=data_length, ctc_merge_repeated=True), name='loss')
    learning_rate = tf.train.piecewise_constant(global_step_rec, [150000, 200000],
                                                [config.train.learning_rate, 0.1 * config.train.learning_rate, 0.01 * config.train.learning_rate])
    opt_loss = tf.contrib.layers.optimize_loss(loss, global_step_rec, learning_rate, config.train.opt_type,
                                               config.train.grad_noise_scale, name='train_step')
    tf.global_variables_initializer()  # note: this only creates the init op; it is never run
I can run the session up to the conv5_3 feature extraction, but I can't run the ops in Recognition; there I get a FailedPreconditionError. What could be the problem?
graph.finalize()
with tf.variable_scope("Recognition"):
    for i in range(config.train.steps):
        input_data_, input_labels_, input_boxes_ = session.run([input_data, input_labels, input_boxes])
        conv5_3_ = session.run([conv5_3])  # can run this line
        global_step_rec_ = session.run([global_step_rec])  # FailedPreconditionError raised at this line
        conv7_7_ = session.run([conv7_7])
        h_fmap_ = session.run([h_fmap])
Now it works.
Since my graph has two parts, I need to initialize them separately:
(1) First, collect all the variables that belong to the pre-trained model and restore them from the checkpoint with tf.train.Saver.
(2) Then initialize the remaining, newly added layers using tf.global_variables_initializer().
My code is as follows.
#Initialization
#Initialize the pre-trained model first.
#We need to restore the pre-trained model into the respective variables of the current graph:
#(1)make a variable list for the checkpoint
#(2)initialize a saver for the variable list
#(3)then restore
from tensorflow.python import pywrap_tensorflow

#(1)
def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors):
    varlist = []
    reader = pywrap_tensorflow.NewCheckpointReader(file_name)
    if all_tensors:
        var_to_shape_map = reader.get_variable_to_shape_map()
        for key in sorted(var_to_shape_map):
            print(key)
            varlist.append(key)
    return varlist

varlist = print_tensors_in_checkpoint_file(file_name=config.train.init_checkpoint, all_tensors=True, tensor_name=None)

#(2)prepare the list of variables in the current graph
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
#countcheckpt_vars = 0
#for n in tf.get_default_graph().as_graph_def().node:
#    print(n.name)
#for op in tf.get_default_graph().get_operations():
#    print(str(op.name))
#for var in zip(variables):
#    countcheckpt_vars = countcheckpt_vars + 1

#(3)
loader = tf.train.Saver(variables[:46])  # I need to initialize only 46 variables from the global variables
tf.logging.info('Initialize from: ' + config.train.init_checkpoint)
sess_config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
if FLAGS.gpu_memory_fraction < 0:
    sess_config.gpu_options.allow_growth = True
elif FLAGS.gpu_memory_fraction > 0:
    sess_config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction
session = tf.Session(graph=graph, config=sess_config)
loader.restore(session, config.train.init_checkpoint)
Then initialize the rest of the variables:
init = tf.global_variables_initializer()
session.run(init)
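As a hedged sanity check (not from the original answer), you can ask the session which variables are still uninitialized after the two-stage initialization; an empty result means restore plus init covered everything.
uninit = session.run(tf.report_uninitialized_variables())
print(uninit)  # empty array => all variables are initialized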

TensorFlow, combine two checkpoint values into one and restore

I have two models (A and B) with the same architecture; both A and B have the same variable names and model settings, for example
['A1\B1\C1', 'A2\B2\C2', 'A3\B3\C3']
I have checkpoint files for A and B, and I want to combine ['A1\B1\C1', 'A2\B2\C2'] from A's checkpoint with 'A3\B3\C3' from B's into one checkpoint file and restore it into model A. How can I do that with saver.restore()?
You can do it with init_from_checkpoint. After defining the current model, create an assignment map.
dir = 'path_to_A_and_B_checkpoint_files'
vars_to_load = [i[0] for i in tf.train.list_variables(dir)]
assignment_map = {variable.op.name: variable for variable in tf.global_variables()
                  if variable.op.name in vars_to_load}
This creates a dict whose keys are the variable names found in the checkpoint and whose values are the corresponding variables of the current graph.
tf.train.init_from_checkpoint(dir, assignment_map)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # do_usual_stuff
init_from_checkpoint is called before creating the session and takes the place of saver.restore.
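Since the question asks to mix two checkpoints, one hedged extension of this idea (the paths and the 'A3/B3/C3' scope below are illustrative; TensorFlow variable scopes use '/' as the separator) is to call init_from_checkpoint once per checkpoint with disjoint assignment maps:
ckpt_A = 'path_to_A_checkpoint'  # hypothetical path
ckpt_B = 'path_to_B_checkpoint'  # hypothetical path
# Variables under A3/B3/C3 come from B's checkpoint, everything else from A's.
map_A = {v.op.name: v for v in tf.global_variables()
         if not v.op.name.startswith('A3/B3/C3')}
map_B = {v.op.name: v for v in tf.global_variables()
         if v.op.name.startswith('A3/B3/C3')}
tf.train.init_from_checkpoint(ckpt_A, map_A)
tf.train.init_from_checkpoint(ckpt_B, map_B)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # runs the checkpoint-backed initializers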
Answering my own question.
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow

def load_weights(ckpt_path, prefix_list):
    vars_weights = {}
    reader = pywrap_tensorflow.NewCheckpointReader(ckpt_path)
    var_to_shape_map = reader.get_variable_to_shape_map()
    for key in sorted(var_to_shape_map):
        for _pref in prefix_list:
            if key.startswith(_pref):
                vars_weights[key + ':0'] = reader.get_tensor(key)
    return vars_weights

# Build model
...
# Init variables
sess.run(tf.global_variables_initializer())
# Restore model
saver.restore(sess, load_dir_A)

prefix = ['A3\B3\C3']
# Get weights from ckpt of B
B_weights = load_weights(load_dir_B, prefix)

# Assign weights from B to A
assign_ops = [tf.assign(tf.get_default_graph().get_tensor_by_name(_name), _value)
              for _name, _value in B_weights.items()]
sess.run(assign_ops)

In Tensorflow, is it possible to append some summaries to already-merged summary_op?

Let's say some built-in function returns train_op and summary_op, where summary_op is defined by tf.summary.merge(summaries, name='summary_op'), and I cannot touch the function.
Also, let's say I am going to use the built-in slim.learning.train, which takes train_op and summary_op as input arguments.
# -- typical
train_op, summary_op = model_fn(image)
slim.learning.train(train_op, summary_op=summary_op)
# -- my question
train_op, summary_op = model_fn(image)
some_other_summary_list = some_another_function()
summary_op_ = ... # is it possible to append some_other_summary_list to summary_op?
slim.learning.train(train_op, summary_op=summary_op_)
How can I combine the summaries already merged in summary_op with the newly collected summaries in some_other_summary_list?
-- If I do tf.summary.merge_all(tf.GraphKeys.SUMMARIES), there will actually be too many summaries, since model_fn() collects only the useful and necessary ones.
-- I can think of defining a separate summary_op2 and defining train_step_fn as in:
from tensorflow.contrib.slim.python.slim.learning import train_step

def train_step_fn(...):
    ... = train_step(...)
    if iteration % 100 == 0:
        summaries = session.run(summary_op2)
        summary_writer.add_summary(summaries, iteration)

slim.learning.train(train_op, summary_op=summary_op, train_step_fn=train_step_fn)
However, this seems like too much effort if I can simply append new summaries to summary_op somehow. Is that possible?
If both "summary_op and newly-collected summaries some_other_summary_list" are created by tf.summary.merge, you can simply merge them again by tf.summary.merge([summary_op, summaries some_other_summary_list]), as demonstrated by this code:
import tensorflow as tf

a = tf.summary.scalar('a', tf.constant(0))
b = tf.summary.scalar('b', tf.constant(1))
c = tf.summary.scalar('c', tf.constant(2))
d = tf.summary.scalar('d', tf.constant(3))
ab = tf.summary.merge([a, b])
cd = tf.summary.merge([c, d])
abcd = tf.summary.merge([ab, cd])

with tf.Session() as sess:
    writer = tf.summary.FileWriter('.', sess.graph)
    summary = sess.run(abcd)
    writer.add_summary(summary)
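Applied to the original question, a minimal sketch (hedged; assuming some_other_summary_list holds individual summary tensors) would be:
# Merge the existing merged op with the newly collected summaries.
summary_op_ = tf.summary.merge([summary_op] + list(some_other_summary_list))
slim.learning.train(train_op, summary_op=summary_op_)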