How to initialize tf.metrics members in TensorFlow? - tensorflow

Below is part of my project code.
# Build the streaming evaluation metrics. Each tf.metrics.* call is unpacked
# into a (value_tensor, update_op) pair.
with tf.name_scope("test_accuracy"):
    test_mean_abs_err, test_mean_abs_err_op = tf.metrics.mean_absolute_error(labels=label_pl, predictions=test_eval_predict)
    test_accuracy, test_accuracy_op = tf.metrics.accuracy(labels=label_pl, predictions=test_eval_predict)
    test_precision, test_precision_op = tf.metrics.precision(labels=label_pl, predictions=test_eval_predict)
    test_recall, test_recall_op = tf.metrics.recall(labels=label_pl, predictions=test_eval_predict)
    # F1 is derived from the precision/recall value tensors.
    test_f1_measure = 2 * test_precision * test_recall / (test_precision + test_recall)
    tf.summary.scalar('test_mean_abs_err', test_mean_abs_err)
    tf.summary.scalar('test_accuracy', test_accuracy)
    tf.summary.scalar('test_precision', test_precision)
    tf.summary.scalar('test_recall', test_recall)
    tf.summary.scalar('test_f1_measure', test_f1_measure)
# validation metric init op
# NOTE: this is the call that raises the AttributeError in the traceback: the
# *_op values passed as var_list are Tensors, not Variables, so they have no
# `.initializer` attribute for tf.variables_initializer to collect.
validation_metrics_init_op = tf.variables_initializer(\
    var_list=[test_mean_abs_err_op, test_accuracy_op, test_precision_op, test_recall_op], \
    name='validation_metrics_init')
However, when I run it, errors occur like this:
Traceback (most recent call last):
File "./run_dnn.py", line 285, in <module>
train(wnd_conf)
File "./run_dnn.py", line 89, in train
name='validation_metrics_init')
File "/export/local/anaconda2/lib/python2.7/site-
packages/tensorflow/python/ops/variables.py", line 1176, in
variables_initializer
return control_flow_ops.group(*[v.initializer for v in var_list], name=name)
AttributeError: 'Tensor' object has no attribute 'initializer'
I realize that I cannot create a validation initializer like that. I want to recalculate the corresponding metrics each time I save a new model checkpoint and run a new round of validation, so I have to reset the metrics to zero first.
But how do I reset all of these metrics to zero? Many thanks for your help!

I solved the problem in the following way after referring to the blog post "Avoiding headaches with tf.metrics".
# validation metrics
# Every metric below is created with the same `name`, so that the variables
# backing them can be looked up together by that scope string further down.
validation_metrics_var_scope = "validation_metrics"
test_mean_abs_err, test_mean_abs_err_op = tf.metrics.mean_absolute_error(labels=label_pl, predictions=test_eval_predict, name=validation_metrics_var_scope)
test_accuracy, test_accuracy_op = tf.metrics.accuracy(labels=label_pl, predictions=test_eval_predict, name=validation_metrics_var_scope)
test_precision, test_precision_op = tf.metrics.precision(labels=label_pl, predictions=test_eval_predict, name=validation_metrics_var_scope)
test_recall, test_recall_op = tf.metrics.recall(labels=label_pl, predictions=test_eval_predict, name=validation_metrics_var_scope)
# F1 is derived from the precision/recall value tensors.
test_f1_measure = 2 * test_precision * test_recall / (test_precision + test_recall)
tf.summary.scalar('test_mean_abs_err', test_mean_abs_err)
tf.summary.scalar('test_accuracy', test_accuracy)
tf.summary.scalar('test_precision', test_precision)
tf.summary.scalar('test_recall', test_recall)
tf.summary.scalar('test_f1_measure', test_f1_measure)
# validation metric init op
# Collect the LOCAL_VARIABLES registered under the shared scope and build one
# initializer over them; running that op resets the metrics before each new
# validation round. Unlike the update ops, these are Variables, so
# tf.variables_initializer accepts them.
validation_metrics_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope=validation_metrics_var_scope)
validation_metrics_init_op = tf.variables_initializer(var_list=validation_metrics_vars, name='validation_metrics_init')

A minimal working example that can be run line by line in a Python terminal:
import tensorflow as tf
s = tf.Session()
# `acc` holds whatever tf.metrics.accuracy returns; elsewhere on this page that
# return value is unpacked as a (value_tensor, update_op) pair.
acc = tf.metrics.accuracy([0,1,0], [0.1, 0.9, 0.8])
# Build one initializer over everything in the LOCAL_VARIABLES collection.
ini = tf.variables_initializer(tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES))
# Run the initializer first, then evaluate the metric.
s.run([ini])
s.run([acc])

Related

keras.models.load_model() gives ValueError

I have saved the trained model and the weights as below.
model, history, score = fit_model(model, train_batches, val_batches, callbacks=[callback])
model.save('./model')
model.save_weights('./weights')
Then I tried to load the saved model in the following way:
if __name__ == '__main__':
    # This load_model call is the line that raises the ValueError shown in the
    # traceback below.
    model = keras.models.load_model('./model', compile= False,custom_objects={"F1Score": tfa.metrics.F1Score})
    # test_gen / predict_model are helpers defined elsewhere in the asker's project.
    test_batches, nb_samples = test_gen(dataset_test_path, 32, img_width, img_height)
    predict, loss, acc = predict_model(model,test_batches, nb_samples)
    print(predict)
    print(acc)
    print(loss)
But it gives me an error. What should I do to overcome this?
Traceback (most recent call last):
File "test_pro.py", line 34, in <module>
model = keras.models.load_model('./model',compile= False,custom_objects={"F1Score": tfa.metrics.F1Score})
File "/home/dcs2016csc007/.local/lib/python3.8/site-packages/tensorflow/python/keras/saving/save.py", line 212, in load_model
return saved_model_load.load(filepath, compile, options)
File "/home/dcs2016csc007/.local/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 138, in load
keras_loader.load_layers()
File "/home/dcs2016csc007/.local/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 379, in load_layers
self.loaded_nodes[node_metadata.node_id] = self._load_layer(
File "/home/dcs2016csc007/.local/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 407, in _load_layer
obj, setter = revive_custom_object(identifier, metadata)
File "/home/dcs2016csc007/.local/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 921, in revive_custom_object
raise ValueError('Unable to restore custom object of type {} currently. '
ValueError: Unable to restore custom object of type _tf_keras_metric currently. Please make sure that the layer implements `get_config`and `from_config` when saving. In addition, please use the `custom_objects` arg when calling `load_model()`.
Looking at the source code for Keras, the error is raised when trying to load a model with a custom object:
def revive_custom_object(identifier, metadata):
    """Revives object from SavedModel."""
    # Choose which Model base class a revived model will inherit from.
    if ops.executing_eagerly_outside_functions():
        model_class = training_lib.Model
    else:
        model_class = training_lib_v1.Model

    # Maps each supported identifier to the tuple of base classes used to
    # build the revived class. Identifiers missing from this table cannot be
    # revived and fall through to the ValueError below.
    revived_classes = {
        constants.INPUT_LAYER_IDENTIFIER: (
            RevivedInputLayer, input_layer.InputLayer),
        constants.LAYER_IDENTIFIER: (RevivedLayer, base_layer.Layer),
        constants.MODEL_IDENTIFIER: (RevivedNetwork, model_class),
        constants.NETWORK_IDENTIFIER: (RevivedNetwork, functional_lib.Functional),
        constants.SEQUENTIAL_IDENTIFIER: (RevivedNetwork, models_lib.Sequential),
    }
    parent_classes = revived_classes.get(identifier, None)

    if parent_classes is not None:
        parent_classes = revived_classes[identifier]
        # Dynamically create a class named after the saved object's
        # class_name, deriving from the base classes selected above.
        revived_cls = type(
            compat.as_str(metadata['class_name']), parent_classes, {})
        return revived_cls._init_from_metadata(metadata) # pylint: disable=protected-access
    else:
        # The identifier is interpolated into the message; in the traceback
        # above it is `_tf_keras_metric`.
        raise ValueError('Unable to restore custom object of type {} currently. '
                         'Please make sure that the layer implements `get_config`'
                         'and `from_config` when saving. In addition, please use '
                         'the `custom_objects` arg when calling `load_model()`.'
                         .format(identifier))
The method will only work fine with the custom objects of the types defined in revived_classes. As you can see, it currently only works with input layer, layer, model, network, and sequential custom objects.
In your code, you pass a tfa.metrics.F1Score class in the custom_objects argument, which is of type METRIC_IDENTIFIER and is therefore not supported (probably because it doesn't implement the get_config and from_config functions, as the error output says):
keras.models.load_model('./model', compile=False, custom_objects={"F1Score": tfa.metrics.F1Score})
It's been a while since I last worked with Keras but maybe you can try and follow what was proposed in this other related answer and wrap the call to tfa.metrics.F1Score in a method. Something like this (adjust it to your needs):
def f1(y_true, y_pred):
    """Compute an F1 score for one batch with a freshly built F1Score metric.

    A new ``tfa.metrics.F1Score`` (3 classes, threshold 0.5) is created on
    every call, updated once with the given labels and predictions, and its
    result is returned.
    """
    scorer = tfa.metrics.F1Score(num_classes=3, threshold=0.5)
    scorer.update_state(y_true, y_pred)
    score = scorer.result()
    return score
keras.models.load_model('./model', compile=False, custom_objects={'f1': f1})

AttributeError: 'DataFrame' object has no attribute 'Recovery'

When I tried to filter the data points, the filtering did not seem to work, and I am really confused because the last time I used literally the same code, it worked. The filtering code and the error message are shown below.
"""------------- Read all the data points in the file ----------------------------------------"""
zeolite_13X_error = pd.read_csv("zeolite_13X_error.csv", delimiter=",") # Used for training set
zeolite_copy = zeolite_13X_error.copy() # Used for test set
"""--------------------------------------------------------------------------------------------"""
"""------------- Filter points, Recovery rate and Purity - Test Set Only ---------------------"""
# Keep rows with 0.7 < Recovery < 1.0 and Purity > 0.7, applied as three
# successive boolean-mask filters.
# NOTE: the first filter is the line that raises the AttributeError in the
# traceback. `zeolite_copy.Recovery` is attribute-style column access, so
# presumably the loaded CSV has no column named exactly "Recovery" -- verify
# against zeolite_copy.columns.
zeolite_13X_error_testset = zeolite_copy[zeolite_copy.Recovery > 0.7]
zeolite_13X_error_testset = zeolite_13X_error_testset[zeolite_13X_error_testset.Purity > 0.7]
zeolite_13X_error_testset = zeolite_13X_error_testset[zeolite_13X_error_testset.Recovery < 1.0]
*Traceback (most recent call last):
File "C:/Users/julia/Machine Learning Edi/Combination Prediction.py", line 16, in <module>
zeolite_13X_error_testset = zeolite_copy[zeolite_copy.Recovery > 0.7]
File "C:\Users\julia\Anaconda3\envs\Machine Learning Edi\lib\site-packages\pandas\core\generic.py", line 5274, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'Recovery'*

How to use tf.train.Saver in SessionRunHook?

I have trained many sub-models, each of which is a part of the final model. I now want to use those pretrained sub-models to initialize the final model's parameters. I tried to use a SessionRunHook to load the model parameters from the other ckpt files and use them to initialize the final model's parameters.
I tried the following code, but it failed. Any advice would be appreciated. Thanks!
The error info is:
Traceback (most recent call last):
File "train_high_api_local.py", line 282, in <module>
tf.app.run()
File "/Users/zhouliaoming/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 124, in run
_sys.exit(main(argv))
File "train_high_api_local.py", line 266, in main
clf_.train(input_fn=lambda: read_file([tables[0]], epochs_per_eval), steps=None, hooks=[hook_test]) # input yield: x, y
File "/Users/zhouliaoming/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 314, in train
.......
File "/Users/zhouliaoming/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 674, in create_session
hook.after_create_session(self.tf_sess, self.coord)
File "train_high_api_local.py", line 102, in after_create_session
saver = tf.train.Saver([ti]) # TODO: ERROR INFO: Graph is finalized and cannot be modified.
.......
File "/Users/zhouliaoming/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3135, in create_op
self._check_not_finalized()
File "/Users/zhouliaoming/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2788, in _check_not_finalized
raise RuntimeError("Graph is finalized and cannot be modified.")
RuntimeError: Graph is finalized and cannot be modified.
and the code detail is:
class SetTensor(session_run_hook.SessionRunHook):
    """ like tf.train.LoggingTensorHook """
    def after_create_session(self, session, coord):
        """ Called when new TensorFlow session is created: graph is finalized and ops can no longer be added. """
        # Look up the tensor to overwrite by name in the default graph.
        graph = tf.get_default_graph()
        ti = graph.get_tensor_by_name("h_1_15/bias:0")
        with session.as_default():
            with tf.name_scope("rewrite"):
                # Per the traceback, constructing the Saver tries to create
                # ops, which fails here because the graph is already
                # finalized when this callback runs.
                saver = tf.train.Saver([ti]) # TODO: ERROR INFO: Graph is finalized and cannot be modified.
                saver.restore(session, "/Users/zhouliaoming/data/credit_dnn/model_retrain/rm_gene_v2_sall/model.ckpt-2102")
        pass
def main(unused_argv):
    """ train """
    # NOTE(review): norm_all_func is defined but not used anywhere in the code shown.
    norm_all_func = lambda x: tf.cond(x>1, lambda: tf.log(x), lambda: tf.identity(x))
    # One single-element list of numeric columns per entry of FEA_DIM, each
    # normalized through weight_norm2 wrapped in tf.py_func.
    feature_columns=[[tf.feature_column.numeric_column(COLUMNS[i], shape=fi, normalizer_fn=lambda x: tf.py_func(weight_norm2, [x], tf.float32) )] for i, fi in enumerate(FEA_DIM)] # normlized: running OK!
    ## use self-defined model
    param = {"learning_rate": 0.0001, "feature_columns": feature_columns, "isanalysis": FLAGS.isanalysis, "isall": False}
    clf_ = tf.estimator.Estimator(model_fn=model_fn_wide2deep, params=param, model_dir=ckpt_dir)
    # NOTE(review): the SetTensor class shown above defines no __init__, yet
    # it is constructed with a list argument here -- presumably the
    # constructor was omitted from the question.
    hook_test = SetTensor(["h_1_15/bias", "h_1_15/kernel"])
    epochs_per_eval = 1
    for n in range(int(FLAGS.num_epochs/epochs_per_eval)):
        # train num_epochs
        # The hook is passed to train(); per the traceback its
        # after_create_session is invoked while the session is being created.
        clf_.train(input_fn=lambda: read_file([tables[0]], epochs_per_eval), steps=None, hooks=[hook_test]) # input yield: x, y
SessionRunHook's after_create_session() is not meant for this use case. As the error says, you cannot change the graph once it has been finalized, and it is already finalized by the time after_create_session() is called.
You can assign variables using saver.restore() in your "normal code". You don't have to be inside any hooks.
Also, if you want to restore many variables and can match them to their names and shapes in a checkpoint, you might want to take a look at https://gist.github.com/iganichev/d2d8a0b1abc6b15d4a07de83171163d4. It shows some example code to restore a subset of variables.
You can do this:
class SaveAtEnd(tf.train.SessionRunHook):
    """Hook that writes a checkpoint once, when the monitored session ends.

    The Saver is built in ``begin()``, which runs before the graph is
    finalized, so adding the Saver's ops is still allowed there. Building it
    in ``after_create_session()`` or later fails with
    "Graph is finalized and cannot be modified."

    Args:
        save_path: Checkpoint path prefix passed to ``Saver.save``.
    """

    def __init__(self, save_path):
        self._save_path = save_path
        self._saver = None

    def begin(self):
        # Create your saver here. A default Saver covers all saveable
        # variables; pass a var_list instead to handle only a subset.
        self._saver = tf.train.Saver()

    def end(self, session):
        # The saver's ops already exist in the graph, so nothing is added here.
        self._saver.save(session, self._save_path)

tf.contrib.slim.get_variables_to_restore() does not return value

When I run the code below, tf.contrib.slim.get_variables_to_restore() returns an empty list [] for all_vars, which then causes a failure when calling tf.train.Saver. The detailed error message is shown below.
Am I missing anything?
>>> import tensorflow as tf
>>> inception_exclude_scopes = ['InceptionV3/AuxLogits', 'InceptionV3/Logits', 'global_step', 'final_ops']
>>> inception_checkpoint_file = '/Users/morgan.du/git/machine-learning/projects/capstone/yelp/model/inception_v3_2016_08_28.ckpt'
>>> with tf.Session(graph=tf.Graph()) as sess:
... init_op = tf.global_variables_initializer()
... sess.run(init_op)
... reader = tf.train.NewCheckpointReader(inception_checkpoint_file)
... var_to_shape_map = reader.get_variable_to_shape_map()
... all_vars = tf.contrib.slim.get_variables_to_restore(exclude=inception_exclude_scopes)
... inception_saver = tf.train.Saver(all_vars)
... inception_saver.restore(sess, inception_checkpoint_file)
...
Traceback (most recent call last):
File "<stdin>", line 7, in <module>
File "/Users/morgan.du/miniconda2/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1051, in __init__
self.build()
File "/Users/morgan.du/miniconda2/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1072, in build
raise ValueError("No variables to save")
ValueError: No variables to save
The problem here seems to be that your graph is empty—i.e. it does not contain any variables. You create a new graph on the line with tf.Session(graph=tf.Graph()):, and none of the following lines creates a tf.Variable object.
To restore a pre-trained TensorFlow model, you need to do one of three things:
Rebuild the model graph, by executing the same Python graph building code that was used to train the model in the first place.
Load a "MetaGraph" that contains information about how to reconstruct the graph structure and model variables. See this tutorial for more details on how to create and use a MetaGraph. MetaGraphs are often created alongside checkpoint files, and typically have the extension .meta.
Load a "SavedModel", which contains a "MetaGraph". See the documentation here for more details.

Why can't I access the variable I create using the variable name plus scope path in TensorFlow?

I was trying to get a variable I created in a simple function but I keep getting errors. I am doing:
x = tf.get_variable('quadratic/x')
but Python complains as follows:
python qm_tb_scopes.py
quadratic/x:0
Traceback (most recent call last):
File "qm_tb_scopes.py", line 24, in <module>
x = tf.get_variable('quadratic/x')
File "/Users/my_username/path/tensor_flow_experiments/venv/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 732, in get_variable
partitioner=partitioner, validate_shape=validate_shape)
File "/Users/my_username/path/tensor_flow_experiments/venv/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 596, in get_variable
partitioner=partitioner, validate_shape=validate_shape)
File "/Users/my_username/path/tensor_flow_experiments/venv/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 161, in get_variable
caching_device=caching_device, validate_shape=validate_shape)
File "/Users/my_username/path/tensor_flow_experiments/venv/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 457, in _get_single_variable
"but instead was %s." % (name, shape))
ValueError: Shape of a new variable (quadratic/x) must be fully defined, but instead was <unknown>.
It seems it's trying to create a new variable, but I am simply trying to get one that is already defined. Why is it doing this?
The whole code is:
import tensorflow as tf
def get_quaratic():
    """Build the toy model y = (1/2)(x-b)^2 and return (y, x)."""
    # x variable
    with tf.variable_scope('quadratic'):
        # Created with tf.Variable (not tf.get_variable); the script prints
        # its name as 'quadratic/x:0'.
        x = tf.Variable(10.0,name='x')
        # b placeholder (simulates the "data" part of the training)
        b = tf.placeholder(tf.float32,name='b')
        # make model (1/2)(x-b)^2
        xx_b = 0.5*tf.pow(x-b,2)
        y=xx_b
        return y,x
y,x = get_quaratic()
learning_rate = 1.0
# get optimizer
opt = tf.train.GradientDescentOptimizer(learning_rate)
# gradient variable list = [ (gradient,variable) ]
# Python 2 print statement; it outputs 'quadratic/x:0' in the run shown above.
print x.name
# This is the call that raises the ValueError in the traceback: get_variable
# is asked for 'quadratic/x' without a shape and tries to create a new variable.
x = tf.get_variable('quadratic/x')
x = tf.get_variable(x.name)
You need to pass the option reuse=True to tf.variable_scope() if you want to get the same variable twice.
See the documentation (https://www.tensorflow.org/versions/r0.9/how_tos/variable_scope/index.html)
for more details.
Alternatively, you could get the variable once, outside your Python function, and pass it in as an argument in Python. I find that a bit cleaner since it makes it explicit what variables the code uses.
I hope that helps!
This is not the best solution, but try creating the variable through tf.get_variable() with reuse=False to ensure a new variable is created. Then, when obtaining the variable, use tf.get_variable() with reuse=True to get the current variable. Setting reuse to tf.AUTO_REUSE risks the creation of a new variable if the exact var is not present. Also make sure to specify the shape of the variable in tf.get_variable().
import tensorflow as tf
def get_quaratic():
    """Build the toy objective y = (1/2)(x-b)^2.

    Returns:
        A ``(y, x)`` tuple: ``y`` is the objective tensor and ``x`` is the
        scalar variable created as ``quadratic/x`` through
        ``tf.get_variable``, so it can later be fetched again with
        ``reuse=True``.
    """
    # x variable
    with tf.variable_scope('quadratic', reuse=False):
        # Give x its starting value through the initializer. A bare
        # `tf.assign(x, 10)` only builds an op; since nothing ever ran it,
        # the variable was never actually set to 10.
        x = tf.get_variable(
            'x', shape=(), initializer=tf.constant_initializer(10.0))
        # b placeholder (simulates the "data" part of the training)
        b = tf.placeholder(tf.float32, name='b')
        # make model (1/2)(x-b)^2
        xx_b = 0.5 * tf.pow(x - b, 2)
        y = xx_b
    return y, x
y,x = get_quaratic()
learning_rate = 1.0
# get optimizer
opt = tf.train.GradientDescentOptimizer(learning_rate)
# gradient variable list = [ (gradient,variable) ]
print (x.name)
# Re-enter the root variable scope with reuse=True so that get_variable
# returns the existing 'quadratic/x' rather than trying to create a new one.
with tf.variable_scope('', reuse=True):
    x = tf.get_variable('quadratic/x', shape=())
print(tf.global_variables()) # there is only 1 variable