Tensorflow colocate issue with import_graph_def and ExponentialMovingAverage

I had an issue with colocate information in the GraphDef.
Here are the high-level steps I follow:
1. Train an Estimator using tf.train.ExponentialMovingAverage and use the EMA predictions for the PREDICT mode
2. Export to SavedModel
3. Reload the GraphDef from the SavedModel and remove unnecessary nodes with extract_sub_graph
4. Freeze the resulting graph (make variables into constants using checkpoint data) with freeze_graph_with_def_protos
At step 4, I get an error: ValueError: Node 'layer/kernel/ExponentialMovingAverage' expects to be colocated with unknown node 'layer/kernel'
Here is the code I use to train the model
# train.py
import logging
from pathlib import Path
import sys

import tensorflow as tf


def ema_getter(ema):
    def _ema_getter(getter, name, *args, **kwargs):
        var = getter(name, *args, **kwargs)
        ema_var = ema.average(var)
        return ema_var if ema_var else var
    return _ema_getter


def model_fn(features, labels, mode, params):
    # pylint: disable=unused-argument
    """Dummy model_fn"""
    if isinstance(features, dict):  # For serving
        features = features['feature']

    predictions = tf.layers.dense(features, 1, name="layer")
    predictions = tf.identity(predictions, name="predictions")

    ema = tf.train.ExponentialMovingAverage(1.0)
    variables = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
    ema_op = ema.apply(variables)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True, custom_getter=ema_getter(ema)):
        predictions_ema = tf.layers.dense(features, 1, name="layer")
    predictions_ema = tf.identity(predictions_ema, name="predictions_ema")

    if mode == tf.estimator.ModeKeys.PREDICT:
        preds = {
            "predictions_ema": predictions_ema
        }
        return tf.estimator.EstimatorSpec(mode, predictions=preds)
    else:
        loss = tf.nn.l2_loss(predictions - labels)
        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer(learning_rate=0.5).minimize(
                loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=tf.group([train_op, ema_op]))
        else:
            raise NotImplementedError()


def train_generator_fn():
    for number in range(100):
        yield [number, number], [2 * number]


def train_input_fn():
    shapes, types = (2, 1), (tf.float32, tf.float32)
    dataset = tf.data.Dataset.from_generator(
        train_generator_fn, output_types=types, output_shapes=shapes)
    dataset = dataset.batch(20).repeat(200)
    return dataset


def serving_input_receiver_fn():
    """Serving input_fn that builds features from placeholders

    Returns
    -------
    tf.estimator.export.ServingInputReceiver
    """
    number = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='number')
    receiver_tensors = {'number': number}
    features = tf.tile(number, multiples=[1, 2])
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)


if __name__ == '__main__':
    # Logging
    Path('model').mkdir(exist_ok=True)
    tf.logging.set_verbosity(logging.INFO)
    handlers = [
        logging.FileHandler('model/train.log'),
        logging.StreamHandler(sys.stdout)
    ]
    logging.getLogger('tensorflow').handlers = handlers

    # Train estimator
    estimator = tf.estimator.Estimator(model_fn, 'model', params={})
    estimator.train(train_input_fn)

    # Export
    estimator.export_saved_model('saved_model', serving_input_receiver_fn)
and the code I use to optimize the graph
# optimize.py
from pathlib import Path

import tensorflow as tf
from tensorflow.python.tools.freeze_graph import freeze_graph_with_def_protos
from tensorflow.python.framework.graph_util import extract_sub_graph
from tensorflow.core.framework.graph_pb2 import GraphDef


def optimize_and_export(export_dir: str, output: str):
    with tf.Session() as sess:
        g = tf.saved_model.loader.load(sess, ["serve"], export_dir)
        inference_graph = extract_sub_graph(g.graph_def, ["predictions_ema"])
        g = freeze_graph_with_def_protos(
            inference_graph,
            None,
            None,
            "predictions_ema",
            None,
            None,
            None,
            None,
            None,
            input_saved_model_dir=export_dir,
            saved_model_tags=["serve"],
        )
        tf.io.write_graph(g, logdir=str(Path(output).parent), name=Path(output).name, as_text=False)


if __name__ == '__main__':
    export_dir = str(sorted(Path('saved_model').glob('*'))[0])
    print(f"Reloading from {export_dir}")
    optimize_and_export(export_dir, 'saved_model/final')

My understanding is that tf.train.ExponentialMovingAverage adds colocation information to the nodes (linking the original variable and its EMA version).
NB: colocation seems to mean "these variables should be located on the same device"
This colocation information is present in the graph protobuf (the SavedModel export).
When the subgraph is extracted, only the EMA versions of the variables are kept, but their colocation information is preserved. This causes issues when building the Graph, which tries to find the original colocated variables that are no longer present.
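For reference, the colocation entries can be inspected directly in the exported GraphDef. A rough sketch, using the same TF 1.x SavedModel loader as in the optimize script and assuming export_dir points at the timestamped export directory:
import tensorflow as tf

# Print the colocation constraints stored in the exported GraphDef.
# On the EMA nodes, the "_class" attr holds entries like b"loc:@layer/kernel".
with tf.Session() as sess:
    meta_graph = tf.saved_model.loader.load(sess, ["serve"], export_dir)
    for node in meta_graph.graph_def.node:
        if "_class" in node.attr:
            print(node.name, node.attr["_class"].list.s)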
I found a workaround: modify the protobuf manually and remove all colocation information, with
def optimize_and_export(export_dir: str, output: str):
    with tf.Session() as sess:
        g = tf.saved_model.loader.load(sess, ["serve"], export_dir)
        inference_graph = extract_sub_graph(g.graph_def, ["predictions_ema"])

        # Remove colocate information from GraphDef
        for node in inference_graph.node:
            if "_class" in node.attr:
                del node.attr["_class"]
                tf.logging.warning(f"Removing _class attr of {node.name}")

        g = freeze_graph_with_def_protos(
            inference_graph,
            None,
            None,
            "predictions_ema",
            None,
            None,
            None,
            None,
            None,
            input_saved_model_dir=export_dir,
            saved_model_tags=["serve"],
        )
        tf.io.write_graph(g, logdir=str(Path(output).parent), name=Path(output).name, as_text=False)
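As a sanity check, the frozen GraphDef written to saved_model/final can be reloaded with tf.import_graph_def and run directly. A rough sketch, assuming the tensor names defined in the training script above (the serving placeholder number and the output predictions_ema):
import tensorflow as tf
from tensorflow.core.framework.graph_pb2 import GraphDef

# Reload the frozen graph and run inference on it.
graph_def = GraphDef()
with tf.gfile.GFile('saved_model/final', 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name="")
    number = graph.get_tensor_by_name("number:0")                 # serving placeholder
    predictions = graph.get_tensor_by_name("predictions_ema:0")   # EMA predictions
    with tf.Session(graph=graph) as sess:
        print(sess.run(predictions, feed_dict={number: [[10.0]]}))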

Related

Tensorflow 2 TypeError: Expected int32 passed to parameter 'y' of op 'Greater', got 1e-12 of type 'float' instead

I am performing NER in TensorFlow 2, but when I compute the F1 score through TensorFlow Addons it keeps giving me this error:
TypeError: Expected int32 passed to parameter 'y' of op 'Greater', got 1e-12 of type 'float' instead. Error: Expected int32, got 1e-12 of type 'float' instead.
The function that produces this error is model_fn.
Here is the first part of the code, for data preparation:
__author__ = "Guillaume Genthial"
import functools
import json
import logging
from pathlib import Path
import sys
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tf_ad
from sklearn.metrics import f1_score
# DiRECTORY PATH
DATADIR = '/content/drive/My Drive/7th Semester/FYP Related Stuff /data/example'
# Logging
Path('results').mkdir(exist_ok=True)
tf.autograph.set_verbosity(logging.INFO) # INFO --> Type of message like error, warning, etc
handlers = [
logging.FileHandler('results/main.log'),
logging.StreamHandler(sys.stdout)
]
logging.getLogger('tensorflow').handlers = handlers
# parse function
def parse_fn(line_words, line_tags):
# Encode in Bytes for TF
words = [word.encode() for word in line_words.strip().split()]
tags = [tag.encode() for tag in line_tags.strip().split()]
assert len(words) == len(tags), "Words and tags lengths don't match" # Returns assertion error
return (words, len(words)), tags
# Data Generator function
def generator_fn(words, tags):
with Path(words).open('r') as file_words, Path(tags).open('r') as file_tags:
for line_words, line_tags in zip(file_words, file_tags):
yield parse_fn(line_words, line_tags) # yield: to return sth from function without stoping the function and exe starts from last yeild stat
# Input Function
def input_fn(words, tags, params=None, shuffle_and_repeat=False):
params = params if params is not None else {}
shapes = ((tf.TensorShape([None]), tf.TensorShape(())), tf.TensorShape([None]))
types = ((tf.string, tf.int32), tf.string) # data types fro tensor
defaults = (('<pad>', 0), 'O') # padding in case of mismatched length
dataset = tf.data.Dataset.from_generator(
functools.partial(generator_fn, words, tags),
output_shapes=shapes, output_types=types)
if shuffle_and_repeat:
dataset = dataset.shuffle(params['buffer']).repeat(params['epochs'])
dataset = (dataset
.padded_batch(params.get('batch_size', 20), shapes, defaults)
.prefetch(1))
return dataset
Model function
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        # if not a tensor
        features = features['words'], features['nwords']

    dropout = params['dropout']
    words, nwords = features  # words and number of words
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.lookup.StaticVocabularyTable(
        tf.lookup.TextFileInitializer(
            params['words'],
            key_dtype=tf.string, key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
            value_dtype=tf.int64, value_index=tf.lookup.TextFileIndex.LINE_NUMBER,
            delimiter="\n"),
        num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1  # number of tags in the tag file

    # Word embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    shape = variable.shape
    # getting embeddings
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # LSTM
    dropout_lstm = tf.keras.layers.Dropout(rate=dropout)
    embeddings = dropout_lstm(embeddings, training=training)
    layer_fw = tf.keras.layers.LSTM(params['lstm_size'], return_sequences=True)
    layer_bw = tf.keras.layers.LSTM(params['lstm_size'], go_backwards=True)
    bilstm = tf.keras.layers.Bidirectional(layer_fw)
    output = bilstm(embeddings)
    dropout_crf = tf.keras.layers.Dropout(rate=dropout)
    output = dropout_crf(output)

    dense_layer = tf.keras.layers.Dense(num_tags)
    logits = dense_layer(output)
    initial_val = np.zeros((num_tags, num_tags))  # [[1.,1.,1.,1.],[1.,1.,1.,1.],[1.,1.,1.,1.],[1.,1.,1.,1.]]
    # crf_params = tf.Variable(initial_value=initial_val, name="crf", shape=[num_tags, num_tags], dtype=tf.float32)
    crf_params = tf.convert_to_tensor(initial_val, dtype=tf.float32)
    pred_ids, _ = tf_ad.text.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.lookup.StaticVocabularyTable(
            tf.lookup.TextFileInitializer(
                params['tags'],
                key_dtype=tf.int64, key_index=tf.lookup.TextFileIndex.LINE_NUMBER,
                value_dtype=tf.string, value_index=tf.lookup.TextFileIndex.WHOLE_LINE,
                delimiter="\n"
            ),
            num_oov_buckets=params['num_oov_buckets']
        )
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.lookup.StaticVocabularyTable(
            tf.lookup.TextFileInitializer(
                params['tags'],
                key_dtype=tf.string, key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
                value_dtype=tf.int64, value_index=tf.lookup.TextFileIndex.LINE_NUMBER,
                delimiter="\n"
            ),
            num_oov_buckets=params['num_oov_buckets']
        )
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf_ad.text.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)
        print("\nloss of error", loss, "\n")

        # Metrics
        weights = tf.sequence_mask(nwords)
        # Accuracy
        acc_layer = tf.keras.metrics.Accuracy()
        acc_layer.update_state(tags, pred_ids, weights)
        acc = acc_layer
        # Precision
        pre_layer = tf.keras.metrics.Precision(top_k=num_tags)
        pre_layer.update_state(tags, pred_ids, weights)
        pre = pre_layer
        # Recall
        rec_layer = tf.keras.metrics.Recall(top_k=num_tags)
        rec_layer.update_state(tags, pred_ids, weights)
        rec = rec_layer
        print(tf.keras.backend.cast(tags, dtype="int32"))
        # F1 score
        f1_layer = tf_ad.metrics.F1Score(num_classes=num_tags, average="weighted")
        f1_layer.update_state(tf.keras.backend.cast(tags, dtype="int32"),
                              tf.keras.backend.cast(pred_ids, dtype="int32"), weights)
        f1_score = f1_layer
        metrics = {
            'acc': acc,
            'precision': pre,
            'recall': rec,
            'f1_score': f1_score,
        }
        # for metric_name, op in metrics.items():
        #     print(f"\nThe metric_name is:{metric_name} and op is:{op}\n")
        #     ret = tf.summary.scalar(metric_name, op.result())
        #     print(ret)

        def loss_fn():
            loss = tf.reduce_mean(-log_likelihood)
            return loss

        print("\nloss function\n", metrics)

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.compat.v1.train.AdamOptimizer().minimize(
                loss)
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
Main.py
if __name__ == '__main__':
    # Params
    params = {
        'dim': 300,
        'dropout': 0.5,
        'num_oov_buckets': 1,
        'epochs': 25,
        'batch_size': 20,
        'buffer': 15000,
        'lstm_size': 100,
        'words': str(Path(DATADIR, 'vocab.words.txt')),
        'chars': str(Path(DATADIR, 'vocab.chars.txt')),
        'tags': str(Path(DATADIR, 'vocab.tags.txt')),
        'glove': str(Path(DATADIR, 'glove.npz'))
    }
    with Path('results/params.json').open('w') as f:
        json.dump(params, f, indent=4, sort_keys=True)

    def fwords(name):
        return str(Path(DATADIR, '{}.words.txt'.format(name)))

    def ftags(name):
        return str(Path(DATADIR, '{}.tags.txt'.format(name)))

    # Estimator, train and evaluate
    train_inpf = functools.partial(input_fn, fwords('train'), ftags('train'),
                                   params, shuffle_and_repeat=True)
    eval_inpf = functools.partial(input_fn, fwords('testa'), ftags('testa'))
    cfg = tf.estimator.RunConfig(save_checkpoints_secs=120)
    estimator = tf.estimator.Estimator(model_fn, 'results/model', cfg, params)
    Path(estimator.eval_dir()).mkdir(parents=True, exist_ok=True)
    # A hook is used to stop and then run a specific function or piece of code after a specific time
    hook = tf.estimator.experimental.stop_if_no_increase_hook(
        estimator, 'f1', 500, min_steps=8000, run_every_secs=120)
    train_spec = tf.estimator.TrainSpec(input_fn=train_inpf, hooks=[hook])
    eval_spec = tf.estimator.EvalSpec(input_fn=eval_inpf, throttle_secs=120)
    tf.compat.v1.enable_eager_execution()
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    # Write predictions to file
    def write_predictions(name):
        Path('results/score').mkdir(parents=True, exist_ok=True)
        with Path('results/score/{}.preds.txt'.format(name)).open('wb') as f:
            test_inpf = functools.partial(input_fn, fwords(name), ftags(name))
            golds_gen = generator_fn(fwords(name), ftags(name))
            preds_gen = estimator.predict(test_inpf)
            for golds, preds in zip(golds_gen, preds_gen):
                ((words, _), tags) = golds
                for word, tag, tag_pred in zip(words, tags, preds['tags']):
                    f.write(b' '.join([word, tag, tag_pred]) + b'\n')
                f.write(b'\n')

    for name in ['train', 'testa', 'testb']:
        write_predictions(name)
Another error that is causing trouble is this:
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1382 '\nsession_config.graph_options.rewrite_options.'
1383 'disable_meta_optimizer = True')
-> 1384 raise type(e)(node_def, op, message)
1385
1386 def _extend_graph(self):
InvalidArgumentError: assertion failed: [predictions must be <= 1] [Condition x <= y did not hold element-wise:] [x (Cast_6:0) = ] [[3 3 3...]...] [y (Cast_9/x:0) = ] [1]
[[{{node Assert}}]]
This happens when I comment out the F1Score calculation and remove it from the metrics dict.
Please help, I am totally stuck on this. I am converting TF 1 code to TF 2; the TF 1 code was working, but when I made the necessary changes to run it in TF 2 it started giving these errors.
Also, I was trying to use tf.keras.optimizers.Adam().minimize(), but it gave the error "No gradients provided for any variable". I use loss_fn for the loss argument of minimize and tv for the trainable variables:
def loss_fn():
    return tf.reduce_mean(-log_likelihood)

tv = dense_layer.trainable_weights + bilstm.trainable_weights
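For reference, a minimal self-contained sketch (illustrative layer and data, not the NER model above) of the call shape tf.keras.optimizers.Adam().minimize() expects in TF 2: a callable loss whose forward pass runs inside it, plus a var_list. If the loss is computed only from tensors created outside the callable, there is no gradient path and the "No gradients provided for any variable" error appears:
import tensorflow as tf

# Illustrative stand-ins for the layers and data in the question
dense = tf.keras.layers.Dense(4)
x = tf.random.uniform([8, 3])
y = tf.random.uniform([8, 4])
dense.build(x.shape)  # create the weights so trainable_weights is non-empty

def loss_fn():
    # The forward pass happens inside the callable, so gradients can be traced
    return tf.reduce_mean(tf.square(dense(x) - y))

optimizer = tf.keras.optimizers.Adam()
optimizer.minimize(loss_fn, var_list=dense.trainable_weights)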
I am seriously stuck; I have tried all the GitHub solutions posted for similar problems, but nothing worked.
Specifications of the code:
Tensorflow 2
GloVe for embeddings

TPU training in colab, custom model, data from my own GCP account: Cell just seems to hang, no progress or error message

I am trying to train on a colab TPU using data from my GCP account.
When I run the cell that starts the training, the cell just seems to hang, with no progress. I put a very low number of steps, so that the training should complete pretty quickly, about a minute on GPU, but it never finishes on TPU.
I am using a custom model, and the files saved on GCP are accessed using the solution given in this Stack Overflow answer: How to connect to private storage bucket using the Google Colab TPU
The model trains/runs just fine on GPU/CPU.
The full code is in this colab notebook here
https://colab.research.google.com/drive/13HgRJru0glOzn7m0b7tmVCO_VrRpa1XS?usp=sharing
And here's a google drive link to the sample data file
https://drive.google.com/file/d/10EFyxau97jLfeGaKugMevIyX-bobsFe5/view?usp=sharing
And below is the code from the colab notebook
!pip install transformers --q
%tensorflow_version 2.x

!gcloud auth login

'''NEED TO RUN THIS CELL TWICE TO AVOID ERROR'''
from google.colab import auth
auth.authenticate_user()

project_id = 'machinelearning-264918'
!gcloud config set project {project_id}

!pip install tfa-nightly
import tensorflow_addons as tfa

from transformers import TFBertModel, AutoModel
import tensorflow as tf
from tensorflow.keras.layers import (Dense,
                                     Dropout)
import os
import tensorflow_addons as tfa

logger = tf.get_logger()
logger.info(tf.__version__)

autotune = tf.data.experimental.AUTOTUNE

try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
    logger.info('Running with TPUStrategy on TPU {} with {} cores '
                .format(tpu.cluster_spec().as_dict()['worker'],
                        strategy.num_replicas_in_sync))
    batch_size = 3 * strategy.num_replicas_in_sync
except Exception:
    # raise ValueError
    strategy = tf.distribute.OneDeviceStrategy(device='/gpu:0')
    logger.warning('Failed initializing TPU! Running on GPU')
    batch_size = 3
from tensorflow.python.keras.mixed_precision.experimental import loss_scale_optimizer as lso
from tensorflow.python.distribute import parameter_server_strategy


def _minimize(strategy, tape, optimizer, loss, trainable_variables):
    with tape:
        if isinstance(optimizer, lso.LossScaleOptimizer):
            loss = optimizer.get_scaled_loss(loss)

    gradients = tape.gradient(loss, trainable_variables)
    # Whether to aggregate gradients outside of optimizer. This requires support
    # of the optimizer and doesn't work with ParameterServerStrategy and
    # CentralStorageStrategy.
    aggregate_grads_outside_optimizer = (
        optimizer._HAS_AGGREGATE_GRAD and  # pylint: disable=protected-access
        not isinstance(strategy.extended,
                       parameter_server_strategy.ParameterServerStrategyExtended))

    if aggregate_grads_outside_optimizer:
        # We aggregate gradients before unscaling them, in case a subclass of
        # LossScaleOptimizer all-reduces in fp16. All-reducing in fp16 can only be
        # done on scaled gradients, not unscaled gradients, for numeric stability.
        gradients = optimizer._aggregate_gradients(zip(gradients,  # pylint: disable=protected-access
                                                       trainable_variables))
    if isinstance(optimizer, lso.LossScaleOptimizer):
        gradients = optimizer.get_unscaled_gradients(gradients)

    gradients = optimizer._clip_gradients(gradients)  # pylint: disable=protected-access
    if trainable_variables:
        if aggregate_grads_outside_optimizer:
            optimizer.apply_gradients(
                zip(gradients, trainable_variables),
                experimental_aggregate_gradients=False)
        else:
            optimizer.apply_gradients(zip(gradients, trainable_variables))
class CustomModel(tf.keras.Model):
    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data

        batch_label = tf.reshape(y, (tf.size(y) / 2, 2), name=None)
        rs = tf.ragged.stack(x, axis=0)
        reg = rs.to_tensor()
        batch_input = tf.reshape(reg, (tf.shape(reg)[0] * tf.shape(reg)[1], tf.shape(reg)[2]))

        with tf.GradientTape() as tape:
            y_pred = self(batch_input, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(batch_label, y_pred, regularization_losses=self.losses)

        # Compute gradients
        _minimize(self.distribute_strategy, tape, self.optimizer, loss,
                  self.trainable_variables)
        # Update weights
        # self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}


def get_model(drop_out):
    sciBert = TFBertModel.from_pretrained('bert-base-uncased', from_pt=True)

    allFinal = tf.keras.Input(shape=(None,), dtype=tf.int32, name='inputN')

    '''Should posFinal and negFinal be concatenated, so there's only one call to sciBert'''
    allBertOut = sciBert(allFinal, training=True)

    allPoolConcat = tf.concat([
        allBertOut[0][:, 0],  # output of ff layer after last hidden state since it seems to be untrained in roberta
        tf.reduce_mean(allBertOut[0][:, 1:-1], axis=1)
    ], axis=1)

    postLayer = tf.keras.layers.Dense(768, activation='swish', name='postff')
    LayerNorm = tf.keras.layers.LayerNormalization(epsilon=1e-12, name="LayerNormO")
    postLayer2 = tf.keras.layers.Dense(768, activation='swish', name='2postff')
    classifier = tf.keras.layers.Dense(2, name='classifierff')

    postWeights = postLayer(allPoolConcat)
    postWeights = LayerNorm(postWeights)
    postWeights = Dropout(drop_out)(postWeights)
    postWeights2 = postLayer2(postWeights)
    allScores = classifier(postWeights2)

    model = CustomModel(inputs=allFinal, outputs=allScores)
    return model
#tf.function
def _parse_example(example_proto):
    features = {
        'sciBert_SentenceIndex': tf.io.VarLenFeature(dtype=tf.int64),
        'SciBert_IDs': tf.io.VarLenFeature(dtype=tf.int64),
    }
    parsed_example_dict = tf.io.parse_single_example(example_proto, features)
    sentencePositions = parsed_example_dict['sciBert_SentenceIndex']
    passageIds = parsed_example_dict['SciBert_IDs']
    sentencePositions = tf.sparse.to_dense(sentencePositions)
    bertIds = tf.sparse.to_dense(passageIds)
    sentencePositions = tf.cast(sentencePositions, dtype=tf.int32)
    passageIds = tf.cast(passageIds, dtype=tf.int32)
    length = tf.shape(
        sentencePositions, out_type=tf.dtypes.int32, name='shape'
    )
    lengthMinusOne = tf.math.subtract(
        length, 1, name='SubtractOne'
    )
    # create random numbers for a sentence index up to the 2nd-to-last index;
    # the last index is just the last position of the non-padded bertID
    startRandSentIndex = tf.random.uniform(
        shape=[1], minval=0, maxval=lengthMinusOne[0], dtype=tf.dtypes.int32, seed=None, name=None)
    # Get the end point for that sentence
    endRandSentIndex = tf.math.add(startRandSentIndex, 1, name=None)
    # last position of the non-padded bertID
    lastPosition = length - 1
    # extract BertID positions for sentence start/end and bertID end
    startSentencePosit = tf.gather_nd(sentencePositions, [startRandSentIndex], batch_dims=0)
    endSentencePosit = tf.gather_nd(sentencePositions, [endRandSentIndex], batch_dims=0)
    lastPassagePosit = tf.gather_nd(sentencePositions, [lastPosition], batch_dims=0)
    # Get slices of BertIDs for the query, and the rest
    firstPiece = tf.slice(bertIds, [0], [startSentencePosit[0]])
    queryPiece = tf.slice(bertIds, [startSentencePosit[0]], [endSentencePosit[0] - startSentencePosit[0]])
    lastPiece = tf.slice(bertIds, [endSentencePosit[0]], [lastPassagePosit[0] - endSentencePosit[0]])
    # concat rest of passage
    restPassagePiece = tf.concat([firstPiece, lastPiece], axis=0)
    # Clip
    queryPiece = queryPiece[0:256]
    restPassagePiece = restPassagePiece[0:510]
    # add special tokens for proper input into the model
    return tf.cast(queryPiece, dtype=tf.int32), tf.cast(restPassagePiece, dtype=tf.int32)


#tf.function
def clip_seq_to_len(seq, num_tokens=512):
    seq_len = tf.shape(seq)[0]
    if seq_len > 511:
        return seq[:511]
    return seq[:]


#tf.function
def make_samples(query_a, passage_a, query_b, passage_b):
    CLS_inputID = tf.constant([102])
    SEP_inputID = tf.constant([103])

    positive_sample_a = clip_seq_to_len(tf.concat([CLS_inputID, query_a, SEP_inputID, passage_a], axis=-1))
    positive_sample_b = clip_seq_to_len(tf.concat([CLS_inputID, query_b, SEP_inputID, passage_b], axis=-1))
    negative_sample_a = clip_seq_to_len(tf.concat([CLS_inputID, query_a, SEP_inputID, passage_b], axis=-1))
    negative_sample_b = clip_seq_to_len(tf.concat([CLS_inputID, query_b, SEP_inputID, passage_a], axis=-1))

    positive_sample_a = tf.concat([positive_sample_a, SEP_inputID], axis=-1)
    positive_sample_b = tf.concat([positive_sample_b, SEP_inputID], axis=-1)
    negative_sample_a = tf.concat([negative_sample_a, SEP_inputID], axis=-1)
    negative_sample_b = tf.concat([negative_sample_b, SEP_inputID], axis=-1)
    return positive_sample_a, positive_sample_b, negative_sample_a, negative_sample_b


#tf.function
def get_samples(example_a, example_b):
    samples = make_samples(*_parse_example(example_a), *_parse_example(example_b))
    return samples
config = {
    'drop_out': 0.1
}

loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

with strategy.scope():
    model = get_model(**config)
    model.compile(loss=loss_fn,
                  optimizer=tfa.optimizers.AdamW(weight_decay=1e-5, learning_rate=3e-4, epsilon=1e-07),
                  run_eagerly=False)

config_name = 'model_b'
base_dir = 'gs://bdora-semanticscholar'
model_dir = os.path.join(base_dir, config_name)
# tensorboard_dir = os.path.join(model_dir, 'logs_' + str(time()))
tfrecords_pattern_train = os.path.join(base_dir, 'VersionB_00022*')
tfrecords_pattern_train2 = os.path.join(base_dir, 'VersionB_00022*')


#tf.function
def gen():
    while True:
        yield ([1, 0], [1, 0], [0, 1], [0, 1])


batchNumber = batch_size
run_eagerly = False

with strategy.scope():
    filenames = tf.io.gfile.glob(tfrecords_pattern_train)
    train_dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=autotune)
    filenames = tf.io.gfile.glob(tfrecords_pattern_train)
    neg_dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=autotune)

    train_dataset = train_dataset.shuffle(150_000, seed=1000, reshuffle_each_iteration=True)
    neg_dataset = neg_dataset.shuffle(150_000, seed=2000, reshuffle_each_iteration=True)

    train_datasetC = tf.data.Dataset.zip((train_dataset, neg_dataset))
    train_datasetC = train_datasetC.map(get_samples, num_parallel_calls=autotune)
    train_datasetC = train_datasetC.shuffle(1024, seed=1000, reshuffle_each_iteration=True)
    train_datasetC = train_datasetC.padded_batch(batchNumber, padding_values=(0, 0, 0, 0))

    datasetLabels = tf.data.Dataset.from_generator(
        gen,
        (tf.int32, tf.int32, tf.int32, tf.int32),
        (tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None])))
    datasetLabels = datasetLabels.batch(batchNumber)

    train_datasetFinal = tf.data.Dataset.zip((train_datasetC, datasetLabels))
    train_datasetFinal = train_datasetFinal.prefetch(autotune)
    train_datasetFinal = train_datasetFinal.repeat()
    train_datasetFinal = train_datasetFinal.apply(tf.data.experimental.ignore_errors())

model.fit(train_datasetFinal, steps_per_epoch=100, epochs=3)
And this is the only output I get
Epoch 1/3
WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.
I found this GitHub issue discussion [1] that you can refer to.
It's not an error, it just means it's not updating those variables. Those variables (pooler) are not used when doing sequence classification.
[1] https://github.com/tensorflow/tensorflow/issues/37501
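If you want to double-check that, a rough sketch that lists which variables receive no gradient for one forward pass (model, batch_input and batch_label are assumed to come from the code above):
import tensorflow as tf

with tf.GradientTape() as tape:
    y_pred = model(batch_input, training=True)
    loss = model.compiled_loss(batch_label, y_pred)

grads = tape.gradient(loss, model.trainable_variables)
for var, grad in zip(model.trainable_variables, grads):
    if grad is None:
        print("no gradient for:", var.name)  # e.g. the BERT pooler kernel/bias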

estimator.train throws ValueError: model_fn should return an EstimatorSpec

Here's the code I'm using. I've set a breakpoint at what is, for me, line 304:
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
Has anyone seen this? I'm certain I have the correct versions of TensorFlow and BERT installed.
The complete stack trace is as follows....
Exception has occurred: ValueError
model_fn should return an EstimatorSpec.
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1153, in _call_model_fn
raise ValueError('model_fn should return an EstimatorSpec.')
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1191, in _train_model_default
features, labels, ModeKeys.TRAIN, self.config)
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\brownru\eclipse-workspace\tiaaNLPPython\org\tiaa\ai\penelope\bertNLP\sentiment\sentiment.py", line 304, in <module>
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
File "C:\Program Files\Python36\Lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Program Files\Python36\Lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "C:\Program Files\Python36\Lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
ValueError: model_fn should return an EstimatorSpec.
This code is my attempt to run some Google Colab code from here:
https://colab.research.google.com/github/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb#scrollTo=t6Nukby2EB6-
# Copyright 2019 Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# install --proxy http://proxy.ops.tiaa-cref.org:8080 tensorFlow
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_estimator as tfe
from datetime import datetime
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

# Set the output directory for saving model file
# Optionally, set a GCP bucket location
OUTPUT_DIR = r'C:\Users\brownru\Documents\npsExplanationComplains\sentimentOutput'

##markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = True  ##param {type:"boolean"}
##markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False  ##param {type:"boolean"}
BUCKET = 'BUCKET_NAME'  ##param {type:"string"}

if USE_BUCKET:
    OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
    #from google.colab import auth
    #auth.authenticate_user()

if DO_DELETE:
    try:
        tf.gfile.DeleteRecursively(OUTPUT_DIR)
    except:
        # Doesn't matter if the directory didn't exist
        pass

tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))
'''
First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).
'''
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
    data = {}
    data["sentence"] = []
    data["sentiment"] = []
    for file_path in os.listdir(directory):
        with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
            data["sentence"].append(f.read())
            data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
    return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
    pos_df = load_directory_data(os.path.join(directory, "pos"))
    neg_df = load_directory_data(os.path.join(directory, "neg"))
    pos_df["polarity"] = 1
    neg_df["polarity"] = 0
    return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets():
    #dataset = tf.keras.utils.get_file(fname="aclImdb.tar.gz", origin="http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/aclImdb_v1.tar.gz", extract=True)
    trainPath = r'C:\Users\brownru\.keras\datasets\aclImdb\train'
    testPath = r'C:\Users\brownru\.keras\datasets\aclImdb\test'
    train_df = load_dataset(trainPath)
    test_df = load_dataset(testPath)
    return train_df, test_df

train, test = download_and_load_datasets()

# To keep training fast, we'll take a sample of 5000 train and test examples, respectively.
train = train.sample(5000)
test = test.sample(5000)

train.columns
# Index(['sentence', 'sentiment', 'polarity'], dtype='object')

# For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respectively)
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]

# Data preprocessing: we'll need to transform our data into a format BERT understands. This involves two steps. First, we create InputExamples using the constructor provided in the BERT library.
# text_a is the text we want to classify, which in this case is the Request field in our Dataframe.
# text_b is used if we're training a model to understand the relationship between sentences (i.e. is text_b a translation of text_a? Is text_b an answer to the question asked by text_a?). This doesn't apply to our task, so we can leave text_b blank.
# label is the label for our example, i.e. True, False

# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None,  # Globally unique ID for bookkeeping, unused in this example
                                                                             text_a=x[DATA_COLUMN],
                                                                             text_b=None,
                                                                             label=x[LABEL_COLUMN]), axis=1)
test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None,
                                                                           text_a=x[DATA_COLUMN],
                                                                           text_b=None,
                                                                           label=x[LABEL_COLUMN]), axis=1)

# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/1.tar.gz"

def create_tokenizer_from_hub_module():
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"], tokenization_info["do_lower_case"]])
    return bert.tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
# Creating a model
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels, num_labels):
    """Creates a classification model."""
    bert_module = hub.Module(BERT_MODEL_HUB, trainable=True)
    bert_inputs = dict(input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids)
    bert_outputs = bert_module(inputs=bert_inputs, signature="tokens", as_dict=True)

    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]
    hidden_size = output_layer.shape[-1].value

    # Create our own layer to tune for politeness data.
    output_weights = tf.get_variable("output_weights", [num_labels, hidden_size], initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        # Dropout helps prevent overfitting
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
        # If we're predicting, we want predicted labels and the probabilities.
        if is_predicting:
            return (predicted_labels, log_probs)

        # If we're train/eval, compute loss between predicted and actual label
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, predicted_labels, log_probs)
'''Next we'll wrap our model function in a model_fn_builder function that adapts our model to work for training, evaluation, and prediction.'''

# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
    """Returns `model_fn` closure for TPUEstimator."""
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_predicting = (mode == tfe.estimator.ModeKeys.PREDICT)

        # TRAIN and EVAL
        if not is_predicting:
            (loss, predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
            train_op = bert.optimization.create_optimizer(loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

            # Calculate evaluation metrics.
            def metric_fn(label_ids, predicted_labels):
                accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
                f1_score = tf.contrib.metrics.f1_score(
                    label_ids,
                    predicted_labels)
                auc = tf.metrics.auc(
                    label_ids,
                    predicted_labels)
                recall = tf.metrics.recall(
                    label_ids,
                    predicted_labels)
                precision = tf.metrics.precision(
                    label_ids,
                    predicted_labels)
                true_pos = tf.metrics.true_positives(
                    label_ids,
                    predicted_labels)
                true_neg = tf.metrics.true_negatives(
                    label_ids,
                    predicted_labels)
                false_pos = tf.metrics.false_positives(
                    label_ids,
                    predicted_labels)
                false_neg = tf.metrics.false_negatives(
                    label_ids,
                    predicted_labels)
                return {
                    "eval_accuracy": accuracy,
                    "f1_score": f1_score,
                    "auc": auc,
                    "precision": precision,
                    "recall": recall,
                    "true_positives": true_pos,
                    "true_negatives": true_neg,
                    "false_positives": false_pos,
                    "false_negatives": false_neg
                }

            eval_metrics = metric_fn(label_ids, predicted_labels)

            if mode == tfe.estimator.ModeKeys.TRAIN:
                return tfe.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
            else:
                return tfe.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics)
        else:
            (predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
            predictions = {'probabilities': log_probs, 'labels': predicted_labels}
            return tfe.estimator.EstimatorSpec(mode, predictions=predictions)

    # Return the actual model function in the closure
    return model_fn
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

# Compute train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

# Specify output directory and number of checkpoint steps to save
run_config = tfe.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

model_fn = model_fn_builder(
    num_labels=len(label_list),
    learning_rate=LEARNING_RATE,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps)

estimator = tfe.estimator.Estimator(
    model_fn=model_fn,
    config=run_config,
    params={"batch_size": BATCH_SIZE}
)

# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

# Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)
# Now let's use our test data to see how well our model did:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

estimator.evaluate(input_fn=test_input_fn, steps=None)

def getPrediction(in_sentences):
    labels = ["Negative", "Positive"]
    input_examples = [run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0) for x in in_sentences]  # here, 0 is just a dummy label
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)
    return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]

pred_sentences = [
    "That movie was absolutely awful",
    "The acting was a bit lacking",
    "The film was creative and surprising",
    "Absolutely fantastic!"
]

predictions = getPrediction(pred_sentences)
predictions
Horrifyingly, the answer to this problem was all about indentation. The Google Colab example posted above contains a function called model_fn, which appears to be a wrapper for another function that actually creates a model to pass to the TensorFlow Estimator. While I was debugging this in VS Code I had placed a breakpoint in the function to try to sort out what was happening, and it kept skipping over the middle part where it computes false_pos, false_neg, etc.
Evidently I had somehow broken the indentation while editing in VS Code, and the functions were nested such that pylint didn't identify any syntax problems; it just skipped over the function.
The fix was to recopy the entire model_fn function from the Colab notebook and, voila, it worked.

Unable to use core Estimator with contrib Predictor

I'm using canned estimators and am struggling with poor predict performance, so I'm trying to use tf.contrib.predictor to improve my inference performance. I've made this minimal example to reproduce my problem:
import tensorflow as tf
from tensorflow.contrib import predictor

def serving_input_fn():
    x = tf.placeholder(dtype=tf.string, shape=[1], name='x')
    inputs = {'x': x}
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)

input_feature_column = tf.feature_column.numeric_column('x', shape=[1])
estimator = tf.estimator.DNNRegressor(
    feature_columns=[input_feature_column],
    hidden_units=[10, 20, 10],
    model_dir="model_dir\\predictor-test")

estimator_predictor = predictor.from_estimator(estimator, serving_input_fn)

estimator_predictor({"inputs": ["1.0"]})
This yields the following exception:
UnimplementedError (see above for traceback): Cast string to float is not supported
[[Node: dnn/input_from_feature_columns/input_layer/x/ToFloat = Cast[DstT=DT_FLOAT, SrcT=DT_STRING, _device="/job:localhost/replica:0/task:0/device:CPU:0"](dnn/input_from_feature_columns/input_layer/x/ExpandDims)]]
I've tried using tf.estimator.export.TensorServingInputReceiver instead of ServingInputReceiver in my serving_input_fn(), so that I can feed my model with a numerical tensor which is what I want:
def serving_input_fn():
    x = tf.placeholder(dtype=tf.float32, shape=[1], name='x')
    return tf.estimator.export.TensorServingInputReceiver(x, x)
but then I get the following exception in my predictor.from_estimator() call:
ValueError: features should be a dictionary of Tensors. Given type: <class 'tensorflow.python.framework.ops.Tensor'>
Any ideas?
My understanding of all of this is not really solid but I got it working and given the size of the community, I'll try to share what I did.
First, I'm running tensorflow 1.5 binaries with this patch applied manually.
The exact code I'm running is this:
def serving_input_fn():
    x = tf.placeholder(dtype=tf.float32, shape=[3500], name='x')
    inputs = {'x': x}
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="{}/model_dir_{}/model.ckpt-103712".format(script_dir, 3))

estimator_predictor = tf.contrib.predictor.from_estimator(
    estimator, serving_input_fn)

p = estimator_predictor(
    {"x": np.array(sample.normalized.input_data)})
My case is a bit different than your example because I'm using a custom Estimator but in your case, I guess you should try something like this:
def serving_input_fn():
    x = tf.placeholder(dtype=tf.float32, shape=[1], name='x')
    inputs = {'x': x}
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)

estimator = ...

estimator_predictor = tf.contrib.predictor.from_estimator(
    estimator, serving_input_fn)

estimator_predictor({"x": [1.0]})
The error is in the following line:
estimator_predictor({"inputs": ["1.0"]})
Please take 1.0 out of the quotes; currently it's a string.
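In other words, the feed becomes numeric (a one-line rendering of the suggestion above):
estimator_predictor({"inputs": [1.0]})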
After having worked on this for a couple of days, I want to share what I have done. The following code is also available from https://github.com/dage/tensorflow-estimator-predictor-example
TL;DR: predictor works best with custom estimators and the performance increase is massive.
import tensorflow as tf
import numpy as np
import datetime
import time

FEATURES_RANK = 3  # The number of inputs
LABELS_RANK = 2    # The number of outputs

# Returns a numpy array of rank LABELS_RANK based on the features argument.
# Can be used when creating a training dataset.
def features_to_labels(features):
    sum_column = features.sum(1).reshape(features.shape[0], 1)
    labels = np.hstack((sum_column * i for i in range(1, LABELS_RANK + 1)))
    return labels

def serving_input_fn():
    x = tf.placeholder(dtype=tf.float32, shape=[None, FEATURES_RANK], name='x')  # match dtype in input_fn
    inputs = {'x': x}
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)

def model_fn(features, labels, mode):
    net = features["x"]  # input
    for units in [4, 8, 4]:  # hidden units
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
        net = tf.layers.dropout(net, rate=0.1)
    output = tf.layers.dense(net, LABELS_RANK, activation=None)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=output, export_outputs={"out": tf.estimator.export.PredictOutput(output)})

    loss = tf.losses.mean_squared_error(labels, output)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss)

    optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

# expecting a numpy array of shape (1, FEATURE_RANK) for constant_feature argument
def input_fn(num_samples, constant_feature=None, is_infinite=True):
    feature_values = np.full((num_samples, FEATURES_RANK), constant_feature) if isinstance(constant_feature, np.ndarray) else np.random.rand(num_samples, FEATURES_RANK)
    feature_values = np.float32(feature_values)  # match dtype in serving_input_fn
    labels = features_to_labels(feature_values)
    dataset = tf.data.Dataset.from_tensors(({"x": feature_values}, labels))
    if is_infinite:
        dataset = dataset.repeat()
    return dataset.make_one_shot_iterator().get_next()

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="model_dir\\estimator-predictor-test-{date:%Y-%m-%d %H.%M.%S}".format(date=datetime.datetime.now()))

train = estimator.train(input_fn=lambda: input_fn(50), steps=500)
evaluate = estimator.evaluate(input_fn=lambda: input_fn(20), steps=1)

predictor = tf.contrib.predictor.from_estimator(estimator, serving_input_fn)

consistency_check_features = np.random.rand(1, FEATURES_RANK)
consistency_check_labels = features_to_labels(consistency_check_features)

num_calls_predictor = 100
predictor_input = {"x": consistency_check_features}
start_time_predictor = time.clock()
for i in range(num_calls_predictor):
    predictor_prediction = predictor(predictor_input)
delta_time_predictor = 1. / num_calls_predictor * (time.clock() - start_time_predictor)

num_calls_estimator_predict = 10
estimator_input = lambda: input_fn(1, consistency_check_features, False)
start_time_estimator_predict = time.clock()
for i in range(num_calls_estimator_predict):
    estimator_prediction = list(estimator.predict(input_fn=estimator_input))
delta_time_estimator = 1. / num_calls_estimator_predict * (time.clock() - start_time_estimator_predict)

print("{} --> {}\n  predictor={}\n  estimator={}.\n".format(consistency_check_features, consistency_check_labels, predictor_prediction, estimator_prediction))
print("Time used per estimator.predict() call: {:.5f}s, predictor(): {:.5f}s ==> predictor is {:.0f}x faster!".format(delta_time_estimator, delta_time_predictor, delta_time_estimator / delta_time_predictor))
On my laptop I get the following results:
[[0.55424854 0.98057611 0.98604857]] --> [[2.52087322 5.04174644]]
predictor={'output': array([[2.5221248, 5.049496 ]], dtype=float32)}
estimator=[array([2.5221248, 5.049496 ], dtype=float32)].
Time used per estimator.predict() call: 0.30071s, predictor(): 0.00057s ==> predictor is 530x faster!

Tensorflow classifier.export_savedmodel (Beginner)

I know about the "Serving a Tensorflow Model" page
https://www.tensorflow.org/serving/serving_basic
but those functions assume you're using tf.Session(), which the DNNClassifier tutorial does not. I then looked at the API doc for DNNClassifier; it has an export_savedmodel function (the export function is deprecated) and it seems simple enough, but I am getting a "'NoneType' object is not iterable" error, which is supposed to mean I'm passing in an empty variable. I'm unsure what I need to change. I've essentially copied and pasted the code from the get_started/tflearn page on tensorflow.org and then added
directoryName = "temp"
def serving_input_fn():
print("asdf")
classifier.export_savedmodel(
directoryName,
serving_input_fn
)
just after the classifier.fit function call. The other parameters for export_savedmodel are optional, I believe. Any ideas?
Tutorial with Code:
https://www.tensorflow.org/get_started/tflearn#construct_a_deep_neural_network_classifier
API Doc for export_savedmodel
https://www.tensorflow.org/api_docs/python/tf/contrib/learn/DNNClassifier#export_savedmodel
There are two kinds of TensorFlow applications:
The functions that assume you are using tf.Session() are functions from "low level" Tensorflow examples, and
the DNNClassifier tutorial is a "high level" Tensorflow application.
I'm going to explain how to export "high level" Tensorflow models (using export_savedmodel).
The function export_savedmodel requires the argument serving_input_receiver_fn, a function without arguments that defines the input for the model and the predictor. Therefore, you must create your own serving_input_receiver_fn, where the model input type matches the model input in the training script, and the predictor input type matches the predictor input in the testing script.
On the other hand, if you create a custom model, you must define the export_outputs, built with the function tf.estimator.export.PredictOutput, whose input is a dictionary that defines names which have to match the names of the predictor outputs in the testing script.
For example:
TRAINING SCRIPT
def serving_input_receiver_fn():
    serialized_tf_example = tf.placeholder(dtype=tf.string, shape=[None], name='input_tensors')
    receiver_tensors = {"predictor_inputs": serialized_tf_example}
    feature_spec = {"words": tf.FixedLenFeature([25], tf.int64)}
    features = tf.parse_example(serialized_tf_example, feature_spec)
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

def estimator_spec_for_softmax_classification(logits, labels, mode):
    predicted_classes = tf.argmax(logits, 1)
    if (mode == tf.estimator.ModeKeys.PREDICT):
        export_outputs = {'predict_output': tf.estimator.export.PredictOutput({"pred_output_classes": predicted_classes, 'probabilities': tf.nn.softmax(logits)})}
        return tf.estimator.EstimatorSpec(mode=mode, predictions={'class': predicted_classes, 'prob': tf.nn.softmax(logits)}, export_outputs=export_outputs)  # IMPORTANT!!!

    onehot_labels = tf.one_hot(labels, 31, 1, 0)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)
    if (mode == tf.estimator.ModeKeys.TRAIN):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    eval_metric_ops = {'accuracy': tf.metrics.accuracy(labels=labels, predictions=predicted_classes)}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

def model_custom(features, labels, mode):
    bow_column = tf.feature_column.categorical_column_with_identity("words", num_buckets=1000)
    bow_embedding_column = tf.feature_column.embedding_column(bow_column, dimension=50)
    bow = tf.feature_column.input_layer(features, feature_columns=[bow_embedding_column])
    logits = tf.layers.dense(bow, 31, activation=None)
    return estimator_spec_for_softmax_classification(logits=logits, labels=labels, mode=mode)

def main():
    # ...
    # preprocess -> features_train_set and labels_train_set
    # ...
    classifier = tf.estimator.Estimator(model_fn=model_custom)
    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={"words": features_train_set}, y=labels_train_set, batch_size=batch_size_param, num_epochs=None, shuffle=True)
    classifier.train(input_fn=train_input_fn, steps=100)
    full_model_dir = classifier.export_savedmodel(export_dir_base="C:/models/directory_base", serving_input_receiver_fn=serving_input_receiver_fn)
TESTING SCRIPT
def main():
    # ...
    # preprocess -> features_test_set
    # ...
    with tf.Session() as sess:
        tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], full_model_dir)
        predictor = tf.contrib.predictor.from_saved_model(full_model_dir)
        model_input = tf.train.Example(features=tf.train.Features(feature={"words": tf.train.Feature(int64_list=tf.train.Int64List(value=features_test_set))}))
        model_input = model_input.SerializeToString()
        output_dict = predictor({"predictor_inputs": [model_input]})
        y_predicted = output_dict["pred_output_classes"][0]
(Code tested in Python 3.6.3, Tensorflow 1.4.0)
If you try to use predictor with TensorFlow > 1.6 you can get this error:
signature_def_key "serving_default". Available signatures are ['predict']. Original error:
No SignatureDef with key 'serving_default' found in MetaGraphDef.
Here is a working example, tested on 1.7.0:
SAVING:
First you need to define the feature lengths in dict format like this:
feature_spec = {'x': tf.FixedLenFeature([4],tf.float32)}
Then you have to build a function that has a placeholder with the same shape as the features and returns a tf.estimator.export.ServingInputReceiver:
def serving_input_receiver_fn():
    serialized_tf_example = tf.placeholder(dtype=tf.string,
                                           shape=[None],
                                           name='input_tensors')
    receiver_tensors = {'inputs': serialized_tf_example}
    features = tf.parse_example(serialized_tf_example, feature_spec)
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
Then just save with export_savedmodel :
classifier.export_savedmodel(dir_path, serving_input_receiver_fn)
full example code:
import os
from six.moves.urllib.request import urlopen

import numpy as np
import tensorflow as tf

dir_path = os.path.dirname('.')
IRIS_TRAINING = os.path.join(dir_path, "iris_training.csv")
IRIS_TEST = os.path.join(dir_path, "iris_test.csv")

feature_spec = {'x': tf.FixedLenFeature([4], tf.float32)}

def serving_input_receiver_fn():
    serialized_tf_example = tf.placeholder(dtype=tf.string,
                                           shape=[None],
                                           name='input_tensors')
    receiver_tensors = {'inputs': serialized_tf_example}
    features = tf.parse_example(serialized_tf_example, feature_spec)
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

def main():
    training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_TRAINING,
        target_dtype=np.int,
        features_dtype=np.float32)
    test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_TEST,
        target_dtype=np.int,
        features_dtype=np.float32)

    feature_columns = [tf.feature_column.numeric_column("x", shape=[4])]

    classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[10, 20, 10],
                                            n_classes=3,
                                            model_dir=dir_path)
    # Define the training inputs
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": np.array(training_set.data)},
        y=np.array(training_set.target),
        num_epochs=None,
        shuffle=True)

    # Train model.
    classifier.train(input_fn=train_input_fn, steps=200)

    classifier.export_savedmodel(dir_path, serving_input_receiver_fn)

if __name__ == "__main__":
    main()
Restoring
Now let's restore the model :
import tensorflow as tf
import os

dir_path = os.path.dirname('.')  # current directory
exported_path = os.path.join(dir_path, "1536315752")

def main():
    with tf.Session() as sess:
        tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], exported_path)

        model_input = tf.train.Example(features=tf.train.Features(feature={
            'x': tf.train.Feature(float_list=tf.train.FloatList(value=[6.4, 3.2, 4.5, 1.5]))
        }))

        predictor = tf.contrib.predictor.from_saved_model(exported_path)
        input_tensor = tf.get_default_graph().get_tensor_by_name("input_tensors:0")
        model_input = model_input.SerializeToString()
        output_dict = predictor({"inputs": [model_input]})
        print(" prediction is ", output_dict['scores'])

if __name__ == "__main__":
    main()
There is also an IPython notebook demo example with data and explanation.
There are two possible questions here, and two possible answers. First, you encounter a missing session for the DNNClassifier, which uses the higher-level Estimator API (as opposed to the lower-level APIs where you manipulate the ops yourself). The nice thing about TensorFlow is that all high- and low-level APIs are more or less interoperable, so if you want a session and want to do something with that session, it is as simple as adding:
sess = tf.get_default_session()
Then you can start hooking in the remainder of the tutorial.
The second interpretation of your question is: what about export_savedmodel? Actually, export_savedmodel and the sample code from the serving tutorial try to achieve the same goal. When you are training your graph, you set up some infrastructure to feed input to the graph (typically batches from a training dataset); however, when you switch to 'serving' you will often read your input from somewhere else, and you need some separate infrastructure that replaces the input of the graph used for training. The bottom line is that the serving_input_fn(), which you filled with a print, should in essence return an input op. This is also said in the documentation:
serving_input_fn: A function that takes no argument and returns an
InputFnOps.
Hence, instead of print("asdf"), it should do something similar to adding an input chain (which should be similar to what builder.add_meta_graph_and_variables is also adding).
Examples of serving_input_fn()s can be found in the cloudml sample (https://github.com/GoogleCloudPlatform/cloudml-samples/blob/master/census/customestimator/trainer/model.py#L240), such as the following, which serves input from JSON:
def json_serving_input_fn():
    """Build the serving inputs."""
    inputs = {}
    for feat in INPUT_COLUMNS:
        inputs[feat.name] = tf.placeholder(shape=[None], dtype=feat.dtype)
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)
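With a raw-tensor receiver like this, a predictor built from the export is fed the features directly rather than serialized tf.Example strings. A rough sketch with hypothetical column names ('age', 'income') standing in for INPUT_COLUMNS, and export_dir for the timestamped export folder:
from tensorflow.contrib import predictor

# 'age' and 'income' are hypothetical feature names; export_dir is the timestamped export directory
pred_fn = predictor.from_saved_model(export_dir)
print(pred_fn({'age': [35.0], 'income': [40000.0]}))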