I would like to add an extra signature to a SavedModel that returns a business description, and serve it with TensorFlow Serving.
@tf.function
def info():
    return json.dumps({
        'name': 'My model',
        'description': 'This is model description.',
        'project': 'Product ABCD',
        'type': 'some_type',
        ...
    })
As described in the TensorFlow Core guide https://www.tensorflow.org/guide/saved_model#identifying_a_signature_to_export, I can easily export a signature that accepts arguments by providing a tf.TensorSpec.
Is it possible to export a signature without arguments and call it on the server?
Added after @EricMcLachlan's comments:
When I try to call a function with an empty input signature (input_signature=[]) using code like this:
data = json.dumps({"signature_name": "info", "inputs": None})
headers = {"content-type": "application/json"}
json_response = requests.post('http://localhost:8501/v1/models/my_model:predict', data=data, headers=headers)
I get the following error in the response:
'_content': b'{ "error": "Failed to get input map for signature: info" }'
Defining the Signature:
I was going to write my own example, but here's a great example provided by @AntPhitlok in another StackOverflow post:
class MyModule(tf.Module):
    def __init__(self, model, other_variable):
        self.model = model
        self._other_variable = other_variable

    @tf.function(input_signature=[tf.TensorSpec(shape=(None, None, 1), dtype=tf.float32)])
    def score(self, waveform):
        result = self.model(waveform)
        return { "scores": result }

    @tf.function(input_signature=[])
    def metadata(self):
        return { "other_variable": self._other_variable }
In this case, they're serving a Module, but it could have been a Keras model as well.
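Saving the module with both signatures could then look something like this (a minimal sketch; the Keras model, export path, and metadata value are placeholders of my own, not from the original post):
import tensorflow as tf

# `model` stands in for whatever trained model MyModule wraps.
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
module = MyModule(model, tf.constant("Product ABCD"))

tf.saved_model.save(
    module, "/tmp/my_model/1",
    signatures={
        "score": module.score,
        "metadata": module.metadata,
    })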
Using the Serving:
I am not 100% sure how to access the serving (I haven't done it myself yet), but I think you'll be able to access it similarly to this:
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc

stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

request = predict_pb2.PredictRequest()
request.model_spec.name = model_name
request.model_spec.signature_name = 'serving_default'
request.model_spec.version_label = self.version

tensor_proto = tf.make_tensor_proto(my_input_data, dtype=tf.float32)
request.inputs['my_signatures_input'].CopyFrom(tensor_proto)

try:
    response = stub.Predict(request, MAX_TIMEOUT)
except Exception as ex:
    logging.error(str(ex))
    return [None] * len(batch_of_texts)
Here I'm using gRPC to access the TensorFlow Server.
You'd probably need to substitute 'serving_default' with your signature name. Similarly, 'my_signatures_input' should match the input of your tf.function (in your case, I think it's empty).
This is a normal, standard Keras-type prediction that piggybacks off predict_pb2.PredictRequest. It might be necessary to create a custom protobuf, but that's a bit beyond my abilities at this point.
I hope it's enough to get you going.
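As a side note, TensorFlow Serving also exposes a built-in metadata endpoint over REST that returns the model's signature definitions; depending on what you need, it may already cover part of what a custom info signature would provide. A tiny sketch, assuming the model name my_model and the default REST port:
import requests

# Returns the SignatureDefs of the served model as JSON.
response = requests.get('http://localhost:8501/v1/models/my_model/metadata')
print(response.json())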
I am using the Language Interpretability Toolkit (LIT) to load and analyze a BERT model that I pre-trained on an NER task.
However, when I'm starting the LIT script with the path to my pre-trained model passed to it, it fails to initialize the weights and tells me:
modeling_utils.py:648] loading weights file bert_remote/examples/token-classification/Data/Models/results_21_03_04_cleaned_annotations/04.03._8_16_5e-5_cleaned_annotations/04-03-2021 (15.22.23)/pytorch_model.bin
modeling_utils.py:739] Weights of BertForTokenClassification not initialized from pretrained model: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
modeling_utils.py:745] Weights from pretrained model not used in BertForTokenClassification: ['bert.embeddings.position_ids']
It then simply uses the bert-base-german-cased version of BERT, which of course doesn't have my custom labels and thus fails to predict anything. I think it might have to do with PyTorch, but I can't find the error.
If relevant, here is how I load my dataset into CoNLL 2003 format (modification of the dataloader scripts found here):
def __init__(self):
    # Read CoNLL test files
    self._examples = []
    data_path = "lit_remote/lit_nlp/examples/datasets/NER_Data"
    with open(os.path.join(data_path, "test.txt"), "r", encoding="utf-8") as f:
        lines = f.readlines()
    for line in lines[:2000]:
        if line != "\n":
            token, label = line.split(" ")
            self._examples.append({
                'token': token,
                'label': label,
            })
        else:
            self._examples.append({
                'token': "\n",
                'label': "O"
            })

def spec(self):
    return {
        'token': lit_types.Tokens(),
        'label': lit_types.SequenceTags(align="token"),
    }
And this is how I initialize the model and start the LIT server (modification of the simple_pytorch_demo.py script found here):
def __init__(self, model_name_or_path):
    self.tokenizer = transformers.AutoTokenizer.from_pretrained(
        model_name_or_path)
    model_config = transformers.AutoConfig.from_pretrained(
        model_name_or_path,
        num_labels=15,  # FIXME CHANGE
        output_hidden_states=True,
        output_attentions=True,
    )
    # This is just a regular PyTorch model.
    self.model = _from_pretrained(
        transformers.AutoModelForTokenClassification,
        model_name_or_path,
        config=model_config)
    self.model.eval()

## Some omitted snippets here

def input_spec(self) -> lit_types.Spec:
    return {
        "token": lit_types.Tokens(),
        "label": lit_types.SequenceTags(align="token")
    }

def output_spec(self) -> lit_types.Spec:
    return {
        "tokens": lit_types.Tokens(),
        "probas": lit_types.MulticlassPreds(parent="label", vocab=self.LABELS),
        "cls_emb": lit_types.Embeddings()
    }
This actually seems to be expected behaviour. In the documentation of the GPT models the HuggingFace team writes:
This will issue a warning about some of the pretrained weights not being used and some weights being randomly initialized. That’s because we are throwing away the pretraining head of the BERT model to replace it with a classification head which is randomly initialized.
So it does not seem to be a problem for fine-tuning. In my use case described above, it worked despite the warning as well.
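If you want to double-check that your fine-tuned classification head was actually loaded (and that only the unused pooler was re-initialized), a quick sanity check might look like this (a sketch; the model path and num_labels are assumptions based on the snippets above):
import os
import torch
import transformers

model_path = "path/to/fine_tuned_model"  # hypothetical path
model = transformers.AutoModelForTokenClassification.from_pretrained(
    model_path, num_labels=15)
state = torch.load(os.path.join(model_path, "pytorch_model.bin"), map_location="cpu")

# If this prints True, the fine-tuned classifier weights were loaded and the
# warning only concerns the pooler, which token classification does not use.
print(torch.allclose(model.classifier.weight, state["classifier.weight"]))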
The complete code for exporting the model (I've already trained it and am now loading it from the weights file):
def cnn_layers(inputs):
    conv_base = keras.applications.mobilenetv2.MobileNetV2(
        input_shape=(224, 224, 3), input_tensor=inputs,
        include_top=False, weights='imagenet')
    for layer in conv_base.layers[:-200]:
        layer.trainable = False
    last_layer = conv_base.output
    x = GlobalAveragePooling2D()(last_layer)
    x = keras.layers.GaussianNoise(0.3)(x)
    x = Dense(1024, name='fc-1')(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.advanced_activations.LeakyReLU(0.3)(x)
    x = Dropout(0.4)(x)
    x = Dense(512, name='fc-2')(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.advanced_activations.LeakyReLU(0.3)(x)
    x = Dropout(0.3)(x)
    out = Dense(10, activation='softmax', name='output_layer')(x)
    return out

model_input = layers.Input(shape=(224, 224, 3))
model_output = cnn_layers(model_input)
test_model = keras.models.Model(inputs=model_input, outputs=model_output)

weight_path = os.path.join(tempfile.gettempdir(), 'saved_wt.h5')
test_model.load_weights(weight_path)

export_path = 'export'

from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import utils
from tensorflow.python.saved_model import tag_constants, signature_constants
from tensorflow.python.saved_model.signature_def_utils_impl import build_signature_def, predict_signature_def
from tensorflow.contrib.session_bundle import exporter

builder = saved_model_builder.SavedModelBuilder(export_path)

signature = predict_signature_def(inputs={'image': test_model.input},
                                  outputs={'prediction': test_model.output})

with K.get_session() as sess:
    builder.add_meta_graph_and_variables(sess=sess,
                                         tags=[tag_constants.SERVING],
                                         signature_def_map={'predict': signature})
    builder.save()
And the output of python /tensorflow/python/tools/saved_model_cli.py show --dir /1 --all (dir 1 contains saved_model.pb and the models dir) is:
MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['predict']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['image'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 224, 224, 3)
        name: input_1:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['prediction'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 107)
        name: output_layer/Softmax:0
  Method name is: tensorflow/serving/predict
To accept a b64 string:
The code was written for a (224, 224, 3) numpy array, so the modifications I made to the above code are:
_bytes should be added to the input name when it is passed as b64. So,
predict_signature_def(inputs={'image':......
changed to
predict_signature_def(inputs={'image_bytes':.....
Earlier, test_model.input had shape (224, 224, 3) and dtype DT_FLOAT. So,
signature = predict_signature_def(inputs={'image': test_model.input},.....
changed to (reference)
temp = tf.placeholder(shape=[None], dtype=tf.string)
signature = predict_signature_def(inputs={'image_bytes': temp},.....
Edit:
The code to send the request using requests is (as mentioned in the comments):
encoded_image = None
with open('/1.jpg', "rb") as image_file:
    encoded_image = base64.b64encode(image_file.read())

object_for_api = {"signature_name": "predict",
                  "instances": [
                      {
                          "image_bytes": {"b64": encoded_image}
                          # "b64": encoded_image (or this way, since "image" is not needed)
                      }]
                  }

p = requests.post(url='http://localhost:8501/v1/models/mnist:predict', json=json.dumps(object_for_api), headers=headers)
print(p)
I'm getting a <Response [400]> error. I don't think there's an error in the way I'm sending the request; something needs to be changed in the code for exporting the model, specifically in
temp = tf.placeholder(shape=[None], dtype=tf.string).
Looking at the docs you've provided, what you're looking to do is take the image and send it to the API. Images are easy to transfer in a text format if you encode them, base64 being pretty much the standard. So what we want to do is create a JSON object with the image as base64 in the right place, and then send this JSON object to the REST API. Python has the requests library, which makes sending a Python dictionary as JSON very easy.
So take the image, encode it, put it in a dictionary and send it off using requests:
import requests
import base64

encoded_image = None
with open("image.png", "rb") as image_file:
    # b64encode returns bytes; decode to str so it is JSON-serializable
    encoded_image = base64.b64encode(image_file.read()).decode("utf-8")

object_for_api = {"signature_name": "predict",
                  "instances": [
                      {
                          "image": {"b64": encoded_image}
                      }]
                  }

requests.post(url='http://localhost:8501/v1/models/mnist:predict', json=object_for_api)
You can also encode your numpy array into JSON but it doesn't seem that the API docs are looking for that.
A few side notes:
- I encourage you to use tf.saved_model.simple_save.
- You may find model_to_estimator convenient.
- While your model seems like it will work for requests (the output of saved_model_cli shows the outer dimension is None for both inputs and outputs), it's fairly inefficient to send JSON arrays of floats.
To the last point, it's often easier to modify the code to do the image decoding server side so you're sending a base64 encoded JPG or PNG over the wire instead of an array of floats. Here's one example for Keras (I plan to update that answer with simpler code).
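As a rough illustration of that approach, the export-time graph could decode the incoming bytes itself; this is only a sketch (TF 1.x graph mode), and the helper name _decode_and_resize is mine rather than from the linked example:
def _decode_and_resize(image_bytes):
    image = tf.image.decode_jpeg(image_bytes, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    return tf.image.resize_images(image, [224, 224])

# One serialized image string per instance comes in; decode each into the
# (224, 224, 3) float tensor the CNN expects.
image_bytes = tf.placeholder(shape=[None], dtype=tf.string)
decoded_images = tf.map_fn(_decode_and_resize, image_bytes, dtype=tf.float32)

# `decoded_images` would then feed the model graph, and the signature would
# use `image_bytes` as its input, e.g.:
# signature = predict_signature_def(inputs={'image_bytes': image_bytes},
#                                   outputs={'prediction': model_output})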
I have built and trained a TensorFlow model, which is deployed using the tf.Estimator paradigm. I have built a serving function like the one below:
def serving_input_fn(params):
    feature_placeholders = {
        'inputs': tf.placeholder(tf.int64, [None], name='inputs')
    }
    features = {
        key: tensor
        for key, tensor in feature_placeholders.items()
    }
    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)
Now, I want to be able to call it using application/json as the content type. So I built a JSON payload like the example I found in this question:
payload = {'instances': [{'inputs': [1039]}]}
json_string = json.dumps(payload)
When I invoke the model I get back:
ERROR in serving: Unsupported request data format: {u'instances': [{u'inputs': [1039]}]}.
Valid formats: tensor_pb2.TensorProto, dict<string, tensor_pb2.TensorProto> and predict_pb2.PredictRequest
Any ideas how I can achieve my goal?
As it turns out the JSON should be:
request = {'dtype': 'DT_INT64',
           'tensorShape': {'dim': [{'size': 1}]},
           'int64Val': [1039]}
json_string = json.dumps(request)
I have saved the model using the tf.estimator method export_savedmodel, as follows:
export_dir="exportModel/"
feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
classifier.export_savedmodel(export_dir, input_receiver_fn, as_text=False, checkpoint_path="Model/model.ckpt-400")
How can I import this saved model and use for predictions?
I tried to search for a good base example, but it appears the documentation and samples are a bit scattered for this topic. So let's start with a base example: the tf.estimator quickstart.
That particular example doesn't actually export a model, so let's do that (it's not needed for Use Case 1):
def serving_input_receiver_fn():
    """Build the serving inputs."""
    # The outer dimension (None) allows us to batch up inputs for
    # efficiency. However, it also means that if we want a prediction
    # for a single instance, we'll need to wrap it in an outer list.
    inputs = {"x": tf.placeholder(shape=[None, 4], dtype=tf.float32)}
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)

export_dir = classifier.export_savedmodel(
    export_dir_base="/path/to/model",
    serving_input_receiver_fn=serving_input_receiver_fn)
Huge asterisk on this code: there appears to be a bug in TensorFlow 1.3 that doesn't allow you to do the above export on a "canned" estimator (such as DNNClassifier). For a workaround, see the "Appendix: Workaround" section.
The code below references export_dir (return value from the export step) to emphasize that it is not "/path/to/model", but rather, a subdirectory of that directory whose name is a timestamp.
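For example, if you only have the base path on hand, the latest timestamped export can be located like this (a small sketch; it assumes nothing else lives under the base directory):
import os

export_base = "/path/to/model"
export_dir = os.path.join(export_base, max(os.listdir(export_base)))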
Use Case 1: Perform prediction in the same process as training
This is a scikit-learn type of experience, and it is already exemplified by the sample. For completeness' sake, you simply call predict on the trained model:
classifier.train(input_fn=train_input_fn, steps=2000)
# [...snip...]
predictions = list(classifier.predict(input_fn=predict_input_fn))
predicted_classes = [p["classes"] for p in predictions]
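The predict_input_fn referenced above is not shown here; a minimal version might look like this (a sketch, assuming the Iris-style feature key "x" from the quickstart):
import numpy as np
import tensorflow as tf

new_samples = np.array([[6.4, 3.2, 4.5, 1.5],
                        [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)

predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": new_samples},
    num_epochs=1,
    shuffle=False)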
Use Case 2: Load a SavedModel into Python/Java/C++ and perform predictions
Python Client
Perhaps the easiest thing to use if you want to do prediction in Python is SavedModelPredictor. In the Python program that will use the SavedModel, we need code like this:
from tensorflow.contrib import predictor

predict_fn = predictor.from_saved_model(export_dir)
predictions = predict_fn(
    {"x": [[6.4, 3.2, 4.5, 1.5],
           [5.8, 3.1, 5.0, 1.7]]})
print(predictions['scores'])
Java Client
package dummy;

import java.nio.FloatBuffer;
import java.util.Arrays;
import java.util.List;

import org.tensorflow.SavedModelBundle;
import org.tensorflow.Session;
import org.tensorflow.Tensor;

public class Client {
  public static void main(String[] args) {
    Session session = SavedModelBundle.load(args[0], "serve").session();

    Tensor x =
        Tensor.create(
            new long[] {2, 4},
            FloatBuffer.wrap(
                new float[] {
                  6.4f, 3.2f, 4.5f, 1.5f,
                  5.8f, 3.1f, 5.0f, 1.7f
                }));

    // Doesn't look like Java has a good way to convert the
    // input/output name ("x", "scores") to their underlying tensor,
    // so we hard code them ("Placeholder:0", ...).
    // You can inspect them on the command-line with saved_model_cli:
    //
    //   $ saved_model_cli show --dir $EXPORT_DIR --tag_set serve --signature_def serving_default
    final String xName = "Placeholder:0";
    final String scoresName = "dnn/head/predictions/probabilities:0";

    List<Tensor> outputs = session.runner()
        .feed(xName, x)
        .fetch(scoresName)
        .run();

    // Outer dimension is batch size; inner dimension is number of classes
    float[][] scores = new float[2][3];
    outputs.get(0).copyTo(scores);
    System.out.println(Arrays.deepToString(scores));
  }
}
C++ Client
You'll likely want to use tensorflow::LoadSavedModel with Session.
#include <unordered_set>
#include <utility>
#include <vector>

#include "tensorflow/cc/saved_model/loader.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/public/session.h"

namespace tf = tensorflow;

int main(int argc, char** argv) {
  const std::string export_dir = argv[1];

  tf::SavedModelBundle bundle;
  tf::Status load_status = tf::LoadSavedModel(
      tf::SessionOptions(), tf::RunOptions(), export_dir, {"serve"}, &bundle);
  if (!load_status.ok()) {
    std::cout << "Error loading model: " << load_status << std::endl;
    return -1;
  }

  // We should get the signature out of MetaGraphDef, but that's a bit
  // involved. We'll take a shortcut like we did in the Java example.
  const std::string x_name = "Placeholder:0";
  const std::string scores_name = "dnn/head/predictions/probabilities:0";

  auto x = tf::Tensor(tf::DT_FLOAT, tf::TensorShape({2, 4}));
  auto matrix = x.matrix<float>();
  // First example.
  matrix(0, 0) = 6.4;
  matrix(0, 1) = 3.2;
  matrix(0, 2) = 4.5;
  matrix(0, 3) = 1.5;
  // Second example.
  matrix(1, 0) = 5.8;
  matrix(1, 1) = 3.1;
  matrix(1, 2) = 5.0;
  matrix(1, 3) = 1.7;

  std::vector<std::pair<std::string, tf::Tensor>> inputs = {{x_name, x}};
  std::vector<tf::Tensor> outputs;

  tf::Status run_status =
      bundle.session->Run(inputs, {scores_name}, {}, &outputs);
  if (!run_status.ok()) {
    std::cout << "Error running session: " << run_status << std::endl;
    return -1;
  }

  for (const auto& tensor : outputs) {
    std::cout << tensor.matrix<float>() << std::endl;
  }
}
Use Case 3: Serve a model using TensorFlow Serving
Exporting a Classification model in a manner amenable to serving requires that the input be a tf.Example object. Here's how we might export a model for TensorFlow Serving:
def serving_input_receiver_fn():
    """Build the serving inputs."""
    # The outer dimension (None) allows us to batch up inputs for
    # efficiency. However, it also means that if we want a prediction
    # for a single instance, we'll need to wrap it in an outer list.
    example_bytestring = tf.placeholder(
        shape=[None],
        dtype=tf.string,
    )
    features = tf.parse_example(
        example_bytestring,
        tf.feature_column.make_parse_example_spec(feature_columns)
    )
    return tf.estimator.export.ServingInputReceiver(
        features, {'examples': example_bytestring})

export_dir = classifier.export_savedmodel(
    export_dir_base="/path/to/model",
    serving_input_receiver_fn=serving_input_receiver_fn)
The reader is referred to TensorFlow Serving's documentation for more instructions on how to set up TensorFlow Serving, so I'll only provide the client code here:
# Omitting a bunch of connection/initialization code...
# But at some point we end up with a stub whose lifecycle
# is generally longer than that of a single request.
stub = create_stub(...)

# The actual values for prediction. We have two examples in this
# case, each consisting of a single, multi-dimensional feature `x`.
# This data here is the equivalent of the map passed to the
# `predict_fn` in use case #2.
examples = [
    tf.train.Example(
        features=tf.train.Features(
            feature={"x": tf.train.Feature(
                float_list=tf.train.FloatList(value=[6.4, 3.2, 4.5, 1.5]))})),
    tf.train.Example(
        features=tf.train.Features(
            feature={"x": tf.train.Feature(
                float_list=tf.train.FloatList(value=[5.8, 3.1, 5.0, 1.7]))})),
]

# Build the RPC request. The serving input function expects serialized
# tf.Example protos in a string tensor, so serialize them here.
predict_request = predict_pb2.PredictRequest()
predict_request.model_spec.name = "default"
predict_request.inputs["examples"].CopyFrom(
    tensor_util.make_tensor_proto(
        [example.SerializeToString() for example in examples],
        dtype=tf.string))

# Perform the actual prediction.
stub.Predict(predict_request, PREDICT_DEADLINE_SECS)
Note that the key, examples, that is referenced in the predict_request.inputs needs to match the key used in the serving_input_receiver_fn at export time (cf. the constructor to ServingInputReceiver in that code).
Appendix: Working around Exports from Canned Models in TF 1.3
There appears to be a bug in TensorFlow 1.3 in which canned estimators do not export properly for Use Case 2 (the problem does not exist for "custom" estimators). Here's a workaround that wraps a DNNClassifier to make things work, specifically for the Iris example:
# Build 3 layer DNN with 10, 20, 10 units respectively.
class Wrapper(tf.estimator.Estimator):
    def __init__(self, **kwargs):
        dnn = tf.estimator.DNNClassifier(**kwargs)

        def model_fn(mode, features, labels):
            spec = dnn._call_model_fn(features, labels, mode)
            export_outputs = None
            if spec.export_outputs:
                export_outputs = {
                    "serving_default": tf.estimator.export.PredictOutput(
                        {"scores": spec.export_outputs["serving_default"].scores,
                         "classes": spec.export_outputs["serving_default"].classes})}

            # Replace the export_outputs entry of the EstimatorSpec.
            copy = list(spec)
            copy[4] = export_outputs
            return tf.estimator.EstimatorSpec(mode, *copy)

        super(Wrapper, self).__init__(model_fn, kwargs["model_dir"], dnn.config)

classifier = Wrapper(feature_columns=feature_columns,
                     hidden_units=[10, 20, 10],
                     n_classes=3,
                     model_dir="/tmp/iris_model")
I don't think there is a bug with canned Estimators (or rather, if there ever was one, it has been fixed). I was able to successfully export a canned estimator model using Python and import it in Java.
Here is my code to export the model:
a = tf.feature_column.numeric_column("a")
b = tf.feature_column.numeric_column("b")
feature_columns = [a, b]

model = tf.estimator.DNNClassifier(feature_columns=feature_columns ...)

# To export
feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
export_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
servable_model_path = model.export_savedmodel(servable_model_dir, export_input_fn, as_text=True)
To import the model in Java, I used the Java client code provided by rhaertel80 above and it works. Hope this also answers Ben Fowler's question above.
It appears that the TensorFlow team does not agree that there is a bug in version 1.3 using canned estimators for exporting a model under use case #2. I submitted a bug report here:
https://github.com/tensorflow/tensorflow/issues/13477
The response I received from TensorFlow is that the input must only be a single string tensor. It appears that there may be a way to consolidate multiple features into a single string tensor using serialized tf.Examples, but I have not found a clear method to do this. If anyone has code showing how to do this, I would be appreciative.
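For what it's worth, a hedged sketch of that consolidation (the feature names "a" and "b" are illustrative only): pack the features into a single tf.Example and serialize it, which yields exactly the one string tensor the parsing serving input function expects.
import tensorflow as tf

example = tf.train.Example(features=tf.train.Features(feature={
    "a": tf.train.Feature(float_list=tf.train.FloatList(value=[1.0])),
    "b": tf.train.Feature(float_list=tf.train.FloatList(value=[2.0])),
}))

# This serialized string is what gets fed to the exported model's single
# string input (e.g. via make_tensor_proto([serialized], dtype=tf.string)).
serialized = example.SerializeToString()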
You need to export the saved model using tf.contrib.export_savedmodel, and you need to define an input receiver function to pass input to it.
Later you can load the saved model (generally saved_model.pb) from disk and serve it.
TensorFlow: How to predict from a SavedModel?
Can someone assist me in making predictions on TensorFlow's Wide and Deep Learning model loaded into TensorFlow Serving's model_server?
If anyone could point me to a resource or documentation for the same would be really helpful.
You can try invoking the predict method of the estimator with as_iterable set to False to get an ndarray:
y = m.predict(input_fn=lambda: input_fn(df_test), as_iterable=False)
However, note the deprecation note here for future compatibility.
If your model is exported using Estimator.export_savedmodel() and you successfully built TensorFlow Serving itself, you can do something like this:
from grpc.beta import implementations
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2

tf.app.flags.DEFINE_string('server', 'localhost:9000', 'Server host:port.')
tf.app.flags.DEFINE_string('model', 'wide_and_deep', 'Model name.')
FLAGS = tf.app.flags.FLAGS

...

def main(_):
    host, port = FLAGS.server.split(':')

    # Set up a connection to the TF Model Server
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Create a request that will be sent for an inference
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model
    request.model_spec.signature_name = 'serving_default'

    # A single tf.Example that will get serialized and turned into a TensorProto
    feature_dict = {'age': _float_feature(value=25),
                    'capital_gain': _float_feature(value=0),
                    'capital_loss': _float_feature(value=0),
                    'education': _bytes_feature(value='11th'.encode()),
                    'education_num': _float_feature(value=7),
                    'gender': _bytes_feature(value='Male'.encode()),
                    'hours_per_week': _float_feature(value=40),
                    'native_country': _bytes_feature(value='United-States'.encode()),
                    'occupation': _bytes_feature(value='Machine-op-inspct'.encode()),
                    'relationship': _bytes_feature(value='Own-child'.encode()),
                    'workclass': _bytes_feature(value='Private'.encode())}
    label = 0

    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    serialized = example.SerializeToString()

    request.inputs['inputs'].CopyFrom(
        tf.contrib.util.make_tensor_proto(serialized, shape=[1]))

    # Create a future result, and set 5 seconds timeout
    result_future = stub.Predict.future(request, 5.0)
    prediction = result_future.result().outputs['scores']

    print('True label: ' + str(label))
    print('Prediction: ' + str(np.argmax(prediction)))
I wrote a simple tutorial, Exporting and Serving a TensorFlow Wide & Deep Model, with more details.
Hope it helps.