I defined a simple custom model:
import tensorflow as tf

class CustomModule(tf.keras.layers.Layer):
    def __init__(self):
        super(CustomModule, self).__init__()
        self.v = tf.Variable(1.)

    def call(self, x):
        print('Tracing with', x)
        return x * self.v

    def mutate(self, new_v):
        self.v.assign(new_v)
I want to save it for serving, which is why I need to provide a function for "serving_default". I've tried to do it like this:
module = CustomModule()
module_with_signature_path = './tmp/1'
call = tf.function(module.mutate, input_signature=[tf.TensorSpec([], tf.float32)])
tf.saved_model.save(module, module_with_signature_path, signatures=call)
I got an error:
ValueError: Got a non-Tensor value <tf.Operation 'StatefulPartitionedCall' type=StatefulPartitionedCall> for key 'output_0' in the output of the function __inference_mutate_8 used to generate the SavedModel signature 'serving_default'. Outputs for functions used as signatures must be a single Tensor, a sequence of Tensors, or a dictionary from string to Tensor.
How can I properly define the signature while saving the model? Thank you!
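For comparison, wrapping the layer's call (which does return a Tensor) seems to save without this error, so the problem looks specific to mutate returning nothing. A minimal sketch of that variant:

import tensorflow as tf

module = CustomModule()
# call returns x * self.v, i.e. a Tensor, so it satisfies the signature requirement
call = tf.function(module.__call__, input_signature=[tf.TensorSpec([], tf.float32)])
tf.saved_model.save(module, './tmp/1', signatures={'serving_default': call})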
I've been going crazy for a few days over a problem that I thought was trivial. My end goal is to deploy to AWS SageMaker a TensorFlow model that takes a simple string as input, calculates its embedding using a pre-trained 'sentence-transformers' model, and finally uses TensorFlow Recommenders to suggest the k nearest neighbours among a collection of embeddings I have already calculated. I would like to do this entirely inside the model, including the preprocessing (tokenization).
I made the predictions work with different approaches in my notebook. I start having trouble when I try to save my model.
The problem seems to be that HF's AutoTokenizer needs a pure Python list of strings as input, so I hit a roadblock whenever I try to save my model, and trying to get around this with tf.py_function (the approach below) results in problems with SageMaker.
My approaches so far:
1. THE 'I THOUGHT IT WAS SO SIMPLE'
def text_to_startup_model(startups_ids: list, startup_vectors):
    import tensorflow as tf
    import tensorflow_recommenders as tfrs
    import numpy as np
    from random import randint
    from sentence_transformers import SentenceTransformer

    exported_model = tfrs.layers.factorized_top_k.BruteForce(
        SentenceTransformer("all-mpnet-base-v2").encode
    )
    exported_model.index(np.array(startup_vectors), np.array(startups_ids))

    # TESTS the model
    # for some reason this seems to be needed in order to save the model :/
    # https://github.com/tensorflow/recommenders/issues/131
    test = exported_model(['Test Text Query'])
    print(test)

    return exported_model
text_to_startup_model(search_db_ids, search_db_embeddings)
#--> WORKS PERFECTLY, AS I GET SOME SUGGESTIONS
tf.saved_model.save(text_to_startup_model(search_db_ids, search_db_embeddings), export_dir="/home/nicholas/test_model_save/1")
#TypeError Traceback (most recent call last)
# /home/nicholas/Documents/Dev/Rialto-predict-1/notebooks/t2s_different_approaches.ipynb Cell 5 in <cell line: 22>()
# 19 text_to_startup_model(search_db_ids, search_db_embeddings)
# 20 #--> WORKS PERFECTLY, AS I GET SOME SUGGESTIONS
# ---> 22 tf.saved_model.save(text_to_startup_model(search_db_ids, search_db_embeddings), export_dir="/home/nicholas/test_model_save/1")
# File ~/Documents/Dev/Rialto-predict-1/venv/lib/python3.10/site-packages/tensorflow/python/saved_model/save.py:1334, in save(obj, export_dir, signatures, options)
# 1332 # pylint: enable=line-too-long
# 1333 metrics.IncrementWriteApi(_SAVE_V2_LABEL)
# -> 1334 save_and_return_nodes(obj, export_dir, signatures, options)
# 1335 metrics.IncrementWrite(write_version="2")
#
# .........
#
#
# File ~/Documents/Dev/Rialto-predict-1/venv/lib/python3.10/site-packages/tensorflow/python/eager/def_function.py:677, in Function._defun_with_scope.<locals>.wrapped_fn(*args, **kwds)
# 673 with default_graph._variable_creator_scope(scope, priority=50): # pylint: disable=protected-access
# 674 # __wrapped__ allows AutoGraph to swap in a converted function. We give
# 675 # the function a weak reference to itself to avoid a reference cycle.
# 676 with OptionalXlaContext(compile_with_xla):
# --> 677 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
# 678 return out
# File ~/Documents/Dev/Rialto-predict-1/venv/lib/python3.10/site-packages/tensorflow/python/framework/func_graph.py:1147, in func_graph_from_py_func.<locals>.autograph_handler(*args, **kwargs)
# 1145 except Exception as e: # pylint:disable=broad-except
# 1146 if hasattr(e, "ag_error_metadata"):
# -> 1147 raise e.ag_error_metadata.to_exception(e)
# 1148 else:
# 1149 raise
# TypeError: in user code:
# File "/home/nicholas/Documents/Dev/Rialto-predict-1/venv/lib/python3.10/site-packages/keras/saving/saving_utils.py", line 138, in _wrapped_model *
# outputs = model(*args, **kwargs)
# File "/home/nicholas/Documents/Dev/Rialto-predict-1/venv/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler **
# raise e.with_traceback(filtered_tb) from None
# TypeError: Exception encountered when calling layer "brute_force_3" (type BruteForce).
# in user code:
# File "/home/nicholas/Documents/Dev/Rialto-predict-1/venv/lib/python3.10/site-packages/tensorflow_recommenders/layers/factorized_top_k.py", line 567, in call *
# queries = self.query_model(queries)
# File "/home/nicholas/Documents/Dev/Rialto-predict-1/venv/lib/python3.10/site-packages/sentence_transformers/SentenceTransformer.py", line 160, in encode *
# features = self.tokenize(sentences_batch)
# File "/home/nicholas/Documents/Dev/Rialto-predict-1/venv/lib/python3.10/site-packages/sentence_transformers/SentenceTransformer.py", line 318, in tokenize *
# return self._first_module().tokenize(texts)
# File "/home/nicholas/Documents/Dev/Rialto-predict-1/venv/lib/python3.10/site-packages/sentence_transformers/models/Transformer.py", line 102, in tokenize *
# batch1.append(text_tuple[0])
# TypeError: 'NoneType' object is not subscriptable
# ...
# Call arguments received:
# • queries=['None']
# • k=None
2. THE tf.py_function
From my understanding, the problem with the first approach is that the graph has no knowledge of the input type/value. This second approach, from 'Use `sentence-transformers` inside of a keras model', was supposed to work, as it uses tf.py_function to accept a list of strings as the first input without complaining.
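(As a minimal illustration of the trick, separate from the actual code below: tf.py_function lets an eager Python callable run on string tensors inside a graph. The names here are just for the example.)

import tensorflow as tf

def _upper(strings):
    # Runs eagerly, so .numpy() and plain Python string handling are available.
    return tf.constant([s.decode("utf-8").upper() for s in strings.numpy()])

@tf.function
def shout(strings):
    return tf.py_function(func=_upper, inp=[strings], Tout=tf.string)

print(shout(tf.constant(["hello", "world"])))  # tf.Tensor([b'HELLO' b'WORLD'], ...)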
def approach_2(startups_ids: list, startup_vectors):
    import tensorflow as tf
    import tensorflow_recommenders as tfrs
    import numpy as np
    from transformers import MPNetTokenizer, TFMPNetModel

    # Here it loads the specific pre-trained model we are using for Rialto
    tokenizer = MPNetTokenizer.from_pretrained(
        "sentence-transformers/all-mpnet-base-v2"
    )
    model = TFMPNetModel.from_pretrained(
        "sentence-transformers/all-mpnet-base-v2", from_pt=True
    )

    class SBert(tf.keras.layers.Layer):
        def __init__(self, tokenizer, model):
            super(SBert, self).__init__()
            self.tokenizer = tokenizer
            self.model = model

        def tf_encode(self, inputs):
            def encode(inputs):
                inputs = [x[0].decode("utf-8") for x in inputs.numpy()]
                outputs = self.tokenizer(
                    inputs, padding=True, truncation=True, return_tensors="tf"
                )
                return outputs["input_ids"], outputs["attention_mask"]

            return tf.py_function(
                func=encode, inp=[inputs], Tout=[tf.int32, tf.int32]
            )

        def process(self, i, a):
            def __call(i, a):
                model_output = self.model(
                    {"input_ids": i.numpy(), "attention_mask": a.numpy()}
                )
                return model_output[0]

            return tf.py_function(func=__call, inp=[i, a], Tout=[tf.float32])

        def mean_pooling(self, model_output, attention_mask):
            token_embeddings = tf.squeeze(tf.stack(model_output), axis=0)
            input_mask_expanded = tf.cast(
                tf.broadcast_to(
                    tf.expand_dims(attention_mask, -1), tf.shape(token_embeddings)
                ),
                tf.float32,
            )
            a = tf.math.reduce_sum(token_embeddings * input_mask_expanded, axis=1)
            b = tf.clip_by_value(
                tf.math.reduce_sum(input_mask_expanded, axis=1),
                1e-9,
                tf.float32.max,
            )
            embeddings = a / b
            embeddings, _ = tf.linalg.normalize(embeddings, 2, axis=1)
            return embeddings

        def call(self, inputs):
            input_ids, attention_mask = self.tf_encode(inputs)
            model_output = self.process(input_ids, attention_mask)
            embeddings = self.mean_pooling(model_output, attention_mask)
            return embeddings

    # Uses the keras-ified model in a Keras model
    sbert = SBert(tokenizer, model)
    inputs = tf.keras.layers.Input((1,), dtype=tf.string)
    outputs = sbert(inputs)
    model = tf.keras.Model(inputs, outputs)

    # Implements the model we just built for top-k NN retrieval, from the pool of pre-calculated startup embeddings.
    exported_model = tfrs.layers.factorized_top_k.BruteForce(model)
    exported_model.index(np.array(startup_vectors), np.array(startups_ids))

    # TESTS the model
    # for some reason this seems to be needed in order to save the model :/
    # https://github.com/tensorflow/recommenders/issues/131
    print(exported_model(tf.constant(["'Test Text Query'"])))

    return exported_model
model_to_store_1 = approach_2(search_db_ids, search_db_embeddings)
tf.saved_model.save(model_to_store_1, export_dir="/home/nicholas/test_model_save/2")
# THIS ONE SAVES LIKE A CHARM and the deploy to SageMaker is successful,
# BUT IT FAILS ON SAGEMAKER WHEN THE MODEL IS INVOKED. BELOW ARE THE LOGS FROM THE CALL
# ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from model with message "{
# "error": "No OpKernel was registered to support Op 'EagerPyFunc' used by {{node StatefulPartitionedCall/brute_force/model/s_bert/EagerPyFunc}} with these attrs: [is_async=false, Tin=[DT_STRING], _output_shapes=[<unknown>, <unknown>], Tout=[DT_INT32, DT_INT32], token=\"pyfunc_4\"]\nRegistered devices: [CPU]\nRegistered kernels:\n <no registered kernels>\n\n\t [[StatefulPartitionedCall/brute_force/model/s_bert/EagerPyFunc]]\n\t [[StatefulPartitionedCall]]"
# }". See https://us-west-2.console.aws.amazon.com/cloudwatch/home?region=us-west-2#logEventViewer:group=/aws/sagemaker/Endpoints/rialto-t2s-model-endpoint in account 634470116418 for more information
As you can see from the log, the problem seems to be eager mode and py_functions: TF Serving has no kernel registered for the EagerPyFunc op, so anything wrapped in tf.py_function cannot be executed on the serving side. I tried to google this and found absolutely nothing on how to address the issue.
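For what it's worth, an export can be scanned for py_function nodes before deploying, by reading the SavedModel proto directly (a small sketch; the path is the one from above):

from tensorflow.core.protobuf import saved_model_pb2

export_dir = "/home/nicholas/test_model_save/2"
sm = saved_model_pb2.SavedModel()
with open(export_dir + "/saved_model.pb", "rb") as f:
    sm.ParseFromString(f.read())

# Collect every node in the main graph plus all library functions, then flag
# the ops that need a Python runtime (TF Serving has no kernel for them).
graph_def = sm.meta_graphs[0].graph_def
nodes = list(graph_def.node)
for fn in graph_def.library.function:
    nodes.extend(fn.node_def)

py_nodes = [n.name for n in nodes if n.op in ("PyFunc", "EagerPyFunc")]
print("py_function nodes:", py_nodes)  # anything listed here will break on TF Serving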
3. THE Classes approach
I've tried implementing something building upon this article, but I am running into issues similar to the first approach: when I go to save the model, the expected input clashes with the requirements of the tokenizer.
EDIT 1 - here is a Colab showcasing the approach: https://colab.research.google.com/drive/1gibFdEoHTs0hzD5yiXzLT_-asmilUoAQ?usp=sharing#scrollTo=TibAssWm3D5e
All of this journey triggered some questions:
Question 1: Is this even a best practice? Should I serve my model the already-tokenized sentences as tensors?
Question 2: How the hell do I make it work? :)
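To make Question 1 concrete, by "serving tokenized sentences" I mean something like the sketch below, where tokenization happens on the client and only the tensor part of the pipeline gets exported (the class name and export path are just placeholders, and I haven't wired this into the BruteForce layer):

import tensorflow as tf
from transformers import TFMPNetModel

class TokenizedEncoder(tf.Module):
    def __init__(self, hf_model):
        super().__init__()
        self.hf = hf_model  # stored as an attribute so its variables get tracked and saved

    @tf.function(input_signature=[
        tf.TensorSpec([None, None], tf.int32, name="input_ids"),
        tf.TensorSpec([None, None], tf.int32, name="attention_mask"),
    ])
    def serve(self, input_ids, attention_mask):
        out = self.hf({"input_ids": input_ids, "attention_mask": attention_mask})
        return {"token_embeddings": out[0]}  # last_hidden_state; pooling omitted here

hf_model = TFMPNetModel.from_pretrained("sentence-transformers/all-mpnet-base-v2", from_pt=True)
encoder = TokenizedEncoder(hf_model)
tf.saved_model.save(encoder, "/tmp/tokenized_encoder/1",
                    signatures={"serving_default": encoder.serve})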
I'm using tf-serving version 1.0.0.5 (gRPC protocol) to serve a customized TF 2.0 model.
My request in JSON looks like this:
{'task1':{"feature1":0.0,"feature2":0.0, "feature3":0.0},
'task2':{"feature1":1.0,"feature2":0.0, "feature3":1.0}}
The desired output is two float_vals predicted by the model; however, it only returned one. If the request wraps only one set of features, like:
{'task1':{"feature1":0.0,"feature2":0.0, "feature3":0.0}}
everything looks fine.
My model is written in TF 2.0 with Keras and saved as a .pb. The model structure is the following:
class cust_dnn_test(tf.keras.Model):
    def compile(self, optimizer, my_loss):
        ...

    def __init__(self):
        ...

    def call(self, inputs):
        ...
        output_1 = Dense(fc1)
        output_2 = Dense(fc2)
        output_res = tf.keras.layers.multiply([output_1, output_2])
        return {'res1': output_1, 'res2': output_res}

    def train_step(self, data):
        ...

    def test_step(self, data):
        ...

model.fit(training_dataset, validation_data=val_dataset, epochs=1)
model.save('{path}/', overwrite=True)
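For reference, the exported signature can be inspected like this to see exactly which inputs it expects and which outputs it returns (a small sketch; '{path}/' is the same placeholder as above):

import tensorflow as tf

loaded = tf.saved_model.load('{path}/')
serving_fn = loaded.signatures['serving_default']
print(serving_fn.structured_input_signature)  # expected input tensors
print(serving_fn.structured_outputs)          # output keys, e.g. res1 / res2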
Any clue will be appreciated, thanks.
I have a TensorFlow model written through model subclassing and I want to export it to ONNX format. This is simple enough with the script below. However, the name of the input variable to the ONNX model is args_0. How can I rename it?
import tensorflow as tf
import tf2onnx
from tensorflow.python.keras import Model
from tensorflow.python.keras.layers import Dense


class MyModel(Model):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.dense = Dense(16)

    def call(self, inputs, **kwargs):
        return self.dense(inputs)

    def to_onnx(self, output_path, opset=14):
        model_proto, _ = tf2onnx.convert.from_keras(
            self,
            input_signature=[tf.TensorSpec((1, 128))],
            opset=opset,
            output_path=output_path,
        )
        return


if __name__ == "__main__":
    output_path = "./test.onnx"
    A = MyModel()
    A.to_onnx(output_path)
You can provide the input name in the input_signature as name="input_name", so it should look like:
input_signature=[tf.TensorSpec((1, 128), name="input_name")],
as shown in this notebook
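If you want to double-check that the rename took effect, you can load the exported file with the onnx package (a quick sketch):

import onnx

# Print the graph's input names; after the change above this should show "input_name".
onnx_model = onnx.load("./test.onnx")
print([inp.name for inp in onnx_model.graph.input])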
When using a custom class to replace a lambda function, building a model fails.
I previously had this code in a lambda function and it worked fine, but I was unable to save the model. I need to save the model that I'm building, which depends on this code snippet.
import keras
import tensorflow as tf


class ShapePositionLayer(keras.layers.Layer):
    def call(self, x):
        assert isinstance(x, list)
        a, b = x
        return keras.backend.gather(keras.backend.shape(a), b)

    def compute_output_shape(self, input_shape):
        return (1)


captions = keras.layers.Input(shape=[5, 1024], name='captions')
batch_size = ShapePositionLayer()([captions, tf.constant(0, dtype=tf.int32)])
model = keras.models.Model(inputs=[captions], outputs=[batch_size])
I expected to be able to build a model.
Instead I receive the error:
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'
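For reference, the Lambda version that did build looked roughly like this (reconstructed with the index hard-coded, so only approximate):

# Rough reconstruction of the working Lambda version (index 0 hard-coded):
batch_size = keras.layers.Lambda(
    lambda a: keras.backend.gather(keras.backend.shape(a), 0)
)(captions)
model = keras.models.Model(inputs=[captions], outputs=[batch_size])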