How can I preprocess my Mapdataset to fit my model input? - tensorflow

I use a MapDataset composed of a text label and a vector of floats stored as a string.
Here is the way I read the content of my tfrecord:
def extract_data(tfrecord_ds):
    """Parse the serialized examples of ``tfrecord_ds`` and build the pipeline.

    Every record is parsed into a dict with two scalar string features,
    ``'classes_text'`` and ``'data'``. The parsed dataset is then cached,
    shuffled (buffer of 1000), batched by 32 and prefetched.
    """
    schema = {
        'classes_text': tf.io.FixedLenFeature((), tf.string),
        'data': tf.io.FixedLenFeature([], tf.string),
    }

    def _parse_data_function(example_proto):
        return tf.compat.v1.parse_single_example(example_proto, schema)

    records = tfrecord_ds.map(_parse_data_function)
    records = records.cache()
    records = records.shuffle(1000)
    records = records.batch(32)
    return records.prefetch(tf.data.AUTOTUNE)
I want to convert the label_text to int according to label.txt file and the data string to vector of float.
I want to use this data to train a custom model like this:
# Classifier head over fixed-size embeddings: one 512-unit ReLU layer followed
# by a `num_classes`-wide output layer without activation.
my_model = tf.keras.Sequential([
    # `shape` has to be a tuple: `(1024)` is just the int 1024, which newer
    # Keras releases refuse to interpret as a shape.
    tf.keras.layers.Input(shape=(1024,), dtype=tf.float32,
                          name='input_embedding'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(num_classes)
], name='audio_detector')
How can I process my MapDataset from (string,string) to (int, float_array) to be able to train my model?
Edit:
Here is the way I encode my data:
# Build one tf.train.Example holding the encoded text label and the raw bytes
# of the embedding, then append it to the TFRecord writer.
features = {}
features['classes_text'] = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[audio_data_generator.label.encode()]))
# Named `embedding_bytes` so that the builtin `bytes` type is not shadowed.
embedding_bytes = embedding.numpy().tobytes()
features['data'] = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[embedding_bytes]))
tf_example = tf.train.Example(features=tf.train.Features(feature=features))
writer.write(tf_example.SerializeToString())

It is easier to encode the embedding using tf.train.FloatList.
When writing to tfrecords use:
# The label is stored as bytes, the embedding as a list of floats.
label_feature = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[label.encode()]))
data_feature = tf.train.Feature(
    float_list=tf.train.FloatList(value=embedding))
features = {
    'classes_text': label_feature,
    'data': data_feature,
}
tf_example = tf.train.Example(features=tf.train.Features(feature=features))
writer.write(tf_example.SerializeToString())
And when reading give the embedding size to tf.io.FixedLenFeature, for example:
embedding_size = 10
# 'data' is read back as a float32 vector of exactly `embedding_size` values;
# 'classes_text' stays a scalar string.
feature_description = dict(
    classes_text=tf.io.FixedLenFeature((), tf.string),
    data=tf.io.FixedLenFeature([embedding_size], tf.float32),
)
To convert label_text to int you can use tf.lookup.StaticVocabularyTable.
# Assuming label.txt contains a single label per line.
with open('label.txt', 'r') as fin:
    # Blank lines (e.g. a trailing newline at the end of the file) are skipped
    # so that the empty string never becomes a class of its own.
    categories = [line.strip() for line in fin if line.strip()]
# Each label maps to its position in the file; labels that are not listed fall
# into the single out-of-vocabulary bucket.
init = tf.lookup.KeyValueTensorInitializer(
    keys=tf.constant(categories),
    values=tf.constant(list(range(len(categories))), dtype=tf.int64))
label_table = tf.lookup.StaticVocabularyTable(
    init,
    num_oov_buckets=1)
feature_description = dict(
    classes_text=tf.io.FixedLenFeature((), tf.string),
    data=tf.io.FixedLenFeature([embedding_size], tf.float32),
)


def _parse_data_function(example_proto):
    """Parse one serialized example and replace its text label by the table id."""
    parsed = tf.compat.v1.parse_single_example(example_proto, feature_description)
    # Apply the label lookup.
    label_text = parsed['classes_text']
    parsed['classes_text'] = label_table.lookup(label_text)
    return parsed


parsed_dataset = tfrecord_ds.map(_parse_data_function)
dataset = (
    parsed_dataset
    .cache()
    .shuffle(1000)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
Edit
If you wish to keep the way you save the data, you can use np.frombuffer to convert the binary strings back into NumPy vectors. You will have to wrap this code in a tf.function and tf.py_function, though.
def decode_embedding(embedding_bytes):
    """Rebuild the embedding vector from its raw byte string.

    ``embedding_bytes`` is an eager tensor whose ``.numpy()`` returns the
    buffer that was written with ``ndarray.tobytes()``.
    """
    # np.frombuffer defaults to float64; the dtype is stated explicitly so the
    # bytes are read as float32, matching `Tout` in tf_decode_embedding.
    # NOTE(review): assumes the embedding was serialized as float32 - confirm.
    return np.frombuffer(embedding_bytes.numpy(), dtype=np.float32)


#tf.function()
def tf_decode_embedding(embedding_bytes):
    """Run decode_embedding through tf.py_function and return a float32 tensor."""
    return tf.py_function(decode_embedding, inp=[embedding_bytes], Tout=tf.float32)
feature_description = dict(
    classes_text=tf.io.FixedLenFeature((), tf.string),
    data=tf.io.FixedLenFeature([], tf.string),
)


def _parse_data_function(example_proto):
    """Parse one example, map its label to an id and decode its embedding bytes."""
    parsed = tf.compat.v1.parse_single_example(example_proto, feature_description)
    parsed['classes_text'] = label_table.lookup(parsed['classes_text'])
    parsed['data'] = tf_decode_embedding(parsed['data'])
    return parsed


parsed_dataset = tfrecord_ds.map(_parse_data_function)
dataset = (
    parsed_dataset
    .cache()
    .shuffle(1000)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

Related

How to concat laserembeddings with huggingface funnel transformers simple CLS output for NLP sequence classification task?

i was approaching NLP sequence classification problem (3 classes) using huggingface transformers (funnel-transformer/large) and tensorflow.
first i created laserembedding like this :
from laserembeddings import Laser

# Embed every entry of the 'text' column and pickle the result.
df = pd.read_csv("mycsv.csv")
laser = Laser()
sentences = df['text'].values
embeds = laser.embed_sentences(sentences, lang='en')
write_pickle_to_file('train.pkl', embeds)
part 1 : Tensorflow version
for data preparation i use code like below :
# Join the three text columns with the tokenizer's separator token.
sep = tokenizer.sep_token
df['text'] = temp['column1'] + sep + temp['column2'] + sep + temp['column3']
def encode_text(texts):
    """Tokenize ``texts`` and return ``[input_ids, attention_mask, token_type_ids]``.

    Every sequence is padded or truncated to ``cfg.max_len``; each returned
    item is an int64 NumPy array.
    """
    enc_di = tokenizer.batch_encode_plus(
        texts,
        # `padding='max_length'` already requests fixed-length padding; the
        # deprecated duplicate `pad_to_max_length=True` is no longer passed.
        padding='max_length',
        truncation=True,
        return_token_type_ids=True,
        max_length=cfg.max_len,
    )
    return [
        np.asarray(enc_di[key], dtype=np.int64)
        for key in ('input_ids', 'attention_mask', 'token_type_ids')
    ]
then inside training function :
x_train = encode_text(df.text.to_list())
# Features are passed as a dict; the one-hot labels form the second element.
model_inputs = {
    "input_ids": x_train[0],
    "input_masks": x_train[1],
    "input_segments": x_train[2],
    # laser_columns contains all the laser embedded columns
    "lasers": np.array(train[laser_columns].values, dtype=np.float32),
}
one_hot_labels = tf.one_hot(df["label"].to_list(), 3)  # 3 class
train_ds = tf.data.Dataset.from_tensor_slices((model_inputs, one_hot_labels))
train_ds = train_ds.repeat().shuffle(2048)
train_ds = train_ds.batch(cfg.batch_size).prefetch(AUTO)
i add laser embedding in my model like this :
def create_model():
    """Build and compile the transformer + LASER classifier.

    The first token of the transformer's sequence output is concatenated with
    a dense projection of the LASER embedding and fed through a 2048-unit tanh
    layer into a 3-way softmax.

    Returns:
        The compiled Keras model.
    """
    transformer = transformers.TFAutoModel.from_pretrained(
        cfg.pretrained, config=config, from_pt=True)
    # Same length the tokenizer pads to (encode_text uses cfg.max_len), so the
    # Input shapes cannot drift away from the data.
    max_len = cfg.max_len
    # transformer
    input_ids = Input(shape=(max_len,), dtype="int32", name="input_ids")
    input_masks = Input(shape=(max_len,), dtype="int32", name="input_masks")
    input_segments = Input(shape=(max_len,), dtype="int32", name="input_segments")
    sequence_output = transformer(
        input_ids, attention_mask=input_masks, token_type_ids=input_segments)[0]
    cls_token = sequence_output[:, 0, :]
    # lasers
    lasers = Input(shape=(n_lasers,), dtype=tf.float32, name="lasers")  # n_lasers = 1024
    lasers_output = tf.keras.layers.Dense(n_lasers, activation='tanh')(lasers)
    x = tf.keras.layers.Concatenate()([cls_token, lasers_output])
    x = tf.keras.layers.Dropout(0.1)(x)
    x = tf.keras.layers.Dense(2048, activation='tanh')(x)
    x = tf.keras.layers.Dropout(0.1)(x)
    out = tf.keras.layers.Dense(3, activation='softmax')(x)
    model = Model(inputs=[input_ids, input_masks, input_segments, lasers], outputs=out)
    model.compile(
        # `lr` is a deprecated alias; `learning_rate` is the supported name.
        Adam(learning_rate=1e-5),
        loss=losses.CategoricalCrossentropy(),
        metrics=["acc", metrics.CategoricalCrossentropy(name='xentropy')],
    )
    return model
now my question is, how do we do the same with pytorch for exact same problem and same dataset?
part 2 : pytorch version
# Load the training table from CSV.
df = pd.read_csv("mytrain.csv")
class myDataset(Dataset):
    """Dataset that tokenizes one text pair per row of a DataFrame.

    The first segment is ``column1``, ``column2`` and ``column3`` joined by
    single spaces; the second segment is ``column4``. When ``training`` is
    true each sample also carries the row's ``label`` under ``'target'``.
    """

    def __init__(self, df, max_length, tokenizer, training=True):
        self.df = df
        self.max_len = max_length
        self.tokenizer = tokenizer
        self.training = training
        # Keep the raw column arrays so __getitem__ is a plain index lookup.
        for name in ('column1', 'column2', 'column3', 'column4'):
            setattr(self, name, self.df[name].values)
        if self.training:
            self.targets = self.df['label'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        text0 = self.column4[index]
        text1 = (
            self.column1[index] + ' ' + self.column2[index] + ' '
            + self.column3[index]
        )
        encoded = self.tokenizer.encode_plus(
            text1,
            text0,
            truncation=True,
            add_special_tokens=True,
            return_token_type_ids=True,
            is_split_into_words=False,
            max_length=self.max_len,
        )
        sample = {key: encoded[key] for key in ('input_ids', 'attention_mask')}
        # token_type_ids are copied only when the tokenizer returned them.
        if 'token_type_ids' in encoded:
            sample['token_type_ids'] = encoded['token_type_ids']
        if self.training:
            sample['target'] = self.targets[index]
        return sample
# Batch collate function: a DataCollatorWithPadding built around the tokenizer
# stored in CONFIG.
collate_fn = DataCollatorWithPadding(tokenizer=CONFIG['tokenizer'])
class myModel(nn.Module):
    """Pretrained backbone followed by a linear layer with 3 outputs."""

    def __init__(self, model_name):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        # Gradient checkpointing is always switched on.
        print("using gradient_checkpoint...")
        self.model.gradient_checkpointing_enable()
        # NOTE(review): this config is loaded and modified after the backbone
        # was created and is never passed to it; in this class it is only read
        # for `hidden_size` - confirm that is intended.
        self.config = AutoConfig.from_pretrained(model_name)
        overrides = {
            "output_hidden_states": True,
            "hidden_dropout_prob": 0.0,
            "layer_norm_eps": 1e-7,
            "add_pooling_layer": False,
            "attention_probs_dropout_prob": 0.0,
        }
        self.config.update(overrides)
        self.fc = nn.Linear(self.config.hidden_size, 3)

    def forward(self, ids, mask):
        """Return the linear layer applied to the first token of the backbone output."""
        backbone_out = self.model(
            input_ids=ids, attention_mask=mask, output_hidden_states=False)
        first_token = backbone_out[0][:, 0, :]
        return self.fc(first_token)
and in train and validation loop i have code like this :
# Excerpt of the train/validation loop: move one batch to the device, run the
# forward pass under autocast and compute the loss.
bar = tqdm(enumerate(dataloader), total=len(dataloader))
for step, data in bar:
# Inputs and targets are moved to `device` as long tensors.
ids = data['input_ids'].to(device, dtype = torch.long)
mask = data['attention_mask'].to(device, dtype = torch.long)
targets = data['target'].to(device, dtype=torch.long)
batch_size = ids.size(0)
optimizer.zero_grad()
# forward pass with `autocast` context manager
with autocast(enabled=True):
outputs = model(ids, mask)
loss = loss_fct(outputs, targets)
i would like to know where and how in my huggingface pytorch pipeline i can use the laserembedding that i created earlier and used in tensorflow huggingface model?
I would like to concatenate the LASER embeddings with the Funnel Transformer's simple CLS token output and train the transformer model with the LASER embeddings as an extra feature in PyTorch, exactly as I did in the TensorFlow example. Do you know how to modify my PyTorch code to make this work? The TensorFlow implementation with the concatenated LASER embeddings that I posted above works well; I just want to do the same in the PyTorch implementation. Your help is highly appreciated, thanks in advance.

How to save large float into TFRecord format? float_list/float32 seems to truncate the values

We write processed data into TFRecords and we are noticing data loss when read back from TFRecords. Reproducible example below. Strange thing is that it doesn't just drop the decimals but seem to randomly roundup/down values. Since it only allows float32, int64 and string, we are not sure what other options to try.
We are writing these values
[20191221.1, 20191222.1, 20191223.1, 20191224.1, 20191225.1, 20191226.1, 20191227.1, 20191228.1, 20191229.1, 20191230.1]
But reading from tfrecords returns these values
tf.Tensor(
[20191222. 20191222. 20191224. 20191224. 20191226. 20191226. 20191228.
20191228. 20191230. 20191230.], shape=(10,), dtype=float32)
Reproducible Code
import tensorflow as tf
def write_date_tfrecord():
    """Write a single Example holding ten float values to ``data.tf_record``."""
    # writes 10 dummy values to replicate the issue
    data = [20191221.1 + x for x in range(0, 10)]
    print("Writing data - ", data)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'data': tf.train.Feature(float_list=tf.train.FloatList(value=data))
            }
        ))
    # The context manager closes the writer, so the record is flushed to disk
    # before anything tries to read the file back.
    with tf.io.TFRecordWriter("data.tf_record") as writer:
        writer.write(example.SerializeToString())
def parse_function(serialized_example):
    """Parse one serialized example and return its ``'data'`` float32 sequence."""
    spec = {
        'data': tf.io.FixedLenSequenceFeature([], tf.float32, allow_missing=True),
    }
    parsed = tf.io.parse_single_example(serialized=serialized_example, features=spec)
    return parsed['data']
def dataset_generator():
    """Return a dataset over ``data.tf_record`` with ``parse_function`` mapped on it."""
    records = tf.data.TFRecordDataset("data.tf_record")
    return records.map(
        parse_function,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )
if __name__ == '__main__':
    write_date_tfrecord()
    # Print every parsed record so it can be compared with the written values.
    for parsed_values in dataset_generator():
        print(parsed_values)
This solved my issue. I had this problem when writing audio files as floating-point matrices using FloatList, but when I used BytesList to store the data in the TFRecords and then decoded the data on reading, the issue was resolved. Note that decoding with tf.float32 will not solve the issue; we need to decode with tf.float64.
def _bytes_feature2(value):
    """Wrap a single bytes value in a ``tf.train.Feature``."""
    single_item = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=single_item)
def serialize_example(sound):
    """Serialize ``sound`` as an Example whose ``'snd'`` feature holds its raw bytes."""
    snd_feature = _bytes_feature2(sound.tobytes())
    features = tf.train.Features(feature={'snd': snd_feature})
    return tf.train.Example(features=features).SerializeToString()
def write_tfrecords(rf, snd):
    """Serialize every item of ``snd`` and write it to the TFRecord file ``rf``.

    Args:
        rf: Path of the TFRecord file to create.
        snd: Sequence of arrays; each one becomes one record.
    """
    with tf.io.TFRecordWriter(rf) as writer:
        # Iterate the samples directly instead of indexing through range(len()).
        for sample in snd:
            writer.write(serialize_example(sample))
# Build the path with os.path.join so it is valid on every platform, not only
# where '\\' is the path separator.
tfrecord_path = os.path.join(os.getcwd(), 'tfrec', 'train.tfrecords')
# writing records
write_tfrecords(tfrecord_path, train)
# loading records
raw_dataset = tf.data.TFRecordDataset(tfrecord_path)
def parse_record(record):
    """Parse a serialized example into a dict with the scalar string feature ``'snd'``."""
    spec = {'snd': tf.io.FixedLenFeature([], tf.string)}
    return tf.io.parse_single_example(record, spec)
def decode_record(record):
    """Decode the raw ``'snd'`` bytes of a parsed record as float64 values."""
    return tf.io.decode_raw(record['snd'], out_type=tf.float64)
# For every stored record, print its first ten decoded values followed by the
# first ten values of train[0].
for record in raw_dataset:
    aud = decode_record(parse_record(record))
    print(aud.numpy()[0:10])
    print(train[0][0:10])
output:
[ 417.69951205 -231.58708746 -10.05624011 -146.10342256 -66.60317323
-159.91550792 -3.93602823 29.94517981 106.22196629 65.53008959]
[ 417.69951205 -231.58708746 -10.05624011 -146.10342256 -66.60317323
-159.91550792 -3.93602823 29.94517981 106.22196629 65.53008959]

Storing a list of list as tf record in tensorflow

What would be the best way of storing/reading a list of list in a tf record in TensorFlow?
I tried to serialize the data to a one dimensional list, then reshape it to its original size when reading. However, the encoding process is taking forever.
Writing to tf_record:
Variable in question: word_data (shape=[nb_channels, 1500])
# Excerpt from the body of the function that builds one
# tf.train.SequenceExample (the enclosing `def` is not part of the excerpt).
# Unpack the fields of the word record.
electrodes_coordinates=word_data['electrodes_coordinates']
electrodes_loc3=word_data['electrodes_loc3']
nb_electrodes=word_data['nb_electrodes']
label=word_data['label']
# From here on `word_data` refers to the signal stored under 'word'.
word_data=word_data['word']
#reshape word_data from list of list (nb_channel,nb_time points) to list (nb_channel*nb_timepoints)
word_data=np.reshape(word_data, [-1])
# Per-word scalar features go into the context.
# NOTE(review): `word` is not defined anywhere in this excerpt - confirm where it comes from.
context = tf.train.Features(feature={
"word/word_id": _bytes_feature(word),
"word/nb_channels": _int64_feature(nb_electrodes),
"word/label": _int64_feature(int(label))
})
# Per-electrode coordinates/locations and the flattened signal go into the
# feature lists.
feature_lists = tf.train.FeatureLists(feature_list={
"word/electrode_x_coordinates":_float_feature_list(electrodes_coordinates[:,0]),
"word/electrode_y_coordinates":_float_feature_list(electrodes_coordinates[:,1]),
"word/electrode_z_coordinates":_float_feature_list(electrodes_coordinates[:,2]),
"word/electrode_location3":_int64_feature_list(loc3_to_id(electrodes_loc3,loc3_dict)),
"word/data": _float_feature_list(word_data)})
sequence_example = tf.train.SequenceExample(context=context ,feature_lists=feature_lists)
return sequence_example
Reading from tf_record:
# Parse one serialized SequenceExample into its context and sequence parts.
# NOTE(review): the dictionary keys below are bare names (nb_channels, label,
# electrode_x_coordinates, ...), so they are presumably variables holding the
# feature-key strings and defined outside this excerpt - confirm.
context, sequence = tf.parse_single_sequence_example(serialized,
context_features={
nb_channels: tf.FixedLenFeature([], dtype=tf.int64),
label: tf.FixedLenFeature([], dtype=tf.int64)
},
sequence_features={
electrode_x_coordinates: tf.FixedLenSequenceFeature([], dtype=tf.float32),
electrode_y_coordinates: tf.FixedLenSequenceFeature([], dtype=tf.float32),
electrode_z_coordinates: tf.FixedLenSequenceFeature([], dtype=tf.float32),
electrode_location3: tf.FixedLenSequenceFeature([], dtype=tf.int64),
word_data: tf.FixedLenSequenceFeature([], dtype=tf.float32)
}
)
# Pull the individual tensors out of the parsed dictionaries.
encoded_nb_channels = context[nb_channels]
encoded_label = context[label]
encoded_electrode_x_coordinates = sequence[electrode_x_coordinates]
encoded_electrode_y_coordinates = sequence[electrode_y_coordinates]
encoded_electrode_z_coordinates = sequence[electrode_z_coordinates]
encoded_electrode_location3 = sequence[electrode_location3]
encoded_word_data = sequence[word_data]

one read_and_decode function for different training data

I'm new to TensorFlow, and here's what I'm trying to do: save training data from different scenarios and then read them back. The sizes of feature and output may be different for different scenarios.
The issue is when I tried to read the data back, I got an exception that looks like the following:
InvalidArgumentError (see above for traceback): Name: <unknown>, Key: observation, Index: 0. Number of float values != expected. Values size: 17 but output shape: []
The function for saving data looks like below:
def save_data(obs, actions, filename):
    """Write (observation, action) pairs to the TFRecord file ``filename``.

    Each example stores the flattened float values of one observation and one
    action together with their lengths (``obs_size`` / ``action_size``).

    Args:
        obs: Indexable collection of arrays; ``obs[i].tolist()`` is stored.
        actions: Indexable collection of arrays, parallel to ``obs``.
        filename: Path of the TFRecord file to create.
    """
    writer = tf.python_io.TFRecordWriter(filename)
    try:
        # NOTE(review): `num_examples` is not defined inside this function;
        # presumably a module-level count matching len(obs) - confirm.
        for index in range(num_examples):
            o = obs[index].tolist()
            a = actions[index].tolist()
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'obs': tf.train.Feature(
                        float_list=tf.train.FloatList(value=o)),
                    'action': tf.train.Feature(
                        float_list=tf.train.FloatList(value=a)),
                    'obs_size': tf.train.Feature(
                        int64_list=tf.train.Int64List(value=[len(o)])),
                    # A single Feature wrapping an Int64List, exactly like
                    # 'obs_size' (no nested tf.train.Feature).
                    'action_size': tf.train.Feature(
                        int64_list=tf.train.Int64List(value=[len(a)])),
                }
            ))
            writer.write(example.SerializeToString())
    finally:
        # Close the file even if serializing one of the examples fails.
        writer.close()
The function to read the data back is as follows:
def read_and_decode(filename_queue):
    """Read one example from ``filename_queue`` and decode it.

    Returns:
        A tuple ``(obs, action)`` of float32 tensors reshaped to
        ``[1, obs_size]`` and ``[1, action_size]``.
    """
    reader = tf.TFRecordReader()
    _, example = reader.read(filename_queue)
    features = tf.parse_single_example(
        example,
        features={
            # The number of floats differs between scenarios, so these cannot
            # be declared as scalar FixedLenFeature([]) entries; that is what
            # raises "Number of float values != expected".
            'obs': tf.VarLenFeature(tf.float32),
            'action': tf.VarLenFeature(tf.float32),
            'obs_size': tf.FixedLenFeature([], tf.int64),
            'action_size': tf.FixedLenFeature([], tf.int64),
        }
    )
    # The key written by save_data is 'obs_size', not 'observation_size'.
    obs_size = tf.cast(features['obs_size'], tf.int32)
    action_size = tf.cast(features['action_size'], tf.int32)
    # VarLenFeature yields SparseTensors; densify before reshaping.
    obs = tf.sparse_tensor_to_dense(features['obs'])
    action = tf.sparse_tensor_to_dense(features['action'])
    # tf.stack replaces tf.pack, which is gone from TensorFlow 1.0 onwards.
    obs = tf.reshape(obs, tf.stack([1, obs_size]))
    action = tf.reshape(action, tf.stack([1, action_size]))
    return obs, action

You must feed a value for placeholder tensor 'input_example_tensor' with dtype string and shape [1]

I am developing a tensorflow serving client/server application by using chatbot-retrieval project.
My code has two parts, namely serving part and client part.
Below is the code snippet for the serving parts.
def get_features(context, utterance):
    """Bundle ``context`` and ``utterance`` with constant length tensors.

    Both lengths are fixed to 50 and emitted as int64 constants of shape [1, 1].
    """
    context_len = utterance_len = 50

    def _length_tensor(length):
        return tf.constant(length, shape=[1, 1], dtype=tf.int64)

    return {
        "context": context,
        "context_len": _length_tensor(context_len),
        "utterance": utterance,
        "utterance_len": _length_tensor(utterance_len),
    }
def my_input_fn(estimator, input_example_tensor):
    """Parse the serialized examples and return the feature dict built from them."""
    feature_configs = {
        key: tf.FixedLenFeature(shape=[50], dtype=tf.int64)
        for key in ('context', 'utterance')
    }
    tf_example = tf.parse_example(input_example_tensor, feature_configs)
    context = tf.identity(tf_example['context'], name='context')
    utterance = tf.identity(tf_example['utterance'], name='utterance')
    return get_features(context, utterance)
def my_signature_fn(input_example_tensor, features, predictions):
    """Return ``(default_graph_signature, named_graph_signatures)`` for export.

    The named input signature is built from tensors re-parsed out of
    ``input_example_tensor``; the output signature exposes ``predictions``
    under ``'scores'``.
    """
    feature_configs = {
        key: tf.FixedLenFeature(shape=[50], dtype=tf.int64)
        for key in ('context', 'utterance')
    }
    tf_example = tf.parse_example(input_example_tensor, feature_configs)
    tf_context = tf.identity(tf_example['context'], name='tf_context_utterance')
    tf_utterance = tf.identity(tf_example['utterance'], name='tf_utterance')
    default_graph_signature = exporter.regression_signature(
        input_tensor=input_example_tensor,
        output_tensor=tf.identity(predictions),
    )
    inputs_signature = exporter.generic_signature(
        {'context': tf_context, 'utterance': tf_utterance})
    outputs_signature = exporter.generic_signature({'scores': predictions})
    named_graph_signatures = {
        'inputs': inputs_signature,
        'outputs': outputs_signature,
    }
    return default_graph_signature, named_graph_signatures
def main():
    """Fit the estimator for 100 steps, then export it with the deprecated input_fn path."""
    ##preliminary codes here##
    estimator.fit(input_fn=input_fn_train, steps=100, monitors=[eval_monitor])
    export_options = dict(
        export_dir=FLAGS.export_dir,
        input_fn=my_input_fn,
        use_deprecated_input_fn=True,
        signature_fn=my_signature_fn,
        exports_to_keep=1,
    )
    estimator.export(**export_options)
Below is the code snippet for the client part.
def tokenizer_fn(iterator):
    """Return a generator that splits each string of ``iterator`` on single spaces."""
    tokens_per_line = (line.split(" ") for line in iterator)
    return tokens_per_line
# Convert the two input sentences to vocabulary ids and attach them to the
# prediction request.
vp = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(FLAGS.vocab_processor_file)
input_context = "biz banka kart farkli bir banka atmsinde para"
input_utterance = "farkli banka kart biz banka atmsinde para"
context_feature = np.array(list(vp.transform([input_context])))
utterance_feature = np.array(list(vp.transform([input_utterance])))
context_tensor = tf.contrib.util.make_tensor_proto(
    context_feature, shape=[1, context_feature.size])
# The utterance tensor must be built from utterance_feature; building it from
# context_feature would send the context twice.
utterance_tensor = tf.contrib.util.make_tensor_proto(
    utterance_feature, shape=[1, utterance_feature.size])
request.inputs['context'].CopyFrom(context_tensor)
request.inputs['utterance'].CopyFrom(utterance_tensor)
# Excerpt from the body of the client function: issue the Predict RPC
# asynchronously, register the completion callback and return the error rate
# reported by result_counter.
result_counter.throttle()
result_future = stub.Predict.future(request, 5.0) # 5 seconds
result_future.add_done_callback(
_create_rpc_callback(label[0], result_counter))
return result_counter.get_error_rate()
Both the serving and client parts build with no errors. After running the serving application and then the client application, I get the following strange error propagated to the client application when the RPC call completes.
Below is the error I get when rpc call completes
AbortionError(code=StatusCode.INVALID_ARGUMENT, details="You must feed a value for placeholder tensor 'input_example_tensor' with dtype string and shape [1]
[[Node: input_example_tensor = Placeholder[_output_shapes=[[1]], dtype=DT_STRING, shape=[1], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]")
The error is strange since there seems to be no way to feed the placeholder from the client application.
How can I provide data for the placeholder 'input_example_tensor' if I am accessing the model through tensorflow serving?
ANSWER:
(I posted my answer here since I couldn't post it as an answer due to a lack of StackOverflow badges. Anyone who volunteers to submit it as their own answer to the question is more than welcome; I will approve it as the answer.)
I could resolve the problem by using the option use_deprecated_input_fn = False in estimator.export function and change the input signatures accordingly.
Below is the final code which is running with no problem.
def get_features(input_example_tensor, context, utterance):
    """Return the feature dict, including the raw serialized-example tensor.

    The serialized input is exposed under ``"my_input_example_tensor"``; both
    length entries are int64 constants of shape [1, 1] with value 50.
    """
    context_len = 50
    utterance_len = 50
    features = dict(
        my_input_example_tensor=input_example_tensor,
        context=context,
        context_len=tf.constant(context_len, shape=[1, 1], dtype=tf.int64),
        utterance=utterance,
        utterance_len=tf.constant(utterance_len, shape=[1, 1], dtype=tf.int64),
    )
    return features
def my_input_fn():
    """Create the serialized-example placeholder and derive the features from it.

    Returns:
        ``(features, None)``: the feature dict and no targets.
    """
    input_example_tensor = tf.placeholder(tf.string, name='tf_example_placeholder')
    feature_configs = {
        key: tf.FixedLenFeature(shape=[50], dtype=tf.int64)
        for key in ('context', 'utterance')
    }
    tf_example = tf.parse_example(input_example_tensor, feature_configs)
    context = tf.identity(tf_example['context'], name='context')
    utterance = tf.identity(tf_example['utterance'], name='utterance')
    return get_features(input_example_tensor, context, utterance), None
def my_signature_fn(input_example_tensor, features, predictions):
    """Return ``(default_graph_signature, named_graph_signatures)`` for export.

    The named inputs are taken straight from ``features``; the outputs expose
    ``predictions`` under ``'scores'``.
    """
    default_graph_signature = exporter.regression_signature(
        input_tensor=input_example_tensor,
        output_tensor=predictions,
    )
    inputs_signature = exporter.generic_signature({
        'context': features['context'],
        'utterance': features['utterance'],
    })
    outputs_signature = exporter.generic_signature({'scores': predictions})
    named_graph_signatures = {
        'inputs': inputs_signature,
        'outputs': outputs_signature,
    }
    return default_graph_signature, named_graph_signatures
def main():
    """Fit the estimator for 100 steps and export it using the non-deprecated input_fn path."""
    ##preliminary codes here##
    estimator.fit(input_fn=input_fn_train, steps=100, monitors=[eval_monitor])
    targets_signature = tf.contrib.learn.estimators.tensor_signature.TensorSignature(
        tf.constant(0, shape=[1, 1]))
    estimator._targets_info = targets_signature
    export_options = dict(
        export_dir=FLAGS.export_dir,
        input_fn=my_input_fn,
        input_feature_key="my_input_example_tensor",
        use_deprecated_input_fn=False,
        signature_fn=my_signature_fn,
        exports_to_keep=1,
    )
    estimator.export(**export_options)
OP self-solved but couldn't self-answer, so here's their answer:
Problem was fixed by using the option use_deprecated_input_fn = False in estimator.export function and changing the input signatures accordingly:
def my_signature_fn(input_example_tensor, features, predictions):
    """Build the export signatures.

    Returns:
        A tuple of the default regression signature (serialized input ->
        predictions) and a dict of named generic signatures with the
        ``'inputs'`` (context, utterance) and ``'outputs'`` (scores) entries.
    """
    default_graph_signature = exporter.regression_signature(
        input_tensor=input_example_tensor,
        output_tensor=predictions,
    )
    named_inputs = {name: features[name] for name in ('context', 'utterance')}
    named_outputs = {'scores': predictions}
    named_graph_signatures = {
        'inputs': exporter.generic_signature(named_inputs),
        'outputs': exporter.generic_signature(named_outputs),
    }
    return default_graph_signature, named_graph_signatures
def main():
    """Train for 100 steps, set the targets signature and export the model."""
    ##preliminary codes here##
    estimator.fit(input_fn=input_fn_train, steps=100, monitors=[eval_monitor])
    tensor_signature = tf.contrib.learn.estimators.tensor_signature
    estimator._targets_info = tensor_signature.TensorSignature(
        tf.constant(0, shape=[1, 1]))
    estimator.export(
        export_dir=FLAGS.export_dir,
        input_fn=my_input_fn,
        input_feature_key="my_input_example_tensor",
        use_deprecated_input_fn=False,
        signature_fn=my_signature_fn,
        exports_to_keep=1,
    )