HuggingFace PyTorch Trainer giving worse results than TensorFlow

I'm trying to make the switch from TensorFlow to PyTorch, but I'm getting noticeably worse results when running a model in PyTorch using Trainer.
I'm using bert-base-uncased, and as far as I can tell I'm using essentially the same settings across both (batch size, epochs, learning rate, etc.). However, I'm getting an F1 score of 0.9967 from TensorFlow and 0.944649446494465 from PyTorch. The loss also seems to fluctuate a lot more in PyTorch. I'm still pretty new to machine learning and Python in general, so I feel like it must be something obvious, but I've yet to find it. Here are my scripts. Thanks in advance.
Tensorflow
# imports used by the scripts below
import numpy as np
import sklearn.utils
import tensorflow as tf
import evaluate
from datasets import load_dataset
from transformers import (AutoTokenizer, DataCollatorWithPadding,
                          TFAutoModelForSequenceClassification, create_optimizer)
from transformers.keras_callbacks import KerasMetricCallback

SEQ_LEN = 256
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def train():
    def preprocess_function(examples):
        return tokenizer(examples["text"], max_length=SEQ_LEN, truncation=True, padding='max_length', add_special_tokens=True, return_attention_mask=True, return_token_type_ids=False, return_tensors='tf')

    dataset = load_dataset('json', data_files={"train": "full-items.json", "test": "validation-2.json"})
    tokenized = dataset.map(preprocess_function, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

    batch_size = 8
    num_epochs = 4
    batches_per_epoch = len(tokenized["train"]) // batch_size
    total_train_steps = int(batches_per_epoch * num_epochs)
    optimizer, schedule = create_optimizer(init_lr=4e-5, num_warmup_steps=0, num_train_steps=total_train_steps)

    id2label = {0: "NEGATIVE", 1: "POSITIVE"}
    label2id = {"NEGATIVE": 0, "POSITIVE": 1}
    model = TFAutoModelForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
    )
    tf_train_set = model.prepare_tf_dataset(
        tokenized["train"],
        shuffle=True,
        batch_size=batch_size,
        collate_fn=data_collator,
    )
    tf_validation_set = model.prepare_tf_dataset(
        tokenized["test"],
        shuffle=False,
        batch_size=batch_size,
        collate_fn=data_collator,
    )

    eval_metrics = evaluate.load("f1")

    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=1)
        return eval_metrics.compute(predictions=predictions, references=labels)

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    METRICS = [
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True, name='sparse_crossentropy'),
    ]

    metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_train_set)
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)

    class_weights = dict(enumerate(sklearn.utils.class_weight.compute_class_weight('balanced',
                                                                                   classes=np.unique(tokenized["train"]["label"]),
                                                                                   y=tokenized["train"]["label"])))

    model.compile(optimizer=optimizer, loss=loss, metrics=METRICS)
    model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=num_epochs, class_weight=class_weights, callbacks=[early_stop, metric_callback])
    model.save_pretrained('lease_to_own_model', save_format="tf")
Pytorch
import torch
from transformers import (AutoModelForSequenceClassification, DataCollatorWithPadding,
                          Trainer, TrainingArguments)

# reuses the same SEQ_LEN, tokenizer, load_dataset, evaluate, np and sklearn imports as above
def pyTorch():
    def preprocess_function(examples):
        return tokenizer(examples["text"], max_length=SEQ_LEN, truncation=True, padding='max_length', add_special_tokens=True, return_attention_mask=True, return_token_type_ids=False)

    dataset = load_dataset('json', data_files={"train": "full-items.json", "test": "validation-2.json"})
    tokenized = dataset.map(preprocess_function, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    eval_f1 = evaluate.load("f1")
    eval_accuracy = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=1)
        f1 = eval_f1.compute(predictions=predictions, references=labels)
        accuracy = eval_accuracy.compute(predictions=predictions, references=labels)
        return {"accuracy": accuracy["accuracy"], "f1": f1["f1"]}

    id2label = {0: "NEGATIVE", 1: "POSITIVE"}
    label2id = {"NEGATIVE": 0, "POSITIVE": 1}
    model = AutoModelForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
    )
    device = torch.device("cuda")
    model.to(device)

    batch_size = 8
    training_args = TrainingArguments(
        num_train_epochs=4,
        output_dir="pytorch",
        learning_rate=4e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        metric_for_best_model='f1',
        load_best_model_at_end=True,
        logging_strategy="epoch",
        warmup_steps=0,
    )

    class_weights = sklearn.utils.class_weight.compute_class_weight('balanced',
                                                                    classes=np.unique(tokenized["train"]["label"]),
                                                                    y=tokenized["train"]["label"])
    weights = torch.tensor(class_weights, dtype=torch.float).to(device)

    class CustomTrainer(Trainer):
        def compute_loss(self, model, inputs, return_outputs=False):
            labels = inputs.get("labels")
            outputs = model(**inputs)
            logits = outputs.get("logits")
            loss_fct = torch.nn.CrossEntropyLoss(weight=weights)
            loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
            return (loss, outputs) if return_outputs else loss

    trainer = CustomTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized["train"],
        eval_dataset=tokenized["test"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    trainer.save_model("pytorch")
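A hedged side note rather than a confirmed fix: the TF script fixes its schedule implicitly through create_optimizer, which builds AdamW with a linear decay to zero and no warmup, while the Trainer run leaves several of the corresponding knobs at their defaults. Spelling them out, and pinning a seed, makes the two runs easier to compare; the values below simply mirror the Trainer defaults and the TF-side settings, they are not tuned suggestions.

training_args = TrainingArguments(
    output_dir="pytorch",
    num_train_epochs=4,
    learning_rate=4e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    lr_scheduler_type="linear",   # same shape as create_optimizer's default schedule
    warmup_steps=0,
    weight_decay=0.0,             # create_optimizer also defaults to weight_decay_rate=0.0
    max_grad_norm=1.0,            # Trainer clips gradients by default; worth checking against the TF run
    seed=42,                      # fix the seed so reruns are comparable
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    metric_for_best_model="f1",
    load_best_model_at_end=True,
)

The TF run also uses EarlyStopping with patience=2, which has no counterpart in the PyTorch run; transformers ships an EarlyStoppingCallback that can be passed to the Trainer's callbacks argument if the goal is a like-for-like comparison.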

Related

BERT WITH BiDirectional LSTM loss is not decreasing

I am trying to port the Keras Semantic Similarity example to PyTorch Lightning. I followed all the necessary steps from the Keras example, but somehow the loss is not decreasing at all. I am just curious where I made a mistake.
Here is the colab link
PyTorch Model
MAX_LENGTH = 128

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from transformers import BertModel, AdamW
import pytorch_lightning as pl

class BertWithLSTM(pl.LightningModule):
    def __init__(self, train_path, test_path, val_path, num_classes):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased", output_attentions=True)
        self.bert.trainable = False
        self.lstm = nn.LSTM(input_size=768, hidden_size=64, batch_first=True, bidirectional=True, dropout=0.3)
        self.dropout = nn.Dropout(p=0.3)
        self.output = nn.Linear(MAX_LENGTH*2, 3)
        self.num_classes = num_classes
        self.train_path = train_path
        self.test_path = test_path
        self.val_path = val_path
        self.criterion = nn.CrossEntropyLoss()

    def train_dataloader(self):
        train_dataset = CustomDataset(csv_path=self.train_path)
        return DataLoader(dataset=train_dataset, batch_size=32, shuffle=False)

    def test_dataloader(self):
        test_dataset = CustomDataset(csv_path=self.test_path)
        return DataLoader(dataset=test_dataset)

    def val_dataloader(self):
        val_dataset = CustomDataset(csv_path=self.val_path)
        return DataLoader(dataset=val_dataset, batch_size=16, shuffle=True)

    def training_step(self, train_batch, batch_idx):
        input_ids, attention_mask, token_type_ids, labels = train_batch
        output = self(input_ids, attention_mask, token_type_ids, labels)
        loss = F.cross_entropy(output, labels)
        self.log("training_loss", loss, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, val_batch, batch_idx):
        input_ids, attention_mask, token_type_ids, labels = val_batch
        output = self(input_ids, attention_mask, token_type_ids, labels)
        loss = F.cross_entropy(output, labels)
        self.log("val_loss", loss, on_epoch=True, prog_bar=True, logger=True)

    def forward(self, input_ids, attention_mask, token_type_ids, label):
        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        sequence_output = bert_output.last_hidden_state
        output, (hidden_state, c_n) = self.lstm(sequence_output)
        avg_pool = torch.mean(output, dim=1)
        max_pool, indices = torch.max(output, dim=1)
        output = torch.cat((avg_pool, max_pool), 1)
        output = self.output(output)
        return output

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters())
        return optimizer
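One detail worth double-checking when porting from Keras (an observation about the snippet above, not a confirmed diagnosis of the loss issue): self.bert.trainable = False is a Keras idiom and has no effect on a PyTorch nn.Module, it only creates an unused Python attribute, so the encoder is still being updated. Freezing in PyTorch goes through requires_grad, roughly as in this sketch (freeze_module is a hypothetical helper name):

def freeze_module(module: torch.nn.Module) -> None:
    # Parameters are frozen by turning off their gradients,
    # not by setting a .trainable attribute as in Keras.
    for param in module.parameters():
        param.requires_grad = False

# e.g. inside BertWithLSTM.__init__, right after loading the pretrained encoder:
# freeze_module(self.bert)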

tfr.keras.losses.ListMLELoss() is always 0 during training, validation, and testing

I have made a ranking model using tensorflow_ranking losses and metrics, but ListMLELoss() is always 0. The model trains and completes, but I imagine no learning is actually happening since the loss is never computed as anything but zero. I tried to follow this guide, https://www.tensorflow.org/recommenders/examples/listwise_ranking, as closely as I could, but my use case differs in some ways, so the code is a bit different. I am not sure why model.fit() runs and I get an NDCG value when the model clearly cannot be learning, given that no real loss value is being computed.
Here is my ranking model class:
import tensorflow as tf
import tensorflow_ranking as tfr
import tensorflow_recommenders as tfrs

class RankingModel(tf.keras.Model):

    def __init__(self, embeddings, vocab_size_dict, dim_dict, loss, activation='sigmoid'):
        super().__init__()
        self.embeddings = embeddings
        self.embedding_layers = {}
        self.vocab_size_dict = vocab_size_dict
        self.dim_dict = dim_dict
        self.activation = activation
        self.loss = loss

        self.embedding_layers['feature_one'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_one']+1,
            self.dim_dict['feature_one'],
            name='embedded_feature_one')
        self.embedding_layers['feature_two'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_two']+1,
            self.dim_dict['feature_two'],
            name='embedded_feature_two')
        self.embedding_layers['feature_three'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_three']+1,
            self.dim_dict['feature_three'],
            name='embedded_feature_three')
        self.embedding_layers['feature_four'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_four']+1,
            self.dim_dict['feature_four'],
            name='embedded_feature_four')
        self.embedding_layers['feature_five'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_five']+1,
            self.dim_dict['feature_five'],
            name='embedded_feature_five')

        self.flatten = tf.keras.layers.Flatten()
        self.concatenate = tf.keras.layers.Concatenate(axis=1, name='Input_Concatenation')
        self.batchnorm = tf.keras.layers.BatchNormalization(name='batchnorm')
        self.score_model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='leaky_relu'),
            tf.keras.layers.Dense(12, activation=activation)
        ])

        self.task = tfrs.tasks.Ranking(
            loss=self.loss,
            metrics=[
                tfr.keras.metrics.NDCGMetric(name="ndcg_metric")
            ]
        )

    def __call__(self, features, training=False):
        feats = []
        for feat, tens in features[0].items():
            if feat in self.embeddings:
                embedding = self.embedding_layers[feat](tens)
                flatten = self.flatten(embedding)
                feats.append(flatten)
            if feat == 'continuous':
                flatten = self.flatten(tens)
                feats.append(flatten)
        deep_concatenated = self.concatenate(feats)
        batchnorm = self.batchnorm(deep_concatenated)
        scores = self.score_model(batchnorm)
        print("scores: ", scores)
        print("mask: ", features[0]['mask'])
        masked_scores = tf.boolean_mask(scores, features[0]['mask'])
        # pred = tf.expand_dims(masked_scores, axis=1)
        # return pred
        return tf.expand_dims(masked_scores, axis=1)

    def compute_loss(self, features, training=False):
        labels = features[1]
        # print("labels: ", labels)
        # print("mask: ", features[0]['mask'])
        masked_labels = tf.boolean_mask(labels, features[0]['mask'])
        # print("masked labels:", masked_labels)
        masked_labels = tf.expand_dims(masked_labels, axis=1)
        print("masked_labels: ", masked_labels)
        scores = self(features)
        print("scores: ", scores)
        print("loss: ", self.task(labels=masked_labels, predictions=scores))
        return self.task(
            labels=masked_labels,
            predictions=scores
        )

    def train_step(self, inputs):
        """Custom train step using the `compute_loss` method."""
        with tf.GradientTape() as tape:
            loss = self.compute_loss(inputs)
            # Handle regularization losses as well.
            regularization_loss = sum(self.losses)
            total_loss = loss + regularization_loss
        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        metrics = {metric.name: metric.result() for metric in self.metrics}
        metrics["loss"] = loss
        metrics["regularization_loss"] = regularization_loss
        metrics["total_loss"] = total_loss
        return metrics

    def test_step(self, inputs):
        """Custom test step using the `compute_loss` method."""
        loss = self.compute_loss(inputs)
        # Handle regularization losses as well.
        regularization_loss = sum(self.losses)
        total_loss = loss + regularization_loss
        metrics = {metric.name: metric.result() for metric in self.metrics}
        metrics["loss"] = loss
        metrics["regularization_loss"] = regularization_loss
        metrics["total_loss"] = total_loss
        return metrics
Can anybody see why I am not getting a loss value? Thanks a lot. Please let me know if you need additional info. Maybe I can create some synthetic data so you can run it all yourself. I have been pulling my hair out for a few days trying to get this to work, so any advice is MUCH appreciated.
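A hedged observation about how ListMLE behaves, not a verified diagnosis of the model above: ListMLE scores each list against the ordering implied by its labels, so a list containing a single item has exactly one possible ordering and always yields a loss of 0. A minimal check along those lines:

import tensorflow as tf
import tensorflow_ranking as tfr

loss = tfr.keras.losses.ListMLELoss()

# One "list" per row. With a single item per list there is only one possible
# permutation, so the negative log-likelihood is -log(1) = 0 for every row.
single_item = loss(y_true=tf.constant([[1.0], [0.0]]),
                   y_pred=tf.constant([[0.3], [0.8]]))
print(float(single_item))  # 0.0

# With several items per list, the ordering matters and the loss is non-zero.
multi_item = loss(y_true=tf.constant([[2.0, 1.0, 0.0]]),
                  y_pred=tf.constant([[0.1, 0.5, 0.2]]))
print(float(multi_item))   # > 0

If the boolean_mask followed by tf.expand_dims(..., axis=1) in the model ends up handing the task [num_items, 1]-shaped labels and predictions, every row is such a single-item list, which would explain a constant zero; treat this as something to verify rather than a confirmed cause.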

Why can't a pure TensorFlow autoencoder converge when the Keras one fits well?

I'm a newbie at machine learning (and at Stack Overflow too). I want to ask for help.
I have two implementations of the same two-layer autoencoder for MNIST.
The first one fits well:
import tensorflow as tf, numpy as np

def in_pics(pics):
    return np.array(pics, np.float32)/255.

def out_pics(pics):
    npformed = np.array(pics, np.float32)
    samples = np.shape(npformed)[0]
    return np.reshape(npformed, (samples, 784))/255.

(train_data, test_data) = tf.keras.datasets.mnist.load_data()

train_dataset = tf.data.Dataset.from_tensor_slices((in_pics(train_data[0]), out_pics(train_data[0]))).batch(100)
test_dataset = tf.data.Dataset.from_tensor_slices((in_pics(test_data[0]), out_pics(test_data[0]))).batch(100)

model = tf.keras.Sequential(
    [
        tf.keras.layers.InputLayer(input_shape=(28,28)),
        tf.keras.layers.Reshape(target_shape=(784,)),
        tf.keras.layers.Dense(128, activation="sigmoid", kernel_initializer=tf.keras.initializers.truncated_normal(stddev=0.1),
                              bias_initializer=tf.keras.initializers.truncated_normal(stddev=0.1)),
        tf.keras.layers.Dense(784, activation="sigmoid", kernel_initializer=tf.keras.initializers.truncated_normal(stddev=0.1),
                              bias_initializer=tf.keras.initializers.truncated_normal(stddev=0.1)),
    ]
)

model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), optimizer="adam")
model.fit(x=train_dataset, epochs=20, verbose=1, validation_data=test_dataset)
Keras result
And I have the same model and the same training procedure, but written in terms of (almost) pure TensorFlow:
import tensorflow as tf, numpy as np

def prep_pics(pics):
    npformed = np.array(pics, np.float32)
    samples = np.shape(npformed)[0]
    return np.reshape(npformed, (samples, 784))/255.

batch_size, num_batches = 100, 12000

(train_data, test_data) = tf.keras.datasets.mnist.load_data()
(train_pics_prepared, test_pics_prepared) = (prep_pics(train_data[0]), prep_pics(test_data[0]))

train_dataset = tf.data.Dataset.from_tensor_slices((train_pics_prepared, train_pics_prepared))
test_dataset = tf.data.Dataset.from_tensor_slices((test_pics_prepared, test_pics_prepared)).batch(batch_size)

encoder_W = tf.Variable(tf.keras.initializers.truncated_normal(stddev=0.1)(shape=[784,128], dtype=tf.float32))
encoder_b = tf.Variable(tf.keras.initializers.truncated_normal(stddev=0.1)(shape=[128], dtype=tf.float32))
decoder_W = tf.Variable(tf.keras.initializers.truncated_normal(stddev=0.1)(shape=[128,784], dtype=tf.float32))
decoder_b = tf.Variable(tf.keras.initializers.truncated_normal(stddev=0.1)(shape=[784], dtype=tf.float32))

optimizer = tf.keras.optimizers.Adam()
bin_cross = tf.keras.losses.BinaryCrossentropy(from_logits=True)

#tf.function
def trainstep(X_pack, Y_pack):
    with tf.GradientTape() as tape:
        tape.watch([encoder_W, encoder_b, decoder_W, decoder_b])
        encoded = tf.nn.sigmoid(tf.matmul(X_pack, encoder_W) + encoder_b)
        decoded = tf.nn.sigmoid(tf.matmul(encoded, decoder_W) + decoder_b)
        loss = bin_cross(decoded, Y_pack)
    gradients = tape.gradient(loss, [encoder_W, encoder_b, decoder_W, decoder_b])
    optimizer.apply_gradients(zip(gradients, [encoder_W, encoder_b, decoder_W, decoder_b]))

num_samples = len(train_data[0])
epochs = num_batches//(num_samples//batch_size)
for i in range(epochs):
    print(f"Epoch num {i+1}:")
    dataset = train_dataset.shuffle(num_samples).batch(batch_size)
    for x, y in dataset:
        trainstep(x, y)
The loss never goes below 0.64 (the first one ends around 0.07).
Tensorflow result.
What I tried:
1.) Changing the optimizer to Adagrad and even to SGD.
2.) Using another loss - sigmoid_cross_entropy_with_logits.
3.) Training for longer, especially with SGD.
I haven't been able to find the mistake or typo for almost a week. Please help!
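Not a confirmed diagnosis, but one low-level detail worth verifying in a hand-written training step is the Keras loss call convention, which is loss(y_true, y_pred); swapping the arguments silently computes a different quantity. A quick check of the convention, independent of the autoencoder above:

import tensorflow as tf

bce = tf.keras.losses.BinaryCrossentropy()

y_true = tf.constant([[1.0, 0.0, 1.0]])
y_pred = tf.constant([[0.9, 0.2, 0.8]])

print(float(bce(y_true, y_pred)))  # targets first, predictions second (small value here)
print(float(bce(y_pred, y_true)))  # arguments swapped: a noticeably different value

This only illustrates the argument-order convention; it is not a rerun of the training loop above.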

Siamese network with third component error

I was able to create a siamese network similar to:
https://github.com/aspamers/siamese/
The problem happens when I try to add a third model as an input to the head of my network.
I get the following error:
ValueError: Shape must be rank 2 but is rank 3 for '{{node head/concatenate/concat}} = ConcatV2[N=3, T=DT_FLOAT, Tidx=DT_INT32](simple/Identity, simple_1/Identity, different/Identity, head/concatenate/concat/axis)' with input shapes: [?,?], [?,?], [?,?,1], [].
Here is the code below. One thing I am not comfortable with is the line processed_a = base_model1(input_a) and what it does, even after checking the Keras Model docs. I understand that if I don't do it I cannot get the desired shape and provide the necessary inputs to the final network.
Note that if I replace the code with what is commented out and just use a pure siamese network, it works fine.
Any idea what needs to be changed to resolve the above error, and what does base_model1(input_a) do?
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.python.keras.layers import *

def getModel1(input_shape):
    model_input = Input(shape=input_shape)
    layer = layers.Dense(32, activation='relu')(model_input)
    layer = layers.Flatten()(layer)
    return tf.keras.Model(inputs=model_input, outputs=layer, name="simple")

def getModel3(input_shape):
    model_input = Input(shape=input_shape)
    layer = layers.Dense(1, activation='relu')(model_input)
    return tf.keras.Model(inputs=model_input, outputs=layer, name="different")

def outputModel(models):
    inputs = []
    for model in models:
        inputs.append(Input(shape=model.output_shape))
    layer = layers.Concatenate()(inputs)
    layer = layers.Dense(1)(layer)
    return tf.keras.Model(inputs=inputs, outputs=layer, name="head")

dataset = []
inputs1 = []
for i in range(0, 128):
    dataset.append([0.0, 1.0, 2.0])
train_dataset1 = np.asarray(dataset)
base_model1 = getModel1(train_dataset1.shape)

dataset3 = [0.0, 1.0, 2.0]
train_dataset3 = np.asarray(dataset3)
base_model3 = getModel3(train_dataset3.shape)

input_a = Input(shape=base_model1.input_shape)
input_b = Input(shape=base_model1.input_shape)
input_c = Input(shape=base_model3.input_shape)

oModel = outputModel([base_model1, base_model1, base_model3])
#oModel = outputModel([base_model1, base_model1])

processed_a = base_model1(input_a)
processed_b = base_model1(input_b)
processed_c = base_model3(input_c)

head = oModel([processed_a, processed_b, processed_c])
model = tf.keras.Model(inputs=[input_a, input_b, input_c], outputs=head, name="model")
#head = oModel([processed_a, processed_b])
#model = tf.keras.Model(inputs=[input_a, input_b], outputs=head, name="model")

optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae', 'mse'])

model.predict([np.asarray([train_dataset1]), np.asarray([train_dataset1]), np.asarray([train_dataset3])])
#model.predict([np.asarray([train_dataset1]), np.asarray([train_dataset1])])
model.fit([np.asarray([train_dataset1]), np.asarray([train_dataset1]), np.asarray([train_dataset3])], np.asarray([1.0]), epochs=1000, validation_split=0, verbose=0, callbacks=[])
#model.fit([np.asarray([train_dataset1]), np.asarray([train_dataset1])], np.asarray([1.0]), epochs=1000, validation_split=0, verbose=0, callbacks=[])
Pay attention to the dimensionality you define when you initialize the model inputs and outputs. The first dimension is always the batch size (None), and that is what is causing the problem here. Here is the corrected example:
def getModel1(input_shape):
    model_input = Input(shape=input_shape)
    layer = Dense(32, activation='relu')(model_input)
    layer = Flatten()(layer)
    return Model(inputs=model_input, outputs=layer, name="simple")

def getModel3(input_shape):
    model_input = Input(shape=input_shape)
    layer = Dense(1, activation='relu')(model_input)
    return Model(inputs=model_input, outputs=layer, name="different")

def outputModel(models):
    inputs = []
    for model in models:
        inputs.append(Input(shape=model.output_shape[1:]))
    layer = Concatenate()(inputs)
    layer = Dense(1)(layer)
    return Model(inputs=inputs, outputs=layer, name="head")

base_model1 = getModel1((128,3))
base_model3 = getModel3((3))

input_a = Input(shape=base_model1.input_shape[1:])
input_b = Input(shape=base_model1.input_shape[1:])
input_c = Input(shape=base_model3.input_shape[1:])

oModel = outputModel([base_model1, base_model1, base_model3])

processed_a = base_model1(input_a)
processed_b = base_model1(input_b)
processed_c = base_model3(input_c)

head = oModel([processed_a, processed_b, processed_c])
model = Model(inputs=[input_a, input_b, input_c], outputs=head, name="model")

optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae', 'mse'])

# create dummy data
n_sample = 5
train_dataset1 = np.random.uniform(0,1, (n_sample,128,3))
train_dataset3 = np.random.uniform(0,1, (n_sample,3))
y = np.random.uniform(0,1, n_sample)

model.fit([train_dataset1, train_dataset1, train_dataset3], y, epochs=3)
model.predict([train_dataset1, train_dataset1, train_dataset3]).shape
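To make it concrete why the [1:] slice matters (a small illustration added alongside the answer, not part of it): Keras shape tuples such as output_shape already include the batch dimension as None, while Input(shape=...) expects the shape without it, so feeding a full output_shape back into Input stacks an extra batch axis on top, which is the kind of rank mismatch the ConcatV2 error above complains about.

print(base_model1.output_shape)                         # (None, 4096): batch dim already included
print(Input(shape=base_model1.output_shape).shape)      # (None, None, 4096): an extra axis sneaks in
print(Input(shape=base_model1.output_shape[1:]).shape)  # (None, 4096): what the head should receive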

Op type not registered HashTableV2 in Tensorflow 1.4.1 while deploying in Cloud ML

When deploying the model to Cloud ML we are getting a "Bad model" error: Op type not registered HashTableV2.
Code:
def model_fn(features, labels, mode):
    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.keras.backend.set_learning_phase(True)
    else:
        tf.keras.backend.set_learning_phase(False)

    input_feature = features['x']
    table = lookup.index_table_from_file(vocabulary_file='vocab.txt', num_oov_buckets=1, default_value=-1)
    text = tf.squeeze(input_feature, [1])
    words = tf.string_split(text)
    dense_words = tf.sparse_tensor_to_dense(words, default_value=PADWORD)
    numbers = table.lookup(dense_words)
    padding = tf.constant([[0, 0], [0, MAX_LEN]])
    padded = tf.pad(numbers, padding)
    sliced = tf.slice(padded, [0, 0], [-1, MAX_LEN])
    print('words_sliced={}'.format(words))

    embeds = tf.keras.layers.Embedding(MAX_FEATURES+1, 128, input_length=MAX_LEN)(sliced)
    print('words_embed={}'.format(embeds))

    f1 = tf.keras.layers.Dropout(0.2)(embeds)
    f1 = tf.keras.layers.Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1)(f1)
    f1 = tf.keras.layers.GlobalAveragePooling1D()(f1)
    f1 = tf.keras.layers.Dense(hidden_dims)(f1)
    f1 = tf.keras.layers.Dropout(0.5)(f1)
    f1 = tf.keras.layers.Activation('relu')(f1)
    logits = tf.keras.layers.Dense(11)(f1)

    predictions_dict = {
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }
    '''prediction_output = tf.estimator.export.PredictOutput({"classes": tf.argmax(input=logits, axis=1),
                                                              "probabilities": tf.nn.softmax(logits,
                                                                                             name="softmax_tensor")})'''

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions_dict, export_outputs={
            'prediction': tf.estimator.export.PredictOutput(predictions_dict)
        })

    loss = tf.losses.sparse_softmax_cross_entropy(labels, logits=logits)
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(loss, tf.contrib.framework.get_global_step(), optimizer='Adam',
                                                   learning_rate=0.001)
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    eval_metrics_ops = {
        'accuracy': tf.metrics.accuracy(labels=labels, predictions=predictions_dict['class']),
        'precision': tf.metrics.precision(labels=labels, predictions=predictions_dict['class']),
        'recall': tf.metrics.recall(labels=labels, predictions=predictions_dict['class'])
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics_ops)

def get_train_record(record):
    vector = tf.decode_csv(record, DEFAULTS, use_quote_delim=True)
    return vector[1:], vector[0]

def preprocess(text):
    text = text.lower()
    result = ' '.join([word for word in text.split() if word not in (stop_words)])
    return result

def build_vocab(file_name, vocab_file_name):
    df = pd.read_csv(file_name, header=None, sep=',', skiprows=[1], names=['product', 'consumer_complaint_narrative'])
    df['consumer_complaint_narrative'] = df['consumer_complaint_narrative'].apply(preprocess)
    print(df['consumer_complaint_narrative'][0])
    vocab_processor = tflearn.preprocessing.VocabularyProcessor(max_document_length=MAX_FEATURES, min_frequency=10,
                                                                tokenizer_fn=tflearn.preprocessing.tokenizer)
    vocab_processor.fit(df['consumer_complaint_narrative'])
    with gfile.Open(vocab_file_name, 'wb') as f:
        f.write("{}\n".format(PADWORD))
        for word, index in vocab_processor.vocabulary_._mapping.items():
            f.write("{}\n".format(word))
    nwords = len(vocab_processor.vocabulary_)
    print('{} words into {}'.format(nwords, vocab_file_name))

def input_fn(file_name, batch_size, repeat_count, shuffle=False):
    def _input_fn():
        data_set = tf.data.TextLineDataset(filenames=file_name)
        data_set = data_set.map(get_train_record)
        if shuffle:
            data_set = data_set.shuffle(shuffle)
        data_set = data_set.repeat(repeat_count)
        batch = data_set.batch(batch_size)
        iterator = batch.make_one_shot_iterator()
        features, labels = iterator.get_next()
        return {'x': features}, labels
    return _input_fn()

def get_train_spec(file_name, batch_size, repeat_count):
    return tf.estimator.TrainSpec(input_fn=lambda: input_fn(file_name, batch_size, repeat_count, shuffle=True), max_steps=1000)

def get_test_spec(file_name, batch_size, repeat_count=1):
    return tf.estimator.EvalSpec(input_fn=lambda: input_fn(file_name, batch_size, repeat_count, shuffle=True))

def serving_input_fn():
    feature_tensor = tf.placeholder(tf.string, [None])
    # features = tf.py_func(preprocess, [feature_tensor], tf.string)
    features = tf.expand_dims(feature_tensor, -1)
    return tf.estimator.export.ServingInputReceiver({'x': features}, {'x': features})

finance_classifier = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)

print('\n Training .....')
finance_classifier.train(input_fn=lambda: input_fn('dataset/train.csv', batch_size, repeat_count=5, shuffle=True))

print('\n Evaluating.....')
eval_results = finance_classifier.evaluate(input_fn=lambda: input_fn('dataset/valid.csv', batch_size, repeat_count=1,
                                                                     shuffle=False))
for key in eval_results:
    print(" {} was {}".format(key, eval_results[key]))

print('\n Exporting')
exported_model_dir = finance_classifier.export_savedmodel(model_dir, serving_input_receiver_fn=serving_input_fn)
decoded_model_dir = exported_model_dir.decode("utf-8")
Screenshot
One important thing to mention here: when I tried TensorFlow 1.2, with some changes in the model_fn code (basically using tf.contrib.keras instead of tf.keras), it was working.
The model that was exported with TensorFlow 1.2 works fine. Is this a bug in TensorFlow 1.4? How can we fix this error?
I have already created a GitHub issue in the TensorFlow repo.
The ML Engine supports TensorFlow 1.4 but the default version is TensorFlow 1.2. You can specify that you want 1.4 by adding the following code to your project's setup.py module:
REQUIRED_PACKAGES = ['tensorflow>=1.4']

setup(
    ...
    install_requires=REQUIRED_PACKAGES,
    ...
)
You can see the full list of supported packages and versions here.
We can solve this problem by setting --runtime-version=1.4.
Use the following commands when deploying the model in Cloud ML:
MODEL_BINARIES=$(gsutil ls gs://${BUCKET}/models/${MODEL_NAME}/export/)
gcloud ml-engine versions create ${MODEL_VERSION} --model=${MODEL_NAME} --origin=${MODEL_BINARIES} --runtime-version=1.4