Resources
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
print(tf.version.VERSION)
print(keras.__version__)
# 2.5.0
# 2.5.0
LSTM Encoder Decoder Model with Attention
n_features = 129
type_max = 3
n_padded_in = 10
n_padded_out = 10
# The following are referenced below but were not defined in the original
# snippet; these values are assumptions added for completeness
# (batch_size matches the fit() call below, latent_dim the error trace).
batch_size = 128
latent_dim = 256
l_rate = 0.001
input_item = layers.Input(batch_input_shape=[None, n_padded_in],
                          name="item_input",
                          dtype=tf.int64)
input_type = layers.Input(batch_input_shape=[None, n_padded_in],
                          name="type_input",
                          dtype=tf.int64)

encoding_padding_mask = tf.math.logical_not(tf.math.equal(input_item, 0))

embedding_item = layers.Embedding(input_dim=n_features,
                                  output_dim=batch_size,
                                  name="item_embedding")(input_item)
embedding_type = layers.Embedding(input_dim=type_max + 1,
                                  output_dim=batch_size,
                                  name="rec_embedding")(input_type)

concat_inputs = layers.Concatenate(name="concat_inputs")(
    [embedding_item, embedding_type])
concat_inputs = tf.keras.layers.BatchNormalization(
    name="batchnorm_inputs")(concat_inputs)

encoder_lstm = layers.LSTM(units=latent_dim,
                           return_state=True,
                           name="lstm_encoder")
encoder_output, hidden, cell = encoder_lstm(concat_inputs)

states = [hidden, cell]
decoder_output = hidden

decoder_lstm = layers.LSTM(units=latent_dim,
                           return_state=True,
                           name="lstm_decoder")
output_dense = layers.Dense(n_features, name="output")
att = layers.Attention(use_scale=False,
                       causal=True,
                       name="attention")
inputs = np.zeros((batch_size, 1, n_features))
all_outputs = []
for _ in range(n_padded_out):
    context_vector = att([decoder_output, encoder_output])
    context_vector = tf.expand_dims(context_vector, 1)
    inputs = tf.cast(inputs, tf.float32)
    inputs = tf.concat([context_vector, inputs], axis=-1)
    decoder_output, state_h, state_c = decoder_lstm(inputs, initial_state=states)
    output = output_dense(decoder_output)
    output = tf.expand_dims(output, 1)
    all_outputs.append(output)
    inputs = output
    states = [state_h, state_c]
all_outputs = layers.Lambda(lambda x: tf.concat(x, axis=1))(all_outputs)
type_encoder_model = keras.Model([input_item, input_type],
                                 all_outputs,
                                 name="type_encoder_model")
# output_dense has no softmax, so the model outputs logits:
type_encoder_model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=l_rate),
    metrics=["sparse_categorical_accuracy"])
type_encoder_model.summary()
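To make the decoding loop easier to follow, here is a shape sketch of one step; B is the runtime batch size and latent_dim is the assumed value defined above:

# decoder_output : (B, latent_dim)   query passed to the attention layer
# encoder_output : (B, latent_dim)   final encoder state (return_sequences is not set)
# context_vector : (B, latent_dim) -> expand_dims -> (B, 1, latent_dim)
# inputs, step 0 : (batch_size, 1, n_features) numpy zeros; later steps reuse
#                  the previous output, (B, 1, n_features)
# concat result  : (B, 1, latent_dim + n_features), fed to decoder_lstm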
Data Preparation
# second input sequence: item types
type_seq_padded = keras.preprocessing.sequence.pad_sequences(
    data["product_type"].to_list(),
    maxlen=n_padded_in,
    padding="pre",
    value=0.0
)

# first input sequence: items
input_seq_padded = keras.preprocessing.sequence.pad_sequences(
    data["input_seq"].to_list(),
    maxlen=n_padded_in,
    padding="pre",
    value=0.0
)

# output sequence: items
output_seq_padded = keras.preprocessing.sequence.pad_sequences(
    data["output_seq"].to_list(),
    maxlen=n_padded_out,
    padding="pre",
    value=0.0
)
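For reference, padding="pre" left-pads each sequence with the pad value up to maxlen; a toy illustration (not part of the original data):

keras.preprocessing.sequence.pad_sequences([[5, 7], [1, 2, 3]],
                                           maxlen=4, padding="pre", value=0)
# -> array([[0, 0, 5, 7],
#           [0, 1, 2, 3]], dtype=int32)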
Data Samples
type_seq_padded

array([[0, 0, 0, ..., 1, 1, 1],
       [0, 0, 0, ..., 2, 3, 3],
       [0, 0, 0, ..., 3, 3, 3],
       ...,
       [0, 0, 0, ..., 1, 3, 3],
       [0, 0, 0, ..., 3, 3, 3],
       [0, 0, 0, ..., 3, 3, 3]], dtype=int32)

input_seq_padded

array([[  0,   0,   0, ..., 101,  58, 123],
       [  0,   0,   0, ...,  79,  95,  87],
       [  0,   0,   0, ...,  98, 109, 123],
       ...,
       [  0,   0,   0, ..., 123, 109,  98],
       [  0,   0,   0, ..., 109,  98, 123],
       [  0,   0,   0, ...,  95, 123,  95]], dtype=int32)

output_seq_padded

array([[  0,   0,   0, ...,  58, 123,  43],
       [  0,   0,   0, ...,  95,  87, 123],
       [  0,   0,   0, ..., 109, 123,  10],
       ...,
       [  0,   0,   0, ..., 109,  98, 123],
       [  0,   0,   0, ...,  98, 123,  43],
       [  0,   0,   0, ..., 123,  95,  95]], dtype=int32)
My LSTM encoder-decoder model takes two input sequences (items and item types) and produces one output sequence (items). The final dense layer computes, for each of the 129 items, the probability of it being the next item purchased. The model is trained with the code below:
hist = type_encoder_model.fit([input_seq_padded[:64000],
                               type_seq_padded[:64000]],
                              output_seq_padded[:64000],
                              epochs=1,
                              batch_size=128,
                              verbose=1)
And when I attempt to use the model for prediction with the code below:
y_pred = base_model_X.predict([input_seq_padded_test,
                               type_seq_padded_test])
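For context, if predict succeeded, y_pred would have shape [n_samples, n_padded_out, n_features], and my reading is that the predicted item ids would then be recovered with an argmax over the last axis:

next_items = y_pred.argmax(axis=-1)  # (n_samples, n_padded_out) item ids; illustrative, not from the original post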
Test Samples
type_seq_padded_test

array([[0, 0, 0, ..., 2, 3, 3],
       [0, 0, 0, ..., 3, 2, 2],
       [0, 0, 0, ..., 3, 3, 3],
       ...,
       [0, 0, 0, ..., 3, 2, 1],
       [0, 0, 0, ..., 3, 2, 3],
       [0, 0, 0, ..., 2, 3, 3]], dtype=int32)

input_seq_padded_test

array([[ 0,  0,  0, ..., 31, 10, 13],
       [ 0,  0,  0, ...,  9,  6,  6],
       [ 0,  0,  0, ..., 13, 13,  9],
       ...,
       [ 0,  0,  0, ..., 10, 51, 18],
       [ 0,  0,  0, ..., 12, 44, 12],
       [ 0,  0,  0, ...,  6,  9, 11]], dtype=int32)
I get the following error:
ValueError: in user code:

    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1569 predict_function  *
        return step_function(self, iterator)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1559 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1285 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2833 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3608 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1552 run_step  **
        outputs = model.predict_step(data)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1525 predict_step
        return self(x, training=False)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:1030 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/functional.py:421 call
        inputs, training=training, mask=mask)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/functional.py:556 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:1030 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py:1363 _call_wrapper
        return self._call_wrapper(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py:1395 _call_wrapper
        result = self.function(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py:1768 concat
        return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_array_ops.py:1228 concat_v2
        "ConcatV2", values=values, axis=axis, name=name)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py:750 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py:601 _create_op_internal
        compute_device)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:3565 _create_op_internal
        op_def=op_def)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:2042 __init__
        control_input_ops, op_def)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:1883 _create_c_op
        raise ValueError(str(e))
ValueError: Dimension 0 in both shapes must be equal, but are 32 and 128. Shapes are [32,1] and [128,1]. for '{{node base_model_X/tf.concat_40/concat}} = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32](base_model_X/tf.expand_dims_80/ExpandDims, base_model_X/148834, base_model_X/tf.concat_40/concat/axis)' with input shapes: [32,1,256], [128,1,137], [] and with computed input tensors: input[2] = <-1>.
Now I am looking for a solution to the error above. I don't know what you folks think, but I am really starting to hate encoder-decoder LSTMs. Thanks in advance for your ideas or recommendations for a different model configuration.
How do I train on a dataset in which each label has shape [5, 30]? For example:
[
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 55, 21, 56, 57, 3,
22, 19, 58, 6, 59, 4, 60, 1, 61, 62, 23, 63, 23, 64],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 65, 7, 66, 2, 67, 68, 3, 69, 70],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 11, 12, 5, 13, 14, 9, 10, 5, 15, 16, 17, 2, 8],
[ 0, 0, 0, 0, 0, 2, 71, 1, 72, 73, 74, 7, 75, 76, 77, 3,
20, 78, 18, 79, 1, 21, 80, 81, 3, 82, 83, 84, 6, 85],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
86, 87, 3, 88, 89, 1, 90, 91, 22, 92, 93, 4, 6, 94]
]
One option was to reshape the labels to [150], but that would make the tokenized sentences lose their meaning. Please suggest how I should arrange the Keras layers, and which layers to use, so that the model can later generate sentences.

My current code for the model is this:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

model = tf.keras.Sequential([
    feature_layer,  # defined elsewhere
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dropout(.1),
    layers.Dense(5),
    layers.Dense(30, activation='softmax'),
])

opt = Adam(learning_rate=0.01)
model.compile(optimizer=opt, loss='mean_absolute_percentage_error', metrics=['accuracy'])
The actual data:

state      district             month     rainfall  max_temp  min_temp  max_rh  min_rh  wind_speed  advice
Orissa     Kendrapada           february  0.0       34.6      19.4      88.2    29.6    12.0        chances of foot rot disease in paddy crop; apply urea at 3 weeks after transplanting at active tillering stage for paddy; ......
Jharkhand  Saraikela Kharsawan  february  0         35.2      16.6      29.4    11.2    3.6         provide straw mulch and go for intercultural operations to avoid moisture losses from soil; chance of leaf blight disease in potato crop; .......
I need to be able to generate the advice texts.
If you do need the output in this shape (and not flattened), the easiest (and, in my opinion, correct) solution is a multi-output network, with each output being a layers.Dense(30, activation='softmax').
You would have something like:
def create_model():
    base_model = ....  # stacked Dense units + other; you can even create multi-input multi-output if you really want that
    first_output = Dense(30, activation='softmax', name='output_1')(base_model)
    second_output = Dense(30, activation='softmax', name='output_2')(base_model)
    ...
    fifth_output = Dense(30, activation='softmax', name='output_5')(base_model)
    model = Model(inputs=input_layer,
                  outputs=[first_output, second_output, third_output, fourth_output, fifth_output])
    return model
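For readers who want to run this end to end, here is a concrete, self-contained version of the sketch; the 9-feature input and layer sizes are my own placeholders, not from the question:

import tensorflow as tf
from tensorflow.keras import layers, Model

def create_demo_model(n_features=9):
    # hypothetical input: the numeric/encoded columns (rainfall, temps, etc.)
    input_layer = layers.Input(shape=(n_features,))
    x = layers.Dense(128, activation='relu')(input_layer)
    x = layers.Dense(128, activation='relu')(x)
    # one softmax head per label row, matching output_1..output_5 above
    outputs = [layers.Dense(30, activation='softmax', name=f'output_{i}')(x)
               for i in range(1, 6)]
    return Model(inputs=input_layer, outputs=outputs)

The compile and fit calls below then apply to this model unchanged.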
optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer,
              loss={'output_1': 'sparse_categorical_crossentropy',
                    'output_2': 'sparse_categorical_crossentropy',
                    'output_3': 'sparse_categorical_crossentropy',
                    'output_4': 'sparse_categorical_crossentropy',
                    'output_5': 'sparse_categorical_crossentropy'},
              metrics={'output_1': tf.keras.metrics.Accuracy(),
                       'output_2': tf.keras.metrics.Accuracy(),
                       'output_3': tf.keras.metrics.Accuracy(),
                       'output_4': tf.keras.metrics.Accuracy(),
                       'output_5': tf.keras.metrics.Accuracy()})

model.fit(X, y,
          epochs=100, batch_size=10, validation_data=(val_X, val_y))
Here, note that y (for both train and validation) is a numpy array of length 5 (the number of outputs), and each element has length 30.
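If the labels start out as a single array of shape (n_samples, 5, 30), a minimal sketch of rearranging them into that format (dummy data, my own illustration):

import numpy as np

labels = np.random.randint(0, 95, size=(100, 5, 30))  # placeholder label tensor
y = [labels[:, i, :] for i in range(5)]               # list of 5 arrays, each (100, 30)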
Again, ensure that you actually need such a configuration; I posted the answer as a demonstration of multi-output labels in TensorFlow and Keras and for the benefit of others, but I am not 100% sure you actually need this exact configuration (perhaps you can opt for something simpler).
Note the use of sparse_categorical_crossentropy, since your labels are not one-hot encoded (also, MAPE is for regression, not classification).
The Problem
I have been trying to train a TFBertForMaskedLM model with TensorFlow, but whenever I use model.fit() I run into a problem. I hope someone can help and propose a solution.
Reference Paper and sample output
The paper is "Conditional BERT Contextual Augmentation". In short, it replaces the token_type_ids with label ids. If the label of a sentence is 5, its length is 10, and max_sequence_length = 16, the example is processed as follows:
input_ids = [101, 523, 791, 3189, 677, 5221, 524, 1920, 686, 102, 0, 0, 0, 0, 0, 0]
attention_mask = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
token_type_ids = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0]
labels = [-100, -100, 791, -100, 677, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]
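A minimal sketch of building such an example (my own illustration, not the paper's code): token_type_ids carry the sentence label at every real-token position, and labels hold the original token id at the positions selected for MLM prediction, with -100 everywhere else:

def build_conditional_example(input_ids, label, masked_positions, max_len=16):
    # input_ids: un-padded token ids; label: the sentence's class id (1-5 here)
    seq_len = len(input_ids)
    pad = max_len - seq_len
    attention_mask = [1] * seq_len + [0] * pad
    token_type_ids = [label] * seq_len + [0] * pad  # label ids replace segment ids
    labels = [-100] * max_len
    for pos in masked_positions:
        labels[pos] = input_ids[pos]  # original token becomes the MLM target
    return input_ids + [0] * pad, attention_mask, token_type_ids, labels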
Environment
tensorflow == 2.2.0
huggingface transformers == 3.5.0
datasets == 1.1.2
the dataset has 5 labels in total (1 to 5)
GPU : GCP P100 * 1
Dataset output (max_sequence_length=128, batch_size=1)
{'attention_mask': <tf.Tensor: shape=(128,), dtype=int32, numpy=
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)>,
'input_ids': <tf.Tensor: shape=(128,), dtype=int32, numpy=
array([ 101, 523, 791, 3189, 677, 5221, 524, 1920, 686,
4518, 6240, 103, 2466, 2204, 2695, 100, 519, 5064,
1918, 736, 2336, 520, 103, 2695, 1564, 4923, 8013,
678, 6734, 8038, 8532, 131, 120, 120, 8373, 119,
103, 9989, 103, 8450, 120, 103, 120, 12990, 8921,
8165, 102, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0], dtype=int32)>,
'labels': <tf.Tensor: shape=(128,), dtype=int32, numpy=
array([-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
4634, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
4158, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, 8429, -100, 119, -100, -100, 100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100], dtype=int32)>,
'token_type_ids': <tf.Tensor: shape=(128,), dtype=int32, numpy=
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)>}
Model code
import tensorflow as tf
from transformers import AdamWeightDecay, TFBertForMaskedLM, BertConfig

def create_model():
    configuration = BertConfig.from_pretrained('bert-base-chinese')
    model = TFBertForMaskedLM.from_pretrained('bert-base-chinese',
                                              config=configuration)
    # swap the 2-type segment embedding for a 5-type (label) embedding
    model.bert.embeddings.token_type_embeddings = tf.keras.layers.Embedding(
        5, 768,
        embeddings_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))
    return model

model = create_model()
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = [tf.keras.metrics.Mean(), tf.keras.metrics.SparseCategoricalAccuracy('accuracy')]

model.compile(optimizer=optimizer,
              loss=loss,
              metrics=metrics)
model.fit(tf_sms_dataset,
          epochs=1,
          verbose=1)
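For reference, -100 in the labels is the Hugging Face convention for positions to ignore; a plain SparseCategoricalCrossentropy does not skip them automatically. A masked loss in that spirit (my sketch, similar to what the library's compute_loss does internally) looks like:

def masked_sparse_ce(labels, logits):
    # keep only positions whose label is a real token id, not the -100 sentinel
    active = tf.not_equal(labels, -100)
    return tf.keras.losses.sparse_categorical_crossentropy(
        tf.boolean_mask(labels, active),
        tf.boolean_mask(logits, active),
        from_logits=True)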
Warning message when using TFBertForMaskedLM
Some layers from the model checkpoint at bert-base-chinese were not used when initializing TFBertForMaskedLM: ['nsp___cls']
- This IS expected if you are initializing TFBertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForMaskedLM were initialized from the model checkpoint at bert-base-chinese.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForMaskedLM for predictions without further training.
Error Message
ValueError Traceback (most recent call last)
<ipython-input-42-99b78906fef7> in <module>()
5 model.fit(tf_sms_dataset,
6 epochs=1,
----> 7 verbose=1)
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
64 def _method_wrapper(self, *args, **kwargs):
65 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
---> 66 return method(self, *args, **kwargs)
67
68 # Running inside `run_distribute_coordinator` already.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
846 batch_size=batch_size):
847 callbacks.on_train_batch_begin(step)
--> 848 tmp_logs = train_function(iterator)
849 # Catch OutOfRangeError for Datasets of unknown size.
850 # This blocks until the batch has finished executing.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
578 xla_context.Exit()
579 else:
--> 580 result = self._call(*args, **kwds)
581
582 if tracing_count == self._get_tracing_count():
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
625 # This is the first call of __call__, so we have to initialize.
626 initializers = []
--> 627 self._initialize(args, kwds, add_initializers_to=initializers)
628 finally:
629 # At this point we know that the initialization is complete (or less
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
504 self._concrete_stateful_fn = (
505 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 506 *args, **kwds))
507
508 def invalid_creator_scope(*unused_args, **unused_kwds):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2444 args, kwargs = None, None
2445 with self._lock:
-> 2446 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2447 return graph_function
2448
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2775
2776 self._function_cache.missed.add(call_context_key)
-> 2777 graph_function = self._create_graph_function(args, kwargs)
2778 self._function_cache.primary[cache_key] = graph_function
2779 return graph_function, args, kwargs
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2665 arg_names=arg_names,
2666 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2667 capture_by_value=self._capture_by_value),
2668 self._function_attributes,
2669 # Tell the ConcreteFunction to clean up its graph once it goes out of
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
979 _, original_func = tf_decorator.unwrap(python_func)
980
--> 981 func_outputs = python_func(*func_args, **func_kwargs)
982
983 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
439 # __wrapped__ allows AutoGraph to swap in a converted function. We give
440 # the function a weak reference to itself to avoid a reference cycle.
--> 441 return weak_wrapped_fn().__wrapped__(*args, **kwds)
442 weak_wrapped_fn = weakref.ref(wrapped_fn)
443
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
ValueError: in user code:

    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:541 train_step  **
        self.trainable_variables)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:1804 _minimize
        trainable_variables))
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:521 _aggregate_gradients
        filtered_grads_and_vars = _filter_grads(grads_and_vars)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:1219 _filter_grads
        ([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['tf_bert_for_masked_lm_2/bert/embeddings/word_embeddings/weight:0', 'tf_bert_for_masked_lm_2/bert/embeddings/position_embeddings/embeddings:0', 'tf_bert_for_masked_lm_2/bert/embeddings/LayerNorm/gamma:0', 'tf_bert_for_masked_lm_2/bert/embeddings/LayerNorm/beta:0', 'tf_bert_for_masked_lm_2/bert/embeddings/embedding_1/embeddings:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/attention/self/query/kernel:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/attention/self/query/bias:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/attention/self/key/kernel:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/attention/self/key/bias:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/attention/self/value/kernel:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/attention/self/value/bias:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/attention/output/dense/bias:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/intermediate/dense/bias:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/output/dense/kernel:0', 'tf_bert_for_masked_lm_2/bert/encoder/layer_._0/output/dense/bias:0', 'tf_bert_f...
Can someone help? Thanks a lot.
Other Test
I also tested with an English sentence. Example as follows:
from transformers import BertTokenizer, TFBertForMaskedLM, BertConfig

def create_model():
    configuration = BertConfig.from_pretrained('bert-base-uncased')
    model = TFBertForMaskedLM.from_pretrained('bert-base-uncased',
                                              config=configuration)
    return model

model = create_model()

eng_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
token_info = eng_tokenizer(text="We are very happy to show you the 🤗 Transformers library.",
                           padding='max_length', max_length=20)
# (the 'labels' key shown below was added by a separate masking step, not shown here)

optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = [tf.keras.metrics.Mean(), tf.keras.metrics.SparseCategoricalAccuracy("acc")]

dataset = tf.data.Dataset.from_tensor_slices(dict(token_info))
dataset = dataset.batch(1).prefetch(tf.data.experimental.AUTOTUNE)

model.compile(optimizer=optimizer,
              loss=model.compute_loss,
              metrics=metrics)
model.fit(dataset)
token_info output

{
    'input_ids': [101, 2057, 2024, 2200, 103, 2000, 2265, 2017, 103, 100, 19081, 3075, 1012, 102, 0, 0, 0, 0, 0, 0],
    'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
    'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'labels': [-100, -100, -100, -100, 3407, -100, -100, -100, 1996, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]
}
I get the same error:
ValueError: in user code:

    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:541 train_step  **
        self.trainable_variables)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:1804 _minimize
        trainable_variables))
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:521 _aggregate_gradients
        filtered_grads_and_vars = _filter_grads(grads_and_vars)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:1219 _filter_grads
        ([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['tf_bert_for_masked_lm_2/bert/embeddings/word_embeddings/weight:0', 'tf_bert_for_masked_lm_2/bert/embeddings/position_embeddings/embeddings:0', 'tf_bert_for_masked_lm_2/bert/embeddings/token_type_embeddings/embeddings:0', 'tf_bert_for_masked_lm_2/bert/embeddings/LayerNorm/gamma:0', 'tf_bert_for_masked_lm_2/bert/embeddings/LayerNorm/beta:0',
I'm not sure whether the problem is in how fit() is being integrated with the model?