With `strategy.scope():`, the TF-Hub BERT output loses its shape and `encoder_outputs` is missing - tensorflow

Code:
!pip install tensorflow-text==2.7.0

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text  # registers the ops used by the preprocessing model
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.optimizers import Adam

strategy = tf.distribute.MirroredStrategy()
print('Number of GPUs: ' + str(strategy.num_replicas_in_sync))  # 1 or 2, shouldn't matter

NUM_CLASS = 2

with strategy.scope():
    bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
    bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4")

def get_model():
    text_input = Input(shape=(), dtype=tf.string, name='text')
    preprocessed_text = bert_preprocess(text_input)
    outputs = bert_encoder(preprocessed_text)
    output_sequence = outputs['sequence_output']
    x = Dense(NUM_CLASS, activation='sigmoid')(output_sequence)
    model = Model(inputs=[text_input], outputs=[x])
    return model

# Build once WITHOUT the scope
optimizer = Adam()
model = get_model()
model.compile(loss=CategoricalCrossentropy(from_logits=True), optimizer=optimizer, metrics=[Accuracy()])
model.summary()  # <- look at output 1
tf.keras.utils.plot_model(model, show_shapes=True, to_file='model.png')  # <- look at figure 1

# Build again INSIDE the scope
with strategy.scope():
    optimizer = Adam()
    model = get_model()
    model.compile(loss=CategoricalCrossentropy(from_logits=True), optimizer=optimizer, metrics=[Accuracy()])
model.summary()  # <- compare with output 1: the shape is already lost
tf.keras.utils.plot_model(model, show_shapes=True, to_file='model_scoped.png')  # <- compare this figure too, for ease
With scope, BERT loses seq_length, and it becomes None.
Model summary WITHOUT scope (note the 128 in the very last layer, which is seq_length):
Model: "model_6"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
text (InputLayer) [(None,)] 0 []
keras_layer_2 (KerasLayer) {'input_mask': (Non 0 ['text[0][0]']
e, 128),
'input_word_ids':
(None, 128),
'input_type_ids':
(None, 128)}
keras_layer_3 (KerasLayer) multiple 109482241 ['keras_layer_2[6][0]',
'keras_layer_2[6][1]',
'keras_layer_2[6][2]']
dense_6 (Dense) (None, 128, 2) 1538 ['keras_layer_3[6][14]']
==================================================================================================
Total params: 109,483,779
Trainable params: 1,538
Non-trainable params: 109,482,241
__________________________________________________________________________________________________
Model summary WITH scope:
Model: "model_7"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
text (InputLayer) [(None,)] 0 []
keras_layer_2 (KerasLayer) {'input_mask': (Non 0 ['text[0][0]']
e, 128),
'input_word_ids':
(None, 128),
'input_type_ids':
(None, 128)}
keras_layer_3 (KerasLayer) multiple 109482241 ['keras_layer_2[7][0]',
'keras_layer_2[7][1]',
'keras_layer_2[7][2]']
dense_7 (Dense) (None, None, 2) 1538 ['keras_layer_3[7][14]']
==================================================================================================
Total params: 109,483,779
Trainable params: 1,538
Non-trainable params: 109,482,241
__________________________________________________________________________________________________
Another notable thing: `encoder_outputs` is also missing if you take a look at the 2nd or 3rd Keras layer of both models.
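For readers hitting the same symptom, below is a minimal, untested sketch of one possible workaround for the lost static shape only (it does not bring back the missing `encoder_outputs` key). SEQ_LEN = 128 and HIDDEN_SIZE = 768 are assumed defaults for this preprocess/encoder pair, not values confirmed above:

# Sketch: pin the sequence dimension back onto the encoder output before the
# Dense head, since the preprocessor pads/truncates every example to 128 tokens.
SEQ_LEN, HIDDEN_SIZE = 128, 768  # assumed defaults, not from the original post

def get_model():
    text_input = Input(shape=(), dtype=tf.string, name='text')
    preprocessed_text = bert_preprocess(text_input)
    outputs = bert_encoder(preprocessed_text)
    output_sequence = outputs['sequence_output']
    # Reshape is a no-op at runtime but restores the static (128, 768) shape
    # that gets lost when the model is built under strategy.scope().
    output_sequence = tf.keras.layers.Reshape((SEQ_LEN, HIDDEN_SIZE))(output_sequence)
    x = Dense(NUM_CLASS, activation='sigmoid')(output_sequence)
    return Model(inputs=[text_input], outputs=[x])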

Related

ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 50), found shape=(None, 1, 512)

I am learning to use bert-base-cased and a classification model... the code for the model is the following:
def mao_func(input_ids, masks, labels):
    return {'input_ids': input_ids, 'attention_mask': masks}, labels

dataset = dataset.map(mao_func)

BATCH_SIZE = 32
dataset = dataset.shuffle(100000).batch(BATCH_SIZE)

split = .8
ds_len = len(list(dataset))
train = dataset.take(round(ds_len * split))
val = dataset.skip(round(ds_len * split))

from transformers import TFAutoModel
bert = TFAutoModel.from_pretrained('bert-base-cased')
Model: "tf_bert_model"
Layer (type) Output Shape Param #
bert (TFBertMainLayer) multiple 108310272
=================================================================
Total params: 108,310,272
Trainable params: 108,310,272
Non-trainable params: 0
then the NN building:
input_ids = tf.keras.layers.Input(shape=(50,), name='input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape=(50,), name='attention_mask', dtype='int32')
embeddings = bert(input_ids, attention_mask=mask)[0]
X = tf.keras.layers.GlobalMaxPool1D()(embeddings)
X = tf.keras.layers.BatchNormalization()(X)
X = tf.keras.layers.Dense(128, activation='relu')(X)
X = tf.keras.layers.Dropout(0.1)(X)
X = tf.keras.layers.Dense(32, activation='relu')(X)
y = tf.keras.layers.Dense(3, activation='softmax',name='outputs')(X)
model = tf.keras.Model(inputs=[input_ids, mask], outputs=y)
model.layers[2].trainable = False
the model.summary is:
 Layer (type)                   Output Shape                     Param #     Connected to
==================================================================================================
 input_ids (InputLayer)         [(None, 50)]                     0           []

 attention_mask (InputLayer)    [(None, 50)]                     0           []

 tf_bert_model (TFBertModel)    TFBaseModelOutputWithPoolingAndCrossAttentions(
                                  last_hidden_state=(None, 50, 768),
                                  pooler_output=(None, 768),
                                  past_key_values=None, hidden_states=None,
                                  attentions=None, cross_attentions=None)
                                                                 108310272   ['input_ids[0][0]',
                                                                              'attention_mask[0][0]']

 global_max_pooling1d           (None, 768)                      0           ['tf_bert_model[0][0]']
 (GlobalMaxPooling1D)

 batch_normalization            (None, 768)                      3072        ['global_max_pooling1d[0][0]']
 (BatchNormalization)

 dense (Dense)                  (None, 128)                      98432       ['batch_normalization[0][0]']

 dropout_37 (Dropout)           (None, 128)                      0           ['dense[0][0]']

 dense_1 (Dense)                (None, 32)                       4128        ['dropout_37[0][0]']

 outputs (Dense)                (None, 3)                        99          ['dense_1[0][0]']
==================================================================================================
Total params: 108,416,003
Trainable params: 104,195
Non-trainable params: 108,311,808
__________________________________________________________________________________________________
finally, the model fitting is:
optimizer = tf.keras.optimizers.Adam(0.01)
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
model.compile(optimizer, loss=loss, metrics=[acc])
history = model.fit(
    train,
    validation_data=val,
    epochs=140
)
with an execution error at the model.fit(...) call:
ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 50), found shape=(None, 1, 512)
Can anyone be so kind as to help me understand what I did wrong and why... thanks :)
Update: here is the repo with the code: https://github.com/CharlieArreola/OnlinePosts
It seems that the shape of your training data doesn't match the expected input shape of your input layer.
You can check the shape of your training data with train.element_spec.
Your input layer input_ids = tf.keras.layers.Input(shape=(50,), name='input_ids', dtype='int32') expects training data with 50 columns, but judging from the error you most likely have 512.
So to fix this, you could simply change your input shape:
input_ids = tf.keras.layers.Input(shape=(512,), name='input_ids', dtype='int32')
If you split your x and y in your dataset, you can make it more flexible with:
input_ids = tf.keras.layers.Input(shape=(train_x.shape[1],), name='input_ids', dtype='int32')
Also, don't forget that you have to apply this change to all of your input layers!
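As a complement, here is a small sketch (hypothetical, assuming the mapped and batched dataset from the question) of sizing the input layers from the dataset itself instead of hard-coding 50 or 512:

import tensorflow as tf

# The mapped dataset yields ({'input_ids': ..., 'attention_mask': ...}, labels),
# so the element spec tells you the real tokenized length.
print(train.element_spec)

# Derive the sequence length from the spec; per the error above this would be 512.
# If the spec shows an extra leading 1 (as in the (None, 1, 512) error), the
# tensors may also need to be squeezed in the map function.
SEQ_LEN = train.element_spec[0]['input_ids'].shape[-1]

input_ids = tf.keras.layers.Input(shape=(SEQ_LEN,), name='input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape=(SEQ_LEN,), name='attention_mask', dtype='int32')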

Sequential VGG16 model, graph disconnected error

I have a sequential model with a VGG16 at the top:
def rescale(x):
    return x / 65535.

base_model = tf.keras.applications.VGG16(
    include_top=True, weights=None, input_tensor=None, input_shape=(224, 224, 1),
    pooling=None, classes=102, classifier_activation='softmax')

model = tf.keras.Sequential([
    tf.keras.Input(shape=(None, None, 1)),
    tf.keras.layers.Lambda(rescale),
    tf.keras.layers.experimental.preprocessing.Resizing(224, 224),
    tf.keras.layers.experimental.preprocessing.RandomFlip(mode='horizontal_and_vertical', seed=42),
    base_model
])
Output model.summary():
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lambda (Lambda) (None, None, None, 1) 0
_________________________________________________________________
resizing (Resizing) (None, 224, 224, 1) 0
_________________________________________________________________
random_flip (RandomFlip) (None, 224, 224, 1) 0
_________________________________________________________________
vgg16 (Functional) (None, 102) 134677286
=================================================================
Total params: 134,677,286
Trainable params: 134,677,286
Non-trainable params: 0
Now I want to create a new model with two outputs:
vgg_model = model.layers[3]
last_conv_layer = vgg_model.get_layer('block5_conv3')
new_model = tf.keras.models.Model(inputs=[model.inputs], outputs=[last_conv_layer.output, model.output])
But I get this error:
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_1_6:0", shape=(None, 224, 224, 1), dtype=float32) at layer "block1_conv1". The following previous layers were accessed without issue: []
What am I missing here?
Given a fitted model in this form:
def rescale(x):
    return x / 65535.

base_model = tf.keras.applications.VGG16(
    include_top=True, weights=None, input_tensor=None, input_shape=(224, 224, 1),
    pooling=None, classes=102, classifier_activation='softmax')

model = tf.keras.Sequential([
    tf.keras.Input(shape=(None, None, 1)),
    tf.keras.layers.Lambda(rescale),
    tf.keras.layers.experimental.preprocessing.Resizing(224, 224),
    tf.keras.layers.experimental.preprocessing.RandomFlip(mode='horizontal_and_vertical', seed=42),
    base_model
])

# model.fit(...)
You can wrap your VGG in a Model that returns all the outputs you need:
new_model = Model(inputs=model.layers[3].input,
                  outputs=[model.layers[3].output,
                           model.layers[3].get_layer('block5_conv3').output])

inp = tf.keras.Input(shape=(None, None, 1))
x = tf.keras.layers.Lambda(rescale)(inp)
x = tf.keras.layers.experimental.preprocessing.Resizing(224, 224)(x)
outputs = new_model(x)
new_model = Model(inp, outputs)
The summary of new_model:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_49 (InputLayer) [(None, None, None, 1)] 0
_________________________________________________________________
lambda_25 (Lambda) (None, None, None, 1) 0
_________________________________________________________________
resizing_25 (Resizing) (None, 224, 224, 1) 0
_________________________________________________________________
functional_47 (Functional)   [(None, 102), (None, 14, 14, 512)]   134677286
=================================================================
Total params: 134,677,286
Trainable params: 134,677,286
Non-trainable params: 0
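A quick, hypothetical usage check of the wrapped model (output order follows the list above: predictions first, then the block5_conv3 features; the dummy input size is arbitrary since the pipeline resizes to 224x224):

import numpy as np

dummy = np.random.rand(2, 256, 256, 1).astype("float32")  # any HxW works

preds, block5_features = new_model.predict(dummy)
print(preds.shape)            # (2, 102)         - softmax over the 102 classes
print(block5_features.shape)  # (2, 14, 14, 512) - block5_conv3 activations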

Custom keras layer isn't shown in model.summary

When I try to look at the .summary() of my model with my custom layers, I get the following output:
Model: "functional_29"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_118 (InputNode) [(None, 1)] 0
__________________________________________________________________________________________________
input_119 (InputNode) [(None, 1)] 0
__________________________________________________________________________________________________
tf_op_layer_strided_slice_156 ( [(1,)] 0 input_118[0][0]
__________________________________________________________________________________________________
tf_op_layer_strided_slice_157 ( [(1,)] 0 input_119[0][0]
__________________________________________________________________________________________________
input_120 (InputNode) [(None, 1)] 0
_________________________________________________________________________________________________
tf_op_layer_concat_106 (TensorF [(2,)] 0 tf_op_layer_strided_slice_162[0][
tf_op_layer_strided_slice_163[0][
...
__________________________________________________________________________________________________
tf_op_layer_strided_slice_164 ( [(1,)] 0 input_120[0][0]
__________________________________________________________________________________________________
tf_op_layer_node_128_output (Te [()] 0 tf_op_layer_Relu_55[0][0]
==================================================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
__________________________________________________________________________________________________
Why is that? How can I wrap all of those operations under the label MyLayer?
You can create your own layer by subclassing from tf.keras.layers.Layer.
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Input

class DirectLayer(tf.keras.layers.Layer):
    def __init__(self, name="direct_layer", **kwargs):
        super(DirectLayer, self).__init__(name=name, **kwargs)

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=input_shape[1:],
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=input_shape[1:], initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        return tf.multiply(inputs, self.w) + self.b

x_in = Input(shape=[10])
x = DirectLayer(name="my_layer")(x_in)
x_out = DirectLayer()(x)
model = Model(x_in, x_out)

x = tf.ones([16, 10])
tf.print(model(x))
tf.print(model.summary())
I created a simple layer called DirectLayer. I built a model that uses that layer twice. The Input layer is there just to specify the shape of input data.
As you can see, you can easily specify the name of the layer.
The summary function produces the following:
Model: "functional_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 10)] 0
_________________________________________________________________
my_layer (DirectLayer) (None, 10) 20
_________________________________________________________________
direct_layer (DirectLayer) (None, 10) 20
=================================================================
Total params: 40
Trainable params: 40
Non-trainable params: 0
_________________________________________________________________
None
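Since the original summary was cluttered with parameterless tf_op_layer_* entries (strided slices, concat, relu), here is a hypothetical sketch of the same subclassing idea applied to raw TF ops: wrapping them in a layer's call collapses them into a single named row in the summary. The op arrangement below is illustrative, not taken from the original model.

import tensorflow as tf

class MyLayer(tf.keras.layers.Layer):
    """Wraps raw ops (slicing, concat, relu) so the summary shows one row."""

    def call(self, inputs):
        a, b = inputs
        # Each of these ops would otherwise appear as its own tf_op_layer_* entry.
        joined = tf.concat([a[:, 0:1], b[:, 0:1]], axis=-1)
        return tf.nn.relu(joined)

a_in = tf.keras.Input(shape=(1,))
b_in = tf.keras.Input(shape=(1,))
out = MyLayer(name="my_layer")([a_in, b_in])
tf.keras.Model([a_in, b_in], out).summary()  # my_layer (MyLayer) appears as one entry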

NotFoundError: [_Derived_]No gradient defined for op: StatefulPartitionedCall on Tensorflow 1.15.0

I am running a tensorflow model using BERT in the embedding layer. I found this similar question with no answer. Honestly I do not understand why the error is occurring, because the model runs fine for another dataset.
When I call model.fit:
train_history = model.fit(
    train_input, train_labels,
    validation_split=0.2,
    epochs=3,
    batch_size=8
)
I get this error:
NotFoundError: [_Derived_]No gradient defined for op: StatefulPartitionedCall
[[{{node Func/_4}}]]
[[PartitionedCall/gradients/StatefulPartitionedCall_grad/PartitionedCall/gradients/StatefulPartitionedCall_grad/SymbolicGradient]] [Op:__inference_distributed_function_28080]
Function call stack:
distributed_function
Model:
def build_model(bert_layer, max_len=512):
    input_word_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_word_ids")
    input_mask = Input(shape=(max_len,), dtype=tf.int32, name="input_mask")
    segment_ids = Input(shape=(max_len,), dtype=tf.int32, name="segment_ids")
    _, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])
    clf_output = sequence_output[:, 0, :]
    out = Dense(1, activation='sigmoid')(clf_output)
    model = Model(inputs=[input_word_ids, input_mask, segment_ids], outputs=out)
    model.compile(Adam(lr=2e-6), loss='categorical_crossentropy', metrics=['accuracy'])
    return model
Loading BERT from TensorFlow Hub
%%time
module_url = "https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/2"
bert_layer = hub.KerasLayer(module_url, trainable=True)
Loading tokenizer and encoding text
vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = tokenization.FullTokenizer(vocab_file, do_lower_case)
train_input = bert_encode(train.text.values, tokenizer, max_len=160)
test_input = bert_encode(test.text.values, tokenizer, max_len=160)
train_labels = train['label']
train_labels = to_categorical(np.asarray(train_labels.factorize()[0]))
type(train_input)
>> tuple
type(train_labels)
>> numpy.ndarray
Run model
model = build_model(bert_layer, max_len=160)
model.summary()
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_word_ids (InputLayer) [(None, 160)] 0
__________________________________________________________________________________________________
input_mask (InputLayer) [(None, 160)] 0
__________________________________________________________________________________________________
segment_ids (InputLayer) [(None, 160)] 0
__________________________________________________________________________________________________
keras_layer (KerasLayer)        [(None, 768),         177853441   input_word_ids[0][0]
                                 (None, 160, 768)]                input_mask[0][0]
                                                                  segment_ids[0][0]
__________________________________________________________________________________________________
tf_op_layer_strided_slice (Tens [(None, 768)] 0 keras_layer[0][1]
__________________________________________________________________________________________________
dense (Dense) (None, 8) 6152 tf_op_layer_strided_slice[0][0]
==================================================================================================
Total params: 177,859,593
Trainable params: 177,859,592
Non-trainable params: 1
__________________________________________________________________________________________________
It was because of using an older version of TF with BERT. I missed this issue, which answers the question.
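Since the answer attributes the NotFoundError to running an older TF (1.15) against this hub module, a quick, hypothetical sanity check of the environment before retrying:

import tensorflow as tf
import tensorflow_hub as hub

print("TensorFlow:", tf.__version__)      # a 2.x runtime avoided the error here
print("TensorFlow Hub:", hub.__version__)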

keras define a trainable variable for add or matmul

I have some problems using tf.keras to build a model. I want to define a trainable weight tensor with shape (64, 128), similar to tf.get_variable. However, I can't achieve it.
I have tried many methods in the past, but I am looking for an easier one.
inputs = tf.keras.Input((128,))
weights = tf.Variable(tf.random.normal((64, 128)))
output = tf.keras.layers.Lambda(lambda x: tf.matmul(x, tf.transpose(weights)))(inputs)
model = tf.keras.Model(inputs, output)
model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_10 (InputLayer) (None, 128) 0
_________________________________________________________________
lambda_2 (Lambda) (None, 64) 0
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
The defined weights are not trainable.
In addition, I know Dense gives a trainable weight matrix and bias. But if I only want to add a bias, I can't use Dense.
Instead, I have to use add_weight in a custom layer, for example:
from tensorflow import keras
from tensorflow.keras import Input, Model

class Bias(keras.layers.Layer):
    def build(self, input_shape):
        self.bias = self.add_weight(shape=(64, 128), initializer='zeros', dtype=tf.float32, name='x')
        self.built = True

    def call(self, inputs):
        return inputs + self.bias

inputs = Input(shape=(64, 128))
outputs = Bias()(inputs)
model = Model(inputs=inputs, outputs=outputs)
model.summary()
Layer (type) Output Shape Param #
=================================================================
input_11 (InputLayer) (None, 64, 128) 0
_________________________________________________________________
bias_5 (Bias) (None, 64, 128) 8192
=================================================================
Total params: 8,192
Trainable params: 8,192
Non-trainable params: 0
Is there any easier method to define a trainable variable?
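For comparison, here is a minimal sketch (using the same add_weight API as the Bias layer above, with the shapes taken from the question) that combines the matmul and a bias into one subclassed layer; the layer name and initializers are arbitrary choices:

import tensorflow as tf

class MatmulBias(tf.keras.layers.Layer):
    """Trainable (64, 128) weight applied as x @ w^T, plus a per-unit bias."""

    def build(self, input_shape):
        # Both variables are tracked by Keras and show up as trainable params
        # in model.summary(), similar in spirit to tf.get_variable.
        self.w = self.add_weight(shape=(64, 128), initializer='random_normal', trainable=True, name='w')
        self.b = self.add_weight(shape=(64,), initializer='zeros', trainable=True, name='b')

    def call(self, inputs):
        return tf.matmul(inputs, self.w, transpose_b=True) + self.b

inputs = tf.keras.Input((128,))
outputs = MatmulBias()(inputs)
model = tf.keras.Model(inputs, outputs)
model.summary()  # 64*128 + 64 = 8256 trainable params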