Embedding layer output shape is 2D - tensorflow

I'm encountering an issue with the output shape of my embedding layer. According to the Keras documentation, an embedding layer should output a 3D tensor, but my embedding layer outputs only a 2D tensor.
class MyModel(Model):
    """Binary classifier: frozen embedding -> bidirectional LSTM -> dense head.

    Args:
        vocab_size: number of rows in the embedding table (``input_dim``).
        embedding_matrix: pretrained weights loaded into the embedding layer;
            the layer is created with ``trainable=False``.
        max_length: passed as ``input_length`` and also as the embedding
            ``output_dim``.
    """

    def __init__(self, vocab_size, embedding_matrix, max_length):
        super(MyModel, self).__init__()
        # NOTE(review): output_dim is set to max_length here; presumably the
        # embedding width should equal embedding_matrix.shape[1] -- confirm
        # that the two values coincide for your data.
        self.embedding_l1 = tf.keras.layers.Embedding(
            input_dim=vocab_size,
            output_dim=max_length,
            input_length=max_length,
            weights=[embedding_matrix],
            trainable=False)
        # return_sequences=False: only the final state of each direction is
        # passed on to the dense head.
        self.bidirectional_l1 = Bidirectional(
            tf.compat.v1.keras.layers.CuDNNLSTM(32,
                                                return_sequences=False))
        self.dense_l1 = Dense(units=256, activation='relu')
        self.dropout_l1 = Dropout(rate=2e-5)
        self.dense_l2 = Dense(units=1, activation='sigmoid')

    def call(self, x):
        """Run the forward pass and return the sigmoid output.

        ``x`` must include a batch axis; the recurrent layer rejects a 2-D
        embedding output (``expected ndim=3``).
        """
        embedding_out = self.embedding_l1(x)
        print("SHAPE:", embedding_out.shape)
        # The embedding output feeds the recurrent layer directly; the
        # original referenced a `reshape_l1` layer that was never defined.
        bid_out1 = self.bidirectional_l1(embedding_out)
        dense_out1 = self.dense_l1(bid_out1)
        # Fixed names: the layer is `dropout_l1` and its result `drop_out1`.
        drop_out1 = self.dropout_l1(dense_out1)
        dense_out2 = self.dense_l2(drop_out1)
        return dense_out2
The embedding layer's output is reported as a 2D tensor of shape (300, 300), which causes an error in the bidirectional LSTM:
ValueError: Input 0 of layer bidirectional is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [300, 300]

Figured it out: the problem was the input to the embedding layer. I had not included a batch dimension, so the input had shape [300] instead of [batch_size, 300].

Related

Adding Luong attention Layer to CNN

I'm using Keras to implement a functional CNN model for images of size 64x64x1, with 6 convolutional layers, like this:
num_classes = 5


def get_model():
    """Build and compile a functional-API CNN classifier for 64x64x1 images.

    Architecture: two stages of three 5x5 ReLU convolutions (32 filters, then
    64 filters), each stage followed by 2x2 max pooling with stride 2, then
    Flatten -> Dense(2048) -> Dense(700) -> softmax over ``num_classes``.

    Returns:
        The model, compiled with RMSprop, sparse categorical cross-entropy
        loss and an accuracy metric.
    """
    input_ = keras.layers.Input(shape=[64, 64, 1])

    # First convolutional stage: three 5x5 convolutions with 32 filters.
    # The first one consumes `input_`; the original referenced an undefined
    # `input_layer` here.
    Conv1 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)(input_)
    Conv12 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)(Conv1)
    Conv13 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)(Conv12)
    Max1 = keras.layers.MaxPool2D(2, strides=2)(Conv13)

    # Second convolutional stage: three 5x5 convolutions with 64 filters.
    Conv2 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)(Max1)
    Conv21 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)(Conv2)
    Conv23 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)(Conv21)
    Max2 = keras.layers.MaxPool2D(2, strides=2)(Conv23)

    # Flatten the feature maps for the dense head (the spot where an
    # attention layer was intended to be added later).
    Flat = keras.layers.Flatten()(Max2)
    Dense1 = keras.layers.Dense(2048, activation=tf.nn.relu)(Flat)
    Dense2 = keras.layers.Dense(700, activation=tf.nn.relu)(Dense1)

    # Output layer: class probabilities.
    output = keras.layers.Dense(num_classes, activation=tf.nn.softmax)(Dense2)
    model = Model(inputs=input_, outputs=output)

    # The optimizer is bound to `optimize_rmsprop`; the original passed an
    # undefined `optimizer_rmsprop` to compile().
    optimize_rmsprop = keras.optimizers.RMSprop(learning_rate=0.001, epsilon=1e-08, decay=0.0)
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimize_rmsprop, metrics=["accuracy"])
    return model
To get better performance, I want to add this attention layer to the above CNN:
#already imported
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from keras.layers import Dropout
# Two integer-id sequences of fixed length 4096 (= 64*64*1; presumably a
# flattened 64x64x1 image -- confirm against the caller).
query_input = tf.keras.Input(shape=(4096,), dtype='int32')
value_input = tf.keras.Input(shape=(4096,), dtype='int32')
# Embedding lookup; the same layer (and therefore the same weights) is
# applied to both the query and the value ids.
token_embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64)
# Query embeddings of shape [batch_size, Tq, dimension].
query_embeddings = token_embedding(query_input)
# Value embeddings of shape [batch_size, Tv, dimension].
value_embeddings = token_embedding(value_input)
# CNN layer, also shared between query and value.
cnn_layer = tf.keras.layers.Conv1D(
filters=100,
kernel_size=4,
# Use 'same' padding so outputs have the same shape as inputs.
padding='same')
# Query encoding of shape [batch_size, Tq, filters].
query_seq_encoding = cnn_layer(query_embeddings)
# Value encoding of shape [batch_size, Tv, filters].
value_seq_encoding = cnn_layer(value_embeddings)
# Query-value attention of shape [batch_size, Tq, filters].
query_value_attention_seq = tf.keras.layers.Attention()(
[query_seq_encoding, value_seq_encoding])
# Reduce over the sequence axis to produce encodings of shape
# [batch_size, filters].
query_encoding = tf.keras.layers.GlobalAveragePooling1D()(
query_seq_encoding)
query_value_attention = tf.keras.layers.GlobalAveragePooling1D()(
query_value_attention_seq)
# Concatenate query and document encodings to produce a DNN input layer.
# Each pooled encoding has 100 features (the Conv1D filter count), so the
# result is 2-D with 200 features per sample -- not the 4-D image tensor
# that a Conv2D layer expects.
input_layer = tf.keras.layers.Concatenate()(
[query_encoding, query_value_attention])
The problem is that I don't know how to link the attention layer to my CNN model. When I connect the first convolutional layer to the attention output like this:
Conv1 = keras.layers.Conv2D(32, kernel_size=5)(input_layer)
I get this error :
ValueError: Input 0 of layer "conv2d" is incompatible with the layer: expected min_ndim=4, found ndim=2. Full shape received: (None, 200)
Can someone show me how to add an attention layer to the CNN model?
Updated Answer...
keras.backend.clear_session()
class AttentionLayer(tf.keras.layers.Layer):
    """Query/value attention block whose output is shaped like a 64x64x1 image.

    Both inputs are flattened to 4096 integer ids per sample, embedded with a
    shared embedding table, encoded with a shared Conv1D, combined with
    dot-product attention, averaged over the sequence axis, concatenated and
    finally reshaped to ``(batch_size, 64, 64, 1)`` so that a Conv2D stack
    can consume the result.

    Args:
        output_dims: width of the embedding vectors.
        **kwargs: forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self,
                 output_dims,
                 **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)
        self.output_dims = output_dims
        # input_dim=4096: the ids fed to this layer must lie in [0, 4096).
        self.embeddings = tf.keras.layers.Embedding(input_dim=4096, output_dim=output_dims)
        # 2048 filters per encoding: two pooled encodings concatenated give
        # 4096 values per sample, exactly what the final 64*64*1 reshape needs.
        self.conv = tf.keras.layers.Conv1D(2048, 4, padding='same')
        self.attn_layer = tf.keras.layers.Attention()
        self.global_pooling_1 = tf.keras.layers.GlobalAveragePooling1D()
        self.global_pooling_2 = tf.keras.layers.GlobalAveragePooling1D()
        # Sub-layers are created once here rather than on every call().
        self.concat = tf.keras.layers.Concatenate()

    def call(self, query_input, value_input):
        """Return a ``(batch_size, 64, 64, 1)`` tensor built from the inputs.

        Each input must contain 4096 elements per sample; it is reshaped to
        ``(batch_size, 4096)`` before the embedding lookup.
        """
        batch_size = tf.shape(query_input)[0]
        query_input = tf.reshape(query_input, (batch_size, 4096))
        value_input = tf.reshape(value_input, (batch_size, 4096))
        # Query embeddings of shape [batch_size, Tq, dimension].
        query_embeddings = self.embeddings(query_input)
        # Value embeddings of shape [batch_size, Tv, dimension].
        value_embeddings = self.embeddings(value_input)
        # Query encoding of shape [batch_size, Tq, filters].
        query_seq_encoding = self.conv(query_embeddings)
        # Value encoding of shape [batch_size, Tv, filters].
        value_seq_encoding = self.conv(value_embeddings)
        # Query-value attention of shape [batch_size, Tq, filters].
        query_value_attention_seq = self.attn_layer(
            [query_seq_encoding, value_seq_encoding])
        # Reduce over the sequence axis to produce encodings of shape
        # [batch_size, filters].
        query_encoding = self.global_pooling_1(query_seq_encoding)
        query_value_attention = self.global_pooling_2(
            query_value_attention_seq)
        # Concatenate the two encodings: [batch_size, 2 * filters].
        input_layer = self.concat([query_encoding, query_value_attention])
        # Reinterpret the 4096 features as a single-channel 64x64 map.
        input_layer = tf.reshape(input_layer, (batch_size, 64, 64, 1))
        return input_layer
keras.backend.clear_session()
num_classes = 5
class Model(tf.keras.Model):
    """CNN classifier with an AttentionLayer in front of the convolutions.

    The attention layer turns the input batch into a ``(batch, 64, 64, 1)``
    tensor, which then flows through two stages of three 5x5 ReLU
    convolutions plus max pooling, a flatten step, two dense layers and a
    softmax over ``num_classes``.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.attn_layer = AttentionLayer(64)
        # Stage 1: three 5x5 convolutions with 32 filters, then 2x2 pooling.
        self.Conv1 = keras.layers.Conv2D(filters=32, kernel_size=5, activation=tf.nn.relu)
        self.Conv12 = keras.layers.Conv2D(filters=32, kernel_size=5, activation=tf.nn.relu)
        self.Conv13 = keras.layers.Conv2D(filters=32, kernel_size=5, activation=tf.nn.relu)
        self.Max1 = keras.layers.MaxPool2D(pool_size=2, strides=2)
        # Stage 2: three 5x5 convolutions with 64 filters, then 2x2 pooling.
        self.Conv2 = keras.layers.Conv2D(filters=64, kernel_size=5, activation=tf.nn.relu)
        self.Conv21 = keras.layers.Conv2D(filters=64, kernel_size=5, activation=tf.nn.relu)
        self.Conv23 = keras.layers.Conv2D(filters=64, kernel_size=5, activation=tf.nn.relu)
        self.Max2 = keras.layers.MaxPool2D(pool_size=2, strides=2)
        # Dense head.
        self.Flat = keras.layers.Flatten()
        self.Dense1 = keras.layers.Dense(units=2048, activation=tf.nn.relu)
        self.Dense2 = keras.layers.Dense(units=700, activation=tf.nn.relu)
        # Output layer: class probabilities.
        self.outputs = keras.layers.Dense(units=num_classes, activation=tf.nn.softmax)

    def call(self, x):
        """Apply attention (the input is both query and value), then the CNN."""
        x = self.attn_layer(x, x)
        pipeline = (
            self.Conv1, self.Conv12, self.Conv13, self.Max1,
            self.Conv2, self.Conv21, self.Conv23, self.Max2,
            self.Flat, self.Dense1, self.Dense2, self.outputs,
        )
        for layer in pipeline:
            x = layer(x)
        return x
# Instantiate the subclassed model defined above.
model = Model()
# Smoke-test data: 8 random binary 64x64x1 arrays and 8 integer labels in
# [0, 5). NOTE(review): `np` is not imported in the visible snippet --
# presumably `import numpy as np` appears elsewhere.
x = np.random.randint(0 ,2 , size = (8, 64, 64,1))
y = np.random.randint(0,5, size=(8,1))
# Run one forward pass and print the shape of the prediction tensor.
print(model(x).shape)
# Same optimizer/loss configuration as the functional model in the question.
optimize_rmsprop = keras.optimizers.RMSprop(learning_rate=0.001, epsilon=1e-08, decay=0.0)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimize_rmsprop,metrics=["accuracy"])
# Train for a single epoch on the random batch.
model.fit(x, y, epochs=1)
Output:

Keras Model fit throws shape mismatch error

I am building a Siamese network using Keras(TensorFlow) where the target is a binary column, i.e., match or mismatch(1 or 0). But the model fit method throws an error saying that the y_pred is not compatible with the y_true shape. I am using the binary_crossentropy loss function.
Here is the error I see:
Here is the code I am using:
# Binary target, so binary cross-entropy; recall is tracked as the metric.
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[tf.keras.metrics.Recall()])
# Two inputs (one per entity), each converted from a scipy sparse matrix to a
# dense one; the target is passed as a NumPy array.
history = model.fit([X_train_entity_1.todense(),X_train_entity_2.todense()],np.array(y_train),
epochs=2,
batch_size=32,
verbose=2,
shuffle=True)
My Input data shapes are as follows:
Inputs:
X_train_entity_1.shape is (700,2822)
X_train_entity_2.shape is (700,2822)
Target:
y_train.shape is (700,1)
In the error, y_pred is a variable that was created internally. Why is the y_pred dimension 2822 when I have a binary target? The 2822 dimension matches the input size, but how should I understand this?
Here is the model I created:
# Collect the input tensors and the branch outputs of the two towers.
in_layers = []
out_layers = []
# Build two branches with the same architecture, one per entity input.
for i in range(2):
# NOTE(review): this declares one feature per sample, while the arrays
# passed to fit() are (700, 2822) -- presumably shape should be (2822,).
input_layer = Input(shape=(1,))
embedding_layer = Embedding(embed_input_size+1, embed_output_size)(input_layer)
lstm_layer_1 = Bidirectional(LSTM(1024, return_sequences=True,recurrent_dropout=0.2, dropout=0.2))(embedding_layer)
# return_sequences=True keeps the timestep axis, so everything downstream
# (including the final Dense) is applied per timestep.
lstm_layer_2 = Bidirectional(LSTM(512, return_sequences=True,recurrent_dropout=0.2, dropout=0.2))(lstm_layer_1)
in_layers.append(input_layer)
out_layers.append(lstm_layer_2)
# Join the two branch outputs into one tensor.
merge = concatenate(out_layers)
# Dense head: 256 -> 32 -> 1, each hidden layer followed by dropout and
# batch normalization.
dense1 = Dense(256, activation='relu', kernel_initializer='he_normal', name='data_embed')(merge)
drp1 = Dropout(0.4)(dense1)
btch_norm1 = BatchNormalization()(drp1)
dense2 = Dense(32, activation='relu', kernel_initializer='he_normal')(btch_norm1)
drp2 = Dropout(0.4)(dense2)
btch_norm2 = BatchNormalization()(drp2)
# Sigmoid output for the binary match/mismatch target.
output = Dense(1, activation='sigmoid')(btch_norm2)
model = Model(inputs=in_layers, outputs=output)
model.summary()
Since my data is very sparse, I used todense. And there the type is as follows:
type(X_train_entity_1) is scipy.sparse.csr.csr_matrix
type(X_train_entity_1.todense()) is numpy.matrix
type(X_train_entity_2) is scipy.sparse.csr.csr_matrix
type(X_train_entity_2.todense()) is numpy.matrix
Summary of last few layers as follows:
The shape of the Input layer is mismatched. The input shape needs to match the shape of a single element passed as x, i.e. dataset.shape[1:]. Your dataset has shape (700, 2822), that is, 700 samples of size 2822, so your input shape should be (2822,).
Change:
input_layer = Input(shape=(1,))
To:
input_layer = Input(shape=(2822,))
You need to set return_sequences in the lstm_layer_2 to False:
lstm_layer_2 = Bidirectional(LSTM(512, return_sequences=False, recurrent_dropout=0.2, dropout=0.2))(lstm_layer_1)
Otherwise, you will still have the timesteps of your input. That is why you have the shape (None, 2822, 1). You can also add a Flatten layer prior to your output layer, but I would recommend setting return_sequences=False.
Note that a Dense layer computes the dot product between the inputs and the kernel along the last axis of the inputs.

How to pass bert embeddings to an LSTM layer

I want to do sentiment analysis using bert-embedding and lstm layer.
This is my code:
# Scalar string input: one raw text per sample.
i = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
# Preprocess the text, then run the BERT encoder; the encoder returns a
# dict of outputs that is indexed by key below.
x = bert_preprocess(i)
x = bert_encoder(x)
# 'pooled_output' is 2-D -- (None, 768) according to the error message -- and
# dropout does not change that shape.
x = tf.keras.layers.Dropout(0.2, name="dropout")(x['pooled_output'])
# The LSTM expects a 3-D input, so this is the line that raises.
x = tf.keras.layers.LSTM(128, dropout=0.2)(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
# Single sigmoid unit for the binary sentiment label.
x = tf.keras.layers.Dense(1, activation='sigmoid', name="output")(x)
model = tf.keras.Model(i, x)
When compiling this code I got the following error:
ValueError: Input 0 of layer "lstm_2" is incompatible with the layer: expected
ndim=3, found ndim=2. Full shape received: (None, 768)
Is the logic of my code correct? Can anyone please correct my code?
From bert like models you can expect generally three kinds of outputs (taken from huggingface's TFBertModel documentation)
last_hidden_state with shape (batch_size, sequence_length, hidden_size)
pooler_output with shape (batch_size, hidden_size)
hidden_states with shape (batch_size, sequence_length, hidden_size)
hidden_size is 768 above..
As the error says, the output of the dropout layer has only 2 dimensions rather than the required 3. (This shape really comes from the bert_encoder layer, because dropout layers do not change the tensor shape.)
x = bert_encoder(x)
x = tf.keras.layers.Dropout(0.2, name="dropout")(x['pooled_output'])
x = tf.keras.layers.LSTM(128, dropout=0.2)(x)
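So if you are planning to use an LSTM layer after the bert_encoder layer, you need a three-dimensional input to the LSTM of the form (batch_size, num_timesteps, num_features). Hence you have to use either the hidden_states or the last_hidden_state output instead of pooler_output. (The code in the question indexes the encoder output by key, x['pooled_output']; with TensorFlow Hub BERT encoders the per-token 3D output is available as x['sequence_output'].)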
You will have to choose between the two depending on your objective/use-case.

How can I concatenate Tensorflow Dataset columns?

I have a Keras model that takes an input layer with shape (n, 288, 1), of which 288 is the number of features. I am using a TensorFlow dataset tf.data.experimental.make_batched_features_dataset and my input layer will be (n, 1, 1) which means it gives one feature to the model at a time. How can I make an input tensor with the shape of (n, 288, 1)? I mean how can I use all my features in one tensor?
Here is my code for the model:
def _gzip_reader_fn(filenames):
    """Return a ``TFRecordDataset`` that reads the given GZIP-compressed files."""
    gzip_records = tf.data.TFRecordDataset(filenames, compression_type='GZIP')
    return gzip_records
def _input_fn(file_pattern, tf_transform_output, batch_size):
    """Create the batched (features, label) dataset used for tuning/training.

    Args:
      file_pattern: input tfrecord file pattern.
      tf_transform_output: A TFTransformOutput; its transformed feature spec
        is copied and used to parse the records.
      batch_size: number of consecutive elements of the returned dataset to
        combine in a single batch.

    Returns:
      A dataset that contains (features, indices) tuples where features is a
      dictionary of Tensors, and indices is a single Tensor of label indices.
    """
    # Work on a copy of the feature spec rather than the transform output's own.
    feature_spec = tf_transform_output.transformed_feature_spec().copy()
    # Name of the label column after the transform step.
    label_name = features.transformed_name(features.LABEL_KEY)
    return tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=feature_spec,
        reader=_gzip_reader_fn,
        label_key=label_name)
def _build_keras_model(nb_classes=2, input_shape=(288, 1), learning_rate=0.001):
    """Build and compile a small 1-D CNN classifier.

    Two Conv1D + AveragePooling1D stages (6, then 12 filters, kernel size 7,
    sigmoid activations) are followed by Flatten and a sigmoid Dense output.

    Args:
      nb_classes: number of output units.
      input_shape: shape of one sample, ``(steps, channels)``. Defaults to
        ``(288, 1)``, the value the original body hard-coded.
      learning_rate: learning rate of the Adam optimizer.

    Returns:
      The compiled ``keras`` model.
    """
    # The original signature put non-default parameters after a default one
    # (a SyntaxError) and then overwrote `input_shape`; all parameters now
    # have defaults and the caller's shape is respected.
    input_layer = keras.layers.Input(input_shape)

    # Short inputs would shrink too much under 'valid' padding.
    padding = 'valid'
    if input_shape[0] < 60:
        padding = 'same'

    conv1 = keras.layers.Conv1D(filters=6, kernel_size=7, padding=padding, activation='sigmoid')(input_layer)
    conv1 = keras.layers.AveragePooling1D(pool_size=3)(conv1)

    conv2 = keras.layers.Conv1D(filters=12, kernel_size=7, padding=padding, activation='sigmoid')(conv1)
    conv2 = keras.layers.AveragePooling1D(pool_size=3)(conv2)

    flatten_layer = keras.layers.Flatten()(conv2)
    output_layer = keras.layers.Dense(units=nb_classes, activation='sigmoid')(flatten_layer)

    model = keras.models.Model(inputs=input_layer, outputs=output_layer)

    # `learning_rate` replaces the deprecated `lr` keyword.
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    # Compile Keras model
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['accuracy'])
    model.summary(print_fn=logging.info)
    return model
This is the error:
tensorflow:Model was constructed with shape (None, 288, 1) for input Tensor("input_1:0", shape=(None, 288, 1), dtype=float32), but it was called on an input with incompatible shape (128, 1, 1).

Issues with Keras Conv1D and VGG

I am trying to build a deep learning model on top of VGG16. I have implemented it in Keras using the following code:
# Full VGG16 (classifier head included) with ImageNet weights, fed by a
# 224x224 RGB input tensor.
image_input = Input(shape=(224, 224, 3))
model = VGG16(input_tensor=image_input, include_top=True,weights='imagenet')
model.summary()
# Output of the fully connected layer 'fc2'; it is 2-D (batch, units) --
# presumably 4096 units, judging by the input_shape passed below.
fc7 = model.get_layer('fc2').output
# Conv1D requires a 3-D input, so applying it to the 2-D fc2 output raises.
conv1d = Conv1D(1,5,activation='relu', name="conv1d",input_shape=(1,4096)) (fc7) #error appears here
# flat = Flatten()(conv1d)
fc8 = Dense(512, activation='relu', name="fc8")(conv1d)
#x= Flatten(name='flatten')(last_layer)
# Softmax classifier over num_classes.
out = Dense(num_classes, activation='softmax', name='output')(fc8)
custom_vgg_model = Model(image_input, out)
custom_vgg_model.summary()
I am getting the following error:
ValueError: Input 0 is incompatible with layer conv1d: expected ndim=3, found ndim=2
Why can't we do the consecutive feature vectors 1d convolution like in the image below?
enter link description here
A fully connected layer in a VGG is 2D, and a 1D convolutional layer expects 3D data.
At the point where VGG adds a Dense layer, it destroys the image format (4D) with a flatten or a global pooling, transforming it into plain data (2D). You no longer have dimensions to use convolutions.
If you try to explain why you want a Conv1D, what do you expect from it, then we could think of an alternative.
Example model:
movie_data = any_data_with_shape((number_of_videos, frames, 224, 224, 3))
movie_input = Input((None,224,224,3)) #None means any number of frames
vgg = VGG16(include_top=True,weights='imagenet')
This part is only necessary if you're getting intermediary outputs from vgg:
vgg_in = vgg.input
vgg_out = vgg.get_layer('fc2').output #make sure this layer exists
vgg = Model(vgg_in, vgg_out)
Continue:
vgg_outs = TimeDistributed(vgg)(movie_input) #out shape (None, frames, fc2_units)
outs = Conv1D(.....)(vgg_outs)
outs = GlobalAveragePooling1D()(outs)
outs = Dense(....)(outs)
.....
# Wrap everything in a Model from the movie input to the final outputs
# (fixes the `model` / `move_input` typos).
your_model = Model(movie_input, outs)