Adding Luong attention Layer to CNN - tensorflow

I'm using Keras to implement a functional CNN model for images of size 64x64x1, with six convolutional layers, like this:
num_classes = 5

def get_model():
    ## creating the CNN with the functional API
    input_ = keras.layers.Input(shape=[64, 64, 1])
    ## first block: three convolutional layers
    Conv1 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)(input_)
    Conv12 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)(Conv1)
    Conv13 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)(Conv12)
    ## max pooling with a stride of 2
    Max1 = keras.layers.MaxPool2D(2, strides=2)(Conv13)
    ## second block: three more convolutional layers
    Conv2 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)(Max1)
    Conv21 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)(Conv2)
    Conv23 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)(Conv21)
    ## another max pooling layer
    Max2 = keras.layers.MaxPool2D(2, strides=2)(Conv23)
    ## here I flatten the data; I want to replace this with the attention layer Att
    Flat = keras.layers.Flatten()(Max2)
    # dense layers
    Dense1 = keras.layers.Dense(2048, activation=tf.nn.relu)(Flat)
    Dense2 = keras.layers.Dense(700, activation=tf.nn.relu)(Dense1)
    # output layer with softmax for class prediction
    output = keras.layers.Dense(num_classes, activation=tf.nn.softmax)(Dense2)
    model = Model(inputs=input_, outputs=output)
    ## loss function and optimizer; I changed the optimizer to RMSprop
    optimizer_rmsprop = keras.optimizers.RMSprop(learning_rate=0.001, epsilon=1e-08, decay=0.0)
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer_rmsprop, metrics=["accuracy"])
    return model
To get better performance I want to add this attention layer to the above CNN:
#already imported
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from keras.layers import Dropout
# Variable-length int sequences. 64*64*1
query_input = tf.keras.Input(shape=(4096,), dtype='int32')
value_input = tf.keras.Input(shape=(4096,), dtype='int32')
# Embedding lookup.
token_embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64)
# Query embeddings of shape [batch_size, Tq, dimension].
query_embeddings = token_embedding(query_input)
# Value embeddings of shape [batch_size, Tv, dimension].
value_embeddings = token_embedding(value_input)
# CNN layer.
cnn_layer = tf.keras.layers.Conv1D(
    filters=100,
    kernel_size=4,
    # Use 'same' padding so outputs have the same shape as inputs.
    padding='same')
# Query encoding of shape [batch_size, Tq, filters].
query_seq_encoding = cnn_layer(query_embeddings)
# Value encoding of shape [batch_size, Tv, filters].
value_seq_encoding = cnn_layer(value_embeddings)
# Query-value attention of shape [batch_size, Tq, filters].
query_value_attention_seq = tf.keras.layers.Attention()(
    [query_seq_encoding, value_seq_encoding])
# Reduce over the sequence axis to produce encodings of shape
# [batch_size, filters].
query_encoding = tf.keras.layers.GlobalAveragePooling1D()(
    query_seq_encoding)
query_value_attention = tf.keras.layers.GlobalAveragePooling1D()(
    query_value_attention_seq)
# Concatenate query and document encodings to produce a DNN input layer.
input_layer = tf.keras.layers.Concatenate()(
    [query_encoding, query_value_attention])
But the problem is that I don't know how to link the attention layer to my CNN model, because when I connect the first convolutional layer to the attention output like this:
Conv1 = keras.layers.Conv2D(32, kernel_size=5)(input_layer)
I get this error:
ValueError: Input 0 of layer "conv2d" is incompatible with the layer: expected min_ndim=4, found ndim=2. Full shape received: (None, 200)
Can someone show me how to add an attention layer to the CNN model?

Updated Answer...
keras.backend.clear_session()

class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, output_dims):
        super(AttentionLayer, self).__init__()
        self.output_dims = output_dims
        self.embeddings = tf.keras.layers.Embedding(input_dim=4096, output_dim=output_dims)
        self.conv = tf.keras.layers.Conv1D(2048, 4, padding='same')
        self.attn_layer = tf.keras.layers.Attention()
        self.global_pooling_1 = tf.keras.layers.GlobalAveragePooling1D()
        self.global_pooling_2 = tf.keras.layers.GlobalAveragePooling1D()
        self.concat = tf.keras.layers.Concatenate()

    def call(self, query_input, value_input):
        batch_size = tf.shape(query_input)[0]
        query_input = tf.reshape(query_input, (batch_size, 4096))
        value_input = tf.reshape(value_input, (batch_size, 4096))
        # Query embeddings of shape [batch_size, Tq, dimension].
        query_embeddings = self.embeddings(query_input)
        # Value embeddings of shape [batch_size, Tv, dimension].
        value_embeddings = self.embeddings(value_input)
        # Query encoding of shape [batch_size, Tq, filters].
        query_seq_encoding = self.conv(query_embeddings)
        # Value encoding of shape [batch_size, Tv, filters].
        value_seq_encoding = self.conv(value_embeddings)
        # Query-value attention of shape [batch_size, Tq, filters].
        query_value_attention_seq = self.attn_layer(
            [query_seq_encoding, value_seq_encoding])
        # Reduce over the sequence axis to produce encodings of shape
        # [batch_size, filters].
        query_encoding = self.global_pooling_1(query_seq_encoding)
        query_value_attention = self.global_pooling_2(query_value_attention_seq)
        # Concatenate query and value encodings: 2048 + 2048 = 4096 features,
        # which reshape back into a (64, 64, 1) image for the CNN.
        input_layer = self.concat([query_encoding, query_value_attention])
        input_layer = tf.reshape(input_layer, (batch_size, 64, 64, 1))
        return input_layer
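A quick sanity check of this layer (a minimal sketch I added, not part of the original answer): the two pooled 2048-dim encodings concatenate to 4096 = 64*64 features, so a batch of integer images should come back with its original 4-D shape:
import numpy as np
# Hypothetical smoke test: the attention block should map a batch of
# (64, 64, 1) integer images back to the same 4-D shape.
attn = AttentionLayer(64)
dummy = np.random.randint(0, 2, size=(2, 64, 64, 1))
print(attn(dummy, dummy).shape)  # expected: (2, 64, 64, 1)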
keras.backend.clear_session()
num_classes = 5

class Model(tf.keras.Model):
    def __init__(self):
        super(Model, self).__init__()
        self.attn_layer = AttentionLayer(64)
        ## first block: three convolutional layers
        self.Conv1 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)
        self.Conv12 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)
        self.Conv13 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)
        ## max pooling with a stride of 2
        self.Max1 = keras.layers.MaxPool2D(2, strides=2)
        ## second block: three more convolutional layers
        self.Conv2 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)
        self.Conv21 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)
        self.Conv23 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)
        ## another max pooling layer
        self.Max2 = keras.layers.MaxPool2D(2, strides=2)
        ## flatten before the dense head
        self.Flat = keras.layers.Flatten()
        self.Dense1 = keras.layers.Dense(2048, activation=tf.nn.relu)
        self.Dense2 = keras.layers.Dense(700, activation=tf.nn.relu)
        # output layer with softmax for class prediction
        self.outputs = keras.layers.Dense(num_classes, activation=tf.nn.softmax)

    def call(self, x):
        # the attention layer produces a (batch, 64, 64, 1) tensor
        # that feeds the convolutional stack
        x = self.attn_layer(x, x)
        x = self.Conv1(x)
        x = self.Conv12(x)
        x = self.Conv13(x)
        x = self.Max1(x)
        x = self.Conv2(x)
        x = self.Conv21(x)
        x = self.Conv23(x)
        x = self.Max2(x)
        x = self.Flat(x)
        x = self.Dense1(x)
        x = self.Dense2(x)
        return self.outputs(x)

import numpy as np  # needed for the random test data below

model = Model()
x = np.random.randint(0, 2, size=(8, 64, 64, 1))
y = np.random.randint(0, 5, size=(8, 1))
print(model(x).shape)  # (8, 5)
optimize_rmsprop = keras.optimizers.RMSprop(learning_rate=0.001, epsilon=1e-08, decay=0.0)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimize_rmsprop, metrics=["accuracy"])
model.fit(x, y, epochs=1)
Output:

Related

Input layer 0 of sequence is incompatible with the layer - CNNs

I am trying to create a CNN model using hyperparameter tuning for image classification. When I run the code I receive the following error:
ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 32, 32, 32, 3), found shape=(32, 32, 32, 3)
How to fix the error? Here is the whole code pasted below:
# first we create our actual code which requires the arguments, units, activation, dropout, lr:
def build_model(hp):
    model = ks.Sequential([
        # adding first conv2d layer
        ks.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_1_filter", min_value=1, max_value=100, step=16),
            kernel_size=hp.Choice("conv_1_kernel", values=[3, 5]),
            activation=hp.Choice("conv_1_activation", ["relu", "tanh", "softmax"]),
            input_shape=(32, 32, 32, 3)
        ),
        # adding second conv2d layer
        ks.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_2_filter", min_value=1, max_value=50, step=16),
            kernel_size=hp.Choice("conv_2_kernel", values=[3, 5]),
            activation=hp.Choice("conv_2_activation", ["relu", "tanh", "softmax"]),
            input_shape=(32, 32, 32, 3)
        )])
    model.add(layers.Flatten())
    # Let's tune the number of Dense layers.
    for i in range(hp.Int("num_dense_layers", 1, 3)):
        model.add(
            layers.Dense(
                # Let's tune the number of units separately
                units=hp.Int(f"units_{i}", min_value=1, max_value=100, step=16),
                activation=hp.Choice("activation", ["relu", "tanh", "softmax"])
            ))
    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=0.25))
    model.add(layers.Dense(10, activation="softmax"))
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=ks.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

build_model(keras_tuner.HyperParameters())
You are getting this error due to an input shape mismatch: passing input_shape=(32, 32, 32, 3) makes Keras expect 5-D batches of shape (None, 32, 32, 32, 3), while your data is 4-D; for Conv2D the input_shape should be (height, width, channels).
Here I have implemented the hypermodel on the Fashion-MNIST dataset, which contains images of shape (28, 28, 1):
def build_model(hp):
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(28, 28, 1)),
        # adding first conv2d layer
        tf.keras.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_1_filter", min_value=1, max_value=100, step=16),
            kernel_size=hp.Choice("conv_1_kernel", values=[3, 5]),
            activation=hp.Choice("conv_1_activation", ["relu", "tanh", "softmax"])
        ),
        tf.keras.layers.MaxPooling2D(
            pool_size=hp.Choice('pooling_1', values=[2, 3])),
        # adding second conv2d layer
        tf.keras.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_2_filter", min_value=1, max_value=50, step=16),
            kernel_size=hp.Choice("conv_2_kernel", values=[3, 5]),
            activation=hp.Choice("conv_2_activation", ["relu", "tanh", "softmax"])
        ),
        tf.keras.layers.MaxPooling2D(
            pool_size=hp.Choice('pooling_2', values=[2, 3]))])
    model.add(tf.keras.layers.Flatten())
    if hp.Boolean("dropout"):
        model.add(tf.keras.layers.Dropout(rate=0.25))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model
By providing the correct shape you will not get any error.
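For instance, a minimal tuner run over this hypermodel could look like the sketch below (my addition: it assumes keras_tuner is installed, x_train/y_train are the Fashion-MNIST arrays with shape (28, 28, 1) and one-hot labels, and max_trials/epochs are arbitrary):
import keras_tuner

# Sketch: wire the hypermodel into a random-search tuner and run it.
# y_train must be one-hot encoded to match categorical_crossentropy.
tuner = keras_tuner.RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=3)
tuner.search(x_train, y_train, epochs=2, validation_split=0.2)
best_model = tuner.get_best_models(num_models=1)[0]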
For more details, please refer to this gist and this documentation. Thank you!

Why some of the hidden units return zero in the GRU autoencoder?

I have implemented a recurrent neural network autoencoder as below:
def AE_GRU(X):
    inputs = Input(shape=(X.shape[1], X.shape[2]), name="input")
    L1 = GRU(8, activation="relu", return_sequences=True, kernel_regularizer=regularizers.l2(0.00), name="E1")(inputs)
    L2 = GRU(4, activation="relu", return_sequences=False, name="E2")(L1)
    L3 = RepeatVector(X.shape[1], name="RepeatVector")(L2)
    L4 = GRU(4, activation="relu", return_sequences=True, name="D1")(L3)
    L5 = GRU(8, activation="relu", return_sequences=True, name="D2")(L4)
    output = TimeDistributed(Dense(X.shape[2]), name="output")(L5)
    model = Model(inputs=inputs, outputs=[output])
    return model
and after that I am running the below code to train the AE:
model = AE_GRU(trainX)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=optimizer, loss="mse")
model.summary()
epochs = 5
batch_size = 64
history = model.fit(
    trainX, trainX,
    epochs=epochs, batch_size=batch_size,
    validation_data=(valX, valX)
).history
I have also attached the result of model.summary() below.
At the end I am extracting the second hidden layer outputs by running the below code.
def all_hidden_layers_output(iModel, dtset):
    inp = iModel.input                                        # input placeholder
    outputs = [layer.output for layer in iModel.layers]       # all layer outputs
    functors = [K.function([inp], [out]) for out in outputs]  # evaluation functions
    layer_outs = [func([dtset]) for func in functors]
    return layer_outs
hidden_state_train = all_hidden_layers_output(model, trainX)[2][0]
hidden_state_val = all_hidden_layers_output(model, valX)[2][0]
# remove zeros_columns:
hidden_state_train = hidden_state_train[:,~np.all(hidden_state_train==0.0, axis=0)]
hidden_state_val = hidden_state_val[:,~np.all(hidden_state_val==0.0, axis=0)]
print(f"hidden_state_train.shape={hidden_state_train.shape}")
print(f"hidden_state_val.shape={hidden_state_val.shape}")
But I don't know why some of the units in this layer return zero all the time. I expect to get hidden_state_train and hidden_state_val as 2-D numpy arrays with 4 non-zero columns (based on the model.summary() information). Any help would be greatly appreciated.
This may be because of the dying ReLU problem: ReLU outputs exactly 0 for all negative inputs, so a unit whose pre-activation is negative for every sample gets stuck at zero (and receives no gradient). Have a look at this explanation of the problem: https://towardsdatascience.com/the-dying-relu-problem-clearly-explained-42d0c54e0d24
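One way to test this hypothesis (a sketch I added, not from the original answer) is to switch the GRU activations from relu back to tanh, the default, which does not saturate at exactly zero:
# Sketch: the encoder layers from the question with tanh (the GRU
# default) instead of relu, so units cannot get stuck at exactly 0.
L1 = GRU(8, activation="tanh", return_sequences=True, name="E1")(inputs)
L2 = GRU(4, activation="tanh", return_sequences=False, name="E2")(L1)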

How can I reduce the dimension of data, loaded through the flow_from_directory function of ImageDataGenerator?

Since I load my data (images) from structured folders, I use the flow_from_directory function of the ImageDataGenerator class provided by Keras. I have no issues feeding this data to a CNN model, but when it comes to an LSTM model I get the following error: ValueError: Error when checking input: expected lstm_1_input to have 3 dimensions, but got array with shape (64, 28, 28, 1). How can I reduce the dimension of the input data while reading it via ImageDataGenerator objects, so that I can use an LSTM model instead of a CNN?
p.s. The shape of the input images is (28, 28) and they are grayscale.
train_valid_datagen = ImageDataGenerator(validation_split=0.2)
train_gen = train_valid_datagen.flow_from_directory(
    directory=TRAIN_IMAGES_PATH,
    target_size=(28, 28),
    color_mode='grayscale',
    batch_size=64,
    class_mode='categorical',
    shuffle=True,
    subset='training'
)
Update: The LSTM model code:
inp = Input(shape=(28, 28, 1))
inp = Lambda(lambda x: squeeze(x, axis=-1))(inp) # from 4D to 3D
x = LSTM(num_units, dropout=dropout, recurrent_dropout=recurrent_dropout, activation=activation_fn, return_sequences=True)(inp)
x = BatchNormalization()(x)
x = Dense(128, activation=activation_fn)(x)
output = Dense(nb_classes, activation='softmax', kernel_regularizer=l2(0.001))(x)
model = Model(inputs=inp, outputs=output)
You start by feeding your network 4-D data (your images) for compatibility with ImageDataGenerator, and then you reshape it into a 3-D format for the LSTM.
These are the possibilities:
with only one channel you can simply squeeze the last dimension
inp = Input(shape=(28, 28, 1))
x = Lambda(lambda x: tf.squeeze(x, axis=-1))(inp) # from 4D to 3D
x = LSTM(32)(x)
if you have multiple channels (as with RGB images, or if you would like to apply an RNN after a Conv2D), a solution can be this:
inp = Input(shape=(28, 28, 1))
x = Conv2D(32, 3, padding='same', activation='relu')(inp)
x = Reshape((28,28*32))(x) # from 4D to 3D
x = LSTM(32)(x)
The fit can then be computed as always with model.fit_generator (in TF2, model.fit also accepts generators directly); see the sketch at the end of this answer.
UPDATE: model review
inp = Input(shape=(28, 28, 1))
x = Lambda(lambda x: squeeze(x, axis=-1))(inp) # from 4D to 3D
x = LSTM(32, dropout=dropout, recurrent_dropout=recurrent_dropout, activation=activation_fn, return_sequences=False)(x)
x = BatchNormalization()(x)
x = Dense(128, activation=activation_fn)(x)
output = Dense(nb_classes, activation='softmax', kernel_regularizer=l2(0.001))(x)
model = Model(inputs=inp, outputs=output)
model.summary()
pay attention when you define the inp variable (don't overwrite it)
set return_sequences=False in the LSTM in order to have a 2-D output
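Putting it together, training on the generator from the question could look like this (a sketch; in TF2 model.fit accepts generators directly and fit_generator is deprecated):
# Sketch: compile and fit the squeeze-based LSTM model on the
# flow_from_directory generator defined in the question.
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(train_gen, epochs=5)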

Concatenate an input before Dense layer. [Keras with TF backend]

So, I need to concatenate an input to the flattened layer before going into the dense layer.
I'm using Keras with TF as backend.
model.add(Flatten())
aux_input = Input(shape=(1, ))
model.add(Concatenate([model, aux_input]))
model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay)))
I have a scenario like this: X_train, y_train, aux_train. The shapes of y_train and aux_train are the same, (1,). An image has a ground truth and an aux_input.
How do I add this aux_input to the model while doing model.fit?
As suggested in the answers, I changed my model to the functional API. However, now I get the following error.
ValueError: Layer dense_1 was called with an input that isn't a
symbolic tensor. Received type: <class 'keras.layers.merge.Concatenate'>.
Full input: [<keras.layers.merge.Concatenate object>]. All
inputs to the layer should be tensors.
Here's the code for that part.
flatten = Flatten()(drop_5)
aux_rand = Input(shape=self.aux_shape)
concat = Concatenate([flatten, aux_input])
fc1 = Dense(512, kernel_regularizer=regularizers.l2(weight_decay))(concat)
Shape of aux input
aux_shape = (1,)
And then I call the model as follows:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
aux_rand = np.random.rand(y_train.shape[0])
model_inst = cifar10vgg()
x_train_input = Input(shape=(32,32,3))
aux_input = Input(shape=(1,))
model = Model(inputs=[x_train_input, aux_input], output=model_inst.build_model())
model.fit(x=[x_train, aux_rand], y=y_train, batch_size=batch_size, steps_per_epoch=x_train.shape[0] // batch_size,
          epochs=maxepoches, validation_data=(x_test, y_test),
          callbacks=[reduce_lr, tensorboard], verbose=2)
model_inst.build_model() returns Activation('softmax')(fc2) which is the output to be fed into the Model (as far as I understood)
As I see from your code, you implemented the model with the sequential API, which is not a good option in this case. If you have auxiliary inputs, the best way to implement such a feature is to use the functional API.
Here is an example from the Keras website:
from keras.layers import Input, Embedding, LSTM, Dense
from keras.models import Model
main_input = Input(shape=(100,), dtype='int32', name='main_input')
x = Embedding(output_dim=512, input_dim=10000, input_length=100)(main_input)
lstm_out = LSTM(32)(x)
auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_out)
auxiliary_input = Input(shape=(5,), name='aux_input')
x = keras.layers.concatenate([lstm_out, auxiliary_input])
x = Dense(64, activation='relu')(x)
main_output = Dense(1, activation='sigmoid', name='main_output')(x)
model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output, auxiliary_output])
Based on the description, I think the following code can give you some intuition:
x1 = Input(shape=(32, 32, 3))
flatten1 = Flatten()(x1)
x2 = Input(shape=(244, 244, 3))
vgg = VGG19(weights='imagenet', include_top=False)(x2)
flatten2 = Flatten()(vgg)
concat = Concatenate()([flatten1, flatten2])
d = Dense(10)(concat)
model = Model(inputs=[x1, x2], outputs=[d])
model.compile('adam', 'categorical_crossentropy')
model.fit(x=[x_train1, x_train2], y=y_labels)
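As for the exact error in the updated question: Concatenate([flatten, aux_input]) only constructs the layer (with the list as its constructor argument) and never calls it on tensors, so Dense receives a layer object instead of a symbolic tensor. The layer has to be instantiated and then called (also note the question defines aux_rand but concatenates aux_input); a sketch of the fix:
# The merge layer must be instantiated first, then called on tensors.
concat = Concatenate()([flatten, aux_rand])
fc1 = Dense(512, kernel_regularizer=regularizers.l2(weight_decay))(concat)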

TypeError: Input tensors to a Model must be Keras tensors. Found: Tensor("Placeholder_3:0", dtype=float32) (missing Keras metadata)

My input variables:
IMG_SIZE_PX=50
SLICE_COUNT=20
n_classes=2
x=tf.placeholder('float')
y=tf.placeholder('float')
keep_rate=0.8
keep_prob=tf.placeholder(tf.float32)
My 3-D convolution function:
def conv3d(x, W):
    return tf.nn.conv3d(x, W, strides=[1,1,1,1,1], padding='SAME')
My 3-D max-pooling function:
def maxpool3d(x):
    return tf.nn.max_pool3d(x, ksize=[1,2,2,2,1], strides=[1,2,2,2,1],
                            padding='SAME')
This is my network:
def convolutional_neural_network(x):
    # my network weights
    weights = {'W_conv1': tf.Variable(tf.random_normal([3,3,3,1,32])),
               'W_conv2': tf.Variable(tf.random_normal([3,3,3,32,64])),
               'W_fc': tf.Variable(tf.random_normal([54080, 1024])),  # here 54080 is the input tensor value
               'out': tf.Variable(tf.random_normal([1024, n_classes]))}
    # my network biases
    biases = {'b_conv1': tf.Variable(tf.random_normal([32])),
              'b_conv2': tf.Variable(tf.random_normal([64])),
              'b_fc': tf.Variable(tf.random_normal([1024])),
              'out': tf.Variable(tf.random_normal([n_classes]))}
    # here is my input x
    x = tf.reshape(x, shape=[-1, IMG_SIZE_PX, IMG_SIZE_PX, SLICE_COUNT, 1])
    # my 2 hidden layers (convolution + maxpooling)
    conv1 = tf.nn.relu(conv3d(x, weights['W_conv1']) + biases['b_conv1'])
    conv1 = maxpool3d(conv1)
    conv2 = tf.nn.relu(conv3d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = maxpool3d(conv2)
    # my fully connected layer
    fc = tf.reshape(conv2, [-1, 54080])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    fc = tf.nn.dropout(fc, keep_rate)
    # my output layer
    output = tf.matmul(fc, weights['out']) + biases['out']
    return output
My input numpy arrays:
much_data = np.load('D:/muchdata-50-50-20.npy')
train_data = much_data[-10:]
validation_data = much_data[-2:]
Finally, training my network:
def train_neural_network(x):
    outl = convolutional_neural_network(x)  # don't know if this is my output layer
    model = Model(input=x, output=outl)
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

train_neural_network(x)  # train the net
My error is the one in the title: the Keras metadata is missing. Any help would be appreciated.