Vision Transformer attention map by keypoint location - TensorFlow - tensorflow

I have trained a ViT model in TensorFlow for keypoint estimation, and I would like to visualize the attention maps of each keypoint.
I have found code that does this in PyTorch, but I have no idea how to reproduce it in TensorFlow:

I solved it by taking the output of the layer that precedes each multi-head attention layer and passing it through that multi-head attention layer with return_attention_scores = True:
# Names of the multi-head attention layers whose attention maps are extracted.
attention_layer_names = ['encoded_' + str(i) for i in range(6)]

# One sub-model per attention layer: each maps the network input to the output
# of the layer located just before that attention layer in `model.layers`.
atten_maps_hooks = [
    Model(inputs=model.input,
          outputs=model.layers[getLayerIndexByName(model, layer_name) - 1].output)
    for layer_name in attention_layer_names
]

# `input`, `shape` and `enc_atten_maps_hwhw` are not defined in this snippet
# and must be provided by the surrounding script.
for layer_name, hook in zip(attention_layer_names, atten_maps_hooks):
    temp = hook.predict(input)
    # The same tensor is passed for both positional arguments (self-attention);
    # the second return value holds the attention scores.
    mha, scores = model.get_layer(layer_name)(temp, temp, return_attention_scores=True)
    # Keep the first entry along the leading axis (presumably the batch) and
    # reshape it to `shape + shape`.
    enc_atten_maps_hwhw.append(scores.numpy()[0].reshape(shape + shape))


TensorFlow model loaded with saved_model.load() raises an error when predicting on multi-input batches

# Keep only the 5000 most frequent words when converting text to sequences.
tokenizer = Tokenizer(num_words=5000)
# The vocabulary has to be learned before `texts_to_sequences` is used;
# without this call `word_index` is empty and every text maps to an empty
# sequence. Only the training texts are used so that validation and test data
# do not leak into the vocabulary.
tokenizer.fit_on_texts(X1_train)
X1_train = tokenizer.texts_to_sequences(X1_train)
X1_val = tokenizer.texts_to_sequences(X1_val)
X1_test = tokenizer.texts_to_sequences(X1_test)
# One extra row so that the largest word index is a valid position.
vocab_size = len(tokenizer.word_index) + 1
maxlen = 5000
# Pad (at the end) every sequence to exactly `maxlen` tokens.
X1_train = pad_sequences(X1_train, padding='post', maxlen=maxlen)
X1_val = pad_sequences(X1_val, padding='post', maxlen=maxlen)
X1_test = pad_sequences(X1_test, padding='post', maxlen=maxlen)
# Load the pre-trained GloVe vectors; the first column (the word) becomes the
# index and the remaining columns hold the vector components.
df_g = pd.read_csv('gs://----------/glove.6B.100d.txt', sep=" ", quoting=3, header=None, index_col=0)
# Map each word to its vector as a NumPy array.
embeddings_dictionary = {key: val.values for key, val in df_g.T.items()}
# Row `index` holds the vector of the word with that tokenizer index; words
# without a pre-trained vector keep an all-zero row.
embedding_matrix = zeros((vocab_size, 100))
for word, index in tokenizer.word_index.items():
    embedding_vector = embeddings_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[index] = embedding_vector
input_2_col_list= [x1,x2,...................., x30]
X2_train = X_train[input_2_col_list].values
X2_val = X_val[input_2_col_list].values
X2_test = X_test[[input_2_col_list].values
# Two-input classifier: a text branch (embedding -> Bi-LSTM -> Conv1D ->
# pooling) and a tabular branch (two Dense layers), merged before the output.
# Text input: one padded token sequence of length `maxlen` per sample.
input_1 = Input(shape=(maxlen,))
# Tabular input: 30 values per sample.
input_2 = Input(shape=(30,))
# Frozen embedding initialised from `embedding_matrix`.
embedding_layer = Embedding(vocab_size, 100, weights=[embedding_matrix], trainable=False)(input_1)
# NOTE(review): the original comment claimed (None,200,128); with
# return_sequences=True the time axis is `maxlen`, and a Bidirectional
# wrapper with its default merge presumably yields 2 * 128 features — confirm.
Bi_layer= Bidirectional(LSTM(128, return_sequences=True, dropout=0.15, recurrent_dropout=0.15))(embedding_layer)
con_layer = Conv1D(64, kernel_size=3, padding='valid', kernel_initializer='glorot_uniform')(Bi_layer)
# Collapse the time axis in two ways; both results are concatenated below.
avg_pool = GlobalAveragePooling1D()(con_layer)
max_pool = GlobalMaxPooling1D()(con_layer)
# Tabular branch.
dense_layer_1 = Dense(64, activation='relu')(input_2)
dense_layer_2 = Dense(64, activation='relu')(dense_layer_1)
# Merge the text and tabular branches.
concat_layer = Concatenate()([avg_pool,max_pool, dense_layer_2])
dense_layer_3 = Dense(50, activation='relu')(concat_layer)
# Two-way softmax output.
output = Dense(2, activation='softmax')(dense_layer_3)
model = Model(inputs=[input_1, input_2], outputs=output)
# NOTE(review): 'binary_crossentropy' with a 2-unit softmax presumably expects
# one-hot labels of shape (n, 2) — confirm against y_train.
# f1_m, precision_m and recall_m are custom metrics defined elsewhere.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc',f1_m,precision_m, recall_m])
# Train on both inputs and keep the object returned by fit().
history = model.fit(x=[X1_train, X2_train], y=y_train, batch_size=30, epochs=10, verbose=1, validation_data=([X1_val, X2_val], y_val))
# evaluate() returns the loss followed by the metrics passed to compile().
loss, accuracy, f1_score, precision, recall = model.evaluate(x=[X1_test, X2_test], y=y_test, verbose=0)
# Export the trained model to the bucket path.
model.save('gs://----------/')
# Attempt 1: reload with Keras, supplying the custom metric functions by name.
loaded_model=tf.keras.models.load_model( 'gs://----------/', custom_objects={"f1_m": f1_m , "recall_m": recall_m, "precision_m": precision_m } )
loss, accuracy, f1_score, precision, recall = loaded_model.evaluate(x=[X1_test, X2_test], y=y_test, verbose=0) # Runs without error, but the predictions are wrong
y_pred = loaded_model.predict(x=[X1_test, X2_test], batch_size=64, verbose=1)
# Predicted class = index of the largest of the two output values.
y_pred_bool = np.argmax(y_pred, axis=1) # Runs without error, but the predictions are wrong
# Attempt 2: wrap the saved model in a hub.KerasLayer inside a Sequential model.
import tensorflow_hub as hub
# NOTE(review): `x` is assigned here but the predict call below builds the
# same list again instead of using it.
x=[X1_test, X2_test]
loaded_model_2 = tf.keras.Sequential([hub.KerasLayer('gs:---------------/')]) # Getting an error
y_pred_2 = loaded_model_2.predict(x=[X1_test, X2_test], batch_size=64, verbose=1)
y_pred_bool_2 = np.argmax(y_pred_2, axis=1)
#### Inside of the model folder the files and dirs are: assets/, variables/, saved_model.pb, keras_metadata.pb
#### Using '' to train the model on Vertex AI
I have tried multiple saving and loading functions with custom objects, but none of them work properly.
The loaded model that does work produces predictions, but the outputs are not accurate. I have tested similar TEST data on the loaded model with another test script, and the predictions no longer match once the model has been loaded.
similar issues on StackOverflow: ''

Don't know why I am getting this error when training my model

This is the error I get when I start training the model.
error :
Node: 'model_1/conv2d_45/Relu'
Fused conv implementation does not support grouped convolutions for now.
[[{{node model_1/conv2d_45/Relu}}]] [Op:__inference_train_function_6204]
Here is my code :
def ITrackerModel(faces_shape, eyesLeft_shape, eyesRight_shape, faceGrids_shape):
    """Build and compile the four-input model.

    The left and right eye inputs each go through `conv_layer`; their outputs
    are concatenated along axis 1 and fed to a Dense(128) layer. The face
    input goes through `face_layer` and the face grid through `grid_layer`.
    The three results are concatenated along axis 1, followed by Dense(128)
    and a 2-unit output layer.

    `conv_layer`, `face_layer` and `grid_layer` are defined elsewhere.

    Args:
        faces_shape: shape passed to the face `Input`.
        eyesLeft_shape: shape passed to the left-eye `Input`.
        eyesRight_shape: shape passed to the right-eye `Input`.
        faceGrids_shape: shape passed to the face-grid `Input`.

    Returns:
        The compiled Keras model, taking inputs in the order
        [faces, eyesLeft, eyesRight, face_grid].
    """
    faces = Input(faces_shape)
    eyesLeft = Input(eyesLeft_shape)
    eyesRight = Input(eyesRight_shape)
    face_grid = Input(faceGrids_shape)

    # Eye branch.
    xEyeL = conv_layer(eyesLeft)
    xEyeR = conv_layer(eyesRight)
    xEyes = tf.concat([xEyeL, xEyeR], 1)
    eyesFC = Dense(128, activation='relu')(xEyes)

    # Face and face-grid branches.
    faceFC = face_layer(faces)
    faceGridFC = grid_layer(face_grid)

    # Merge all branches and regress the two output values.
    inpFC = tf.concat([eyesFC, faceFC, faceGridFC], 1)
    d = Dense(128, activation='relu')(inpFC)
    output = Dense(2)(d)

    model = Model([faces, eyesLeft, eyesRight, face_grid], output)
    opt = tf.keras.optimizers.Adam(learning_rate=0.0003)
    # NOTE(review): the output layer has no activation while the loss is
    # 'binary_crossentropy' — confirm this pairing is intended.
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model


# NOTE(review): in the original text this call was fused onto the `return`
# line. `model` is presumably the result of ITrackerModel(...); the statement
# that creates it is not shown.
model.fit([face, left_eye, right_eye, face_mask], y, epochs=20, batch_size=50)

Siamese network with third component error

I was able to create a Siamese network similar to this one:
The problem happens when I try to add a third model as an input to the head of my network.
I get the following error:
ValueError: Shape must be rank 2 but is rank 3 for '{{node head/concatenate/concat}} = ConcatV2[N=3, T=DT_FLOAT, Tidx=DT_INT32](simple/Identity, simple_1/Identity, different/Identity, head/concatenate/concat/axis)' with input shapes: [?,?], [?,?], [?,?,1], [].
Here is the code below. One thing I am not comfortable with is the line processed_a = base_model1(input_a) and what it does, even after checking the Keras Model documentation. I understand that without it I cannot get the desired shape and provide the necessary inputs to the final network.
Note that if I replace the code with the commented-out lines and just use a pure Siamese network, it works fine.
Any idea what needs to be changed to resolve the above error, and what does base_model1(input_a) do?
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.python.keras.layers import *
def getModel1(input_shape):
    """Return the "simple" branch model: Dense(32, relu) followed by Flatten.

    `input_shape` is forwarded unchanged to `Input(shape=...)`.
    """
    branch_input = Input(shape=input_shape)
    dense_out = layers.Dense(32, activation='relu')(branch_input)
    flat_out = layers.Flatten()(dense_out)
    return tf.keras.Model(inputs=branch_input, outputs=flat_out, name="simple")
def getModel3(input_shape):
    """Return the "different" branch model: a single Dense(1, relu) layer.

    `input_shape` is forwarded unchanged to `Input(shape=...)`.
    """
    branch_input = Input(shape=input_shape)
    dense_out = layers.Dense(1, activation='relu')(branch_input)
    return tf.keras.Model(inputs=branch_input, outputs=dense_out, name="different")
def outputModel(models):
    """Build the "head" model that merges the outputs of `models`.

    One input is created per entry of `models`; the inputs are concatenated
    and reduced to a single value by a Dense(1) layer.

    Args:
        models: Keras models whose outputs will be fed to the head, in order.

    Returns:
        A Keras model named "head" with one input per entry of `models`.
    """
    # The loop body was missing, which left `inputs` empty. `output_shape`
    # starts with the batch dimension, which `Input(shape=...)` must not
    # receive, hence the `[1:]`.
    inputs = [Input(shape=model.output_shape[1:]) for model in models]
    layer = layers.Concatenate()(inputs)
    layer = layers.Dense(1)(layer)
    return tf.keras.Model(inputs=inputs, outputs=layer, name="head")
# Script from the question: builds two shared "simple" branches plus one
# "different" branch, wires them into the head model and runs predict.
dataset = []
# NOTE(review): `inputs1` is never used in the visible code.
inputs1 = []
# 128 rows of three values each, so train_dataset1.shape is (128, 3).
for i in range(0, 128):
dataset.append([0.0, 1.0, 2.0])
train_dataset1 = np.asarray(dataset)
base_model1 = getModel1(train_dataset1.shape)
# A single row of three values, so train_dataset3.shape is (3,).
dataset3 = [0.0, 1.0, 2.0]
train_dataset3 = np.asarray(dataset3)
base_model3 = getModel3(train_dataset3.shape)
# NOTE(review): a Keras model's `input_shape` starts with the batch dimension
# (None); passing it unsliced to Input(shape=...) adds an extra dimension,
# which is likely the source of the rank mismatch reported above.
input_a = Input(shape=base_model1.input_shape)
input_b = Input(shape=base_model1.input_shape)
input_c = Input(shape=base_model3.input_shape)
oModel = outputModel([base_model1, base_model1, base_model3])
#oModel = outputModel([base_model1, base_model1])
# Calling a model on a tensor applies that model's layers to the tensor;
# base_model1 is called twice, so both branches use the same model object.
processed_a = base_model1(input_a)
processed_b = base_model1(input_b)
processed_c = base_model3(input_c)
head = oModel([processed_a, processed_b, processed_c])
model = tf.keras.Model(inputs=[input_a, input_b, input_c], outputs=head, name="model")
#head = oModel([processed_a, processed_b])
#model = tf.keras.Model(inputs=[input_a, input_b], outputs=head, name="model")
optimizer = tf.keras.optimizers.RMSprop(0.001)
# NOTE(review): the next line is the tail of a statement whose beginning is
# missing from this text (presumably a model.compile(...) call using
# `optimizer`).
metrics=['mae', 'mse'])
model.predict([np.asarray([train_dataset1]), np.asarray([train_dataset1]), np.asarray([train_dataset3])])
# NOTE(review): the commented-out line below also contains the argument lists
# of two further calls whose beginnings are missing.
#model.predict([np.asarray([train_dataset1]), np.asarray([train_dataset1])])[np.asarray([train_dataset1]), np.asarray([train_dataset1]), np.asarray([train_dataset3])], np.asarray([1.0]), epochs=1000, validation_split=0, verbose=0, callbacks=[])[np.asarray([train_dataset1]), np.asarray([train_dataset1])], np.asarray([1.0]), epochs=1000, validation_split=0, verbose=0, callbacks=[])
Pay attention to the dimensionality that you define when you initialize the model input and output. The first dimension is always the batch size (None), and this can cause you some problems. Here is a corrected example:
def getModel1(input_shape):
    """Build the "simple" branch: a relu Dense(32) layer, then Flatten.

    `input_shape` is passed straight to `Input(shape=...)`.
    """
    source = Input(shape=input_shape)
    features = Flatten()(Dense(32, activation='relu')(source))
    return Model(inputs=source, outputs=features, name="simple")
def getModel3(input_shape):
    """Build the "different" branch: one relu Dense(1) layer.

    `input_shape` is passed straight to `Input(shape=...)`.
    """
    source = Input(shape=input_shape)
    activation = Dense(1, activation='relu')(source)
    return Model(inputs=source, outputs=activation, name="different")
def outputModel(models):
    """Build the "head" model that merges the outputs of `models`.

    One input is created per entry of `models`; the inputs are concatenated
    and reduced to a single value by a Dense(1) layer.

    Args:
        models: Keras models whose outputs will be fed to the head, in order.

    Returns:
        A Keras model named "head" with one input per entry of `models`.
    """
    # The loop body was missing, which left `inputs` empty. `output_shape`
    # starts with the batch dimension, which `Input(shape=...)` must not
    # receive, hence the `[1:]`.
    inputs = [Input(shape=model.output_shape[1:]) for model in models]
    layer = Concatenate()(inputs)
    layer = Dense(1)(layer)
    return Model(inputs=inputs, outputs=layer, name="head")
# Shapes are given without the batch dimension.
base_model1 = getModel1((128, 3))
base_model3 = getModel3((3,))
# `input_shape` starts with the batch dimension (None); `[1:]` strips it
# before the shape is handed to Input.
input_a = Input(shape=base_model1.input_shape[1:])
input_b = Input(shape=base_model1.input_shape[1:])
input_c = Input(shape=base_model3.input_shape[1:])
oModel = outputModel([base_model1, base_model1, base_model3])
# base_model1 is applied to two different inputs, so both branches go through
# the same model object.
processed_a = base_model1(input_a)
processed_b = base_model1(input_b)
processed_c = base_model3(input_c)
head = oModel([processed_a, processed_b, processed_c])
model = Model(inputs=[input_a, input_b, input_c], outputs=head, name="model")
optimizer = tf.keras.optimizers.RMSprop(0.001)
# NOTE(review): only the tail `metrics=['mae', 'mse'])` of this call survived
# in the text; loss='mse' is an assumption — confirm.
model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
# create dummy data
n_sample = 5
train_dataset1 = np.random.uniform(0, 1, (n_sample, 128, 3))
train_dataset3 = np.random.uniform(0, 1, (n_sample, 3))
y = np.random.uniform(0, 1, n_sample)
# The fit call had lost its `model.fit(` prefix and was fused onto the line
# that defines `y`.
model.fit([train_dataset1, train_dataset1, train_dataset3], y, epochs=3)
model.predict([train_dataset1, train_dataset1, train_dataset3]).shape

Keras gives 'Not JSON Serializable' error when saving the model

I'm implementing a fully convolutional neural network for image segmentation, using the U-Net defined here.
To give different weights to the pixels of different classes, I defined an extra Lambda layer, as suggested here:
Keras, binary segmentation, add weight to loss function
The problem is that Keras raises this error when saving the model:
....., overwrite=True)
TypeError: ('Not JSON Serializable:', b'\n\x15clip_by_value/Minimum\x12\x07Minimum\x1a\x12conv2d_23/Identity\x1a\x17clip_by_value/Minimum/y*\x07\n\x01T\x12\x020\x01')
My network is defined in an external function
def weighted_binary_loss(X):
    """Lambda-layer body: binary cross-entropy scaled by a class weight mask.

    `X` unpacks to (y_pred, y_true, weights); `weights[0]` scales the terms
    where y_true is 1 and `weights[1]` the terms where y_true is 0.
    """
    y_pred, y_true, weights = X
    bce = binary_crossentropy(y_true, y_pred)
    positive_part = y_true * weights[0]
    negative_part = (1. - y_true) * weights[1]
    return multiply([bce, positive_part + negative_part])
def identity_loss(y_true, y_pred):
    """Return `y_pred` unchanged; `y_true` is ignored."""
    loss_value = y_pred
    return loss_value
def net():
    # NOTE(review): excerpt only — the layers that define `inputs`, `weights`,
    # `conv9` and `input_size` are not shown, and no return statement is
    # visible in this excerpt.
    conv10 = Conv2D(1, 1, activation = 'sigmoid')(conv9)
    # The loss is computed inside the graph by a Lambda layer ...
    w_loss = Lambda(weighted_binary_loss, output_shape=input_size, name='loss_output')([conv10, inputs, weights])
    model = Model(inputs = inputs, outputs = w_loss)
    # ... so the compiled loss simply passes the model output through.
    model.compile(optimizer = Adam(lr = 1e-5), loss = identity_loss, metrics = ['accuracy'])
that I call in my main function
# Checkpoint callback writing to 'temp_model.hdf5', monitoring 'loss' and
# keeping only the best model.
model_checkpoint = ModelCheckpoint('temp_model.hdf5', monitor='loss',verbose=1, save_best_only=True)
When I remove the Lambda layer, the error disappears:
# Variant without the Lambda layer: the sigmoid output is the model output
# and the built-in binary cross-entropy loss is used.
conv10 = Conv2D(1, 1, activation = 'sigmoid')(conv9)
model = Model(inputs = inputs, outputs = conv10)
model.compile(optimizer = Adam(lr = 1e-5), loss = 'binary_crossentropy', metrics = ['accuracy'])
I'm using
Keras==2.2.4, tensorflow-gpu==2.0.0b1
It appears that you are computing the loss in a layer of the model. It is not good practice to implement the loss function as a layer. You can compute your weighted loss using a custom loss function instead.
So your code can be rewritten as follows:
def weighted_binary_loss(y_true, y_pred):
    """Custom Keras loss: binary cross-entropy scaled by a class weight mask.

    The two weights are fixed inside the function: the first scales the terms
    where y_true is 1, the second the terms where y_true is 0.
    """
    weights = [0.5, 0.6]  # Define your weights here
    bce = binary_crossentropy(y_true, y_pred)
    positive_part = y_true * weights[0]
    negative_part = (1. - y_true) * weights[1]
    return multiply([bce, positive_part + negative_part])
# The sigmoid output is the model output; the weighted loss is passed to
# compile() as a regular loss function instead of living in a layer.
conv10 = Conv2D(1, 1, activation = 'sigmoid')(conv9)
model = Model(inputs = inputs, outputs = conv10)
model.compile(optimizer = Adam(lr = 1e-5), loss = weighted_binary_loss, metrics = ['accuracy'])
If the weights need to be dynamic and have to be passed to the loss function as a separate parameter, you can follow this question.

How to connect multi-layered Bi-directional LSTM encoder to a decoder?

I'm making a seq2seq model which uses a Bi-LSTM as the encoder and an attention mechanism in the decoder. With a single LSTM layer the model works fine. My encoder looks something like this:
# Bi-LSTM encoder: looks up embeddings for `rnn_inputs`, runs a bidirectional
# dynamic RNN over them and returns the concatenated outputs together with a
# merged final LSTM state.
# NOTE(review): the parameter list and the bidirectional_dynamic_rnn call are
# truncated in this excerpt.
def encoding_layer(self, rnn_inputs, rnn_size, num_layers, keep_prob,
embed = tf.nn.embedding_lookup(emb_matrix, rnn_inputs)
# A single LSTM cell wrapped with dropout (despite the name `stacked_cells`).
stacked_cells = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(rnn_size), keep_prob)
outputs, state = tf.nn.bidirectional_dynamic_rnn(cell_fw=stacked_cells,
# Join the forward and backward outputs along axis 2.
concat_outputs = tf.concat(outputs, 2)
cell_state_fw, cell_state_bw = state
# `.c` / `.h` are read directly from each direction's state, which only works
# when that state is a single LSTMStateTuple. NOTE(review): with a
# MultiRNNCell the per-direction state is presumably a tuple holding one
# LSTMStateTuple per layer — confirm.
cell_state_final = tf.concat([cell_state_fw.c, cell_state_bw.c], 1)
hidden_state_final = tf.concat([cell_state_fw.h, cell_state_bw.h], 1)
# Forward and backward states are concatenated along axis 1 into one tuple.
encoder_final_state = tf.nn.rnn_cell.LSTMStateTuple(c=cell_state_final, h=hidden_state_final)
return concat_outputs, encoder_final_state
Decoder :
# Training-time decoder: wraps an LSTM cell with Bahdanau attention over the
# encoder outputs, starts it from the encoder's final state and decodes with
# a TrainingHelper.
# NOTE(review): the BahdanauAttention, AttentionWrapper and BasicDecoder calls
# are truncated in this excerpt.
def decoding_layer_train(self, encoder_outputs, encoder_state, dec_cell, dec_embed_input,
target_sequence_length, max_summary_length,
output_layer, keep_prob, rnn_size, batch_size):
# Doubled because the encoder concatenates forward and backward states.
rnn_size = 2 * rnn_size
# NOTE(review): this overwrites the `dec_cell` argument passed by the caller.
dec_cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(rnn_size), keep_prob)
train_helper = tf.contrib.seq2seq.TrainingHelper(dec_embed_input, target_sequence_length)
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(rnn_size, encoder_outputs,
attention_cell = tf.contrib.seq2seq.AttentionWrapper(dec_cell, attention_mechanism,
# Start from a zero attention state, then replace its cell state with the
# encoder's final state.
state = attention_cell.zero_state(dtype=tf.float32, batch_size=batch_size)
state = state.clone(cell_state=encoder_state)
decoder = tf.contrib.seq2seq.BasicDecoder(cell=attention_cell, helper=train_helper,
outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, impute_finished=True, maximum_iterations=max_summary_length)
return outputs
With the above single-layer Bi-LSTM configuration my model works fine. But now I want to use a multi-layer Bi-LSTM encoder and decoder. So, in the encoder and the decoder, I change the cell to:
stacked_cells = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(rnn_size), keep_prob) for _ in range(num_layers)])
After changing the cell I get this error:
AttributeError: 'tuple' object has no attribute 'c'
num_layers = 2
rnn_size = 128
embedding_size = 50
So I want to know what exactly is returned as the state in the second case, and how to pass that state to the decoder.
Full code: