Siamese network with third component error - tensorflow

I was able to create a siamese network similar to :
https://github.com/aspamers/siamese/
The problem happens if I try to add a third model as an input to the head of my network.
I will get the following error :
ValueError: Shape must be rank 2 but is rank 3 for '{{node head/concatenate/concat}} = ConcatV2[N=3, T=DT_FLOAT, Tidx=DT_INT32](simple/Identity, simple_1/Identity, different/Identity, head/concatenate/concat/axis)' with input shapes: [?,?], [?,?], [?,?,1], [].
Here is the code below, one thing I am not comfortable with is the line processed_a = base_model1(input_a) and what it does even after checking the Keras model doc. I understand that if I don't do it I cannot have the desired shape and provide the necessary inputs to the final network.
Note that if I replace the code with what is comment and just use a pure siamese network it will work ok.
Any idea what needs to be changed to resolve the above error and what base_model1(input_a) does.
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.python.keras.layers import *
def getModel1(input_shape):
model_input = Input(shape=input_shape)
layer = layers.Dense(32, activation='relu')(model_input)
layer = layers.Flatten()(layer)
return tf.keras.Model(inputs=model_input, outputs= layer, name="simple")
def getModel3(input_shape):
model_input = Input(shape=input_shape)
layer = layers.Dense(1, activation='relu')(model_input)
return tf.keras.Model(inputs=model_input, outputs=layer, name="different")
def outputModel(models):
inputs = []
for model in models:
inputs.append(Input(shape=model.output_shape))
layer = layers.Concatenate()(inputs)
layer = layers.Dense(1)(layer)
return tf.keras.Model(inputs=inputs, outputs=layer, name="head")
dataset = []
inputs1 = []
for i in range(0, 128):
dataset.append([0.0, 1.0, 2.0])
train_dataset1 = np.asarray(dataset)
base_model1 = getModel1(train_dataset1.shape)
dataset3 = [0.0, 1.0, 2.0]
train_dataset3 = np.asarray(dataset3)
base_model3 = getModel3(train_dataset3.shape)
input_a = Input(shape=base_model1.input_shape)
input_b = Input(shape=base_model1.input_shape)
input_c = Input(shape=base_model3.input_shape)
oModel = outputModel([base_model1, base_model1, base_model3])
#oModel = outputModel([base_model1, base_model1])
processed_a = base_model1(input_a)
processed_b = base_model1(input_b)
processed_c = base_model3(input_c)
head = oModel([processed_a, processed_b, processed_c])
model = tf.keras.Model(inputs=[input_a, input_b, input_c], outputs=head, name="model")
#head = oModel([processed_a, processed_b])
#model = tf.keras.Model(inputs=[input_a, input_b], outputs=head, name="model")
optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
optimizer=optimizer,
metrics=['mae', 'mse'])
model.predict([np.asarray([train_dataset1]), np.asarray([train_dataset1]), np.asarray([train_dataset3])])
#model.predict([np.asarray([train_dataset1]), np.asarray([train_dataset1])])
model.fit([np.asarray([train_dataset1]), np.asarray([train_dataset1]), np.asarray([train_dataset3])], np.asarray([1.0]), epochs=1000, validation_split=0, verbose=0, callbacks=[])
#model.fit([np.asarray([train_dataset1]), np.asarray([train_dataset1])], np.asarray([1.0]), epochs=1000, validation_split=0, verbose=0, callbacks=[])

pay attention to the dimensionality that you define when u initialize the model input and output. the first dimension is always the batch size (None) and this can cause u some problem. here the correct example:
def getModel1(input_shape):
model_input = Input(shape=input_shape)
layer = Dense(32, activation='relu')(model_input)
layer = Flatten()(layer)
return Model(inputs=model_input, outputs= layer, name="simple")
def getModel3(input_shape):
model_input = Input(shape=input_shape)
layer = Dense(1, activation='relu')(model_input)
return Model(inputs=model_input, outputs=layer, name="different")
def outputModel(models):
inputs = []
for model in models:
inputs.append(Input(shape=model.output_shape[1:]))
layer = Concatenate()(inputs)
layer = Dense(1)(layer)
return Model(inputs=inputs, outputs=layer, name="head")
base_model1 = getModel1((128,3))
base_model3 = getModel3((3))
input_a = Input(shape=base_model1.input_shape[1:])
input_b = Input(shape=base_model1.input_shape[1:])
input_c = Input(shape=base_model3.input_shape[1:])
oModel = outputModel([base_model1, base_model1, base_model3])
processed_a = base_model1(input_a)
processed_b = base_model1(input_b)
processed_c = base_model3(input_c)
head = oModel([processed_a, processed_b, processed_c])
model = Model(inputs=[input_a, input_b, input_c], outputs=head, name="model")
optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
optimizer=optimizer,
metrics=['mae', 'mse'])
# create dummy data
n_sample = 5
train_dataset1 = np.random.uniform(0,1, (n_sample,128,3))
train_dataset3 = np.random.uniform(0,1, (n_sample,3))
y = np.random.uniform(0,1, n_sample)
model.fit([train_dataset1, train_dataset1, train_dataset3], y, epochs=3)
model.predict([train_dataset1, train_dataset1, train_dataset3]).shape

Related

Tensorflow model saved_model.load() is getting error to predict multiple batch input

tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X1_train)
X1_train = tokenizer.texts_to_sequences(X1_train)
X1_val = tokenizer.texts_to_sequences(X1_val)
X1_test = tokenizer.texts_to_sequences(X1_test)
vocab_size = len(tokenizer.word_index) + 1
maxlen = 5000
X1_train = pad_sequences(X1_train, padding='post', maxlen=maxlen)
X1_val = pad_sequences(X1_val, padding='post', maxlen=maxlen)
X1_test = pad_sequences(X1_test, padding='post', maxlen=maxlen)
embeddings_dictionary = dict()
df_g = pd.read_csv('gs://----------/glove.6B.100d.txt', sep=" ", quoting=3, header=None, index_col=0)
embeddings_dictionary = {key: val.values for key, val in df_g.T.items()}
embedding_matrix = zeros((vocab_size, 100))
for word, index in tokenizer.word_index.items():
embedding_vector = embeddings_dictionary.get(word)
if embedding_vector is not None:
embedding_matrix[index] = embedding_vector
input_2_col_list= [x1,x2,...................., x30]
X2_train = X_train[input_2_col_list].values
X2_val = X_val[input_2_col_list].values
X2_test = X_test[[input_2_col_list].values
input_1 = Input(shape=(maxlen,))
input_2 = Input(shape=(30,))
embedding_layer = Embedding(vocab_size, 100, weights=[embedding_matrix], trainable=False)(input_1)
Bi_layer= Bidirectional(LSTM(128, return_sequences=True, dropout=0.15, recurrent_dropout=0.15))(embedding_layer) # Dimn shd be (None,200,128)
con_layer = Conv1D(64, kernel_size=3, padding='valid', kernel_initializer='glorot_uniform')(Bi_layer)
avg_pool = GlobalAveragePooling1D()(con_layer)
max_pool = GlobalMaxPooling1D()(con_layer)
dense_layer_1 = Dense(64, activation='relu')(input_2)
dense_layer_2 = Dense(64, activation='relu')(dense_layer_1)
concat_layer = Concatenate()([avg_pool,max_pool, dense_layer_2])
dense_layer_3 = Dense(50, activation='relu')(concat_layer)
output = Dense(2, activation='softmax')(dense_layer_3)
model = Model(inputs=[input_1, input_2], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc',f1_m,precision_m, recall_m])
print(model.summary())
history = model.fit(x=[X1_train, X2_train], y=y_train, batch_size=30, epochs=10, verbose=1, validation_data=([X1_val,X2_val],y_val))
loss, accuracy, f1_score, precision, recall = model.evaluate(x=[X1_test, X2_test], y=y_test, verbose=0)
model.save('gs://----------/Tuned_hybrid_GCP_5000_CASETYPE_8_9.tf')
##################################################
loaded_model=tf.keras.models.load_model( 'gs://----------/Tuned_hybrid_GCP_5000_CASETYPE_8_9.tf', custom_objects={"f1_m": f1_m , "recall_m": recall_m, "precision_m": precision_m } )
loss, accuracy, f1_score, precision, recall = loaded_model.evaluate(x=[X1_test, X2_test], y=y_test, verbose=0) ###This is getting no error BUT the predictions are wrong
y_pred = loaded_model.predict(x=[X1_test, X2_test], batch_size=64, verbose=1)
y_pred_bool = np.argmax(y_pred, axis=1) ###This is getting no error BUT the predictions are wrong
##################################################################
import tensorflow_hub as hub
x=[X1_test, X2_test]
loaded_model_2 = tf.keras.Sequential([hub.KerasLayer('gs:---------------/Tuned_hybrid_GCP_100_CASETYPE_8_11_save.tf')])
loaded_model_2.build(x.shape) #### Getting an error
y_pred_2 = loaded_model_2.predict(x=[X1_test, X2_test], batch_size=64, verbose=1)
y_pred_bool_2 = np.argmax(y_pred_2, axis=1)
###################################################
#### Inside of the model folder the files and dirs are: assets/, variables/, saved_model.pb, keras_metadata.pb
#### Using 'us-docker.pkg.dev/vertex-ai/training/tf-gpu.2-8:latest' to train the model on Vertex AI
I have tried multiple saving a loading function with custom objects, but not of them are working properly
The working loaded model is predicting, but the outputs are not accurate. I have tested the similar TEST data to predict on the loaded model with another test script. The predictions are not matching after I loaded the model.
similar issues on StackOverflow: 'https://stackoverflow.com/questions/68937973/how-can-i-fix-the-problem-of-loading-the-model-to-get-new-predictions'

" ValueError: Expecting KerasTensor which is from tf.keras.Input()". Error in prediction with dropout function

I am trying to predict uncertainty in a regression problem using Dropout during testing as per Yarin Gal's article. I created a class using Keras's backend function as provided by this stack overflow question's answer. The class takes a NN model as input and randomly drops neurons during testing to give a stochastic estimate rather than deterministic output for a time-series forecasting.
I create a simple encoder-decoder model as shown below for the forecasting with 0.1 dropout during training:
input_sequence = Input(shape=(lookback, train_x.shape[2]))
encoder = LSTM(128, return_sequences=False)(input_sequence)
r_vec = RepeatVector(forward_pred)(encoder)
decoder = LSTM(128, return_sequences=True, dropout=0.1)(r_vec) #maybe use dropout=0.1
output = TimeDistributed(Dense(train_y.shape[2], activation='linear'))(decoder)
# optimiser = optimizers.Adam(clipnorm=1)
enc_dec_model = Model(input_sequence, output)
enc_dec_model.compile(loss="mean_squared_error",
optimizer="adam",
metrics=['mean_squared_error'])
enc_dec_model.summary()
After that, I define and call the DropoutPrediction class.
# Define the class:
class KerasDropoutPrediction(object):
def __init__(self ,model):
self.f = K.function(
[model.layers[0].input,
K.learning_phase()],
[model.layers[-1].output])
def predict(self ,x, n_iter=10):
result = []
for _ in range(n_iter):
result.append(self.f([x , 1]))
result = np.array(result).reshape(n_iter ,x.shape[0] ,x.shape[1]).T
return result
# Call the object:
kdp = KerasDropoutPrediction(enc_dec_model)
y_pred_do = kdp.predict(x_test,n_iter=100)
y_pred_do_mean = y_pred_do.mean(axis=1)
However, in the line
kdp = KerasDropoutPrediction(enc_dec_model), when I call the LSTM model,
I got the following error message which says the input has to be a Keras Tensor. Can anyone help me with this error?
Error Message:
ValueError: Found unexpected instance while processing input tensors for keras functional model. Expecting KerasTensor which is from tf.keras.Input() or output from keras layer call(). Got: 0
To activate Dropout at inference time, you simply have to specify training=True (TF>2.0) in the layer of interest (in the last LSTM layer in your case)
with training=False
inp = Input(shape=(10, 1))
x = LSTM(1, dropout=0.3)(inp, training=False)
m = Model(inp,x)
# m.compile(...)
# m.fit(...)
X = np.random.uniform(0,1, (1,10,1))
output = []
for i in range(0,100):
output.append(m.predict(X)) # always the same
with training=True
inp = Input(shape=(10, 1))
x = LSTM(1, dropout=0.3)(inp, training=True)
m = Model(inp,x)
# m.compile(...)
# m.fit(...)
X = np.random.uniform(0,1, (1,10,1))
output = []
for i in range(0,100):
output.append(m.predict(X)) # always different
In your example, this becomes:
input_sequence = Input(shape=(lookback, train_x.shape[2]))
encoder = LSTM(128, return_sequences=False)(input_sequence)
r_vec = RepeatVector(forward_pred)(encoder)
decoder = LSTM(128, return_sequences=True, dropout=0.1)(r_vec, training=True)
output = TimeDistributed(Dense(train_y.shape[2], activation='linear'))(decoder)
enc_dec_model = Model(input_sequence, output)
enc_dec_model.compile(
loss="mean_squared_error",
optimizer="adam",
metrics=['mean_squared_error']
)
enc_dec_model.fit(train_x, train_y, epochs=10, batch_size=32)
and the KerasDropoutPrediction:
class KerasDropoutPrediction(object):
def __init__(self, model):
self.model = model
def predict(self, X, n_iter=10):
result = []
for _ in range(n_iter):
result.append(self.model.predict(X))
result = np.array(result)
return result
kdp = KerasDropoutPrediction(enc_dec_model)
y_pred_do = kdp.predict(test_x, n_iter=100)
y_pred_do_mean = y_pred_do.mean(axis=0)

How to build a Siamese Network from Transformer Model? Shape Input Error

I have the following Base Network with some important (error is coming due to these) parameters (please assume every else parameter)
maxlen = 250
model_dense = 256
Base Model :
def build_base_model(inputs):
inputs = layers.Input(shape=(maxlen,),name='base_input')
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim, trans_drop1, trans_drop2, trans_reg1, trans_reg2)
x = transformer_block(x)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(model_drop1)(x)
outputs = layers.Dense(model_dense)(x)
base_model = keras.Model(inputs=inputs, outputs=outputs)
return base_model
and I my Siamese network as:
base_model = build_base_model()
input_text1 = layers.Input(shape=(maxlen,))
input_text2 = layers.Input(shape=(maxlen,))
emb1 = base_model(input_text1)
emb2 = base_model(input_text2)
distance = layers.Lambda(euclidean_distance)([emb1, emb2])
outputs = layers.Dense(1, activation="sigmoid")(distance)
model = keras.Model(inputs=[emb1, emb2], outputs=outputs)
model.compile(
optimizer="adam", metrics = ["accuracy",], loss= 'binary_crossentropy')
history = model.fit(
train_X, train_y, batch_size=batch_size, epochs = 50, validation_split = 0.15, callbacks = callbacks, verbose = 1,
)
It gives me an error as:
ValueError: Input 0 of layer "model_11" is incompatible with the layer: expected shape=(None, 256), found shape=(None, 250)
What am I doing wrong?
Base Transformer model tutorial taken from this
Siamese Model Structure, cosine distance, make_pairs from this
UPDATE- I have built the new network in a different manner and it is up and running. Can someone please confirms if it is the correct one:
inputs1 = layers.Input(shape=(maxlen,),name='inp_1')
inputs2 = layers.Input(shape=(maxlen,),name='inp_2')
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim, trans_drop1, trans_drop2, trans_reg1, trans_reg2)
pooling = layers.GlobalAveragePooling1D()
drop_layer = layers.Dropout(model_drop1)
out_dense = layers.Dense(model_dense)
x1 = embedding_layer(inputs1)
x2 = embedding_layer(inputs2)
x1 = transformer_block(x1)
x2 = transformer_block(x2)
x1 = pooling(x1)
x2 = pooling(x2)
x1 = drop_layer(x1)
x2 = drop_layer(x2)
vec_x1 = out_dense(x1)
vec_x2 = out_dense(x2)
distance = layers.Lambda(euclidean_distance)([vec_x1, vec_x2])
outputs = layers.Dense(1, activation="sigmoid")(distance)
model = keras.Model(inputs=[inputs1, inputs2], outputs=outputs)
in the linemodel = keras.Model(inputs=[emb1, emb2], outputs=outputs):
I suspect that you are mean to saymodel = keras.Model(inputs=[input_text1, input_text2], outputs=outputs)

How to freeze/unfreeze a pretrained Model as part of a subclassed Model in Tensorflow?

I am trying to build a subclassed Model which consists of a pretrained convolutional Base and some Dense Layers on top, using Tensorflow >= 2.4.
However freezing/unfreezing of the subclassed Model has no effect once it was trained before. When I do the same with the Functional API everything works as expected. I would really appreciate some Hint to what im missing here: Following Code should specify my problem further. Pardon me the amount of Code:
#Setup
import tensorflow as tf
tf.config.run_functions_eagerly(False)
import numpy as np
from tensorflow.keras.regularizers import l1
import matplotlib.pyplot as plt
#tf.function
def create_images_and_labels(img,label, height = 70, width = 70): #Image augmentation
label = tf.cast(label, 'float32')
label = tf.squeeze(label)
img = tf.image.convert_image_dtype(img, tf.float32)
img = tf.image.resize(img, (height, width))
# img = preprocess_input(img)
return img, label
cifar = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar.load_data()
num_classes = len(np.unique(y_train))
ds_train = tf.data.Dataset.from_tensor_slices((x_train, tf.one_hot(y_train, depth = len(np.unique(y_train)))))
ds_train = ds_train.map(lambda img, label: create_images_and_labels(img, label, height = 70, width = 70))
ds_train = ds_train.shuffle(50000)
ds_train = ds_train.batch(50, drop_remainder = True)
ds_val = tf.data.Dataset.from_tensor_slices((x_test, tf.one_hot(y_test, depth = len(np.unique(y_train)))))
ds_val = ds_val.map(lambda img, label: create_images_and_labels(img, label, height = 70, width = 70))
ds_val = ds_val.batch(50, drop_remainder=True)
# for i in ds_train.take(1):
# x, y = i
# for ind in range(x.shape[0]):
# plt.imshow(x[ind,:,:])
# plt.show()
# print(y[ind])
'''
Defining simple subclassed Model consisting of
VGG16
Flatten
Dense Layers
customized what happens in model.fit and model.evaluate (Actually its the standard Keras procedure with custom Metrics)
customized metrics: Loss and Accuracy for Training and Validation Step
added unfreezing Method
'set_trainable_layers'
Arguments:
num_head (How many dense Layers)
num_base (How many VGG Layers)
'''
class Test_Model(tf.keras.models.Model):
def __init__(
self,
num_unfrozen_head_layers,
num_unfrozen_base_layers,
num_classes,
conv_base = tf.keras.applications.VGG16(include_top = False, weights = 'imagenet', input_shape = (70,70,3)),
):
super(Test_Model, self).__init__(name = "Test_Model")
self.base = conv_base
self.flatten = tf.keras.layers.Flatten()
self.dense1 = tf.keras.layers.Dense(2048, activation = 'relu')
self.dense2 = tf.keras.layers.Dense(1024, activation = 'relu')
self.dense3 = tf.keras.layers.Dense(128, activation = 'relu')
self.out = tf.keras.layers.Dense(num_classes, activation = 'softmax')
self.out._name = 'out'
self.train_loss_metric = tf.keras.metrics.Mean('Supervised Training Loss')
self.train_acc_metric = tf.keras.metrics.CategoricalAccuracy('Supervised Training Accuracy')
self.val_loss_metric = tf.keras.metrics.Mean('Supervised Validation Loss')
self.val_acc_metric = tf.keras.metrics.CategoricalAccuracy('Supervised Validation Accuracy')
self.loss_fn = tf.keras.losses.categorical_crossentropy
self.learning_rate = 1e-4
# self.build((None, 32,32,3))
self.set_trainable_layers(num_unfrozen_head_layers, num_unfrozen_base_layers)
#tf.function
def call(self, inputs, training = False):
x = self.base(inputs)
x = self.flatten(x)
x = self.dense1(x)
x = self.dense2(x)
x = self.dense3(x)
x = self.out(x)
return x
#tf.function
def train_step(self, input_data):
x_batch, y_batch = input_data
with tf.GradientTape() as tape:
tape.watch(x_batch)
y_pred = self(x_batch, training = True)
loss = self.loss_fn(y_batch, y_pred)
trainable_vars = self.trainable_weights
gradients = tape.gradient(loss, trainable_vars)
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
self.train_loss_metric.update_state(loss)
self.train_acc_metric.update_state(y_batch, y_pred)
return {"Supervised Loss": self.train_loss_metric.result(),
"Supervised Accuracy":self.train_acc_metric.result()}
#tf.function
def test_step(self, input_data):
x_batch,y_batch = input_data
y_pred = self(x_batch, training = False)
loss = self.loss_fn(y_batch, y_pred)
self.val_loss_metric.update_state(loss)
self.val_acc_metric.update_state(y_batch, y_pred)
return {"Val Supervised Loss": self.val_loss_metric.result(),
"Val Supervised Accuracy":self.val_acc_metric.result()}
#property
def metrics(self):
# We list our `Metric` objects here so that `reset_states()` can be
# called automatically at the start of each epoch
# or at the start of `evaluate()`.
# If you don't implement this property, you have to call
# `reset_states()` yourself at the time of your choosing.
return [self.train_loss_metric,
self.train_acc_metric,
self.val_loss_metric,
self.val_acc_metric]
def set_trainable_layers(self, num_head, num_base):
for layer in [lay for lay in self.layers if not isinstance(lay , tf.keras.models.Model)]:
layer.trainable = False
print(layer.name, layer.trainable)
for block in self.layers:
if isinstance(block, tf.keras.models.Model):
print('Found Submodel', block.name)
for layer in block.layers:
layer.trainable = False
print(layer.name, layer.trainable)
if num_base > 0:
for layer in block.layers[-num_base:]:
layer.trainable = True
print(layer.name, layer.trainable)
if num_head > 0:
for layer in [lay for lay in self.layers if not isinstance(lay, tf.keras.models.Model)][-num_head:]:
layer.trainable = True
print(layer.name, layer.trainable)
'''
Showcase1: First training completely frozen Model, then unfreezing:
unfreezed model doesnt learn
'''
model = Test_Model(num_unfrozen_head_layers= 0, num_unfrozen_base_layers = 0, num_classes = num_classes) # Should NOT learn -> doesnt learn
model.build((None, 70,70,3))
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data = ds_val)
model.set_trainable_layers(10,20) # SHOULD LEARN -> Doesnt learn
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data = ds_val)
#DOESNT LEARN
'''
Showcase2: when first training the Model with more trainable Layers than in the second step:
AssertionError occurs
'''
model = Test_Model(num_unfrozen_head_layers= 10, num_unfrozen_base_layers = 2, num_classes = num_classes) # SHOULD LEARN -> learns
model.build((None, 70,70,3))
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data = ds_val)
model.set_trainable_layers(1,1) # SHOULD NOT LEARN -> AssertionError
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data = ds_val)
'''
Showcase3: same Procedure as in Showcase2 but optimizer State is transferred to recompiled Model:
Cant set Weigthts because optimizer expects List of Length 0
'''
model = Test_Model(num_unfrozen_head_layers= 10, num_unfrozen_base_layers = 20, num_classes = num_classes) # SHOULD LEARN -> learns
model.build((None, 70,70,3))
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data = ds_val)
opti_state = model.optimizer.get_weights()
model.set_trainable_layers(0,0) # SHOULD NOT LEARN -> Learns
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.optimizer.set_weights(opti_state)
model.fit(ds_train, validation_data = ds_val)
#%%%
'''
Constructing same Architecture with Functional API and running Experiments
'''
import tensorflow as tf
conv_base = tf.keras.applications.VGG16(include_top = False, weights = 'imagenet', input_shape = (70,70,3))
inputs = tf.keras.layers.Input((70,70,3))
x = conv_base(inputs)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(2048, activation = 'relu') (x)
x = tf.keras.layers.Dense(1024,activation = 'relu') (x)
x = tf.keras.layers.Dense(128,activation = 'relu') (x)
out = tf.keras.layers.Dense(num_classes,activation = 'softmax') (x)
isinstance(tf.keras.layers.Flatten(), tf.keras.models.Model)
isinstance(conv_base, tf.keras.models.Model)
def set_trainable_layers(mod, num_head, num_base):
import time
for layer in [lay for lay in mod.layers if not isinstance(lay , tf.keras.models.Model)]:
layer.trainable = False
print(layer.name, layer.trainable)
for block in mod.layers:
if isinstance(block, tf.keras.models.Model):
print('Found Submodel')
for layer in block.layers:
layer.trainable = False
print(layer.name, layer.trainable)
if num_base > 0:
for layer in block.layers[-num_base:]:
layer.trainable = True
print(layer.name, layer.trainable)
if num_head > 0:
for layer in [lay for lay in mod.layers if not isinstance(lay, tf.keras.models.Model)][-num_head:]:
layer.trainable = True
print(layer.name, layer.trainable)
'''
Showcase1: First training frozen Model, then unfreezing, recomiling and retraining:
model behaves as expected
'''
mod = tf.keras.models.Model(inputs,out, name = 'TestModel')
set_trainable_layers(mod, 0 ,0)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) # Model should NOT learn
set_trainable_layers(mod, 10,20)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) #Model SHOULD learn
'''
Showcase2: First training unfrozen Model, then reducing number of trainable Layers:
Model behaves as Expected
'''
mod = tf.keras.models.Model(inputs,out, name = 'TestModel')
set_trainable_layers(mod, 10 ,20)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) # Model SHOULD learn
set_trainable_layers(mod, 0,0)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) #Model should NOT learn
'''
Showcase3: First training unfrozen Model, then reducing number of trainable Layers but also trying to trasnfer Optimizer States:
Behaves as subclassed Model: New Optimizer shouldnt have Weights
'''
mod = tf.keras.models.Model(inputs,out, name = 'TestModel')
set_trainable_layers(mod, 1 ,3)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) # Model SHOULD learn
opti_state = mod.optimizer.get_weights()
set_trainable_layers(mod, 4,8)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.optimizer.set_weights(opti_state)
mod.fit(ds_train, validation_data = ds_val) #Model should NOT learn
This is happening because one of the fundamental differences between the Subclassing API and the Functional or Sequential APIs in Tensorflow2.
While the Functional or Sequential APIs build a graph of Layers (think of it as a separate data structure), the Subclassing model builds a whole object and stores it as bytecode.
This means that with Subclassing you lose access to the internal connectivity graph and the normal behaviour that allows you to freeze/unfreeze layers or reuse them in other models starts to get weird. Seeing your implementation I would say that the Subclassed model is correct and it SHOULD be working if we were dealing with a library other than Tensorflow that is.
Francois Chollet explains it better than I will ever do in one of his Tweettorials
After some more experiments i have found a workaround for this Problem:
While the model itself cannot be unfrozen/frozen after the first compilation and training, it is however possible to save the model weights to a temporary file model.save_weights('temp.h5') and afterwards reconstructing the model class (Creating a new instance of model class for example) and loading the previous weights with model.load_weights('temp.h5').
However this can also lead to errors occuring when the previous model has both unfrozen and frozen weights. To prevent them you have to either set all layers trainable after the training and before saving weights, or copy the exact trainability structure of the model, and reconstructing the new model such that its layers have the same trainability state as the previous. this is possible with the following functions:
def get_trainability(model): # Takes Keras model and returns dictionary with layer names of Model as key, and its trainability as value/item
train_dict = {}
for layer in model.layers:
if isinstance(layer, tf.keras.models.Model):
train_dict.update(get_trainability(layer))
else:
train_dict[layer.name] = layer.trainable
return train_dict
def set_trainability(model, train_dict): # Takes keras Model and dictionary with layer names and booleans indicating the desired trainability of the layer.
# modifies model so that every Layer in the Model, whose name matches dict key will get trainable = boolean
for layer in model.layers:
if isinstance(layer, tf.keras.models.Model):
set_trainability(layer, train_dict)
else:
for name in train_dict.keys():
if name == layer.name:
layer.trainable = train_dict[name]
print(layer.name)
Hope this helps for simmilar problems in the Future

How to multiply a layer by a constant vector element wise in Keras?

I want to make a weighted average ensemble of 3 of my trained models. So, I want first to multiply the softmax output of a model (element-wise) by a vector and then average the 3 weighted outputs of the 3 models.
I used the following code to multiply the output of the first model by its weight vector:
from keras.layers import Multiply, Average
resnet_weights = np.asarray([[0.91855, 0.99485, 0.89065, 0.96525, 0.98005,
0.93645, 0.6149, 0.934, 0.92505, 0.785, 0.85]], np.float32)
resnet_weight_tensor=tf.constant(resnet_weights, np.float32)
sess = tf.InteractiveSession()
print(resnet_weight_tensor.eval())
sess.close()
resnet_weighted = Multiply()([finetuned_model.layers[-1].output, resnet_weight_tensor])
print(resnet_weighted)
new_model=Model(model.input, resnet_weighted)
However, I'm stuck with the following error:
What can I do?
Use Lambda instead of Multiply, and K.constant instead of tf.constant (is backend-neutral):
resnet_weight_tensor=K.constant(resnet_weights, 'float32')
out = finetuned_model.layers[-1].output
resnet_weighted = Lambda(lambda x: x * resnet_weight_tensor)(out)
FULL EXAMPLE:
## BUILD MODELS
batch_size = 32
num_batches = 100
input_shape = (4,)
num_classes = 3
model_1 = make_model(input_shape, 8, num_classes)
model_2 = make_model(input_shape, 10, num_classes)
model_3 = make_model(input_shape, 12, num_classes)
## BUILD ENSEMBLE
models = (model_1, model_2, model_3)
models_ins = [model.input for model in models]
models_outs = [model.input for model in models]
outputs_weights = [np.random.random((batch_size, num_classes)),
np.random.random((batch_size, num_classes)),
np.random.random((batch_size, num_classes))]
outs_avg = model_outputs_average(models, outputs_weights)
final_out = Dense(num_classes, activation='softmax')(outs_avg)
model_ensemble = Model(inputs=models_ins, outputs=final_out)
model_ensemble.compile('adam', loss='categorical_crossentropy')
### TEST ENSEMBLE
x1 = np.random.randn(batch_size, *input_shape) # toy data
x2 = np.random.randn(batch_size, *input_shape)
x3 = np.random.randn(batch_size, *input_shape)
y = np.random.randint(0,2,(batch_size, num_classes)) # toy labels
model_ensemble.fit([x1,x2,x3], y)
Verify averaging:
[print(layer.name) for layer in model_ensemble.layers] # show layer names
preouts1 = get_layer_outputs(model_ensemble, 'lambda_1', [x1,x2,x3])
preouts2 = get_layer_outputs(model_ensemble, 'lambda_2', [x1,x2,x3])
preouts3 = get_layer_outputs(model_ensemble, 'lambda_3', [x1,x2,x3])
preouts_avg = get_layer_outputs(model_ensemble, 'average_1',[x1,x2,x3])
preouts = np.asarray([preouts1, preouts2, preouts3])
sum_of_diff_of_means = np.sum(np.mean(preouts, axis=0) - preouts_avg)
print(np.sum(np.mean([preouts1, preouts2, preouts3],axis=0) - preouts_avg))
# 4.69e-07
Functions used:
def make_model(input_shape, dense_dim, num_classes=3):
ipt = Input(shape=input_shape)
x = Dense(dense_dim, activation='relu')(ipt)
out = Dense(num_classes, activation='softmax')(x)
model = Model(ipt, out)
model.compile('adam', loss='categorical_crossentropy')
return model
def model_outputs_average(models, outputs_weights):
outs = [model.output for model in models]
out_shape = K.int_shape(outs[0])[1:] # ignore batch dim
assert all([(K.int_shape(out)[1:] == out_shape) for out in outs]), \
"All model output shapes must match"
outs_weights = [K.constant(w, 'float32') for w in outputs_weights]
ow_shape = K.int_shape(outs_weights[0])
assert all([(K.int_shape(w) == ow_shape) for w in outs_weights]), \
"All outputs_weights and model.output shapes must match"
weights_layers = [Lambda(lambda x: x * ow)(out) for ow, out
in zip(outs_weights, outs)]
return Average()(weights_layers)
def get_layer_outputs(model,layer_name,input_data,train_mode=False):
outputs = [layer.output for layer in model.layers if layer_name in layer.name]
layers_fn = K.function([model.input, K.learning_phase()], outputs)
return [layers_fn([input_data,int(train_mode)])][0][0]
The bug is possibly caused by the mixture of kears api and tensorflow api, since your resnet_weight_tensor is a tensor from tensorflow api, while finetuned_model.layers[-1].output is the output from a keras layer. Some discusses can be seen here issue 7362
One walk around is to wrap resnet_weight_tensor into keras Input layer.
from keras.layers import Multiply, Average, Input
resnet_weights = np.asarray([[0.91855, 0.99485, 0.89065, 0.96525, 0.98005,
0.93645, 0.6149, 0.934, 0.92505, 0.785, 0.85]], np.float32)
resnet_weight_tensor=tf.constant(resnet_weights, np.float32)
resnet_weight_input = Input(tensor=resnet_weight_tensor)
sess = tf.InteractiveSession()
print(resnet_weight_tensor.eval())
sess.close()
resnet_weighted = Multiply()([finetuned_model.layers[-1].output, resnet_weight_input])
print(resnet_weighted)
new_model=Model([model.input, resnet_weight_input], resnet_weighted)