Why do some of the hidden units return zero in the GRU autoencoder? - tensorflow

I have implemented a recurrent neural network autoencoder as below:
def AE_GRU(X):
    inputs = Input(shape=(X.shape[1], X.shape[2]), name="input")
    L1 = GRU(8, activation="relu", return_sequences=True, kernel_regularizer=regularizers.l2(0.00), name="E1")(inputs)
    L2 = GRU(4, activation="relu", return_sequences=False, name="E2")(L1)
    L3 = RepeatVector(X.shape[1], name="RepeatVector")(L2)
    L4 = GRU(4, activation="relu", return_sequences=True, name="D1")(L3)
    L5 = GRU(8, activation="relu", return_sequences=True, name="D2")(L4)
    output = TimeDistributed(Dense(X.shape[2]), name="output")(L5)
    model = Model(inputs=inputs, outputs=[output])
    return model
and after that I am running the code below to train the AE:
model = AE_GRU(trainX)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=optimizer, loss="mse")
model.summary()
epochs = 5
batch_size = 64
history = model.fit(
    trainX, trainX,
    epochs=epochs, batch_size=batch_size,
    validation_data=(valX, valX)
).history
I have also attached the result of model.summary() below.
At the end, I am extracting the outputs of the second hidden layer by running the code below.
def all_hidden_layers_output(iModel, dtset):
    inp = iModel.input                                         # input placeholder
    outputs = [layer.output for layer in iModel.layers]        # all layer outputs
    functors = [K.function([inp], [out]) for out in outputs]   # evaluation functions
    layer_outs = [func([dtset]) for func in functors]
    return layer_outs
hidden_state_train = all_hidden_layers_output(model, trainX)[2][0]
hidden_state_val = all_hidden_layers_output(model, valX)[2][0]
# remove zeros_columns:
hidden_state_train = hidden_state_train[:,~np.all(hidden_state_train==0.0, axis=0)]
hidden_state_val = hidden_state_val[:,~np.all(hidden_state_val==0.0, axis=0)]
print(f"hidden_state_train.shape={hidden_state_train.shape}")
print(f"hidden_state_val.shape={hidden_state_val.shape}")
But I don't know why some of the units in this layer return zero all the time. I expect hidden_state_train and hidden_state_val to be 2D numpy arrays with 4 non-zero columns (based on the model.summary() information). Any help would be greatly appreciated.

This may be caused by the dying ReLU problem. ReLU outputs 0 for all negative inputs, so once a unit's pre-activation stays negative it produces no gradient and can get stuck at exactly zero. Have a look at this explanation of the problem: https://towardsdatascience.com/the-dying-relu-problem-clearly-explained-42d0c54e0d24
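One quick way to test this hypothesis is to retrain the same autoencoder with activations that cannot get stuck at exactly zero and check whether the all-zero columns disappear. Below is a minimal sketch, assuming the same trainX data as above; the choice of tanh (the GRU default) for the encoder, leaky ReLU for the decoder, and the lower learning rate are illustrative, not part of the original code:
import tensorflow as tf
from tensorflow.keras.layers import Input, GRU, RepeatVector, TimeDistributed, Dense
from tensorflow.keras.models import Model

def AE_GRU_check(X):
    # Same topology as AE_GRU above, but without plain ReLU, so no unit can die at 0.
    inputs = Input(shape=(X.shape[1], X.shape[2]), name="input")
    e1 = GRU(8, activation="tanh", return_sequences=True, name="E1")(inputs)
    e2 = GRU(4, activation="tanh", return_sequences=False, name="E2")(e1)
    r = RepeatVector(X.shape[1], name="RepeatVector")(e2)
    d1 = GRU(4, activation=tf.nn.leaky_relu, return_sequences=True, name="D1")(r)
    d2 = GRU(8, activation=tf.nn.leaky_relu, return_sequences=True, name="D2")(d1)
    output = TimeDistributed(Dense(X.shape[2]), name="output")(d2)
    return Model(inputs=inputs, outputs=[output])

model = AE_GRU_check(trainX)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss="mse")  # smaller LR than 0.01
If the zero columns disappear with this setup, dying ReLU (possibly aggravated by the relatively large learning rate of 0.01) is the likely culprit.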

Related

ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer`

I am trying to build a classifier for mixed data consisting of text and categorical data. I want to process the text data using an embedding layer followed by an LSTM cell and a dense network.
For the categorical data, I created a dense network.
I want to concatenate the outputs of the two networks and pass them through another dense network.
Here is my code
def create_model():
    seq = tf.keras.layers.Input(shape=(100,))
    embeddings = tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length)(seq)
    x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128))(embeddings)
    x = tf.keras.layers.Dense(64, activation = 'relu')(x)
    x = tf.keras.layers.Dense(32, activation = 'relu')(x)
    categorical_input = tf.keras.layers.Input(shape =(4198, ) )
    y = tf.keras.layers.Dense(128, activation = 'relu')(categorical_input)
    y = tf.keras.layers.Dense(64, activation = 'relu')(y)
    combined = tf.keras.layers.concatenate([x, y])
    z = tf.keras.layers.Dense(32, activation='relu')(combined)
    z = tf.keras.layers.Dense(16, activation = 'relu')(z)
    outs = tf.keras.layers.Dense(3, activation = 'softmax')
    return tf.keras.Model(inputs = [seq, categorical_input], outputs = outs)
model = create_model()
model.summary()
And here is the full traceback:
/tmp/ipykernel_33/1585487845.py in create_model()
     14     z = tf.keras.layers.Dense(16, activation = 'relu')(z)
     15     outs = tf.keras.layers.Dense(3, activation = 'softmax')
---> 16     return tf.keras.Model(inputs = [seq, categorical_input], outputs = outs)
     17
ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` (thus holding past layer metadata). Found: <keras.layers.core.Dense object at 0x7f30ecaee790>
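The traceback shows that outs is a Dense layer object rather than a tensor: the last Dense layer is created but never called on z. A minimal sketch of the likely fix (only the last two lines of create_model change):
    outs = tf.keras.layers.Dense(3, activation = 'softmax')(z)
    return tf.keras.Model(inputs = [seq, categorical_input], outputs = outs)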

How to freeze/unfreeze a pretrained Model as part of a subclassed Model in Tensorflow?

I am trying to build a subclassed Model which consists of a pretrained convolutional base and some Dense layers on top, using TensorFlow >= 2.4.
However, freezing/unfreezing the subclassed Model has no effect once it has been trained. When I do the same with the Functional API, everything works as expected. I would really appreciate a hint as to what I am missing here. The following code should specify my problem further; pardon the amount of code:
#Setup
import tensorflow as tf
tf.config.run_functions_eagerly(False)
import numpy as np
from tensorflow.keras.regularizers import l1
import matplotlib.pyplot as plt
@tf.function
def create_images_and_labels(img, label, height = 70, width = 70): # Image augmentation
    label = tf.cast(label, 'float32')
    label = tf.squeeze(label)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize(img, (height, width))
    # img = preprocess_input(img)
    return img, label
cifar = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar.load_data()
num_classes = len(np.unique(y_train))
ds_train = tf.data.Dataset.from_tensor_slices((x_train, tf.one_hot(y_train, depth = len(np.unique(y_train)))))
ds_train = ds_train.map(lambda img, label: create_images_and_labels(img, label, height = 70, width = 70))
ds_train = ds_train.shuffle(50000)
ds_train = ds_train.batch(50, drop_remainder = True)
ds_val = tf.data.Dataset.from_tensor_slices((x_test, tf.one_hot(y_test, depth = len(np.unique(y_train)))))
ds_val = ds_val.map(lambda img, label: create_images_and_labels(img, label, height = 70, width = 70))
ds_val = ds_val.batch(50, drop_remainder=True)
# for i in ds_train.take(1):
# x, y = i
# for ind in range(x.shape[0]):
# plt.imshow(x[ind,:,:])
# plt.show()
# print(y[ind])
'''
Defines a simple subclassed Model consisting of
    VGG16
    Flatten
    Dense layers
Customizes what happens in model.fit and model.evaluate (actually it is the standard Keras
procedure with custom metrics).
Customized metrics: loss and accuracy for the training and validation steps.
Adds an unfreezing method 'set_trainable_layers'.
Arguments:
    num_head (how many Dense layers to unfreeze)
    num_base (how many VGG layers to unfreeze)
'''
class Test_Model(tf.keras.models.Model):
    def __init__(
            self,
            num_unfrozen_head_layers,
            num_unfrozen_base_layers,
            num_classes,
            conv_base = tf.keras.applications.VGG16(include_top = False, weights = 'imagenet', input_shape = (70,70,3)),
            ):
        super(Test_Model, self).__init__(name = "Test_Model")
        self.base = conv_base
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(2048, activation = 'relu')
        self.dense2 = tf.keras.layers.Dense(1024, activation = 'relu')
        self.dense3 = tf.keras.layers.Dense(128, activation = 'relu')
        self.out = tf.keras.layers.Dense(num_classes, activation = 'softmax')
        self.out._name = 'out'
        self.train_loss_metric = tf.keras.metrics.Mean('Supervised Training Loss')
        self.train_acc_metric = tf.keras.metrics.CategoricalAccuracy('Supervised Training Accuracy')
        self.val_loss_metric = tf.keras.metrics.Mean('Supervised Validation Loss')
        self.val_acc_metric = tf.keras.metrics.CategoricalAccuracy('Supervised Validation Accuracy')
        self.loss_fn = tf.keras.losses.categorical_crossentropy
        self.learning_rate = 1e-4
        # self.build((None, 32,32,3))
        self.set_trainable_layers(num_unfrozen_head_layers, num_unfrozen_base_layers)
    @tf.function
    def call(self, inputs, training = False):
        x = self.base(inputs)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.out(x)
        return x
    @tf.function
    def train_step(self, input_data):
        x_batch, y_batch = input_data
        with tf.GradientTape() as tape:
            tape.watch(x_batch)
            y_pred = self(x_batch, training = True)
            loss = self.loss_fn(y_batch, y_pred)
        trainable_vars = self.trainable_weights
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.train_loss_metric.update_state(loss)
        self.train_acc_metric.update_state(y_batch, y_pred)
        return {"Supervised Loss": self.train_loss_metric.result(),
                "Supervised Accuracy": self.train_acc_metric.result()}
    @tf.function
    def test_step(self, input_data):
        x_batch, y_batch = input_data
        y_pred = self(x_batch, training = False)
        loss = self.loss_fn(y_batch, y_pred)
        self.val_loss_metric.update_state(loss)
        self.val_acc_metric.update_state(y_batch, y_pred)
        return {"Val Supervised Loss": self.val_loss_metric.result(),
                "Val Supervised Accuracy": self.val_acc_metric.result()}
    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [self.train_loss_metric,
                self.train_acc_metric,
                self.val_loss_metric,
                self.val_acc_metric]
    def set_trainable_layers(self, num_head, num_base):
        for layer in [lay for lay in self.layers if not isinstance(lay, tf.keras.models.Model)]:
            layer.trainable = False
            print(layer.name, layer.trainable)
        for block in self.layers:
            if isinstance(block, tf.keras.models.Model):
                print('Found Submodel', block.name)
                for layer in block.layers:
                    layer.trainable = False
                    print(layer.name, layer.trainable)
                if num_base > 0:
                    for layer in block.layers[-num_base:]:
                        layer.trainable = True
                        print(layer.name, layer.trainable)
        if num_head > 0:
            for layer in [lay for lay in self.layers if not isinstance(lay, tf.keras.models.Model)][-num_head:]:
                layer.trainable = True
                print(layer.name, layer.trainable)
'''
Showcase 1: first training the completely frozen Model, then unfreezing:
the unfrozen model doesn't learn
'''
model = Test_Model(num_unfrozen_head_layers = 0, num_unfrozen_base_layers = 0, num_classes = num_classes) # Should NOT learn -> doesn't learn
model.build((None, 70,70,3))
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data = ds_val)
model.set_trainable_layers(10,20) # SHOULD learn -> doesn't learn
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data = ds_val)
# DOESN'T LEARN
'''
Showcase 2: when first training the Model with more trainable layers than in the second step:
an AssertionError occurs
'''
model = Test_Model(num_unfrozen_head_layers = 10, num_unfrozen_base_layers = 2, num_classes = num_classes) # SHOULD learn -> learns
model.build((None, 70,70,3))
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data = ds_val)
model.set_trainable_layers(1,1) # SHOULD NOT learn -> AssertionError
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data = ds_val)
'''
Showcase 3: same procedure as in Showcase 2, but the optimizer state is transferred to the recompiled Model:
can't set the weights because the optimizer expects a list of length 0
'''
model = Test_Model(num_unfrozen_head_layers = 10, num_unfrozen_base_layers = 20, num_classes = num_classes) # SHOULD learn -> learns
model.build((None, 70,70,3))
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.fit(ds_train, validation_data = ds_val)
opti_state = model.optimizer.get_weights()
model.set_trainable_layers(0,0) # SHOULD NOT learn -> learns
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(1e-5))
model.optimizer.set_weights(opti_state)
model.fit(ds_train, validation_data = ds_val)
#%%%
'''
Constructing the same architecture with the Functional API and running the experiments
'''
import tensorflow as tf
conv_base = tf.keras.applications.VGG16(include_top = False, weights = 'imagenet', input_shape = (70,70,3))
inputs = tf.keras.layers.Input((70,70,3))
x = conv_base(inputs)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(2048, activation = 'relu') (x)
x = tf.keras.layers.Dense(1024,activation = 'relu') (x)
x = tf.keras.layers.Dense(128,activation = 'relu') (x)
out = tf.keras.layers.Dense(num_classes,activation = 'softmax') (x)
isinstance(tf.keras.layers.Flatten(), tf.keras.models.Model)
isinstance(conv_base, tf.keras.models.Model)
def set_trainable_layers(mod, num_head, num_base):
    import time
    for layer in [lay for lay in mod.layers if not isinstance(lay, tf.keras.models.Model)]:
        layer.trainable = False
        print(layer.name, layer.trainable)
    for block in mod.layers:
        if isinstance(block, tf.keras.models.Model):
            print('Found Submodel')
            for layer in block.layers:
                layer.trainable = False
                print(layer.name, layer.trainable)
            if num_base > 0:
                for layer in block.layers[-num_base:]:
                    layer.trainable = True
                    print(layer.name, layer.trainable)
    if num_head > 0:
        for layer in [lay for lay in mod.layers if not isinstance(lay, tf.keras.models.Model)][-num_head:]:
            layer.trainable = True
            print(layer.name, layer.trainable)
'''
Showcase 1: first training the frozen Model, then unfreezing, recompiling and retraining:
the model behaves as expected
'''
mod = tf.keras.models.Model(inputs,out, name = 'TestModel')
set_trainable_layers(mod, 0 ,0)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) # Model should NOT learn
set_trainable_layers(mod, 10,20)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) #Model SHOULD learn
'''
Showcase 2: first training the unfrozen Model, then reducing the number of trainable layers:
the Model behaves as expected
'''
mod = tf.keras.models.Model(inputs,out, name = 'TestModel')
set_trainable_layers(mod, 10 ,20)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) # Model SHOULD learn
set_trainable_layers(mod, 0,0)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) #Model should NOT learn
'''
Showcase 3: first training the unfrozen Model, then changing the number of trainable layers, but also trying to transfer the optimizer state:
behaves like the subclassed Model: the new optimizer shouldn't have weights
'''
mod = tf.keras.models.Model(inputs,out, name = 'TestModel')
set_trainable_layers(mod, 1 ,3)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.fit(ds_train, validation_data = ds_val) # Model SHOULD learn
opti_state = mod.optimizer.get_weights()
set_trainable_layers(mod, 4,8)
mod.summary()
mod.compile(optimizer = tf.keras.optimizers.Adam(1e-5), loss = 'categorical_crossentropy', metrics = ['accuracy'])
mod.optimizer.set_weights(opti_state)
mod.fit(ds_train, validation_data = ds_val) #Model should NOT learn
This is happening because of one of the fundamental differences between the Subclassing API and the Functional or Sequential APIs in TensorFlow 2.
While the Functional and Sequential APIs build a graph of layers (think of it as a separate data structure), a subclassed model is a whole Python object whose layer connectivity lives only in the bytecode of its call method.
This means that with subclassing you lose access to the internal connectivity graph, and the normal behaviour that allows you to freeze/unfreeze layers or reuse them in other models starts to get weird. Looking at your implementation, I would say that the subclassed model is correct and it SHOULD be working, if we were dealing with a library other than TensorFlow, that is.
Francois Chollet explains it better than I ever will in one of his Tweettorials.
After some more experiments I have found a workaround for this problem:
While the model itself cannot be unfrozen/frozen after the first compilation and training, it is possible to save the model weights to a temporary file (model.save_weights('temp.h5')), then reconstruct the model class (for example by creating a new instance of the model class) and load the previous weights with model.load_weights('temp.h5').
However, this can lead to errors when the previous model has both unfrozen and frozen weights. To prevent them you either have to set all layers trainable after training and before saving the weights, or copy the exact trainability structure of the model and reconstruct the new model such that its layers have the same trainability state as before. This is possible with the following functions:
def get_trainability(model):
    # Takes a Keras model and returns a dictionary with the layer names of the
    # model as keys and their trainability as values.
    train_dict = {}
    for layer in model.layers:
        if isinstance(layer, tf.keras.models.Model):
            train_dict.update(get_trainability(layer))
        else:
            train_dict[layer.name] = layer.trainable
    return train_dict

def set_trainability(model, train_dict):
    # Takes a Keras model and a dictionary with layer names and booleans indicating
    # the desired trainability of each layer. Modifies the model so that every layer
    # whose name matches a dict key gets trainable = boolean.
    for layer in model.layers:
        if isinstance(layer, tf.keras.models.Model):
            set_trainability(layer, train_dict)
        else:
            for name in train_dict.keys():
                if name == layer.name:
                    layer.trainable = train_dict[name]
                    print(layer.name)
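A minimal sketch of how these helpers combine with the save/load workaround described above (the file name 'temp.h5' is just an example; Test_Model is the subclassed model from the question):
# 1. Remember the trainability layout and the weights of the trained model.
train_dict = get_trainability(model)
model.save_weights('temp.h5')

# 2. Build a fresh instance of the same architecture.
new_model = Test_Model(num_unfrozen_head_layers=0, num_unfrozen_base_layers=0, num_classes=num_classes)
new_model.build((None, 70, 70, 3))

# 3. Restore the old trainability layout and the weights, then recompile and continue training.
set_trainability(new_model, train_dict)
new_model.load_weights('temp.h5')
new_model.compile(optimizer=tf.keras.optimizers.Adam(1e-5))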
Hope this helps with similar problems in the future.

VGG 16 model training with tensorflow

I'm trying to use VGG16 from Keras to train a model for image classification.
Based on these articles (https://www.pyimagesearch.com/2019/06/03/fine-tuning-with-keras-and-deep-learning/ and https://learnopencv.com/keras-tutorial-fine-tuning-using-pre-trained-models/), I've added some additional Dense layers on top of the VGG16 model. However, the training accuracy after 20 epochs is around 35% to 41%, which doesn't match the results of these articles (above 90%).
Because of this, I would like to know whether I did something wrong in my code below.
Basic setting
url='/content/drive/My Drive/fer2013.csv'
batch_size = 64
img_width,img_height = 48,48
# 0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral
num_classes = 7
model_path = '/content/drive/My Drive/Af/cnn.h5'
df=pd.read_csv(url)
def _load_fer():
    # Load training and eval data
    df = pd.read_csv(url, sep=',')
    train_df = df[df['Usage'] == 'Training']
    eval_df = df[df['Usage'] == 'PublicTest']
    return train_df, eval_df

def _preprocess_fer(df, label_col='emotion', feature_col='pixels'):
    labels, features = df.loc[:, label_col].values.astype(np.int32), [
        np.fromstring(image, np.float32, sep=' ')
        for image in df.loc[:, feature_col].values]
    labels = [to_categorical(l, num_classes=num_classes) for l in labels]
    features = np.stack((features,) * 3, axis=-1)
    features /= 255
    features = features.reshape(features.shape[0], img_width, img_height, 3)
    return features, labels
# Load fer data
train_df, eval_df = _load_fer()
# preprocess fer data
x_train, y_train = _preprocess_fer(train_df)
x_valid, y_valid = _preprocess_fer(eval_df)
gen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')
train_generator = gen.flow(x_train, y_train, batch_size=batch_size)
predict_size_train = int(np.math.ceil(len(x_train) / batch_size))
input_tensor = Input(shape=(img_width, img_height, 3))
Now comes the model training part
baseModel = VGG16(
    include_top=False, weights='imagenet',
    input_tensor=input_tensor
)
# Construct the head of the model that will be placed on top of the base model (fine tuning)
headModel = baseModel.output
headModel = Flatten()(headModel)
headModel = Dense(1024, activation="relu")(headModel)
#headModel = Dropout(0.5)(headModel)
headModel = BatchNormalization()(headModel)
headModel = Dense(num_classes, activation="softmax")(headModel)
model = Model(inputs=baseModel.input, outputs=headModel)
for layer in baseModel.layers:
    layer.trainable = False
model summary
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(lr=0.001),
              metrics=['accuracy'])
history = model.fit(train_generator,
                    steps_per_epoch=predict_size_train * 1,
                    epochs=20,
                    validation_data=valid_generator,
                    validation_steps=predict_size_valid)
Result:
Result after training
I would be very thankful for your advice.
Best Regards.
Since you are freezing all the base layers, a single Dense layer might not give you the desired accuracy. Also, if you are not in a hurry, you do not need to set the validation_steps and steps_per_epoch parameters. Also, in this tutorial the model has fluctuations, which you do not want.
I suggest:
for layer in baseModel.layers:
    layer.trainable = False
base_out = baseModel.get_layer('block3_pool').output  # layer name may be different,
                                                      # check with baseModel.summary()
With that you can get a specific layer's output. After getting the output, you can add some convolutions, and after the convolutions try stacking more Dense layers, like:
x = tf.keras.layers.Flatten()(x)
x = Dense(512, activation= 'relu')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation= 'relu')(x)
x = Dropout(0.2)(x)
output_model = Dense(num_classes, activation = 'softmax')(x)
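For example, a minimal sketch of that idea (the filter counts and layer sizes are illustrative, not from the original answer), picking up from base_out defined above:
x = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(base_out)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)
x = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(512, activation='relu')(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)
output_model = tf.keras.layers.Dense(num_classes, activation='softmax')(x)
model = tf.keras.Model(inputs=baseModel.input, outputs=output_model)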
If you don't want to add convolutions and want to use baseModel completely, that's also fine; however, you can do something like this:
for layer in baseModel.layers[:12]:  # 12 is arbitrary, you can try different values.
                                     # Not all layers are frozen this time.
    layer.trainable = False
for i, layer in enumerate(baseModel.layers):
    print(i, layer.name, layer.trainable)  # check the frozen layers
After that, you can try to set:
headModel = baseModel.output
headModel = Flatten()(headModel)
headModel = Dense(1024, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(512, activation="relu")(headModel)
headModel = Dense(num_classes, activation="softmax")(headModel)
If you see that your model is learning but your loss has fluctuations, you can reduce the learning rate, or you can use the ReduceLROnPlateau callback:
rd_lr = ReduceLROnPlateau(monitor='val_loss', factor = np.sqrt(0.1), patience= 4, verbose = 1, min_lr = 5e-8)
The parameters are totally up to your model. For more details you can see the docs.
What is the form of the content of y_train? If the labels are integer values, then you need to convert them to one-hot vectors with
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
since you are using loss='categorical_crossentropy' in model.compile. In addition, VGG16 expects its inputs to be preprocessed with its own preprocess_input function (rather than just scaled by 1/255), so include
gen = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input, ...)  # plus the augmentation arguments shown above
When you are training you have
for layer in baseModel.layers:
    layer.trainable = False
so you are only training the Dense layers, which is OK but may not give you high accuracy. You might want to leave VGG trainable, but of course this will take longer. Or, after you train with VGG frozen, change it back to trainable and run a few more epochs to fine-tune the model, as sketched below.
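A minimal sketch of that two-phase schedule, reusing baseModel, model and train_generator from the question (the epoch counts and learning rates are illustrative; using x_valid with the y_valid list converted to an array is an assumption based on the question's preprocessing):
import numpy as np

# Phase 1: train only the new head with the VGG16 base frozen.
for layer in baseModel.layers:
    layer.trainable = False
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              metrics=['accuracy'])
model.fit(train_generator, epochs=20, validation_data=(x_valid, np.array(y_valid)))

# Phase 2: unfreeze the base and fine-tune everything with a much smaller learning rate.
for layer in baseModel.layers:
    layer.trainable = True
model.compile(loss='categorical_crossentropy',   # recompiling is required after changing trainable
              optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              metrics=['accuracy'])
model.fit(train_generator, epochs=10, validation_data=(x_valid, np.array(y_valid)))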

Keras gives 'Not JSON Serializable' error when saving the model

I'm implementing a fully convolutional neural network for image segmentation, using the unet defined here:
https://github.com/zhixuhao
To give different weights to the pixels of different classes, I defined an extra Lambda layer, as suggested here:
Keras, binary segmentation, add weight to loss function
The problem is that Keras raises this error when saving the model:
.....
self.model.save(filepath, overwrite=True)
.....
TypeError: ('Not JSON Serializable:', b'\n\x15clip_by_value/Minimum\x12\x07Minimum\x1a\x12conv2d_23/Identity\x1a\x17clip_by_value/Minimum/y*\x07\n\x01T\x12\x020\x01')
My network is defined in an external function
def weighted_binary_loss(X):
    y_pred, y_true, weights = X
    loss = binary_crossentropy(y_true, y_pred)
    weights_mask = y_true*weights[0] + (1.-y_true)*weights[1]
    loss = multiply([loss, weights_mask])
    return loss

def identity_loss(y_true, y_pred):
    return y_pred

def net():
    .....
    ....
    conv10 = Conv2D(1, 1, activation = 'sigmoid')(conv9)
    w_loss = Lambda(weighted_binary_loss, output_shape=input_size, name='loss_output')([conv10, inputs, weights])
    model = Model(inputs = inputs, outputs = w_loss)
    model.compile(optimizer = Adam(lr = 1e-5), loss = identity_loss, metrics = ['accuracy'])
that I call in my main function
...
model_checkpoint = ModelCheckpoint('temp_model.hdf5', monitor='loss',verbose=1, save_best_only=True)
model.fit_generator(imgs,steps_per_epoch=20,epochs=1,callbacks=[model_checkpoint])
When I remove the Lambda layer, the error disappears:
...
conv10 = Conv2D(1, 1, activation = 'sigmoid')(conv9)
model = Model(inputs = inputs, outputs = conv10)
model.compile(optimizer = Adam(lr = 1e-5), loss = 'binary_crossentropy', metrics = ['accuracy'])
I'm using
Keras==2.2.4, tensorflow-gpu==2.0.0b1
It appears that you are computing the loss in a layer of the model. It is not good practice to implement the loss function as a layer. You can compute your weighted loss using a custom loss function.
So your code can be rewritten as follows:
def weighted_binary_loss(y_true, y_pred):
    weights = [0.5, 0.6]  # Define your weights here
    loss = binary_crossentropy(y_true, y_pred)
    weights_mask = y_true*weights[0] + (1.-y_true)*weights[1]
    loss = multiply([loss, weights_mask])
    return loss

conv10 = Conv2D(1, 1, activation = 'sigmoid')(conv9)
model = Model(inputs = inputs, outputs = conv10)
model.compile(optimizer = Adam(lr = 1e-5), loss = weighted_binary_loss, metrics = ['accuracy'])
If you need weights to be dynamic and have to pass it as a separate parameter to the loss function, you can follow this question, for example by wrapping the loss in a factory function as sketched below.
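A minimal sketch of that idea, using a loss-function factory that closes over the weights (the weight values are illustrative; standalone Keras, as in the question, is assumed):
from keras import backend as K

def make_weighted_binary_loss(weights):
    # Returns a Keras-compatible loss that scales the per-pixel binary crossentropy
    # by class-dependent weights (weights[0] for the positive class, weights[1] for the negative).
    def loss_fn(y_true, y_pred):
        bce = K.binary_crossentropy(y_true, y_pred)  # element-wise, same shape as y_true
        weights_mask = y_true * weights[0] + (1. - y_true) * weights[1]
        return K.mean(bce * weights_mask, axis=-1)
    return loss_fn

model.compile(optimizer = Adam(lr = 1e-5),
              loss = make_weighted_binary_loss([0.5, 0.6]),
              metrics = ['accuracy'])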

Concatenate an input before Dense layer. [Keras with TF backend]

So, I need to concatenate an input to the flattened layer before going into the dense layer.
I'm using Keras with TF as the backend.
model.add(Flatten())
aux_input = Input(shape=(1, ))
model.add(Concatenate([model, aux_input]))
model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay)))
I have a scenario like this: X_train, y_train, aux_train. The shape of y_train and aux_train is the same, (1,). An image has a ground truth and an aux_input.
How do I add this aux_input to the model while doing model.fit?
As suggested in the answers, I changed my model to the functional API. However, now I get the following error.
ValueError: Layer dense_1 was called with an input that isn't a symbolic tensor. Received type: . Full input: []. All inputs to the layer should be tensors.
Here's the code for that part.
flatten = Flatten()(drop_5)
aux_rand = Input(shape=self.aux_shape)
concat = Concatenate([flatten, aux_input])
fc1 = Dense(512, kernel_regularizer=regularizers.l2(weight_decay))(concat)
Shape of aux input
aux_shape = (1,)
And then calling the model as follows:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
aux_rand = np.random.rand(y_train.shape[0])
model_inst = cifar10vgg()
x_train_input = Input(shape=(32,32,3))
aux_input = Input(shape=(1,))
model = Model(inputs=[x_train_input, aux_input], output=model_inst.build_model())
model.fit(x=[x_train, aux_rand], y=y_train, batch_size=batch_size, steps_per_epoch=x_train.shape[0] // batch_size,
          epochs=maxepoches, validation_data=(x_test, y_test),
          callbacks=[reduce_lr, tensorboard], verbose=2)
model_inst.build_model() returns Activation('softmax')(fc2) which is the output to be fed into the Model (as far as I understood)
As I see from your code, you implemented the model with the Sequential API, which is not a good option in this case. If you have some auxiliary inputs, the best way to implement such a feature is to use the functional API.
Here is an example from the Keras website:
import keras
from keras.layers import Input, Embedding, LSTM, Dense
from keras.models import Model
main_input = Input(shape=(100,), dtype='int32', name='main_input')
x = Embedding(output_dim=512, input_dim=10000, input_length=100)(main_input)
lstm_out = LSTM(32)(x)
auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_out)
auxiliary_input = Input(shape=(5,), name='aux_input')
x = keras.layers.concatenate([lstm_out, auxiliary_input])
x = Dense(64, activation='relu')(x)
main_output = Dense(1, activation='sigmoid', name='main_output')(x)
model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output, auxiliary_output])
Based on your description, I think the following code can give you some intuition:
x1 = Input(shape=(32, 32, 3))
flatten1 = Flatten()(x1)
x2 = Input(shape=(244, 244, 3))
vgg = VGG19(weights='imagenet', include_top=False)(x2)
flatten2 = Flatten()(vgg)
concat = Concatenate()([flatten1, flatten2])
d = Dense(10, activation='softmax')(concat)
model = Model(inputs=[x1, x2], outputs=[d])
model.compile('adam', 'categorical_crossentropy')
model.fit(x=[x_train1, x_train2], y=y_labels)
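Note also that the error in the edited question comes from how Concatenate is used: Concatenate([flatten, aux_input]) passes the tensors to the layer constructor instead of calling the layer on them, so Dense receives a layer object rather than a tensor (the snippet also mixes up aux_rand and aux_input). A minimal sketch of the corrected snippet from the question, assuming drop_5, self.aux_shape and weight_decay are defined as in the question:
flatten = Flatten()(drop_5)
aux_rand = Input(shape=self.aux_shape)
concat = Concatenate()([flatten, aux_rand])   # instantiate the layer, then call it on the tensor list
fc1 = Dense(512, kernel_regularizer=regularizers.l2(weight_decay))(concat)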