tfr.keras.losses.ListMLELoss() is always 0 during training, validation, and testing - tensorflow

I have built a ranking model using tensorflow_ranking losses and metrics, but ListMLELoss() is always 0. The model trains and completes, but I imagine no learning is actually happening since the loss is never anything other than zero. I tried to follow this guide, https://www.tensorflow.org/recommenders/examples/listwise_ranking, as closely as I could, but my use case differs in a few ways. I am not sure why model.fit() runs and I get an NDCG value, yet the model clearly cannot be learning if no loss value is being computed.
Here is my ranking model class:
import tensorflow as tf
import tensorflow_ranking as tfr
import tensorflow_recommenders as tfrs  # imports added for completeness

class RankingModel(tf.keras.Model):

    def __init__(self, embeddings, vocab_size_dict, dim_dict, loss, activation='sigmoid'):
        super().__init__()
        self.embeddings = embeddings
        self.embedding_layers = {}
        self.vocab_size_dict = vocab_size_dict
        self.dim_dict = dim_dict
        self.activation = activation
        self.loss = loss
        self.embedding_layers['feature_one'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_one']+1,
            self.dim_dict['feature_one'],
            name='embedded_feature_one')
        self.embedding_layers['feature_two'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_two']+1,
            self.dim_dict['feature_two'],
            name='embedded_feature_two')
        self.embedding_layers['feature_three'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_three']+1,
            self.dim_dict['feature_three'],
            name='embedded_feature_three')
        self.embedding_layers['feature_four'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_four']+1,
            self.dim_dict['feature_four'],
            name='embedded_feature_four')
        self.embedding_layers['feature_five'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_five']+1,
            self.dim_dict['feature_five'],
            name='embedded_feature_five')
        self.flatten = tf.keras.layers.Flatten()
        self.concatenate = tf.keras.layers.Concatenate(axis=1, name='Input_Concatenation')
        self.batchnorm = tf.keras.layers.BatchNormalization(name='batchnorm')
        self.score_model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='leaky_relu'),
            tf.keras.layers.Dense(12, activation=activation)
        ])
        self.task = tfrs.tasks.Ranking(
            loss=self.loss,
            metrics=[
                tfr.keras.metrics.NDCGMetric(name="ndcg_metric")
            ]
        )
    def __call__(self, features, training=False):
        feats = []
        for feat, tens in features[0].items():
            if feat in self.embeddings:
                embedding = self.embedding_layers[feat](tens)
                flatten = self.flatten(embedding)
                feats.append(flatten)
            if feat == 'continuous':
                flatten = self.flatten(tens)
                feats.append(flatten)
        deep_concatenated = self.concatenate(feats)
        batchnorm = self.batchnorm(deep_concatenated)
        scores = self.score_model(batchnorm)
        print("scores: ", scores)
        print("mask: ", features[0]['mask'])
        masked_scores = tf.boolean_mask(scores, features[0]['mask'])
        # pred = tf.expand_dims(masked_scores, axis=1)
        # return pred
        return tf.expand_dims(masked_scores, axis=1)
    def compute_loss(self, features, training=False):
        labels = features[1]
        # print("labels: ", labels)
        # print("mask: ", features[0]['mask'])
        masked_labels = tf.boolean_mask(labels, features[0]['mask'])
        # print("masked labels:", masked_labels)
        masked_labels = tf.expand_dims(masked_labels, axis=1)
        print("masked_labels: ", masked_labels)
        scores = self(features)
        print("scores: ", scores)
        print("loss: ", self.task(labels=masked_labels, predictions=scores))
        return self.task(
            labels=masked_labels,
            predictions=scores
        )
    def train_step(self, inputs):
        """Custom train step using the `compute_loss` method."""
        with tf.GradientTape() as tape:
            loss = self.compute_loss(inputs)
            # Handle regularization losses as well.
            regularization_loss = sum(self.losses)
            total_loss = loss + regularization_loss
        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        metrics = {metric.name: metric.result() for metric in self.metrics}
        metrics["loss"] = loss
        metrics["regularization_loss"] = regularization_loss
        metrics["total_loss"] = total_loss
        return metrics

    def test_step(self, inputs):
        """Custom test step using the `compute_loss` method."""
        loss = self.compute_loss(inputs)
        # Handle regularization losses as well.
        regularization_loss = sum(self.losses)
        total_loss = loss + regularization_loss
        metrics = {metric.name: metric.result() for metric in self.metrics}
        metrics["loss"] = loss
        metrics["regularization_loss"] = regularization_loss
        metrics["total_loss"] = total_loss
        return metrics
Can anybody see why I am not getting a loss value? Thanks a lot. Please let me know if you need additional info. Maybe I can create some synthetic data so you can run it all yourself. I've been pulling my hair out for a few days trying to get this to work, so any advice is MUCH appreciated.
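As a starting point for that, here is a minimal synthetic check of the loss on its own (the tensors below are made up; tfr is tensorflow_ranking):

import tensorflow as tf
import tensorflow_ranking as tfr

loss_fn = tfr.keras.losses.ListMLELoss()

# labels and scores are expected with shape [batch_size, list_size]
y_true = tf.constant([[3.0, 1.0, 2.0],
                      [2.0, 3.0, 1.0]])
y_pred = tf.constant([[0.5, 0.2, 0.1],
                      [0.8, 0.3, 0.4]])
print(loss_fn(y_true, y_pred).numpy())   # non-zero for lists with more than one item

# A list of size 1 has only one possible ordering, so ListMLE has nothing
# to rank there and the loss comes out as 0.
print(loss_fn(tf.constant([[3.0]]), tf.constant([[0.5]])).numpy())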

Related

How to select only some trainable variables from an NN model to minimize with the SciPy L_BFGS_B optimizer?

I'm implementing a physics-informed neural network (PINN) model to solve the Navier-Stokes equation, as in PINN. This type of model works better with L_BFGS_B, and the best optimizer for my case is fmin_l_bfgs_b from SciPy.
The problem is that this optimizer does not work directly with TensorFlow. To make it work, I implemented a class L_BFGS_B with the following methods:
set_weights: set weights to the model
evaluate: evaluate loss and gradients
tf_evaluate: evaluate loss and gradients as tf.Tensor
fit: train the model
Everything works fine. The optimizer is training all the weights of the model, but the problem is that I only want to train two out of the 18 trainable variables.
Optimizer class:
class L_BFGS_B:

    def __init__(self, model, x_train, y_train, factr=1, m=50, maxls=50, maxfun=50000, maxiter=50000):
        self.model = model
        # x_train = xyt, y_train = uv
        self.x_train = x_train  # tf.constant(x_train, dtype=tf.float32)
        self.y_train = y_train  # tf.constant(y_train, dtype=tf.float32)
        # tolerance controlling when the iteration terminates
        self.factr = factr
        # the maximum number of variable metric corrections used
        self.m = m
        # max number of line search steps per iteration (in this case 50 per iteration)
        self.maxls = maxls
        # max number of iterations
        self.maxiter = maxiter
        self.maxfun = maxfun

    # tf.function
    def tf_evaluate(self, x, y):
        """
        Evaluate loss and gradients for weights as tf.Tensor.

        Args:
            x: input data.
            y: target data.

        Returns:
            loss and gradients for weights as tf.Tensor.
        """
        # where x = xyt, y = uv
        with tf.GradientTape() as g:
            uv_fuv = self.model([x, y])
            loss = self.model.losses[0]
        grads = g.gradient(loss, self.model.trainable_variables, unconnected_gradients=tf.UnconnectedGradients.ZERO)
        return loss, grads
    def set_weights(self, flat_weights):
        """
        Set weights to the model.

        Args:
            flat_weights: flattened weights.
        """
        weights_shapes = [w.shape for w in self.model.get_weights()]
        n = [0] + [np.prod(shape) for shape in weights_shapes]
        partition = np.cumsum(n)
        weights = [flat_weights[from_part:to_part].reshape(shape)
                   for from_part, to_part, shape
                   in zip(partition[:-1], partition[1:], weights_shapes)]
        self.model.set_weights(weights)

    def evaluate(self, flat_weights):
        """
        Evaluate loss and gradients for weights as ndarray.

        Args:
            flat_weights: flattened weights.

        Returns:
            loss and gradients for weights as ndarray.
        """
        self.set_weights(flat_weights)
        loss, grads = self.tf_evaluate(self.x_train, self.y_train)
        loss = loss.numpy().astype('float64')
        grads = np.concatenate([g.numpy().flatten() for g in grads]).astype('float64')
        # printest('loss', loss)
        return loss, grads
    def fit(self):
        """
        Train the model using the L-BFGS-B algorithm.
        """
        # Flatten initial weights
        initial_weights = np.concatenate([w.flatten() for w in self.model.get_weights()])
        # optimizer
        fmin_l_bfgs_b(func=self.evaluate, x0=initial_weights,
                      factr=self.factr, m=self.m,
                      maxls=self.maxls, maxiter=self.maxiter,
                      maxfun=self.maxfun)


if __name__ == "__main__":
    ...
    # load data
    ...
    indices = np.random.choice(N * T, n_train, replace=False)
    xyt_train = tf.concat((x_1d[indices], y_1d[indices], t_1d[indices]), axis=1)
    uv_train = tf.concat((u_1d[indices], v_1d[indices]), axis=1)

    # Model
    nn_model = NeuralNet().build()
    pinn_model = PhysicsInformedNN(model=nn_model).build()

    # Optimizer
    lbfgs = L_BFGS_B(model=pinn_model, x_train=xyt_train, y_train=uv_train)
    lbfgs.fit()
Attempt
Use the args argument of fmin_l_bfgs_b, where args is passed the trainable variables that I want to keep fixed and x0 is the initial two variables to be minimized. The following code is only a sanity test to see if passing the weights in this way works.
    def evaluate(self, weights_var, *args):
        weights = np.append(weights_var, args)
        self.set_weights(weights)
        loss, grads = self.tf_evaluate(self.x_train, self.y_train)
        loss = loss.numpy().astype('float64')
        grads = np.concatenate([g.numpy().flatten() for g in grads]).astype('float64')
        # printest('loss', loss)
        return loss, grads

    def fit(self):
        """
        Train the model using the L-BFGS-B algorithm.
        """
        # Flatten initial weights: the first two variables are optimized, the rest stay fixed
        weights_fixed = np.concatenate([w.flatten() for w in self.model.get_weights()[2:]])
        weights_var = np.concatenate([w.flatten() for w in self.model.get_weights()[0:2]])
        # optimizer
        fmin_l_bfgs_b(func=self.evaluate, x0=weights_var, args=(weights_fixed,),
                      factr=self.factr, m=self.m,
                      maxls=self.maxls, maxiter=self.maxiter,
                      maxfun=self.maxfun)
Unfortunately, the following error is raised: 0-th dimension must be fixed to 2 but got 2644.
Question: Is there a way to keep the trainable variables that I do not want to minimize fixed, work only with the ones that are not fixed, and at the end set them all back into the neural network model, using this type of optimizer?
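A minimal sketch of one way this is sometimes done: hand SciPy only the flat vector for the variables you want to optimize, keep the rest untouched inside the model, and return only the matching slice of the gradient. PartialLBFGSB, loss_fn and n_var below are illustrative names under that assumption, not part of the code above:

import numpy as np
import tensorflow as tf
from scipy.optimize import fmin_l_bfgs_b

class PartialLBFGSB:
    """Sketch: optimize only the first `n_var` trainable variables of a Keras model."""

    def __init__(self, model, loss_fn, n_var=2):
        self.model = model
        self.loss_fn = loss_fn                      # callable returning a scalar tf loss
        self.variables = model.trainable_variables
        self.n_var = n_var                          # number of leading variables to optimize

    def _flatten(self, tensors):
        return np.concatenate([np.reshape(t, -1) for t in tensors]).astype('float64')

    def _assign_var_part(self, flat):
        # write the flat vector back into the first n_var variables only
        offset = 0
        for v in self.variables[:self.n_var]:
            size = int(np.prod(v.shape))
            v.assign(np.reshape(flat[offset:offset + size], v.shape).astype(v.dtype.as_numpy_dtype))
            offset += size

    def evaluate(self, flat_var_weights):
        self._assign_var_part(flat_var_weights)
        with tf.GradientTape() as tape:
            loss = self.loss_fn()
        grads = tape.gradient(loss, self.variables[:self.n_var])   # gradient of the free part only
        return float(loss.numpy()), self._flatten(grads)

    def fit(self):
        x0 = self._flatten([v.numpy() for v in self.variables[:self.n_var]])
        fmin_l_bfgs_b(func=self.evaluate, x0=x0, maxiter=50000)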

HuggingFace Pytorch trainer giving worse results than tensorflow

I'm trying to make the switch from TensorFlow to PyTorch, but I'm getting noticeably worse results when running the model in PyTorch using Trainer.
I'm using bert-base-uncased and, as far as I can tell, primarily the same settings in both (batch size, epochs, learning rate, etc.). However, I am getting an F1 score of 0.9967 from TensorFlow and 0.944649446494465 from PyTorch. The loss also seems to fluctuate a lot more in PyTorch. I'm still pretty new to machine learning and Python in general, so I feel like it's got to be something obvious, but I've yet to find it. Here are my scripts. Thanks in advance.
Tensorflow
SEQ_LEN = 256
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def train():
    def preprocess_function(examples):
        return tokenizer(examples["text"], max_length=SEQ_LEN, truncation=True, padding='max_length', add_special_tokens=True, return_attention_mask=True, return_token_type_ids=False, return_tensors='tf')

    dataset = load_dataset('json', data_files={"train": "full-items.json", "test": "validation-2.json"})
    tokenized = dataset.map(preprocess_function, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

    batch_size = 8
    num_epochs = 4
    batches_per_epoch = len(tokenized["train"]) // batch_size
    total_train_steps = int(batches_per_epoch * num_epochs)
    optimizer, schedule = create_optimizer(init_lr=4e-5, num_warmup_steps=0, num_train_steps=total_train_steps)

    id2label = {0: "NEGATIVE", 1: "POSITIVE"}
    label2id = {"NEGATIVE": 0, "POSITIVE": 1}
    model = TFAutoModelForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
    )

    tf_train_set = model.prepare_tf_dataset(
        tokenized["train"],
        shuffle=True,
        batch_size=batch_size,
        collate_fn=data_collator,
    )
    tf_validation_set = model.prepare_tf_dataset(
        tokenized["test"],
        shuffle=False,
        batch_size=batch_size,
        collate_fn=data_collator,
    )

    eval_metrics = evaluate.load("f1")

    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=1)
        return eval_metrics.compute(predictions=predictions, references=labels)

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    METRICS = [
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True, name='sparse_crossentropy'),
    ]

    metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_train_set)
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)
    class_weights = dict(enumerate(sklearn.utils.class_weight.compute_class_weight('balanced',
                                                                                    classes=np.unique(tokenized["train"]["label"]),
                                                                                    y=tokenized["train"]["label"])))

    model.compile(optimizer=optimizer, loss=loss, metrics=METRICS)
    model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=num_epochs, class_weight=class_weights, callbacks=[early_stop, metric_callback])
    model.save_pretrained('lease_to_own_model', save_format="tf")
Pytorch
def pyTorch():
    def preprocess_function(examples):
        return tokenizer(examples["text"], max_length=SEQ_LEN, truncation=True, padding='max_length', add_special_tokens=True, return_attention_mask=True, return_token_type_ids=False)

    dataset = load_dataset('json', data_files={"train": "full-items.json", "test": "validation-2.json"})
    tokenized = dataset.map(preprocess_function, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    eval_f1 = evaluate.load("f1")
    eval_accuracy = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=1)
        f1 = eval_f1.compute(predictions=predictions, references=labels)
        accuracy = eval_accuracy.compute(predictions=predictions, references=labels)
        return {"accuracy": accuracy["accuracy"], "f1": f1["f1"]}

    id2label = {0: "NEGATIVE", 1: "POSITIVE"}
    label2id = {"NEGATIVE": 0, "POSITIVE": 1}
    model = AutoModelForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
    )
    device = torch.device("cuda")
    model.to(device)

    batch_size = 8
    training_args = TrainingArguments(
        num_train_epochs=4,
        output_dir="pytorch",
        learning_rate=4e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        metric_for_best_model='f1',
        load_best_model_at_end=True,
        logging_strategy="epoch",
        warmup_steps=0,
    )

    class_weights = sklearn.utils.class_weight.compute_class_weight('balanced',
                                                                    classes=np.unique(tokenized["train"]["label"]),
                                                                    y=tokenized["train"]["label"])
    weights = torch.tensor(class_weights, dtype=torch.float).to(device)

    class CustomTrainer(Trainer):
        def compute_loss(self, model, inputs, return_outputs=False):
            labels = inputs.get("labels")
            outputs = model(**inputs)
            logits = outputs.get("logits")
            loss_fct = torch.nn.CrossEntropyLoss(weight=weights)
            loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
            return (loss, outputs) if return_outputs else loss

    trainer = CustomTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized["train"],
        eval_dataset=tokenized["test"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    trainer.save_model("pytorch")
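As a side note, some differences between the two runs come from implicit defaults rather than the code shown. A hedged sketch of pinning those defaults so the runs are more directly comparable (argument names come from transformers.TrainingArguments and EarlyStoppingCallback; values mirror the scripts above):

from transformers import TrainingArguments, EarlyStoppingCallback

training_args = TrainingArguments(
    output_dir="pytorch",
    num_train_epochs=4,
    learning_rate=4e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=0,
    weight_decay=0.0,               # create_optimizer above also defaults to no weight decay
    lr_scheduler_type="linear",     # Trainer's default schedule; matches create_optimizer's linear decay
    seed=42,                        # fix the seed so PyTorch runs are repeatable
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    metric_for_best_model="f1",
    load_best_model_at_end=True,
)

# The TF script stops early with patience=2; the Trainer equivalent is a callback,
# passed as callbacks=[early_stopping] when constructing the CustomTrainer.
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)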

Object localization MNIST Tensorflow to Pytorch: losses don't decrease

I am trying to convert a TensorFlow object localization code into PyTorch. In the original code, the author uses model.compile / model.fit to train the model, so I don't understand how the losses for MNIST digit classification and box regression interact. Still, I'm trying to implement my own training loop in PyTorch.
The goal here is, after some preprocessing, to paste the MNIST digits at random positions onto a black square image and then classify and localize (bounding box) the digit.
I set two losses, nn.CrossEntropyLoss and nn.MSELoss, and I do (loss_1+loss_2).backward() to compute the gradients. I know it's the right way to compute gradients with two losses from here and here.
But still, my loss doesn't decrease, whereas it drops almost immediately with the TensorFlow code. I checked the model with torchinfo.summary and it seems to behave the same as the TensorFlow implementation.
EDIT:
I looked at the predicted labels of my model and they don't seem to change at all.
This line of code label_preds, bbox_coords_preds = model(digits) always returns the same values
label_preds[0] = tensor([[0.0156, 0.0156, 0.0156, 0.0156, 0.0156, 0.0156, 0.0156, 0.0156, 0.0156, 0.0156]], device='cuda:0', grad_fn=<SliceBackward0>)
Here are my questions:
Is my custom network set up correctly?
Are my losses set up correctly?
Why don't my label predictions change?
Does my training loop work as well as the .compile and .fit TensorFlow methods?
Thanks a lot!
PYTORCH CODE
class ConvNetwork(nn.Module):
    def __init__(self):
        super(ConvNetwork, self).__init__()
        self.conv2d_1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3)
        self.conv2d_2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.conv2d_3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.avgPooling2D = nn.AvgPool2d((2, 2))
        self.dense_1 = nn.Linear(in_features=3136, out_features=128)
        self.dense_classifier = nn.Linear(in_features=128, out_features=10)
        self.softmax = nn.Softmax(dim=0)
        self.dense_regression = nn.Linear(in_features=128, out_features=4)

    def forward(self, input):
        x = self.avgPooling2D(F.relu(self.conv2d_1(input)))
        x = self.avgPooling2D(F.relu(self.conv2d_2(x)))
        x = self.avgPooling2D(F.relu(self.conv2d_3(x)))
        x = nn.Flatten()(x)
        x = F.relu(self.dense_1(x))
        output_classifier = self.softmax(self.dense_classifier(x))
        output_regression = self.dense_regression(x)
        return [output_classifier, output_regression]

######################################################
learning_rate = 0.1
EPOCHS = 1
BATCH_SIZE = 64

model = ConvNetwork()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
classification_loss = nn.CrossEntropyLoss()
regression_loss = nn.MSELoss()

######################################################
begin_time = time.time()
for epoch in range(EPOCHS):
    tot_loss = 0
    train_start = time.time()
    training_losses = []
    print("-" * 20)
    print(" " * 5 + f"EPOCH {epoch+1}/{EPOCHS}")
    print("-" * 20)
    model.train()
    for batch, (digits, labels, bbox_coords) in enumerate(training_dataset):
        digits, labels, bbox_coords = digits.to(device), labels.to(device), bbox_coords.to(device)
        optimizer.zero_grad()
        [label_preds, bbox_coords_preds] = model(digits)
        class_loss = classification_loss(label_preds, labels)
        box_loss = regression_loss(bbox_coords_preds, bbox_coords)
        training_loss = class_loss + box_loss
        training_loss.backward()
        optimizer.step()
        ######### print part #######################
        training_losses.append(training_loss.item())
        if batch + 1 <= len_training_ds // BATCH_SIZE:
            current_training_sample = (batch + 1) * BATCH_SIZE
        else:
            current_training_sample = batch * BATCH_SIZE + len_training_ds % BATCH_SIZE
        if (batch + 1) == 1 or (batch + 1) % 100 == 0 or (batch + 1) == len_training_ds // BATCH_SIZE + 1:
            print(f"Elapsed time : {(time.time()-train_start)/60:.3f}",
                  f" --- Digit : {current_training_sample}/{len_training_ds}",
                  f" : loss = {training_loss:.5f}")
        if batch + 1 == (len_training_ds // BATCH_SIZE) + 1:
            print(f"Total elapsed time for training : {(time.time()-begin_time)/60:.3f}")
ORIGINAL TENSORFLOW CODE
def feature_extractor(inputs):
    x = tf.keras.layers.Conv2D(16, activation='relu', kernel_size=3, input_shape=(75, 75, 1))(inputs)
    x = tf.keras.layers.AveragePooling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(32, kernel_size=3, activation='relu')(x)
    x = tf.keras.layers.AveragePooling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu')(x)
    x = tf.keras.layers.AveragePooling2D((2, 2))(x)
    return x

def dense_layers(inputs):
    x = tf.keras.layers.Flatten()(inputs)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    return x

def classifier(inputs):
    classification_output = tf.keras.layers.Dense(10, activation='softmax', name='classification')(inputs)
    return classification_output

def bounding_box_regression(inputs):
    bounding_box_regression_output = tf.keras.layers.Dense(units='4', name='bounding_box')(inputs)
    return bounding_box_regression_output

def final_model(inputs):
    feature_cnn = feature_extractor(inputs)
    dense_output = dense_layers(feature_cnn)
    classification_output = classifier(dense_output)
    bounding_box_output = bounding_box_regression(dense_output)
    model = tf.keras.Model(inputs=inputs, outputs=[classification_output, bounding_box_output])
    return model

def define_and_compile_model(inputs):
    model = final_model(inputs)
    model.compile(optimizer='adam',
                  loss={'classification': 'categorical_crossentropy',
                        'bounding_box': 'mse'},
                  metrics={'classification': 'accuracy',
                           'bounding_box': 'mse'})
    return model

inputs = tf.keras.layers.Input(shape=(75, 75, 1,))
model = define_and_compile_model(inputs)

EPOCHS = 10  # 45
steps_per_epoch = 60000 // BATCH_SIZE  # 60,000 items in this dataset
validation_steps = 1

history = model.fit(training_dataset,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=validation_dataset,
                    validation_steps=validation_steps, epochs=EPOCHS)

loss, classification_loss, bounding_box_loss, classification_accuracy, bounding_box_mse = model.evaluate(validation_dataset, steps=1)
print("Validation accuracy: ", classification_accuracy)
Answering my own question about this bug:
What I found:
I realized that I use a Softmax layer in my model while also using nn.CrossEntropyLoss() as the loss.
What this was causing:
This loss already applies a log-softmax internally (doc).
Applying a softmax twice distorts the loss and prevents convergence.
What I did:
One should leave a plain linear layer as the output of the classification head.
Another way is to use NLLLoss (doc) instead and have the model output log-probabilities (e.g. nn.LogSoftmax) rather than a plain softmax; see the sketch below.
Also:
I don't fully understand how the .compile() and .fit() TensorFlow methods work, but I think they tune the training in one way or another (I suspect the learning rate), since I had to decrease the learning rate to 0.001 in PyTorch to "unstick" the loss and make it decrease.
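A minimal standalone illustration of the two fixes (the tensors here are made-up placeholders, not the model above):

import torch
import torch.nn as nn

logits = torch.randn(8, 10)              # raw classifier outputs for a batch of 8
labels = torch.randint(0, 10, (8,))

# Option 1: feed raw logits to CrossEntropyLoss, which applies log-softmax internally
loss_ce = nn.CrossEntropyLoss()(logits, labels)

# Option 2: apply LogSoftmax explicitly and use NLLLoss, which expects log-probabilities
log_probs = nn.LogSoftmax(dim=1)(logits)
loss_nll = nn.NLLLoss()(log_probs, labels)

print(loss_ce.item(), loss_nll.item())   # the two values are identical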

In Tensorflow, how do you include a pre-trained model in a custom model?

I've got a custom model that I've pre-trained in a separate notebook, called vae, which I've saved using vae.save().
I'm now looking to implement a model which is a full training pipeline containing the pre-trained vae and some other components (including a ResNet-50 from the TensorFlow gallery).
In the TensorFlow documentation about Making new Layers and Models via subclassing, it doesn't mention anything about including models in subclassed models.
My question is: how do I include my pre-trained VAE in a new subclassed model?
I've already tried writing the code below, which works fine for training, but I haven't seen implementations like it elsewhere, and I'm getting errors when trying to call reid.save() or reid.summary() on the model, so I presume there is a better way to do it:
class ReId(keras.Model):
    def __init__(self, vae, num_ids, **kwargs):
        super(ReId, self).__init__(**kwargs)
        self.vae = vae
        self.convex_combination = ConvexCombination()
        self.resnet_50 = resnet50
        self.glob_avg_pool = keras.layers.GlobalAveragePooling2D()
        self.bnneck = keras.layers.BatchNormalization()
        self.num_ids = num_ids
        self.final_fc_layer = keras.layers.Dense(self.num_ids)
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.vae_loss_tracker = keras.metrics.Mean(
            name="vae_loss"
        )
        self.classification_loss_tracker = keras.metrics.Mean(name="classification_loss")
        self.triplet_loss_tracker = keras.metrics.Mean(name="triplet_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.vae_loss_tracker,
            # Classification loss is cross-entropy loss here
            self.classification_loss_tracker,
            self.triplet_loss_tracker,
        ]
    def train_step(self, data):
        x, y = data
        y = tf.cast(y, tf.int32)
        with tf.GradientTape() as tape:
            # Calculate VAE loss
            z_mean, z_log_var, z = self.vae.encoder(x)
            reconstruction = self.vae.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                keras.losses.mean_squared_error(x, reconstruction)
            )
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            # Calculate VAE loss
            vae_loss = reconstruction_loss + (LAMBDA * kl_loss)
            embedding_output = self.call(x)
            bnneck_output = self.bnneck(embedding_output)
            training_output = self.final_fc_layer(bnneck_output)
            # Calculate triplet loss
            triplet_loss = tfa.losses.triplet_semihard_loss(y_true=y, y_pred=embedding_output)
            # Calculate cross-entropy loss
            ce_loss = keras.losses.sparse_categorical_crossentropy(y, training_output)
            # Sum up the losses
            total_loss = triplet_loss + ce_loss + (TOTAL_LOSS_VAE_MULTIPLIER * vae_loss)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.classification_loss_tracker.update_state(ce_loss)
        self.triplet_loss_tracker.update_state(triplet_loss)
        self.vae_loss_tracker.update_state(vae_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "classification loss:": self.classification_loss_tracker.result(),
            "triplet loss:": self.triplet_loss_tracker.result(),
            "vae loss:": self.vae_loss_tracker.result(),
        }

    def get_config(self):
        config = super(ReId, self).get_config()
        config.update({"num_ids": self.num_ids})
        return config

    def call(self, data):
        recon_img = self.vae(data)
        convex_combo_input = [data, (data - recon_img)]
        convex_combo_output = self.convex_combination(convex_combo_input)
        final_output = self.glob_avg_pool(self.resnet_50(convex_combo_output))
        return final_output
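For reference, a minimal sketch of the usual pattern for nesting a saved model inside a subclassed model: assign it as an attribute so its weights are tracked, then call the wrapper once on a dummy batch so that summary() has shapes to report. The path, input shape, and Wrapper class below are illustrative, not the actual ReId code:

import tensorflow as tf
from tensorflow import keras

vae = keras.models.load_model("vae")        # illustrative path to the saved sub-model

class Wrapper(keras.Model):
    def __init__(self, vae, **kwargs):
        super().__init__(**kwargs)
        self.vae = vae                      # nested models are tracked just like layers
        self.head = keras.layers.Dense(10)

    def call(self, x):
        return self.head(self.vae(x))

wrapper = Wrapper(vae)
wrapper(tf.zeros((1, 64, 64, 3)))           # build by calling on a dummy batch (shape is illustrative)
wrapper.summary()                           # summary() works once the model has been built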

Perceptual loss function is not giving any gradient tensorflow

I am trying to implement a perceptual loss function in TensorFlow, and here it is:
loss_model = tf.keras.models.Sequential()
for eachLayer in base_model.layers[:12]:
    eachLayer.trainable = False
    loss_model.add(eachLayer)

def meanSquaredLoss(y_true, y_pred):
    return tf.reduce_mean(tf.keras.losses.MSE(y_true, y_pred))

def featureLoss(image):
    predicted_image = model(image, training=False)
    activatedModelVal = loss_model(predicted_image, training=False)
    actualModelVal = loss_model(image, training=False)
    return meanSquaredLoss(actualModelVal, activatedModelVal)
Here is the style loss function given by:
def gram_matrix(input_tensor):
    result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    input_shape = tf.shape(input_tensor)
    num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
    return result / num_locations

def styleLoss(image):
    predicted_image = model(image, training=False)
    activatedModelVal = loss_model(predicted_image, training=False)
    actualModelVal = loss_model(image, training=False)
    return meanSquaredLoss(gram_matrix(actualModelVal), gram_matrix(activatedModelVal))
So now I have both losses, and here is what I have done for the optimization step:
opt = tf.keras.optimizers.Adam(0.02)

def each_train_step(image, showImage=False):
    predicted_image = model(image, training=False)
    loss = tf.reduce_sum(featureLoss(predicted_image, image) + styleLoss(predicted_image, image))
    with tf.GradientTape() as tape:
        grad = tape.gradient(loss, model.trainable_variables)
        print(grad)
        # opt.apply_gradients(zip(grad, model.trainable_variables))
    if showImage:
        plt.imshow(predicted_image)
The problem is that the grad object is a list of None values, and I don't know why. Why is the gradient returning a list of None? Any solution to get the actual gradients?
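For reference, tf.GradientTape only records operations executed inside its context, so the forward pass and the loss both have to live inside the with block for tape.gradient to return anything. A minimal sketch of that pattern, reusing the featureLoss, styleLoss, model and opt defined above:

def each_train_step(image):
    with tf.GradientTape() as tape:
        # the forward passes inside featureLoss/styleLoss are recorded here
        loss = featureLoss(image) + styleLoss(image)
    grad = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(grad, model.trainable_variables))
    return loss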