No gradients provided for any variables -Custom loss function - tensorflow

I am trying to train a network with custom loss function and I am getting an error:
ValueError: No gradients provided for any variable: ['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'conv2d_2/kernel:0', 'conv2d_2/bias:0', 'conv2d_3/kernel:0', 'conv2d_3/bias:0', 'conv2d_4/kernel:0', 'conv2d_4/bias:0', 'dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
The custom loss function is:
def cosine_sim_cal(self, vec1, vec2):
    """Return the cosine similarity of two vectors via a Keras metric.

    Each vector is wrapped in a list so it becomes a one-row batch, and a
    freshly created ``tf.keras.metrics.CosineSimilarity`` measures the
    similarity along axis 1.

    NOTE(review): the value is read back from a metric object; according to
    the discussion that accompanies this snippet, no gradients reach the
    model through it, so it is unsuitable inside a training loss.
    """
    row_a = tf.convert_to_tensor([vec1])
    row_b = tf.convert_to_tensor([vec2])
    similarity_metric = tf.keras.metrics.CosineSimilarity(axis=1)
    similarity_metric.update_state(row_a, row_b)
    return similarity_metric.result()
def triplets_loss(self, y_pred, m):
    """Sum the cosine triplet margin loss over a batch of triplets.

    Args:
        y_pred: embeddings indexed as ``y_pred[i, k, :]`` where k is
            0 (anchor), 1 (positive) or 2 (negative).
        m: margin added to the distance difference.

    Returns:
        Scalar tensor: the sum over the batch of
        ``max(d(a, p) - d(a, n) + m, eps)``.

    ``cosine_sim_cal`` returns a *similarity* (larger means closer), so it is
    converted to a cosine distance ``1 - similarity`` first.  Plugging the
    raw similarities into the margin formula would reward pushing the
    positive away from the anchor.

    NOTE(review): gradients still depend on ``cosine_sim_cal`` being
    differentiable.
    """
    eps = tf.keras.backend.epsilon()
    loss = 0.0
    for triplet in y_pred:
        anchor, positive, negative = triplet[0, :], triplet[1, :], triplet[2, :]
        d_a_p = 1.0 - self.cosine_sim_cal(anchor, positive)
        d_a_n = 1.0 - self.cosine_sim_cal(anchor, negative)
        loss += tf.math.maximum(d_a_p - d_a_n + m, eps)
    return loss
The shape of y_pred is TensorShape([180, 3, 128]) and m is a float value. The loss function is computing the loss which looks like tf.Tensor(37.054775, shape=(), dtype=float32)
My training loop is:
# Custom training loop: one gradient step per generated triplet batch, with
# the mean training loss of every epoch collected in train_loss_list.
model = self.model
train_loss_list = []
validation_loss_list = []
train_triplet_gen_instance = Triplet_Generator(x_data=self.train_class_dict, batch=self.batch)
val_triplet_gen_instance = Triplet_Generator(x_data=self.val_class_dict, batch=self.batch)
for epoch in range(self.epochs):
    total_train_loss = 0.0
    total_val_loss = 0.0
    for step in range(self.training_steps):
        x_train_batch = train_triplet_gen_instance.generate_batch()
        # Only the forward pass and the loss need to be recorded on the tape.
        with tf.GradientTape() as tape:
            train_logits = model(x_train_batch, training=True)
            train_loss_value = self.triplets_loss(train_logits, m)
        total_train_loss += train_loss_value
        grads = tape.gradient(train_loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        if step % 20 == 0:
            # `step` is zero-based, so step + 1 batches have been accumulated;
            # dividing by `step` would divide by zero on the first report.
            running_mean = total_train_loss / (step + 1)
            print('Epoch: {}, Step: {}, training_loss:{}'.format(epoch, step, str(running_mean)))
    mean_training_loss = tf.divide(total_train_loss, self.training_steps)
    train_loss_list.append(mean_training_loss.numpy())
x_train_batch is a tuple of length 3. every element of this tuple is of shape (180, 200, 200, 3)
I am not able to figure out the bug in the code. If I change my loss function to a distance based loss function, the code works.

I found the problem in the custom loss function. It seems that tf.keras.metrics.CosineSimilarity(axis=1) is not differentiable, which is why no gradients were being calculated. To work around this, I rewrote the function as:
def triplets_loss(self, y_pred, m):
    """Differentiable cosine triplet margin loss, summed over the batch.

    Args:
        y_pred: embeddings indexed as ``y_pred[:, k, :]`` where k is
            0 (anchor), 1 (positive) or 2 (negative).
        m: margin added to the distance difference.

    Returns:
        Scalar tensor: ``sum(max(d(a, p) - d(a, n) + m, eps))`` with
        ``d`` the cosine distance ``1 - cosine_similarity``.

    The whole batch is handled with tensor ops instead of a Python-level
    ``map`` over rows, so it also works when traced into a graph.
    ``l2_normalize`` keeps zero-length embeddings from producing NaN.
    Cosine *distance* is used because the margin formula expects the
    positive term to shrink as the pair gets closer.
    """
    eps = tf.keras.backend.epsilon()
    unit = tf.math.l2_normalize(y_pred, axis=-1)
    anchor, positive, negative = unit[:, 0, :], unit[:, 1, :], unit[:, 2, :]
    # Dot product of unit vectors == cosine similarity.
    sim_a_p = tf.reduce_sum(anchor * positive, axis=-1)
    sim_a_n = tf.reduce_sum(anchor * negative, axis=-1)
    d_a_p = 1.0 - sim_a_p
    d_a_n = 1.0 - sim_a_n
    return tf.reduce_sum(tf.math.maximum(d_a_p - d_a_n + m, eps))
With the new loss function, I was able to continue with the training.

Related

How select only some trainable variables from NN model to minimize with SciPy L_BFGS_B optimizer?

I'm implementing a physics-informed neural network (PINN) model to solve the Navier-Stokes equations, as in PINN. This type of model works better when trained with L-BFGS-B, and the best optimizer for my case is fmin_l_bfgs_b from SciPy.
The problem with this optimizer is that it does not work directly with the TensorFlow library. To make it work with TensorFlow, I implemented a class L_BFGS_B with the following methods:
set_weights: Set weights to the model.
evaluate: evaluate loss and gradients
tf_evaluate: Evaluate loss and gradients as tf.tensor
fit: Train the model
All works fine. The optimizer is training all weights of the model, but the problem is that I only want to train two out of 18 trainable variables.
**Optimizer class**
class L_BFGS_B:
    """Train a Keras model with SciPy's ``fmin_l_bfgs_b``.

    SciPy works on one flat float64 vector, so the class converts between
    that vector and the model's list of weight arrays, and exposes the loss
    and its gradient in the form the optimiser expects.
    """

    def __init__(self, model, x_train, y_train, factr=1, m=50, maxls=50, maxfun=50000, maxiter=50000):
        """Store the model, the training data and the optimiser settings.

        Args:
            model: model whose first entry of ``model.losses`` is minimised.
            x_train: first model input (xyt in the calling script).
            y_train: second model input (uv in the calling script).
            factr: forwarded to ``fmin_l_bfgs_b`` (when the iteration stops).
            m: maximum number of variable metric corrections used.
            maxls: maximum number of line search steps per iteration.
            maxfun: maximum number of function evaluations.
            maxiter: maximum number of iterations.
        """
        self.model = model
        # x_train = xyt, y_train = uv
        self.x_train = x_train  # tf.constant(x_train, dtype=tf.float32)
        self.y_train = y_train  # tf.constant(y_train, dtype=tf.float32)
        # when the iteration terminates
        self.factr = factr
        # the maximum number of variable metric corrections used
        self.m = m
        # max number of line search steps per iteration (here 50 per iteration)
        self.maxls = maxls
        # max number of iterations
        self.maxiter = maxiter
        self.maxfun = maxfun

    # tf.function (decorator left disabled, as in the original)
    def tf_evaluate(self, x, y):
        """Evaluate loss and gradients for weights as tf.Tensor.

        Args:
            x: first model input (xyt).
            y: second model input (uv).

        Returns:
            loss and gradients for the trainable variables as tf.Tensor.
        """
        with tf.GradientTape() as g:
            # The forward pass populates model.losses; its output is not needed.
            self.model([x, y])
            loss = self.model.losses[0]
        # ZERO turns unconnected variables into zero gradients instead of None,
        # so every gradient can be flattened later.
        grads = g.gradient(
            loss,
            self.model.trainable_variables,
            unconnected_gradients=tf.UnconnectedGradients.ZERO,
        )
        return loss, grads

    def set_weights(self, flat_weights):
        """Set weights to the model.

        Args:
            flat_weights: flattened weights, concatenated in the order of
                ``model.get_weights()``.
        """
        weights_shapes = [w.shape for w in self.model.get_weights()]
        # int(...) matters: np.prod(()) is the float 1.0, and a float in the
        # list would turn every offset into a float that cannot slice.
        sizes = [int(np.prod(shape)) for shape in weights_shapes]
        partition = np.cumsum([0] + sizes)
        weights = [
            flat_weights[from_part:to_part].reshape(shape)
            for from_part, to_part, shape
            in zip(partition[:-1], partition[1:], weights_shapes)
        ]
        self.model.set_weights(weights)

    def evaluate(self, flat_weights):
        """Evaluate loss and gradients for weights as ndarray.

        Args:
            flat_weights: flattened weights.

        Returns:
            loss and gradients for weights as float64 ndarray.
        """
        self.set_weights(flat_weights)
        loss, grads = self.tf_evaluate(self.x_train, self.y_train)
        loss = loss.numpy().astype('float64')
        grads = np.concatenate([g.numpy().flatten() for g in grads]).astype('float64')
        return loss, grads

    def fit(self):
        """Train the model using L-BFGS-B algorithm.

        Returns:
            The ``(x, f, d)`` tuple returned by ``fmin_l_bfgs_b``.  The model
            is left holding ``x``, the optimiser's final weights, rather than
            whatever point was evaluated last.
        """
        # Flatten initial weights
        initial_weights = np.concatenate([w.flatten() for w in self.model.get_weights()])
        x_opt, f_opt, info = fmin_l_bfgs_b(
            func=self.evaluate, x0=initial_weights,
            factr=self.factr, m=self.m,
            maxls=self.maxls, maxiter=self.maxiter,
            maxfun=self.maxfun,
        )
        self.set_weights(x_opt)
        return x_opt, f_opt, info
if __name__ == "__main__":
    ...
    # load Data
    ...
    # Draw n_train distinct sample indices out of the N*T available points.
    indices = np.random.choice(N*T, n_train, replace = False)
    # First model input: the selected x, y and t columns side by side.
    xyt_train = tf.concat( (x_1d[indices], y_1d[indices], t_1d[indices]), axis = 1)
    # Second model input: the matching u and v columns.
    uv_train = tf.concat( (u_1d[indices], v_1d[indices]), axis = 1)
    # Model
    nn_model = NeuralNet().build()
    # The PINN wraps the plain network (both classes are defined elsewhere).
    pinn_model = PhysicsInformedNN(model = nn_model).build()
    #Optimizer
    lbfgs = L_BFGS_B(model = pinn_model, x_train = xyt_train, y_train = uv_train)
    lbfgs.fit()
Attempt
Use the args parameter of fmin_l_bfgs_b: args carries the trainable variables that I want to keep fixed, and **x0** holds the initial values of the two variables to be minimized. The following code is only a sanity test to see whether passing the weights in this way works.
def evaluate(self, weights_var, *args):
    """Objective for the optimiser when only part of the weights is free.

    Args:
        weights_var: flat vector of the weights being optimised.
        *args: the remaining (fixed) weights, appended after ``weights_var``
            to rebuild the full flat weight vector.

    Returns:
        Loss and flattened gradients, both as float64.

    NOTE(review): the gradient is concatenated over everything
    ``tf_evaluate`` returns, so it can be longer than ``weights_var``.  The
    optimiser expects one gradient entry per optimised value, which looks
    like the source of the dimension error reported for this attempt.
    """
    self.set_weights(np.append(weights_var, args))
    loss_tensor, grad_tensors = self.tf_evaluate(self.x_train, self.y_train)
    loss_value = loss_tensor.numpy().astype('float64')
    flat_grads = np.concatenate([g.numpy().flatten() for g in grad_tensors])
    return loss_value, flat_grads.astype('float64')
def fit(self):
    """
    Train the model using L-BFGS-B algorithm, optimising only the first two
    weight arrays and passing the rest through ``args`` unchanged.
    """
    all_weights = self.model.get_weights()
    # Flatten the weights that stay fixed and the ones that are optimised.
    weights_fixed = np.concatenate([w.flatten() for w in all_weights[2:]])
    weights_var = np.concatenate([w.flatten() for w in all_weights[0:2]])
    # x0 must be the free weights only.  args has to be a tuple, hence the
    # trailing comma; it reaches evaluate() as its *args.
    fmin_l_bfgs_b(
        func=self.evaluate, x0=weights_var, args=(weights_fixed,),
        factr=self.factr, m=self.m,
        maxls=self.maxls, maxiter=self.maxiter,
        maxfun=self.maxfun,
    )
Unfortunately, the following error is raised: 0-th dimension must be fixed to 2 but got 2644.
Question: Is there a way to fix the trainable variables that I do not want to minimize, optimize only the ones that are not fixed, and at the end set them all back into the neural network model using this type of optimizer?

Declaring Variables inside the Tensorflow GradientTape

I have a model with a complex loss, computed per class of the model output.
As you can see below, I'm computing the loss with some custom loss function and assigning this value to a variable, as tensors are immutable in TensorFlow.
def calc_loss(y_true, y_pred):
    """Average a per-class loss over ``num_classes`` output columns.

    Column ``idx`` of ``y_true`` and ``y_pred`` is passed to
    ``SOME_LOSS_FUNC`` and the result is stored in a ``tf.Variable`` that is
    averaged at the end.

    NOTE(review): the per-class values only reach the result through
    ``assign`` on a variable; the discussion around this snippet identifies
    that as the reason the gradients come back as None.
    """
    num_classes = 10
    pos_loss_class = tf.Variable(tf.zeros((1, num_classes), dtype=tf.dtypes.float32))
    for idx in range(num_classes):
        pos_loss = SOME_LOSS_FUNC(y_true[:, idx], y_pred[:, idx])
        pos_loss_class[:, idx].assign(pos_loss)
    return tf.reduce_mean(pos_loss_class)
My code is simple:
# Record the forward pass and the loss computation on the tape.
with tf.GradientTape() as tape:
    output = model(input, training=True)
    loss = calc_loss(targets, output)
# Gradient of the loss with respect to every trainable weight of the model.
grads = tape.gradient(loss, model.trainable_weights)
However, I receive None for all of the model's variables. From my understanding, this happens because the stateful variable blocks the gradient, as described here: https://www.tensorflow.org/guide/autodiff#4_took_gradients_through_a_stateful_object
Any suggestions?
Here is the reproducible code, which is a toy example, but demonstrates the issue.
# Minimal reproduction: one Dense layer, one input row, two target values.
y_true = tf.Variable(tf.random.normal((1, 2)), name='targets')
layer = tf.keras.layers.Dense(2, activation='relu')
x = tf.constant([[1., 2., 3.]])
with tf.GradientTape() as tape:
    y_pred = layer(x)
    # Per-class losses are written into this variable, one column at a time.
    loss_class = tf.Variable(tf.zeros((1,2)), dtype=tf.float32)
    for idx in range(2):
        loss = tf.abs(y_true[:, idx] - y_pred[:, idx])
        # The only link between `loss` and `final_loss` is this assign.
        loss_class[:, idx].assign(loss)
    final_loss = tf.reduce_mean(loss_class)
# Reported outcome: None for every trainable weight of the layer.
grads = tape.gradient(final_loss, layer.trainable_weights)
My current second guess is that the assign method blocks the gradient, as explained in the TensorFlow page you linked. Instead, try to use just a plain list:
def calc_loss(y_true, y_pred):
    """Average a per-class loss over ``num_classes`` output columns.

    The per-class tensors are collected in a plain Python list rather than
    written into a ``tf.Variable``, so the result stays connected to
    ``y_pred`` through ordinary tensor ops.
    """
    num_classes = 10
    pos_loss_class = [
        SOME_LOSS_FUNC(y_true[:, idx], y_pred[:, idx])
        for idx in range(num_classes)
    ]
    return tf.reduce_mean(pos_loss_class)

Multi-class weighted loss for semantic image segmentation in keras/tensorflow

Given batched RGB images as input, shape=(batch_size, width, height, 3)
And a multiclass target represented as one-hot, shape=(batch_size, width, height, n_classes)
And a model (Unet, DeepLab) with softmax activation in last layer.
I'm looking for a weighted categorical cross-entropy loss function in Keras/TensorFlow.
The class_weight argument in fit_generator doesn't seem to work, and I didn't find the answer here or in https://github.com/keras-team/keras/issues/2115.
def weighted_categorical_crossentropy(weights):
    """Skeleton of the loss factory being asked for.

    Returns a ``wcce(y_true, y_pred)`` closure; the weighted loss expression
    itself is the open part of the question and is left as a placeholder.
    """
    # weights = [0.9,0.05,0.04,0.01]
    def wcce(y_true, y_pred):
        # y_true, y_pred shape is (batch_size, width, height, n_classes)
        loss = ...  # placeholder: the weighted cross-entropy expression sought
        return loss
    return wcce
I will answer my question:
def weighted_categorical_crossentropy(weights):
    """Build a categorical cross-entropy loss scaled by per-class weights.

    Args:
        weights: one weight per class, e.g. ``[0.9, 0.05, 0.04, 0.01]``.

    Returns:
        A ``wcce(y_true, y_pred)`` loss function.
    """
    def wcce(y_true, y_pred):
        class_weights = K.constant(weights)
        if not K.is_tensor(y_pred):
            y_pred = K.constant(y_pred)
        y_true = K.cast(y_true, y_pred.dtype)
        # y_true * class_weights is summed over the last axis, giving one
        # scale factor per position.
        position_weight = K.sum(y_true * class_weights, axis=-1)
        return K.categorical_crossentropy(y_true, y_pred) * position_weight
    return wcce
Usage:
# Build the loss closure for the chosen class weights and compile with it.
loss = weighted_categorical_crossentropy(weights)
# NOTE(review): newer Keras releases spell this argument `learning_rate` -- verify against the installed version.
optimizer = keras.optimizers.Adam(lr=0.01)
model.compile(optimizer=optimizer, loss=loss)
I'm using the Generalized Dice Loss. It works better than the Weighted Categorical Crossentropy in my case. My implementation is in PyTorch, however, it should be fairly easy to translate it.
class GeneralizedDiceLoss(nn.Module):
    """Generalized Dice loss with inverse squared class-volume weights.

    Both inputs are permuted with ``(0, 2, 3, 1)`` so that axis 1 becomes the
    last axis, and every reduction runs over the first three axes.
    Presumably the inputs are channels-first ``(N, C, H, W)`` class maps --
    confirm against the caller.
    """

    def __init__(self, eps=1e-9):
        """
        Args:
            eps: small constant that keeps the class weights and the final
                ratio finite when a class is absent (default matches the
                previously hard-coded 1e-9).
        """
        super().__init__()
        self.eps = eps

    def forward(self, inp, targ):
        """Return ``1 - dice`` for predictions ``inp`` and targets ``targ``."""
        inp = inp.contiguous().permute(0, 2, 3, 1)
        targ = targ.contiguous().permute(0, 2, 3, 1)

        # One weight per class: inverse of the squared target volume.
        w = 1. / (torch.sum(targ, (0, 1, 2)) ** 2 + self.eps)

        numerator = torch.sum(w * torch.sum(targ * inp, (0, 1, 2)))
        denominator = torch.sum(w * torch.sum(targ + inp, (0, 1, 2)))

        dice = 2. * (numerator + self.eps) / (denominator + self.eps)
        return 1. - dice
This issue might be similar to: Unbalanced data and weighted cross entropy which has an accepted answer.

NN on tensorflow doesn't train

I am trying to train a simple TensorFlow network on a simple model, but for some reason it doesn't learn anything. Am I making a mistake somewhere?
X, Y = read_data(file_name)
# CONSTRUCT GRAPH
x_t = tf.placeholder(shape=[None, X.shape[1]], dtype=tf.float32)
y_t = tf.placeholder(shape=[None,], dtype=tf.float32)
hidden_1 = tf.layers.dense(x_t, 50, activation=tf.nn.sigmoid)
hidden_2 = tf.layers.dense(hidden_1, 50, activation=tf.nn.sigmoid)
# NOTE(review): the sigmoid bounds the prediction to the (0, 1) interval;
# see the discussion following this snippet if Y is not in that range.
output = tf.layers.dense(hidden_2, 1, activation=tf.nn.sigmoid)
# DEFINE LOSS AND OPTIMIZER
# `output` is (batch, 1) while `y_t` is (batch,).  Subtracting them directly
# broadcasts to (batch, batch) and averages every prediction against every
# target, so the unit axis is dropped first.
prediction = tf.squeeze(output, axis=1)
loss = tf.reduce_mean(tf.square(prediction - y_t))
GD_optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train_step = GD_optimizer.minimize(loss)
# BATCH SIZE
BATCH_SIZE = 20
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(15000):
        # Sample a random mini-batch (with replacement) for this step.
        rand_indices = np.random.choice(X.shape[0], size=BATCH_SIZE)
        x_batch = X[rand_indices, :]
        y_batch = Y[rand_indices]
        _, temp_loss = sess.run([train_step, loss], feed_dict={x_t: x_batch, y_t: y_batch})
        print(temp_loss)
According to my understanding of your dataset description, the target value column Y is a float (real valued number) that can be in any range and not necessarily within the [0,1] interval.
On the other hand, because you use a sigmoid activation for the last layer of your model, the predicted values will always be in the [0, 1] range.
I would suggest not using a sigmoid activation in the last layer unless your Y values are also in the [0, 1] range.
So, modify your code such that it becomes:
# No activation on the last layer, so the prediction is not confined to [0, 1].
output = tf.layers.dense(hidden_2, 1, activation=None)

How to find accuracy for logistic regression and gradient descent with training and validation data sets?

I am trying to implement logistic regression with gradient descent on the notMNIST dataset. This is my code thus far, which parses the data and plots the accuracy against the epochs. I have done my training in 7 mini batches of 500 each. There are a total of 5000 iterations and therefore 5000/7 epochs.
My goal is to find the accuracy after each epoch and plot it against the epoch. And I want to do the same with the average loss at each epoch. I want to do this for the validation points.
This is the loss function I am implementing.
However, for some reason, when I try to calculate accuracy I always get 100%, which doesn't make sense since I am finding the weight from the training and then using it on the validation set, so the algorithm cannot be correct 100% of the time. Also when I plot the losses, I get a linear function, which also doesn't make any sense.
Does anyone have ideas about what I am doing wrong? Any help would be appreciated!
#implement logistic regression
#logistic regression prediction function is y = sigmoid(W^Tx + b)
#train the logistic regression model using SGD and mini batch size B = 500 on the two-class notMNIST dataset
#how to train the dataset:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
##############Constants##################################
BATCH_SIZE = 500
NUM_BATCHES = 7
NUM_ITERATIONS = 5000
LEARNING_RATE = [0.005]  # other candidates: 0.001, 0.0001
PIXEL_SIZE = 784  # 28x28
NUM_TRAINING_POINTS = 3500
NUM_VALID_POINTS = 100
###############Extracting data############################
# Only the two arrays are read from the archive; everything after that works
# on in-memory copies.
with np.load("notMNIST.npz") as data:
    Data, Target = data["images"], data["labels"]
posClass = 2
negClass = 9
# Keep only the examples of the two chosen classes.
dataIndx = (Target == posClass) | (Target == negClass)
Data = Data[dataIndx] / 255.
Target = Target[dataIndx].reshape(-1, 1)
# Relabel: positive class -> 1, negative class -> 0.
Target[Target == posClass] = 1
Target[Target == negClass] = 0
# Fixed seed so the shuffle (and therefore the split) is reproducible.
np.random.seed(521)
randIndx = np.arange(len(Data))
np.random.shuffle(randIndx)
Data, Target = Data[randIndx], Target[randIndx]
# Split: first 3500 train, next 100 validation, remainder test.
trainData, trainTarget = Data[:NUM_TRAINING_POINTS], Target[:NUM_TRAINING_POINTS]
validData, validTarget = (
    Data[NUM_TRAINING_POINTS:NUM_TRAINING_POINTS + NUM_VALID_POINTS],
    Target[NUM_TRAINING_POINTS:NUM_TRAINING_POINTS + NUM_VALID_POINTS],
)
testData, testTarget = (
    Data[NUM_TRAINING_POINTS + NUM_VALID_POINTS:],
    Target[NUM_TRAINING_POINTS + NUM_VALID_POINTS:],
)
################Manipulating Data##########################
# Flatten every image to a PIXEL_SIZE vector, then cut the training set into
# NUM_BATCHES equal mini-batches.
trainX = np.reshape(trainData, (NUM_TRAINING_POINTS, PIXEL_SIZE))
validX = np.reshape(validData, (NUM_VALID_POINTS, PIXEL_SIZE))
batchesX = np.array(np.split(trainX, NUM_BATCHES))
batchesY = np.array(np.split(trainTarget, NUM_BATCHES))
################Defining variables########################
# NUM_ITERATIONS steps over NUM_BATCHES mini-batches: 5000 / 7 rounds up to
# 715 epochs, the last of which is partial.
NUM_EPOCHS = -(-NUM_ITERATIONS // NUM_BATCHES)
lr = dict()         # learning rate -> per-epoch lists of batch losses
epoch_list = []
mean_list = []      # mean training loss of each epoch
accuracy_list = []  # validation accuracy after each epoch
x = tf.placeholder(tf.float32, [PIXEL_SIZE, None], name="input_points")  # one 784-pixel column per example
W = tf.Variable(tf.truncated_normal(shape=[PIXEL_SIZE, 1], stddev=0.5), name='weights')
b = tf.Variable(0.0, name='bias')
y = tf.placeholder(tf.float32, [None, 1], name="target_labels")  # target labels
lambda_ = 0.01
##############Calculations###############################
# The weight-decay term must be built from W inside the graph.  A NumPy value
# computed from W.eval() is a constant and contributes no gradient.
weight_squared_sum = tf.reduce_sum(tf.square(W))
loss_W = lambda_ / 2 * weight_squared_sum
y_hat = tf.add(tf.matmul(tf.transpose(W), x), b)  # logits, shape (1, batch)
y_hat = tf.transpose(y_hat)                       # -> (batch, 1), same layout as y
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=y)
total_loss = tf.add(tf.reduce_mean(cross_entropy), loss_W)
# Accuracy compares thresholded probabilities with the labels; comparing the
# labels with the raw logits is meaningless.  The ops are built once here so
# the graph does not grow inside the training loop.
predicted_label = tf.cast(tf.greater(tf.sigmoid(y_hat), 0.5), tf.float32)
correct_prediction = tf.equal(predicted_label, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#############Training######################################
valid_feed = {x: np.transpose(validX), y: validTarget}
with tf.Session() as sess:
    for learning_rate in LEARNING_RATE:
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)
        # Start every learning rate from freshly initialised parameters.
        tf.global_variables_initializer().run()
        loss_Values = [[] for _ in range(NUM_EPOCHS)]
        epoch_list, mean_list, accuracy_list = [], [], []
        for i in range(NUM_ITERATIONS):
            epoch, batch = divmod(i, NUM_BATCHES)
            feed = {x: np.transpose(batchesX[batch]), y: batchesY[batch]}
            _, batch_loss = sess.run([train_step, total_loss], feed_dict=feed)
            print("i: ", i)
            print("LOSS:")
            print(batch_loss)
            loss_Values[epoch].append(batch_loss)
            # An epoch ends after its last mini-batch, or when the iteration
            # budget runs out part-way through.
            if batch == NUM_BATCHES - 1 or i == NUM_ITERATIONS - 1:
                accuracy_val = sess.run(accuracy, feed_dict=valid_feed)
                print("Accuracy: ", accuracy_val)
                epoch_list.append(epoch)
                mean_list.append(np.mean(loss_Values[epoch]))
                accuracy_list.append(accuracy_val)
        lr[learning_rate] = loss_Values
    print("Final value")
#for plotting purposes
# Each list is converted from its own contents.
epoch_list = np.array(epoch_list)
mean_list = np.array(mean_list)
accuracy_list = np.array(accuracy_list)
plt.figure()
plt.plot(epoch_list, mean_list, '-', label='Average loss')
plt.plot(epoch_list, accuracy_list, '-', label='Validation accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.show()