Perceptual loss function is not giving any gradient in TensorFlow

I am trying to implement a perceptual loss function in TensorFlow, and here is the feature loss part:
loss_model = tf.keras.models.Sequential()
for eachLayer in base_model.layers[:12]:
    eachLayer.trainable = False
    loss_model.add(eachLayer)

def meanSquaredLoss(y_true, y_pred):
    return tf.reduce_mean(tf.keras.losses.MSE(y_true, y_pred))

def featureLoss(image):
    predicted_image = model(image, training=False)
    activatedModelVal = loss_model(predicted_image, training=False)
    actualModelVal = loss_model(image, training=False)
    return meanSquaredLoss(actualModelVal, activatedModelVal)
Here is the style loss function:
def gram_matrix(input_tensor):
    result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    input_shape = tf.shape(input_tensor)
    num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
    return result / num_locations

def styleLoss(image):
    predicted_image = model(image, training=False)
    activatedModelVal = loss_model(predicted_image, training=False)
    actualModelVal = loss_model(image, training=False)
    return meanSquaredLoss(gram_matrix(actualModelVal), gram_matrix(activatedModelVal))
So now I have both losses, and here is what I have done for the optimization step:
opt = tf.keras.optimizers.Adam(0.02)

def each_train_step(image, showImage=False):
    predicted_image = model(image, training=False)
    loss = tf.reduce_sum(featureLoss(predicted_image, image) + styleLoss(predicted_image, image))
    with tf.GradientTape() as tape:
        grad = tape.gradient(loss, model.trainable_variables)
        print(grad)
        # opt.apply_gradients(zip(grad, model.trainable_variables))
    if showImage:
        plt.imshow(predicted_image)
The problem is that grad ends up as a list of None values and I don't know why. Why is the gradient returning a list of None? Any solution to get the actual gradients?
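For reference, a minimal sketch of a likely fix, assuming model is the image-generating network and that featureLoss and styleLoss keep the single-argument signatures defined above (the extra predicted_image argument passed to them in the snippet does not match those definitions): the forward pass and the loss have to be computed inside the GradientTape context; computing them before the tape is opened means nothing is recorded, so tape.gradient returns None for every variable.

opt = tf.keras.optimizers.Adam(0.02)

def each_train_step(image, showImage=False):
    with tf.GradientTape() as tape:
        # The forward pass and the loss must run while the tape is recording,
        # otherwise tape.gradient() has nothing connecting the loss to the
        # variables and returns None for each of them.
        loss = featureLoss(image) + styleLoss(image)
    grad = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(grad, model.trainable_variables))
    if showImage:
        plt.imshow(model(image, training=False)[0])
    return loss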

Related

How to select only some trainable variables from an NN model to minimize with the SciPy L_BFGS_B optimizer?

I'm implementing a physics-informed neural network (PINN) model to solve the Navier-Stokes equations, as in PINN. This type of model trains better with L-BFGS-B, and the optimizer that works best for my case is fmin_l_bfgs_b from SciPy.
The problem is that this optimizer does not work directly with the TensorFlow library. To make it work with TensorFlow, I implemented a class L_BFGS_B with the following methods:
set_weights: set weights to the model
evaluate: evaluate loss and gradients as ndarray
tf_evaluate: evaluate loss and gradients as tf.Tensor
fit: train the model
Everything works fine: the optimizer trains all the weights of the model. The problem is that I only want to train two out of the 18 trainable variables.
Optimizer class
class L_BFGS_B:
    def __init__(self, model, x_train, y_train, factr=1, m=50, maxls=50, maxfun=50000, maxiter=50000):
        self.model = model
        # x_train = xyt, y_train = uv
        self.x_train = x_train  # tf.constant(x_train, dtype=tf.float32)
        self.y_train = y_train  # tf.constant(y_train, dtype=tf.float32)
        # when the iteration ends
        self.factr = factr
        # the maximum number of variable metric corrections used
        self.m = m
        # max number of line search steps per iteration
        # in this case 50 per iteration
        self.maxls = maxls
        # max number of iterations
        self.maxiter = maxiter
        self.maxfun = maxfun

    #tf.function
    def tf_evaluate(self, x, y):
        """
        Evaluate loss and gradients for weights as tf.Tensor.
        Args:
            x: input data.
        Returns:
            loss and gradients for weights as tf.Tensor.
        """
        # where x = xyt, y = uv
        with tf.GradientTape() as g:
            uv_fuv = self.model([x, y])
            loss = self.model.losses[0]
        grads = g.gradient(loss, self.model.trainable_variables,
                           unconnected_gradients=tf.UnconnectedGradients.ZERO)
        return loss, grads

    def set_weights(self, flat_weights):
        """
        Set weights to the model.
        Args:
            flat_weights: flattened weights.
        """
        weights_shapes = [w.shape for w in self.model.get_weights()]
        n = [0] + [np.prod(shape) for shape in weights_shapes]
        partition = np.cumsum(n)
        weights = [flat_weights[from_part:to_part].reshape(shape)
                   for from_part, to_part, shape
                   in zip(partition[:-1], partition[1:], weights_shapes)]
        self.model.set_weights(weights)

    def evaluate(self, flat_weights):
        """
        Evaluate loss and gradients for weights as ndarray.
        Args:
            weights: flattened weights.
        Returns:
            loss and gradients for weights as ndarray.
        """
        self.set_weights(flat_weights)
        loss, grads = self.tf_evaluate(self.x_train, self.y_train)
        loss = loss.numpy().astype('float64')
        grads = np.concatenate([g.numpy().flatten() for g in grads]).astype('float64')
        #printest('loss', loss)
        return loss, grads

    def fit(self):
        """
        Train the model using the L-BFGS-B algorithm.
        """
        # Flatten initial weights
        initial_weights = np.concatenate([w.flatten() for w in self.model.get_weights()])
        # optimizer
        fmin_l_bfgs_b(func=self.evaluate, x0=initial_weights,
                      factr=self.factr, m=self.m,
                      maxls=self.maxls, maxiter=self.maxiter,
                      maxfun=self.maxfun)


if __name__ == "__main__":
    ...
    # load data
    ...
    indices = np.random.choice(N*T, n_train, replace=False)
    xyt_train = tf.concat((x_1d[indices], y_1d[indices], t_1d[indices]), axis=1)
    uv_train = tf.concat((u_1d[indices], v_1d[indices]), axis=1)
    # Model
    nn_model = NeuralNet().build()
    pinn_model = PhysicsInformedNN(model=nn_model).build()
    # Optimizer
    lbfgs = L_BFGS_B(model=pinn_model, x_train=xyt_train, y_train=uv_train)
    lbfgs.fit()
Attempt
Use args in fmin_l_bfgs_b, where args receives the trainable variables that I want to keep fixed, and x0 the initial two variables to be minimized. The following code is only a sanity test to see whether passing the weights in this way works.
def evaluate(self, weights_var, *args):
    weights = np.append(weights_var, args)
    self.set_weights(weights)
    loss, grads = self.tf_evaluate(self.x_train, self.y_train)
    loss = loss.numpy().astype('float64')
    grads = np.concatenate([g.numpy().flatten() for g in grads]).astype('float64')
    #printest('loss', loss)
    return loss, grads

def fit(self):
    """
    Train the model using the L-BFGS-B algorithm.
    """
    # Flatten initial weights
    weights_fixed = np.concatenate([w.flatten() for w in self.model.get_weights()[2:]])
    weights_var = np.concatenate([w.flatten() for w in self.model.get_weights()[0:2]])
    # optimizer
    fmin_l_bfgs_b(func=self.evaluate, x0=weights_var, args=(weights_fixed,),
                  factr=self.factr, m=self.m,
                  maxls=self.maxls, maxiter=self.maxiter,
                  maxfun=self.maxfun)
Unfortunately, the following error is raised: 0-th dimension must be fixed to 2 but got 2644.
Question: Is there a way to keep the trainable variables that I do not want to minimize fixed, work only with the ones that are not fixed, and at the end set them back into the neural network model, using this type of optimizer?
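One possible approach, sketched under two assumptions: the two variables to be trained are the first two entries of model.get_weights() (matching the [0:2] slicing above), and all of the model's weights are trainable, so that get_weights() and trainable_variables line up. The idea is to hand fmin_l_bfgs_b only the flattened variable weights as x0 and to return only the matching slice of the flattened gradient, since the gradient must have the same length as x0 (which is what the "0-th dimension must be fixed to 2 but got 2644" error points at).

def evaluate(self, flat_var_weights):
    # Rebuild the full flat weight vector: the entries being optimized come
    # from the optimizer, the rest stay at their fixed values (order assumed
    # to match model.get_weights()).
    full_weights = np.concatenate([flat_var_weights, self.flat_fixed_weights])
    self.set_weights(full_weights)
    loss, grads = self.tf_evaluate(self.x_train, self.y_train)
    flat_grads = np.concatenate([g.numpy().flatten() for g in grads]).astype('float64')
    n_var = flat_var_weights.size
    # Return only the gradient of the variables being optimized so that its
    # length matches x0, as fmin_l_bfgs_b requires.
    return loss.numpy().astype('float64'), flat_grads[:n_var]

def fit(self):
    all_weights = self.model.get_weights()
    x0 = np.concatenate([w.flatten() for w in all_weights[0:2]])
    self.flat_fixed_weights = np.concatenate([w.flatten() for w in all_weights[2:]])
    fmin_l_bfgs_b(func=self.evaluate, x0=x0,
                  factr=self.factr, m=self.m,
                  maxls=self.maxls, maxiter=self.maxiter, maxfun=self.maxfun)

After fmin_l_bfgs_b returns, the final values are already in the model, because set_weights is called inside evaluate on every function evaluation.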

tfr.keras.losses.ListMLELoss() is always 0 during training, validation, and testing

I have made a ranking model using tensorflow_ranking losses and metrics, but ListMLELoss() is always 0. The model trains and completes, but I imagine no learning is actually happening since the loss is not being calculated. I tried to follow this guide, https://www.tensorflow.org/recommenders/examples/listwise_ranking, as closely as I could, but my use case differs in a few ways. I am not sure why model.fit() runs and I get an NDCG value, but clearly the model cannot be learning if a loss value is not being computed.
Here is my ranking model class:
class RankingModel(tf.keras.Model):
    def __init__(self, embeddings, vocab_size_dict, dim_dict, loss, activation='sigmoid'):
        super().__init__()
        self.embeddings = embeddings
        self.embedding_layers = {}
        self.vocab_size_dict = vocab_size_dict
        self.dim_dict = dim_dict
        self.activation = activation
        self.loss = loss

        self.embedding_layers['feature_one'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_one']+1,
            self.dim_dict['feature_one'],
            name='embedded_feature_one')
        self.embedding_layers['feature_two'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_two']+1,
            self.dim_dict['feature_two'],
            name='embedded_feature_two')
        self.embedding_layers['feature_three'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_three']+1,
            self.dim_dict['feature_three'],
            name='embedded_feature_three')
        self.embedding_layers['feature_four'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_four']+1,
            self.dim_dict['feature_four'],
            name='embedded_feature_four')
        self.embedding_layers['feature_five'] = tf.keras.layers.Embedding(
            self.vocab_size_dict['feature_five']+1,
            self.dim_dict['feature_five'],
            name='embedded_feature_five')

        self.flatten = tf.keras.layers.Flatten()
        self.concatenate = tf.keras.layers.Concatenate(axis=1, name='Input_Concatenation')
        self.batchnorm = tf.keras.layers.BatchNormalization(name='batchnorm')
        self.score_model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='leaky_relu'),
            tf.keras.layers.Dense(12, activation=activation)
        ])
        self.task = tfrs.tasks.Ranking(
            loss=self.loss,
            metrics=[
                tfr.keras.metrics.NDCGMetric(name="ndcg_metric")
            ]
        )

    def __call__(self, features, training=False):
        feats = []
        for feat, tens in features[0].items():
            if feat in self.embeddings:
                embedding = self.embedding_layers[feat](tens)
                flatten = self.flatten(embedding)
                feats.append(flatten)
            if feat == 'continuous':
                flatten = self.flatten(tens)
                feats.append(flatten)
        deep_concatenated = self.concatenate(feats)
        batchnorm = self.batchnorm(deep_concatenated)
        scores = self.score_model(batchnorm)
        print("scores: ", scores)
        print("mask: ", features[0]['mask'])
        masked_scores = tf.boolean_mask(scores, features[0]['mask'])
        # pred = tf.expand_dims(masked_scores, axis=1)
        # return pred
        return tf.expand_dims(masked_scores, axis=1)

    def compute_loss(self, features, training=False):
        labels = features[1]
        # print("labels: ", labels)
        # print("mask: ", features[0]['mask'])
        masked_labels = tf.boolean_mask(labels, features[0]['mask'])
        # print("masked labels:", masked_labels)
        masked_labels = tf.expand_dims(masked_labels, axis=1)
        print("masked_labels: ", masked_labels)
        scores = self(features)
        print("scores: ", scores)
        print("loss: ", self.task(labels=masked_labels, predictions=scores))
        return self.task(
            labels=masked_labels,
            predictions=scores
        )

    def train_step(self, inputs):
        """Custom train step using the `compute_loss` method."""
        with tf.GradientTape() as tape:
            loss = self.compute_loss(inputs)
            # Handle regularization losses as well.
            regularization_loss = sum(self.losses)
            total_loss = loss + regularization_loss
        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        metrics = {metric.name: metric.result() for metric in self.metrics}
        metrics["loss"] = loss
        metrics["regularization_loss"] = regularization_loss
        metrics["total_loss"] = total_loss
        return metrics

    def test_step(self, inputs):
        """Custom test step using the `compute_loss` method."""
        loss = self.compute_loss(inputs)
        # Handle regularization losses as well.
        regularization_loss = sum(self.losses)
        total_loss = loss + regularization_loss
        metrics = {metric.name: metric.result() for metric in self.metrics}
        metrics["loss"] = loss
        metrics["regularization_loss"] = regularization_loss
        metrics["total_loss"] = total_loss
        return metrics
Can anybody see why I am not getting a loss value? Thanks a lot. Please let me know if you need additional info; maybe I can create some synthetic data so you can run it all yourself. I've been pulling my hair out for a few days trying to get this to work, so any advice is MUCH appreciated.
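One thing worth checking, based only on the shapes visible in the code above (so an assumption rather than a confirmed diagnosis): tf.boolean_mask flattens the scores to a 1-D tensor, and tf.expand_dims(..., axis=1) then produces shape [num_items, 1], i.e. many lists of length one. ListMLE over a single-item list is always 0, because the only permutation of one item has probability 1, so the loss would be exactly 0 no matter what the scores are. A quick way to see this:

import tensorflow as tf
import tensorflow_ranking as tfr

loss_fn = tfr.keras.losses.ListMLELoss()

# Shape [batch, 1]: every "list" contains a single item, so the loss is 0
# regardless of the scores.
print(loss_fn(tf.constant([[3.0], [1.0]]),
              tf.constant([[0.2], [-1.3]])).numpy())    # 0.0

# Shape [batch, list_size] with list_size > 1 gives a non-zero loss.
print(loss_fn(tf.constant([[3.0, 1.0, 2.0]]),
              tf.constant([[0.2, -1.3, 0.4]])).numpy())  # > 0

If that is the cause, the scores and labels need to stay grouped as [batch, list_size] per query rather than being expanded item by item.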

Weighted Absolute Error implementation doesn't work in tensorflow (keras)

I have created a custom loss (weighted absolute error) in Keras, but the implementation doesn't work - I get the error ValueError: No gradients provided for any variable: ['my_model/conv2d/kernel:0', 'my_model/conv2d/bias:0'].
I want to apply a different weight to each pixel.
class WeightedMeanAbsoluteError(tf.keras.metrics.Metric):
    def __init__(self, name='weighted_mean_absolute_error'):
        super(WeightedMeanAbsoluteError, self).__init__(name=name)
        self.wmae = self.add_weight(name='wmae', initializer='zeros')

    def update_state(self, y_true, y_pred, loss_weights):
        values = tf.math.abs(y_true - y_pred) * loss_weights
        return self.wmae.assign_add(tf.reduce_sum(values))

    def result(self):
        return self.wmae

    def reset_states(self):
        # The state of the metric will be reset at the start of each epoch.
        self.wmae.assign(0.)


loss_object = WeightedMeanAbsoluteError()
train_loss = WeightedMeanAbsoluteError()
I use the following code to implement a training step:
#tf.function
def train_step(input_images, output_images):
    with tf.GradientTape() as tape:
        # training=True is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        result_images = model(input_images, training=True)
        loss = loss_object(output_images, result_images)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
Also my code works just fine if I use
loss_object = tf.keras.losses.MeanAbsoluteError()
train_loss = tf.keras.metrics.MeanAbsoluteError()
The best and simplest way to minimize a weighted standard loss (such as MAE) is to use the sample_weight parameter of the fit method, where we pass an array with the desired weight of each sample:
import numpy as np
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

X = np.random.uniform(0, 1, (1000, 50))
y = np.random.uniform(0, 1, 1000)
W = np.random.randint(1, 10, 1000)

inp = Input((50,))
x = Dense(64, activation='relu')(inp)
out = Dense(10)(x)

model = Model(inp, out)
model.compile('adam', 'mae')
model.fit(X, y, epochs=100, sample_weight=W)
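If per-pixel weights are needed, as in the question, rather than per-sample weights, another option is to write the weighted absolute error as an ordinary differentiable function and use it as the loss in the custom training step; the original attempt subclassed tf.keras.metrics.Metric, whose assign_add() state update is not differentiable, which is what produces the "No gradients provided" error. A sketch, where weight_map is a hypothetical tensor broadcastable to the image shape and model/optimizer are the ones from the question:

import tensorflow as tf

def make_weighted_mae(weight_map):
    # weight_map: tensor broadcastable to y_true / y_pred, e.g. shape (H, W, 1)
    def weighted_mae(y_true, y_pred):
        # Plain tensor ops keep the loss connected to the computation graph,
        # so tape.gradient() can flow through it.
        return tf.reduce_mean(tf.abs(y_true - y_pred) * weight_map)
    return weighted_mae

loss_fn = make_weighted_mae(weight_map)

@tf.function
def train_step(input_images, output_images):
    with tf.GradientTape() as tape:
        result_images = model(input_images, training=True)
        loss = loss_fn(output_images, result_images)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss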

How to apply a computed loss to a graph?

I am new to TensorFlow and trying to code a toy discriminator problem. The way I have it set up, the loss is calculated from the expert_actions and the novice_actions. However, I am running into an error when I try to optimize using the computed loss. The error is ValueError: No variables to optimize. I do understand that I am getting the error because there is no feed_dict. However, I do not know the solution to this.
class discriminator:
    def __init__(self, n_actions, learning_rate):
        self.n_actions = n_actions
        self.learning_rate_dist = learning_rate
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.dis_input = tf.placeholder(tf.float32, [None, self.n_actions])
            self.discriminator_function()
            init = tf.global_variables_initializer()
        self.sess = tf.Session(graph=self.graph)
        self.sess.run(init)

    def discriminator_function(self, hidden=None):
        if hidden == None:
            hidden = 16
        x = tf.layers.dense(self.dis_input, hidden, tf.nn.relu)
        x = tf.layers.dense(x, hidden, tf.nn.relu)
        self.dis_output = tf.layers.dense(x, 1)

    def discriminator(self, expert_actions, novice_actions):
        expert_out = self.sess.run(self.dis_output, feed_dict={self.dis_input: expert_actions})
        novice_out = self.sess.run(self.dis_output, feed_dict={self.dis_input: novice_actions})
        loss = tf.reduce_mean(tf.log(expert_out) + tf.log(1. - novice_out))
        # update discriminator loss
        optimize = tf.train.AdamOptimizer(self.learning_rate_dis).minimize(-loss)
        self.sess.run(optimize)  # error over here
        return loss


if __name__ == '__main__':
    d = discriminator(2, 0.001)
    expert_actions = np.random.randint(2, size=10)
    novice_actions = np.random.randint(2, size=10)
    d.discriminator(expert_actions, novice_actions)
You are trying to optimize loss = tf.reduce_mean(tf.log(expert_out) + tf.log(1.-novice_out)) with expert_out and novice_out being NumPy arrays, so there are no variables between the input and the loss from which to compute gradients.
Your discriminator function should be something like this:
def discriminator(self, expert_actions, novice_actions):
    # Make sure you add the new ops and variables to the graph defined.
    with self.graph.as_default():
        loss = tf.reduce_mean(tf.log('Should be a tensor that is part of the graph and not a numpy array'))
        optimize = tf.train.AdamOptimizer(0.01).minimize(-loss)
        self.sess.run(tf.global_variables_initializer())
        # pass the inputs here
        loss = self.sess.run([loss, optimize], feed_dict={self.dis_input: expert_actions})
        return loss
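A more concrete sketch of that idea, with hypothetical names (expert_input, novice_input, _network) introduced for illustration: build the network, the loss and the train op once inside the graph, and only use session.run with a feed_dict to execute them. The output layer here uses a sigmoid so the logs stay well-defined, which is a choice not present in the original snippet.

class discriminator:
    def __init__(self, n_actions, learning_rate):
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Separate placeholders so both batches flow through the graph.
            self.expert_input = tf.placeholder(tf.float32, [None, n_actions])
            self.novice_input = tf.placeholder(tf.float32, [None, n_actions])
            expert_out = self._network(self.expert_input)
            novice_out = self._network(self.novice_input, reuse=True)
            # The loss is built from graph tensors, so gradients exist.
            self.loss = tf.reduce_mean(tf.log(expert_out) + tf.log(1. - novice_out))
            self.optimize = tf.train.AdamOptimizer(learning_rate).minimize(-self.loss)
            init = tf.global_variables_initializer()
        self.sess = tf.Session(graph=self.graph)
        self.sess.run(init)

    def _network(self, x, reuse=False):
        # Explicit layer names so the second call reuses the same variables.
        with tf.variable_scope('disc', reuse=reuse):
            h = tf.layers.dense(x, 16, tf.nn.relu, name='fc1')
            h = tf.layers.dense(h, 16, tf.nn.relu, name='fc2')
            # Sigmoid keeps the output in (0, 1) so tf.log is well-defined.
            return tf.layers.dense(h, 1, tf.nn.sigmoid, name='out')

    def train(self, expert_actions, novice_actions):
        loss, _ = self.sess.run([self.loss, self.optimize],
                                feed_dict={self.expert_input: expert_actions,
                                           self.novice_input: novice_actions})
        return loss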

character level bidirectional language model in tensorflow

Inspired by Andrej Karpathy's char-rnn, there is a TensorFlow implementation, sherjilozair/char-rnn-tensorflow (multi-layer recurrent neural networks, LSTM/RNN, for character-level language models in Python using TensorFlow). I want to implement a bidirectional character-level language model based on this code. I changed model.py and wrote the following:
class Model:
    def __init__(self, input_data, targets, seq_length=Config.max_seq_length, training=True):
        if Config.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif Config.model == 'gru':
            cell_fn = rnn.GRUCell
        elif Config.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif Config.model == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception("model type not supported: {}".format(Config.model))

        fw_cells = []
        bw_cells = []
        for _ in range(Config.num_layers):
            fw_cell = cell_fn(Config.rnn_size)
            bw_cell = cell_fn(Config.rnn_size)
            fw_cells.append(fw_cell)
            bw_cells.append(bw_cell)
        self.fw_cell = rnn.MultiRNNCell(fw_cells, state_is_tuple=True)
        self.bw_cell = rnn.MultiRNNCell(bw_cells, state_is_tuple=True)

        self.input_data, self.targets = input_data, targets
        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [Config.rnn_size*2, Config.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [Config.vocab_size])
            embedding = tf.get_variable("embedding", [Config.vocab_size, Config.rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        inputs = tf.unstack(inputs, num=seq_length, axis=1)
        outputs, _, _ = tf.nn.static_bidirectional_rnn(self.fw_cell, self.bw_cell, inputs,
                                                       dtype=tf.float32, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, Config.rnn_size*2])

        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.lr = tf.Variable(0.0, trainable=False)

        if training:
            loss = legacy_seq2seq.sequence_loss_by_example(
                [self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.sign(tf.cast(tf.reshape(self.targets, [-1]), dtype=tf.float32))])
            with tf.name_scope('cost'):
                self.cost = tf.reduce_mean(loss)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), Config.grad_clip)
            with tf.name_scope('optimizer'):
                optimizer = tf.train.AdamOptimizer(self.lr)
                self.train_op = optimizer.apply_gradients(zip(grads, tvars))
In the training phase I see fast convergence: after about 3,000 iterations the loss reaches 0.003. In the test phase the probability of every character is 1.0. I think there is a mistake.
I would be glad to get some help finding my mistake.
Use the preceding and following outputs to predict the probability of the current character. In your case, you used the current RNN output to predict the probability of the current character, but in a bidirectional network that output has already seen the character it is supposed to predict, so the model can simply copy it.
It also looks like you set self.lr = tf.Variable(0.0, trainable=False). Try changing this to a nonzero value. If you are reading probabilities from self.probs during the testing phase, they should be normalized appropriately.
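A sketch of how the first suggestion could be applied to the code in the question (reusing outputs, seq_length, Config.rnn_size, softmax_w and softmax_b from the snippet above; treat it as an illustration rather than a drop-in patch): split the bidirectional outputs, and for position t feed the classifier only the forward output from t-1 and the backward output from t+1, so the character being predicted is never visible to its own logits.

# Each element of `outputs` has shape [batch, 2*rnn_size]: the first half is
# the forward output at t, the second half the backward output at t.
fw_outputs = [o[:, :Config.rnn_size] for o in outputs]   # has seen chars <= t
bw_outputs = [o[:, Config.rnn_size:] for o in outputs]   # has seen chars >= t

zero = tf.zeros_like(fw_outputs[0])

# Predict the character at position t from fw[t-1] (left context) and
# bw[t+1] (right context) only.
context = []
for t in range(seq_length):
    left = fw_outputs[t - 1] if t > 0 else zero
    right = bw_outputs[t + 1] if t < seq_length - 1 else zero
    context.append(tf.concat([left, right], axis=1))

output = tf.reshape(tf.concat(context, 1), [-1, Config.rnn_size * 2])
self.logits = tf.matmul(output, softmax_w) + softmax_b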