I want to get other predicted value rather than y_true and y_pred inside model loss function. I have tried something but they didn't work so how can I use model.predict inside loss function in Tensorflow?
class CustomLoss(tf.keras.losses.Loss):
def __init__(self, anchor, positive, model):
super().__init__()
self.anchor=anchor
self.positive=positive
self.model=model
def convert(self, image):
c_im=self.model(image.reshape(1, 160, 160, 3))
print(c_im)
c_im=tf.make_ndarray(c_im)
return c_im/np.linalg.norm(c_im, ord=2)
def call(self, y_true, y_pred):
y_pred_c=tf.cast(y_pred, dtype=tf.float32)
anchor, positive = self.convert(self.anchor), self.convert(self.positive)
anchor, positive = tf.convert_to_tensor(anchor, dtype=tf.float32), tf.convert_to_tensor(positive, dtype=tf.float32)
pos_d=tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), axis=-1)
neg_d=tf.reduce_sum(tf.square(tf.subtract(anchor, y_pred_c)), axis=-1)
basic_l=tf.add(tf.subtract(pos_d, neg_d), 0.2)
print(basic_l)
del anchor
del positive
return tf.reduce_sum(tf.maximum(basic_l, 0), axis=None)
Related
I am trying to implement a normalized cross entropy loss as described in this publication
The math given is:
This paper provided a PyTorch implementation:
#mlconfig.register
class NormalizedCrossEntropy(torch.nn.Module):
def __init__(self, num_classes, scale=1.0):
super(NormalizedCrossEntropy, self).__init__()
self.device = device
self.num_classes = num_classes
self.scale = scale
def forward(self, pred, labels):
pred = F.log_softmax(pred, dim=1)
label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
nce = -1 * torch.sum(label_one_hot * pred, dim=1) / (- pred.sum(dim=1))
return self.scale * nce.mean()
But I need this to be translated to tensorflow for my ongoing project. Can anyone help me implement this normalized crossentropy loss in tensorflow?
I think is just a matter of translating methods name:
# given y_pred as 1-hot and y-true the multiclass probabilities
def NCE(y_true, y_pred):
num = - tf.math.reduce_sum(tf.multiply(y_true, y_pred), axis=1)
denom = -tf.math.reduce_sum(y_pred, axis=1)
return tf.reduce_mean(num / denom)
t = tf.constant([[1,0,0], [0,0,1]], dtype=tf.float64)
y = tf.constant([[0.3,0.6,0.1], [0.1,0.1,0.8]], dtype=tf.float64)
NCE(t,y)
# <tf.Tensor: shape=(), dtype=float64, numpy=0.55>
Just check if the resulting loss is the same since I've not tested it
I have a model with a complex loss, computed per class of the model output.
As you can see below, I'm computing the loss with some custom loss function, assigning this value to the variable, as tensor are immutable in tensorflow.
def calc_loss(y_true, y_pred):
num_classes=10
pos_loss_class = tf.Variable(tf.zeros((1, num_classes), dtype=tf.dtypes.float32))
for idx in range(num_classes):
pos_loss = SOME_LOSS_FUNC(y_true[:, idx], y_pred[:, idx]
pos_loss_class[:, idx].assign(pos_loss)
return tf.reduce_mean(pos_loss_class)
My code is simple:
with tf.GradientTape() as tape:
output = model(input, training=True)
loss = calc_loss(targets, output)
grads = tape.gradient(loss, model.trainable_weights)
However, I receive None for all model's variables. From my understanding this is caused by a blocking manner of the state of the variable as written here: https://www.tensorflow.org/guide/autodiff#4_took_gradients_through_a_stateful_object
Any suggestions?
Here is the reproducible code, which is a toy example, but demonstrates the issue.
y_true = tf.Variable(tf.random.normal((1, 2)), name='targets')
layer = tf.keras.layers.Dense(2, activation='relu')
x = tf.constant([[1., 2., 3.]])
with tf.GradientTape() as tape:
y_pred = layer(x)
loss_class = tf.Variable(tf.zeros((1,2)), dtype=tf.float32)
for idx in range(2):
loss = tf.abs(y_true[:, idx] - y_pred[:, idx])
loss_class[:, idx].assign(loss)
final_loss = tf.reduce_mean(loss_class)
grads = tape.gradient(final_loss, layer.trainable_weights)
My current second guess, is that the assign method blocks the gradient, as explained in the tensorflow page you liked... instead, try to use just a plain list:
def calc_loss(y_true, y_pred):
num_classes=10
pos_loss_class = []
for idx in range(num_classes):
pos_loss = SOME_LOSS_FUNC(y_true[:, idx], y_pred[:, idx]
pos_loss_class.append(pos_loss)
return tf.reduce_mean(pos_loss_class)
I have created custom loss (Weighted Absolute error) in keras but implementation doesn't work - I get an error ValueError: No gradients provided for any variable: ['my_model/conv2d/kernel:0', 'my_model/conv2d/bias:0'].
I want to apply different weight for each pixel.
class WeightedMeanAbsoluteError(tf.keras.metrics.Metric):
def __init__(self, name='weighted_mean_absolute_error'):
super(WeightedMeanAbsoluteError, self).__init__(name=name)
self.wmae = self.add_weight(name='wmae', initializer='zeros')
def update_state(self, y_true, y_pred, loss_weights):
values = tf.math.abs(y_true - y_pred) * loss_weights
return self.wmae.assign_add(tf.reduce_sum(values))
def result(self):
return self.wmae
def reset_states(self):
# The state of the metric will be reset at the start of each epoch.
self.wmae.assign(0.)
loss_object = WeightedMeanAbsoluteError()
train_loss = WeightedMeanAbsoluteError()
I use the following code to implement a training step:
#tf.function
def train_step(input_images, output_images):
with tf.GradientTape() as tape:
# training=True is only needed if there are layers with different
# behavior during training versus inference (e.g. Dropout).
result_images = model(input_images, training=True)
loss = loss_object(output_images, result_images)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
Also my code works just fine if I use
loss_object = tf.keras.losses.MeanAbsoluteError()
train_loss = tf.keras.metrics.MeanAbsoluteError()
The best and simple way to minimize a weighted standard loss (such mae) is using the sample_weights parameter in fit method where we pass an array with the desired weight of each sample
X = np.random.uniform(0,1, (1000,50))
y = np.random.uniform(0,1, 1000)
W = np.random.randint(1,10, 1000)
inp = Input((50))
x = Dense(64, activation='relu')(inp)
out = Dense(10)(x)
model = Model(inp, out)
model.compile('adam','mae')
model.fit(X,y, epochs=100, sample_weights=W)
I want to train a model with a self-customized loss function. The loss includes two parts. Part1 and part2 can be calculated with y_true (labels) and y_predicted (real output).
However, the loss = part1 +lambda part2
The lambda is a variable that should be able to adjust together with the parameters of the network model. In tensorflow, it seems the lambda can be defined as tf.Variable to updated. However, how can I do it in Keras?
Alright, I have come up with a solution. It is ugly, but it's a solution:
class UtilityLayer(Layer):
def build(self, input_shape):
self.kernel = self.add_weight(
name='kernel',
shape=(1,),
initializer='ones',
trainable=True,
constraint='nonneg'
)
super().build(input_shape)
def call(self, inputs, **kwargs):
return self.kernel
switch = -1
last_loss = 0
def custom_loss_builder(utility_layer):
def custom_loss(y_true, y_pred):
global switch, last_loss
switch *= -1
if switch == 1:
last_loss = utility_layer.trainable_weights[0] * MSE(y_true, y_pred)
return last_loss # your network loss
else:
return last_loss # your lambda loss
return custom_loss
dummy_y = np.empty(len(x))
inputs = Input(shape=(1,))
x = Dense(2, activation='relu')(inputs)
outputs = Dense(1)(x)
utility_outputs = UtilityLayer()(inputs)
model = Model(inputs, [outputs, utility_outputs])
model.compile(optimizer='adam', loss=custom_loss_builder(model.layers[-1]))
model.fit(x, [y, dummy_y], epochs=100)
And the evolution of your lambda:
In tensorflow's model_with_buckets api function, there is a parameter named softmax_loss_function, the signature of which is Function (labels, logits) -> loss-batch.
def model_with_buckets(
encoder_inputs,
decoder_inputs,
targets,
weights,
buckets,
seq2seq,
softmax_loss_function=None,
per_example_loss=False,
name=None
)
The traditional way of assigning softmax_loss_function is using anything like softmax_cross_entropy_with_logits:
def softmax_cross_entropy_with_logits(
_sentinel=None,
labels=None,
logits=None,
dim=-1,
name=None
)
While the sampled_softmax_loss function has following definition:
def sampled_softmax_loss(weights,
biases,
labels,
inputs,
num_sampled,
num_classes,
num_true=1,
sampled_values=None,
remove_accidental_hits=True,
partition_strategy="mod",
name="sampled_softmax_loss"):
In an official tensorflow seq2seq example about translate english to france, it assigned sampled_softmax_loss to model_with_bucket paramater with a wraper function sampled_loss as following:
w = tf.get_variable("proj_w", [size, self.target_vocab_size], dtype=tf.float32)
w_t = tf.transpose(w)
b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=tf.float32)
output_projection = (w, b)
# maybe need Function (labels, logits)
def sampled_loss(labels, inputs):
labels = tf.reshape(labels, [-1, 1])
return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples, self.target_vocab_size)
softmax_loss_function = sampled_loss
It's wield to me. And the code runs with a lot of errors. Because inputs is not equal logits. In my understanding, logits = inputs * weights + biases. So, what is the right way to use sampled_softmax_loss function in tensorflow's model_with_buckets?