What exactly does tensorflow.gather() do?

I saw code for a triplet loss that contains the function tf.gather(). What does this function do?
I have gone through TensorFlow's official documentation but am still unable to understand it.
def margin_triplet_loss(y_true, y_pred, margin, batch_size):
    anchor = tf.gather(y_pred, tf.range(0, batch_size, 3))
    positive = tf.gather(y_pred, tf.range(1, batch_size, 3))
    negative = tf.gather(y_pred, tf.range(2, batch_size, 3))

    loss = K.maximum(margin
                     + K.sum(K.square(anchor - positive), axis=1)
                     - K.sum(K.square(anchor - negative), axis=1),
                     0.0)
    return K.mean(loss)

tf.gather is a function for indexing a tensor: it gathers the elements you specify through the indices argument. Plain Python-style strided indexing is not natively possible on TensorFlow tensors, which is why this op exists.
tf.gather(y_pred, tf.range(0, batch_size, 3)) is equivalent to the NumPy expression y_pred[0:batch_size:3], which means it returns every third element starting from the first one.
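As a quick sanity check, here is a minimal self-contained comparison (my own example, not from the question) of tf.gather with the equivalent NumPy strided slice:

import numpy as np
import tensorflow as tf

y_pred = tf.constant(np.arange(12, dtype=np.float32))   # [0., 1., ..., 11.]
batch_size = 12

# Gather every third element starting at index 0 (the "anchor" rows).
anchors_tf = tf.gather(y_pred, tf.range(0, batch_size, 3))

# The same selection with NumPy strided slicing.
anchors_np = y_pred.numpy()[0:batch_size:3]

print(anchors_tf.numpy())   # [0. 3. 6. 9.]
print(anchors_np)           # [0. 3. 6. 9.]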

Related

When writing a custom loss function, should I use tf.reduce_mean, and if so how? Does it ever matter?

The sample code below shows that all of the following give the same (correct) results when writing a custom loss function (computing mean squared error) for a simple linear regression model:
1. Do not use tf.reduce_mean() (so a loss is returned for each example)
2. Use tf.reduce_mean() (so a single scalar loss is returned)
3. Use tf.reduce_mean(..., axis=-1)
Is there any reason to prefer one approach over another, and are there any circumstances where it makes a difference?
(For example, the sample code at "Make a custom loss function in keras" suggests that axis=-1 should be used.)
import numpy as np
import tensorflow as tf

# Create a simple dataset to do linear regression on.
# The mean squared error (~ best achievable MSE loss after fitting the linear regression) for this dataset is 0.01.
xtrain = np.random.randn(5000)                  # Already normalized
ytrain = xtrain + np.random.randn(5000) * 0.1   # Close enough to being normalized

# Create a model, fit the linear regression, and report the final loss.
def cre_and_fit(loss="mean_squared_error", lossdescription="", epochs=20):
    model = tf.keras.models.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
    model.compile(loss=loss, optimizer="RMSProp")
    history = model.fit(xtrain, ytrain, epochs=epochs, verbose=False)
    print(f"Final loss value for {lossdescription}: {history.history['loss'][-1]:.4f}")

# Result from the standard MSE loss ~ 0.01
cre_and_fit("mean_squared_error", "Keras standard MSE")

# This gives the right result without reducing. Return shape = (batch_size,)
cre_and_fit(lambda y_true, y_pred: (y_true - y_pred) * (y_true - y_pred),
            "custom loss, not reducing over batch items")

# This also gives the right result, reducing over batch items. Return shape = ()
cre_and_fit(lambda y_true, y_pred: tf.reduce_mean((y_true - y_pred) * (y_true - y_pred)),
            "custom loss, reducing over batch items")

# How about using axis=-1? Also gives the same result.
cre_and_fit(lambda y_true, y_pred: tf.reduce_mean((y_true - y_pred) * (y_true - y_pred), axis=-1),
            "custom loss, reducing with axis=-1")
When you pass a lambda (or any callable) to compile and then call fit, TF wraps it inside a LossFunctionWrapper, which is a subclass of Loss with a default reduction type of ReductionV2.AUTO. Note that a Loss object always has a reduction type describing how the loss tensor is reduced to a single scalar.
Under most circumstances, ReductionV2.AUTO translates to ReductionV2.SUM_OVER_BATCH_SIZE which, despite its name, actually takes the mean over all axes of the underlying lambda's output.
import tensorflow as tf
from keras import losses as losses_mod
from keras.utils import losses_utils
a = tf.random.uniform((10,2))
b = tf.random.uniform((10,2))
l_auto = losses_mod.LossFunctionWrapper(fn=lambda y_true, y_pred : tf.square(y_true - y_pred), reduction=losses_utils.ReductionV2.AUTO)
l_sum = losses_mod.LossFunctionWrapper(fn=lambda y_true, y_pred : tf.square(y_true - y_pred), reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)
l_auto(a,b).shape.rank == l_sum(a,b).shape.rank == 0 # rank 0 means scalar
l_auto(a,b) == tf.reduce_mean(tf.square(a - b)) # True
l_sum(a,b) == tf.reduce_mean(tf.square(a - b)) # True
So to answer your question, the three options are equivalent, since they all eventually result in a single scalar that is the mean of all elements in the raw tf.square(a - b) loss tensor. However, should you wish to perform an operation other than reduce_mean (e.g. reduce_sum) in the lambda, then the three will yield different results:
l1 = losses_mod.LossFunctionWrapper(fn=lambda y_true, y_pred: tf.square(y_true - y_pred),
                                    reduction=losses_utils.ReductionV2.AUTO)
l2 = losses_mod.LossFunctionWrapper(fn=lambda y_true, y_pred: tf.reduce_sum(tf.square(y_true - y_pred)),
                                    reduction=losses_utils.ReductionV2.AUTO)
l3 = losses_mod.LossFunctionWrapper(fn=lambda y_true, y_pred: tf.reduce_sum(tf.square(y_true - y_pred), axis=-1),
                                    reduction=losses_utils.ReductionV2.AUTO)
l1(a,b) == tf.reduce_mean(tf.square(a-b)) # True
l2(a,b) == tf.reduce_sum(tf.square(a-b)) # True
l3(a,b) == tf.reduce_mean(tf.reduce_sum(tf.square(a-b), axis=-1)) # True
Concretely, l2(a,b) == tf.reduce_mean(tf.reduce_sum(tf.square(a-b))), but that is just tf.reduce_sum(tf.square(a-b)), since the mean of a scalar is the scalar itself.

Feeding weight maps into a CNN (UNET network) in keras

I have implemented the U-Net network described here.
The network works fine, but in the paper the authors mention adding weight maps to the network for better boundary separation. As far as I understand, the weight maps are calculated this way:
import numpy as np
# Assumed helpers (the imports were not shown in the original post): the usual choices are
# skimage.measure.label and scipy.ndimage.distance_transform_edt.
from scipy.ndimage import distance_transform_edt
from skimage.measure import label

def unet_weight_map(y, wc=None, w0=10, sigma=5):
    """
    Parameters
    ----------
    y: Numpy array
        2D array of shape (image_height, image_width) representing a binary mask
        of objects.
    wc: dict
        Dictionary of class weights.
    w0: int
        Border weight parameter.
    sigma: int
        Border width parameter.

    Returns
    -------
    Numpy array
        Training weights. A 2D array of shape (image_height, image_width).
    """
    labels = label(y)
    no_labels = labels == 0
    label_ids = sorted(np.unique(labels))[1:]

    if len(label_ids) > 1:
        distances = np.zeros((y.shape[0], y.shape[1], len(label_ids)))
        for i, label_id in enumerate(label_ids):
            distances[:, :, i] = distance_transform_edt(labels != label_id)

        distances = np.sort(distances, axis=2)
        d1 = distances[:, :, 0]
        d2 = distances[:, :, 1]
        w = w0 * np.exp(-1/2 * ((d1 + d2) / sigma)**2) * no_labels
    else:
        w = np.zeros_like(y)

    if wc:
        class_weights = np.zeros_like(y)
        for k, v in wc.items():
            class_weights[y == k] = v
        w = w + class_weights
    return w
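For reference, here is a minimal toy usage of this function (my own illustrative example, assuming label comes from skimage.measure and distance_transform_edt from scipy.ndimage, as imported above):

# Two square objects separated by a narrow background gap.
toy_mask = np.zeros((64, 64), dtype=np.int32)
toy_mask[10:30, 10:30] = 1   # object 1
toy_mask[10:30, 35:55] = 1   # object 2

weight_map = unet_weight_map(toy_mask, wc={0: 1, 1: 2}, w0=10, sigma=5)
print(weight_map.shape)   # (64, 64)
# The weights are largest in the narrow background gap between the two objects.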
Up to here everything is fine. But my question is how I can make use of these weight maps in the network. I have a weighted binary cross-entropy loss defined as below:
def weighted_binary_crossentropy(y_true, y_pred, weight=[1., 2.]):
    y_true = K.clip(y_true, K.epsilon(), 1 - K.epsilon())
    y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
    logloss = -(y_true * K.log(y_pred) * weight[0] + (1 - y_true) * K.log(1 - y_pred) * weight[1])
    return K.mean(logloss, axis=-1)
But here I pass the weights as an [a, b] array of class weights into the loss and then feed this loss to the network when compiling. My question is: should I feed those weight maps into this customized loss function? If so, how? If not, what other way can I use in Keras? I have read many Stack Overflow questions related to this problem, but I could not find an answer. I can provide any information about my network if needed.
In order to pass your own parameters to a custom loss function, you have two options: you can either subclass Loss, or use a wrapper function.
For example, you can define a wrapper function like this:
def wrapper_loss(weights=[1., 2.]):
    def weighted_binary_crossentropy(y_true, y_pred):
        y_true = K.clip(y_true, K.epsilon(), 1 - K.epsilon())
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        logloss = -(y_true * K.log(y_pred) * weights[0] + (1 - y_true) * K.log(1 - y_pred) * weights[1])
        return K.mean(logloss, axis=-1)
    return weighted_binary_crossentropy
Then, pass it to the model.compile() like this:
model.compile(loss=wrapper_loss(weights=[1.,2.]), optimizer=...)
P.S.: You may also want to check these out:
tf.nn.weighted_cross_entropy_with_logits
class_weight argument for model.fit()
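For the second pointer, a minimal sketch of the class_weight argument for a plain classifier (my own illustration; model, xtrain, ytrain are placeholders, and note that Keras applies class_weight per sample, so for dense per-pixel segmentation targets the weight-map approach below is usually needed instead):

# Hypothetical use of class_weight: samples of class 1 count twice as much as class 0.
model.compile(loss="binary_crossentropy", optimizer="adam")
model.fit(xtrain, ytrain, epochs=10, class_weight={0: 1.0, 1: 2.0})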
I realized how to use those maps. First, I define an Input (with the same shape as the ground-truth labels) in the same way as the Input used for the images, something like:
weights = Input(shape=(shape_of_groundtruth_labels))
I define the customized loss with the same structure as the wrapper_loss defined above, but this time it takes the weight maps rather than the class weights [1, 2]. Then, when defining the model, which needs inputs and outputs, I pass both the input images and the input weights, something like:
model = Model(inputs=[images, weights], outputs=...)
where weights is the tensor defined in the Input layer. In model.compile(), I give my customized loss (wrapper_loss) the weight input, something like:
model.compile(optimizer=..., loss=wrapper_loss(weights=weights), ...)
where the second weights is again the one defined in the Input layer.
The last thing is to do the same in model.fit(): I feed the weight maps together with the images, following the same structure as above (see the sketch below).
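To make the wiring concrete, here is a minimal sketch of the pattern described above (my own illustration, not code from the answer: the tiny Conv2D stands in for the real U-Net body, the data is random, and the approach relies on Keras being able to capture the symbolic weights tensor inside the loss closure, which works in older Keras versions):

import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Conv2D
from tensorflow.keras.models import Model

def wrapper_loss(weights):
    # 'weights' is the symbolic Input tensor carrying the per-pixel weight maps.
    def weighted_binary_crossentropy(y_true, y_pred):
        y_true = K.clip(y_true, K.epsilon(), 1 - K.epsilon())
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        logloss = -(y_true * K.log(y_pred) + (1 - y_true) * K.log(1 - y_pred)) * weights
        return K.mean(logloss, axis=-1)
    return weighted_binary_crossentropy

images = Input(shape=(64, 64, 1))    # image input
weights = Input(shape=(64, 64, 1))   # per-pixel weight maps, same spatial shape as the labels
outputs = Conv2D(1, 1, activation="sigmoid")(images)  # stand-in for the real U-Net body

model = Model(inputs=[images, weights], outputs=outputs)
model.compile(optimizer="adam", loss=wrapper_loss(weights))

# Toy data; in practice the weight maps come from unet_weight_map() applied to each mask.
x = np.random.rand(4, 64, 64, 1).astype("float32")
y = (np.random.rand(4, 64, 64, 1) > 0.5).astype("float32")
w = np.ones((4, 64, 64, 1), dtype="float32")

model.fit([x, w], y, epochs=1, verbose=0)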

Can't apply gradients on tf.Variable

I am trying to learn a similarity matrix (M) between two image embeddings. A single training instance is a pair of images (anchor, positive), so ideally the model will return a distance of zero for embeddings of similar images.
The problem is that when I declare the matrix M as a tf.Variable, this line
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
raises an error:
TypeError: 'Variable' object is not iterable.
I think I should use a TensorFlow data type for M that is iterable.
Please tell me how I can fix this issue.
import tensorflow as tf
from tensorflow import keras

# metric learning model
class MetricLearningModel:

    def __init__(self, lr):
        self.optimizer = keras.optimizers.Adam(lr=lr)
        self.lr = lr
        self.loss_object = keras.losses.MeanSquaredError()
        self.trainable_variables = tf.Variable(
            (tf.ones((2048, 2048), dtype=tf.float32)),
            trainable=True
        )

    def similarity_function(self, anchor_embeddings, positive_embeddings):
        M = self.trainable_variables
        X_i = anchor_embeddings
        X_j = positive_embeddings
        similarity_value = tf.matmul(X_j, M, name='Tensor')
        similarity_value = tf.matmul(similarity_value, tf.transpose(X_i), name='Tensor')
        # distance(x, y) = sqrt( (x-y) @ M @ (x-y).T )
        return similarity_value

    def train_step(self, anchor, positive):
        anchor_embeddings, positive_embeddings = anchor, positive

        # Calculate gradients
        with tf.GradientTape() as tape:
            # Calculate similarity between anchors and positives.
            similarities = self.similarity_function(anchor_embeddings, positive_embeddings)
            y_pred = similarities
            y_true = tf.zeros(1)
            print(y_true, y_pred)
            loss_value = self.loss_object(
                y_pred=y_true,
                y_true=y_pred,
            )
        gradients = tape.gradient(loss_value, self.trainable_variables)
        # Apply gradients via optimizer
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

metric_model = MetricLearningModel(lr=1e-3)
anchor, positive = tf.ones((1, 2048), dtype=tf.float32), tf.ones((1, 2048), dtype=tf.float32)
metric_model.train_step(anchor, positive)
The Python zip function expects iterable objects, such as a list or a tuple.
In your calls to tape.gradient and optimizer.apply_gradients, you can put your Variable in a list to solve the issue:
with tf.GradientTape() as tape:
    ...  # forward pass and loss computation as before
gradients = tape.gradient(loss_value, [self.trainable_variables])
# Apply gradients via optimizer
self.optimizer.apply_gradients(zip(gradients, [self.trainable_variables]))
tape.gradient respects the structure of the sources argument you pass it: if you feed it a list, you get a list back. This is stated in the documentation:
Returns
A list or nested structure of Tensors (or IndexedSlices, or None), one for each element in sources. The returned structure is the same as the structure of sources.
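As a minimal, self-contained illustration of the fix (my own example, not from the question), wrapping a single Variable in a list makes both tape.gradient and apply_gradients happy:

import tensorflow as tf

M = tf.Variable(tf.ones((3, 3)))
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(M))

# Passing [M] yields a list with one gradient tensor, which zip() can then
# pair up with the (listed) variable.
grads = tape.gradient(loss, [M])
optimizer.apply_gradients(zip(grads, [M]))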

Evaluating TF model inside a TF op throws error

I am using TensorFlow 2. I am trying to optimize a function which uses the loss of a trained TensorFlow model (poison).
@tf.function
def totalloss(x):
    xt = tf.multiply(x, (1.0 - m)) + tf.multiply(m, d)
    label = targetlabel * np.ones(xt.shape[0])
    loss1 = poison.evaluate(xt, label, steps=1)
    loss2 = tf.linalg.norm(m, 1)
    return loss1 + loss2
I am not able to execute this function; however, when I comment out the @tf.function line, the function works.
I need to use this function as a TensorFlow op so as to optimize m and d.
ValueError: Unknown graph. Aborting.
This is how I am defining the model and variables:
# mask
m = tf.Variable(tf.zeros(shape=(1, 784)), name="m")
d = tf.Variable(tf.zeros(shape=(1, 784)), name="d")
# target
targetlabel = 6
poison = fcn()
poison.load_weights("MNISTP.h5")
adam = tf.keras.optimizers.Adam(lr=.002, decay=1e-6)
poison.compile(optimizer=adam, loss=tf.losses.sparse_categorical_crossentropy)
This is how I am calling the function later (executing this line results in the error listed below; however, if I comment out the @tf.function line, this command works):
loss = totalloss(ptestdata)
This is the entire traceback:
ValueError: in converted code:
<ipython-input-52-4841ad87022f>:5 totalloss *
loss1 = poison.evaluate(xt, label, steps=1)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:746 evaluate
use_multiprocessing=use_multiprocessing)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_arrays.py:693 evaluate
callbacks=callbacks)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_arrays.py:187 model_iteration
f = _make_execution_function(model, mode)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_arrays.py:555 _make_execution_function
return model._make_execution_function(mode)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:2034 _make_execution_function
self._make_test_function()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:2010 _make_test_function
**self._function_kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:3544 function
return EagerExecutionFunction(inputs, outputs, updates=updates, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:3429 __init__
raise ValueError('Unknown graph. Aborting.')
ValueError: Unknown graph. Aborting.
The purpose of the @tf.function decorator is to convert TensorFlow operations written in Python into a TensorFlow graph to achieve better performance. The error can occur when you try to use a pre-trained model with a serialized graph, in which case the decorator cannot make the graph-to-graph conversion.
I've reported this error here: https://github.com/tensorflow/tensorflow/issues/33997
A (temporary) solution is to split your loss function into two smaller functions. The decorator should only be used on the function that does not include the pre-trained model. This way you still get better performance for the other operations, just not for the part that uses the pre-trained model.
For example:
@tf.function
def _other_ops(x):
    xt = tf.multiply(x, (1.0 - m)) + tf.multiply(m, d)
    label = targetlabel * np.ones(xt.shape[0])
    loss2 = tf.linalg.norm(m, 1)
    return xt, label, loss2

def total_loss(x):
    xt, label, loss2 = _other_ops(x)
    loss1 = poison.evaluate(xt, label, steps=1)
    return loss1 + loss2
Update:
According to the discussion in the TF issue linked above, an elegant solution is to manually pass the input through each layer of the model. You can get the list of layers in your model by calling your_model.layers.
In your case, the loss is calculated from the prediction of the last layer against the label, so I think you should skip the last layer and calculate the loss outside of the loop:
@tf.function
def totalloss(x):
    xt = tf.multiply(x, (1.0 - m)) + tf.multiply(m, d)
    label = targetlabel * np.ones(xt.shape[0])
    feat = xt
    # Skip the last layer, which calculates loss1
    for i in range(len(poison.layers) - 1):
        layer = poison.layers[i]
        feat = layer(feat)
    # Now, calculate the loss yourself (note the argument order: labels first, predictions second)
    loss1 = tf.keras.losses.sparse_categorical_crossentropy(label, feat)
    loss2 = tf.linalg.norm(m, 1)
    return loss1 + loss2
The way the TF engineers explain this issue is that a model may wrap high-level processing that is not guaranteed to work under @tf.function, so putting a model inside a function decorated with @tf.function is not recommended. We therefore need to break the model into smaller pieces to bypass it.

Tensorflow : IOU per class

I'm trying to use DeepLab for semantic segmentation. I'd like to calculate the IoU per class (e.g. the IoU for the person class only) instead of the mean IoU.
At L142 of
https://github.com/tensorflow/models/blob/master/research/deeplab/eval.py,
I tried to get the confusion matrix instead of the mean IoU with
miou, cmat = tf.metrics.mean_iou(...)
metric_map['cmat'] = cmat
but it did not work.
I'd appreciate it if someone could suggest how to get around this.
You can use _streaming_confusion_matrix from tensorflow.python.ops.metrics_impl to get the confusion matrix.
Essentially it works the same way as other running metrics like mean_iou: calling the metric gives you two ops, a total confusion_matrix op and an update op that updates the confusion matrix cumulatively.
With the confusion matrix, you should then be able to compute the class-wise IoU (see the sketch below).
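For reference, a small sketch of the arithmetic (my own illustration, not from the answer): given an accumulated num_classes x num_classes confusion matrix with rows as ground truth and columns as predictions (the tf.math.confusion_matrix convention), the per-class IoU is the diagonal divided by (row total + column total - diagonal).

import tensorflow as tf

def iou_per_class(confusion_matrix):
    # Per-class IoU from a [num_classes, num_classes] confusion matrix.
    cm = tf.cast(confusion_matrix, tf.float32)
    true_positives = tf.linalg.diag_part(cm)              # TP per class
    predicted_per_class = tf.reduce_sum(cm, axis=0)       # TP + FP per class
    ground_truth_per_class = tf.reduce_sum(cm, axis=1)    # TP + FN per class
    denominator = predicted_per_class + ground_truth_per_class - true_positives
    return tf.math.divide_no_nan(true_positives, denominator)

# Toy 3-class confusion matrix; suppose the person class has index 1.
cm = tf.constant([[5, 1, 0],
                  [2, 7, 1],
                  [0, 0, 4]])
print(iou_per_class(cm).numpy()[1])  # IoU for class 1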
I implemented a class-specific IoU metric for this very purpose based on the MeanIoU class.
class ClassIoU(tf.keras.metrics.MeanIoU):
    """Computes the class-specific Intersection-Over-Union metric.

    IoU is defined as follows:
      IoU = true_positive / (true_positive + false_positive + false_negative).
    The predictions are accumulated in a confusion matrix, weighted by
    `sample_weight`, and the metric is then calculated from it.
    If `sample_weight` is `None`, weights default to 1.
    Use `sample_weight` of 0 to mask values.

    Args:
      class_idx: The index of the class of interest.
      one_hot: Indicates if the input is a one-hot vector as in CategoricalCrossentropy,
        or if class indices are used as in SparseCategoricalCrossentropy or MeanIoU.
      num_classes: The possible number of labels the prediction task can have.
        This value must be provided, since a confusion matrix of dimension =
        [num_classes, num_classes] will be allocated.
      name: (Optional) string name of the metric instance.
      dtype: (Optional) data type of the metric result.
    """
    def __init__(self, class_idx, one_hot, num_classes, name=None, dtype=None):
        super().__init__(num_classes, name, dtype)
        self.one_hot = one_hot
        self.class_idx = class_idx

    def result(self):
        sum_over_row = tf.cast(
            tf.reduce_sum(self.total_cm, axis=0), dtype=self._dtype)
        sum_over_col = tf.cast(
            tf.reduce_sum(self.total_cm, axis=1), dtype=self._dtype)
        true_positives = tf.cast(
            tf.linalg.diag_part(self.total_cm), dtype=self._dtype)

        # sum_over_row + sum_over_col =
        #     2 * true_positives + false_positives + false_negatives.
        denominator = sum_over_row[self.class_idx] + sum_over_col[self.class_idx] \
            - true_positives[self.class_idx]

        # The mean is only computed over classes that appear in the
        # label or prediction tensor. If the denominator is 0, we need to
        # ignore the class.
        num_valid_entries = tf.reduce_sum(
            tf.cast(tf.not_equal(denominator, 0), dtype=self._dtype))

        iou = tf.math.divide_no_nan(true_positives[self.class_idx], denominator)

        return tf.math.divide_no_nan(
            tf.reduce_sum(iou, name='mean_iou'), num_valid_entries)

    def update_state(self, y_true, y_pred, sample_weight=None):
        if self.one_hot:
            return super().update_state(tf.argmax(y_true, axis=-1), tf.argmax(y_pred, axis=-1), sample_weight)
        else:
            return super().update_state(y_true, y_pred, sample_weight)
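A possible usage sketch (my own, not from the answer; model here is a placeholder for your own segmentation network with softmax outputs and one-hot targets):

# Hypothetical usage: track the IoU of class 1 (e.g. "person") during training.
person_iou = ClassIoU(class_idx=1, one_hot=True, num_classes=21)
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=[person_iou])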