Related
I am trying to code a neural network using only numpy and pandas. I am having issues with the dimension of my data. I am getting the error "ValueError: operands could not be broadcast together with shapes (150,) (150,3)
." Not sure what the alternative is here, as we are trying to predict one of the three types of flower based on 4 numerical values. Here is my code:
import pandas as pd
class NeuralNet():
def __init__(self, i_dim, h_dim, o_dim, lr):
self.i_dim = i_dim
self.h_dim = h_dim
self.o_dim = o_dim
self.lr = lr
self.weights1 = np.random.randn(self.i_dim, self.h_dim) / np.sqrt(self.i_dim)
self.bias1 = np.zeros((1, self.h_dim))
self.weights2 = np.random.randn(self.h_dim, self.o_dim) / np.sqrt(self.h_dim)
self.bias2 = np.zeros((1, self.o_dim))
def sigmoid(self, x):
return 1 / (1 + np.exp(-x))
def softmax(self, x):
exps = np.exp(x - np.max(x, axis=1, keepdims=True))
return exps / np.sum(exps, axis=1, keepdims=True)
def forward(self, X):
self.layer1 = self.sigmoid(np.dot(X, self.weights1) + self.bias1)
self.layer2 = self.softmax(np.dot(self.layer1, self.weights2) + self.bias2)
return self.layer2
def sigmoid_derivative(self, x):
return x * (1 - x)
def softmax_derivative(self, x):
s = x.reshape(-1, 1)
return np.diagflat(s) - np.dot(s, s.T)
def backward(self, X, y, y_hat):
d_softmax = self.softmax_derivative(y_hat)
d_sigmoid = self.sigmoid_derivative(self.layer1)
d_weights2 = np.dot(self.layer1.T, (2 * (y - y_hat) * d_softmax))
d_bias2 = np.sum(2 * (y - y_hat) * d_softmax, axis=0, keepdims=True)
d_weights1 = np.dot(X.T, (np.dot(2 * (y - y_hat) * d_softmax, self.weights2.T) * d_sigmoid))
d_bias1 = np.sum(np.dot(2 * (y - y_hat) * d_softmax, self.weights2.T) * d_sigmoid, axis=0)
self.weights1 -= self.lr * d_weights1
self.bias1 -= self.lr * d_bias1
self.weights2 -= self.lr * d_weights2
self.bias2 -= self.lr * d_bias2
def cross_ent_loss(self):
sample_losses = - self.y * np.log(self.y_hat) - (1 - self.y) * np.log(1 - self.y_hat)
loss = np.mean(sample_losses)
return loss
def train(self, X, y, epochs):
for epoch in range(epochs):
y_hat = self.forward(X)
self.backward(X, y, y_hat)
loss = self.cross_ent_loss()
print(f"Epoch {epoch}: Loss = {loss}")
if epoch % 10 == 0:
print(f"Epoch {epoch}: Loss = {loss}")
def predict(self, X):
return self.forward(X)
df = pd.read_csv('/Users/brasilgu/PycharmProjects/NNfs/venv/lib/iris.data.txt', header=None)
X_train = df.iloc[:, :4].values
y_train = df.iloc[:, -1].values
nn = NeuralNet(4, 5, 3, 0.1)
nn.train(X_train, y_train, 1000)
y_pred = nn.predict(X_train)
y_pred_labels = np.argmax(y_pred, axis=1)
print(y_pred) ```
The stacktrace of the error:
``` Traceback (most recent call last):
File "/Users/brasilgu/PycharmProjects/NNfs/venv/lib/neural_net.py", line 72, in <module>
nn.train(X_train, y_train, 1000)
File "/Users/brasilgu/PycharmProjects/NNfs/venv/lib/neural_net.py", line 57, in train
self.backward(X, y, y_hat)
File "/Users/brasilgu/PycharmProjects/NNfs/venv/lib/neural_net.py", line 39, in backward
d_weights2 = np.dot(self.layer1.T, (2 * (y - y_hat) * d_softmax))
ValueError: operands could not be broadcast together with shapes (150,) (150,3)```
I saw the publicly available iris dataset and according to your code, the y seems to be a rank one matrix with shape (150, ).
So modify your y_train as y_train = y_train.reshape(-1, 1) to make it a proper matrix before creating the NeuralNet
I want to feed pytorch gradients manually. In my real problem, I have my own adjoint function that does not use tensors. Is there any way I can define my own gradient function for pytorch to use during optimization?
import numpy as np
import torch
# define rosenbrock function and gradient
x0 = np.array([0.1, 0.1])
a = 1
b = 5
def f(x):
return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2
def jac(x):
dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
dx2 = 2 * b * (x[1] - x[0] ** 2)
return np.array([dx1, dx2])
# create stochastic rosenbrock function and gradient
# (the crude analogy is that I have predefined stochastic
# forward and backward functions)
def f_rand(x):
return f(x) * np.random.uniform(0.5, 1.5)
def jac_rand(x): return jac(x) * np.random.uniform(0.5, 1.5)
x_tensor = torch.tensor(x0, requires_grad=False)
optimizer = torch.optim.Adam([x_tensor], lr=0.1)
# here, closure is fed f_rand to compute the gradient.
# I need to feed closer the gradient directly from jac_rand
def closure():
optimizer.zero_grad()
loss = f_rand(x_tensor)
loss.backward() # jac_rand(x)
return loss
for ii in range(200):
optimizer.step(closure)
print(x_tensor, f(x_tensor))
# tensor([1.0000, 1.0000], dtype=torch.float64, requires_grad=True) tensor(4.5799e-09, dtype=torch.float64, grad_fn=<AddBackward0>)
# ( this is the right answer, E[f(1, 1)] = 0 )
I've tried defining a custom function, but I can't get it to work. This is my best attempt so far:
import numpy as np
import torch
# define rosenbrock function and gradient
x0 = np.array([0.1, 0.1])
a = 1
b = 5
def f(x):
return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2
def jac(x):
dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
dx2 = 2 * b * (x[1] - x[0] ** 2)
return np.array([dx1, dx2])
# create stochastic rosenbrock function and gradient
def f_rand(x):
return f(x) * np.random.uniform(0.5, 1.5)
def jac_rand(x): return jac(x) * np.random.uniform(0.5, 1.5)
class custom_function(torch.autograd.Function):
#staticmethod
def forward(ctx, input):
ctx.save_for_backward(input)
return f_rand(input)
#staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output * g_rand(input)
x_tensor = torch.tensor(x0, requires_grad=False)
optimizer = torch.optim.Adam([x_tensor], lr=0.1)
for ii in range(200):
print('x_tensor ', x_tensor)
optimizer.step(custom_function())
print(x_tensor, f(x_tensor))
It says:
RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function)
Not quite sure if this is exactly what you want but the method call loss.backward() computes gradients via pytorch's computational graph and stores the gradient values in the weight tensors themselves (in your case it's in x_tensor). And these gradients can be accessed via x_tensor.grad. However, if you don't want to use pytorch's gradient computing method using loss.backward(), then you can manually feed your gradients into your tensor's .grad attribute as follows:
with torch.no_grad():
def closure():
optimizer.zero_grad()
loss = f_rand(x_tensor)
x_tensor.grad = torch.from_numpy(jac_rand(x_tensor))
return loss
I made some modifications, mainly the learning rate and the number of iterations. You will see the loss goes to zero as the tensor approaches (a, a²).
import torch
import numpy as np
import torch
# define rosenbrock function and gradient
np.random.seed(0)
x0 = np.array([0.1, 0.1])
a = 6
b = 100
def f(x):
return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2
def jac(x):
dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
dx2 = 2 * b * (x[1] - x[0] ** 2)
return np.array([dx1, dx2])
# create stochastic rosenbrock function and gradient
def f_rand(x):
#return f(x)
return f(x) * np.random.uniform(0.5, 1.5)
def jac_rand(x):
#return jac(x)
return jac(x) * np.random.uniform(0.5, 1.5)
class CustomFunction(torch.autograd.Function):
#staticmethod
def forward(ctx, input):
ctx.save_for_backward(input)
return f_rand(input)
#staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output * jac_rand(input)
custom_function = CustomFunction.apply
x_tensor = torch.tensor(x0, requires_grad=True)
optimizer = torch.optim.Adam([x_tensor], lr=0.0001)
print('x_tensor ', x_tensor)
for ii in range(1000000):
optimizer.zero_grad()
output=custom_function(x_tensor)
loss = round(output.item(),8)
if loss < 0.0000001:
print('loss: ',loss)
break
print('loss: ',loss)
output.backward()
optimizer.step()
print(x_tensor, f(x_tensor))
I've been trying to experiment with Region Based: Dice Loss but there have been a lot of variations on the internet to a varying degree that I could not find two identical implementations. The problem is that all of these produce varying results. Below are the implementations that I found. Some uses smoothing factor which the authors in this paper have called epsilon, some use it in both numerator and denominator, one implementation used Gamma etc etc.
Could someone please help me with the correct implementation.
import tensorflow as tf
import tensorflow.keras.backend as K
import numpy as np
def dice_loss1(y_true, y_pred, smooth=1e-6):
'''
https://www.kaggle.com/code/bigironsphere/loss-function-library-keras-pytorch/notebook
'''
y_pred = tf.convert_to_tensor(y_pred)
y_true = tf.cast(y_true, y_pred.dtype)
smooth = tf.cast(smooth, y_pred.dtype)
y_pred = K.flatten(y_pred)
y_true = K.flatten(y_true)
intersection = K.sum(K.dot(y_true, y_pred))
dice_coef = (2*intersection + smooth) / (K.sum(y_true) + K.sum(y_pred) + smooth)
dice_loss = 1-dice_coef
return dice_loss
def dice_loss2(y_true, y_pred, smooth=1e-6): # Only Smooth
"""
https://gist.github.com/wassname/7793e2058c5c9dacb5212c0ac0b18a8a
"""
y_pred = tf.convert_to_tensor(y_pred)
y_true = tf.cast(y_true, y_pred.dtype)
smooth = tf.cast(smooth, y_pred.dtype)
intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
dice_coef = (2. * intersection + smooth) / (K.sum(K.square(y_true),-1) + K.sum(K.square(y_pred),-1) + smooth)
return 1- dice_coef
def dice_loss3(y_true, y_pred): # No gamma, no smooth
'''
https://lars76.github.io/2018/09/27/loss-functions-for-segmentation.html
'''
y_pred = tf.convert_to_tensor(y_pred)
y_true = tf.cast(y_true, y_pred.dtype)
y_pred = tf.math.sigmoid(y_pred)
numerator = 2 * tf.reduce_sum(y_true * y_pred)
denominator = tf.reduce_sum(y_true + y_pred)
return 1 - numerator / denominator
def dice_loss4(y_true, y_pred, smooth=1e-6, gama=1): # Gama + Smooth is used
'''
https://dev.to/_aadidev/3-common-loss-functions-for-image-segmentation-545o
'''
y_pred = tf.convert_to_tensor(y_pred)
y_true = tf.cast(y_true, y_pred.dtype)
smooth = tf.cast(smooth, y_pred.dtype)
gama = tf.cast(gama, y_pred.dtype)
nominator = 2 * tf.reduce_sum(tf.multiply(y_pred, y_true)) + smooth
denominator = tf.reduce_sum(y_pred ** gama) + tf.reduce_sum(y_true ** gama) + smooth
result = 1 - tf.divide(nominator, denominator)
return result
y_true = np.array([[0,0,1,0],
[0,0,1,0],
[0,0,1.,0.]])
y_pred = np.array([[0,0,0.9,0],
[0,0,0.1,0],
[1,1,0.1,1.]])
# print(dice_loss1(y_true, y_pred)) # Gives you error in K.dot()
print(dice_loss2(y_true, y_pred))
print(dice_loss3(y_true, y_pred)) # provides array of values
print(dice_loss4(y_true, y_pred))
I utilized a variation of the dice loss for brain tumor segmentation. The implementation for the dice coefficient which I used for such results was:
def dice_coef(y_true, y_pred, smooth=100):
y_true_f = K.flatten(y_true)
y_pred_f = K.flatten(y_pred)
intersection = K.sum(y_true_f * y_pred_f)
dice = (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
return dice
In order to make it a loss, it needs to be made into a function we want to minimize. This can be accomplished by making it negative:
def dice_coef_loss(y_true, y_pred):
return -dice_coef(y_true, y_pred)
or subtracting it from 1:
def dice_coef_loss(y_true, y_pred):
return 1 - dice_coef(y_true, y_pred)
or applying some other function then negating - for example, taking the negative logarithm (which could smooth the gradients):
def dice_coef_loss(y_true, y_pred):
return -K.log(dice_coef(y_true, y_pred))
The variable smooth represents your observation in other implementations with various names (smoothing, epsilon, etc.). Just for clarity, this smoothing variable exists to handle the case where the ground truth has very few white (or no) white pixels (assuming white pixels belonging to a class or boundary of an object, depending on your implementation).
If smooth is set too low, when the ground truth has few to 0 white pixels and the predicted image has some non-zero number of white pixels, the model will be penalized more heavily. Setting smooth higher means if the predicted image has some low amount of white pixels when the ground truth has none, the loss value will be lower. Depending on how aggressive the model needs to be, though, maybe a lower value is good.
Here's an illustrative example:
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
def dice_coef(y_true, y_pred, smooth):
y_true_f = K.flatten(y_true)
y_pred_f = K.flatten(y_pred)
intersection = K.sum(y_true_f * y_pred_f)
dice = (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
return dice
def dice_coef_loss(y_true, y_pred, smooth):
return 1 - dice_coef(y_true, y_pred, smooth)
if __name__ == '__main__':
smooth = 10e-6
y_pred = np.zeros((1, 128, 128))
# one pixel is set to 1
y_pred[0, 0, 0] = 1
y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)
y_true = tf.zeros((1, 128, 128), dtype=tf.float32)
print(dice_coef(y_true, y_pred, smooth=smooth))
print(dice_coef_loss(y_true, y_pred, smooth=smooth))
will print out:
tf.Tensor(9.9999e-06, shape=(), dtype=float32)
tf.Tensor(0.99999, shape=(), dtype=float32)
But if smooth is set to 100:
tf.Tensor(0.990099, shape=(), dtype=float32)
tf.Tensor(0.009900987, shape=(), dtype=float32)
Showing the loss reduces to 0.009 instead of 0.99.
For completeness, if you have multiple segmentation channels (B X W X H X K, where B is the batch size, W and H are the dimensions of your image, and K are the different segmentations channels), the same concepts apply, but it can be implemented as follows:
def dice_coef_multilabel(y_true, y_pred, M, smooth):
dice = 0
for index in range(M):
dice += dice_coef(y_true[:,:,:,index], y_pred[:,:,:,index], smooth)
return dice
And it can be converted to a loss function through negation or subtraction, in the same way as dice_coef is. smooth could also be tuned per channel, if you supply a list or some other sequence (e.g; smooth_list):
def dice_coef_multilabel(y_true, y_pred, M, smooth_list):
dice = 0
for index in range(M):
dice += dice_coef(y_true[:,:,:,index], y_pred[:,:,:,index], smooth_list[index])
return dice
I am working on CapsNet and taking code help from here. Simulation is performed on google colab with tensorflow = 2.4.0. I am getting following error:
AttributeError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/content/drive/My Drive/Cervical GAN/Segmentation/Cheng-Lin-Li/SegCaps-master-aashish/utils/custom_losses.py:102 dice_loss *
return 1-dice_soft(y_true, y_pred, from_logits=False)
/content/drive/My Drive/Cervical GAN/Segmentation/Cheng-Lin-Li/SegCaps-master-aashish/utils/custom_losses.py:41 dice_soft *
y_pred = tf.log(y_pred / (1 - y_pred))
AttributeError: module 'tensorflow' has no attribute 'log'
Following is custom_losses.py
'''
Capsules for Object Segmentation (SegCaps)
Original Paper: https://arxiv.org/abs/1804.04241
Code written by: Rodney LaLonde
If you use significant portions of this code or the ideas from our paper, please cite it :)
If you have any questions, please email me at lalonde#knights.ucf.edu.
This file contains the definitions of custom loss functions not present in the default Keras.
=====
This program includes all custom loss functions UNet, tiramisu, Capsule Nets (capsbasic) or SegCaps(segcapsr1 or segcapsr3).
#author: Cheng-Lin Li a.k.a. Clark
#copyright: 2018 Cheng-Lin Li#Insight AI. All rights reserved.
#license: Licensed under the Apache License v2.0. http://www.apache.org/licenses/
#contact: clark.cl.li#gmail.com
Enhancement:
1. Revise default loss_type to jaccard on dice_soft function.
2. add bce_dice_loss for future usage.
'''
import tensorflow as tf
from keras import backend as K
from keras.losses import binary_crossentropy
def dice_soft(y_true, y_pred, loss_type='jaccard', axis=[1,2,3], smooth=1e-5, from_logits=False):
"""Soft dice (Sørensen or Jaccard) coefficient for comparing the similarity
of two batch of data, usually be used for binary image segmentation
i.e. labels are binary. The coefficient between 0 to 1, 1 means totally match.
Parameters
-----------
y_pred : tensor
A distribution with shape: [batch_size, ....], (any dimensions).
y_true : tensor
A distribution with shape: [batch_size, ....], (any dimensions).
loss_type : string
``jaccard`` or ``sorensen``, default is ``jaccard``.
axis : list of integer
All dimensions are reduced, default ``[1,2,3]``.
smooth : float
This small value will be added to the numerator and denominator.
If both y_pred and y_true are empty, it makes sure dice is 1.
If either y_pred or y_true are empty (all pixels are background), dice = ```smooth/(small_value + smooth)``,
then if smooth is very small, dice close to 0 (even the image values lower than the threshold),
so in this case, higher smooth can have a higher dice.
Examples
---------
>>> outputs = tl.act.pixel_wise_softmax(network.outputs)
>>> dice_loss = 1 - tl.cost.dice_coe(outputs, y_)
References
-----------
- `Wiki-Dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`_
"""
if not from_logits:
# transform back to logits
_epsilon = tf.convert_to_tensor(1e-7, y_pred.dtype.base_dtype)
y_pred = tf.clip_by_value(y_pred, _epsilon, 1 - _epsilon)
y_pred = tf.log(y_pred / (1 - y_pred))
inse = tf.reduce_sum(y_pred * y_true, axis=axis)
if loss_type == 'jaccard':
l = tf.reduce_sum(y_pred * y_pred, axis=axis)
r = tf.reduce_sum(y_true * y_true, axis=axis)
elif loss_type == 'sorensen':
l = tf.reduce_sum(y_pred, axis=axis)
r = tf.reduce_sum(y_true, axis=axis)
else:
raise Exception("Unknow loss_type")
## old axis=[0,1,2,3]
# dice = 2 * (inse) / (l + r)
# epsilon = 1e-5
# dice = tf.clip_by_value(dice, 0, 1.0-epsilon) # if all empty, dice = 1
## new haodong
dice = (2. * inse + smooth) / (l + r + smooth)
##
dice = tf.reduce_mean(dice)
return dice
def dice_hard(y_true, y_pred, threshold=0.5, axis=[1,2,3], smooth=1e-5):
"""Non-differentiable Sørensen–Dice coefficient for comparing the similarity
of two batch of data, usually be used for binary image segmentation i.e. labels are binary.
The coefficient between 0 to 1, 1 if totally match.
Parameters
-----------
y_pred : tensor
A distribution with shape: [batch_size, ....], (any dimensions).
y_true : tensor
A distribution with shape: [batch_size, ....], (any dimensions).
threshold : float
The threshold value to be true.
axis : list of integer
All dimensions are reduced, default ``[1,2,3]``.
smooth : float
This small value will be added to the numerator and denominator, see ``dice_coe``.
References
-----------
- `Wiki-Dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`_
"""
y_pred = tf.cast(y_pred > threshold, dtype=tf.float32)
y_true = tf.cast(y_true > threshold, dtype=tf.float32)
inse = tf.reduce_sum(tf.multiply(y_pred, y_true), axis=axis)
l = tf.reduce_sum(y_pred, axis=axis)
r = tf.reduce_sum(y_true, axis=axis)
## old axis=[0,1,2,3]
# hard_dice = 2 * (inse) / (l + r)
# epsilon = 1e-5
# hard_dice = tf.clip_by_value(hard_dice, 0, 1.0-epsilon)
## new haodong
hard_dice = (2. * inse + smooth) / (l + r + smooth)
##
hard_dice = tf.reduce_mean(hard_dice)
return hard_dice
def dice_loss(y_true, y_pred, from_logits=False):
return 1-dice_soft(y_true, y_pred, from_logits=False)
def bce_dice_loss(y_true, y_pred):
return binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
def weighted_binary_crossentropy_loss(pos_weight):
# pos_weight: A coefficient to use on the positive examples.
def weighted_binary_crossentropy(target, output, from_logits=False):
"""Binary crossentropy between an output tensor and a target tensor.
# Arguments
target: A tensor with the same shape as `output`.
output: A tensor.
from_logits: Whether `output` is expected to be a logits tensor.
By default, we consider that `output`
encodes a probability distribution.
# Returns
A tensor.
"""
# Note: tf.nn.sigmoid_cross_entropy_with_logits
# expects logits, Keras expects probabilities.
if not from_logits:
# transform back to logits
_epsilon = tf.convert_to_tensor(1e-7, output.dtype.base_dtype)
output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
output = tf.log(output / (1 - output))
return tf.nn.weighted_cross_entropy_with_logits(targets=target,
logits=output,
pos_weight=pos_weight)
return weighted_binary_crossentropy
def margin_loss(margin=0.4, downweight=0.5, pos_weight=1.0):
'''
Args:
margin: scalar, the margin after subtracting 0.5 from raw_logits.
downweight: scalar, the factor for negative cost.
'''
def _margin_loss(labels, raw_logits):
"""Penalizes deviations from margin for each logit.
Each wrong logit costs its distance to margin. For negative logits margin is
0.1 and for positives it is 0.9. First subtract 0.5 from all logits. Now
margin is 0.4 from each side.
Args:
labels: tensor, one hot encoding of ground truth.
raw_logits: tensor, model predictions in range [0, 1]
Returns:
A tensor with cost for each data point of shape [batch_size].
"""
logits = raw_logits - 0.5
positive_cost = pos_weight * labels * tf.cast(tf.less(logits, margin),
tf.float32) * tf.pow(logits - margin, 2)
negative_cost = (1 - labels) * tf.cast(
tf.greater(logits, -margin), tf.float32) * tf.pow(logits + margin, 2)
return 0.5 * positive_cost + downweight * 0.5 * negative_cost
return _margin_loss
The above comes while using dice loss. When using bce loss there is no error. I have tried tf.math.log instead of tf.log but still getting following error:
TypeError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/content/drive/MyDrive/Cervical GAN/Segmentation/Cheng-Lin-Li/SegCaps-master-aashish/utils/custom_losses.py:102 dice_loss *
return 1-dice_soft(y_true, y_pred, from_logits=False)
/content/drive/MyDrive/Cervical GAN/Segmentation/Cheng-Lin-Li/SegCaps-master-aashish/utils/custom_losses.py:43 dice_soft *
inse = tf.reduce_sum(y_pred * y_true, axis=axis)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1180 binary_op_wrapper
raise e
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1164 binary_op_wrapper
return func(x, y, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1496 _mul_dispatch
return multiply(x, y, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:518 multiply
return gen_math_ops.mul(x, y, name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py:6078 mul
"Mul", x=x, y=y, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:558 _apply_op_helper
inferred_from[input_arg.type_attr]))
TypeError: Input 'y' of 'Mul' Op has type uint8 that does not match type float32 of argument 'x'.
The error
TypeError: Input 'y' of 'Mul' Op has type uint8 that does not match type float32 of argument 'x'.
indicates that y does not match the type of x in x * y. This can be fixed by casting to tf.float32.
The problem arises in this line in dice_soft:
inse = tf.reduce_sum(y_pred * y_true, axis=axis)
So one solution is to use tf.cast to cast y_true to the same type as y_pred.
Given batched RGB images as input, shape=(batch_size, width, height, 3)
And a multiclass target represented as one-hot, shape=(batch_size, width, height, n_classes)
And a model (Unet, DeepLab) with softmax activation in last layer.
I'm looking for weighted categorical-cross-entropy loss funciton in kera/tensorflow.
The class_weight argument in fit_generator doesn't seems to work, and I didn't find the answer here or in https://github.com/keras-team/keras/issues/2115.
def weighted_categorical_crossentropy(weights):
# weights = [0.9,0.05,0.04,0.01]
def wcce(y_true, y_pred):
# y_true, y_pred shape is (batch_size, width, height, n_classes)
loos = ?...
return loss
return wcce
I will answer my question:
def weighted_categorical_crossentropy(weights):
# weights = [0.9,0.05,0.04,0.01]
def wcce(y_true, y_pred):
Kweights = K.constant(weights)
if not K.is_tensor(y_pred): y_pred = K.constant(y_pred)
y_true = K.cast(y_true, y_pred.dtype)
return K.categorical_crossentropy(y_true, y_pred) * K.sum(y_true * Kweights, axis=-1)
return wcce
Usage:
loss = weighted_categorical_crossentropy(weights)
optimizer = keras.optimizers.Adam(lr=0.01)
model.compile(optimizer=optimizer, loss=loss)
I'm using the Generalized Dice Loss. It works better than the Weighted Categorical Crossentropy in my case. My implementation is in PyTorch, however, it should be fairly easy to translate it.
class GeneralizedDiceLoss(nn.Module):
def __init__(self):
super(GeneralizedDiceLoss, self).__init__()
def forward(self, inp, targ):
inp = inp.contiguous().permute(0, 2, 3, 1)
targ = targ.contiguous().permute(0, 2, 3, 1)
w = torch.zeros((targ.shape[-1],))
w = 1. / (torch.sum(targ, (0, 1, 2))**2 + 1e-9)
numerator = targ * inp
numerator = w * torch.sum(numerator, (0, 1, 2))
numerator = torch.sum(numerator)
denominator = targ + inp
denominator = w * torch.sum(denominator, (0, 1, 2))
denominator = torch.sum(denominator)
dice = 2. * (numerator + 1e-9) / (denominator + 1e-9)
return 1. - dice
This issue might be similar to: Unbalanced data and weighted cross entropy which has an accepted answer.