Correct Implementation of Dice Loss in Tensorflow / Keras - tensorflow

I've been experimenting with the region-based Dice loss, but there are so many variations on the internet that I could not find two identical implementations, and they all produce different results. Below are the implementations that I found. Some use a smoothing factor (which the authors of this paper call epsilon), some use it in both the numerator and the denominator, one implementation uses gamma, and so on.
Could someone please help me with the correct implementation?
import tensorflow as tf
import tensorflow.keras.backend as K
import numpy as np
def dice_loss1(y_true, y_pred, smooth=1e-6):
    """Return the soft Dice loss computed over all elements at once.

    Adapted from
    https://www.kaggle.com/code/bigironsphere/loss-function-library-keras-pytorch/notebook

    Args:
        y_true: Ground-truth values; cast to the dtype of ``y_pred``.
        y_pred: Predicted values; converted to a tensor.
        smooth: Constant added to both the numerator and the denominator.

    Returns:
        A scalar tensor equal to ``1 - dice_coef``.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)
    smooth = tf.cast(smooth, y_pred.dtype)
    y_pred = K.flatten(y_pred)
    y_true = K.flatten(y_true)
    # K.dot raised an error on the flattened (rank-1) inputs; the overlap is
    # the sum of the element-wise product, as in the other Dice variants.
    intersection = K.sum(y_true * y_pred)
    dice_coef = (2 * intersection + smooth) / (K.sum(y_true) + K.sum(y_pred) + smooth)
    return 1 - dice_coef
def dice_loss2(y_true, y_pred, smooth=1e-6):  # Only Smooth
    """Return a Dice loss with squared terms in the denominator.

    Source: https://gist.github.com/wassname/7793e2058c5c9dacb5212c0ac0b18a8a

    Every reduction runs over the last axis only, so the result keeps all
    leading axes of the inputs.
    """
    pred = tf.convert_to_tensor(y_pred)
    truth = tf.cast(y_true, pred.dtype)
    eps = tf.cast(smooth, pred.dtype)

    overlap = K.sum(K.abs(truth * pred), axis=-1)
    truth_energy = K.sum(K.square(truth), axis=-1)
    pred_energy = K.sum(K.square(pred), axis=-1)

    score = (2. * overlap + eps) / (truth_energy + pred_energy + eps)
    return 1 - score
def dice_loss3(y_true, y_pred):  # No gamma, no smooth
    """Return a Dice loss without any smoothing term.

    Source: https://lars76.github.io/2018/09/27/loss-functions-for-segmentation.html

    ``y_pred`` is passed through a sigmoid before the overlap is computed, and
    both sums run over every element, giving a single value.
    """
    raw = tf.convert_to_tensor(y_pred)
    truth = tf.cast(y_true, raw.dtype)
    probs = tf.math.sigmoid(raw)

    overlap = tf.reduce_sum(truth * probs)
    total = tf.reduce_sum(truth + probs)
    return 1 - 2 * overlap / total
def dice_loss4(y_true, y_pred, smooth=1e-6, gama=1):  # Gama + Smooth is used
    """Return a Dice loss with a smoothing term and an exponent ``gama``.

    Source: https://dev.to/_aadidev/3-common-loss-functions-for-image-segmentation-545o

    ``gama`` is the power applied to ``y_pred`` and ``y_true`` in the
    denominator; ``smooth`` is added to numerator and denominator.
    """
    pred = tf.convert_to_tensor(y_pred)
    truth = tf.cast(y_true, pred.dtype)
    eps = tf.cast(smooth, pred.dtype)
    power = tf.cast(gama, pred.dtype)

    top = 2 * tf.reduce_sum(pred * truth) + eps
    bottom = tf.reduce_sum(tf.pow(pred, power)) + tf.reduce_sum(tf.pow(truth, power)) + eps
    return 1 - top / bottom
# Toy inputs: three rows of four values each.
y_true = np.array([[0,0,1,0],
[0,0,1,0],
[0,0,1.,0.]])
y_pred = np.array([[0,0,0.9,0],
[0,0,0.1,0],
[1,1,0.1,1.]])
# print(dice_loss1(y_true, y_pred)) # Gives you error in K.dot()
# dice_loss2 reduces over the last axis only, so it yields one value per row.
print(dice_loss2(y_true, y_pred))
print(dice_loss3(y_true, y_pred)) # sums over every element (after a sigmoid on y_pred), so it yields a single value
print(dice_loss4(y_true, y_pred))

I utilized a variation of the dice loss for brain tumor segmentation. The implementation for the dice coefficient which I used for such results was:
def dice_coef(y_true, y_pred, smooth=100):
    """Return the smoothed Dice coefficient of the flattened inputs.

    ``smooth`` is added to both the numerator and the denominator.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    total = K.sum(truth) + K.sum(pred)
    return (2. * overlap + smooth) / (total + smooth)
In order to make it a loss, it needs to be made into a function we want to minimize. This can be accomplished by making it negative:
def dice_coef_loss(y_true, y_pred):
    """Return the negated Dice coefficient, so minimizing it maximizes overlap."""
    score = dice_coef(y_true, y_pred)
    return -score
or subtracting it from 1:
def dice_coef_loss(y_true, y_pred):
    """Return one minus the Dice coefficient."""
    score = dice_coef(y_true, y_pred)
    return 1 - score
or applying some other function then negating - for example, taking the negative logarithm (which could smooth the gradients):
def dice_coef_loss(y_true, y_pred):
    """Return the negative logarithm of the Dice coefficient."""
    score = dice_coef(y_true, y_pred)
    return -K.log(score)
The variable smooth corresponds to what you observed in other implementations under various names (smoothing, epsilon, etc.). For clarity, this smoothing variable exists to handle the case where the ground truth has very few (or no) white pixels (assuming white pixels belong to a class or to the boundary of an object, depending on your implementation).
If smooth is set too low, when the ground truth has few to 0 white pixels and the predicted image has some non-zero number of white pixels, the model will be penalized more heavily. Setting smooth higher means if the predicted image has some low amount of white pixels when the ground truth has none, the loss value will be lower. Depending on how aggressive the model needs to be, though, maybe a lower value is good.
Here's an illustrative example:
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
def dice_coef(y_true, y_pred, smooth):
    """Return the smoothed Dice coefficient of the flattened inputs.

    ``smooth`` is added to both the numerator and the denominator.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    total = K.sum(truth) + K.sum(pred)
    return (2. * overlap + smooth) / (total + smooth)
def dice_coef_loss(y_true, y_pred, smooth):
    """Return one minus the Dice coefficient computed with ``smooth``."""
    score = dice_coef(y_true, y_pred, smooth)
    return 1 - score
if __name__ == '__main__':
    smooth = 10e-6
    # Ground truth with no positive pixels at all.
    y_true = tf.zeros((1, 128, 128), dtype=tf.float32)
    # Prediction that is zero everywhere except for a single pixel set to 1.
    y_pred = np.zeros(shape=(1, 128, 128))
    y_pred[0, 0, 0] = 1
    y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)
    print(dice_coef(y_true, y_pred, smooth=smooth))
    print(dice_coef_loss(y_true, y_pred, smooth=smooth))
will print out:
tf.Tensor(9.9999e-06, shape=(), dtype=float32)
tf.Tensor(0.99999, shape=(), dtype=float32)
But if smooth is set to 100:
tf.Tensor(0.990099, shape=(), dtype=float32)
tf.Tensor(0.009900987, shape=(), dtype=float32)
Showing the loss reduces to 0.009 instead of 0.99.
For completeness, if you have multiple segmentation channels (B x W x H x K, where B is the batch size, W and H are the dimensions of your image, and K is the number of segmentation channels), the same concepts apply, and it can be implemented as follows:
def dice_coef_multilabel(y_true, y_pred, M, smooth):
    """Return the sum of the per-channel Dice coefficients.

    The fourth axis of ``y_true`` / ``y_pred`` is indexed from 0 to ``M - 1``
    and the same ``smooth`` is used for every channel.
    """
    return sum(
        dice_coef(y_true[:, :, :, channel], y_pred[:, :, :, channel], smooth)
        for channel in range(M)
    )
It can be converted to a loss function through negation or subtraction, in the same way as dice_coef. smooth could also be tuned per channel if you supply a list or some other sequence (e.g., smooth_list):
def dice_coef_multilabel(y_true, y_pred, M, smooth_list):
    """Return the sum of the per-channel Dice coefficients.

    Channel ``i`` (fourth axis) is scored with ``smooth_list[i]``.
    """
    return sum(
        dice_coef(y_true[:, :, :, channel], y_pred[:, :, :, channel], smooth_list[channel])
        for channel in range(M)
    )

Related

Dice coefficient with threshold

I set a threshold for my dice coefficient metric, but it does not seem to work correctly.
y_true and y_pred have values between 0 and 1. I am using a Unet in Keras.
def dice_coef_NoHand(self, y_true, y_pred, smooth=1):
    """Return the Dice coefficient after zeroing predictions not above 0.5.

    Predictions greater than 0.5 keep their original value; all others are
    replaced by 0.0 before the smoothed Dice coefficient is computed on the
    flattened tensors.
    """
    above_threshold = y_pred > 0.5
    thresholded = tf.where(above_threshold, y_pred, 0.0)

    truth = K.flatten(y_true)
    pred = K.flatten(thresholded)
    overlap = K.sum(truth * pred)
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return (2. * overlap + smooth) / denominator
All elements smaller than 0.5 should become 0 in y_pred. To check if this works with the above given function, I have the following metric, which I log during the training:
def y_pred_max(self, y_true, y_pred):
    """Return the element-wise maximum of the thresholded prediction and y_true.

    Predictions not greater than 0.5 are replaced by 0.0 first.
    """
    thresholded = tf.where(y_pred > 0.5, y_pred, 0.0)
    return tf.maximum(thresholded, y_true)
Now I should not get values between 0 and 0.5 for y_pred_max. But I am still getting values such as 0.04,...
How can I implement a threshold for my dice coefficient?

Normalized Cross Entropy Loss Implementation Tensorflow/Keras

I am trying to implement a normalized cross entropy loss as described in this publication
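The math given is (with $q$ the one-hot label distribution and $p$ the softmax output over $K$ classes):
$$\mathrm{NCE} = \frac{-\sum_{k=1}^{K} q(k \mid x)\,\log p(k \mid x)}{-\sum_{j=1}^{K}\sum_{k=1}^{K} q(y=j \mid x)\,\log p(k \mid x)}$$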
This paper provided a PyTorch implementation:
#mlconfig.register
class NormalizedCrossEntropy(torch.nn.Module):
    """Cross-entropy divided by the sum of the log-probabilities of all classes.

    Args:
        num_classes: Number of classes used to one-hot encode the labels.
        scale: Factor applied to the averaged loss.
    """

    def __init__(self, num_classes, scale=1.0):
        super(NormalizedCrossEntropy, self).__init__()
        # The original read an undefined name `device` here; the device is now
        # taken from the predictions inside forward().
        self.num_classes = num_classes
        self.scale = scale

    def forward(self, pred, labels):
        """Return ``scale`` times the batch mean of the normalized cross-entropy.

        Args:
            pred: Unnormalized scores; log-softmax is taken over dim 1.
            labels: Integer class indices, one-hot encoded with ``num_classes``.
        """
        log_prob = torch.nn.functional.log_softmax(pred, dim=1)
        one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float()
        # Keep the targets on the same device as the predictions.
        one_hot = one_hot.to(log_prob.device)
        nce = -1 * torch.sum(one_hot * log_prob, dim=1) / (-log_prob.sum(dim=1))
        return self.scale * nce.mean()
But I need this to be translated to tensorflow for my ongoing project. Can anyone help me implement this normalized crossentropy loss in tensorflow?
I think it is just a matter of translating the method names:
# given y_true as one-hot labels and y_pred as the multiclass probabilities
def NCE(y_true, y_pred):
    """Return the batch mean of the normalized cross-entropy.

    Mirrors the PyTorch reference, which works on log-probabilities: the
    numerator is the cross-entropy of the labelled class and the denominator
    is the negated sum of the log-probabilities of all classes.

    Args:
        y_true: One-hot labels; cast to the dtype of ``y_pred``.
        y_pred: Class probabilities along axis 1.

    Returns:
        A scalar tensor.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)
    # The reference applies log_softmax to logits; with probabilities as
    # input the equivalent step is a plain log, clipped to avoid log(0).
    eps = tf.cast(1e-7, y_pred.dtype)
    log_prob = tf.math.log(tf.clip_by_value(y_pred, eps, 1.0))
    num = -tf.math.reduce_sum(tf.multiply(y_true, log_prob), axis=1)
    denom = -tf.math.reduce_sum(log_prob, axis=1)
    return tf.reduce_mean(num / denom)


t = tf.constant([[1,0,0], [0,0,1]], dtype=tf.float64)
y = tf.constant([[0.3,0.6,0.1], [0.1,0.1,0.8]], dtype=tf.float64)
NCE(t,y)
# -> scalar float64 tensor, approximately 0.173
Just check if the resulting loss is the same since I've not tested it

In TensorFlow, unable to take 'log'

I am working on CapsNet and taking code help from here. Simulation is performed on google colab with tensorflow = 2.4.0. I am getting following error:
AttributeError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/content/drive/My Drive/Cervical GAN/Segmentation/Cheng-Lin-Li/SegCaps-master-aashish/utils/custom_losses.py:102 dice_loss *
return 1-dice_soft(y_true, y_pred, from_logits=False)
/content/drive/My Drive/Cervical GAN/Segmentation/Cheng-Lin-Li/SegCaps-master-aashish/utils/custom_losses.py:41 dice_soft *
y_pred = tf.log(y_pred / (1 - y_pred))
AttributeError: module 'tensorflow' has no attribute 'log'
Following is custom_losses.py
'''
Capsules for Object Segmentation (SegCaps)
Original Paper: https://arxiv.org/abs/1804.04241
Code written by: Rodney LaLonde
If you use significant portions of this code or the ideas from our paper, please cite it :)
If you have any questions, please email me at lalonde@knights.ucf.edu.
This file contains the definitions of custom loss functions not present in the default Keras.
=====
This program includes all custom loss functions UNet, tiramisu, Capsule Nets (capsbasic) or SegCaps(segcapsr1 or segcapsr3).
@author: Cheng-Lin Li a.k.a. Clark
@copyright: 2018 Cheng-Lin Li@Insight AI. All rights reserved.
@license: Licensed under the Apache License v2.0. http://www.apache.org/licenses/
@contact: clark.cl.li@gmail.com
Enhancement:
1. Revise default loss_type to jaccard on dice_soft function.
2. add bce_dice_loss for future usage.
'''
import tensorflow as tf
from keras import backend as K
from keras.losses import binary_crossentropy
def dice_soft(y_true, y_pred, loss_type='jaccard', axis=[1,2,3], smooth=1e-5, from_logits=False):
    """Soft dice (Sørensen or Jaccard) coefficient for comparing the similarity
    of two batch of data, usually be used for binary image segmentation
    i.e. labels are binary. The coefficient between 0 to 1, 1 means totally match.

    Parameters
    -----------
    y_true : tensor
        A distribution with shape: [batch_size, ....], (any dimensions).
        Cast to the dtype of ``y_pred`` before use.
    y_pred : tensor
        A distribution with shape: [batch_size, ....], (any dimensions).
    loss_type : string
        ``jaccard`` or ``sorensen``, default is ``jaccard``.
    axis : list of integer
        All dimensions are reduced, default ``[1,2,3]``.
    smooth : float
        This small value will be added to the numerator and denominator.
        If both y_pred and y_true are empty, it makes sure dice is 1.
        If either y_pred or y_true are empty (all pixels are background),
        dice = ``smooth/(small_value + smooth)``, then if smooth is very small,
        dice close to 0 (even the image values lower than the threshold),
        so in this case, higher smooth can have a higher dice.
    from_logits : bool
        When False, ``y_pred`` is clipped to ``[1e-7, 1 - 1e-7]`` and mapped
        through ``log(p / (1 - p))`` before the coefficient is computed.

    Returns
    -----------
    The mean of the per-sample coefficients as a scalar tensor.

    Raises
    -----------
    Exception
        If ``loss_type`` is neither ``jaccard`` nor ``sorensen``.

    Examples
    ---------
    >>> outputs = tl.act.pixel_wise_softmax(network.outputs)
    >>> dice_loss = 1 - tl.cost.dice_coe(outputs, y_)

    References
    -----------
    - `Wiki-Dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`_
    """
    # Masks often arrive as uint8; multiplying them with a float prediction
    # fails with a dtype mismatch, so align the dtypes up front.
    y_true = tf.cast(y_true, y_pred.dtype)

    if not from_logits:
        # transform back to logits
        # NOTE(review): the coefficient below is then computed on logits
        # rather than probabilities - confirm this is intended.
        _epsilon = tf.convert_to_tensor(1e-7, y_pred.dtype.base_dtype)
        y_pred = tf.clip_by_value(y_pred, _epsilon, 1 - _epsilon)
        # tf.log is not available in TensorFlow 2; tf.math.log is its replacement.
        y_pred = tf.math.log(y_pred / (1 - y_pred))

    inse = tf.reduce_sum(y_pred * y_true, axis=axis)
    if loss_type == 'jaccard':
        l = tf.reduce_sum(y_pred * y_pred, axis=axis)
        r = tf.reduce_sum(y_true * y_true, axis=axis)
    elif loss_type == 'sorensen':
        l = tf.reduce_sum(y_pred, axis=axis)
        r = tf.reduce_sum(y_true, axis=axis)
    else:
        raise Exception("Unknow loss_type")

    # smooth keeps the ratio defined when both sums are zero.
    dice = (2. * inse + smooth) / (l + r + smooth)
    return tf.reduce_mean(dice)
def dice_hard(y_true, y_pred, threshold=0.5, axis=[1,2,3], smooth=1e-5):
    """Non-differentiable Sørensen–Dice coefficient on thresholded inputs.

    Both tensors are binarised with ``> threshold`` (as float32) before the
    coefficient is computed.

    Parameters
    -----------
    y_true : tensor
        A distribution with shape: [batch_size, ....], (any dimensions).
    y_pred : tensor
        A distribution with shape: [batch_size, ....], (any dimensions).
    threshold : float
        Values strictly above it count as 1, everything else as 0.
    axis : list of integer
        Axes that are summed over, default ``[1,2,3]``.
    smooth : float
        Added to the numerator and the denominator.

    Returns
    -----------
    The mean of the coefficients that remain after the reduction.

    References
    -----------
    - `Wiki-Dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`_
    """
    pred_mask = tf.cast(y_pred > threshold, dtype=tf.float32)
    true_mask = tf.cast(y_true > threshold, dtype=tf.float32)

    overlap = tf.reduce_sum(pred_mask * true_mask, axis=axis)
    pred_total = tf.reduce_sum(pred_mask, axis=axis)
    true_total = tf.reduce_sum(true_mask, axis=axis)

    scores = (2. * overlap + smooth) / (pred_total + true_total + smooth)
    return tf.reduce_mean(scores)
def dice_loss(y_true, y_pred, from_logits=False):
    """Return ``1 - dice_soft(...)`` so that a perfect match gives a loss of 0.

    ``from_logits`` is forwarded to ``dice_soft``; it was previously ignored
    and ``False`` was always passed.
    """
    return 1 - dice_soft(y_true, y_pred, from_logits=from_logits)
def bce_dice_loss(y_true, y_pred):
    """Return the sum of binary cross-entropy and the dice loss."""
    bce_term = binary_crossentropy(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return bce_term + dice_term
def weighted_binary_crossentropy_loss(pos_weight):
    """Build a binary cross-entropy loss that weights the positive examples.

    Args:
        pos_weight: A coefficient to use on the positive examples.

    Returns:
        A function ``(target, output, from_logits=False)`` that returns the
        weighted cross-entropy tensor.
    """
    def weighted_binary_crossentropy(target, output, from_logits=False):
        """Binary crossentropy between an output tensor and a target tensor.

        # Arguments
            target: A tensor with the same shape as `output`; cast to the
                dtype of `output`.
            output: A tensor.
            from_logits: Whether `output` is expected to be a logits tensor.
                By default, we consider that `output`
                encodes a probability distribution.

        # Returns
            A tensor.
        """
        # Note: tf.nn.weighted_cross_entropy_with_logits
        # expects logits, Keras expects probabilities.
        if not from_logits:
            # transform back to logits
            _epsilon = tf.convert_to_tensor(1e-7, output.dtype.base_dtype)
            output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
            # tf.log is not available in TensorFlow 2; use tf.math.log.
            output = tf.math.log(output / (1 - output))
        # Integer masks would otherwise clash with the float logits.
        target = tf.cast(target, output.dtype)
        # TensorFlow 2 names the first argument `labels`; the older
        # `targets` keyword is no longer accepted.
        return tf.nn.weighted_cross_entropy_with_logits(labels=target,
                                                        logits=output,
                                                        pos_weight=pos_weight)
    return weighted_binary_crossentropy
def margin_loss(margin=0.4, downweight=0.5, pos_weight=1.0):
    """Build a margin loss function.

    Args:
        margin: scalar, the margin after subtracting 0.5 from raw_logits.
        downweight: scalar, the factor for negative cost.
        pos_weight: scalar multiplied into the positive cost.

    Returns:
        A function ``(labels, raw_logits)`` computing the element-wise cost.
    """
    def _margin_loss(labels, raw_logits):
        """Penalize deviations from the margin for each logit.

        0.5 is subtracted from ``raw_logits`` first. A positive entry is
        charged the squared distance to ``margin`` while it is below
        ``margin``; a negative entry is charged the squared distance to
        ``-margin`` while it is above ``-margin``.

        Args:
            labels: tensor, one hot encoding of ground truth.
            raw_logits: tensor, model predictions in range [0, 1]

        Returns:
            ``0.5 * positive_cost + downweight * 0.5 * negative_cost``.
        """
        centered = raw_logits - 0.5

        below_margin = tf.cast(tf.less(centered, margin), tf.float32)
        above_neg_margin = tf.cast(tf.greater(centered, -margin), tf.float32)

        pos_cost = pos_weight * labels * below_margin * tf.pow(centered - margin, 2)
        neg_cost = (1 - labels) * above_neg_margin * tf.pow(centered + margin, 2)
        return 0.5 * pos_cost + downweight * 0.5 * neg_cost

    return _margin_loss
The above comes while using dice loss. When using bce loss there is no error. I have tried tf.math.log instead of tf.log but still getting following error:
TypeError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/content/drive/MyDrive/Cervical GAN/Segmentation/Cheng-Lin-Li/SegCaps-master-aashish/utils/custom_losses.py:102 dice_loss *
return 1-dice_soft(y_true, y_pred, from_logits=False)
/content/drive/MyDrive/Cervical GAN/Segmentation/Cheng-Lin-Li/SegCaps-master-aashish/utils/custom_losses.py:43 dice_soft *
inse = tf.reduce_sum(y_pred * y_true, axis=axis)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1180 binary_op_wrapper
raise e
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1164 binary_op_wrapper
return func(x, y, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1496 _mul_dispatch
return multiply(x, y, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:518 multiply
return gen_math_ops.mul(x, y, name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py:6078 mul
"Mul", x=x, y=y, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:558 _apply_op_helper
inferred_from[input_arg.type_attr]))
TypeError: Input 'y' of 'Mul' Op has type uint8 that does not match type float32 of argument 'x'.
The error
TypeError: Input 'y' of 'Mul' Op has type uint8 that does not match type float32 of argument 'x'.
indicates that y does not match the type of x in x * y. This can be fixed by casting to tf.float32.
The problem arises in this line in dice_soft:
inse = tf.reduce_sum(y_pred * y_true, axis=axis)
So one solution is to use tf.cast to cast y_true to the same type as y_pred.

Multi-class weighted loss for semantic image segmentation in keras/tensorflow

Given batched RGB images as input, shape=(batch_size, width, height, 3)
And a multiclass target represented as one-hot, shape=(batch_size, width, height, n_classes)
And a model (Unet, DeepLab) with softmax activation in last layer.
I'm looking for a weighted categorical cross-entropy loss function in Keras/TensorFlow.
The class_weight argument in fit_generator doesn't seem to work, and I didn't find the answer here or in https://github.com/keras-team/keras/issues/2115.
def weighted_categorical_crossentropy(weights):
# weights = [0.9,0.05,0.04,0.01]
def wcce(y_true, y_pred):
# y_true, y_pred shape is (batch_size, width, height, n_classes)
loos = ?...
return loss
return wcce
I will answer my question:
def weighted_categorical_crossentropy(weights):
    """Build a categorical cross-entropy loss scaled by per-class weights.

    ``weights`` holds one weight per class, e.g. ``[0.9, 0.05, 0.04, 0.01]``.
    """
    def wcce(y_true, y_pred):
        """Return the cross-entropy times the weight selected by ``y_true``."""
        class_weights = K.constant(weights)
        if not K.is_tensor(y_pred):
            y_pred = K.constant(y_pred)
        y_true = K.cast(y_true, y_pred.dtype)

        cross_entropy = K.categorical_crossentropy(y_true, y_pred)
        # Sum of y_true * weights over the last axis: the weight attached to
        # each position's label.
        label_weight = K.sum(y_true * class_weights, axis=-1)
        return cross_entropy * label_weight

    return wcce
Usage:
# Build the loss from the per-class weights and compile the model with it.
loss = weighted_categorical_crossentropy(weights)
# NOTE(review): newer Keras releases name this argument `learning_rate` - confirm against the installed version.
optimizer = keras.optimizers.Adam(lr=0.01)
model.compile(optimizer=optimizer, loss=loss)
I'm using the Generalized Dice Loss. It works better than the Weighted Categorical Crossentropy in my case. My implementation is in PyTorch, however, it should be fairly easy to translate it.
class GeneralizedDiceLoss(nn.Module):
    """Dice loss in which each channel is weighted by 1 / (target sum)^2."""

    def __init__(self):
        super(GeneralizedDiceLoss, self).__init__()

    def forward(self, inp, targ):
        """Return ``1 - dice`` for 4-D ``inp`` and ``targ`` of equal shape.

        Axis 1 of both inputs is moved to the last position; sums are taken
        over the remaining three axes, leaving one value per channel.
        """
        inp = inp.contiguous().permute(0, 2, 3, 1)
        targ = targ.contiguous().permute(0, 2, 3, 1)
        reduce_axes = (0, 1, 2)

        # The original pre-allocated `w` with torch.zeros and overwrote it
        # immediately; that dead assignment is dropped.
        # 1e-9 keeps the weight finite for a channel with no target mass.
        w = 1. / (torch.sum(targ, reduce_axes)**2 + 1e-9)

        numerator = torch.sum(w * torch.sum(targ * inp, reduce_axes))
        denominator = torch.sum(w * torch.sum(targ + inp, reduce_axes))

        dice = 2. * (numerator + 1e-9) / (denominator + 1e-9)
        return 1. - dice
This issue might be similar to: Unbalanced data and weighted cross entropy which has an accepted answer.

Differing results for MNIST autoencoder due to different placement of activation function

I stumbled across a strange phenomenon while playing around with variational autoencoders. The problem is quite simple to describe:
When defining the loss function for the VAE, you have to use some kind of reconstruction error. I decided to use my own implementation of cross-entropy, as I wasn't able to get reasonable results with any function provided by tensorflow. It looks like this:
x_hat = tf.contrib.layers.fully_connected(fc2,
input_dim,
activation_fn=tf.sigmoid)
## Define the loss
reconstruction_loss = -tf.reduce_sum(
x * tf.log(epsilon + x_hat) +
(1 - x) * tf.log(epsilon + 1 - x_hat),
axis=1)
It uses the output of the reconstructed layer, which applies the sigmoid function to get it to the [0; 1] range. Now, I wanted to apply the sigmoid within the loss function and changed it to
x_hat = tf.contrib.layers.fully_connected(fc2,
input_dim,
activation_fn=None)
## Define the loss
reconstruction_loss = -tf.reduce_sum(
x * tf.log(epsilon + tf.sigmoid(x_hat)) +
(1 - x) * tf.log(epsilon + 1 - tf.sigmoid(x_hat)),
axis=1)
I'm convinced that this should provide nearly identical results. In practice, though, this second attempt results in weird grey pictures. The originals seem blurry and much brighter, too. First the okay version, then the alternative "wrong" version.
Can someone explain to me what causes this weird behavior?
If you want to test it yourself, below is my source code. You have to comment the respective blocks in or out to get the results. Thanks!
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import numpy as np
# Variational autoencoder on MNIST, built with tf.placeholder and tf.contrib.layers.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
n_samples = mnist.train.num_examples
input_dim = mnist.train.images[0].shape[0]
inter_dim = 256
encoding_dim = 5
# epsilon is added inside the logarithms of the reconstruction loss below.
epsilon = 1e-10
learning_rate = 1e-4
n_epochs = 20
batch_size = 100
width = 28
## Define the variational autoencoder model
x = tf.placeholder(dtype=tf.float32,
shape=[None, input_dim],
name='x')
fc1 = tf.contrib.layers.fully_connected(x,
inter_dim,
activation_fn=tf.nn.relu)
z_mean = tf.contrib.layers.fully_connected(fc1,
encoding_dim,
activation_fn=None)
z_log_var = tf.contrib.layers.fully_connected(fc1,
encoding_dim,
activation_fn=None)
# Sample z = z_mean + exp(z_log_var / 2) * eps, with eps drawn from a standard normal.
eps = tf.random_normal(shape=tf.shape(z_log_var),
mean=0,
stddev=1,
dtype=tf.float32)
z = z_mean + tf.exp(z_log_var / 2) * eps
fc2 = tf.contrib.layers.fully_connected(z,
inter_dim,
activation_fn=tf.nn.relu)
# The output layer applies the sigmoid itself; the commented-out argument is the variant without activation.
x_hat = tf.contrib.layers.fully_connected(fc2,
input_dim,
activation_fn=tf.sigmoid)
#activation_fn=None)
## Define the loss
# Binary cross-entropy between x and x_hat, summed over axis 1.
reconstruction_loss = -tf.reduce_sum(
x * tf.log(epsilon + x_hat) +
(1 - x) * tf.log(epsilon + 1 - x_hat),
axis=1)
# ALTERNATIVE LOSS W/ APPLYING SIGMOID, REMOVED ACTIVATION FROM OUTPUT LAYER
'''
reconstruction_loss = -tf.reduce_sum(
x * tf.log(epsilon + tf.sigmoid(x_hat)) +
(1 - x) * tf.log(epsilon + 1 - tf.sigmoid(x_hat)),
axis=1)
'''
# KL term computed from z_mean and z_log_var, summed over axis 1.
KL_div = -.5 * tf.reduce_sum(
1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var),
axis=1)
# Mean of (reconstruction + KL) over the batch.
total_loss = tf.reduce_mean(reconstruction_loss + KL_div)
## Define the training operator
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss)
## Run it
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(n_epochs):
for _ in range(n_samples // batch_size):
batch = mnist.train.next_batch(batch_size)
_, loss, recon_loss, KL_loss = sess.run([train_op,
total_loss,
reconstruction_loss,
KL_div],
feed_dict={x:batch[0]})
print('[Epoch {}] loss: {}'.format(epoch, loss))
print('Training Done')
## Reconstruct a few samples to validate the training
batch = mnist.train.next_batch(100)
# NOTE: x_hat is fetched as-is; if the output layer is built with activation_fn=None,
# no sigmoid is applied to these values before they are plotted.
x_reconstructed = sess.run(x_hat, feed_dict={x:batch[0]})
# n is the integer square root of batch_size; the canvas holds n x n (reconstruction, original) pairs.
n = np.sqrt(batch_size).astype(np.int32)
I_reconstructed = np.empty((width*n, 2*width*n))
for i in range(n):
for j in range(n):
# NOTE: this rebinds the name x, which held the input placeholder until here.
x = np.concatenate(
(x_reconstructed[i*n+j, :].reshape(width, width),
batch[0][i*n+j, :].reshape(width, width)),
axis=1
)
I_reconstructed[i*width:(i+1)*width, j*2*width:(j+1)*2*width] = x
fig = plt.figure()
plt.imshow(I_reconstructed, cmap='gray')
EDIT1: SOLUTION
Thanks to @xdurch0, I was made aware that the reconstructed output is no longer rescaled by the sigmoid function. That means the sigmoid has to be applied to the output before plotting it. Just modify the output:
x_reconstructed = sess.run(tf.sigmoid(x_hat), feed_dict={x:batch[0]})