TypeError: Expected keras.losses.Loss, found function - tensorflow

I want to build a TFF model for speech recognition. For this, I use a CNN-GRU model architecture with a CTC loss function, but I get an error when I call build_federated_averaging_process, and I think it is about the ctc_loss function, but I can't fix it.
Part of my code is:
def CTCLoss(y_true, y_pred):
    # Compute the training-time loss value
    batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
    input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
    label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

    input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
    label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

    loss = keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)
    return loss
def create_compiled_keras_model():
    """Model similar to DeepSpeech2."""
    # Model's input
    input_spectrogram = layers.Input((None, fft_length // 2 + 1), name="input")
    # Expand the dimension to use 2D CNN.
    x = layers.Reshape((-1, fft_length // 2 + 1, 1), name="expand_dim")(input_spectrogram)
    # Convolution layer 1
    x = layers.Conv2D(
        filters=32,
        kernel_size=[11, 41],
        strides=[2, 2],
        padding="same",
        use_bias=False,
        name="conv_1",
    )(x)
    x = layers.BatchNormalization(name="conv_1_bn")(x)
    x = layers.ReLU(name="conv_1_relu")(x)
    # Convolution layer 2
    x = layers.Conv2D(
        filters=32,
        kernel_size=[11, 21],
        strides=[1, 2],
        padding="same",
        use_bias=False,
        name="conv_2",
    )(x)
    x = layers.BatchNormalization(name="conv_2_bn")(x)
    x = layers.ReLU(name="conv_2_relu")(x)
    # Reshape the resulted volume to feed the RNNs layers
    x = layers.Reshape((-1, x.shape[-2] * x.shape[-1]))(x)
    # RNN layers
    for i in range(1, 2 + 1):
        recurrent = layers.GRU(
            units=128,
            activation="tanh",
            recurrent_activation="sigmoid",
            use_bias=True,
            return_sequences=True,
            reset_after=True,
            name=f"gru_{i}",
        )
        x = layers.Bidirectional(
            recurrent, name=f"bidirectional_{i}", merge_mode="concat"
        )(x)
        if i < 2:
            x = layers.Dropout(rate=0.5)(x)
    # Dense layer
    x = layers.Dense(units=128 * 2, name="dense_1")(x)
    x = layers.ReLU(name="dense_1_relu")(x)
    x = layers.Dropout(rate=0.5)(x)
    # Classification layer
    output = layers.Dense(units=output_dim + 1, activation="softmax")(x)
    # Model
    model = keras.Model(input_spectrogram, output, name="DeepSpeech_2")
    return model

def model_fn():
    # We _must_ create a new model here, and _not_ capture it from an external
    # scope. TFF will call this within different graph contexts.
    keras_model = create_compiled_keras_model()
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=layers.Input((None, fft_length // 2 + 1)),
        loss=CTCLoss)
I get the error at this step:
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: keras.optimizers.Adam(learning_rate=1e-4))
TypeError: Expected keras.losses.Loss, found function.
How do I fix it?

class Customloss(tf.keras.losses.Loss):
    def __init__(self):
        super().__init__()

    @tf.function
    def call(self, y_true, y_pred):  # a Loss subclass must override call()
        ...  # same CTC computation as in the CTCLoss function above
        return loss

Try subclassing tf.keras.losses.Loss for a custom loss in TFF. It will work, because TFF expects a keras.losses.Loss instance rather than a bare function (which is exactly what the error says).
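For completeness, here is a minimal sketch of the full fix under the question's setup. CTCLoss's body, create_compiled_keras_model, and build_federated_averaging_process are taken from the code above; train_dataset is a stand-in for your preprocessed federated dataset, since input_spec has to describe the (x, y) structure the model will see (a Keras Input tensor, as in the question's model_fn, will not work there):

class CTCLossFn(tf.keras.losses.Loss):
    """Wraps the CTC computation in a keras.losses.Loss subclass."""

    def __init__(self, name="ctc_loss"):
        super().__init__(name=name)

    def call(self, y_true, y_pred):
        # Identical to the CTCLoss function above.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        return keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)

def model_fn():
    keras_model = create_compiled_keras_model()
    return tff.learning.from_keras_model(
        keras_model,
        # Assumed: element_spec of a preprocessed client dataset.
        input_spec=train_dataset.element_spec,
        loss=CTCLossFn())  # pass an instance, not the function

iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: keras.optimizers.Adam(learning_rate=1e-4))

Passing an instance (CTCLossFn()) rather than a bare function is what satisfies TFF's keras.losses.Loss check.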

Related

keras custom layer unknown output shape

I am trying to build a custom keras layer that does Canny edge detection with OpenCV. Here's my code:
class CannyEdgeDetectorLayer(layers.Layer):
    def __init__(self, threshold1=60, threshold2=120, **kwargs):
        super(CannyEdgeDetectorLayer, self).__init__(**kwargs)
        self.threshold1 = threshold1
        self.threshold2 = threshold2

    def call(self, inputs):
        return tf.py_function(func=self.canny_edge_detector, inp=[inputs], Tout=tf.float32)

    def canny_edge_detector(self, inputs):
        inputs = inputs.numpy()
        edges = [cv2.Canny(img, self.threshold1, self.threshold2).reshape(inputs.shape[1], inputs.shape[2], -1) / 255
                 for img in inputs]
        return tf.reshape(tf.convert_to_tensor(edges, dtype=tf.float32),
                          (inputs.shape[0], inputs.shape[1], inputs.shape[2], 1))
        # return np.array(edges).reshape(inputs.shape[0], inputs.shape[1], inputs.shape[2], 1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1], input_shape[2], 1)

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'threshold1': self.threshold1,
            'threshold2': self.threshold2
        })
        return config

    def build(self, input_shape):
        return super().build(input_shape)
And my model is as follows:
inputs = keras.Input(shape=(255, 255, 3))
x = CannyEdgeDetectorLayer(60, 120)(inputs)
x = layers.RandomFlip('horizontal')(x)
x = layers.RandomRotation(1./12)(x)
x = layers.Conv2D(filters=32, kernel_size=(3, 3), activation='gelu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation='gelu')(x)
x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation='gelu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=(3, 3), activation='gelu')(x)
x = layers.Conv2D(filters=128, kernel_size=(3, 3), activation='gelu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=(3, 3), activation='gelu')(x)
x = layers.Conv2D(filters=256, kernel_size=(3, 3), activation='gelu')(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Flatten()(x)
x = layers.Dense(128, activation='gelu')(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(64, activation='gelu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = Model(inputs=inputs, outputs=outputs)
I have tried my custom layer on some test images; it worked fine and successfully output a batch of tensors with shape (n, h, w, 1). But when I try to build my model I get the following error:
Image augmentation layers are expecting inputs to be rank 3 (HWC) or 4D (NHWC) tensors. Got shape: <unknown>
Call arguments received by layer "random_flip_25" (type RandomFlip):
• inputs=tf.Tensor(shape=<unknown>, dtype=float32)
• training=True
What went wrong and how should I properly specify the output shape of my custom layer?
I solved the problem by specifying the output shape in call:
def call(self, inputs):
    out = tf.py_function(func=self.canny_edge_detector, inp=[inputs], Tout=tf.float32)
    out.set_shape((inputs.shape[0], inputs.shape[1], inputs.shape[2], 1))
    return out
Turns out it's a problem with py_function and EagerTensor: the tensor returned by tf.py_function has an unknown static shape, so downstream layers cannot infer their input rank until you set it explicitly.
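A minimal standalone sketch of the underlying behaviour (the shapes mirror the question; edge_stub is a hypothetical stand-in for the layer's call):

import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec([None, 255, 255, 3])])
def edge_stub(t):
    # tf.py_function erases the static shape of its outputs...
    out = tf.py_function(func=lambda a: a[..., :1], inp=[t], Tout=tf.float32)
    print(out.shape)                 # <unknown>
    # ...so it must be restored by hand for downstream shape inference.
    out.set_shape([None, 255, 255, 1])
    print(out.shape)                 # (None, 255, 255, 1)
    return out

edge_stub(tf.zeros((2, 255, 255, 3)))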

Unable to interpret an argument of type tensorflow.python.data.ops.dataset_ops.PrefetchDataset as a TFF value in iterative process

I'm trying to run a classification simulation in tff, but I'm getting this error:
TypeError: Unable to interpret an argument of type tensorflow.python.data.ops.dataset_ops.PrefetchDataset as a TFF value.
Here is the code I'm using
client_lr = 1e-3
server_lr = 1e-1
NUM_ROUNDS = 200
NUM_EPOCHS = 5
BATCH_SIZE = 2048
EPOCHS = 400
TH = 0.5

def base_model():
    return Sequential([
        Dense(256, activation='relu', input_shape=(x_train.shape[-1],)),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])

client_train_dataset = collections.OrderedDict()
for i in range(1, total_clients+1):
    client_name = "client_" + str(i)
    start = samples_per_set * (i-1)
    end = samples_per_set * i
    data = collections.OrderedDict((('y', y_train[start:end]), ('x', x_train[start:end])))
    client_train_dataset[client_name] = data

train_dataset = tff.simulation.FromTensorSlicesClientData(client_train_dataset)
sample_dataset = train_dataset.create_tf_dataset_for_client(train_dataset.client_ids[0])
sample_element = next(iter(sample_dataset))

PREFETCH_BUFFER = 10
SHUFFLE_BUFFER = samples_per_set

def preprocess(dataset):
    def batch_format_fn(element):
        return collections.OrderedDict(
            x=element['x'],
            y=tf.reshape(element['y'], [-1, 1]))
    return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER).batch(BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)

preprocessed_sample_dataset = preprocess(sample_dataset)
sample_batch = tf.nest.map_structure(lambda x: x.numpy(), next(iter(preprocessed_sample_dataset)))

def make_federated_data(client_data, client_ids):
    return [preprocess(client_data.create_tf_dataset_for_client(x)) for x in client_ids]

federated_train_data = make_federated_data(train_dataset, train_dataset.client_ids)

def model_tff():
    model = base_model()
    return tff.learning.from_keras_model(
        model,
        input_spec=preprocessed_sample_dataset.element_spec,
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[
            tfa.metrics.F1Score(num_classes=1, threshold=TH),
            keras.metrics.Precision(name="precision", thresholds=TH),
            keras.metrics.Recall(name="recall", thresholds=TH)
        ])

iterative_process = tff.learning.build_federated_averaging_process(
    model_tff,
    client_optimizer_fn=lambda: optimizers.Adam(learning_rate=client_lr),
    server_optimizer_fn=lambda: optimizers.SGD(learning_rate=server_lr))
state = iterative_process.initialize()

federated_model = None
for round_num in range(1, NUM_ROUNDS+1):
    state, tff_metrics = iterative_process.next(state, federated_train_data)  # THE ERROR IS HERE
    federated_model = base_model()
    federated_model.compile(optimizer=optimizers.Adam(learning_rate=client_lr),
                            loss=tf.keras.losses.BinaryCrossentropy(),
                            metrics=[
                                tfa.metrics.F1Score(num_classes=1, threshold=TH),
                                keras.metrics.Precision(name="precision", thresholds=TH),
                                keras.metrics.Recall(name="recall", thresholds=TH)
                            ])
    state.model.assign_weights_to(model=federated_model)
    federated_result = federated_model.evaluate(x_val, y_val, verbose=1, return_dict=True)
    federated_test = federated_model.evaluate(x_test, y_test, verbose=1, return_dict=True)
I'm using this creditcard dataset: https://www.kaggle.com/mlg-ulb/creditcardfraud
The federated_train_data is a list of <PrefetchDataset shapes: OrderedDict([(x, (None, 29)), (y, (None, 1))]), types: OrderedDict([(x, tf.float64), (y, tf.int64)])>, just like in the Federated Learning for Image Classification tutorial on the TensorFlow Federated website.
This might be issue#918. Does this only occur when running in Google Colab? What version of TFF is being used?
Commit#4e57386 is believed to have fixed this; the fix is now part of the tensorflow-federated-nightly pip package.
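If that is the cause, upgrading should pick up the fix (assuming a pip-based environment):

pip install --upgrade tensorflow-federated-nightly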

How to modify the Keras CycleGAN example code to run parallelly on GPUs using tf.strategy

Here is the CycleGAN example from Keras:
CycleGAN Example Using Keras.
Here is my modified implementation that uses multiple GPUs. To implement the custom training, I used Custom training with tf.distribute.Strategy as a reference.
I want the Keras CycleGAN example to run fast using GPUs, since I will need to process and train on a huge amount of data. Because CycleGAN uses multiple loss functions, train_step will return four kinds of losses; currently I am returning just one for easier understanding. Still, the training on GPUs is dead slow, and I am not able to find the reason for this.
Am I using tf.distribute.Strategy wrongly?
"""
Title: CycleGAN
Author: [A_K_Nain](https://twitter.com/A_K_Nain)
Date created: 2020/08/12
Last modified: 2020/08/12
Description: Implementation of CycleGAN.
"""
"""
## CycleGAN
CycleGAN is a model that aims to solve the image-to-image translation
problem. The goal of the image-to-image translation problem is to learn the
mapping between an input image and an output image using a training set of
aligned image pairs. However, obtaining paired examples isn't always feasible.
CycleGAN tries to learn this mapping without requiring paired input-output images,
using cycle-consistent adversarial networks.
- [Paper](https://arxiv.org/pdf/1703.10593.pdf)
- [Original implementation](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix)
"""
"""
## Setup
"""
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
tfds.disable_progress_bar()
autotune = tf.data.experimental.AUTOTUNE
# Create a MirroredStrategy.
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
"""
## Prepare the dataset
In this example, we will be using the
[horse to zebra](https://www.tensorflow.org/datasets/catalog/cycle_gan#cycle_ganhorse2zebra)
dataset.
"""
# Load the horse-zebra dataset using tensorflow-datasets.
dataset, _ = tfds.load("cycle_gan/horse2zebra", with_info=True, as_supervised=True)
train_horses, train_zebras = dataset["trainA"], dataset["trainB"]
test_horses, test_zebras = dataset["testA"], dataset["testB"]
# Define the standard image size.
orig_img_size = (286, 286)
# Size of the random crops to be used during training.
input_img_size = (256, 256, 3)
# Weights initializer for the layers.
kernel_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02)
# Gamma initializer for instance normalization.
gamma_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02)
buffer_size = 256
batch_size = 1
def normalize_img(img):
    img = tf.cast(img, dtype=tf.float32)
    # Map values in the range [-1, 1]
    return (img / 127.5) - 1.0

def preprocess_train_image(img, label):
    # Random flip
    img = tf.image.random_flip_left_right(img)
    # Resize to the original size first
    img = tf.image.resize(img, [*orig_img_size])
    # Random crop to 256x256
    img = tf.image.random_crop(img, size=[*input_img_size])
    # Normalize the pixel values in the range [-1, 1]
    img = normalize_img(img)
    return img

def preprocess_test_image(img, label):
    # Only resizing and normalization for the test images.
    img = tf.image.resize(img, [input_img_size[0], input_img_size[1]])
    img = normalize_img(img)
    return img
"""
## Create `Dataset` objects
"""
BATCH_SIZE_PER_REPLICA = batch_size
GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
# Apply the preprocessing operations to the training data
train_horses = (
    train_horses.map(preprocess_train_image, num_parallel_calls=autotune)
    .cache()
    .shuffle(buffer_size)
    .batch(GLOBAL_BATCH_SIZE)
)
train_zebras = (
    train_zebras.map(preprocess_train_image, num_parallel_calls=autotune)
    .cache()
    .shuffle(buffer_size)
    .batch(GLOBAL_BATCH_SIZE)
)

# Apply the preprocessing operations to the test data
test_horses = (
    test_horses.map(preprocess_test_image, num_parallel_calls=autotune)
    .cache()
    .shuffle(buffer_size)
    .batch(GLOBAL_BATCH_SIZE)
)
test_zebras = (
    test_zebras.map(preprocess_test_image, num_parallel_calls=autotune)
    .cache()
    .shuffle(buffer_size)
    .batch(GLOBAL_BATCH_SIZE)
)

# Visualize some samples
_, ax = plt.subplots(4, 2, figsize=(10, 15))
for i, samples in enumerate(zip(train_horses.take(4), train_zebras.take(4))):
    horse = (((samples[0][0] * 127.5) + 127.5).numpy()).astype(np.uint8)
    zebra = (((samples[1][0] * 127.5) + 127.5).numpy()).astype(np.uint8)
    ax[i, 0].imshow(horse)
    ax[i, 1].imshow(zebra)
plt.show()
plt.savefig('Visualize_Some_Samples')
plt.close()
# Building blocks used in the CycleGAN generators and discriminators
class ReflectionPadding2D(layers.Layer):
    """Implements Reflection Padding as a layer.

    Args:
        padding(tuple): Amount of padding for the
        spatial dimensions.

    Returns:
        A padded tensor with the same type as the input tensor.
    """

    def __init__(self, padding=(1, 1), **kwargs):
        self.padding = tuple(padding)
        super(ReflectionPadding2D, self).__init__(**kwargs)

    def call(self, input_tensor, mask=None):
        padding_width, padding_height = self.padding
        padding_tensor = [
            [0, 0],
            [padding_height, padding_height],
            [padding_width, padding_width],
            [0, 0],
        ]
        return tf.pad(input_tensor, padding_tensor, mode="REFLECT")

def residual_block(
    x,
    activation,
    kernel_initializer=kernel_init,
    kernel_size=(3, 3),
    strides=(1, 1),
    padding="valid",
    gamma_initializer=gamma_init,
    use_bias=False,
):
    dim = x.shape[-1]
    input_tensor = x

    x = ReflectionPadding2D()(input_tensor)
    x = layers.Conv2D(
        dim,
        kernel_size,
        strides=strides,
        kernel_initializer=kernel_initializer,
        padding=padding,
        use_bias=use_bias,
    )(x)
    x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
    x = activation(x)

    x = ReflectionPadding2D()(x)
    x = layers.Conv2D(
        dim,
        kernel_size,
        strides=strides,
        kernel_initializer=kernel_initializer,
        padding=padding,
        use_bias=use_bias,
    )(x)
    x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
    x = layers.add([input_tensor, x])
    return x

def downsample(
    x,
    filters,
    activation,
    kernel_initializer=kernel_init,
    kernel_size=(3, 3),
    strides=(2, 2),
    padding="same",
    gamma_initializer=gamma_init,
    use_bias=False,
):
    x = layers.Conv2D(
        filters,
        kernel_size,
        strides=strides,
        kernel_initializer=kernel_initializer,
        padding=padding,
        use_bias=use_bias,
    )(x)
    x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
    if activation:
        x = activation(x)
    return x

def upsample(
    x,
    filters,
    activation,
    kernel_size=(3, 3),
    strides=(2, 2),
    padding="same",
    kernel_initializer=kernel_init,
    gamma_initializer=gamma_init,
    use_bias=False,
):
    x = layers.Conv2DTranspose(
        filters,
        kernel_size,
        strides=strides,
        padding=padding,
        kernel_initializer=kernel_initializer,
        use_bias=use_bias,
    )(x)
    x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
    if activation:
        x = activation(x)
    return x

def get_resnet_generator(
    filters=64,
    num_downsampling_blocks=2,
    num_residual_blocks=9,
    num_upsample_blocks=2,
    gamma_initializer=gamma_init,
    name=None,
):
    img_input = layers.Input(shape=input_img_size, name=name + "_img_input")
    x = ReflectionPadding2D(padding=(3, 3))(img_input)
    x = layers.Conv2D(filters, (7, 7), kernel_initializer=kernel_init, use_bias=False)(x)
    x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
    x = layers.Activation("relu")(x)

    # Downsampling
    for _ in range(num_downsampling_blocks):
        filters *= 2
        x = downsample(x, filters=filters, activation=layers.Activation("relu"))

    # Residual blocks
    for _ in range(num_residual_blocks):
        x = residual_block(x, activation=layers.Activation("relu"))

    # Upsampling
    for _ in range(num_upsample_blocks):
        filters //= 2
        x = upsample(x, filters, activation=layers.Activation("relu"))

    # Final block
    x = ReflectionPadding2D(padding=(3, 3))(x)
    x = layers.Conv2D(3, (7, 7), padding="valid")(x)
    x = layers.Activation("tanh")(x)

    model = keras.models.Model(img_input, x, name=name)
    return model
"""
## Build the discriminators
The discriminators implement the following architecture:
`C64->C128->C256->C512`
"""
def get_discriminator(
    filters=64, kernel_initializer=kernel_init, num_downsampling=3, name=None
):
    img_input = layers.Input(shape=input_img_size, name=name + "_img_input")
    x = layers.Conv2D(
        filters,
        (4, 4),
        strides=(2, 2),
        padding="same",
        kernel_initializer=kernel_initializer,
    )(img_input)
    x = layers.LeakyReLU(0.2)(x)

    num_filters = filters
    for num_downsample_block in range(3):
        num_filters *= 2
        if num_downsample_block < 2:
            x = downsample(
                x,
                filters=num_filters,
                activation=layers.LeakyReLU(0.2),
                kernel_size=(4, 4),
                strides=(2, 2),
            )
        else:
            x = downsample(
                x,
                filters=num_filters,
                activation=layers.LeakyReLU(0.2),
                kernel_size=(4, 4),
                strides=(1, 1),
            )

    x = layers.Conv2D(
        1, (4, 4), strides=(1, 1), padding="same", kernel_initializer=kernel_initializer
    )(x)

    model = keras.models.Model(inputs=img_input, outputs=x, name=name)
    return model
"""
## Build the CycleGAN model
"""
class CycleGan(keras.Model):
    def __init__(
        self,
        generator_G,
        generator_F,
        discriminator_X,
        discriminator_Y,
        lambda_cycle=10.0,
        lambda_identity=0.5,
    ):
        super(CycleGan, self).__init__()
        self.gen_G = generator_G
        self.gen_F = generator_F
        self.disc_X = discriminator_X
        self.disc_Y = discriminator_Y
        self.lambda_cycle = lambda_cycle
        self.lambda_identity = lambda_identity

    def compile(
        self,
        gen_G_optimizer,
        gen_F_optimizer,
        disc_X_optimizer,
        disc_Y_optimizer,
        gen_loss_fn,
        disc_loss_fn,
        cycle_loss_fn,
        identity_loss_fn
    ):
        super(CycleGan, self).compile()
        self.gen_G_optimizer = gen_G_optimizer
        self.gen_F_optimizer = gen_F_optimizer
        self.disc_X_optimizer = disc_X_optimizer
        self.disc_Y_optimizer = disc_Y_optimizer
        self.generator_loss_fn = gen_loss_fn
        self.discriminator_loss_fn = disc_loss_fn
        # self.cycle_loss_fn = keras.losses.MeanAbsoluteError()
        # self.identity_loss_fn = keras.losses.MeanAbsoluteError()
        self.cycle_loss_fn = cycle_loss_fn
        self.identity_loss_fn = identity_loss_fn

    def train_step(self, batch_data):
        # x is Horse and y is zebra
        real_x, real_y = batch_data

        with tf.GradientTape(persistent=True) as tape:
            # Horse to fake zebra
            fake_y = self.gen_G(real_x, training=True)
            # Zebra to fake horse -> y2x
            fake_x = self.gen_F(real_y, training=True)

            # Cycle (Horse to fake zebra to fake horse): x -> y -> x
            cycled_x = self.gen_F(fake_y, training=True)
            # Cycle (Zebra to fake horse to fake zebra) y -> x -> y
            cycled_y = self.gen_G(fake_x, training=True)

            # Identity mapping
            same_x = self.gen_F(real_x, training=True)
            same_y = self.gen_G(real_y, training=True)

            # Discriminator output
            disc_real_x = self.disc_X(real_x, training=True)
            disc_fake_x = self.disc_X(fake_x, training=True)
            disc_real_y = self.disc_Y(real_y, training=True)
            disc_fake_y = self.disc_Y(fake_y, training=True)

            # Generator adverserial loss
            gen_G_loss = self.generator_loss_fn(disc_fake_y)
            gen_F_loss = self.generator_loss_fn(disc_fake_x)

            # Generator cycle loss
            cycle_loss_G = self.cycle_loss_fn(real_y, cycled_y) * self.lambda_cycle
            cycle_loss_F = self.cycle_loss_fn(real_x, cycled_x) * self.lambda_cycle

            # Generator identity loss
            id_loss_G = (
                self.identity_loss_fn(real_y, same_y)
                * self.lambda_cycle
                * self.lambda_identity
            )
            id_loss_F = (
                self.identity_loss_fn(real_x, same_x)
                * self.lambda_cycle
                * self.lambda_identity
            )

            # Total generator loss
            total_loss_G = gen_G_loss + cycle_loss_G + id_loss_G
            total_loss_F = gen_F_loss + cycle_loss_F + id_loss_F

            # Discriminator loss
            disc_X_loss = self.discriminator_loss_fn(disc_real_x, disc_fake_x)
            disc_Y_loss = self.discriminator_loss_fn(disc_real_y, disc_fake_y)

        # Get the gradients for the generators
        grads_G = tape.gradient(total_loss_G, self.gen_G.trainable_variables)
        grads_F = tape.gradient(total_loss_F, self.gen_F.trainable_variables)

        # Get the gradients for the discriminators
        disc_X_grads = tape.gradient(disc_X_loss, self.disc_X.trainable_variables)
        disc_Y_grads = tape.gradient(disc_Y_loss, self.disc_Y.trainable_variables)

        # Update the weights of the generators
        self.gen_G_optimizer.apply_gradients(
            zip(grads_G, self.gen_G.trainable_variables)
        )
        self.gen_F_optimizer.apply_gradients(
            zip(grads_F, self.gen_F.trainable_variables)
        )

        # Update the weights of the discriminators
        self.disc_X_optimizer.apply_gradients(
            zip(disc_X_grads, self.disc_X.trainable_variables)
        )
        self.disc_Y_optimizer.apply_gradients(
            zip(disc_Y_grads, self.disc_Y.trainable_variables)
        )

        return total_loss_G
        # return [total_loss_G, total_loss_F, disc_X_loss, disc_Y_loss]
# Open a strategy scope.
with strategy.scope():
    mae_loss_fn = keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE)

    # Loss function for evaluating cycle consistency loss
    def cycle_loss_fn(real, cycled):
        cycle_loss = mae_loss_fn(real, cycled)
        cycle_loss = tf.nn.compute_average_loss(cycle_loss, global_batch_size=GLOBAL_BATCH_SIZE)
        return cycle_loss

    # Loss function for evaluating identity mapping loss
    def identity_loss_fn(real, same):
        identity_loss = mae_loss_fn(real, same)
        identity_loss = tf.nn.compute_average_loss(identity_loss, global_batch_size=GLOBAL_BATCH_SIZE)
        return identity_loss

    # Loss function for evaluating adversarial loss
    adv_loss_fn = keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)

    # Define the loss function for the generators
    def generator_loss_fn(fake):
        fake_loss = adv_loss_fn(tf.ones_like(fake), fake)
        fake_loss = tf.nn.compute_average_loss(fake_loss, global_batch_size=GLOBAL_BATCH_SIZE)
        return fake_loss

    # Define the loss function for the discriminators
    def discriminator_loss_fn(real, fake):
        real_loss = adv_loss_fn(tf.ones_like(real), real)
        fake_loss = adv_loss_fn(tf.zeros_like(fake), fake)
        real_loss = tf.nn.compute_average_loss(real_loss, global_batch_size=GLOBAL_BATCH_SIZE)
        fake_loss = tf.nn.compute_average_loss(fake_loss, global_batch_size=GLOBAL_BATCH_SIZE)
        return (real_loss + fake_loss) * 0.5

    # Get the generators
    gen_G = get_resnet_generator(name="generator_G")
    gen_F = get_resnet_generator(name="generator_F")

    # Get the discriminators
    disc_X = get_discriminator(name="discriminator_X")
    disc_Y = get_discriminator(name="discriminator_Y")

    # Create cycle gan model
    cycle_gan_model = CycleGan(
        generator_G=gen_G, generator_F=gen_F, discriminator_X=disc_X, discriminator_Y=disc_Y
    )

    optimizer = keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5)

    # Compile the model
    cycle_gan_model.compile(
        gen_G_optimizer=optimizer,
        gen_F_optimizer=optimizer,
        disc_X_optimizer=optimizer,
        disc_Y_optimizer=optimizer,
        gen_loss_fn=generator_loss_fn,
        disc_loss_fn=discriminator_loss_fn,
        cycle_loss_fn=cycle_loss_fn,
        identity_loss_fn=identity_loss_fn
    )

train_dist_dataset = strategy.experimental_distribute_dataset(
    tf.data.Dataset.zip((train_horses, train_zebras)))

# `run` replicates the provided computation and runs it
# with the distributed input.
@tf.function
def distributed_train_step(dataset_inputs):
    per_replica_losses = strategy.run(cycle_gan_model.train_step, args=(dataset_inputs,))
    return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses,
                           axis=None)
"""
## Train the end-to-end model
"""
for epoch in range(1):
    # TRAIN LOOP
    all_loss = 0.0
    num_batches = 0.0
    for one_batch in train_dist_dataset:
        all_loss += distributed_train_step(one_batch)
        num_batches += 1
    train_loss = all_loss / num_batches
    print(train_loss)

Gradients are None for Custom Convolution Layer

I have implemented the basic MNIST model with a custom convolution layer, as shown below. The problem is that the gradients are always None for the custom layer, so no learning happens during back propagation.
I have debugged the outputs of the layers during the forward pass and they are OK.
Here is the sample code; for simplicity I have passed an image of ones and just returned the matrix from the custom layer.
I have tried my best but could not make it work; any help is very much appreciated in advance.
The following code is executable and raises the warning:
WARNING:tensorflow:Gradients do not exist for variables ['cnn/custom_conv2d/kernel:0', 'cnn/custom_conv2d/bias:0', 'cnn/custom_conv2d_1/kernel:0', 'cnn/custom_conv2d_1/bias:0', 'cnn/custom_conv2d_2/kernel:0', 'cnn/custom_conv2d_2/bias:0'] when minimizing the loss.
import numpy as np
import tensorflow as tf
from grpc.beta import interfaces

class CustomConv2D(tf.keras.layers.Conv2D):
    def __init__(self, filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format=None,
                 dilation_rate=(1, 1),
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='glorot_uniform',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 __name__='CustomConv2D',
                 **kwargs
                 ):
        super(CustomConv2D, self).__init__(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs)

    def call(self, input):
        (unrolled_mat, filters, shape) = self.prepare(input)
        # unrolled_mat = unrolled inputs
        # filters = unrolled kernels of the layer
        # convolution through unrolling
        conv_result = tf.tensordot(unrolled_mat, filters, axes=1)
        result = tf.convert_to_tensor(tf.reshape(conv_result, shape))
        return result

    def prepare(self, matrix):
        batches, rows, cols, channels = matrix.shape
        kernel_size = self.kernel_size[0]
        unrolled_matrices = None
        # start = timer()
        for batch in range(batches):
            unrolled_maps = None
            for chanel in range(channels):
                unrolled_map = self.unroll(batch, cols, kernel_size, matrix, rows, chanel)
                if unrolled_maps is None:
                    unrolled_maps = unrolled_map
                else:
                    unrolled_maps = np.append(unrolled_maps, unrolled_map, axis=1)
            unrolled_maps = np.reshape(unrolled_maps, (-1, unrolled_maps.shape[0], unrolled_maps.shape[1]))
            if unrolled_matrices is None:
                unrolled_matrices = unrolled_maps
            else:
                unrolled_matrices = np.concatenate((unrolled_matrices, unrolled_maps))
        kernels = self.get_weights()
        kernels = np.reshape(kernels[0], (unrolled_matrices[0].shape[1], -1))
        shp = (batches, rows - (kernel_size - 1), cols - (kernel_size - 1), self.filters)
        matrix = unrolled_matrices
        return (matrix, kernels, shp)

    def unroll(self, batch, cols, kernel_size, matrix, rows, chanel):
        # a = np.zeros((shape))
        unrolled_feature_map = None
        for x in range(0, rows - (kernel_size - 1)):
            for y in range(0, (cols - (kernel_size - 1))):
                temp_row = None  # flattened kernal at single position
                for k in range(kernel_size):
                    for l in range(kernel_size):
                        if temp_row is None:
                            temp_row = matrix[batch, x + k, y + l, chanel]
                            # print(matrix[batch, x + k, y + l])
                        else:
                            temp_row = np.append(temp_row, matrix[batch, x + k, y + l, chanel])
                            # print(matrix[batch, x + k, y + l])
                if unrolled_feature_map is None:
                    unrolled_feature_map = np.reshape(temp_row,
                                                      (-1, kernel_size * kernel_size))  # first row of unrolled matrix added
                else:
                    unrolled_feature_map = np.concatenate((unrolled_feature_map, np.reshape(temp_row,
                                                          (-1, kernel_size * kernel_size))))  # concatinate subsequent row to un_mat
        unrolled_feature_map = np.reshape(unrolled_feature_map, (unrolled_feature_map.shape[0], unrolled_feature_map.shape[1]))
        # print(unrolled_feature_map.shape)
        matrix = unrolled_feature_map
        return matrix

class CNN(tf.keras.Model):
    def __init__(self):
        super(CNN, self).__init__()
        self.learning_rate = 0.001
        self.momentum = 0.9
        self.optimizer = tf.keras.optimizers.Adam(self.learning_rate, self.momentum)
        self.conv1 = CustomConv2D(filters=6, kernel_size=3, activation='relu')  ## valid means no padding
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=2)  # default stride??-
        self.conv2 = CustomConv2D(filters=16, kernel_size=3, activation='relu')
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=2)
        self.conv3 = CustomConv2D(filters=120, kernel_size=3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.fc1 = tf.keras.layers.Dense(units=82, kernel_initializer='glorot_uniform')
        self.fc2 = tf.keras.layers.Dense(units=10, activation='softmax', kernel_initializer='glorot_uniform')

    def call(self, x):
        x = self.conv1(x)    # shap(32,26,26,6) all (6s 3s 6s 3s)
        x = self.pool1(x)    # shap(32,13,13,6) all (6s)
        x = self.conv2(x)    # shap(32,11,11,16) all(324s)
        x = self.pool2(x)    # shap(32,5,5,16)
        x = self.conv3(x)    # shap(32,3,3,120)all(46656)
        x = self.flatten(x)  # shap(32,1080)
        x = self.fc1(x)      # shap(32,82)
        x = self.fc2(x)      # shap(32,10)
        return x

    def feedForward(self, image, label):
        accuracy_object = tf.metrics.Accuracy()
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        with tf.GradientTape() as tape:
            feedForwardCompuation = self(image, training=True)
            self.loss_value = loss_object(label, feedForwardCompuation)
        grads = tape.gradient(self.loss_value, self.variables)
        self.optimizer.apply_gradients(zip(grads, self.variables))
        accuracy = accuracy_object(tf.argmax(feedForwardCompuation, axis=1, output_type=tf.int32), label)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32')
y_train = y_train.astype('float32')
image = x_train[0].reshape((1, 28, 28, 1))
label = y_train[0]
cnn = CNN()
cnn.feedForward(image, label)
UPDATE: I am not using the built-in TF conv function; rather, I am implementing my own custom convolution operation via the matrix unrolling method (unrolled map * unrolled filters). But tape.gradient returns None for the custom layers; when I use the built-in conv2d function of TF, it works fine!
I have added the actual code of the operation.
The problem is that the convolution operation is not happening inside the CustomConv2D class. Neither the call method nor the customConv method is performing the convolution; it is just returning the input as it is. (The unrolling is done with numpy, which the gradient tape cannot trace, so no gradient path back to the kernels is recorded.)
Replacing the line return self.customConv(matrix) in the call method of the CustomConv2D class with return super(tf.keras.layers.Conv2D, self).call(matrix) will perform the actual convolution operation.
One more change is to invoke the call method of the CNN class by adding the line _ = cnn(X_reshaped) before the line cnn.feedForward(image, label).
With these two changes, gradients will be present.
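The underlying reason is that numpy operations are invisible to the gradient tape. A minimal standalone sketch of the effect (not the poster's code):

import tensorflow as tf

w = tf.Variable(2.0)

# A numpy round-trip in the forward pass disconnects the tape:
with tf.GradientTape() as tape:
    x = w * 3.0
    x = tf.constant(x.numpy())   # leaves the TF graph, so the path to w is lost
    loss = x * x
print(tape.gradient(loss, w))    # None

# Pure TF ops keep the path intact:
with tf.GradientTape() as tape:
    loss = (w * 3.0) ** 2
print(tape.gradient(loss, w))    # tf.Tensor(36.0, shape=(), dtype=float32)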

Converting keras functional model to keras class in tensorflow 2

I am trying to convert a Keras functional model into a class derived from tensorflow.keras.models.Model, and I'm facing two issues.
1. I need to multiply 2 layers using tensorflow.keras.layers.multiply, but it returns a ValueError: A merge layer should be called on a list of inputs.
2. If I remove this layer, thus working with a classical CNN, it returns a tensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'patch:0' shape=(None, 64, 64, 3) dtype=float32>].
I would appreciate some guidance converting my code. I'm using Python 3.7, TensorFlow 2.0rc2 and Keras 2.3.0. The class I have defined is the following:
class TestCNN(Model):
    """
    conv1 > conv2 > fc1 > fc2 > alpha * fc2 > Sigmoid > output
    """

    def __init__(self, input_dimension, n_category, **kwargs):
        """
        Instanciator
        :param input_dimension: tuple of int, theoretically (patch_size x patch_size x channels)
        :param n_category: int, the number of categories to classify,
        :param weight_decay: float, weight decay parameter for all the kernel regularizers
        :return: the Keras model
        """
        super(TestCNN, self).__init__(name='testcnn', **kwargs)
        self.input_dimension = input_dimension
        self.n_category = n_category
        self.conv1 = Conv2D(36, activation='relu', name='conv1/relu')
        self.conv1_maxpooling = MaxPooling2D((2, 2), name='conv1/maxpooling')
        self.conv2 = Conv2D(48, activation='relu', name='conv2/relu')
        self.conv2_maxpooling = MaxPooling2D((2, 2), name='conv2/maxpooling')
        self.flatten1 = Flatten(name='flatten1')
        self.fc1 = Dense(512, activation='relu', name='fc1/relu')
        self.fc2 = Dense(512, activation='relu', name='fc2/relu')
        self.alpha = TestLayer(layer_dim=128, name='alpha')
        self.output1 = TestSigmoid(output_dimension=n_category, name='output_layer')

    @tensorflow.function
    def call(self, x):
        x = self.conv1(x)
        x = self.conv1_maxpooling(x)
        x = self.conv2(x)
        x = self.conv2_maxpooling(x)
        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.fc2(x)
        alpha_times_fc2 = multiply([alpha_output, fc2_output], name='alpha_times_fc2')
        return self.output1(alpha_times_fc2)

    def build(self, **kwargs):
        inputs = Input(shape=self.input_dimension, dtype='float32', name='patch')
        outputs = self.call(inputs)
        super(TestCNN, self).__init__(name="TestCNN", inputs=inputs, outputs=outputs, **kwargs)
Then, in my main loop, I'm creating the instance as follows:
testcnn = TestCNN(input_dimension=input_dimension, n_category=training_set.category_count)
optimizer = tensorflow.keras.optimizers.Adam(
    lr=parameter['training']['adam']['learning_rate'],
    beta_1=parameter['training']['adam']['beta1'],
    beta_2=parameter['training']['adam']['beta2'])
metrics_list = [tensorflow.keras.metrics.TruePositives]
loss_function = tensorflow.keras.losses.categorical_crossentropy
loss_metrics = tensorflow.keras.metrics.Mean()
testcnn.build()
testcnn.summary()
This code raises the tensorflow.python.eager.core._SymbolicException. If I comment out some lines and directly return the result of the fc2 layer, I get the ValueError instead.
I have also tried commenting out the build() function in my model and calling it from my main script as follows:
testcnn.build(input_dimension)
testcnn.compile(optimizer=adam_optimizer, loss=loss_function, metrics=metrics_list)
testcnn.summary()
Input dimension is a list formatted as follows:
input_dimension = (batch_size, image_size, image_size, channels)
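For reference, in the subclassed style a merge layer takes a single list of tensors that were computed inside call, and every tensor must derive from call's arguments rather than from captured symbolic inputs. A minimal sketch of that pattern (TestCNNSketch, and the Dense stand-ins for TestLayer/TestSigmoid, are hypothetical, since their code isn't shown):

import tensorflow as tf
from tensorflow.keras import layers, models

class TestCNNSketch(models.Model):
    def __init__(self, n_category, **kwargs):
        super().__init__(name='testcnn_sketch', **kwargs)
        self.fc2 = layers.Dense(512, activation='relu', name='fc2/relu')
        self.alpha = layers.Dense(512, name='alpha')                   # stand-in for TestLayer
        self.multiply = layers.Multiply(name='alpha_times_fc2')
        self.output1 = layers.Dense(n_category, activation='sigmoid')  # stand-in for TestSigmoid

    def call(self, x):
        fc2_output = self.fc2(x)
        alpha_output = self.alpha(fc2_output)
        # A merge layer is called on one list of already-computed tensors:
        return self.output1(self.multiply([alpha_output, fc2_output]))

model = TestCNNSketch(n_category=10)
_ = model(tf.zeros((1, 64)))   # build by calling on concrete data; summary works afterwards
model.summary()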