DeblurGAN can't load his own weights anymore - tensorflow

Hey I realy need some help =)
firstly, sorry that it's soo long^^ but I hope that you don't need the full code at the end.
I coded a GAN for deblurring. Now I'm training it. the first 71 epochs have been trained without any problems: I trained some epochs till the colab GPU-time limit was reached, the next day I loaded my weights into the gan and continued training.
2 or 3 weeks ago I wanted to load the weights of epoch 71 in my Gan but I recieved the following error (I'm quite sure that I didn't change anything in the code). Since this moment I only can load the first 65 weights and i get the same error for every epoch higher than 65:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-16-a35c9a2bbf3a> in <module>()
1 # Load weights
----> 2 gan.load_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/deblurGAN_weights66_batchsize_1.h5")
5 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in load_weights(self, filepath, by_name, skip_mismatch, options)
2209 f, self.layers, skip_mismatch=skip_mismatch)
2210 else:
-> 2211 hdf5_format.load_weights_from_hdf5_group(f, self.layers)
2212
2213 def _updated_config(self):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/saving/hdf5_format.py in load_weights_from_hdf5_group(f, layers)
706 str(len(weight_values)) + ' elements.')
707 weight_value_tuples += zip(symbolic_weights, weight_values)
--> 708 K.batch_set_value(weight_value_tuples)
709
710
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py in batch_set_value(tuples)
3574 if ops.executing_eagerly_outside_functions():
3575 for x, value in tuples:
-> 3576 x.assign(np.asarray(value, dtype=dtype(x)))
3577 else:
3578 with get_graph().as_default():
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py in assign(self, value, use_locking, name, read_value)
856 with _handle_graph(self.handle):
857 value_tensor = ops.convert_to_tensor(value, dtype=self.dtype)
--> 858 self._shape.assert_is_compatible_with(value_tensor.shape)
859 assign_op = gen_resource_variable_ops.assign_variable_op(
860 self.handle, value_tensor, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py in assert_is_compatible_with(self, other)
1132 """
1133 if not self.is_compatible_with(other):
-> 1134 raise ValueError("Shapes %s and %s are incompatible" % (self, other))
1135
1136 def most_specific_compatible_shape(self, other):
ValueError: Shapes (4, 4, 64, 128) and (64,) are incompatible
I was looking a long time for a solution and i didn't find a real one. But I found out, that if I train one epoch with one of the old weights (1-65) afterwards I can load one of the new weights. So I thought that I could use this "workaround" but yesterday I plotted the scores of the metric of the Test dataset for every epoch. I recieved this picture:
psnrscore/epoch
as you can see it looks like I'm producing trash since epoch 65 (on the pic since 60 because I lost the first 5 epochs, so it starts by 6)
I'm realy frustrated and hope that someone could help me =D
Here's the full code of the GAN:
# Libraries to build the model
from tensorflow import pad
from tensorflow.keras.layers import Layer
from keras.layers import Input, Activation, Add, UpSampling2D
from keras.layers.merge import Add
from keras.layers.core import Dropout, Dense, Flatten
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.core import Lambda
from keras.layers.normalization import BatchNormalization
from keras.models import Model
import keras.backend as K
from keras.applications.vgg16 import VGG16
from keras.optimizers import Adam
import keras
# Reflection padding
from keras.engine import InputSpec
import tensorflow as tf
from keras.engine.topology import Layer
'''
2D Reflection Padding
Attributes:
- padding: (padding_width, padding_height) tuple
'''
class ReflectionPadding2D(Layer):
def __init__(self, padding=(1, 1), **kwargs):
self.padding = tuple(padding)
self.input_spec = [InputSpec(ndim=4)]
super(ReflectionPadding2D, self).__init__(**kwargs)
def compute_output_shape(self, s):
""" If you are using "channels_last" configuration"""
return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])
def call(self, x, mask=None):
w_pad,h_pad = self.padding
return tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT')
# Res Block
def res_block(input, filters, kernel_size = (3,3), strides = (1,1), use_dropout = False):
"""
Instanciate a Keras Resnet Block using sequential API.
:param input: Input tensor
:param filters: Number of filters to use
:param kernel_size: Shape of the kernel for the convolution
:param strides: Shape of the strides for the convolution
:param use_dropout: Boolean value to determine the use of dropout
:return: Keras Model
"""
x = ReflectionPadding2D((1,1))(input)
x = Conv2D(filters = filters,
kernel_size = kernel_size,
strides = strides,)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
if use_dropout:
x = Dropout(0.5)(x)
x = ReflectionPadding2D((1,1))(x)
x = Conv2D(filters = filters,
kernel_size = kernel_size,
strides = strides,)(x)
x = BatchNormalization()(x)
# Two convolution layers followed by a direct connection between input and output (skip connection)
out = Add()([input, x])
return out
# Generator
n_res_blocks = 9
def generator_model():
# encoder
inputs = Input(shape = img_shape)
x = ReflectionPadding2D((3, 3))(inputs)
x = Conv2D(filters = 64, kernel_size = (7,7), padding = 'valid')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(128, (3,3), strides=2, padding='same') (x) #DIM(15,15,128)
x = BatchNormalization() (x)
x = Activation('relu') (x)
x = Conv2D(256, (3,3), strides = 2, padding = 'same') (x) #DIM(7,7,256)
x = BatchNormalization() (x)
x = Activation('relu') (x)
# Apply 9 res blocks
for i in range(n_res_blocks):
x = res_block(x, 256, use_dropout = True)
# decoder
#x = Conv2DTranspose(128, (3,3), strides = 2, padding = 'same') (x)
x = UpSampling2D()(x)
x = Conv2D(filters = 128, kernel_size=(3, 3), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
#x = Conv2DTranspose(64, (3,3), strides = 2, padding = 'same') (x)
x = UpSampling2D()(x)
x = Conv2D(filters = 64, kernel_size=(3, 3), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = ReflectionPadding2D((3,3))(x)
x = Conv2D(filters = 3, kernel_size = (7,7), padding = 'valid')(x)
x = Activation('tanh')(x)
# Add direct connection from input to output and recenter to [-1, 1] (skip connection)
outputs = Add()([x, inputs])
outputs = Lambda(lambda z: z/2)(outputs) # to keep normalized outputs
model = Model(inputs = inputs, outputs = outputs, name = 'Generator')
return model
# Discriminator
def discriminator_model():
Input_img = Input(shape=(img_shape))
x = Conv2D(filters = 64, kernel_size = (4, 4), strides = 2, padding='same')(Input_img)
x = LeakyReLU(0.2)(x)
nf_mult, nf_mult_prev = 1, 1
for n in range(3):
nf_mult_prev, nf_mult = nf_mult, min(2**n, 8)
x = Conv2D(filters = 64*nf_mult, kernel_size = (4, 4), strides = 2, padding = 'same')(x)
x = BatchNormalization()(x)
x = LeakyReLU(0.2)(x)
nf_mult_prev, nf_mult = nf_mult, 8
x = Conv2D(filters = 64*nf_mult, kernel_size = (4, 4), strides = 1, padding = 'same')(x)
x = BatchNormalization()(x)
x = LeakyReLU(0.2)(x)
x = Conv2D(filters = 1, kernel_size = (4, 4), strides = 1, padding = 'same')(x)
x = Flatten()(x)
x = Dense(1024, activation = 'tanh')(x)
x = Dense(1, activation = 'sigmoid')(x)
model = Model(inputs = Input_img, outputs = x, name = 'discriminator')
return model
def gan_model(generator, discriminator):
inputs = Input(shape = img_shape)
generated_images = generator(inputs)
outputs = discriminator(generated_images)
model = Model(inputs=inputs, outputs = [generated_images, outputs])
return model
#Losses
#Wassersteinloss:
def wasserstein_loss(y_true, y_pred):
return K.mean(y_true * y_pred)
# vgg16 model for perceptual loss
vgg = VGG16(include_top = False, weights = 'imagenet', input_shape = img_shape)
loss_model = Model(inputs = vgg.input, outputs = vgg.get_layer('block3_conv3').output)
loss_model.trainable = False
#perceptual loss:
def perceptual_loss(y_true, y_pred):
return K.mean(K.square(loss_model(y_true) - loss_model(y_pred)))
#Metrics:
#SSIM:
def ssim_metric(y_true, y_pred):
return tf.reduce_mean(tf.image.ssim(tf.convert_to_tensor(y_true),tf.convert_to_tensor(y_pred), max_val=1.0, ))
#PSNR:
def psnr_metric(y_true, y_pred):
return tf.reduce_mean(tf.image.psnr(y_true, y_pred, max_val=1.0))
def training(epochs, batch_size):
path_psnr = F"/content/gdrive/My Drive/Colab Notebooks/data/psnr"
path_ssim = F"/content/gdrive/My Drive/Colab Notebooks/data/ssim"
GAN_losses = []
#psnrs = []
#ssims = []
random_idx = np.arange(0, X_train.shape[0])
n_batches = int (len(random_idx)/batch_size) #divide trainingset into batches of batch_size
for e in range(epochs):
#weights_name = "deblurGAN_weights%s_batchsize_%r.h5" %(e + 66, batch_size)
weights_name = "deblurGAN_weights_test.h5"
print("epoch: %s " %(e + 66))
#randomize index of trainig set
random.shuffle(random_idx)
for i in range(n_batches):
img_batch_blured = X_train[i*batch_size:(i+1)*batch_size]
img_batch_generated = generator.predict(img_batch_blured)
img_batch_original = Y_train[i*batch_size:(i+1)*batch_size]
img_batch = np.concatenate((img_batch_generated , img_batch_original),0)
valid0 = -np.ones(batch_size)
valid1 = np.ones(batch_size)
valid = np.concatenate((valid0,valid1))
discriminator.trainable = True
for k in range(5):
loss = discriminator.train_on_batch(img_batch, valid)
discriminator.trainable = False
GAN_loss = gan.train_on_batch(img_batch_blured, [img_batch_original, valid1])
GAN_losses.append(GAN_loss)
if (100*i/n_batches).is_integer():
psnr = psnr_metric(img_batch_original, img_batch_generated)
ssim = ssim_metric(img_batch_original, img_batch_generated)
psnrs.append(psnr)
ssims.append(ssim)
#creating 2 files in Google Drive where the psnr and ssim data will be saved.
pickle.dump( psnrs, open( path_psnr, "wb" ) )
pickle.dump( ssims, open( path_ssim, "wb" ) )
print((100*i/n_batches) + 1, "% psnr: ", psnr," ssim: ", ssim)
# Save weights: mode the path to your directory
gan.save_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/{weights_name}")
return [GAN_losses, psnrs, ssims]
# Initialize models
generator = generator_model()
discriminator = discriminator_model()
gan = gan_model(generator, discriminator)
# Initialize optimizers
d_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
gan_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
# Compile models
discriminator.trainable = True
discriminator.compile(optimizer = d_opt, loss = wasserstein_loss)
discriminator.trainable = False
loss = [perceptual_loss, wasserstein_loss]
loss_weights = [100, 1]
gan.compile(optimizer = gan_opt, loss = loss, loss_weights = loss_weights)
discriminator.trainable = True
gan.summary()
# Load weights
gan.load_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/deblurGAN_weights66_batchsize_1.h5")
#connect to GPU
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
loss = training(1, 1) #epochs, batchsize

It is solved an can be closed. I didn't know that the "discriminato.Trainable = True/False" was changed. It seems to be the reason for another ordering in the weights.

Related

Multi input Keras model shapes error: Matrix size-incompatible

I'm trying to make a multi input model that takes image and text by reading them from a dataframe.
everything looked fine until training when i got the error listed under:
class MultiGen:
def __init__(self, data_path, batch_size):
self.data_path = data_path
self.batch_size = batch_size
self.df = pd.read_csv(data_path)
self.train_df = self.df[:int(0.8 * len(self.df))]
self.val_df = self.df[int(0.8 * len(self.df)):]
self.tokenizer = Tokenizer(num_words=10000)
self.vectorizer = None
self.max_features = 10000
# self.tokenizer.fit_on_texts(self.df['text'])
def multi_input_generator(self, data_df):
while True:
for i in range(0, len(data_df), self.batch_size):
batch_df = data_df[i:i+self.batch_size].reset_index(drop=True)
images = []
text = []
for img_path in batch_df['image']:
img = load_img(img_path, target_size=(300, 300))
img = img_to_array(img) / 255.0
images.append(img)
for txt_path in batch_df['text']:
txt = open(txt_path).read()
text.append(txt)
self.vectorizer = CountVectorizer(max_features=self.max_features)
texts = self.vectorizer.fit_transform(text)
texts = texts.toarray()
# texts = self.X.toarray()
# texts = self.tokenizer.texts_to_sequences(text)
# texts = pad_sequences(texts, maxlen=150)
labels = batch_df['label']
# labels = to_categorical(labels,num_classes=345)
yield [np.array(images), np.array(texts)], np.array(labels)
def train_generator(self):
return self.multi_input_generator(self.train_df)
def val_generator(self):
return self.multi_input_generator(self.val_df)
def length(self):
return len(self.df)
def train_length(self):
return len(self.train_df)
def val_length(self):
return len(self.val_df)
batch_size = 32
# initialize the generator object
gen = MultiGen(data_path, batch_size=batch_size)
# get the generator
train_gen = gen.train_generator()
val_gen = gen.val_generator()
input_img = Input(shape=(300, 300, 3))
input_text = Input(shape=(gen.max_features,))
x = Conv2D(32, (3, 3), activation='relu')(input_img)
x = MaxPooling2D((2, 2))(x)
# x = Conv2D(64, (3, 3), activation='relu')(x)
# x = MaxPooling2D((2, 2))(x)
#
# x = Conv2D(128, (3, 3), activation='relu')(x)
x = Flatten()(x)
y = Dense(64, activation='relu')(input_text)
z = keras.layers.concatenate([x, y],axis=-1)
output = Dense(337, activation='softmax')(z)
model = Model(inputs=[input_img, input_text], outputs=output)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
# train the model
model.fit(train_gen, validation_data=val_gen, steps_per_epoch=gen.train_length()//batch_size, validation_steps=gen.val_length()//batch_size, epochs=10)
This is the error I get:
Node: 'model_1/dense_2/MatMul'
Matrix size-incompatible: In[0]: [32,229], In[1]: [10000,64]
[[{{node model_1/dense_2/MatMul}}]] [Op:__inference_train_function_1955]

Issues translating tf/Keras model to pytorch?

I’m trying to convert this model and training code for pytorch (originally taken from HERE):
# example of pix2pix gan for satellite to map image-to-image translation
from numpy import load
from numpy import zeros
from numpy import ones
from numpy.random import randint
from keras.optimizers import Adam
from keras.initializers import RandomNormal
from keras.models import Model
from keras.models import Input
from keras.layers import Conv2D
from keras.layers import Conv2DTranspose
from keras.layers import LeakyReLU
from keras.layers import Activation
from keras.layers import Concatenate
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import LeakyReLU
from matplotlib import pyplot
# define the discriminator model
def define_discriminator(image_shape):
# weight initialization
init = RandomNormal(stddev=0.02)
# source image input
in_src_image = Input(shape=image_shape)
# target image input
in_target_image = Input(shape=image_shape)
# concatenate images channel-wise
merged = Concatenate()([in_src_image, in_target_image])
# C64
d = Conv2D(64, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(merged)
d = LeakyReLU(alpha=0.2)(d)
# C128
d = Conv2D(128, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(d)
d = BatchNormalization()(d)
d = LeakyReLU(alpha=0.2)(d)
# C256
d = Conv2D(256, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(d)
d = BatchNormalization()(d)
d = LeakyReLU(alpha=0.2)(d)
# C512
d = Conv2D(512, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(d)
d = BatchNormalization()(d)
d = LeakyReLU(alpha=0.2)(d)
# second last output layer
d = Conv2D(512, (4,4), padding='same', kernel_initializer=init)(d)
d = BatchNormalization()(d)
d = LeakyReLU(alpha=0.2)(d)
# patch output
d = Conv2D(1, (4,4), padding='same', kernel_initializer=init)(d)
patch_out = Activation('sigmoid')(d)
# define model
model = Model([in_src_image, in_target_image], patch_out)
# compile model
opt = Adam(lr=0.0002, beta_1=0.5)
model.compile(loss='binary_crossentropy', optimizer=opt, loss_weights=[0.5])
return model
# define an encoder block
def define_encoder_block(layer_in, n_filters, batchnorm=True):
# weight initialization
init = RandomNormal(stddev=0.02)
# add downsampling layer
g = Conv2D(n_filters, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(layer_in)
# conditionally add batch normalization
if batchnorm:
g = BatchNormalization()(g, training=True)
# leaky relu activation
g = LeakyReLU(alpha=0.2)(g)
return g
# define a decoder block
def decoder_block(layer_in, skip_in, n_filters, dropout=True):
# weight initialization
init = RandomNormal(stddev=0.02)
# add upsampling layer
g = Conv2DTranspose(n_filters, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(layer_in)
# add batch normalization
g = BatchNormalization()(g, training=True)
# conditionally add dropout
if dropout:
g = Dropout(0.5)(g, training=True)
# merge with skip connection
g = Concatenate()([g, skip_in])
# relu activation
g = Activation('relu')(g)
return g
# define the standalone generator model
def define_generator(image_shape=(256,256,3)):
# weight initialization
init = RandomNormal(stddev=0.02)
# image input
in_image = Input(shape=image_shape)
# encoder model
e1 = define_encoder_block(in_image, 64, batchnorm=False)
e2 = define_encoder_block(e1, 128)
e3 = define_encoder_block(e2, 256)
e4 = define_encoder_block(e3, 512)
e5 = define_encoder_block(e4, 512)
e6 = define_encoder_block(e5, 512)
e7 = define_encoder_block(e6, 512)
# bottleneck, no batch norm and relu
b = Conv2D(512, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(e7)
b = Activation('relu')(b)
# decoder model
d1 = decoder_block(b, e7, 512)
d2 = decoder_block(d1, e6, 512)
d3 = decoder_block(d2, e5, 512)
d4 = decoder_block(d3, e4, 512, dropout=False)
d5 = decoder_block(d4, e3, 256, dropout=False)
d6 = decoder_block(d5, e2, 128, dropout=False)
d7 = decoder_block(d6, e1, 64, dropout=False)
# output
g = Conv2DTranspose(3, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(d7)
out_image = Activation('tanh')(g)
# define model
model = Model(in_image, out_image)
return model
# define the combined generator and discriminator model, for updating the generator
def define_gan(g_model, d_model, image_shape):
# make weights in the discriminator not trainable
for layer in d_model.layers:
if not isinstance(layer, BatchNormalization):
layer.trainable = False
# define the source image
in_src = Input(shape=image_shape)
# connect the source image to the generator input
gen_out = g_model(in_src)
# connect the source input and generator output to the discriminator input
dis_out = d_model([in_src, gen_out])
# src image as input, generated image and classification output
model = Model(in_src, [dis_out, gen_out])
# compile model
opt = Adam(lr=0.0002, beta_1=0.5)
model.compile(loss=['binary_crossentropy', 'mae'], optimizer=opt, loss_weights=[1,100])
return model
# load and prepare training images
def load_real_samples(filename):
# load compressed arrays
data = load(filename)
# unpack arrays
X1, X2 = data['arr_0'], data['arr_1']
# scale from [0,255] to [-1,1]
X1 = (X1 - 127.5) / 127.5
X2 = (X2 - 127.5) / 127.5
return [X1, X2]
# select a batch of random samples, returns images and target
def generate_real_samples(dataset, n_samples, patch_shape):
# unpack dataset
trainA, trainB = dataset
# choose random instances
ix = randint(0, trainA.shape[0], n_samples)
# retrieve selected images
X1, X2 = trainA[ix], trainB[ix]
# generate 'real' class labels (1)
y = ones((n_samples, patch_shape, patch_shape, 1))
return [X1, X2], y
# generate a batch of images, returns images and targets
def generate_fake_samples(g_model, samples, patch_shape):
# generate fake instance
X = g_model.predict(samples)
# create 'fake' class labels (0)
y = zeros((len(X), patch_shape, patch_shape, 1))
return X, y
# generate samples and save as a plot and save the model
def summarize_performance(step, g_model, dataset, n_samples=3):
# select a sample of input images
[X_realA, X_realB], _ = generate_real_samples(dataset, n_samples, 1)
# generate a batch of fake samples
X_fakeB, _ = generate_fake_samples(g_model, X_realA, 1)
# scale all pixels from [-1,1] to [0,1]
X_realA = (X_realA + 1) / 2.0
X_realB = (X_realB + 1) / 2.0
X_fakeB = (X_fakeB + 1) / 2.0
# plot real source images
for i in range(n_samples):
pyplot.subplot(3, n_samples, 1 + i)
pyplot.axis('off')
pyplot.imshow(X_realA[i])
# plot generated target image
for i in range(n_samples):
pyplot.subplot(3, n_samples, 1 + n_samples + i)
pyplot.axis('off')
pyplot.imshow(X_fakeB[i])
# plot real target image
for i in range(n_samples):
pyplot.subplot(3, n_samples, 1 + n_samples*2 + i)
pyplot.axis('off')
pyplot.imshow(X_realB[i])
# save plot to file
filename1 = 'plot_%06d.png' % (step+1)
pyplot.savefig(filename1)
pyplot.close()
# save the generator model
filename2 = 'model_%06d.h5' % (step+1)
g_model.save(filename2)
print('>Saved: %s and %s' % (filename1, filename2))
# train pix2pix models
def train(d_model, g_model, gan_model, dataset, n_epochs=100, n_batch=1):
# determine the output square shape of the discriminator
n_patch = d_model.output_shape[1]
# unpack dataset
trainA, trainB = dataset
# calculate the number of batches per training epoch
bat_per_epo = int(len(trainA) / n_batch)
# calculate the number of training iterations
n_steps = bat_per_epo * n_epochs
# manually enumerate epochs
for i in range(n_steps):
# select a batch of real samples
[X_realA, X_realB], y_real = generate_real_samples(dataset, n_batch, n_patch)
# generate a batch of fake samples
X_fakeB, y_fake = generate_fake_samples(g_model, X_realA, n_patch)
# update discriminator for real samples
d_loss1 = d_model.train_on_batch([X_realA, X_realB], y_real)
# update discriminator for generated samples
d_loss2 = d_model.train_on_batch([X_realA, X_fakeB], y_fake)
# update the generator
g_loss, _, _ = gan_model.train_on_batch(X_realA, [y_real, X_realB])
# summarize performance
print('>%d, d1[%.3f] d2[%.3f] g[%.3f]' % (i+1, d_loss1, d_loss2, g_loss))
# summarize model performance
if (i+1) % (bat_per_epo * 10) == 0:
summarize_performance(i, g_model, dataset)
# load image data
dataset = load_real_samples('maps_256.npz')
print('Loaded', dataset[0].shape, dataset[1].shape)
# define input shape based on the loaded dataset
image_shape = dataset[0].shape[1:]
# define the models
d_model = define_discriminator(image_shape)
g_model = define_generator(image_shape)
# define the composite model
gan_model = define_gan(g_model, d_model, image_shape)
# train model
train(d_model, g_model, gan_model, dataset)
So far I have:
import os
import torch
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
def conv(in_channels, out_channels, kernel_size=4, stride=2, batch_norm=True):
layers = []
layers.append(nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=1,
bias=False
))
if batch_norm:
layers.append(nn.BatchNorm2d(out_channels))
return nn.Sequential(*layers)
def deconv(in_channels, out_channels, kernel_size=4, stride=2, batch_norm=True):
layers = []
layers.append(nn.ConvTranspose2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=1,
bias=False
))
if batch_norm:
layers.append(nn.BatchNorm2d(out_channels))
return nn.Sequential(*layers)
class EncoderBlock(nn.Module):
def __init__(self, in_channels, out_channels, batch_norm=True):
super(EncoderBlock, self).__init__()
self.conv1 = conv(in_channels=in_channels, out_channels=out_channels, batch_norm=batch_norm)
def forward(self, x):
out = F.leaky_relu(self.conv1(x), .2)
return out
class DecoderBlock(nn.Module):
def __init__(self, in_channels, out_channels, dropout=True):
super(DecoderBlock, self).__init__()
self.deconv1 = deconv(in_channels=in_channels, out_channels=out_channels, batch_norm=True)
self.dropout = dropout
def forward(self, x, prev_out):
out = self.deconv1(x)
if self.dropout:
out = F.dropout(out, .5)
out = torch.cat([out, prev_out])
out = F.relu(out)
return out
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator, self).__init__()
self.conv1 = conv(6, 64)
self.conv2 = conv(64, 128)
self.conv3 = conv(128, 256)
self.conv4 = conv(256, 512)
self.conv5 = conv(512, 512)
self.conv6 = conv(512, 1)
self.leaky_relu = nn.LeakyReLU(.2)
def forward(self, x, y):
out = torch.cat([x, y], dim=1)
out = self.leaky_relu(self.conv1(out))
out = self.leaky_relu(self.conv2(out))
out = self.leaky_relu(self.conv3(out))
out = self.leaky_relu(self.conv4(out))
out = self.leaky_relu(self.conv5(out))
out = F.sigmoid(self.conv6(out))
return out
class Generator(nn.Module):
def __init__(self):
super(Generator, self).__init__()
self.e1 = EncoderBlock(3, 64, batch_norm=False)
self.e2 = EncoderBlock(64, 128)
self.e3 = EncoderBlock(128, 256)
self.e4 = EncoderBlock(256, 512)
self.e5 = EncoderBlock(512, 512)
self.e6 = EncoderBlock(512, 512)
self.e7 = EncoderBlock(512, 512)
self.b = conv(512, 512, batch_norm=False)
self.d1 = DecoderBlock(512, 512)
self.d2 = DecoderBlock(512, 512)
self.d3 = DecoderBlock(512, 512)
self.d4 = DecoderBlock(512, 512, dropout=False)
self.d5 = DecoderBlock(512, 256, dropout=False)
self.d6 = DecoderBlock(256, 128, dropout=False)
self.d7 = DecoderBlock(128, 64, dropout=False)
self.deconv1 = deconv(64, 3)
def forward(self, x):
e1 = self.e1(x)
e2 = self.e2(e1)
e3 = self.e3(e2)
e4 = self.e4(e3)
e5 = self.e5(e4)
e6 = self.e6(e5)
e7 = self.e7(e6)
b = F.relu(self.b(e7))
d1 = self.d1(b, e7)
d2 = self.d2(d1, e6)
d3 = self.d3(d2, e5)
d4 = self.d4(d3, e4)
d5 = self.d5(d4, e3)
d6 = self.d6(d5, e2)
d7 = self.d7(d6, e1)
out = F.tanh(self.deconv1(d7))
return out
class GAN(nn.Module):
def __init__(self, generator, discriminator):
super(GAN, self).__init__()
for layer in discriminator.children():
if not isinstance(layer, nn.BatchNorm2d):
layer.eval()
layer.track_running_stats = False
self.generator = generator
self.discriminator = discriminator
def forward(self, x):
g_out = self.generator(x)
d_in = torch.cat([x, g_out])
d_out = self.discriminator(d_in)
out = torch.cat([d_out, g_out])
return out
def load_real_samples(filename):
# load compressed arrays
data = np.load(filename)
# unpack arrays
X1, X2 = data['arr_0'], data['arr_1']
# scale from [0,255] to [-1,1]
X1 = (X1 - 127.5) / 127.5
X2 = (X2 - 127.5) / 127.5
return [X1, X2]
def generate_real_samples(dataset, n_samples, patch_shape):
real_inputs, real_outputs = dataset
random_index = np.random.randint(0, real_inputs.shape[0], n_samples)
real_input, real_output = real_inputs[random_index], real_outputs[random_index]
true_label = np.ones((n_samples, patch_shape, patch_shape, 1))
real_input = torch.Tensor(real_input).cuda()
real_output = torch.Tensor(real_output).cuda()
true_label = torch.Tensor(true_label).cuda()
return [real_input, real_output], true_label
def generate_fake_samples(g_model, samples, patch_shape):
fake_output = g_model(samples)
false_label = np.zeros((len(fake_output), patch_shape, patch_shape, 1))
false_label = torch.Tensor(false_label).cuda()
return fake_output, false_label
def train_on_batch(model, inputs, outputs, optimizer, criterion):
model.train()
model.zero_grad()
logits = model(*inputs)
loss = criterion(logits, outputs)
loss.backward()
optimizer.step()
return loss.item()
def train(d_model, g_model, gan_model, dataset, n_epochs=100, n_batch=1):
criterion = nn.BCEWithLogitsLoss(torch.Tensor([0.5]))
lr = 0.0002
disc_optimizer = optim.Adam(d_model.parameters(), lr=lr)
gan_optimizer = optim.Adam(gan_model.parameters(), lr=lr)
n_patch = 16
real_inputs, real_outputs = dataset
bat_per_epo = int(len(real_inputs) / n_batch)
n_steps = bat_per_epo * n_epochs
for i in range(n_steps):
[real_input, real_output], true_label = generate_real_samples(dataset, n_batch, n_patch)
fake_output, false_label = generate_fake_samples(g_model, real_input, n_patch)
disc_loss1 = train_on_batch(d_model, [real_input, real_output], true_label, disc_optimizer, criterion)
disc_loss2 = train_on_batch(d_model, [real_input, fake_output], false_label, disc_optimizer, criterion)
gan_loss = train_on_batch(gan_model, real_input, [true_label, real_output], gan_optimizer, criterion)
if __name__ == "__main__":
os.system("cls")
dataset = load_real_samples('data/data_test.npz')
g_model = Generator().cuda()
d_model = Discriminator().cuda()
gan_model = GAN(g_model, d_model).cuda()
train(d_model, g_model, gan_model, dataset, n_epochs=100, n_batch=1)
When I run the code, I get the error ValueError: Target size (torch.Size([1, 16, 16, 1])) must be the same as input size (torch.Size([1, 1, 4, 4]))
This occurs when calculating loss loss = criterion(logits, outputs) right after the forward from Generator
Does this have to with my translation of binary cross entropy loss from tensor flow to pytorch?

tensorflow improper batch setup Check failed: cudnnSetTensorNdDescriptor when using upsampling

I think I am setting up my batches wrong. If I run with the generated dataset it runs fine but with my own data I get an error.
If I take out the encoder (max pooling) and decoder (UpSampling2D) I don't get an error.
input size (304, 228, 1)
Generated: RUNS
import tensorflow as tf
from tensorflow.keras import layers
from natsort import natsorted
from tensorflow.keras.models import Model
BATCH_SIZE = 4
EPOCHS = 20
LEARNING_RATE = 1e-4
RESET_TRAINING = True
INPUT_CHANNELS = 1
OUTPUT_CHANNELS = 1
LOSS_TYPE = tf.keras.losses.SparseCategoricalCrossentropy()
img_size = (304, 228)
# configure cuda
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
class UnetModel(Model):
def __init__(self, *args, **kwargs):
super().__init__(UnetModel, *args, **kwargs)
# -- Encoder -- #
# Block encoder 1
input_shape = (img_size[0], img_size[1], 1)
# If you want to know more about why we are using `he_normal`:
# https://stats.stackexchange.com/questions/319323/whats-the-difference-between-variance-scaling-initializer-and-xavier-initialize/319849#319849
# Or the excellent fastai course:
# https://github.com/fastai/course-v3/blob/master/nbs/dl2/02b_initializing.ipynb
initializer = 'he_normal'
inputs = layers.Input(shape=input_shape)
print("input shape ", input_shape)
conv_enc_1 = layers.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer=initializer)(inputs)
conv_enc_1 = layers.Conv2D(64, 3, activation = 'relu', padding='same', kernel_initializer=initializer)(conv_enc_1)
# Block encoder 2
max_pool_enc_2 = layers.MaxPooling2D(pool_size=(2, 2))(conv_enc_1)
conv_enc_2 = layers.Conv2D(128, 5, activation = 'relu', padding = 'same', kernel_initializer = initializer)(max_pool_enc_2)
conv_enc_2 = layers.Conv2D(128, 5, activation = 'relu', padding = 'same', kernel_initializer = initializer)(conv_enc_2)
# Block decoder 1
up_dec_4 = layers.Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = initializer)(layers.UpSampling2D(size = (2,2))(conv_enc_2))
merge_dec_4 = layers.concatenate([conv_enc_1, up_dec_4], axis = 3)
conv_dec_4 = layers.Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = initializer)(merge_dec_4)
conv_dec_4 = layers.Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = initializer)(conv_dec_4)
conv_dec_4 = layers.Conv2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer = initializer)(conv_dec_4)
# -- Dencoder -- #
output = layers.Conv2D(1, 1, activation = 'softmax')(conv_dec_4)
self.model = tf.keras.Model(inputs = inputs, outputs = output)
def call(self, x):
return self.model(x)
model = UnetModel()
model.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE), loss = LOSS_TYPE, metrics= [tf.keras.metrics.get('accuracy')])
dataset_debug = tf.data.Dataset.from_tensor_slices((tf.random.normal(shape = (BATCH_SIZE, img_size[0], img_size[1], 1)), tf.random.normal(shape = (BATCH_SIZE, img_size[0], img_size[1], 1)))).batch(BATCH_SIZE)
history = model.fit(dataset_debug, epochs=EPOCHS, shuffle=True)
Does NOT run
Here I am splitting the filenames into training and validation sets using train_test_split and reading in images in the parse_img_input function
# takes image filenames of uint8 and normalizes to 0-1 range
def parse_img_input(img_file, img_file_out):
print("img file ", img_file)
def _parse_input(img_file, img_file_out):
# get img image
d_filepath = img_file.numpy().decode()
d_image_decoded = tf.image.decode_jpeg(
tf.io.read_file(d_filepath), channels=1)
d_image = tf.cast(d_image_decoded, tf.float32) / 255.0
# get img image
d_filepath_out = img_file_out.numpy().decode()
d_image_decoded_out = tf.image.decode_jpeg(
tf.io.read_file(d_filepath_out), channels=1)
d_image_out = tf.cast(d_image_decoded_out, tf.float32) / 255.0
# add channel dimension
d_image = tf.expand_dims(d_image, -1)
d_image_out = tf.expand_dims(d_image_out, -1)
return d_image, d_image_out
return tf.py_function(_parse_input,
inp=[img_file, img_file_out],
Tout=[tf.float32, tf.float32])
# depth_files_in, depth_files_out are lists of filenames
# split input data into train, test sets
X_train_file, X_test_file, y_train_file, y_test_file = train_test_split(depth_files_in, depth_files_out,
test_size=0.2,
random_state=0)
dataset_train = tf.data.Dataset.from_tensor_slices((X_train_file, y_train_file))
dataset_train = dataset_train.map(parse_img_input)
dataset_test = tf.data.Dataset.from_tensor_slices((X_test_file, y_test_file))
dataset_test = dataset_test.map(parse_img_input)
history = model.fit(dataset_train, epochs=EPOCHS, shuffle=True, batch_size = BATCH_SIZE, validation_data= dataset_test)
F tensorflow/stream_executor/cuda/cuda_dnn.cc:535] Check failed: cudnnSetTensorNdDescriptor(handle_.get(), elem_type, nd, dims.data(), strides.data()) == CUDNN_STATUS_SUCCESS (3 vs. 0)batch_descriptor: {count: 228 feature_map_count: 64 spatial: 152 0 value_min: 0.000000 value_max: 0.000000 layout: BatchDepthYX}

How to modify the Keras CycleGAN example code to run parallelly on GPUs using tf.strategy

Here is the example of CycleGAN from the Keras
CycleGAN Example Using Keras.
Here is my modified implementation to use multiple GPUs. To implement the custom training I have used a reference Custom training with tf.distribute.Strategy
I want an example of CycleGAN from the Keras to run fast using GPUs. As further I need to process and train a huge amount of data. As well as CycleGAN uses multiple loss functions train_step will return 4 types of losses, currently, I am just returning one for easier understanding. Still, the training on GPUs is dead slow. I am not able to find the reason behind this.
Am I using tf.distribute.Strategy wrongly?
"""
Title: CycleGAN
Author: [A_K_Nain](https://twitter.com/A_K_Nain)
Date created: 2020/08/12
Last modified: 2020/08/12
Description: Implementation of CycleGAN.
"""
"""
## CycleGAN
CycleGAN is a model that aims to solve the image-to-image translation
problem. The goal of the image-to-image translation problem is to learn the
mapping between an input image and an output image using a training set of
aligned image pairs. However, obtaining paired examples isn't always feasible.
CycleGAN tries to learn this mapping without requiring paired input-output images,
using cycle-consistent adversarial networks.
- [Paper](https://arxiv.org/pdf/1703.10593.pdf)
- [Original implementation](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix)
"""
"""
## Setup
"""
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
tfds.disable_progress_bar()
autotune = tf.data.experimental.AUTOTUNE
# Create a MirroredStrategy.
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
"""
## Prepare the dataset
In this example, we will be using the
[horse to zebra](https://www.tensorflow.org/datasets/catalog/cycle_gan#cycle_ganhorse2zebra)
dataset.
"""
# Load the horse-zebra dataset using tensorflow-datasets.
dataset, _ = tfds.load("cycle_gan/horse2zebra", with_info=True, as_supervised=True)
train_horses, train_zebras = dataset["trainA"], dataset["trainB"]
test_horses, test_zebras = dataset["testA"], dataset["testB"]
# Define the standard image size.
orig_img_size = (286, 286)
# Size of the random crops to be used during training.
input_img_size = (256, 256, 3)
# Weights initializer for the layers.
kernel_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02)
# Gamma initializer for instance normalization.
gamma_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02)
buffer_size = 256
batch_size = 1
def normalize_img(img):
img = tf.cast(img, dtype=tf.float32)
# Map values in the range [-1, 1]
return (img / 127.5) - 1.0
def preprocess_train_image(img, label):
# Random flip
img = tf.image.random_flip_left_right(img)
# Resize to the original size first
img = tf.image.resize(img, [*orig_img_size])
# Random crop to 256X256
img = tf.image.random_crop(img, size=[*input_img_size])
# Normalize the pixel values in the range [-1, 1]
img = normalize_img(img)
return img
def preprocess_test_image(img, label):
# Only resizing and normalization for the test images.
img = tf.image.resize(img, [input_img_size[0], input_img_size[1]])
img = normalize_img(img)
return img
"""
## Create `Dataset` objects
"""
BATCH_SIZE_PER_REPLICA = batch_size
GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
# Apply the preprocessing operations to the training data
train_horses = (
train_horses.map(preprocess_train_image, num_parallel_calls=autotune)
.cache()
.shuffle(buffer_size)
.batch(GLOBAL_BATCH_SIZE)
)
train_zebras = (
train_zebras.map(preprocess_train_image, num_parallel_calls=autotune)
.cache()
.shuffle(buffer_size)
.batch(GLOBAL_BATCH_SIZE)
)
# Apply the preprocessing operations to the test data
test_horses = (
test_horses.map(preprocess_test_image, num_parallel_calls=autotune)
.cache()
.shuffle(buffer_size)
.batch(GLOBAL_BATCH_SIZE)
)
test_zebras = (
test_zebras.map(preprocess_test_image, num_parallel_calls=autotune)
.cache()
.shuffle(buffer_size)
.batch(GLOBAL_BATCH_SIZE)
)
# Visualize some samples
_, ax = plt.subplots(4, 2, figsize=(10, 15))
for i, samples in enumerate(zip(train_horses.take(4), train_zebras.take(4))):
horse = (((samples[0][0] * 127.5) + 127.5).numpy()).astype(np.uint8)
zebra = (((samples[1][0] * 127.5) + 127.5).numpy()).astype(np.uint8)
ax[i, 0].imshow(horse)
ax[i, 1].imshow(zebra)
plt.show()
plt.savefig('Visualize_Some_Samples')
plt.close()
# Building blocks used in the CycleGAN generators and discriminators
class ReflectionPadding2D(layers.Layer):
"""Implements Reflection Padding as a layer.
Args:
padding(tuple): Amount of padding for the
spatial dimensions.
Returns:
A padded tensor with the same type as the input tensor.
"""
def __init__(self, padding=(1, 1), **kwargs):
self.padding = tuple(padding)
super(ReflectionPadding2D, self).__init__(**kwargs)
def call(self, input_tensor, mask=None):
padding_width, padding_height = self.padding
padding_tensor = [
[0, 0],
[padding_height, padding_height],
[padding_width, padding_width],
[0, 0],
]
return tf.pad(input_tensor, padding_tensor, mode="REFLECT")
def residual_block(
x,
activation,
kernel_initializer=kernel_init,
kernel_size=(3, 3),
strides=(1, 1),
padding="valid",
gamma_initializer=gamma_init,
use_bias=False,
):
dim = x.shape[-1]
input_tensor = x
x = ReflectionPadding2D()(input_tensor)
x = layers.Conv2D(
dim,
kernel_size,
strides=strides,
kernel_initializer=kernel_initializer,
padding=padding,
use_bias=use_bias,
)(x)
x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
x = activation(x)
x = ReflectionPadding2D()(x)
x = layers.Conv2D(
dim,
kernel_size,
strides=strides,
kernel_initializer=kernel_initializer,
padding=padding,
use_bias=use_bias,
)(x)
x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
x = layers.add([input_tensor, x])
return x
def downsample(
x,
filters,
activation,
kernel_initializer=kernel_init,
kernel_size=(3, 3),
strides=(2, 2),
padding="same",
gamma_initializer=gamma_init,
use_bias=False,
):
x = layers.Conv2D(
filters,
kernel_size,
strides=strides,
kernel_initializer=kernel_initializer,
padding=padding,
use_bias=use_bias,
)(x)
x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
if activation:
x = activation(x)
return x
def upsample(
x,
filters,
activation,
kernel_size=(3, 3),
strides=(2, 2),
padding="same",
kernel_initializer=kernel_init,
gamma_initializer=gamma_init,
use_bias=False,
):
x = layers.Conv2DTranspose(
filters,
kernel_size,
strides=strides,
padding=padding,
kernel_initializer=kernel_initializer,
use_bias=use_bias,
)(x)
x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
if activation:
x = activation(x)
return x
def get_resnet_generator(
filters=64,
num_downsampling_blocks=2,
num_residual_blocks=9,
num_upsample_blocks=2,
gamma_initializer=gamma_init,
name=None,
):
img_input = layers.Input(shape=input_img_size, name=name + "_img_input")
x = ReflectionPadding2D(padding=(3, 3))(img_input)
x = layers.Conv2D(filters, (7, 7), kernel_initializer=kernel_init, use_bias=False)(
x
)
x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
x = layers.Activation("relu")(x)
# Downsampling
for _ in range(num_downsampling_blocks):
filters *= 2
x = downsample(x, filters=filters, activation=layers.Activation("relu"))
# Residual blocks
for _ in range(num_residual_blocks):
x = residual_block(x, activation=layers.Activation("relu"))
# Upsampling
for _ in range(num_upsample_blocks):
filters //= 2
x = upsample(x, filters, activation=layers.Activation("relu"))
# Final block
x = ReflectionPadding2D(padding=(3, 3))(x)
x = layers.Conv2D(3, (7, 7), padding="valid")(x)
x = layers.Activation("tanh")(x)
model = keras.models.Model(img_input, x, name=name)
return model
"""
## Build the discriminators
The discriminators implement the following architecture:
`C64->C128->C256->C512`
"""
def get_discriminator(
filters=64, kernel_initializer=kernel_init, num_downsampling=3, name=None
):
img_input = layers.Input(shape=input_img_size, name=name + "_img_input")
x = layers.Conv2D(
filters,
(4, 4),
strides=(2, 2),
padding="same",
kernel_initializer=kernel_initializer,
)(img_input)
x = layers.LeakyReLU(0.2)(x)
num_filters = filters
for num_downsample_block in range(3):
num_filters *= 2
if num_downsample_block < 2:
x = downsample(
x,
filters=num_filters,
activation=layers.LeakyReLU(0.2),
kernel_size=(4, 4),
strides=(2, 2),
)
else:
x = downsample(
x,
filters=num_filters,
activation=layers.LeakyReLU(0.2),
kernel_size=(4, 4),
strides=(1, 1),
)
x = layers.Conv2D(
1, (4, 4), strides=(1, 1), padding="same", kernel_initializer=kernel_initializer
)(x)
model = keras.models.Model(inputs=img_input, outputs=x, name=name)
return model
"""
## Build the CycleGAN model
"""
class CycleGan(keras.Model):
def __init__(
self,
generator_G,
generator_F,
discriminator_X,
discriminator_Y,
lambda_cycle=10.0,
lambda_identity=0.5,
):
super(CycleGan, self).__init__()
self.gen_G = generator_G
self.gen_F = generator_F
self.disc_X = discriminator_X
self.disc_Y = discriminator_Y
self.lambda_cycle = lambda_cycle
self.lambda_identity = lambda_identity
def compile(
self,
gen_G_optimizer,
gen_F_optimizer,
disc_X_optimizer,
disc_Y_optimizer,
gen_loss_fn,
disc_loss_fn,
cycle_loss_fn,
identity_loss_fn
):
super(CycleGan, self).compile()
self.gen_G_optimizer = gen_G_optimizer
self.gen_F_optimizer = gen_F_optimizer
self.disc_X_optimizer = disc_X_optimizer
self.disc_Y_optimizer = disc_Y_optimizer
self.generator_loss_fn = gen_loss_fn
self.discriminator_loss_fn = disc_loss_fn
#self.cycle_loss_fn = keras.losses.MeanAbsoluteError()
#self.identity_loss_fn = keras.losses.MeanAbsoluteError()
self.cycle_loss_fn = cycle_loss_fn
self.identity_loss_fn = identity_loss_fn
def train_step(self, batch_data):
# x is Horse and y is zebra
real_x, real_y = batch_data
with tf.GradientTape(persistent=True) as tape:
# Horse to fake zebra
fake_y = self.gen_G(real_x, training=True)
# Zebra to fake horse -> y2x
fake_x = self.gen_F(real_y, training=True)
# Cycle (Horse to fake zebra to fake horse): x -> y -> x
cycled_x = self.gen_F(fake_y, training=True)
# Cycle (Zebra to fake horse to fake zebra) y -> x -> y
cycled_y = self.gen_G(fake_x, training=True)
# Identity mapping
same_x = self.gen_F(real_x, training=True)
same_y = self.gen_G(real_y, training=True)
# Discriminator output
disc_real_x = self.disc_X(real_x, training=True)
disc_fake_x = self.disc_X(fake_x, training=True)
disc_real_y = self.disc_Y(real_y, training=True)
disc_fake_y = self.disc_Y(fake_y, training=True)
# Generator adverserial loss
gen_G_loss = self.generator_loss_fn(disc_fake_y)
gen_F_loss = self.generator_loss_fn(disc_fake_x)
# Generator cycle loss
cycle_loss_G = self.cycle_loss_fn(real_y, cycled_y) * self.lambda_cycle
cycle_loss_F = self.cycle_loss_fn(real_x, cycled_x) * self.lambda_cycle
# Generator identity loss
id_loss_G = (
self.identity_loss_fn(real_y, same_y)
* self.lambda_cycle
* self.lambda_identity
)
id_loss_F = (
self.identity_loss_fn(real_x, same_x)
* self.lambda_cycle
* self.lambda_identity
)
# Total generator loss
total_loss_G = gen_G_loss + cycle_loss_G + id_loss_G
total_loss_F = gen_F_loss + cycle_loss_F + id_loss_F
# Discriminator loss
disc_X_loss = self.discriminator_loss_fn(disc_real_x, disc_fake_x)
disc_Y_loss = self.discriminator_loss_fn(disc_real_y, disc_fake_y)
# Get the gradients for the generators
grads_G = tape.gradient(total_loss_G, self.gen_G.trainable_variables)
grads_F = tape.gradient(total_loss_F, self.gen_F.trainable_variables)
# Get the gradients for the discriminators
disc_X_grads = tape.gradient(disc_X_loss, self.disc_X.trainable_variables)
disc_Y_grads = tape.gradient(disc_Y_loss, self.disc_Y.trainable_variables)
# Update the weights of the generators
self.gen_G_optimizer.apply_gradients(
zip(grads_G, self.gen_G.trainable_variables)
)
self.gen_F_optimizer.apply_gradients(
zip(grads_F, self.gen_F.trainable_variables)
)
# Update the weights of the discriminators
self.disc_X_optimizer.apply_gradients(
zip(disc_X_grads, self.disc_X.trainable_variables)
)
self.disc_Y_optimizer.apply_gradients(
zip(disc_Y_grads, self.disc_Y.trainable_variables)
)
return total_loss_G
# return [total_loss_G, total_loss_F, disc_X_loss, disc_Y_loss]
# Open a strategy scope.
with strategy.scope():
mae_loss_fn = keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE)
# Loss function for evaluating cycle consistency loss
def cycle_loss_fn(real, cycled):
cycle_loss = mae_loss_fn(real, cycled)
cycle_loss = tf.nn.compute_average_loss(cycle_loss, global_batch_size=GLOBAL_BATCH_SIZE)
return cycle_loss
# Loss function for evaluating identity mapping loss
def identity_loss_fn(real, same):
identity_loss = mae_loss_fn(real, same)
identity_loss = tf.nn.compute_average_loss(identity_loss, global_batch_size=GLOBAL_BATCH_SIZE)
return identity_loss
# Loss function for evaluating adversarial loss
adv_loss_fn = keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)
# Define the loss function for the generators
def generator_loss_fn(fake):
fake_loss = adv_loss_fn(tf.ones_like(fake), fake)
fake_loss = tf.nn.compute_average_loss(fake_loss, global_batch_size=GLOBAL_BATCH_SIZE)
return fake_loss
# Define the loss function for the discriminators
def discriminator_loss_fn(real, fake):
real_loss = adv_loss_fn(tf.ones_like(real), real)
fake_loss = adv_loss_fn(tf.zeros_like(fake), fake)
real_loss = tf.nn.compute_average_loss(real_loss, global_batch_size=GLOBAL_BATCH_SIZE)
fake_loss = tf.nn.compute_average_loss(fake_loss, global_batch_size=GLOBAL_BATCH_SIZE)
return (real_loss + fake_loss) * 0.5
# Get the generators
gen_G = get_resnet_generator(name="generator_G")
gen_F = get_resnet_generator(name="generator_F")
# Get the discriminators
disc_X = get_discriminator(name="discriminator_X")
disc_Y = get_discriminator(name="discriminator_Y")
# Create cycle gan model
cycle_gan_model = CycleGan(
generator_G=gen_G, generator_F=gen_F, discriminator_X=disc_X, discriminator_Y=disc_Y
)
optimizer = keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5)
# Compile the model
cycle_gan_model.compile(
gen_G_optimizer=optimizer,
gen_F_optimizer=optimizer,
disc_X_optimizer=optimizer,
disc_Y_optimizer=optimizer,
gen_loss_fn=generator_loss_fn,
disc_loss_fn=discriminator_loss_fn,
cycle_loss_fn=cycle_loss_fn,
identity_loss_fn=identity_loss_fn
)
train_dist_dataset = strategy.experimental_distribute_dataset(
tf.data.Dataset.zip((train_horses,
train_zebras)))
# `run` replicates the provided computation and runs it
# with the distributed input.
#tf.function
def distributed_train_step(dataset_inputs):
per_replica_losses = strategy.run(cycle_gan_model.train_step, args=(dataset_inputs,))
return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses,
axis=None)
"""
## Train the end-to-end model
"""
for epoch in range(1):
# TRAIN LOOP
all_loss = 0.0
num_batches = 0.0
for one_batch in train_dist_dataset:
all_loss += distributed_train_step(one_batch)
num_batches += 1
train_loss = all_loss/num_batches
print(train_loss)

Gradients are None for Custom Convolution Layer

I have implemented the Basic MNIST model with Custom convolution layer as shown below. The problem is that the Gradients are always 'None' for the Custom Layer and so the learning does not happens during back propagation, as the Grad has None values.
I have debugged the outputs of the layers during forward pass and they are OK.
Here is the sample code, for simplicity I have passed image of 'Ones' and have just returned the matrix from the custom layer.
I have tried my best but could make it work any help is very much appreciated in advance
following code is executable and raises the
warning
:tensorflow:Gradients do not exist for variables ['cnn/custom_conv2d/kernel:0', 'cnn/custom_conv2d/bias:0', 'cnn/custom_conv2d_1/kernel:0', 'cnn/custom_conv2d_1/bias:0', 'cnn/custom_conv2d_2/kernel:0', 'cnn/custom_conv2d_2/bias:0'] when minimizing the loss.
import numpy as np
import tensorflow as tf
from grpc.beta import interfaces
class CustomConv2D(tf.keras.layers.Conv2D):
def __init__(self, filters,
kernel_size,
strides=(1, 1),
padding='valid',
data_format=None,
dilation_rate=(1, 1),
activation=None,
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='glorot_uniform',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
__name__ = 'CustomConv2D',
**kwargs
):
super(CustomConv2D, self).__init__(
filters=filters,
kernel_size=kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
activation=activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
bias_initializer=bias_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
activity_regularizer=activity_regularizer,
kernel_constraint=kernel_constraint,
bias_constraint=bias_constraint,
**kwargs )
def call(self, input):
(unrolled_mat, filters, shape) = self.prepare(input)
#unrolled_mat=unrolled inputs
#filters=unrolled kernels of the lAYER
#convolution through unrolling
conv_result = tf.tensordot(unrolled_mat, filters, axes=1)
result=tf.convert_to_tensor(tf.reshape(conv_result, shape))
return result
def prepare(self, matrix):
batches,rows,cols,channels=matrix.shape
kernel_size = self.kernel_size[0]
unrolled_matrices=None
# start = timer()
for batch in range(batches):
unrolled_maps=None
for chanel in range(channels):
unrolled_map = self.unroll(batch, cols, kernel_size, matrix, rows,chanel)
if unrolled_maps is None:
unrolled_maps = unrolled_map
else:
unrolled_maps=np.append(unrolled_maps,unrolled_map,axis=1)
unrolled_maps = np.reshape(unrolled_maps,(-1,unrolled_maps.shape[0],unrolled_maps.shape[1]))
if unrolled_matrices is None:
unrolled_matrices = unrolled_maps
else:
unrolled_matrices = np.concatenate((unrolled_matrices, unrolled_maps))
kernels=self.get_weights()
kernels=np.reshape(kernels[0],(unrolled_matrices[0].shape[1],-1))
shp=(batches,rows-(kernel_size-1),cols-(kernel_size-1),self.filters)
matrix=unrolled_matrices
return (matrix, kernels, shp)
def unroll(self, batch, cols, kernel_size, matrix, rows, chanel):
# a=np.zeros((shape))
unrolled_feature_map = None
for x in range(0, rows - (kernel_size - 1)):
for y in range(0, (cols - (kernel_size - 1))):
temp_row = None # flattened kernal at single position
for k in range(kernel_size):
for l in range(kernel_size):
if temp_row is None:
temp_row = matrix[batch, x + k, y + l, chanel]
# print(matrix[batch, x + k, y + l])
else:
temp_row = np.append(temp_row, matrix[batch, x + k, y + l, chanel])
# print(matrix[batch, x + k, y + l])
if unrolled_feature_map is None:
unrolled_feature_map = np.reshape(temp_row,
(-1, kernel_size * kernel_size)) # first row of unrolled matrix added
else:
unrolled_feature_map = np.concatenate((unrolled_feature_map, np.reshape(temp_row,
(-1, kernel_size * kernel_size)))) # concatinate subsequent row to un_mat
unrolled_feature_map = np.reshape(unrolled_feature_map,( unrolled_feature_map.shape[0], unrolled_feature_map.shape[1]))
# print(unrolled_feature_map.shape)
matrix=unrolled_feature_map
return matrix
class CNN(tf.keras.Model):
def __init__(self):
super(CNN, self).__init__()
self.learning_rate = 0.001
self.momentum = 0.9
self.optimizer = tf.keras.optimizers.Adam(self.learning_rate, self.momentum)
self.conv1 = CustomConv2D(filters = 6, kernel_size= 3, activation = 'relu') ## valid means no padding
self.pool1 = tf.keras.layers.MaxPool2D(pool_size=2) # default stride??-
self.conv2 = CustomConv2D(filters = 16, kernel_size = 3, activation = 'relu')
self.pool2 = tf.keras.layers.MaxPool2D(pool_size = 2)
self.conv3 = CustomConv2D(filters=120, kernel_size=3, activation='relu')
self.flatten = tf.keras.layers.Flatten()
self.fc1 = tf.keras.layers.Dense(units=82,kernel_initializer='glorot_uniform')
self.fc2 = tf.keras.layers.Dense(units=10, activation = 'softmax',kernel_initializer='glorot_uniform')
def call(self, x):
x = self.conv1(x) # shap(32,26,26,6) all (6s 3s 6s 3s)
x = self.pool1(x) # shap(32,13,13,6) all (6s)
x = self.conv2(x) # shap(32,11,11,16) all(324s)
x = self.pool2(x) # shap(32,5,5,16)
x = self.conv3(x) # shap(32,3,3,120)all(46656)
x = self.flatten(x) # shap(32,1080)
x = self.fc1(x) # shap(32,82)
x = self.fc2(x) # shap(32,10)
return x
def feedForward(self, image, label):
accuracy_object = tf.metrics.Accuracy()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
with tf.GradientTape() as tape:
feedForwardCompuation = self(image, training=True)
self.loss_value = loss_object(label, feedForwardCompuation)
grads = tape.gradient(self.loss_value, self.variables)
self.optimizer.apply_gradients(zip(grads, self.variables))
accuracy = accuracy_object(tf.argmax(feedForwardCompuation, axis=1, output_type=tf.int32), label)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train=x_train.astype('float32')
y_train = y_train.astype('float32')
image=x_train[0].reshape((1,28,28,1))
label=y_train[0]
cnn=CNN()
cnn.feedForward(image,label)
UPDATE: I am not using the builtin TF conv fucntion rather I am implementing my own custom convolution operation via Matrix unrolling method(unrolled map*unrolled filters). But the Tap.gradient returns "None" for the custom layers however when I use the builtin conv2d function of TF then it works fine!
I have Added the actual code of the operation
Snapshot of grads while debugging
Problem is that the Convolution Operation is not happening in the Class, CustomConv2D. Neither the call Method, nor the customConv Method is performing Convolution Operation, but it is just returning the Input, as it is.
Replacing the line, return self.customConv(matrix) in the call method of CustomConv2D Class with return super(tf.keras.layers.Conv2D, self).call(matrix) will perform the actual Convolutional Operation.
One more change is to invoke the call method of CNN class by including the line, _ = cnn(X_reshaped) before the line, cnn.feedForward(image,label)
By doing the above 2 changes, Gradients will be added.