I have a tensorflow implementation of a GAN that I'm trying to use to sharpen the image output from a variational autoencoder. The problem is after a few epochs (50000 generator weight updates, 200000 critic weight updates, single image batches) the network is still producing images that are almost greyscale when being trained on color images, and will typically produce very faded images with little contrast (although will produce clear images at times)
The generator gets a blurry reconstruction image as an input and tries to make a convincing fake of the real image it's based on. The discriminator gets a blurry reconstruction image and either the real image it's based on or the generators fake and ouputs a confidence value for if it is the real image or not.
This might be a case of not having trained the network enough but on every other GAN I've trained to slightly alter an image it normally learns to reproduce the input image within the first 300-400 weight updates.
One theory I have is that the gradient clipping is set quite low to facilitate stability (0.0001) and this might be causing a gradient saturation issue, but I honestly don't understand enough about the concepts involved to even know where to begin bugfixing this. This is the first time I've used wasserstein loss as well so let me know if my implementation is completely wrong.
Some additional information from my attempts to debug this -
I've tested the image inputs by adding mean absolute error loss to the generator and it is able to reproduce it's inputs and approximate the real images just fine
The loss is wasserstein and generally hangs around these values:
disc_total[0.000117] disc_real[-0.000080] disc_gen[0.000197] gen_total[-0.000062]
The input image folder is based on UTK face and contains images that look like this
The network output typically looks like this
Code:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import Sequence
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
import matplotlib.pyplot as plt
import vae
import numpy as np
import random
import cv2
import os
import time
#--------------------Build Model--------------------
#Save filepath
checkpoint_dir = 'Models/GAN/WGAN-U'
#Saved models filepath
filepath = "Models/BVAE/BVAE"
#Data filepath
datapath = "Datasets/Reconstructions/UTK"
val_datapath = ""
batch_size = 1
image_save_freq = 100
model_save_freq = 1000
grad_clipping = 0.0001
vae.vae.load_weights(filepath)
for layer in vae.vae.layers:
layer.trainable = False
#Wrapper for utk face data generator
class utk_data_gen(Sequence):
def __init__(self, validation):
self.validation = validation
self.dp = val_datapath if self.validation else datapath
self.data_len = len(os.listdir(self.dp))//batch_size
def __getitem__(self, index):
X1 = np.empty(shape=(batch_size, vae.height, vae.width, vae.channels))
X2 = np.empty(shape=(batch_size, vae.height, vae.width, vae.channels))
C = np.empty(shape=(batch_size, 7))
data_dir = os.listdir(self.dp)
i=index*batch_size
for j in range(0, batch_size):
data = data_dir[i+j]
img = cv2.imread(os.path.join(self.dp,data))
img = (img / 127.5)-1.0
vae_img = img[:vae.height]
img = img[vae.height:]
labels = data.split('_')
age = float(labels[0]) / 116
gender = float(labels[1])
white = 0.0
black = 0.0
asian = 0.0
indian = 0.0
other = 0.0
if labels[2] == '0':
white = 1.0
elif labels[2] == '1':
black = 1.0
elif labels[2] == '2':
asian = 1.0
elif labels[2] == '3':
indian = 1.0
elif labels[2] == '4':
other = 1.0
img_label = (age, gender, white, black, asian, indian, other)
img_label = np.array(img_label)
X1[j]=img
X2[j]=vae_img
C[j]=img_label
Y=X1
return [X1,X2,C],Y
def __len__(self):
return self.data_len
#Wasserstein discriminator
def wasserstein_loss(y_true, y_pred):
return K.mean(y_true * y_pred)
class ClipConstraint(Constraint):
def __init__(self, clip_value):
self.clip_value = clip_value
def __call__(self, weights):
return K.clip(weights, -self.clip_value, self.clip_value)
def get_config(self):
return {'clip_value': self.clip_value}
disc_optimizer = tf.keras.optimizers.RMSprop(lr=0.00005)
def build_discriminator():
init = tf.random_normal_initializer(0.0, 0.02)
kcon = ClipConstraint(grad_clipping)
img_input = layers.Input(shape=(vae.height, vae.width, vae.channels), name='input_image')
vae_input = layers.Input(shape=(vae.height, vae.width, vae.channels), name='vae_input_image')
concat_inputs = layers.Concatenate()([img_input, vae_input])
downsamples = 4
startFilters = 32
x = layers.Conv2D(startFilters, (4,4), strides=(1,1), padding='same', kernel_constraint=kcon, kernel_initializer=init)(concat_inputs)
for i in range(downsamples):
x = layers.Conv2D(startFilters * 2**i, (4,4), strides=(2,2), padding='same', kernel_constraint=kcon, kernel_initializer=init)(x)
x = layers.BatchNormalization()(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2D(startFilters * 2**downsamples, (4,4), padding='same', kernel_constraint=kcon, kernel_initializer=init)(x)
x = layers.BatchNormalization()(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Flatten()(x)
output = layers.Dense(1, activation='linear')(x)
model = keras.Model([img_input, vae_input], output, name="discriminator")
model.compile(loss=wasserstein_loss, optimizer=disc_optimizer)
return model
#Generator
def UNet_downsample(layer_input, filters):
init = tf.random_normal_initializer(0.0, 0.02)
x = layers.Conv2D(filters, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(layer_input)
x = layers.BatchNormalization()(x)
x = layers.LeakyReLU(alpha=0.2)(x)
return x
def UNet_upsample(layer_input, downsample_skip, filters, crop=False, use_dropout=False):
init = tf.random_normal_initializer(0.0, 0.02)
x = layers.Conv2DTranspose(filters, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(layer_input)
if crop:
x = x[:,:-1,:-1]
x = layers.BatchNormalization()(x)
if use_dropout:
x = layers.Dropout(0.3)(x, training=True)
x = layers.Concatenate()([x, downsample_skip])
x = layers.LeakyReLU(alpha=0.2)(x)
return x
#U-Net
def build_generator():
init = tf.random_normal_initializer(0.0, 0.02)
input_img = keras.Input(shape=(vae.height, vae.width, vae.channels), name = "gen_input_image")
x = layers.Conv2D(3, (4,4), strides=1, padding="same", kernel_initializer=init, name="input_conv")(input_img)
x1 = UNet_downsample(x, 128)
x2 = UNet_downsample(x1, 256)
x3 = UNet_downsample(x2, 512)
x4 = UNet_downsample(x3, 512)
x5 = UNet_downsample(x4, 512)
x = layers.Conv2D(512, (4,4), strides=(2,2), padding='same', activation='relu', kernel_initializer=init)(x5)
x = UNet_upsample(x, x5, 512, True)
x = UNet_upsample(x, x4, 512, True, True)
x = UNet_upsample(x, x3, 512, True, True)
x = UNet_upsample(x, x2, 256)
x = UNet_upsample(x, x1, 128)
output = layers.Conv2DTranspose(3, (4,4), strides=(2,2), activation="tanh", padding="same", name="generator_outputs")(x)
return keras.Model(inputs = [input_img], outputs=output, name="generator")
#Build GAN
gan_optimizer = tf.keras.optimizers.RMSprop(lr=0.00005)
def build_gan(disc, gen):
for layer in disc.layers:
if not isinstance(layer, layers.BatchNormalization):
layer.trainable = False
input_img = keras.Input(shape=(vae.height,vae.width,vae.channels), name = "gan_input_img")
gen_output = gen(input_img)
disc_output = disc([gen_output, input_img])
model = keras.Model(input_img, [disc_output, gen_output])
model.compile(loss=[wasserstein_loss], optimizer=gan_optimizer)
return model
discriminator = build_discriminator()
generator = build_generator()
gan = build_gan(discriminator, generator)
generator_optimizer = tf.keras.optimizers.RMSprop(lr=0.00005)
discriminator_optimizer = tf.keras.optimizers.RMSprop(lr=0.00005)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(gan_optimizer=gan_optimizer,
discriminator_optimizer=discriminator_optimizer,
discriminator=discriminator,
gan=gan)
cp_num = 0
def saveImg():
labels = np.array([[0.8, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0]])
im = vae.decoder.predict([np.zeros(shape=(1,20)), labels])[0]
im = tf.expand_dims(im, axis=0)
im = generator.predict([im])[0]
im = ((im+1.0)*127.5).astype(np.uint8)
cv2.imwrite("Models/GAN/Progress/GAN_"+str(cp_num)+"_"+str(time.time())+ ".jpg", im)
mean_loss_index = 0
mean_loss_losses = [[0,]*30 for i in range(5)]
def get_mean_loss(gan_loss):
global mean_loss_index
global mean_loss_losses
for i, loss in enumerate(gan_loss):
mean_loss_losses[i][mean_loss_index] = loss
mean_loss_index += 1
mean_loss_index = mean_loss_index%30
return tuple([np.mean(loss) for loss in mean_loss_losses])
real_labels = -np.ones((batch_size,1))
fake_labels = np.ones((batch_size,1))
def train(input_img, vae_img):
critic_updates = 4
disc_real_loss = 0
disc_gen_loss = 0
for i in range(critic_updates):
fake_img = generator.predict(vae_img)
disc_real_loss += discriminator.train_on_batch([input_img, vae_img], real_labels)
disc_gen_loss += discriminator.train_on_batch([fake_img, vae_img], fake_labels)
disc_real_loss /= critic_updates
disc_gen_loss /= critic_updates
disc_total_loss = disc_real_loss + disc_gen_loss
gen_loss, _ = gan.train_on_batch([vae_img], real_labels)
rec_loss = 0
print('disc_total[%.6f] disc_real[%.6f] disc_gen[%.6f] gen_total[%.6f] reconstruction[%.3f]' % get_mean_loss((disc_total_loss, disc_real_loss, disc_gen_loss, gen_loss, rec_loss)))
print()
#Main training loop
def fit(train_ds):
global cp_num
while True:
for i, (data, _) in enumerate(train_ds.__iter__()):
print("Batch " + str(i) + '.')
input_image, vae_image, labels = data
input_image = tf.cast(input_image, dtype='float32')
vae_image = tf.cast(vae_image, dtype='float32')
if (i+1) % image_save_freq == 0:
saveImg()
if (i+1) % model_save_freq == 0:
print()
checkpoint.save(file_prefix = checkpoint_prefix)
print("Model Checkpoint <" + str(cp_num) + "> Saved!")
cp_num+=1
else:
print(str(model_save_freq - i%model_save_freq) + " batches until next save")
train(input_image, vae_image)
print()
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
fit(utk_data_gen(False))
Any help with this would be appreciated, even just to say that I haven't made any obvious errors and this issue may resolve itself with more training. I would have waited longer to see before posting the question, but I am under a time constraint to get something visual that shows progress.
I managed to fix this somewhat by doubling the number of filters in the critic, but also, interestingly, the results don't seem to be that much better than those I obtained by just adding a small amount of mae loss to the generator during testing. If anyone else runs into this problem I'd recommend considering this as a fix.
Related
i made a datagenerator for a multi input model using this two references
https://datascience.stackexchange.com/questions/47623/how-feed-a-numpy-array-in-batches-in-keras
Keras open images and create a batch with them
so my generator looks like this
def load_data(df,path,position):
X_numerical = []
X_img = []
Y = []
for i in df.index.values:
# read one or more samples from your storage, do pre-processing, etc.
# for example:
basePath = os.path.sep.join([path, "{}-*".format(i)])
LiqPaths = sorted(list(glob.glob(basePath)))
img = tf.keras.preprocessing.image.load_img(LiqPaths[0], target_size=(100, 100))
img = tf.keras.preprocessing.image.img_to_array(img)
img = np.expand_dims(img, axis=0)
x=np.asarray(df.iloc[i]).astype('float32')
x=tf.expand_dims(x, axis=-1)
y = position[i]
X_numerical.append(x)
X_img.append(img)
Y.append(y)
return [np.array(X_numerical), np.array(X_img)], np.array(Y)
this is the generator
def batch_generator(df, path, position, batch_size):
batch=[]
# print(len(df.index.values))
while True:
for i in df.index.values:
batch.append(i)
if len(batch)==batch_size:
yield load_data(df,path,position)
batch=[]
and my model is this one
input_data = Input(name="input_numerical",shape=(2,1,))
x1 = LSTM(
units = 1024,
return_sequences=True
)(input_data)
x1 = LSTM(
units = 1024,
return_sequences=False
)(x1)
input_img = Input(name="input_images",shape=(60,30,3,))
x2 = Conv2D(128, (3, 3), name="first_conv", activation='relu', input_shape=(60, 30, 3))(input_img)
x2 =MaxPooling2D((2, 2),name="first_pooling")(x2)
x2 =Conv2D(256, (3, 3),name="second_conv", activation='relu')(x2)
x2 =MaxPooling2D((2, 2),name="seccond_pooling")(x2)
x2 = Flatten(name="flatten",)(x2)
x2 =Dense(1024, name="first_dense", activation='relu')(x2)
merge1 =concatenate([x1,x2], axis = 1)
x3 =Dense(1024,name="seccond_dense", activation='relu')(merge1)
output_liq =Dense(1,name="output", activation='sigmoid')(x3)
model = Model([input_img,input_data], output_liq)
model.summary()
tf.keras.utils.plot_model(
model, show_shapes=True
)
model.compile(
loss="BinaryCrossentropy",
optimizer=tf.keras.optimizers.Nadam(learning_rate=1e-6),
metrics=["BinaryAccuracy"])
history = model.fit(gen,
epochs= 30,
shuffle=False,
batch_size=256
#validation_data=([val_img, test_numerical1], val_y)
)
but it is to slow, to much i think, so i would like to know if you know how to make it perform faster, my dataset is about 44k pictures and a pandas whit 44k rows whit 2 features, i hope you can helpme
I tried creating some dummy images and a much smaller model (shown below) to try and compare performances. I took only 10 images to send through the gen function you built, and it took about 8 minutes to run. I'm not 100% sure why the gen function is this slow. It probably has to do with the for loops.
img_paths = np.arange(10)
x_nums = np.random.randn(10,2).tolist()
positions = np.arange(10)
df = pd.DataFrame({'img_paths':img_paths,
'nums':x_nums,
})
path = '/content/drive/MyDrive/Stack Overflow/Images - 60,30,3/'
gen = batch_generator(df,path,positions,5)
I tried to recreate your workflow to an extent to use tensorflow-datasets. I used a smaller model with 1.8 million parameters instead of 35 million as defined by you
input_data = tf.keras.layers.Input(name="input_numerical",shape=(2,1,))
x1 = tf.keras.layers.LSTM(
units = 128,
return_sequences=True
)(input_data)
x1 = tf.keras.layers.LSTM(
units = 128,
return_sequences=False
)(x1)
input_img = tf.keras.layers.Input(name="input_images",shape=(60,30,3,))
x2 = tf.keras.layers.Conv2D(128, (3, 3), name="first_conv", activation='relu', input_shape=(60, 30, 3))(input_img)
x2 = tf.keras.layers.MaxPooling2D((2, 2),name="first_pooling")(x2)
x2 = tf.keras.layers.Conv2D(256, (3, 3),name="second_conv", activation='relu')(x2)
x2 = tf.keras.layers.MaxPooling2D((2, 2),name="seccond_pooling")(x2)
x2 = tf.keras.layers.Flatten(name="flatten",)(x2)
x2 = tf.keras.layers.Dense(64, name="first_dense", activation='relu')(x2)
merge1 = tf.keras.layers.Concatenate(axis = 1)([x1,x2])
x3 = tf.keras.layers.Dense(64,name="seccond_dense", activation='relu')(merge1)
output_liq = tf.keras.layers.Dense(1,name="output", activation='sigmoid')(x3)
model = tf.keras.models.Model([input_img,input_data], output_liq)
model.summary()
The total run time of tf.data.dataset was 35 seconds for 1 epoch having 10,000 images compared to 8 minutes for 10 images using the gen function
Here I have created the necessary inputs as best as I could. I do not understand how exactly you are getting the images, but I have just put the image paths inside the dataframe
path = '/content/drive/MyDrive/Stack Overflow/Images - 60,30,3/'
img_paths = os.listdir(path)
x_nums = np.random.randn(len(img_paths),2).tolist()
positions = np.random.randint(0,2,len(img_paths))
df = pd.DataFrame({'img_paths':img_paths,
'nums':x_nums,
})
Next we create the tensorflow dataset
def preprocess(x,y):
img = tf.io.read_file(x[0])
img = tf.io.decode_png(img, channels=3)
img = tf.cast(img,dtype=tf.float32)
img = img / 255.
x_num = tf.expand_dims(x[1],1) #You might have to change this line depending on how your data looks
return (img,x_num),y
def dataset_gen(df,basepath,positions,batch_size):
img_paths = tf.constant([basepath+i for i in df.img_paths])
x_numerical = tf.constant([np.array(x) for x in df.nums.values])
train_dataset = tf.data.Dataset.from_tensor_slices(((img_paths,x_numerical),positions))
train_dataset = train_dataset.map(preprocess)
train_dataset = train_dataset.batch(batch_size)
return train_dataset
train_data = dataset_gen(df,path,positions,128)
You can then just plug this data into the model
model.compile(
loss="BinaryCrossentropy",
optimizer=tf.keras.optimizers.Nadam(learning_rate=1e-6),
metrics=["BinaryAccuracy"])
history = model.fit(train_data,
epochs= 1
#validation_data=([val_img, test_numerical1], val_y)
)
Here is the example of CycleGAN from the Keras
CycleGAN Example Using Keras.
Here is my modified implementation to use multiple GPUs. To implement the custom training I have used a reference Custom training with tf.distribute.Strategy
I want an example of CycleGAN from the Keras to run fast using GPUs. As further I need to process and train a huge amount of data. As well as CycleGAN uses multiple loss functions train_step will return 4 types of losses, currently, I am just returning one for easier understanding. Still, the training on GPUs is dead slow. I am not able to find the reason behind this.
Am I using tf.distribute.Strategy wrongly?
"""
Title: CycleGAN
Author: [A_K_Nain](https://twitter.com/A_K_Nain)
Date created: 2020/08/12
Last modified: 2020/08/12
Description: Implementation of CycleGAN.
"""
"""
## CycleGAN
CycleGAN is a model that aims to solve the image-to-image translation
problem. The goal of the image-to-image translation problem is to learn the
mapping between an input image and an output image using a training set of
aligned image pairs. However, obtaining paired examples isn't always feasible.
CycleGAN tries to learn this mapping without requiring paired input-output images,
using cycle-consistent adversarial networks.
- [Paper](https://arxiv.org/pdf/1703.10593.pdf)
- [Original implementation](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix)
"""
"""
## Setup
"""
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
tfds.disable_progress_bar()
autotune = tf.data.experimental.AUTOTUNE
# Create a MirroredStrategy.
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
"""
## Prepare the dataset
In this example, we will be using the
[horse to zebra](https://www.tensorflow.org/datasets/catalog/cycle_gan#cycle_ganhorse2zebra)
dataset.
"""
# Load the horse-zebra dataset using tensorflow-datasets.
dataset, _ = tfds.load("cycle_gan/horse2zebra", with_info=True, as_supervised=True)
train_horses, train_zebras = dataset["trainA"], dataset["trainB"]
test_horses, test_zebras = dataset["testA"], dataset["testB"]
# Define the standard image size.
orig_img_size = (286, 286)
# Size of the random crops to be used during training.
input_img_size = (256, 256, 3)
# Weights initializer for the layers.
kernel_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02)
# Gamma initializer for instance normalization.
gamma_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02)
buffer_size = 256
batch_size = 1
def normalize_img(img):
img = tf.cast(img, dtype=tf.float32)
# Map values in the range [-1, 1]
return (img / 127.5) - 1.0
def preprocess_train_image(img, label):
# Random flip
img = tf.image.random_flip_left_right(img)
# Resize to the original size first
img = tf.image.resize(img, [*orig_img_size])
# Random crop to 256X256
img = tf.image.random_crop(img, size=[*input_img_size])
# Normalize the pixel values in the range [-1, 1]
img = normalize_img(img)
return img
def preprocess_test_image(img, label):
# Only resizing and normalization for the test images.
img = tf.image.resize(img, [input_img_size[0], input_img_size[1]])
img = normalize_img(img)
return img
"""
## Create `Dataset` objects
"""
BATCH_SIZE_PER_REPLICA = batch_size
GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
# Apply the preprocessing operations to the training data
train_horses = (
train_horses.map(preprocess_train_image, num_parallel_calls=autotune)
.cache()
.shuffle(buffer_size)
.batch(GLOBAL_BATCH_SIZE)
)
train_zebras = (
train_zebras.map(preprocess_train_image, num_parallel_calls=autotune)
.cache()
.shuffle(buffer_size)
.batch(GLOBAL_BATCH_SIZE)
)
# Apply the preprocessing operations to the test data
test_horses = (
test_horses.map(preprocess_test_image, num_parallel_calls=autotune)
.cache()
.shuffle(buffer_size)
.batch(GLOBAL_BATCH_SIZE)
)
test_zebras = (
test_zebras.map(preprocess_test_image, num_parallel_calls=autotune)
.cache()
.shuffle(buffer_size)
.batch(GLOBAL_BATCH_SIZE)
)
# Visualize some samples
_, ax = plt.subplots(4, 2, figsize=(10, 15))
for i, samples in enumerate(zip(train_horses.take(4), train_zebras.take(4))):
horse = (((samples[0][0] * 127.5) + 127.5).numpy()).astype(np.uint8)
zebra = (((samples[1][0] * 127.5) + 127.5).numpy()).astype(np.uint8)
ax[i, 0].imshow(horse)
ax[i, 1].imshow(zebra)
plt.show()
plt.savefig('Visualize_Some_Samples')
plt.close()
# Building blocks used in the CycleGAN generators and discriminators
class ReflectionPadding2D(layers.Layer):
"""Implements Reflection Padding as a layer.
Args:
padding(tuple): Amount of padding for the
spatial dimensions.
Returns:
A padded tensor with the same type as the input tensor.
"""
def __init__(self, padding=(1, 1), **kwargs):
self.padding = tuple(padding)
super(ReflectionPadding2D, self).__init__(**kwargs)
def call(self, input_tensor, mask=None):
padding_width, padding_height = self.padding
padding_tensor = [
[0, 0],
[padding_height, padding_height],
[padding_width, padding_width],
[0, 0],
]
return tf.pad(input_tensor, padding_tensor, mode="REFLECT")
def residual_block(
x,
activation,
kernel_initializer=kernel_init,
kernel_size=(3, 3),
strides=(1, 1),
padding="valid",
gamma_initializer=gamma_init,
use_bias=False,
):
dim = x.shape[-1]
input_tensor = x
x = ReflectionPadding2D()(input_tensor)
x = layers.Conv2D(
dim,
kernel_size,
strides=strides,
kernel_initializer=kernel_initializer,
padding=padding,
use_bias=use_bias,
)(x)
x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
x = activation(x)
x = ReflectionPadding2D()(x)
x = layers.Conv2D(
dim,
kernel_size,
strides=strides,
kernel_initializer=kernel_initializer,
padding=padding,
use_bias=use_bias,
)(x)
x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
x = layers.add([input_tensor, x])
return x
def downsample(
x,
filters,
activation,
kernel_initializer=kernel_init,
kernel_size=(3, 3),
strides=(2, 2),
padding="same",
gamma_initializer=gamma_init,
use_bias=False,
):
x = layers.Conv2D(
filters,
kernel_size,
strides=strides,
kernel_initializer=kernel_initializer,
padding=padding,
use_bias=use_bias,
)(x)
x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
if activation:
x = activation(x)
return x
def upsample(
x,
filters,
activation,
kernel_size=(3, 3),
strides=(2, 2),
padding="same",
kernel_initializer=kernel_init,
gamma_initializer=gamma_init,
use_bias=False,
):
x = layers.Conv2DTranspose(
filters,
kernel_size,
strides=strides,
padding=padding,
kernel_initializer=kernel_initializer,
use_bias=use_bias,
)(x)
x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
if activation:
x = activation(x)
return x
def get_resnet_generator(
filters=64,
num_downsampling_blocks=2,
num_residual_blocks=9,
num_upsample_blocks=2,
gamma_initializer=gamma_init,
name=None,
):
img_input = layers.Input(shape=input_img_size, name=name + "_img_input")
x = ReflectionPadding2D(padding=(3, 3))(img_input)
x = layers.Conv2D(filters, (7, 7), kernel_initializer=kernel_init, use_bias=False)(
x
)
x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
x = layers.Activation("relu")(x)
# Downsampling
for _ in range(num_downsampling_blocks):
filters *= 2
x = downsample(x, filters=filters, activation=layers.Activation("relu"))
# Residual blocks
for _ in range(num_residual_blocks):
x = residual_block(x, activation=layers.Activation("relu"))
# Upsampling
for _ in range(num_upsample_blocks):
filters //= 2
x = upsample(x, filters, activation=layers.Activation("relu"))
# Final block
x = ReflectionPadding2D(padding=(3, 3))(x)
x = layers.Conv2D(3, (7, 7), padding="valid")(x)
x = layers.Activation("tanh")(x)
model = keras.models.Model(img_input, x, name=name)
return model
"""
## Build the discriminators
The discriminators implement the following architecture:
`C64->C128->C256->C512`
"""
def get_discriminator(
filters=64, kernel_initializer=kernel_init, num_downsampling=3, name=None
):
img_input = layers.Input(shape=input_img_size, name=name + "_img_input")
x = layers.Conv2D(
filters,
(4, 4),
strides=(2, 2),
padding="same",
kernel_initializer=kernel_initializer,
)(img_input)
x = layers.LeakyReLU(0.2)(x)
num_filters = filters
for num_downsample_block in range(3):
num_filters *= 2
if num_downsample_block < 2:
x = downsample(
x,
filters=num_filters,
activation=layers.LeakyReLU(0.2),
kernel_size=(4, 4),
strides=(2, 2),
)
else:
x = downsample(
x,
filters=num_filters,
activation=layers.LeakyReLU(0.2),
kernel_size=(4, 4),
strides=(1, 1),
)
x = layers.Conv2D(
1, (4, 4), strides=(1, 1), padding="same", kernel_initializer=kernel_initializer
)(x)
model = keras.models.Model(inputs=img_input, outputs=x, name=name)
return model
"""
## Build the CycleGAN model
"""
class CycleGan(keras.Model):
def __init__(
self,
generator_G,
generator_F,
discriminator_X,
discriminator_Y,
lambda_cycle=10.0,
lambda_identity=0.5,
):
super(CycleGan, self).__init__()
self.gen_G = generator_G
self.gen_F = generator_F
self.disc_X = discriminator_X
self.disc_Y = discriminator_Y
self.lambda_cycle = lambda_cycle
self.lambda_identity = lambda_identity
def compile(
self,
gen_G_optimizer,
gen_F_optimizer,
disc_X_optimizer,
disc_Y_optimizer,
gen_loss_fn,
disc_loss_fn,
cycle_loss_fn,
identity_loss_fn
):
super(CycleGan, self).compile()
self.gen_G_optimizer = gen_G_optimizer
self.gen_F_optimizer = gen_F_optimizer
self.disc_X_optimizer = disc_X_optimizer
self.disc_Y_optimizer = disc_Y_optimizer
self.generator_loss_fn = gen_loss_fn
self.discriminator_loss_fn = disc_loss_fn
#self.cycle_loss_fn = keras.losses.MeanAbsoluteError()
#self.identity_loss_fn = keras.losses.MeanAbsoluteError()
self.cycle_loss_fn = cycle_loss_fn
self.identity_loss_fn = identity_loss_fn
def train_step(self, batch_data):
# x is Horse and y is zebra
real_x, real_y = batch_data
with tf.GradientTape(persistent=True) as tape:
# Horse to fake zebra
fake_y = self.gen_G(real_x, training=True)
# Zebra to fake horse -> y2x
fake_x = self.gen_F(real_y, training=True)
# Cycle (Horse to fake zebra to fake horse): x -> y -> x
cycled_x = self.gen_F(fake_y, training=True)
# Cycle (Zebra to fake horse to fake zebra) y -> x -> y
cycled_y = self.gen_G(fake_x, training=True)
# Identity mapping
same_x = self.gen_F(real_x, training=True)
same_y = self.gen_G(real_y, training=True)
# Discriminator output
disc_real_x = self.disc_X(real_x, training=True)
disc_fake_x = self.disc_X(fake_x, training=True)
disc_real_y = self.disc_Y(real_y, training=True)
disc_fake_y = self.disc_Y(fake_y, training=True)
# Generator adverserial loss
gen_G_loss = self.generator_loss_fn(disc_fake_y)
gen_F_loss = self.generator_loss_fn(disc_fake_x)
# Generator cycle loss
cycle_loss_G = self.cycle_loss_fn(real_y, cycled_y) * self.lambda_cycle
cycle_loss_F = self.cycle_loss_fn(real_x, cycled_x) * self.lambda_cycle
# Generator identity loss
id_loss_G = (
self.identity_loss_fn(real_y, same_y)
* self.lambda_cycle
* self.lambda_identity
)
id_loss_F = (
self.identity_loss_fn(real_x, same_x)
* self.lambda_cycle
* self.lambda_identity
)
# Total generator loss
total_loss_G = gen_G_loss + cycle_loss_G + id_loss_G
total_loss_F = gen_F_loss + cycle_loss_F + id_loss_F
# Discriminator loss
disc_X_loss = self.discriminator_loss_fn(disc_real_x, disc_fake_x)
disc_Y_loss = self.discriminator_loss_fn(disc_real_y, disc_fake_y)
# Get the gradients for the generators
grads_G = tape.gradient(total_loss_G, self.gen_G.trainable_variables)
grads_F = tape.gradient(total_loss_F, self.gen_F.trainable_variables)
# Get the gradients for the discriminators
disc_X_grads = tape.gradient(disc_X_loss, self.disc_X.trainable_variables)
disc_Y_grads = tape.gradient(disc_Y_loss, self.disc_Y.trainable_variables)
# Update the weights of the generators
self.gen_G_optimizer.apply_gradients(
zip(grads_G, self.gen_G.trainable_variables)
)
self.gen_F_optimizer.apply_gradients(
zip(grads_F, self.gen_F.trainable_variables)
)
# Update the weights of the discriminators
self.disc_X_optimizer.apply_gradients(
zip(disc_X_grads, self.disc_X.trainable_variables)
)
self.disc_Y_optimizer.apply_gradients(
zip(disc_Y_grads, self.disc_Y.trainable_variables)
)
return total_loss_G
# return [total_loss_G, total_loss_F, disc_X_loss, disc_Y_loss]
# Open a strategy scope.
with strategy.scope():
mae_loss_fn = keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE)
# Loss function for evaluating cycle consistency loss
def cycle_loss_fn(real, cycled):
cycle_loss = mae_loss_fn(real, cycled)
cycle_loss = tf.nn.compute_average_loss(cycle_loss, global_batch_size=GLOBAL_BATCH_SIZE)
return cycle_loss
# Loss function for evaluating identity mapping loss
def identity_loss_fn(real, same):
identity_loss = mae_loss_fn(real, same)
identity_loss = tf.nn.compute_average_loss(identity_loss, global_batch_size=GLOBAL_BATCH_SIZE)
return identity_loss
# Loss function for evaluating adversarial loss
adv_loss_fn = keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)
# Define the loss function for the generators
def generator_loss_fn(fake):
fake_loss = adv_loss_fn(tf.ones_like(fake), fake)
fake_loss = tf.nn.compute_average_loss(fake_loss, global_batch_size=GLOBAL_BATCH_SIZE)
return fake_loss
# Define the loss function for the discriminators
def discriminator_loss_fn(real, fake):
real_loss = adv_loss_fn(tf.ones_like(real), real)
fake_loss = adv_loss_fn(tf.zeros_like(fake), fake)
real_loss = tf.nn.compute_average_loss(real_loss, global_batch_size=GLOBAL_BATCH_SIZE)
fake_loss = tf.nn.compute_average_loss(fake_loss, global_batch_size=GLOBAL_BATCH_SIZE)
return (real_loss + fake_loss) * 0.5
# Get the generators
gen_G = get_resnet_generator(name="generator_G")
gen_F = get_resnet_generator(name="generator_F")
# Get the discriminators
disc_X = get_discriminator(name="discriminator_X")
disc_Y = get_discriminator(name="discriminator_Y")
# Create cycle gan model
cycle_gan_model = CycleGan(
generator_G=gen_G, generator_F=gen_F, discriminator_X=disc_X, discriminator_Y=disc_Y
)
optimizer = keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5)
# Compile the model
cycle_gan_model.compile(
gen_G_optimizer=optimizer,
gen_F_optimizer=optimizer,
disc_X_optimizer=optimizer,
disc_Y_optimizer=optimizer,
gen_loss_fn=generator_loss_fn,
disc_loss_fn=discriminator_loss_fn,
cycle_loss_fn=cycle_loss_fn,
identity_loss_fn=identity_loss_fn
)
train_dist_dataset = strategy.experimental_distribute_dataset(
tf.data.Dataset.zip((train_horses,
train_zebras)))
# `run` replicates the provided computation and runs it
# with the distributed input.
#tf.function
def distributed_train_step(dataset_inputs):
per_replica_losses = strategy.run(cycle_gan_model.train_step, args=(dataset_inputs,))
return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses,
axis=None)
"""
## Train the end-to-end model
"""
for epoch in range(1):
# TRAIN LOOP
all_loss = 0.0
num_batches = 0.0
for one_batch in train_dist_dataset:
all_loss += distributed_train_step(one_batch)
num_batches += 1
train_loss = all_loss/num_batches
print(train_loss)
Hey I realy need some help =)
firstly, sorry that it's soo long^^ but I hope that you don't need the full code at the end.
I coded a GAN for deblurring. Now I'm training it. the first 71 epochs have been trained without any problems: I trained some epochs till the colab GPU-time limit was reached, the next day I loaded my weights into the gan and continued training.
2 or 3 weeks ago I wanted to load the weights of epoch 71 in my Gan but I recieved the following error (I'm quite sure that I didn't change anything in the code). Since this moment I only can load the first 65 weights and i get the same error for every epoch higher than 65:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-16-a35c9a2bbf3a> in <module>()
1 # Load weights
----> 2 gan.load_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/deblurGAN_weights66_batchsize_1.h5")
5 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in load_weights(self, filepath, by_name, skip_mismatch, options)
2209 f, self.layers, skip_mismatch=skip_mismatch)
2210 else:
-> 2211 hdf5_format.load_weights_from_hdf5_group(f, self.layers)
2212
2213 def _updated_config(self):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/saving/hdf5_format.py in load_weights_from_hdf5_group(f, layers)
706 str(len(weight_values)) + ' elements.')
707 weight_value_tuples += zip(symbolic_weights, weight_values)
--> 708 K.batch_set_value(weight_value_tuples)
709
710
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py in batch_set_value(tuples)
3574 if ops.executing_eagerly_outside_functions():
3575 for x, value in tuples:
-> 3576 x.assign(np.asarray(value, dtype=dtype(x)))
3577 else:
3578 with get_graph().as_default():
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py in assign(self, value, use_locking, name, read_value)
856 with _handle_graph(self.handle):
857 value_tensor = ops.convert_to_tensor(value, dtype=self.dtype)
--> 858 self._shape.assert_is_compatible_with(value_tensor.shape)
859 assign_op = gen_resource_variable_ops.assign_variable_op(
860 self.handle, value_tensor, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py in assert_is_compatible_with(self, other)
1132 """
1133 if not self.is_compatible_with(other):
-> 1134 raise ValueError("Shapes %s and %s are incompatible" % (self, other))
1135
1136 def most_specific_compatible_shape(self, other):
ValueError: Shapes (4, 4, 64, 128) and (64,) are incompatible
I was looking a long time for a solution and i didn't find a real one. But I found out, that if I train one epoch with one of the old weights (1-65) afterwards I can load one of the new weights. So I thought that I could use this "workaround" but yesterday I plotted the scores of the metric of the Test dataset for every epoch. I recieved this picture:
psnrscore/epoch
as you can see it looks like I'm producing trash since epoch 65 (on the pic since 60 because I lost the first 5 epochs, so it starts by 6)
I'm realy frustrated and hope that someone could help me =D
Here's the full code of the GAN:
# Libraries to build the model
from tensorflow import pad
from tensorflow.keras.layers import Layer
from keras.layers import Input, Activation, Add, UpSampling2D
from keras.layers.merge import Add
from keras.layers.core import Dropout, Dense, Flatten
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.core import Lambda
from keras.layers.normalization import BatchNormalization
from keras.models import Model
import keras.backend as K
from keras.applications.vgg16 import VGG16
from keras.optimizers import Adam
import keras
# Reflection padding
from keras.engine import InputSpec
import tensorflow as tf
from keras.engine.topology import Layer
'''
2D Reflection Padding
Attributes:
- padding: (padding_width, padding_height) tuple
'''
class ReflectionPadding2D(Layer):
def __init__(self, padding=(1, 1), **kwargs):
self.padding = tuple(padding)
self.input_spec = [InputSpec(ndim=4)]
super(ReflectionPadding2D, self).__init__(**kwargs)
def compute_output_shape(self, s):
""" If you are using "channels_last" configuration"""
return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])
def call(self, x, mask=None):
w_pad,h_pad = self.padding
return tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT')
# Res Block
def res_block(input, filters, kernel_size = (3,3), strides = (1,1), use_dropout = False):
"""
Instanciate a Keras Resnet Block using sequential API.
:param input: Input tensor
:param filters: Number of filters to use
:param kernel_size: Shape of the kernel for the convolution
:param strides: Shape of the strides for the convolution
:param use_dropout: Boolean value to determine the use of dropout
:return: Keras Model
"""
x = ReflectionPadding2D((1,1))(input)
x = Conv2D(filters = filters,
kernel_size = kernel_size,
strides = strides,)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
if use_dropout:
x = Dropout(0.5)(x)
x = ReflectionPadding2D((1,1))(x)
x = Conv2D(filters = filters,
kernel_size = kernel_size,
strides = strides,)(x)
x = BatchNormalization()(x)
# Two convolution layers followed by a direct connection between input and output (skip connection)
out = Add()([input, x])
return out
# Generator
n_res_blocks = 9
def generator_model():
# encoder
inputs = Input(shape = img_shape)
x = ReflectionPadding2D((3, 3))(inputs)
x = Conv2D(filters = 64, kernel_size = (7,7), padding = 'valid')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(128, (3,3), strides=2, padding='same') (x) #DIM(15,15,128)
x = BatchNormalization() (x)
x = Activation('relu') (x)
x = Conv2D(256, (3,3), strides = 2, padding = 'same') (x) #DIM(7,7,256)
x = BatchNormalization() (x)
x = Activation('relu') (x)
# Apply 9 res blocks
for i in range(n_res_blocks):
x = res_block(x, 256, use_dropout = True)
# decoder
#x = Conv2DTranspose(128, (3,3), strides = 2, padding = 'same') (x)
x = UpSampling2D()(x)
x = Conv2D(filters = 128, kernel_size=(3, 3), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
#x = Conv2DTranspose(64, (3,3), strides = 2, padding = 'same') (x)
x = UpSampling2D()(x)
x = Conv2D(filters = 64, kernel_size=(3, 3), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = ReflectionPadding2D((3,3))(x)
x = Conv2D(filters = 3, kernel_size = (7,7), padding = 'valid')(x)
x = Activation('tanh')(x)
# Add direct connection from input to output and recenter to [-1, 1] (skip connection)
outputs = Add()([x, inputs])
outputs = Lambda(lambda z: z/2)(outputs) # to keep normalized outputs
model = Model(inputs = inputs, outputs = outputs, name = 'Generator')
return model
# Discriminator
def discriminator_model():
Input_img = Input(shape=(img_shape))
x = Conv2D(filters = 64, kernel_size = (4, 4), strides = 2, padding='same')(Input_img)
x = LeakyReLU(0.2)(x)
nf_mult, nf_mult_prev = 1, 1
for n in range(3):
nf_mult_prev, nf_mult = nf_mult, min(2**n, 8)
x = Conv2D(filters = 64*nf_mult, kernel_size = (4, 4), strides = 2, padding = 'same')(x)
x = BatchNormalization()(x)
x = LeakyReLU(0.2)(x)
nf_mult_prev, nf_mult = nf_mult, 8
x = Conv2D(filters = 64*nf_mult, kernel_size = (4, 4), strides = 1, padding = 'same')(x)
x = BatchNormalization()(x)
x = LeakyReLU(0.2)(x)
x = Conv2D(filters = 1, kernel_size = (4, 4), strides = 1, padding = 'same')(x)
x = Flatten()(x)
x = Dense(1024, activation = 'tanh')(x)
x = Dense(1, activation = 'sigmoid')(x)
model = Model(inputs = Input_img, outputs = x, name = 'discriminator')
return model
def gan_model(generator, discriminator):
inputs = Input(shape = img_shape)
generated_images = generator(inputs)
outputs = discriminator(generated_images)
model = Model(inputs=inputs, outputs = [generated_images, outputs])
return model
#Losses
#Wassersteinloss:
def wasserstein_loss(y_true, y_pred):
return K.mean(y_true * y_pred)
# vgg16 model for perceptual loss
vgg = VGG16(include_top = False, weights = 'imagenet', input_shape = img_shape)
loss_model = Model(inputs = vgg.input, outputs = vgg.get_layer('block3_conv3').output)
loss_model.trainable = False
#perceptual loss:
def perceptual_loss(y_true, y_pred):
return K.mean(K.square(loss_model(y_true) - loss_model(y_pred)))
#Metrics:
#SSIM:
def ssim_metric(y_true, y_pred):
return tf.reduce_mean(tf.image.ssim(tf.convert_to_tensor(y_true),tf.convert_to_tensor(y_pred), max_val=1.0, ))
#PSNR:
def psnr_metric(y_true, y_pred):
return tf.reduce_mean(tf.image.psnr(y_true, y_pred, max_val=1.0))
def training(epochs, batch_size):
path_psnr = F"/content/gdrive/My Drive/Colab Notebooks/data/psnr"
path_ssim = F"/content/gdrive/My Drive/Colab Notebooks/data/ssim"
GAN_losses = []
#psnrs = []
#ssims = []
random_idx = np.arange(0, X_train.shape[0])
n_batches = int (len(random_idx)/batch_size) #divide trainingset into batches of batch_size
for e in range(epochs):
#weights_name = "deblurGAN_weights%s_batchsize_%r.h5" %(e + 66, batch_size)
weights_name = "deblurGAN_weights_test.h5"
print("epoch: %s " %(e + 66))
#randomize index of trainig set
random.shuffle(random_idx)
for i in range(n_batches):
img_batch_blured = X_train[i*batch_size:(i+1)*batch_size]
img_batch_generated = generator.predict(img_batch_blured)
img_batch_original = Y_train[i*batch_size:(i+1)*batch_size]
img_batch = np.concatenate((img_batch_generated , img_batch_original),0)
valid0 = -np.ones(batch_size)
valid1 = np.ones(batch_size)
valid = np.concatenate((valid0,valid1))
discriminator.trainable = True
for k in range(5):
loss = discriminator.train_on_batch(img_batch, valid)
discriminator.trainable = False
GAN_loss = gan.train_on_batch(img_batch_blured, [img_batch_original, valid1])
GAN_losses.append(GAN_loss)
if (100*i/n_batches).is_integer():
psnr = psnr_metric(img_batch_original, img_batch_generated)
ssim = ssim_metric(img_batch_original, img_batch_generated)
psnrs.append(psnr)
ssims.append(ssim)
#creating 2 files in Google Drive where the psnr and ssim data will be saved.
pickle.dump( psnrs, open( path_psnr, "wb" ) )
pickle.dump( ssims, open( path_ssim, "wb" ) )
print((100*i/n_batches) + 1, "% psnr: ", psnr," ssim: ", ssim)
# Save weights: mode the path to your directory
gan.save_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/{weights_name}")
return [GAN_losses, psnrs, ssims]
# Initialize models
generator = generator_model()
discriminator = discriminator_model()
gan = gan_model(generator, discriminator)
# Initialize optimizers
d_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
gan_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
# Compile models
discriminator.trainable = True
discriminator.compile(optimizer = d_opt, loss = wasserstein_loss)
discriminator.trainable = False
loss = [perceptual_loss, wasserstein_loss]
loss_weights = [100, 1]
gan.compile(optimizer = gan_opt, loss = loss, loss_weights = loss_weights)
discriminator.trainable = True
gan.summary()
# Load weights
gan.load_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/deblurGAN_weights66_batchsize_1.h5")
#connect to GPU
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
loss = training(1, 1) #epochs, batchsize
It is solved an can be closed. I didn't know that the "discriminato.Trainable = True/False" was changed. It seems to be the reason for another ordering in the weights.
Yesterday, I have created a pretrained VGG19 with custom head and tried to train it with 60000 images. After more than 12 hours, the training of first epoch didn't complete.
The batch size has been set to 64 and the number of steps per epoch has been set to training_set_size/batch_size.
Below is the code of DataLoader:
IMAGE_CHANNEL = 3
def crop(image, margin):
return image[margin:-margin, margin:-margin]
def random_rotation(image, angle):
M = cv2.getRotationMatrix2D((0, 0),angle,1)
rows,cols, _ = image.shape
new_img = cv2.warpAffine(image, M, (cols, rows))
return new_img
def get_generator(in_gen, should_augment=True):
weights = None
if should_augment:
image_gen = tf.keras.preprocessing.image.ImageDataGenerator(fill_mode='reflect',
data_format='channels_last',
brightness_range=[0.5, 1.5])
else:
image_gen = tf.keras.preprocessing.image.ImageDataGenerator(fill_mode='reflect',
data_format='channels_last',
brightness_range=[1, 1])
for items in in_gen:
in_x, in_y = items
g_x = image_gen.flow(255 * in_x, in_y, batch_size=in_x.shape[0])
x, y = next(g_x)
yield x / 255.0, y
class DataLoader:
def __init__(self, source_filename, dataset_path, image_size, batch_size, training_set_size=0.8, sample_size=None):
path_dataset = Path(dataset_path)
path_image_folders = path_dataset / 'images'
self.data = pd.read_pickle(source_filename)
if sample_size is not None:
self.data = self.data[:sample_size]
self.image_size = image_size
self.batch_size = batch_size
self.training_set_size = training_set_size
self.steps_per_epoch = int(self.data.shape[0] * training_set_size // batch_size)
if self.steps_per_epoch == 0: self.steps_per_epoch = 1
self.validation_steps = int(self.data.shape[0] * (1 - training_set_size)//batch_size)
if self.validation_steps == 0: self.validation_steps = 1
def draw_idx(self, i):
img_path = self.data.iloc[i].image
img = tf.keras.preprocessing.image.img_to_array(tf.keras.preprocessing.image.load_img(str(img_path)))
# print(img.shape)
height, width, _ = img.shape
fig = plt.figure(figsize=(15, 15), facecolor='w')
# original image
ax = fig.add_subplot(1, 1, 1)
ax.imshow(img / 255.0)
openness = self.data.iloc[i].Openness
conscientiousness = self.data.iloc[i].Conscientiousness
extraversion = self.data.iloc[i].Extraversion
agreeableness = self.data.iloc[i].Agreeableness
neuroticism = self.data.iloc[i].Neuroticism
ax.title.set_text(
f'O: {openness}, C: {conscientiousness}, E: {extraversion}, A: {agreeableness}, N: {neuroticism}')
plt.axis('off')
plt.tight_layout()
plt.show()
def get_image(self, index, data, should_augment):
# Read image and appropiate landmarks
image = cv2.imread(data['image'].values[index])
h, w, _ = image.shape
o, c, e, a, n = data[['Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness', 'Neuroticism']].values[
index]
should_flip = random.randint(0, 1)
should_rotate = random.randint(0, 1)
should_crop = random.randint(0, 1)
if should_augment:
if should_flip == 1:
# print("Image {} flipped".format(data['path'].values[index]))
image = cv2.flip(image, 1)
if should_rotate == 1:
angle = random.randint(-5, 5)
image = random_rotation(image, angle)
if should_crop == 1:
margin = random.randint(1, 10)
image = crop(image, margin)
image = cv2.resize(image, (self.image_size, self.image_size))
return [image, o, c, e, a, n]
def generator(self, data, should_augment=True):
while True:
# Randomize the indices to make an array
indices_arr = np.random.permutation(data.count()[0])
for batch in range(0, len(indices_arr), self.batch_size):
# slice out the current batch according to batch-size
current_batch = indices_arr[batch:(batch + self.batch_size)]
# initializing the arrays, x_train and y_train
x_train = np.empty(
[0, self.image_size, self.image_size, IMAGE_CHANNEL], dtype=np.float32)
y_train = np.empty([0, 5], dtype=np.int32)
for i in current_batch:
# get an image and its corresponding color for an traffic light
[image, o, c, e, a, n] = self.get_image(i, data, should_augment)
# Appending them to existing batch
x_train = np.append(x_train, [image], axis=0)
y_train = np.append(y_train, [[o, c, e, a, n]], axis=0)
# replace nan values with zeros
y_train = np.nan_to_num(y_train)
yield (x_train, y_train)
def get_training_and_test_generators(self, should_augment_training=True, should_augment_test=True):
msk = np.random.rand(len(self.data)) < self.training_set_size
train = self.data[msk]
test = self.data[~msk]
train_gen = self.generator(train, should_augment_training)
test_gen = self.generator(test, should_augment_test)
return get_generator(train_gen, should_augment_training), get_generator(test_gen, should_augment_test)
def show_batch_images_sample(self, images, landmarks, n_rows=3, n_cols=3):
assert n_rows * n_cols <= self.batch_size, "Number of expected images to display is larger than batch!"
fig = plt.figure(figsize=(15, 15))
xs, ys = [], []
count = 1
for img, y in zip(images, landmarks):
ax = fig.add_subplot(n_rows, n_cols, count)
ax.imshow(img)
h, w, _ = img.shape
o, c, e, a, n = y
ax.title.set_text(f'{o}, {c}, {e}, {a}, {n}')
ax.axis('off')
if count == n_rows * n_cols:
break
count += 1
class CallbackTensorboardImageOutput(Callback):
def __init__(self, model, generator, log_dir, feed_inputs_display=9):
# assert ((feed_inputs_display & (feed_inputs_display - 1)) == 0) and feed_inputs_display != 0
self.generator = generator
self.model = model
self.log_dir = log_dir
self.writer = tf.summary.create_file_writer(self.log_dir)
self.feed_inputs_display = feed_inputs_display
self.seen = 0
def plot_to_image(figure):
"""Converts the matplotlib plot specified by 'figure' to a PNG image and
returns it. The supplied figure is closed and inaccessible after this call."""
# Save the plot to a PNG in memory.
buf = io.BytesIO()
plt.savefig(buf, format='png')
# Closing the figure prevents it from being displayed directly inside
# the notebook.
plt.close(figure)
buf.seek(0)
# Convert PNG buffer to TF image
image = tf.image.decode_png(buf.getvalue(), channels=4)
# Add the batch dimension
image = tf.expand_dims(image, 0)
return image
#staticmethod
def get_loss(gt, predictions):
return tf.losses.mse(gt, predictions)
def on_epoch_end(self, epoch, logs={}):
self.seen += 1
if self.seen % 1 == 0:
items = next(self.generator)
images_to_display = self.feed_inputs_display
images_per_cell_count = int(math.sqrt(images_to_display))
# in case of regular model training using generator, an array is passed
if not isinstance(items, dict):
frames_arr, ocean_scores = items
# Take just 1st sample from batch
batch_size = frames_arr.shape[0]
if images_to_display > batch_size:
images_to_display = batch_size
frames_arr = frames_arr[0:images_to_display]
ocean_scores = ocean_scores[0:images_to_display]
y_pred = self.model.predict(frames_arr)
# in case of adversarial training, a dictionary is passed
else:
batch_size = items['feature'].shape[0]
if images_to_display > batch_size:
images_to_display = batch_size
# items['feature'] = items['feature'][0:images_to_display]
# landmarks = items['label'][0:images_to_display]
frames_arr = items['feature']
landmarks = items['label']
y_pred = self.model.predict(items)
figure = plt.figure(figsize=(15, 15))
for i in range(images_to_display):
image_current = frames_arr[i]
y_prediction_current = y_pred[i]
y_gt_current = ocean_scores[i]
lbl_prediction = 'plot/img/{}'.format(i)
ax = plt.subplot(images_per_cell_count, images_per_cell_count, i + 1, title=lbl_prediction)
ax.imshow(image_current)
ax.axis('off')
with self.writer.as_default():
tf.summary.image("Training Data", CallbackTensorboardImageOutput.plot_to_image(figure), step=self.seen)
Below is the definition of the network architecture and the call of fit_generator function:
data_loader = dataloader.DataLoader('dataset.pkl', '/home/niko/data/PsychoFlickr', 224, 64)
train_gen, test_gen = data_loader.get_training_and_test_generators()
pre_trained_model = tf.keras.applications.VGG19(input_shape=(data_loader.image_size, data_loader.image_size, dataloader.IMAGE_CHANNEL), weights='imagenet', include_top=False)
x = pre_trained_model.output
x = tf.keras.layers.Flatten()(x)
# Add a fully connected layer with 256 hidden units and ReLU activation
x = tf.keras.layers.Dense(256)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)
x = tf.keras.layers.Dropout(rate=0.5)(x)
x = tf.keras.layers.Dense(256)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)
x = tf.keras.layers.Dropout(rate=0.5)(x)
x = tf.keras.layers.Dense(5, name='regresion_output')(x)
x = tf.keras.layers.Activation('linear')(x)
model = tf.keras.Model(pre_trained_model.input, x)
print(model.summary())
log_dir = "logs/{}".format(model_name)
model_filename = "saved-models/{}.h5".format(model_name)
cb_tensorboard = TensorBoard(log_dir=log_dir)
callback_save_images = dataloader.CallbackTensorboardImageOutput(model, test_gen, log_dir)
checkpoint = ModelCheckpoint(model_filename, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
lr = 1e-3
opt = tf.optimizers.Adam(lr=lr)
model.compile(loss=loss_mse, optimizer=opt, metrics=[loss_mse])
history = model.fit_generator(
train_gen,
validation_data=test_gen,
steps_per_epoch=data_loader.steps_per_epoch,
epochs=20,
validation_steps=data_loader.validation_steps,
verbose=2,
use_multiprocessing=True,
callbacks=[checkpoint, callback_save_images, cb_tensorboard]
)
When I tried to run the same procedure with small sample data (200 records), everything seemed to work fine. On the dataset of 60000 records, however, after more than 12 hours the training of 1st epoch hasn't completed.
The training is performed on NVIDIA RTX2080Ti.
I would be thankful if anyone suggested what has to be modified or in general configured in order to train the network on reasonable time.
When running the MNIST data set, I want to know what actually my model outputs during training the batch.Here is my code:(I haven't added the optimizer and the loss function):
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
INPUT_NODE = 784 # the total pixels of the input images
OUTPUT_NODE = 10 # the output varies from 0 to 9
LAYER_NODE = 500
BATCH_SIZE = 100
TRAINING_STEPS = 10
def inference(input_tensor, avg_class, weight1, biase1, weight2, biase2):
if avg_class == None:
layer = tf.nn.relu(tf.matmul(input_tensor, weight1) + biase1)
return tf.matmul(layer, weight2)+biase2
else:
layer = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weight1)) +
avg_class.average(biase1))
return tf.matmul(layer, avg_class.average(weight2)) + avg_class.average(biase2)
def train(mnist):
x = tf.placeholder(tf.float32, [None, INPUT_NODE], name = 'x-input')
y = tf.placeholder(tf.float32, [None, OUTPUT_NODE],name = 'y-input')
weight1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER_NODE], stddev = 0.1))
biase1 = tf.Variable(tf.constant(0.1, shape = [LAYER_NODE]))
weight2 = tf.Variable(tf.truncated_normal([LAYER_NODE, OUTPUT_NODE], stddev = 0.1))
biase2 = tf.Variable(tf.constant(0.1, shape = [OUTPUT_NODE]))
out = inference(x, None, weight1, biase1, weight2, biase2)
with tf.Session() as sess:
tf.global_variables_initializer().run()
validate_feed = {x:mnist.validation.images, y:mnist.validation.labels}
test_feed = {x:mnist.test.images, y:mnist.test.labels}
for i in range(TRAINING_STEPS):
xs, ys = mnist.train.next_batch(BATCH_SIZE)
sess.run(out, feed_dict= {x:xs, y:ys})
print(out)
def main(arg = None):
mnist = input_data.read_data_sets("/home/vincent/Tensorflow/MNIST/data/", one_hot = True)
train(mnist)
if __name__ == '__main__':
tf.app.run()
I try to print out:
Tensor("add_1:0", shape=(?, 10), dtype=float32)
If I want to know the value of out, what should I do?
I tried to print(out.eval()), and it raised error
out is a tensor object. If you want to get its value, replace
sess.run(out, feed_dict= {x:xs, y:ys})
print(out)
with
res_out=sess.run(out, feed_dict= {x:xs, y:ys})
print(res_out)