How to apply MeanIoU to multioutput model? - tensorflow

I have a model based on MobileNet v2 with 2 outputs: class (cat/dog) and face coordinates. So the "class" output has a BinaryCrossentropy loss and the "bbox" output has a YOLO-style loss (in the code below).
The problem is that when I try to apply metrics (Accuracy for the class output and MeanIoU for the bbox output):
On a random dataset they show strange results (accuracy == 0 and MeanIoU == 1 all the time).
On the real dataset (images and labels), fit() throws an error:
TypeError: '>' not supported between instances of 'NoneType' and 'int'
I suspect that I messed up the outputs and metrics somehow; it would be nice if someone with more experience in TensorFlow could take a look at it.
Here is the code (with a random dataset to reproduce the issue):
import tensorflow as tf
# generate fake dataset
IMG_SIZE = 200
num_of_samples = 2000
images = tf.random.uniform((num_of_samples, IMG_SIZE, IMG_SIZE, 3), minval=0, maxval=1)
images = tf.data.Dataset.from_tensor_slices(images)
label_classes = tf.random.uniform((num_of_samples, 1), minval=0, maxval=2, dtype=tf.int32)
label_classes = tf.data.Dataset.from_tensor_slices(label_classes)
label_coords = tf.random.uniform((num_of_samples, 4), minval=0, maxval=1, dtype=tf.float16)
label_coords = tf.data.Dataset.from_tensor_slices(label_coords)
labels = tf.data.Dataset.zip((label_classes, label_coords))
train = tf.data.Dataset.zip((images, labels))
train = train.shuffle(num_of_samples)
train = train.batch(8)
train = train.prefetch(4)
num_of_samples = 500
images = tf.random.uniform((num_of_samples, IMG_SIZE, IMG_SIZE, 3), minval=0, maxval=1)
images = tf.data.Dataset.from_tensor_slices(images)
label_classes = tf.random.uniform((num_of_samples, 1), minval=0, maxval=2, dtype=tf.int32)
label_classes = tf.data.Dataset.from_tensor_slices(label_classes)
label_coords = tf.random.uniform((num_of_samples, 4), minval=0, maxval=1, dtype=tf.float16)
label_coords = tf.data.Dataset.from_tensor_slices(label_coords)
labels = tf.data.Dataset.zip((label_classes, label_coords))
valid = tf.data.Dataset.zip((images, labels))
valid = valid.shuffle(num_of_samples)
valid = valid.batch(8)
valid = valid.prefetch(4)
# Model with two outputs
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalMaxPooling2D
from tensorflow.keras.applications import MobileNetV2
def cnn_from_transfer():
    IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
    input_layer = Input(shape=IMG_SHAPE)
    base_net = MobileNetV2(include_top=False, weights='imagenet')(input_layer)
    # Classification head
    h1 = GlobalMaxPooling2D()(base_net)
    class1 = Dense(2048, activation='relu')(h1)
    class2 = Dense(1, activation='sigmoid')(class1)
    # Bounding box head
    h2 = GlobalMaxPooling2D()(base_net)
    regress1 = Dense(2048, activation='relu')(h2)
    regress2 = Dense(4, activation='sigmoid')(regress1)
    return Model(inputs=input_layer, outputs=[class2, regress2])
model = cnn_from_transfer()
# Losses
def localization_loss(y_true, yhat):
    delta_coord = tf.reduce_sum(tf.square(y_true[:, :2] - yhat[:, :2]))
    h_true = y_true[:, 3] - y_true[:, 1]
    w_true = y_true[:, 2] - y_true[:, 0]
    h_pred = yhat[:, 3] - yhat[:, 1]
    w_pred = yhat[:, 2] - yhat[:, 0]
    delta_size = tf.reduce_sum(tf.square(w_true - w_pred) + tf.square(h_true - h_pred))
    return delta_coord + delta_size
classloss = tf.keras.losses.BinaryCrossentropy()
regressloss = localization_loss
# Train
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, decay=0.00001),
    loss=[classloss, regressloss],
    metrics=[[tf.keras.metrics.Accuracy()], [tf.keras.metrics.MeanIoU(num_classes=2)]],
)
history = model.fit(train, epochs=5, validation_data=valid)
What am I doing wrong?

As M.Innat said in the comments, tf.keras.metrics.MeanIoU is not applicable to my case (bounding boxes), so I need to write my own custom metric (the iou_metric function below). Moreover, I found out that BinaryAccuracy should be used instead of the plain Accuracy metric. The rest is fine.
Here is the corrected code:
def iou_metric(y_true, y_pred):
    aog = tf.abs(tf.transpose(y_true)[2] - tf.transpose(y_true)[0] + 1) * tf.abs(tf.transpose(y_true)[3] - tf.transpose(y_true)[1] + 1)
    aop = tf.abs(tf.transpose(y_pred)[2] - tf.transpose(y_pred)[0] + 1) * tf.abs(tf.transpose(y_pred)[3] - tf.transpose(y_pred)[1] + 1)
    overlap_0 = tf.maximum(tf.transpose(y_true)[0], tf.transpose(y_pred)[0])
    overlap_1 = tf.maximum(tf.transpose(y_true)[1], tf.transpose(y_pred)[1])
    overlap_2 = tf.minimum(tf.transpose(y_true)[2], tf.transpose(y_pred)[2])
    overlap_3 = tf.minimum(tf.transpose(y_true)[3], tf.transpose(y_pred)[3])
    intersection = (overlap_2 - overlap_0 + 1) * (overlap_3 - overlap_1 + 1)
    union = aog + aop - intersection
    iou = intersection / union
    iou = tf.keras.backend.clip(iou, 0.0 + tf.keras.backend.epsilon(), 1.0 - tf.keras.backend.epsilon())
    return iou
model.compile(
    optimizer=opt,
    loss=[classloss, regressloss],
    metrics=[[tf.keras.metrics.BinaryAccuracy()], [iou_metric]],
)
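As a side note, the per-output mapping can also be written with dictionaries keyed by output names, which makes it harder to mix up which metric goes to which head. This is only a minimal sketch under the assumption that the two output Dense layers are given explicit names (class_out and bbox_out are made-up names, not from the code above). Keras averages whatever a custom metric function such as iou_metric returns over the batch, so returning a per-sample IoU vector is fine.
# Hypothetical variant: name the two output layers ...
class2 = Dense(1, activation='sigmoid', name='class_out')(class1)
regress2 = Dense(4, activation='sigmoid', name='bbox_out')(regress1)
model = Model(inputs=input_layer, outputs=[class2, regress2])
# ... then map losses and metrics to outputs by name instead of by position.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss={'class_out': classloss, 'bbox_out': regressloss},
    metrics={'class_out': [tf.keras.metrics.BinaryAccuracy()], 'bbox_out': [iou_metric]},
)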

Related

The val_loss is nan, but loss is printing. Both train and validation losses are nan in model.evaluate(), and the acc improves during training

I have a 2-class classification problem with a custom loss function. The labels are categorical, and the final activation function is Softmax. During training the loss is printed, but the val_loss is nan (inf). Using model.evaluate(X_train, Y_train) at the end of training, the train loss is the same as the validation loss, and both are nan.
This is my custom loss function.
def custom_loss(y_true, y_pred):
    import tensorflow as tf
    bce = tf.keras.losses.BinaryCrossentropy(
        from_logits=False,
        label_smoothing=0.0,
        axis=-1,
        reduction="none",
        name="binary_crossentropy",
    )
    intra = tf.constant(1, dtype=tf.float64)
    inter = tf.constant(0.01, dtype=tf.float64)
    zeros = tf.gather_nd(y_pred, tf.where(tf.argmin(y_true, axis=1)))
    ones = tf.gather_nd(y_pred, tf.where(tf.argmax(y_true, axis=1)))
    centroid_zero = tf.reduce_mean(zeros, 0)
    centroid_one = tf.reduce_mean(ones, 0)
    loss_zero_intra = tf.math.squared_difference(zeros, centroid_zero)
    loss_one_intra = tf.math.squared_difference(ones, centroid_zero)
    loss_zero_intra = tf.cast(loss_zero_intra, tf.float64)
    loss_one_intra = tf.cast(loss_one_intra, tf.float64)
    loss_intra = tf.zeros_like(y_pred, tf.float64)
    loss_intra = tf.tensor_scatter_nd_update(loss_intra, tf.where(tf.argmin(y_true, axis=1)), loss_zero_intra)
    loss_intra = tf.tensor_scatter_nd_update(loss_intra, tf.where(tf.argmax(y_true, axis=1)), loss_one_intra)
    loss_inter_value = tf.math.sqrt(tf.math.squared_difference(centroid_zero[0], centroid_one[0]) +
                                    tf.math.squared_difference(centroid_zero[1], centroid_one[1]))
    loss_inter = tf.fill(tf.shape(y_pred), loss_inter_value)
    binary_cross_entropy = tf.tile(tf.expand_dims(bce(y_true, y_pred), axis=1),
                                   tf.constant([1, 2], tf.int32))
    loss_intra = tf.cast(loss_intra, tf.float64)
    loss_inter = tf.cast(loss_inter, tf.float64)
    binary_cross_entropy = tf.cast(binary_cross_entropy, tf.float64)
    loss = tf.math.multiply(intra, loss_intra) - tf.math.multiply(inter, loss_inter) + binary_cross_entropy
    return loss
You can also see my model code here:
def create_model(kernelLength=32, nb_classes=2, Chans=19, Samples=512,
                 dropoutRate=0.5, F1=8, D=2, F2=16, norm_rate=0.25,
                 dropoutType='Dropout', optimizer_type='Adam', lr=0.0005, **kwargs):
    K.clear_session()
    gc.collect()
    if dropoutType == 'SpatialDropout2D':
        dropoutType = SpatialDropout2D
    elif dropoutType == 'Dropout':
        dropoutType = Dropout
    else:
        raise ValueError('dropoutType must be one of SpatialDropout2D '
                         'or Dropout, passed as a string.')
    input1 = Input(shape=(1, Chans, Samples))
    block1 = Conv2D(F1, (1, kernelLength), padding='same',
                    input_shape=(1, Chans, Samples),
                    use_bias=False)(input1)
    block1 = BatchNormalization(axis=1)(block1)
    block1 = DepthwiseConv2D((Chans, 1), use_bias=False,
                             depth_multiplier=D,
                             depthwise_constraint=max_norm(1.))(block1)
    block1 = BatchNormalization(axis=1)(block1)
    block1 = Activation('elu')(block1)
    block1 = AveragePooling2D((1, 4))(block1)
    block1 = dropoutType(dropoutRate)(block1)
    block2 = SeparableConv2D(F2, (1, 16),
                             use_bias=False, padding='same')(block1)
    block2 = BatchNormalization(axis=1)(block2)
    block2 = Activation('elu')(block2)
    block2 = AveragePooling2D((1, 8))(block2)
    block2 = dropoutType(dropoutRate)(block2)
    flatten = Flatten(name='flatten')(block2)
    dense = Dense(nb_classes, name='dense',
                  kernel_constraint=max_norm(norm_rate))(flatten)
    softmax = Activation('softmax', name='softmax')(dense)
    model = Model(inputs=input1, outputs=softmax)
    if optimizer_type == 'Adam':
        optimizer = Adam(learning_rate=lr)
    if optimizer_type == 'Adamax':
        optimizer = Adamax(learning_rate=lr)
    if optimizer_type == 'AdamW':
        optimizer = AdamW(learning_rate=lr)
    model.compile(loss=custom_loss, optimizer=optimizer, metrics=['accuracy'])
    return model
The custom_loss function returns three distinct terms. One of them is the binary_cross_entropy. The model works fine with this term alone, which behaves the same as binary_crossentropy in Keras, so there is no problem with the data. The train and validation accuracies improve throughout training, and the train loss decreases. The number of validation samples is the same as the number of train samples.
After training finished, model.evaluate(X, Y) reported the loss as "nan"; however, calculating the loss manually with the custom loss function gave a number, not "nan".
Increasing the batch size, scaling the data, and using clipnorm or clipvalue in the optimizer all had no effect. Furthermore, no nan appears in the model predictions (y_pred). I suspect that the problem is caused by extreme values inside the model predictions (an example prediction with such extremes is omitted here).
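One generic way to locate where a nan first appears (an editorial sketch, not part of the original question) is to wrap suspect intermediate tensors in tf.debugging.check_numerics inside custom_loss, or to enable numeric checking globally. A candidate worth checking here: tf.reduce_mean over an empty slice returns nan, which can happen to centroid_zero or centroid_one if a batch contains samples of only one class.
# Editorial sketch: lines to drop into custom_loss right after the centroids are computed.
# check_numerics raises InvalidArgumentError with the given message as soon as the tensor
# contains a nan or inf, which pinpoints the first offending term.
centroid_zero = tf.debugging.check_numerics(centroid_zero, "centroid_zero is nan/inf (empty class-0 slice in this batch?)")
centroid_one = tf.debugging.check_numerics(centroid_one, "centroid_one is nan/inf (empty class-1 slice in this batch?)")
# Alternatively, enable checking for every op before training/evaluating:
tf.debugging.enable_check_numerics()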
Can anyone suggest a solution to this problem?
Thanks in advance.

Keras Model works w/ 3 inputs but not 4

I'm trying to build a VAE for some time series data, but am having a hard time getting the model to work with 4 inputs instead of 3, and I'm not sure what's causing the problem.
Here's the complete code that I have:
# data for each time series
import yfinance as yf
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers
# load in the data
msft = yf.Ticker('MSFT').history(period = '5y')[['Close']]
googl = yf.Ticker('GOOGL').history(period = '5y')[['Close']]
amzn = yf.Ticker('AMZN').history(period = '5y')[['Close']]
vals = np.sin(np.linspace(-100, 100, msft.shape[0]))[:, None]
# scale the data for numeric stability
msft = StandardScaler().fit_transform(msft)
googl = StandardScaler().fit_transform(googl)
amzn = StandardScaler().fit_transform(amzn)
# global variables
latent_dim = 2
batch_size = 32
sequence_length = 30
# build time series samplers for each time series
c1 = keras.utils.timeseries_dataset_from_array(
msft,
targets = None,
sequence_length = sequence_length
)
c2 = keras.utils.timeseries_dataset_from_array(
googl,
targets = None,
sequence_length = sequence_length
)
c3 = keras.utils.timeseries_dataset_from_array(
amzn,
targets = None,
sequence_length = sequence_length
)
c4 = keras.utils.timeseries_dataset_from_array(
vals,
targets = None,
sequence_length = sequence_length
)
# add the encoder for the sine wave
sin_inputs = keras.layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
sx = layers.LSTM(64, return_sequences = True)(sin_inputs)
sx = layers.LSTM(64)(sx)
# build the encoders for each of the separate time series
msft_inputs = layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
mx = layers.LSTM(64, return_sequences = True)(msft_inputs)
mx = layers.LSTM(64)(mx)
# now for google
googl_inputs = layers.Input(shape=(sequence_length, 1))
gx = layers.LSTM(64, return_sequences = True)(googl_inputs)
gx = layers.LSTM(64)(gx)
# and for amazon
amzn_inputs = layers.Input(shape = (sequence_length, 1))
ax = layers.LSTM(64, return_sequences = True)(amzn_inputs)
ax = layers.LSTM(64)(ax)
# now combine them together for a single joint time series!
x = layers.Concatenate()([mx, gx, ax, sx])
# pass into a dense layer
x = layers.Dense(64, activation = 'relu')(x)
# and finally pass them into the final decoder!
z_mean = layers.Dense(latent_dim, name = 'z_mean')(x)
z_logvar = layers.Dense(latent_dim, name = 'z_logvar')(x)
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs, sin_inputs], [z_mean, z_logvar], name = 'encoder')
class Sampler(layers.Layer):
    def call(self, z_mean, z_logvar):
        batch_size = tf.shape(z_mean)[0]
        n_dims = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch_size, n_dims))
        return z_mean + tf.exp(0.5 * z_logvar) * epsilon
latent_inputs = keras.Input(shape=(latent_dim,))
dec = layers.RepeatVector(sequence_length)(latent_inputs)
dec = layers.LSTM(64, return_sequences=True)(dec)
out = layers.TimeDistributed(layers.Dense(1))(dec)
decoder = keras.Model(latent_inputs, out)
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.sampler = Sampler()
        self.total_loss_tracker = keras.metrics.Mean(name='total_loss')
        self.reconstruction_loss_tracker = keras.metrics.Mean(name='reconstruction_loss')
        self.kl_loss_tracker = keras.metrics.Mean(name='kl_loss')

    @property
    def metrics(self):
        return [self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.kl_loss_tracker]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_logvar = self.encoder(data)
            z = self.sampler(z_mean, z_logvar)
            reconstruction = decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction),
                    axis=(1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_logvar - tf.square(z_mean) - tf.exp(z_logvar))
            total_loss = reconstruction_loss + tf.reduce_mean(kl_loss)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "total_loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam(), run_eagerly=False)
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat(), c4.repeat()), epochs = 10, steps_per_epoch = 10)
When I fit this model I get the following error:
ValueError: Data is expected to be in format `x`, `(x,)`, `(x, y)`, or `(x, y, sample_weight)`, found: (<tf.Tensor: shape=(128, 30, 1),
My issue is that this exact same model works when I only have 3 inputs instead of 4.
If I replace the lines where I specify the inputs everything seems to work fine:
x = layers.Concatenate()([mx, gx, sx])
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs], [z_mean, z_logvar], name = 'encoder')
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat()), epochs = 10, steps_per_epoch = 10)
So I'm curious about what it is about my setup that is causing my model to break when I add the fourth input.
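One observation, offered as an editorial sketch rather than a confirmed fix: the built-in zip yields plain Python tuples, and Keras tries to unpack each yielded tuple as x, (x,), (x, y), or (x, y, sample_weight), which is exactly the format listed in the error message; a 3-tuple still fits that pattern, but a 4-tuple does not. Zipping at the tf.data level instead keeps the four series together as one dataset element, which the overridden train_step then passes straight to the four-input encoder. Whether the reconstruction loss is meaningful against a tuple of four series is a separate question.
# Editorial sketch (assumption): zip the four datasets with tf.data instead of Python's zip,
# so each element reaches train_step as a single 4-tuple of inputs rather than being
# unpacked as (x, y, sample_weight) by the generator adapter.
train_ds = tf.data.Dataset.zip((c1.repeat(), c2.repeat(), c3.repeat(), c4.repeat()))
vae.fit(train_ds, epochs=10, steps_per_epoch=10)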

Training by parts in Keras

I need your help.
My project, in general, is about developing an image analysis algorithm for the quantification of ferritin in the sclera.
My code mixes segmentation and regression in a sequential model with a single combined output (i.e. both tasks are trained at the same time). I want the training to be done in parts, i.e. it first trains the segmentation, records the result, and then starts the regression (a rough sketch of this staged setup appears at the end of this question).
The inputs of the segmentation are the images plus their masks;
the inputs of the regression are the images plus the ferritin values.
Segmentation and regression layers are renamed because some of them share the same name when backbones are used:
def load_model(segmentation_model, regression_model, width, height, num_classes=1):
    # Rename segmentation model layers and weights
    for layer in segmentation_model.layers:
        rename(segmentation_model, layer, layer.name + '_seg')
    #for i, w in enumerate(segmentation_model.weights):
    #    split_name = w.name.split('/')
    #    new_name = split_name[0] + '_seg' + '/' + split_name[1]
    #    segmentation_model.weights[i]._handle_name = new_name
    # Rename regression model layers
    for layer in regression_model.layers:
        rename(regression_model, layer, layer.name + '_reg')
    #for i, w in enumerate(regression_model.weights):
    #    split_name = w.name.split('/')
    #    new_name = split_name[0] + '_reg' + '/' + split_name[1]
    #    regression_model.weights[i]._handle_name = new_name
    image = layers.Input(shape=(width, height, 3), name="img")
    mask_image = segmentation_model(image)
    if num_classes == 1:
        mask_image_categorical = K.cast(K.squeeze(mask_image, axis=3) + 0.5, dtype='int32')  # Threshold at 0.5
    else:
        mask_image_categorical = K.argmax(mask_image, axis=3)
    masked_layer = mylayers.CustomMasking(mask_value=0)
    masked_image = masked_layer.call([image, mask_image_categorical])
    value = regression_model(masked_image)
    m = models.Model(inputs=image, outputs=[mask_image, value])
    #m = models.Model(inputs=image, outputs=[mask_image, value, mask_image_categorical, masked_image])
    #for i, w in enumerate(m.weights): print(i, w.name)
    m.summary()
    return m
def make_train(model, regression_loss_weight, regression_loss_weight_max, train_generator, epochs, val_dataset, validation_steps, weights_path, logger_path, num_classes, focal, enable_plot, init_mode=False):
    optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    regression_loss_weight_variable = K.variable(regression_loss_weight, name='regression_loss_weight')
    mse = losses.MeanSquaredError()
    weighted_mse = weighted_loss(mse, regression_loss_weight_variable)
    weighted_mse.trainable = False
    if focal:
        if num_classes == 1:
            bce = mylosses.binary_focal_loss
            weighted_bce = weighted_loss(bce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_bce, 'model_regression': weighted_mse}
        else:
            fce = mylosses.categorical_focal_loss
            weighted_fce = weighted_loss(fce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_fce, 'model_regression': weighted_mse}
    else:
        if num_classes == 1:
            bce = losses.BinaryCrossentropy(from_logits=True)
            weighted_bce = weighted_loss(bce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_bce, 'model_regression': weighted_mse}
        else:
            cce = losses.CategoricalCrossentropy()
            weighted_cce = weighted_loss(cce, 1 - regression_loss_weight_variable)
            loss = {'model_segmentation': weighted_cce, 'model_regression': weighted_mse}
    metric = metrics.BinaryAccuracy() if num_classes == 1 else metrics.CategoricalAccuracy()
    metric_array_segmentation = [metric, mymetrics.iou_score_threshold, mymetrics.f1_score_threshold]
    metric_array_regression = [metrics.RootMeanSquaredError(), metrics.MeanAbsoluteError(), metrics.MeanAbsolutePercentageError()]
    metric_dict = {'model_segmentation': metric_array_segmentation, 'model_regression': metric_array_regression}
    loss_weights = [1.0, 1.0]  # Weight for regression is taken into account in the weighted_mse loss function
    model.compile(optimizer, loss, metric_dict, loss_weights)
    loss_weight_callback = LossWeightsCallback(regression_loss_weight_variable, regression_loss_weight, regression_loss_weight_max, epochs, 'val_model_segmentation_f1-score')
    freezing_callback = SegmentationModelFreezingCallback(model, 'val_model_segmentation_f1-score', 0.95)
    checkpoint = ModelCheckpoint(weights_path, monitor='val_model_regression_root_mean_squared_error', mode='min', verbose=1, save_best_only=True)
    csv_logger = CSVLogger(logger_path, append=True, separator=';')
    lr_reducer = ReduceLROnPlateau(monitor='val_model_regression_root_mean_squared_error', mode='min', factor=0.2, patience=10, min_lr=10e-7, min_delta=0.01, verbose=1)
    earlystopping = EarlyStopping(monitor='val_model_regression_root_mean_squared_error', mode='min', verbose=1, patience=20, restore_best_weights=True)
    callbacks_list = [loss_weight_callback, freezing_callback, checkpoint, csv_logger, lr_reducer, earlystopping]
    # Test custom masking layer or global model
    #instance = train_generator[0]
    #imgs = np.squeeze(instance[0], axis=3) if instance[0].shape[3] == 1 else instance[0]
    #imsave("unmasked_img.png", imgs[0])
    #masks = np.squeeze(instance[1]['model_segmentation'], axis=3) if instance[1]['model_segmentation'].shape[3] == 1 else instance[1]['model_segmentation']
    #imsave("mask.png", masks[0] * 255)
    #masked_layer = mylayers.CustomMasking(mask_value=0)
    #masked_imgs = masked_layer.call([imgs, masks])
    #img = K.eval(masked_imgs[0,:,:,:])
    #imsave("masked_img.png", img)
    #y = model(imgs)
    #mask_image = y[0][0,:,:,:]
    #value = K.eval(y[1][0])
    if init_mode:
        instance = train_generator[0]
        model.train_on_batch(instance[0][:1], [instance[1]['model_segmentation'][:1], instance[1]['model_regression'][:1]])
    else:
        results = model.fit(train_generator, epochs=epochs, validation_data=val_dataset, validation_steps=validation_steps, callbacks=callbacks_list)
        model.save_weights(weights_path)
        # Display of metrics and loss vs epochs: metric names must match the metric functions
        if enable_plot:
            if num_classes == 1:
                plot_history(results,
                             metrics=['model_segmentation_binary_accuracy', 'val_model_segmentation_binary_accuracy', 'model_regression_root_mean_squared_error', 'val_model_regression_root_mean_squared_error', 'model_regression_mean_absolute_error', 'val_model_regression_mean_absolute_error', 'model_regression_mean_absolute_percentage_error', 'val_model_regression_mean_absolute_percentage_error'],
                             losses=['model_segmentation_loss', 'model_regression_loss', 'val_model_segmentation_loss', 'val_model_regression_loss'])
            else:
                plot_history(results,
                             metrics=['model_segmentation_categorical_accuracy', 'val_model_segmentation_categorical_accuracy', 'model_segmentation_mean_io_u', 'val_model_segmentation_mean_io_u', 'model_segmentation_f1-score', 'val_model_segmentation_f1-score', 'model_regression_root_mean_squared_error', 'val_model_regression_root_mean_squared_error', 'model_regression_mean_absolute_error', 'val_model_regression_mean_absolute_error', 'model_regression_mean_absolute_percentage_error', 'val_model_regression_mean_absolute_percentage_error'],
                             losses=['model_segmentation_loss', 'model_regression_loss', 'val_model_segmentation_loss', 'val_model_regression_loss'])
Defining the model and loading weights is not enough when using a custom model and/or layer; the model must also be compiled and trained on a minimal batch in order to be initialized:
def init_model(model, loss_weight, loss_weight_max, train_generator, epochs, weights_path, logger_path, num_classes, focal):
    make_train(model, loss_weight, loss_weight_max, train_generator, epochs, None, None, '', logger_path, num_classes, focal, False, True)
    model.load_weights(weights_path)
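A rough editorial sketch of the staged training described at the top of this question (an assumption, not the asker's code): train the segmentation model on images and masks first, freeze it, then train only the regression part on top of its output. Here seg_generator and reg_generator are hypothetical generator names, and 'model_regression' is assumed to be the name of the regression sub-model, as in make_train above.
# Stage 1: train segmentation alone on (image, mask) pairs, then save it.
segmentation_model.compile(optimizer='adam', loss='binary_crossentropy')
segmentation_model.fit(seg_generator, epochs=50)
segmentation_model.save_weights('segmentation_weights.h5')
# Stage 2: freeze the segmentation sub-model and train the regression head
# on (image, ferritin value) pairs through the combined model; only the
# regression output gets a loss, so only regression targets are needed.
segmentation_model.trainable = False
combined = load_model(segmentation_model, regression_model, width, height, num_classes=1)
combined.compile(optimizer='adam', loss={'model_regression': 'mse'})
combined.fit(reg_generator, epochs=50)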

Incompatible dimensions between label and predict data (KERAS - RALSGAN)

I am trying to train a RALSGAN in Keras; the loss function can be found at
https://www.kaggle.com/c/generative-dog-images/discussion/99485#latest-597003
# Take AVG over x_r and x_f in batch
disc_loss = (C(x_r) - AVG(C(x_f)) - 1)^2 + (C(x_f) - AVG(C(x_r)) + 1)^2
gen_loss = (C(x_r) - AVG(C(x_f)) + 1)^2 + (C(x_f) - AVG(C(x_r)) - 1)^2
In order to get C(x_r) and C(x_f), I send both sets of data into the network and concatenate them before the output; however, I get an error about incompatible dimensions:
# =========loss function =========
import tensorflow as tf
batch = 75
def TF_errD(label_one, predict):
    fake, real = tf.split(predict, [batch, batch], 0)
    label_one, label_one1 = tf.split(label_one, [batch, batch], 0)
    return (tf.reduce_mean((real - tf.reduce_mean(fake, 0) - label_one)**2, 0)
            + tf.reduce_mean((fake - tf.reduce_mean(real, 0) + label_one)**2, 0)) / 2.
# label_one here is a dummy, I use ones_like in return, however, get same error
def TF_errG(label_one, predict):
    fake, real = tf.split(predict, [batch, batch], 0)
    return (tf.reduce_mean((real - tf.reduce_mean(fake, 0) + tf.ones_like(real))**2, 0)
            + tf.reduce_mean((fake - tf.reduce_mean(real, 0) - tf.ones_like(real))**2, 0)) / 2.
# =============end of loss function=======
#======== here is generator phase==============
from keras.layers import (Input, Dense, Reshape, BatchNormalization,
                          Flatten, Conv2D, UpSampling2D, Conv2DTranspose)
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential, Model
from keras.optimizers import Adam
input_dims = (1000,)
basic_units= 10
def conv_l(x, channel=10, kernel=4, stride=(2, 2), padding='same', batch_n_momentum=0.5, relu_arpha=0.2):
    x = Conv2DTranspose(filters=channel, kernel_size=kernel, strides=stride, padding=padding)(x)
    x = BatchNormalization(momentum=batch_n_momentum)(x)
    x = LeakyReLU(alpha=relu_arpha)(x)
    return x
Input_l = Input(shape=input_dims)
x = Dense(512*8*8,input_dim=input_dims)(Input_l)
x = LeakyReLU(alpha=0.2)(x)
x = BatchNormalization(momentum=0.5)(x)
x = Reshape((8,8,512))(x)
x = conv_l(x, 512)
x = conv_l(x, 256)
x = conv_l(x, 128)
Output_l = Conv2D(3,3,padding='same',activation='tanh')(x)
Generator = Model(Input_l,Output_l)
Generator.summary()
#======== end of generator ==============
#======== here is discriminator phase==============
from keras.layers import Dropout
Discriminator = Sequential()
Discriminator.add(Conv2D(32,4,padding='same',strides= 2, input_shape= (64,64,3)))
Discriminator.add(LeakyReLU(alpha=0.2))
Discriminator.add(Conv2D(64,4,strides= 2, padding='same'))
Discriminator.add(LeakyReLU(alpha=0.2))
Discriminator.add(Conv2D(128,4,strides= 2,padding='same'))
Discriminator.add(LeakyReLU(alpha=0.2))
Discriminator.add(Conv2D(256,4,strides= 2,padding='same'))
Discriminator.add(LeakyReLU(alpha=0.2))
Discriminator.add(Dropout(0.3))
Discriminator.add(Conv2D(1,4,strides= 1,padding='valid'))
Discriminator.compile(loss=TF_errD, optimizer=optimizer_dis, metrics=['accuracy'])
Discriminator.summary()
#=========== end of discriminator ==============
#=========GAN network=========
from keras.layers.merge import concatenate
Discriminator.trainable = False
image_shape = imagesIn[0].shape
input_noise = Input(input_dims)
input_real = Input(image_shape)
output_g = Generator(input_noise)
dis_inputimage = concatenate([output_g,input_real],axis=0)
output = Discriminator(dis_inputimage)
GAN = Model([input_noise,input_real],output)
GAN.compile(loss=TF_errG, optimizer=optimizer_gen, metrics=['accuracy'])
GAN.summary()
#===============end of GAN ============
#========training phase============
epochs = 600
input_dim = 1000
label_one_d=tf.ones([150,1,1,1], tf.int32)
label_one_g=tf.ones([75,1,1,1], tf.int32)
for times in range(epochs):
    for batch_times in range(int(idxIn/batch)):
        # =======================
        # ==train discriminator==
        # =======================
        sub_images = imagesIn2[batch_times*batch:(batch_times+1)*batch]
        noise = np.random.normal(0, 1, (batch, input_dim))
        synthesis_img = Generator.predict(noise)
        data = np.concatenate((synthesis_img, sub_images))
        d_loss = Discriminator.train_on_batch(data, label_one_d)
        # =======================
        # ====train generator====
        # =======================
        noise = np.random.normal(0, 1, (batch, input_dim))
        g_loss = GAN.train_on_batch(([noise, sub_images]), label_one_g)
# ======================end of training===============
If I set label_one_g = tf.ones([75,1,1,1], tf.int32), then I get an error like:
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: Incompatible shapes: [150,1,1,1] vs. [75,1,1,1]
[[{{node metrics_11/acc/Equal}}]]
[[loss_11/mul/_1877]]
(1) Invalid argument: Incompatible shapes: [150,1,1,1] vs. [75,1,1,1]
[[{{node metrics_11/acc/Equal}}]]
0 successful operations.
0 derived errors ignored.
If I set label_one_g = tf.ones([150,1,1,1], tf.int32), then:
ValueError: Input arrays should have the same number of samples as target arrays. Found 75 input samples and 150 target samples.
I need help addressing this issue. Or is there any other training method that I can use?
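An editorial observation, not a verified fix: both tracebacks point at the accuracy metric node (metrics_11/acc/Equal), which compares the label batch against the concatenated 150-sample output. One option to try is compiling the combined model without the accuracy metric (it has no real meaning for this concatenated-critic setup) and keeping the 75-sample dummy labels, since TF_errG already builds its own targets with tf.ones_like:
# Editorial sketch (assumption): drop the accuracy metric, which is what fails when
# 75 labels meet 150 concatenated outputs; the custom loss ignores label_one anyway.
GAN.compile(loss=TF_errG, optimizer=optimizer_gen)
label_one_g = tf.ones([75, 1, 1, 1], tf.int32)  # dummy labels, one per input sample
g_loss = GAN.train_on_batch([noise, sub_images], label_one_g)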

MNIST - Vanilla Neural Network - Why Cost Function is Increasing?

I've been combing through this code for a week now trying to figure out why my cost function is increasing (the plot of cost per epoch is not reproduced here). Reducing the learning rate does help, but only very little. Can anyone spot why the cost function isn't working as expected?
I realise a CNN would be preferable, but I still want to understand why this simple network is failing.
Please help:)
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
mnist = input_data.read_data_sets("MNIST_DATA/",one_hot=True)
def createPlaceholders():
    xph = tf.placeholder(tf.float32, (784, None))
    yph = tf.placeholder(tf.float32, (10, None))
    return xph, yph
def init_param(layers_dim):
    weights = {}
    L = len(layers_dim)
    for l in range(1, L):
        weights['W' + str(l)] = tf.get_variable('W' + str(l), shape=(layers_dim[l], layers_dim[l-1]), initializer=tf.contrib.layers.xavier_initializer())
        weights['b' + str(l)] = tf.get_variable('b' + str(l), shape=(layers_dim[l], 1), initializer=tf.zeros_initializer())
    return weights
def forward_prop(X, L, weights):
    parameters = {}
    parameters['A0'] = tf.cast(X, tf.float32)
    for l in range(1, L-1):
        parameters['Z' + str(l)] = tf.add(tf.matmul(weights['W' + str(l)], parameters['A' + str(l-1)]), weights['b' + str(l)])
        parameters['A' + str(l)] = tf.nn.relu(parameters['Z' + str(l)])
    parameters['Z' + str(L-1)] = tf.add(tf.matmul(weights['W' + str(L-1)], parameters['A' + str(L-2)]), weights['b' + str(L-1)])
    return parameters['Z' + str(L-1)]
def compute_cost(ZL, Y):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf.cast(Y, tf.float32), logits=ZL))
    return cost
def randomMiniBatches(X, Y, minibatch_size):
    m = X.shape[1]
    shuffle = np.random.permutation(m)
    temp_X = X[:, shuffle]
    temp_Y = Y[:, shuffle]
    num_complete_minibatches = int(np.floor(m / minibatch_size))
    mini_batches = []
    for batch in range(num_complete_minibatches):
        mini_batches.append((temp_X[:, batch*minibatch_size: (batch+1)*minibatch_size], temp_Y[:, batch*minibatch_size: (batch+1)*minibatch_size]))
    mini_batches.append((temp_X[:, num_complete_minibatches*minibatch_size:], temp_Y[:, num_complete_minibatches*minibatch_size:]))
    return mini_batches
def model(X, Y, layers_dim, learning_rate=0.001, num_epochs=20, minibatch_size=64):
    tf.reset_default_graph()
    costs = []
    xph, yph = createPlaceholders()
    weights = init_param(layers_dim)
    ZL = forward_prop(xph, len(layers_dim), weights)
    cost = compute_cost(ZL, yph)
    optimiser = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(num_epochs):
            minibatches = randomMiniBatches(X, Y, minibatch_size)
            epoch_cost = 0
            for b, mini in enumerate(minibatches, 1):
                mini_x, mini_y = mini
                _, c = sess.run([optimiser, cost], feed_dict={xph: mini_x, yph: mini_y})
                epoch_cost += c
            print('epoch: ', epoch+1, '/ ', num_epochs)
            epoch_cost /= len(minibatches)
            costs.append(epoch_cost)
    plt.plot(costs)
    print(costs)
X_train = mnist.train.images.T
n_x = X_train.shape[0]
Y_train = mnist.train.labels.T
n_y = Y_train.shape[0]
layers_dim = [n_x,10,n_y]
model(X_train, Y_train, layers_dim)
Without going too much into how you draw the mini-batches: I think the problem is that you are, for some reason, defining axis 1 of xph and yph as the batch dimension (and feeding accordingly), while the computational graph of the network expects axis 0 to be the batch dimension, as is usually done.
So your forward propagation is actually performed along the batch dimension, which does not make sense.
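A minimal sketch of the layout the answer is suggesting (an editorial illustration, not the answerer's code), keeping the question's TF1-style API but putting the batch on axis 0:
# Row-major convention: each row of X is one example, so the batch stays on axis 0.
xph = tf.placeholder(tf.float32, (None, 784))   # (batch, features)
yph = tf.placeholder(tf.float32, (None, 10))    # (batch, classes)
# Weights shaped (in_features, out_features) so that Z = X @ W + b
W1 = tf.get_variable('W1', shape=(784, 10), initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.get_variable('b1', shape=(1, 10), initializer=tf.zeros_initializer())
Z1 = tf.matmul(xph, W1) + b1
# The data would then be fed without transposing:
# X_train = mnist.train.images   # shape (m, 784)
# Y_train = mnist.train.labels   # shape (m, 10)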