Keras-Tensorflow ValueError: during model.fit whehn using Jaccard or IOU - tensorflow

I am starting my journey in deep learning and relatively new to this topic. Hoping someone could help me out as I am stuck here for a long time.
I am trying to train deeplabv3+ architecture-based model for semantic segmentation when I faced the following error. Earlier I was using categorical cross-entropy loss and metric as accuracy where it was compiling just fine.
However, soon I realized that it is more appropriate to use mIOU or Jaccard index as metrics but am facing the following error. ( I think it is because though the "values" are the same one is a tensor? I might be wrong)
the error:
ValueError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function *
return step_function(self, iterator)
File "<ipython-input-24-4f4d58e1a657>", line 11, in jacard_coef_loss *
return -jacard_coef(y_true, y_pred) # -1 ultiplied as we want to minimize this value as loss function
File "<ipython-input-24-4f4d58e1a657>", line 6, in jacard_coef *
intersection = K.sum(y_true_f * y_pred_f)
ValueError: Dimensions must be equal, but are 1048576 and 4194304 for '{{node jacard_coef_loss/mul}} = Mul[T=DT_FLOAT](jacard_coef_loss/Reshape, jacard_coef_loss/Reshape_1)' with input shapes: [1048576], [4194304].
my data set shape :
Train Dataset: <BatchDataset element_spec=(TensorSpec(shape=(4, 512, 512, 3), dtype=tf.float32, name=None), TensorSpec(shape=(4, 512, 512, 1), dtype=tf.float32, name=None))>
Val Dataset: <BatchDataset element_spec=(TensorSpec(shape=(4, 512, 512, 3), dtype=tf.float32, name=None), TensorSpec(shape=(4, 512, 512, 1), dtype=tf.float32, name=None))>
Code for model creation- referred Keras deeplabv3+ sample
def convolution_block(
block_input,
num_filters=256,
kernel_size=3,
dilation_rate=1,
padding="same",
use_bias=False,
):
x = layers.Conv2D(
num_filters,
kernel_size=kernel_size,
dilation_rate=dilation_rate,
padding="same",
use_bias=use_bias,
kernel_initializer=keras.initializers.HeNormal(),
)(block_input)
x = layers.BatchNormalization()(x)
return tf.nn.relu(x)
def DilatedSpatialPyramidPooling(dspp_input):
dims = dspp_input.shape
x = layers.AveragePooling2D(pool_size=(dims[-3], dims[-2]))(dspp_input)
x = convolution_block(x, kernel_size=1, use_bias=True)
out_pool = layers.UpSampling2D(
size=(dims[-3] // x.shape[1], dims[-2] // x.shape[2]), interpolation="bilinear",
)(x)
out_1 = convolution_block(dspp_input, kernel_size=1, dilation_rate=1)
out_6 = convolution_block(dspp_input, kernel_size=3, dilation_rate=6)
out_12 = convolution_block(dspp_input, kernel_size=3, dilation_rate=12)
out_18 = convolution_block(dspp_input, kernel_size=3, dilation_rate=18)
x = layers.Concatenate(axis=-1)([out_pool, out_1, out_6, out_12, out_18])
output = convolution_block(x, kernel_size=1)
return output
def DeeplabV3Plus(image_size, num_classes):
model_input = keras.Input(shape=(image_size, image_size, 3))
resnet50 = keras.applications.ResNet50(
weights="imagenet", include_top=False, input_tensor=model_input
)
x = resnet50.get_layer("conv4_block6_2_relu").output
x = DilatedSpatialPyramidPooling(x)
input_a = layers.UpSampling2D(
size=(image_size // 4 // x.shape[1], image_size // 4 // x.shape[2]),
interpolation="bilinear",
)(x)
input_b = resnet50.get_layer("conv2_block3_2_relu").output
input_b = convolution_block(input_b, num_filters=48, kernel_size=1)
x = layers.Concatenate(axis=-1)([input_a, input_b])
x = convolution_block(x)
x = convolution_block(x)
x = layers.UpSampling2D(
size=(image_size // x.shape[1], image_size // x.shape[2]),
interpolation="bilinear",
)(x)
model_output = layers.Conv2D(num_classes, kernel_size=(1, 1), padding="same")(x)
return keras.Model(inputs=model_input, outputs=model_output)
model = DeeplabV3Plus(image_size=IMAGE_SIZE, num_classes=NUM_CLASSES)
model.summary()
the model:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_3 (InputLayer) [(None, 512, 512, 3 0 []
)]
conv1_pad (ZeroPadding2D) (None, 518, 518, 3) 0 ['input_3[0][0]']
conv1_conv (Conv2D) (None, 256, 256, 64 9472 ['conv1_pad[0][0]']
)
conv1_bn (BatchNormalization) (None, 256, 256, 64 256 ['conv1_conv[0][0]']
)
conv1_relu (Activation) (None, 256, 256, 64 0 ['conv1_bn[0][0]']
)
pool1_pad (ZeroPadding2D) (None, 258, 258, 64 0 ['conv1_relu[0][0]']
)
pool1_pool (MaxPooling2D) (None, 128, 128, 64 0 ['pool1_pad[0][0]']
)
conv2_block1_1_conv (Conv2D) (None, 128, 128, 64 4160 ['pool1_pool[0][0]']
)
.
.
up_sampling2d_7 (UpSampling2D) (None, 128, 128, 25 0 ['tf.nn.relu_23[0][0]']
6)
tf.nn.relu_24 (TFOpLambda) (None, 128, 128, 48 0 ['batch_normalization_24[0][0]']
)
concatenate_5 (Concatenate) (None, 128, 128, 30 0 ['up_sampling2d_7[0][0]',
4) 'tf.nn.relu_24[0][0]']
conv2d_27 (Conv2D) (None, 128, 128, 25 700416 ['concatenate_5[0][0]']
6)
batch_normalization_25 (BatchN (None, 128, 128, 25 1024 ['conv2d_27[0][0]']
ormalization) 6)
tf.nn.relu_25 (TFOpLambda) (None, 128, 128, 25 0 ['batch_normalization_25[0][0]']
6)
conv2d_28 (Conv2D) (None, 128, 128, 25 589824 ['tf.nn.relu_25[0][0]']
6)
batch_normalization_26 (BatchN (None, 128, 128, 25 1024 ['conv2d_28[0][0]']
ormalization) 6)
tf.nn.relu_26 (TFOpLambda) (None, 128, 128, 25 0 ['batch_normalization_26[0][0]']
6)
up_sampling2d_8 (UpSampling2D) (None, 512, 512, 25 0 ['tf.nn.relu_26[0][0]']
6)
conv2d_29 (Conv2D) (None, 512, 512, 4) 1028 ['up_sampling2d_8[0][0]']
==================================================================================================
Total params: 11,853,124
Trainable params: 11,820,388
Non-trainable params: 32,736 }
compiler and fitter which error occurs when I wanna fit the model
from keras import backend as K
def jacard_coef(y_true, y_pred):
y_true_f = K.flatten(y_true)
y_pred_f = K.flatten(y_pred)
intersection = K.sum(y_true_f * y_pred_f)
return (intersection + 1.0) / (K.sum(y_true_f) + K.sum(y_pred_f) - intersection + 1.0)
def jacard_coef_loss(y_true, y_pred):
return -jacard_coef(y_true, y_pred)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),loss = jacard_coef_loss, metrics = jacard_coef)
history = model.fit(train_dataset, validation_data=val_dataset, epochs=10)

Related

ValueError: `logits` and `labels` must have the same shape, received ((None, 10) vs (None, 1))

I am running an Involution Model (based of this example), and I am constantly running into errors during the training stage. This is my error:
ValueError: `logits` and `labels` must have the same shape, received ((None, 10) vs (None, 1)).
Below is the relevant code for dataset loading:
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)
train_ds = train_datagen.flow_from_directory(
'data/train',
target_size=(150, 150),
batch_size=128,
class_mode='binary')
test_ds = test_datagen.flow_from_directory(
'data/test',
target_size=(150, 150),
batch_size=64,
class_mode='binary')`
And this is the code for training:
print("building the involution model...")
inputs = keras.Input(shape=(224, 224, 3))
x, _ = Involution(channel=3, group_number=1, kernel_size=3, stride=1, reduction_ratio=2, name="inv_1")(inputs)
x = keras.layers.ReLU()(x)
x = keras.layers.MaxPooling2D((2, 2))(x)
x, _ = Involution(
channel=3, group_number=1, kernel_size=3, stride=1, reduction_ratio=2, name="inv_2")(x)
x = keras.layers.ReLU()(x)
x = keras.layers.MaxPooling2D((2, 2))(x)
x, _ = Involution(
channel=3, group_number=1, kernel_size=3, stride=1, reduction_ratio=2, name="inv_3")(x)
x = keras.layers.ReLU()(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(64, activation="relu")(x)
outputs = keras.layers.Dense(10)(x)
inv_model = keras.Model(inputs=[inputs], outputs=[outputs], name="inv_model")
print("compiling the involution model...")
inv_model.compile(
optimizer="adam",
loss=keras.losses.BinaryCrossentropy(from_logits=True),
metrics=["accuracy"],
)
print("inv model training...")
inv_hist = inv_model.fit(train_ds, epochs=20, validation_data=test_ds)`
The model itself the same used by Keras, and I have not changed anything except to use my own dataset instead of the CIFAR dataset (model works for me with this dataset). So I am sure there is an error in my data loading, but I am unable to identify what that is.
Model Summary:
Model: "inv_model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_14 (InputLayer) [(None, 224, 224, 3)] 0
inv_1 (Involution) ((None, 224, 224, 3), 26
(None, 224, 224, 9, 1,
1))
re_lu_39 (ReLU) (None, 224, 224, 3) 0
max_pooling2d_26 (MaxPoolin (None, 112, 112, 3) 0
g2D)
inv_2 (Involution) ((None, 112, 112, 3), 26
(None, 112, 112, 9, 1,
1))
re_lu_40 (ReLU) (None, 112, 112, 3) 0
max_pooling2d_27 (MaxPoolin (None, 56, 56, 3) 0
g2D)
inv_3 (Involution) ((None, 56, 56, 3), 26
(None, 56, 56, 9, 1, 1)
)
re_lu_41 (ReLU) (None, 56, 56, 3) 0
flatten_15 (Flatten) (None, 9408) 0
dense_26 (Dense) (None, 64) 602176
dense_27 (Dense) (None, 10) 650
=================================================================
When you called the train_datagen.flow_from_directory() function, you used class_mode='binary' which means you will have the labels of your images as 0 and 1 only, whereas you are have total 10 predictions i.e. 10 neurons in your final output layer. Hence the labels and logits dosen't match.
Solution: Use class_mode='categorical' which means that there will be as many labels as the number of classes. Do the same in test_datagen as well.

Error on custom dataset dimensions feeding transfer model in TensorFLow

Can someone explain this TensorFlow error for me, I'm having trouble understanding what I am doing wrong.
I have a dataset in Tensorflow constructed with a generator. When I test the output of the generator, output dimensions look correct (224 x 224 x 1). But when I try to train the model, I get an error:
WARNING:tensorflow:Model was constructed with shape (None, 224, 224, 1) for input
KerasTensor(type_spec=TensorSpec(shape=(None, 224, 224, 1), dtype=tf.float32,
name='input_2'), name='input_2', description="created by layer 'input_2'"),
but it was called on an input with incompatible shape (224, 224, 1, 1).
I'm unsure why the dimension of this output has an extra 1 at the end.
Here is the code to create the generator and model. df is a dataframe with file-paths to data and labels. The data are 2D matrices of variable dimensions. I'm using cv2.resize to make them 224x224 and then np.reshape to transform dimensions to (224x224x1). Then I yield the result.
def datagen_row():
# ======================== #
# Import data
# ======================== #
df = get_data()
rowsize = 224
colsize = 224
# ======================== #
#
# ======================== #
for row in range(len(df)):
data = get_data_from_filepath(df.iloc[row].file_path)
data = cv2.resize(data, dsize=(rowsize, colsize), interpolation=cv2.INTER_CUBIC)
labels = df.iloc[row].label
data = data.reshape( 224, 224, 1)
yield data, labels
dataset = tf.data.Dataset.from_generator(
datagen_row,
output_signature=(
tf.TensorSpec(shape = (int(os.getenv('rowsize')), int(os.getenv('colsize')), 1), dtype=tf.float32, name=None),
tf.TensorSpec(shape=(), dtype=tf.int64, name=None)
)
)
Testing the following I get what I expected:
iterator = iter(dataset.batch(8))
x = iterator.get_next()
x[0].shape # TensorShape([8, 224, 224, 1])
x[1].shape # TensorShape([8])
x[0] # <tf.Tensor: shape=(8, 224, 224, 1), dtype=float32, numpy=array(...
x[1] # <tf.Tensor: shape=(8,), dtype=int64, numpy=array([1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)>
I'm trying to plug this into InceptionV3 model to do a classification
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import Input
from tensorflow.keras import layers
origModel = InceptionV3(weights = 'imagenet', include_top = False)
inputs = layers.Input(shape = (224, 224, 1))
modified_inputs = layers.Conv2D(3, 3, padding = 'same', activation='relu')(inputs)
x = origModel(modified_inputs)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(1024, activation = 'relu')(x)
x = layers.Dense(512, activation = 'relu')(x)
x = layers.Dense(256, activation = 'relu')(x)
x = layers.Dense(128, activation = 'relu')(x)
x = layers.Dense(64, activation = 'relu')(x)
x = layers.Dense(32, activation = 'relu')(x)
outputs = layers.Dense(2)(x)
model = tf.keras.Model(inputs, outputs)
model.summary() # 24.6 M trainable params
for layer in origModel.layers:
layer.trainable = False
model.summary() # now shows 2.8 M trainable params
model.compile(
optimizer = 'adam',
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
metrics = ['accuracy']
)
model.fit(dataset, epochs = 1, verbose = True, batch_size = 32)
Here is the output of model.summary
model.summary()
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 224, 224, 1)] 0
conv2d_94 (Conv2D) (None, 224, 224, 3) 30
inception_v3 (Functional) (None, None, None, 2048) 21802784
global_average_pooling2d (G (None, 2048) 0
lobalAveragePooling2D)
dense (Dense) (None, 1024) 2098176
dense_1 (Dense) (None, 512) 524800
dense_2 (Dense) (None, 256) 131328
dense_3 (Dense) (None, 128) 32896
dense_4 (Dense) (None, 64) 8256
dense_5 (Dense) (None, 32) 2080
dense_6 (Dense) (None, 2) 66
=================================================================
Total params: 24,600,416
Trainable params: 2,797,632
Non-trainable params: 21,802,784
_________________________________________________________________
This code worked after changing
model.fit(dataset, epochs = 1, verbose = True, batch_size = 32)
to
model.fit(dataset.batch(2), epochs = 1, verbose = True, batch_size = 32)
So... I will have to look into using dataset.batch versus batch_size in model.fit

Change Model input_shape but got an : ValueError: Input 0 of layer dense_44 is incompatible with the layer

I am new to python and DL.
Please help me to correct the error.
This class was originly created with mnist dataset (28 x 28) I tried to adapt it to my work and the image that I am using are (224 x 224). I changed the input image shape but still have the incompatible shape image and the model still use the old shapes of mnist.
Knowng that the that I am using: X_train=(676, 224, 224)/y_train(676,)/X_test(170, 224, 224)/y_test(170,)
The code :
from __future__ import print_function, division
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, concatenate
from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.utils import to_categorical
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
class INFOGAN():
def __init__(self):
self.img_rows = 224
self.img_cols = 224
self.channels = 1
self.num_classes = 3
self.img_shape = (self.img_rows, self.img_cols, self.channels)
self.latent_dim = 72
optimizer = Adam(0.0002, 0.5)
losses = ['binary_crossentropy', self.mutual_info_loss]
# Build and the discriminator and recognition network
self.discriminator, self.auxilliary = self.build_disk_and_q_net()
self.discriminator.compile(loss=['binary_crossentropy'],
optimizer=optimizer,
metrics=['accuracy'])
# Build and compile the recognition network Q
self.auxilliary.compile(loss=[self.mutual_info_loss],
optimizer=optimizer,
metrics=['accuracy'])
# Build the generator
self.generator = self.build_generator()
# The generator takes noise and the target label as input
# and generates the corresponding digit of that label
gen_input = Input(shape=(self.latent_dim,))
img = self.generator(gen_input)
# For the combined model we will only train the generator
self.discriminator.trainable = False
# The discriminator takes generated image as input and determines validity
valid = self.discriminator(img)
# The recognition network produces the label
target_label = self.auxilliary(img)
# The combined model (stacked generator and discriminator)
self.combined = Model(gen_input, [valid, target_label])
self.combined.compile(loss=losses,
optimizer=optimizer)
def build_generator(self):
model = Sequential()
model.add(Dense(128 * 7 * 7, activation="relu", input_dim=self.latent_dim))
model.add(Reshape((7, 7, 128)))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(128, kernel_size=3, padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(64, kernel_size=3, padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(momentum=0.8))
model.add(Conv2D(self.channels, kernel_size=3, padding='same'))
model.add(Activation("tanh"))
gen_input = Input(shape=(self.latent_dim,))
img = model(gen_input)
model.summary()
return Model(gen_input, img)
def build_disk_and_q_net(self):
img = Input(shape=self.img_shape)
# Shared layers between discriminator and recognition network
model = Sequential()
model.add(Conv2D(64, kernel_size=3, strides=2, input_shape=self.img_shape, padding="same"))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(Conv2D(128, kernel_size=3, strides=2, padding="same"))
model.add(ZeroPadding2D(padding=((0,1),(0,1))))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(BatchNormalization(momentum=0.8))
model.add(Conv2D(256, kernel_size=3, strides=2, padding="same"))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(BatchNormalization(momentum=0.8))
model.add(Conv2D(512, kernel_size=3, strides=2, padding="same"))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(BatchNormalization(momentum=0.8))
model.add(Flatten())
img_embedding = model(img)
# Discriminator
validity = Dense(1, activation='sigmoid')(img_embedding)
# Recognition
q_net = Dense(128, activation='relu')(img_embedding)
label = Dense(self.num_classes, activation='softmax')(q_net)
# Return discriminator and recognition network
return Model(img, validity), Model(img, label)
def mutual_info_loss(self, c, c_given_x):
"""The mutual information metric we aim to minimize"""
eps = 1e-8
conditional_entropy = K.mean(- K.sum(K.log(c_given_x + eps) * c, axis=1))
entropy = K.mean(- K.sum(K.log(c + eps) * c, axis=1))
return conditional_entropy + entropy
def sample_generator_input(self, batch_size):
# Generator inputs
sampled_noise = np.random.normal(0, 1, (batch_size, 62))
sampled_labels = np.random.randint(0, self.num_classes, batch_size).reshape(-1, 1)
sampled_labels = to_categorical(sampled_labels, num_classes=self.num_classes)
return sampled_noise, sampled_labels
def train(self, epochs, batch_size=128, sample_interval=50):
# Rescale -1 to 1
X_train = (X_train.astype(np.float32) - 127.5) / 127.5
X_train = np.expand_dims(X_train, axis=3)
y_train = y_train.reshape(-1, 1)
# Adversarial ground truths
valid = np.ones((batch_size, 1))
fake = np.zeros((batch_size, 1))
for epoch in range(epochs):
# ---------------------
# Train Discriminator
# ---------------------
# Select a random half batch of images
idx = np.random.randint(0, X_train.shape[0], batch_size)
imgs = X_train[idx]
# Sample noise and categorical labels
sampled_noise, sampled_labels = self.sample_generator_input(batch_size)
gen_input = np.concatenate((sampled_noise, sampled_labels), axis=1)
# Generate a half batch of new images
gen_imgs = self.generator.predict(gen_input)
# Train on real and generated data
d_loss_real = self.discriminator.train_on_batch(imgs, valid)
d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
# Avg. loss
d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
# ---------------------
# Train Generator and Q-network
# ---------------------
g_loss = self.combined.train_on_batch(gen_input, [valid, sampled_labels])
# Plot the progress
print ("%d [D loss: %.2f, acc.: %.2f%%] [Q loss: %.2f] [G loss: %.2f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss[1], g_loss[2]))
# If at save interval => save generated image samples
if epoch % sample_interval == 0:
self.sample_images(epoch)
def sample_images(self, epoch):
r, c = 10, 10
fig, axs = plt.subplots(r, c)
for i in range(c):
sampled_noise, _ = self.sample_generator_input(c)
label = to_categorical(np.full(fill_value=i, shape=(r,1)), num_classes=self.num_classes)
gen_input = np.concatenate((sampled_noise, label), axis=1)
gen_imgs = self.generator.predict(gen_input)
gen_imgs = 0.5 * gen_imgs + 0.5
for j in range(r):
axs[j,i].imshow(gen_imgs[j,:,:,0], cmap='gray')
axs[j,i].axis('off')
fig.savefig("images/%d.png" % epoch)
plt.close()
def save_model(self):
def save(model, model_name):
model_path = "saved_model/%s.json" % model_name
weights_path = "saved_model/%s_weights.hdf5" % model_name
options = {"file_arch": model_path,
"file_weight": weights_path}
json_string = model.to_json()
open(options['file_arch'], 'w').write(json_string)
model.save_weights(options['file_weight'])
save(self.generator, "generator")
save(self.discriminator, "discriminator")
if __name__ == '__main__':
infogan = INFOGAN()
infogan.train(epochs=50000, batch_size=128, sample_interval=50)
the error :
Model: "sequential_23"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_47 (Dense) (None, 6272) 457856
_________________________________________________________________
reshape_11 (Reshape) (None, 7, 7, 128) 0
_________________________________________________________________
batch_normalization_87 (Batc (None, 7, 7, 128) 512
_________________________________________________________________
up_sampling2d_40 (UpSampling (None, 14, 14, 128) 0
_________________________________________________________________
conv2d_99 (Conv2D) (None, 14, 14, 128) 147584
_________________________________________________________________
activation_42 (Activation) (None, 14, 14, 128) 0
_________________________________________________________________
batch_normalization_88 (Batc (None, 14, 14, 128) 512
_________________________________________________________________
up_sampling2d_41 (UpSampling (None, 28, 28, 128) 0
_________________________________________________________________
conv2d_100 (Conv2D) (None, 28, 28, 64) 73792
_________________________________________________________________
activation_43 (Activation) (None, 28, 28, 64) 0
_________________________________________________________________
batch_normalization_89 (Batc (None, 28, 28, 64) 256
_________________________________________________________________
conv2d_101 (Conv2D) (None, 28, 28, 1) 577
_________________________________________________________________
activation_44 (Activation) (None, 28, 28, 1) 0
=================================================================
Total params: 681,089
Trainable params: 680,449
Non-trainable params: 640
_________________________________________________________________
WARNING:tensorflow:Model was constructed with shape (None, 224, 224, 1) for input Tensor("input_22:0", shape=(None, 224, 224, 1), dtype=float32), but it was called on an input with incompatible shape (None, 28, 28, 1).
WARNING:tensorflow:Model was constructed with shape (None, 224, 224, 1) for input Tensor("conv2d_95_input:0", shape=(None, 224, 224, 1), dtype=float32), but it was called on an input with incompatible shape (None, 28, 28, 1).
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-45-60a1c6b0bc8b> in <module>()
225
226 if __name__ == '__main__':
--> 227 infogan = INFOGAN()
228 infogan.train(epochs=50000, batch_size=128, sample_interval=50)
7 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
214 ' incompatible with the layer: expected axis ' + str(axis) +
215 ' of input shape to have value ' + str(value) +
--> 216 ' but received input with shape ' + str(shape))
217 # Check shape.
218 if spec.shape is not None:
ValueError: Input 0 of layer dense_44 is incompatible with the layer: expected axis -1 of input shape to have value 115200 but received input with shape [None, 2048]
You forgot to change the architecture of the generator. The generator's output shape and the discriminator's input shape have to match. That's what causing the error.
To fix it, you need to fix the architecture. The generator produces images in shape (28, 28, 1), but you want (224, 224, 1). The shape the architecture produces is the result of the architecture itself and its parameters.
So I added two Upsampling layers and changed the size of the other layers to match the discriminator's output.
Also, I removed ZeroPadding2D layer from discriminator, since it made the shape odd (15, 15, ..), and therefore it was impossible to match the same size in the generator.
Here's the code:
def build_generator(self):
model = Sequential()
model.add(Dense(512 * 14 * 14, activation="relu", input_dim=self.latent_dim))
model.add(Reshape((14, 14, 512)))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(256, kernel_size=3, padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(128, kernel_size=3, padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(64, kernel_size=3, padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(self.channels, kernel_size=3, padding='same'))
model.add(Activation("tanh"))
gen_input = Input(shape=(self.latent_dim,))
img = model(gen_input)
model.summary()
return Model(gen_input, img)
def build_disk_and_q_net(self):
img = Input(shape=self.img_shape)
# Shared layers between discriminator and recognition network
model = Sequential()
model.add(Conv2D(64, kernel_size=3, strides=2, input_shape=self.img_shape, padding="same"))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(Conv2D(128, kernel_size=3, strides=2, padding="same"))
#model.add(ZeroPadding2D(padding=((0,1),(0,1))))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(BatchNormalization(momentum=0.8))
model.add(Conv2D(256, kernel_size=3, strides=2, padding="same"))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(BatchNormalization(momentum=0.8))
model.add(Conv2D(512, kernel_size=3, strides=2, padding="same"))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(BatchNormalization(momentum=0.8))
model.add(Flatten())
model.summary()
img_embedding = model(img)
# Discriminator
validity = Dense(1, activation='sigmoid')(img_embedding)
# Recognition
q_net = Dense(128, activation='relu')(img_embedding)
label = Dense(self.num_classes, activation='softmax')(q_net)
# Return discriminator and recognition network
return Model(img, validity), Model(img, label)
And the summaries:
Model: "sequential_14"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_53 (Conv2D) (None, 112, 112, 64) 640
_________________________________________________________________
leaky_re_lu_28 (LeakyReLU) (None, 112, 112, 64) 0
_________________________________________________________________
dropout_28 (Dropout) (None, 112, 112, 64) 0
_________________________________________________________________
conv2d_54 (Conv2D) (None, 56, 56, 128) 73856
_________________________________________________________________
leaky_re_lu_29 (LeakyReLU) (None, 56, 56, 128) 0
_________________________________________________________________
dropout_29 (Dropout) (None, 56, 56, 128) 0
_________________________________________________________________
batch_normalization_46 (Batc (None, 56, 56, 128) 512
_________________________________________________________________
conv2d_55 (Conv2D) (None, 28, 28, 256) 295168
_________________________________________________________________
leaky_re_lu_30 (LeakyReLU) (None, 28, 28, 256) 0
_________________________________________________________________
dropout_30 (Dropout) (None, 28, 28, 256) 0
_________________________________________________________________
batch_normalization_47 (Batc (None, 28, 28, 256) 1024
_________________________________________________________________
conv2d_56 (Conv2D) (None, 14, 14, 512) 1180160
_________________________________________________________________
leaky_re_lu_31 (LeakyReLU) (None, 14, 14, 512) 0
_________________________________________________________________
dropout_31 (Dropout) (None, 14, 14, 512) 0
_________________________________________________________________
batch_normalization_48 (Batc (None, 14, 14, 512) 2048
_________________________________________________________________
flatten_7 (Flatten) (None, 100352) 0
=================================================================
Total params: 1,553,408
Trainable params: 1,551,616
Non-trainable params: 1,792
_________________________________________________________________
Model: "sequential_15"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_31 (Dense) (None, 100352) 7325696
_________________________________________________________________
reshape_7 (Reshape) (None, 14, 14, 512) 0
_________________________________________________________________
batch_normalization_49 (Batc (None, 14, 14, 512) 2048
_________________________________________________________________
up_sampling2d_18 (UpSampling (None, 28, 28, 512) 0
_________________________________________________________________
conv2d_57 (Conv2D) (None, 28, 28, 256) 1179904
_________________________________________________________________
activation_25 (Activation) (None, 28, 28, 256) 0
_________________________________________________________________
batch_normalization_50 (Batc (None, 28, 28, 256) 1024
_________________________________________________________________
up_sampling2d_19 (UpSampling (None, 56, 56, 256) 0
_________________________________________________________________
conv2d_58 (Conv2D) (None, 56, 56, 128) 295040
_________________________________________________________________
activation_26 (Activation) (None, 56, 56, 128) 0
_________________________________________________________________
batch_normalization_51 (Batc (None, 56, 56, 128) 512
_________________________________________________________________
up_sampling2d_20 (UpSampling (None, 112, 112, 128) 0
_________________________________________________________________
conv2d_59 (Conv2D) (None, 112, 112, 64) 73792
_________________________________________________________________
activation_27 (Activation) (None, 112, 112, 64) 0
_________________________________________________________________
batch_normalization_52 (Batc (None, 112, 112, 64) 256
_________________________________________________________________
up_sampling2d_21 (UpSampling (None, 224, 224, 64) 0
_________________________________________________________________
conv2d_60 (Conv2D) (None, 224, 224, 1) 577
_________________________________________________________________
activation_28 (Activation) (None, 224, 224, 1) 0
=================================================================
Total params: 8,878,849
Trainable params: 8,876,929
Non-trainable params: 1,920
_________________________________________________________________
EDIT:
Because you decreased the number of classes from 10 to 3, therefore you have to change the latent_dim parameter to 65. Notice that the method sample_generator_input generates noise of size 62 and labels of size number of classes, which then concatenates (size becomes 62 + 3 = 65).
The generator is defined to accept input_dim of self.latent_dim, it would be appropriate to calculate the latent_dim in the constructor based on the number of classes instead: self.latent_dim = 62 + self.num_classes.
Moreover, in method sample_images, there are hardcoded magical numbers.
How can one know what it means? I mean this: r, c = 10, 10.
I assume that it means number of classes. Since you changed it from 10 to 3 in your example, I suggest you change the line to:
r, c = self.num_classes, self.num_classes
Overall, the code is badly written and if you change a constant then it all breaks. Be careful when copying full pieces of code. Make sure you understand each and every part of it before copying.
Here's the full code:
from __future__ import print_function, division
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, concatenate
from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.utils import to_categorical
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
class INFOGAN():
def __init__(self):
self.img_rows = 224
self.img_cols = 224
self.channels = 1
self.num_classes = 3
self.img_shape = (self.img_rows, self.img_cols, self.channels)
self.latent_dim = 62 + self.num_classes
optimizer = Adam(0.0002, 0.5)
losses = ['binary_crossentropy', self.mutual_info_loss]
# Build and the discriminator and recognition network
self.discriminator, self.auxilliary = self.build_disk_and_q_net()
self.discriminator.compile(loss=['binary_crossentropy'],
optimizer=optimizer,
metrics=['accuracy'])
# Build and compile the recognition network Q
self.auxilliary.compile(loss=[self.mutual_info_loss],
optimizer=optimizer,
metrics=['accuracy'])
# Build the generator
self.generator = self.build_generator()
# The generator takes noise and the target label as input
# and generates the corresponding digit of that label
gen_input = Input(shape=(self.latent_dim,))
img = self.generator(gen_input)
# For the combined model we will only train the generator
self.discriminator.trainable = False
# The discriminator takes generated image as input and determines validity
valid = self.discriminator(img)
# The recognition network produces the label
target_label = self.auxilliary(img)
# The combined model (stacked generator and discriminator)
self.combined = Model(gen_input, [valid, target_label])
self.combined.compile(loss=losses,
optimizer=optimizer)
def build_generator(self):
model = Sequential()
model.add(Dense(512 * 14 * 14, activation="relu", input_dim=self.latent_dim))
model.add(Reshape((14, 14, 512)))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(256, kernel_size=3, padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(128, kernel_size=3, padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(64, kernel_size=3, padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(self.channels, kernel_size=3, padding='same'))
model.add(Activation("tanh"))
gen_input = Input(shape=(self.latent_dim,))
img = model(gen_input)
model.summary()
return Model(gen_input, img)
def build_disk_and_q_net(self):
img = Input(shape=self.img_shape)
# Shared layers between discriminator and recognition network
model = Sequential()
model.add(Conv2D(64, kernel_size=3, strides=2, input_shape=self.img_shape, padding="same"))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(Conv2D(128, kernel_size=3, strides=2, padding="same"))
#model.add(ZeroPadding2D(padding=((0,1),(0,1))))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(BatchNormalization(momentum=0.8))
model.add(Conv2D(256, kernel_size=3, strides=2, padding="same"))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(BatchNormalization(momentum=0.8))
model.add(Conv2D(512, kernel_size=3, strides=2, padding="same"))
model.add(LeakyReLU(alpha=0.2))
model.add(Dropout(0.25))
model.add(BatchNormalization(momentum=0.8))
model.add(Flatten())
model.summary()
img_embedding = model(img)
# Discriminator
validity = Dense(1, activation='sigmoid')(img_embedding)
# Recognition
q_net = Dense(128, activation='relu')(img_embedding)
label = Dense(self.num_classes, activation='softmax')(q_net)
print(label.shape)
# Return discriminator and recognition network
return Model(img, validity), Model(img, label)
def mutual_info_loss(self, c, c_given_x):
"""The mutual information metric we aim to minimize"""
eps = 1e-8
conditional_entropy = K.mean(- K.sum(K.log(c_given_x + eps) * c, axis=1))
entropy = K.mean(- K.sum(K.log(c + eps) * c, axis=1))
return conditional_entropy + entropy
def sample_generator_input(self, batch_size):
# Generator inputs
sampled_noise = np.random.normal(0, 1, (batch_size, 62))
sampled_labels = np.random.randint(0, self.num_classes, batch_size).reshape(-1, 1)
print(sampled_labels)
sampled_labels = to_categorical(sampled_labels, num_classes=self.num_classes)
return sampled_noise, sampled_labels
def train(self, epochs, batch_size=128, sample_interval=50):
X_train = np.ones([batch_size, 224, 224])
y_train = np.zeros([batch_size,])
# Rescale -1 to 1
X_train = (X_train.astype(np.float32) - 127.5) / 127.5
X_train = np.expand_dims(X_train, axis=3)
y_train = y_train.reshape(-1, 1)
# Adversarial ground truths
valid = np.ones((batch_size, 1))
fake = np.zeros((batch_size, 1))
for epoch in range(epochs):
# ---------------------
# Train Discriminator
# ---------------------
# Select a random half batch of images
idx = np.random.randint(0, X_train.shape[0], batch_size)
imgs = X_train[idx]
# Sample noise and categorical labels
sampled_noise, sampled_labels = self.sample_generator_input(batch_size)
gen_input = np.concatenate((sampled_noise, sampled_labels), axis=1)
print(sampled_labels.shape, batch_size)
# Generate a half batch of new images
gen_imgs = self.generator.predict(gen_input)
# Train on real and generated data
d_loss_real = self.discriminator.train_on_batch(imgs, valid)
d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
# Avg. loss
d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
# ---------------------
# Train Generator and Q-network
# ---------------------
g_loss = self.combined.train_on_batch(gen_input, [valid, sampled_labels])
# Plot the progress
print ("%d [D loss: %.2f, acc.: %.2f%%] [Q loss: %.2f] [G loss: %.2f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss[1], g_loss[2]))
# If at save interval => save generated image samples
if epoch % sample_interval == 0:
self.sample_images(epoch)
def sample_images(self, epoch):
r, c = self.num_classes, self.num_classes
fig, axs = plt.subplots(r, c)
for i in range(c):
sampled_noise, _ = self.sample_generator_input(c)
label = to_categorical(np.full(fill_value=i, shape=(r,1)), num_classes=self.num_classes)
gen_input = np.concatenate((sampled_noise, label), axis=1)
gen_imgs = self.generator.predict(gen_input)
gen_imgs = 0.5 * gen_imgs + 0.5
for j in range(r):
axs[j,i].imshow(gen_imgs[j,:,:,0], cmap='gray')
axs[j,i].axis('off')
fig.savefig("images/%d.png" % epoch)
plt.close()
def save_model(self):
def save(model, model_name):
model_path = "saved_model/%s.json" % model_name
weights_path = "saved_model/%s_weights.hdf5" % model_name
options = {"file_arch": model_path,
"file_weight": weights_path}
json_string = model.to_json()
open(options['file_arch'], 'w').write(json_string)
model.save_weights(options['file_weight'])
save(self.generator, "generator")
save(self.discriminator, "discriminator")
if __name__ == '__main__':
infogan = INFOGAN()
infogan.train(epochs=50000, batch_size=8, sample_interval=50)

TypeError: object of type 'Conv2DTranspose' has no len()

I'm coding an autoencoder using Keras and I keep getting the below error. I think it's related to adding the arg keras_initializer since I got this error before for Conv2D, added the initializer and Conv2D had length. Although, since I'm using tf.keras.layers.reshape, this isn't a valid argument.
Here is the entire error traceback.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-33-c8370b57aa14> in <module>()
57
58
---> 59 autoencoder = keras.Model(inputs = encoder_input, outputs = decoder_output, name='autoencoder')
60 autoencoder.summary()
61
4 frames
/usr/local/lib/python3.6/dist-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
/usr/local/lib/python3.6/dist-packages/keras/engine/network.py in __init__(self, *args, **kwargs)
91 'inputs' in kwargs and 'outputs' in kwargs):
92 # Graph network
---> 93 self._init_graph_network(*args, **kwargs)
94 else:
95 # Subclassed network
/usr/local/lib/python3.6/dist-packages/keras/engine/network.py in _init_graph_network(self, inputs, outputs, name)
229 # Keep track of the network's nodes and layers.
230 nodes, nodes_by_depth, layers, layers_by_depth = _map_graph_network(
--> 231 self.inputs, self.outputs)
232 self._network_nodes = nodes
233 self._nodes_by_depth = nodes_by_depth
/usr/local/lib/python3.6/dist-packages/keras/engine/network.py in _map_graph_network(inputs, outputs)
1364 layer=layer,
1365 node_index=node_index,
-> 1366 tensor_index=tensor_index)
1367
1368 for node in reversed(nodes_in_decreasing_depth):
/usr/local/lib/python3.6/dist-packages/keras/engine/network.py in build_map(tensor, finished_nodes, nodes_in_progress, layer, node_index, tensor_index)
1345
1346 # Propagate to all previous tensors connected to this node.
-> 1347 for i in range(len(node.inbound_layers)):
1348 x = node.input_tensors[i]
1349 layer = node.inbound_layers[i]
TypeError: object of type 'Conv2DTranspose' has no len()
This is my code:
import tensorflow as tf
import keras
import numpy as np
import tensorflow.keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10
from keras.layers import Input, Conv2DTranspose
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
num_classes = 10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
num_classes = 10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
#plt.imshow(x_train[1])
encoder_input = tf.keras.layers.Input(shape=(32, 32, 3), name="input")
x = tf.keras.layers.Conv2D(16, 3,activation = 'relu', kernel_initializer = keras.initializers.RandomUniform)(encoder_input)
x = tf.keras.layers.Conv2D(32, 3, activation = 'relu')(x)
x = tf.keras.layers.MaxPooling2D(3)(x)
x = tf.keras.layers.Conv2D(32, 3,activation = 'relu')(x)
x = tf.keras.layers.Conv2D(16, 3, activation = 'relu')(x)
encoder_output = tf.keras.layers.GlobalMaxPooling2D()(x)
encoder = tf.keras.Model(inputs=encoder_input, outputs=encoder_output, name = 'encoder')
encoder.summary()
#Decoder
decoder_input = tf.keras.layers.Reshape((4, 4, 1))(encoder_output)
x = tf.keras.layers.Conv2DTranspose(16, 3, activation = 'relu')(decoder_input)
x = tf.keras.layers.Conv2DTranspose(32, 3, activation = 'relu')(x)
x = tf.keras.layers.UpSampling2D(3)(x)
x = tf.keras.layers.Conv2DTranspose(16, 3, activation = 'relu')(x)
decoder_output = tf.keras.layers.Conv2DTranspose(1, 3, activation = 'relu')(x)
autoencoder = keras.Model(inputs = encoder_input, outputs = decoder_output, name='autoencoder')
autoencoder.summary()
You are mixing tf.keras and keras imports, and this is not supported and it will not work. You need to choose one implementation and import all modules/classes from it.
Use from tensorflow import keras for above case.
Updated code:
import tensorflow as tf
from tensorflow import keras
num_classes = 10
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
encoder_input = tf.keras.layers.Input(shape=(32, 32, 3), name="input")
x = tf.keras.layers.Conv2D(16, 3,activation = 'relu', kernel_initializer = keras.initializers.RandomUniform)(encoder_input)
x = tf.keras.layers.Conv2D(32, 3, activation = 'relu')(x)
x = tf.keras.layers.MaxPooling2D(3)(x)
x = tf.keras.layers.Conv2D(32, 3,activation = 'relu')(x)
x = tf.keras.layers.Conv2D(16, 3, activation = 'relu')(x)
encoder_output = tf.keras.layers.GlobalMaxPooling2D()(x)
encoder = tf.keras.Model(inputs=encoder_input, outputs=encoder_output, name = 'encoder')
encoder.summary()
#Decoder
decoder_input = tf.keras.layers.Reshape((4, 4, 1))(encoder_output)
x = tf.keras.layers.Conv2DTranspose(16, 3, activation = 'relu')(decoder_input)
x = tf.keras.layers.Conv2DTranspose(32, 3, activation = 'relu')(x)
x = tf.keras.layers.UpSampling2D(3)(x)
x = tf.keras.layers.Conv2DTranspose(16, 3, activation = 'relu')(x)
decoder_output = tf.keras.layers.Conv2DTranspose(1, 3, activation = 'relu')(x)
autoencoder = keras.Model(inputs = encoder_input, outputs = decoder_output, name='autoencoder')
autoencoder.summary()
output:
x_train shape: (60000, 32, 32, 3)
60000 train samples
10000 test samples
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input (InputLayer) (None, 32, 32, 3) 0
_________________________________________________________________
conv2d_35 (Conv2D) (None, 30, 30, 16) 448
_________________________________________________________________
conv2d_36 (Conv2D) (None, 28, 28, 32) 4640
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 9, 9, 32) 0
_________________________________________________________________
conv2d_37 (Conv2D) (None, 7, 7, 32) 9248
_________________________________________________________________
conv2d_38 (Conv2D) (None, 5, 5, 16) 4624
_________________________________________________________________
global_max_pooling2d_8 (Glob (None, 16) 0
=================================================================
Total params: 18,960
Trainable params: 18,960
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input (InputLayer) (None, 32, 32, 3) 0
_________________________________________________________________
conv2d_35 (Conv2D) (None, 30, 30, 16) 448
_________________________________________________________________
conv2d_36 (Conv2D) (None, 28, 28, 32) 4640
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 9, 9, 32) 0
_________________________________________________________________
conv2d_37 (Conv2D) (None, 7, 7, 32) 9248
_________________________________________________________________
conv2d_38 (Conv2D) (None, 5, 5, 16) 4624
_________________________________________________________________
global_max_pooling2d_8 (Glob (None, 16) 0
_________________________________________________________________
reshape_6 (Reshape) (None, 4, 4, 1) 0
_________________________________________________________________
conv2d_transpose_16 (Conv2DT (None, 6, 6, 16) 160
_________________________________________________________________
conv2d_transpose_17 (Conv2DT (None, 8, 8, 32) 4640
_________________________________________________________________
up_sampling2d_4 (UpSampling2 (None, 24, 24, 32) 0
_________________________________________________________________
conv2d_transpose_18 (Conv2DT (None, 26, 26, 16) 4624
_________________________________________________________________
conv2d_transpose_19 (Conv2DT (None, 28, 28, 1) 145
=================================================================
Total params: 28,529
Trainable params: 28,529
Non-trainable params: 0
_________________________________________________________________

Upsampling by decimal factor in Keras

I want to use an upsampling 2D layer in keras so that I can increase the image size by a decimal factor (in this case from [213,213] to [640,640]). The layer is compiled as expected, but when I want to train or predict on real images, they are upsampled only by the closest integer to the input factor. Any idea? Details below:
Network:
mp_size = (3,3)
inputs = Input(input_data.shape[1:])
lay1 = Conv2D(32, (3,3), strides=(1,1), activation='relu', padding='same', kernel_initializer='glorot_normal')(inputs)
lay2 = MaxPooling2D(pool_size=mp_size)(lay1)
lay3 = Conv2D(32, (3,3), strides=(1,1), activation='relu', padding='same', kernel_initializer='glorot_normal')(lay2)
size1=lay3.get_shape()[1:3]
size2=lay1.get_shape()[1:3]
us_size = size2[0].value/size1[0].value, size2[1].value/size1[1].value
lay4 = Concatenate(axis=-1)([UpSampling2D(size=us_size)(lay3),lay1])
lay5 = Conv2D(1, (1, 1), strides=(1,1), activation='sigmoid')(lay4)
model = Model(inputs=inputs, outputs=lay5)
Network summary when I use model.summary() :
____________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
====================================================================================================
input_4 (InputLayer) (None, 640, 640, 2) 0
____________________________________________________________________________________________________
conv2d_58 (Conv2D) (None, 640, 640, 32) 608 input_4[0][0]
____________________________________________________________________________________________________
max_pooling2d_14 (MaxPooling2D) (None, 213, 213, 32) 0 conv2d_58[0][0]
____________________________________________________________________________________________________
conv2d_59 (Conv2D) (None, 213, 213, 32) 9248 max_pooling2d_14[0][0]
____________________________________________________________________________________________________
up_sampling2d_14 (UpSampling2D) (None, 640.0, 640.0, 0 conv2d_59[0][0]
____________________________________________________________________________________________________
concatenate_14 (Concatenate) (None, 640.0, 640.0, 0 up_sampling2d_14[0][0]
conv2d_58[0][0]
____________________________________________________________________________________________________
conv2d_60 (Conv2D) (None, 640.0, 640.0, 65 concatenate_14[0][0]
====================================================================================================
Total params: 9,921
Trainable params: 9,921
Non-trainable params: 0
Error when training the network:
InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [1,639,639,32] vs. shape[1] = [1,640,640,32]
[[Node: concatenate_14/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](up_sampling2d_14/ResizeNearestNeighbor, conv2d_58/Relu, concatenate_14/concat/axis)]]
It can be resolved by using the below code:
from keras.layers import UpSampling2D
from keras.utils.generic_utils import transpose_shape
class UpSamplingUnet(UpSampling2D):
def compute_output_shape(self, input_shape):
size_all_dims = (1,) + self.size + (1,)
spatial_axes = list(range(1, 1 + self.rank))
size_all_dims = transpose_shape(size_all_dims,
self.data_format,
spatial_axes)
output_shape = list(input_shape)
for dim in range(len(output_shape)):
if output_shape[dim] is not None:
output_shape[dim] *= size_all_dims[dim]
output_shape[dim]=int(output_shape[dim])
return tuple(output_shape)
Then alter UpSampling2D(size=us_size) to UpSamplingUnet(size=us_size).