How to initialize sample weights for multi-class segmentation? - tensorflow

I'm working on multi-class segmentation using Keras and U-Net.
My network outputs 12 classes through a softmax activation function; the shape of the output is (N, 288, 288, 12).
To fit the model I use sparse_categorical_crossentropy.
I want to set per-class weights to compensate for my unbalanced dataset.
I found this useful link and tried to implement it; since class_weight in Keras does not work for more than 2 classes, I used sample weights instead.
My code is:
import numpy as np
import tensorflow as tf

inputs = tf.keras.layers.Input((IMG_WIDHT, IMG_HEIGHT, IMG_CHANNELS))
smooth = 1.
s = tf.keras.layers.Lambda(lambda x: x / 255)(inputs)  # rescale pixel values to [0, 1]

# Contracting path (he_normal kernel initializer: start with sensible initial weights)
c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(s)
c1 = tf.keras.layers.Dropout(0.1)(c1)
c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c1)
p1 = tf.keras.layers.MaxPool2D((2, 2))(c1)

c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p1)
c2 = tf.keras.layers.Dropout(0.1)(c2)
c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c2)
p2 = tf.keras.layers.MaxPool2D((2, 2))(c2)

c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p2)
c3 = tf.keras.layers.Dropout(0.1)(c3)
c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c3)
p3 = tf.keras.layers.MaxPool2D((2, 2))(c3)

c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p3)
c4 = tf.keras.layers.Dropout(0.1)(c4)
c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c4)
p4 = tf.keras.layers.MaxPool2D((2, 2))(c4)

c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p4)
c5 = tf.keras.layers.Dropout(0.1)(c5)
c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c5)

# Expansive path
u6 = tf.keras.layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c5)
u6 = tf.keras.layers.concatenate([u6, c4])
c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u6)
c6 = tf.keras.layers.Dropout(0.2)(c6)
c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c6)
u7 = tf.keras.layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c6)
u7 = tf.keras.layers.concatenate([u7, c3])
c7 = tf.keras.layers.Conv2D(64, (2, 2), activation='relu', kernel_initializer='he_normal', padding='same')(u7)
c7 = tf.keras.layers.Dropout(0.2)(c7)
c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c7)
u8 = tf.keras.layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(c7)
u8 = tf.keras.layers.concatenate([u8, c2])
c8 = tf.keras.layers.Conv2D(32, (2, 2), activation='relu', kernel_initializer='he_normal', padding='same')(u8)
c8 = tf.keras.layers.Dropout(0.1)(c8)
c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c8)
u9 = tf.keras.layers.Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(c8)
u9 = tf.keras.layers.concatenate([u9, c1], axis=3)
c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u9)
c9 = tf.keras.layers.Dropout(0.1)(c9)
c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c9)

outputs = tf.keras.layers.Conv2D(12, (1, 1), activation='softmax')(c9)
outputs = tf.keras.layers.Flatten(data_format=None)(outputs)

model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
cc = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
model.compile(optimizer=cc, loss='sparse_categorical_crossentropy',
              metrics=['sparse_categorical_accuracy'], sample_weight_mode="temporal")
model.summary()
checkpointer = tf.keras.callbacks.ModelCheckpoint('chek12class3.h5', verbose=1, save_best_only=True)

print('############## Initial weights ############## : ', model.get_weights())

# callbacks = [tf.keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
#              tf.keras.callbacks.TensorBoard(log_dir='logs')]
# history = model.fit(train_generator, validation_split=0.1, batch_size=4, epochs=100, callbacks=callbacks)
class_weights = np.zeros((82944, 12))
class_weights[:, 0] += 7
class_weights[:, 1] += 10
class_weights[:, 2] += 2
class_weights[:, 3] += 3
class_weights[:, 4] += 4
class_weights[:, 5] += 5
class_weights[:, 6] += 6
class_weights[:, 7] += 50
class_weights[:, 8] += 8
class_weights[:, 9] += 9
class_weights[:, 10] += 50
class_weights[:, 11] += 11
history = model.fit(X_train, Y_train, validation_split=0.18, batch_size=1, epochs=60,
                    sample_weight=class_weights)
Here 82944 = 288 × 288, the height and width of my samples, and 12 is the number of classes.
I'm getting this error:
ValueError: Found a sample_weight array with shape (82944, 12) for an input with shape (481, 288, 288). sample_weight cannot be broadcast.
According to this link, sample_weight should work as (nbr_of_training_data, shape_of_training_data).
Then I added a Flatten layer before the output, and it still does not work.
The architecture of my model:
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 288, 288, 3) 0
__________________________________________________________________________________________________
lambda (Lambda) (None, 288, 288, 3) 0 input_1[0][0]
__________________________________________________________________________________________________
conv2d (Conv2D) (None, 288, 288, 16) 448 lambda[0][0]
__________________________________________________________________________________________________
dropout (Dropout) (None, 288, 288, 16) 0 conv2d[0][0]
__________________________________________________________________________________________________
conv2d_1 (Conv2D) (None, 288, 288, 16) 2320 dropout[0][0]
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 144, 144, 16) 0 conv2d_1[0][0]
__________________________________________________________________________________________________
conv2d_2 (Conv2D) (None, 144, 144, 32) 4640 max_pooling2d[0][0]
__________________________________________________________________________________________________
dropout_1 (Dropout) (None, 144, 144, 32) 0 conv2d_2[0][0]
__________________________________________________________________________________________________
conv2d_3 (Conv2D) (None, 144, 144, 32) 9248 dropout_1[0][0]
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D) (None, 72, 72, 32) 0 conv2d_3[0][0]
__________________________________________________________________________________________________
conv2d_4 (Conv2D) (None, 72, 72, 64) 18496 max_pooling2d_1[0][0]
__________________________________________________________________________________________________
dropout_2 (Dropout) (None, 72, 72, 64) 0 conv2d_4[0][0]
__________________________________________________________________________________________________
conv2d_5 (Conv2D) (None, 72, 72, 64) 36928 dropout_2[0][0]
__________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D) (None, 36, 36, 64) 0 conv2d_5[0][0]
__________________________________________________________________________________________________
conv2d_6 (Conv2D) (None, 36, 36, 128) 73856 max_pooling2d_2[0][0]
__________________________________________________________________________________________________
dropout_3 (Dropout) (None, 36, 36, 128) 0 conv2d_6[0][0]
__________________________________________________________________________________________________
conv2d_7 (Conv2D) (None, 36, 36, 128) 147584 dropout_3[0][0]
__________________________________________________________________________________________________
max_pooling2d_3 (MaxPooling2D) (None, 18, 18, 128) 0 conv2d_7[0][0]
__________________________________________________________________________________________________
conv2d_8 (Conv2D) (None, 18, 18, 256) 295168 max_pooling2d_3[0][0]
__________________________________________________________________________________________________
dropout_4 (Dropout) (None, 18, 18, 256) 0 conv2d_8[0][0]
__________________________________________________________________________________________________
conv2d_9 (Conv2D) (None, 18, 18, 256) 590080 dropout_4[0][0]
__________________________________________________________________________________________________
conv2d_transpose (Conv2DTranspo (None, 36, 36, 128) 131200 conv2d_9[0][0]
__________________________________________________________________________________________________
concatenate (Concatenate) (None, 36, 36, 256) 0 conv2d_transpose[0][0]
conv2d_7[0][0]
__________________________________________________________________________________________________
conv2d_10 (Conv2D) (None, 36, 36, 128) 295040 concatenate[0][0]
__________________________________________________________________________________________________
dropout_5 (Dropout) (None, 36, 36, 128) 0 conv2d_10[0][0]
__________________________________________________________________________________________________
conv2d_11 (Conv2D) (None, 36, 36, 128) 147584 dropout_5[0][0]
__________________________________________________________________________________________________
conv2d_transpose_1 (Conv2DTrans (None, 72, 72, 64) 32832 conv2d_11[0][0]
__________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 72, 72, 128) 0 conv2d_transpose_1[0][0]
conv2d_5[0][0]
__________________________________________________________________________________________________
conv2d_12 (Conv2D) (None, 72, 72, 64) 32832 concatenate_1[0][0]
__________________________________________________________________________________________________
dropout_6 (Dropout) (None, 72, 72, 64) 0 conv2d_12[0][0]
__________________________________________________________________________________________________
conv2d_13 (Conv2D) (None, 72, 72, 64) 36928 dropout_6[0][0]
__________________________________________________________________________________________________
conv2d_transpose_2 (Conv2DTrans (None, 144, 144, 32) 8224 conv2d_13[0][0]
__________________________________________________________________________________________________
concatenate_2 (Concatenate) (None, 144, 144, 64) 0 conv2d_transpose_2[0][0]
conv2d_3[0][0]
__________________________________________________________________________________________________
conv2d_14 (Conv2D) (None, 144, 144, 32) 8224 concatenate_2[0][0]
__________________________________________________________________________________________________
dropout_7 (Dropout) (None, 144, 144, 32) 0 conv2d_14[0][0]
__________________________________________________________________________________________________
conv2d_15 (Conv2D) (None, 144, 144, 32) 9248 dropout_7[0][0]
__________________________________________________________________________________________________
conv2d_transpose_3 (Conv2DTrans (None, 288, 288, 16) 2064 conv2d_15[0][0]
__________________________________________________________________________________________________
concatenate_3 (Concatenate) (None, 288, 288, 32) 0 conv2d_transpose_3[0][0]
conv2d_1[0][0]
__________________________________________________________________________________________________
conv2d_16 (Conv2D) (None, 288, 288, 16) 4624 concatenate_3[0][0]
__________________________________________________________________________________________________
dropout_8 (Dropout) (None, 288, 288, 16) 0 conv2d_16[0][0]
__________________________________________________________________________________________________
conv2d_17 (Conv2D) (None, 288, 288, 16) 2320 dropout_8[0][0]
__________________________________________________________________________________________________
conv2d_18 (Conv2D) (None, 288, 288, 12) 204 conv2d_17[0][0]
==================================================================================================
I thought this solution might work:
sample_weights = np.zeros(len(Y_train))
# your own weight corresponding here:
sample_weights[Y_train[Y_train==0]] = 7
sample_weights[Y_train[Y_train==1]] = 10
sample_weights[Y_train[Y_train==2]] = 2
sample_weights[Y_train[Y_train==3]] = 3
sample_weights[Y_train[Y_train==4]] = 4
sample_weights[Y_train[Y_train==5]] = 5
sample_weights[Y_train[Y_train==6]] = 6
sample_weights[Y_train[Y_train==7]] = 50
sample_weights[Y_train[Y_train==8]] = 8
sample_weights[Y_train[Y_train==9]] = 9
sample_weights[Y_train[Y_train==10]] = 50
sample_weights[Y_train[Y_train==11]] = 11
I'm getting this error:
ValueError: Found a sample_weight array with shape (481,). In order to use timestep-wise sample weighting, you should pass a 2D sample_weight array.

You are misusing sample_weight. As its name implies, it assigns a weight to each sample; so, although you have only 481 samples, you pass something of length 82944 (and of 2 dimensions, additionally), hence the expected error:
ValueError: Found a sample_weight array with shape (82944, 12) for an input with shape (481, 288, 288). sample_weight cannot be broadcast.
So, what you actually need is a 1D sample_weight array of length equal to the number of your training samples, with each element being the weight of the corresponding sample; that weight, in turn, should be the same for all samples of a given class, as you show.
Here is how you can do it using some dummy data y of 12 classes and only 30 samples:
import numpy as np
y = np.random.randint(12, size=30) # dummy data, 12 classes
y
# array([ 8, 0, 6, 8, 9, 9, 7, 11, 6, 4, 6, 3, 10, 8, 7, 7, 11,
# 2, 5, 8, 8, 1, 7, 2, 7, 9, 5, 2, 0, 0])
sample_weights = np.zeros(len(y))
# your own weight corresponding here:
sample_weights[y==0] = 7
sample_weights[y==1] = 10
sample_weights[y==2] = 2
sample_weights[y==3] = 3
sample_weights[y==4] = 4
sample_weights[y==5] = 5
sample_weights[y==6] = 6
sample_weights[y==7] = 50
sample_weights[y==8] = 8
sample_weights[y==9] = 9
sample_weights[y==10] = 50
sample_weights[y==11] = 11
sample_weights
# result:
array([ 8.,  7.,  6.,  8.,  9.,  9., 50., 11.,  6.,  4.,  6.,  3., 50.,
        8., 50., 50., 11.,  2.,  5.,  8.,  8., 10., 50.,  2., 50.,  9.,
        5.,  2.,  7.,  7.])
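As a side note, the same array can be built more compactly with NumPy fancy indexing; weights_per_class below is just the weights above collected into an array indexed by class id:
weights_per_class = np.array([7, 10, 2, 3, 4, 5, 6, 50, 8, 9, 50, 11], dtype=float)
sample_weights = weights_per_class[y]  # identical to the twelve assignments above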
Let's put them in a nice dataframe, for better viewing:
import pandas as pd
d = {'y': y, 'weight': sample_weights}
df = pd.DataFrame(d)
print(df.to_string(index=False))
# result:
y weight
8 8.0
0 7.0
6 6.0
8 8.0
9 9.0
9 9.0
7 50.0
11 11.0
6 6.0
4 4.0
6 6.0
3 3.0
10 50.0
8 8.0
7 50.0
7 50.0
11 11.0
2 2.0
5 5.0
8 8.0
8 8.0
1 10.0
7 50.0
2 2.0
7 50.0
9 9.0
5 5.0
2 2.0
0 7.0
0 7.0
Of course, you should then replace sample_weight=class_weights in your model.fit call with sample_weight=sample_weights.
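Side note: if you instead want per-pixel weighting (which is what sample_weight_mode="temporal" is meant for), Keras expects a 2D array of shape (n_samples, n_timesteps), as your second error message says. A minimal sketch of that idea, assuming Y_train holds integer labels with shape (n_samples, 288, 288) and that the targets are reshaped to match; treat it as a sketch, not a drop-in fix:
import numpy as np

# The per-class weights from the question, indexed by class id 0..11.
weights_per_class = np.array([7, 10, 2, 3, 4, 5, 6, 50, 8, 9, 50, 11], dtype=np.float32)

pixel_weights = weights_per_class[Y_train]                      # (n_samples, 288, 288)
pixel_weights = pixel_weights.reshape(len(Y_train), 288 * 288)  # (n_samples, 82944)

# model.fit(X_train, Y_train.reshape(len(Y_train), -1), ...,
#           sample_weight=pixel_weights)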

Related

How to merge 2 trained model in keras?

Good evening everyone,
I have 5 classes, each with 2000 images. I built two models with different model names; here is my model code:
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
                           input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(5, activation=tf.nn.softmax)
], name="Model1")
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_images, train_labels,
                    batch_size=128, epochs=30, validation_split=0.2)
model.save('f3_1st_model_seg.h5')

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
                           input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(5, activation=tf.nn.softmax)
], name="Model2")
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_images, train_labels,
                    batch_size=128, epochs=30, validation_split=0.2)
model.save('f3_2nd_model_seg.h5')
Then I used this code to merge the two models:
input_shape = [150, 150, 3]
model = keras.models.load_model('1st_model_seg.h5')
model.summary()
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 148, 148, 32) 896
max_pooling2d (MaxPooling2D (None, 74, 74, 32) 0
)
conv2d_1 (Conv2D) (None, 72, 72, 32) 9248
max_pooling2d_1 (MaxPooling (None, 36, 36, 32) 0
2D)
conv2d_2 (Conv2D) (None, 34, 34, 64) 18496
max_pooling2d_2 (MaxPooling (None, 17, 17, 64) 0
2D)
conv2d_3 (Conv2D) (None, 15, 15, 128) 73856
max_pooling2d_3 (MaxPooling (None, 7, 7, 128) 0
2D)
flatten (Flatten) (None, 6272) 0
dense (Dense) (None, 5) 31365
=================================================================
Total params: 133,861
Trainable params: 133,861
Non-trainable params: 0
model2 = keras.models.load_model('2nd_model_seg.h5')
model2.summary()
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 148, 148, 32) 896
max_pooling2d (MaxPooling2D (None, 74, 74, 32) 0
)
conv2d_1 (Conv2D) (None, 72, 72, 32) 9248
max_pooling2d_1 (MaxPooling (None, 36, 36, 32) 0
2D)
conv2d_2 (Conv2D) (None, 34, 34, 64) 18496
max_pooling2d_2 (MaxPooling (None, 17, 17, 64) 0
2D)
conv2d_3 (Conv2D) (None, 15, 15, 128) 73856
max_pooling2d_3 (MaxPooling (None, 7, 7, 128) 0
2D)
flatten (Flatten) (None, 6272) 0
dense (Dense) (None, 5) 31365
=================================================================
Total params: 133,861
Trainable params: 133,861
Non-trainable params: 0
def concat_horizontal(models, input_shape):
    models_count = len(models)
    hidden = []
    input = tf.keras.layers.Input(shape=input_shape)
    for i in range(models_count):
        hidden.append(models[i](input))
    output = tf.keras.layers.concatenate(hidden)
    model = tf.keras.Model(inputs=input, outputs=output)
    return model

new_model = concat_horizontal([model, model2], input_shape)
new_model.save('f1_1st_merged_seg.h5')
new_model.summary()
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 150, 150, 3 0 []
)]
model1 (Sequential) (None, 5) 133861 ['input_1[0][0]']
model2 (Sequential) (None, 5) 133861 ['input_1[0][0]']
concatenate (Concatenate) (None, 10) 0 ['model1[0][0]',
'model2[0][0]']
==================================================================================================
Total params: 267,722
Trainable params: 267,722
Non-trainable params: 0
After I tested the merged model, I found some images getting classes 7 and 9, although I have only 5 classes. This is my code for prediction:
class_names = ['A', 'B', 'C', 'D', 'E']
for img in os.listdir(path):
    # predicting images
    img2 = tf.keras.preprocessing.image.load_img(
        os.path.join(path, img), target_size=(150, 150))
    x = tf.keras.preprocessing.image.img_to_array(img2)
    x = np.expand_dims(x, axis=0)
    images = np.vstack([x])
    classes = np.argmax(model.predict(images), axis=-1)
    y_out = class_names[classes[0]]
I got this error:
y_out = class_names[classes[0]]
IndexError: list index out of range
For this case it could even have been done with the Sequential method. Look: you are trying to concatenate two output layers with 5 columns each, which increases the number of classes from 5 to 10. Instead, define these two models only up to the layer before the output (the Flatten layer as the last layer of both models), then define the final model with the input layer, these two models, the concatenate layer, and then an output layer with five units and a softmax activation.
So remove the output layer
tf.keras.layers.Dense(5, activation=tf.nn.softmax)
from those two models, and implement it just once after the concatenation, as defined here:
def concat_horizontal(models, input_shape):
    models_count = len(models)
    hidden = []
    input = tf.keras.layers.Input(shape=input_shape)
    for i in range(models_count):
        hidden.append(models[i](input))
    output = tf.keras.layers.concatenate(hidden)
    output = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(output)
    model = tf.keras.Model(inputs=input, outputs=output)
    return model
But note that for such cases it would be better to define the branch models with the functional API, as sketched below.
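A minimal sketch of that functional-API idea, assuming you reuse the two trained models truncated at their Flatten layers (layers[-2], i.e. everything before the final Dense(5)); build_branch is a hypothetical helper, not part of the original code:
import tensorflow as tf

def build_branch(trained_model, name):
    # Hypothetical helper: reuse a trained model up to its Flatten layer
    # (layers[-2]), dropping the final Dense(5) softmax head.
    return tf.keras.Model(inputs=trained_model.input,
                          outputs=trained_model.layers[-2].output,
                          name=name)

branch1 = build_branch(tf.keras.models.load_model('1st_model_seg.h5'), 'branch1')
branch2 = build_branch(tf.keras.models.load_model('2nd_model_seg.h5'), 'branch2')

inputs = tf.keras.layers.Input(shape=(150, 150, 3))
features = tf.keras.layers.concatenate([branch1(inputs), branch2(inputs)])
outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(features)
new_model = tf.keras.Model(inputs=inputs, outputs=outputs)
Note that the fresh Dense(5) head starts from random weights, so the merged model still needs to be trained (or fine-tuned) before its predictions mean anything.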

ZeroPadding2D pads twice when I set padding to 1

I've just started to learn Tensorflow (2.1.0), Keras (2.3.7) with Python 3.7.7.
I'm trying an encoder-decoder network using VGG16.
I need to upsample a layer from (12, 12, ...) to (25, 25, ...) so that conv7_1 has the same shape as the conv4_3 layer. The layer with the 'problem' is upsp2:
conv4_3 (Conv2D) (None, 25, 25, 512) 2359808
_________________________________________________________________
pool_4 (MaxPooling2D) (None, 12, 12, 512) 0
_________________________________________________________________
conv5_1 (Conv2D) (None, 12, 12, 512) 2359808
_________________________________________________________________
conv5_2 (Conv2D) (None, 12, 12, 512) 2359808
_________________________________________________________________
conv5_3 (Conv2D) (None, 12, 12, 512) 2359808
_________________________________________________________________
pool_5 (MaxPooling2D) (None, 6, 6, 512) 0
_________________________________________________________________
upsp1 (UpSampling2D) (None, 12, 12, 512) 0
_________________________________________________________________
conv6_1 (Conv2D) (None, 12, 12, 512) 2359808
_________________________________________________________________
conv6_2 (Conv2D) (None, 12, 12, 512) 2359808
_________________________________________________________________
conv6_3 (Conv2D) (None, 12, 12, 512) 2359808
_________________________________________________________________
upsp2 (UpSampling2D) (None, 24, 24, 512) 0
_________________________________________________________________
conv7_1 (Conv2D) (None, 24, 24, 512) 2359808
I have tried this:
#################################
# Decoder
#################################
#conv1 = Conv2DTranspose(512, (2, 2), strides = 2, name = 'conv1')(pool5)
upsp1 = UpSampling2D(size = (2,2), name = 'upsp1')(pool5)
conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', name = 'conv6_1')(upsp1)
conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', name = 'conv6_2')(conv6)
conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', name = 'conv6_3')(conv6)
zero1 = ZeroPadding2D(padding = (1,1), data_format = 'channels_last', name='zero1')(conv6)
upsp2 = UpSampling2D(size = (2,2), name = 'upsp2')(zero1)
But the shape (12, 12, ...) becomes (14, 14, ...) at the zero1 layer:
conv6_3 (Conv2D) (None, 12, 12, 512) 2359808
_________________________________________________________________
zero1 (ZeroPadding2D) (None, 14, 14, 512) 0
_________________________________________________________________
upsp2 (UpSampling2D) (None, 28, 28, 512) 0
_________________________________________________________________
How can I upsample (12,12,512) to (25,25,512)?
I did it by using padding as a tuple of 2 tuples of 2 ints, interpreted as ((top_pad, bottom_pad), (left_pad, right_pad)), and placing ZeroPadding2D at the end of the convolution-7 block:
#################################
# Decoder
#################################
#conv1 = Conv2DTranspose(512, (2, 2), strides = 2, name = 'conv1')(pool5)
upsp1 = UpSampling2D(size = (2,2), name = 'upsp1')(pool5)
conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', name = 'conv6_1')(upsp1)
conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', name = 'conv6_2')(conv6)
conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', name = 'conv6_3')(conv6)
upsp2 = UpSampling2D(size = (2,2), name = 'upsp2')(conv6)
conv7 = Conv2D(512, 3, activation = 'relu', padding = 'same', name = 'conv7_1')(upsp2)
conv7 = Conv2D(512, 3, activation = 'relu', padding = 'same', name = 'conv7_2')(conv7)
conv7 = Conv2D(512, 3, activation = 'relu', padding = 'same', name = 'conv7_3')(conv7)
zero1 = ZeroPadding2D(padding = ((1, 0), (1, 0)), data_format = 'channels_last', name='zero1')(conv7)
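To sanity-check the geometry, a small standalone sketch (using a dummy (12, 12, 512) feature map in place of the conv blocks) shows how the asymmetric padding recovers the odd size:
from keras.layers import Input, UpSampling2D, ZeroPadding2D
from keras.models import Model

inp = Input(shape=(12, 12, 512))
x = UpSampling2D(size=(2, 2))(inp)                # -> (None, 24, 24, 512)
out = ZeroPadding2D(padding=((1, 0), (1, 0)))(x)  # -> (None, 25, 25, 512)
print(Model(inp, out).output_shape)               # (None, 25, 25, 512)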

How does MobileNet v1 achieve small parameter count on tensorflow?

Problem
I was trying to re-build a MobileNet model identical to the version provided by keras.applications on Tensorflow v2.1.0.
However, no matter what I tried (i.e., Conv2D, SeparableConv2D, DepthwiseConv2D), the parameter count is so far off that the model starts allocating 100+ GB of RAM on the system.
The model summaries for the Keras version and my own version, along with the layers of my version, can be found below under the Snippets section.
For the sake of simplicity, I am not using any width or resolution multiplier (or let's say both have the value 1.0).
Question
How might I achieve a parameter count as low as the Keras-provided version's?
Snippets
Portion of keras mobilenet model summary
Model: "mobilenet_1.00_224"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
conv1_pad (ZeroPadding2D) (None, 225, 225, 3) 0
_________________________________________________________________
conv1 (Conv2D) (None, 112, 112, 32) 864
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32) 128
_________________________________________________________________
conv1_relu (ReLU) (None, 112, 112, 32) 0
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D) (None, 112, 112, 32) 288
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32) 128
Portion of self created model summary
Model: "dummy_model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
zero_padding2d (ZeroPadding2 (None, 225, 225, 3) 0
_________________________________________________________________
conv2d (Conv2D) (None, 112, 112, 32) 896
_________________________________________________________________
batch_normalization (BatchNo (None, 112, 112, 32) 128
_________________________________________________________________
re_lu (ReLU) (None, 112, 112, 32) 0
_________________________________________________________________
depthwise_conv2d (DepthwiseC (None, 112, 112, 32) 32800
_________________________________________________________________
batch_normalization_1 (Batch (None, 112, 112, 32) 128
Self created model with layers
inputs = Input(shape=(224, 224, 3))
x = ZeroPadding2D(padding=((1, 0), (1, 0)))(inputs)
x = Conv2D(32, (3, 3), strides=(2, 2), padding="valid")(x)
x = BatchNormalization()(x)
x = ReLU()(x)
x = depthwise_separable_convolution(x)
x = depthwise_separable_convolution(x, 2)
x = depthwise_separable_convolution(x)
x = depthwise_separable_convolution(x, 2)
x = depthwise_separable_convolution(x)
x = depthwise_separable_convolution(x, 2)
x = depthwise_separable_convolution(x)
x = depthwise_separable_convolution(x)
x = depthwise_separable_convolution(x)
x = depthwise_separable_convolution(x)
x = depthwise_separable_convolution(x)
x = depthwise_separable_convolution(x, 2)
x = depthwise_separable_convolution(x, 2)
x = AveragePooling2D(pool_size=7)(x)
x = Flatten()(x)
x = Dense(10, activation="softmax")(x)
return Model(inputs=inputs, outputs=x, name="dummy_model")
Depthwise separable convolution
def depthwise_separable_convolution(self, input, strides=1):
    input_depth = input.shape[-1]
    output_depth = input_depth * 2
    x = DepthwiseConv2D(input_depth, 1, padding="same")(input)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(output_depth, 1, padding="same")(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    return x
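For reference, a DepthwiseConv2D layer has kernel_h × kernel_w × in_channels (× depth_multiplier) weights plus optional biases, so the 32800 above is consistent with the kernel size being set to input_depth = 32 (32 × 32 × 32 + 32 = 32800), whereas Keras' conv_dw_1 uses a 3 × 3 kernel without bias (3 × 3 × 32 = 288). A quick standalone sketch to compare the two, assuming TF 2.x:
import tensorflow as tf

inp = tf.keras.layers.Input(shape=(112, 112, 32))
# 3x3 depthwise kernel, no bias: 3 * 3 * 32 = 288 params (matches conv_dw_1).
a = tf.keras.layers.DepthwiseConv2D(3, padding="same", use_bias=False)(inp)
# kernel_size=32 with bias: 32 * 32 * 32 + 32 = 32800 params (matches above).
b = tf.keras.layers.DepthwiseConv2D(32, 1, padding="same")(inp)
tf.keras.Model(inp, [a, b]).summary()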

keras-tensorflow CAE dimension mismatch

I'm basically following this guide to build a convolutional autoencoder with the Tensorflow backend. The main difference from the guide is that my data consists of 257x257 grayscale images. The following code:
import os
import sys
import numpy as np
from scipy import misc
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model

TRAIN_FOLDER = 'data/OIRDS_gray/'
EPOCHS = 10
SHAPE = (257, 257, 1)
FILELIST = os.listdir(TRAIN_FOLDER)

def loadTrainData():
    train_data = []
    for fn in FILELIST:
        img = misc.imread(TRAIN_FOLDER + fn)
        img = np.reshape(img, (len(img[0, :]), len(img[:, 0]), SHAPE[2]))
        if img.shape != SHAPE:
            print "image shape mismatch!"
            print "Expected: "
            print SHAPE
            print "but got:"
            print img.shape
            sys.exit()
        train_data.append(img)
    train_data = np.array(train_data)
    train_data = train_data.astype('float32') / 255
    return np.array(train_data)

def createModel():
    input_img = Input(shape=SHAPE)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    encoded = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
    return Model(input_img, decoded)

x_train = loadTrainData()
autoencoder = createModel()
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
print x_train.shape
autoencoder.summary()

# Run the network
autoencoder.fit(x_train, x_train,
                epochs=EPOCHS,
                batch_size=128,
                shuffle=True)
gives me an error:
ValueError: Error when checking target: expected conv2d_7 to have shape (None, 260, 260, 1) but got array with shape (859, 257, 257, 1)
As you can see, this is not the standard theano/tensorflow backend dim-ordering problem, but something else. I checked that my data is what it's supposed to be with print x_train.shape:
(859, 257, 257, 1)
And I also run autoencoder.summary():
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 257, 257, 1) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 257, 257, 16) 160
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 129, 129, 16) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 129, 129, 8) 1160
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 65, 65, 8) 0
_________________________________________________________________
conv2d_3 (Conv2D) (None, 65, 65, 8) 584
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 33, 33, 8) 0
_________________________________________________________________
conv2d_4 (Conv2D) (None, 33, 33, 8) 584
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 66, 66, 8) 0
_________________________________________________________________
conv2d_5 (Conv2D) (None, 66, 66, 8) 584
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 132, 132, 8) 0
_________________________________________________________________
conv2d_6 (Conv2D) (None, 132, 132, 16) 1168
_________________________________________________________________
up_sampling2d_3 (UpSampling2 (None, 264, 264, 16) 0
_________________________________________________________________
conv2d_7 (Conv2D) (None, 264, 264, 1) 145
=================================================================
Total params: 4,385
Trainable params: 4,385
Non-trainable params: 0
_________________________________________________________________
Now I'm not exactly sure where the problem is, but it does look like things go wrong around conv2d_6 (Param # too high). I do know how CAEs work in principle, but I'm not that familiar with the exact technical details yet, and I have tried to solve this mainly by messing with the deconvolution padding (using valid instead of same). The closest I got to matching dimensions was (None, 258, 258, 1). I achieved this by blindly trying different combinations of padding on the deconvolution side, not really a smart way to solve a problem...
At this point I'm at a loss, and any help would be appreciated.
Since your input and output data are the same, your final output shape should be the same as the input shape.
The last convolutional layer should have shape (None, 257, 257, 1).
The problem is happening because you have an odd number (257) as the size of the images.
When you apply MaxPooling, it divides the size by two and has to round, here rounding up (see the 129 coming from 257/2 = 128.5).
Later, when you do UpSampling, the model doesn't know the current dimensions were rounded; it simply doubles the value. Happening three times in sequence, this adds 7 pixels to the final result.
You could try either cropping the result or padding the input.
I usually work with images of compatible sizes: if you have 3 MaxPooling layers, your size should be a multiple of 2³ = 8, and the nearest such size is 264.
Padding the input data directly:
x_train = numpy.lib.pad(x_train,((0,0),(3,4),(3,4),(0,0)),mode='constant')
This will require that SHAPE=(264,264,1)
Padding inside the model:
import keras.backend as K
input_img = Input(shape=SHAPE)
x = Lambda(lambda x: K.spatial_2d_padding(x, padding=((3, 4), (3, 4))), output_shape=(264,264,1))(input_img)
Cropping the results:
This will be required in any case where you do not change the actual data (numpy array) directly.
decoded = Lambda(lambda x: x[:,3:-4,3:-4,:], output_shape=SHAPE)(x)
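An equivalent option for the cropping variant, assuming the same 264 → 257 geometry, is Keras' built-in Cropping2D layer:
from keras.layers import Cropping2D

# Drop 3 rows/cols at the top/left and 4 at the bottom/right:
# (None, 264, 264, 1) -> (None, 257, 257, 1), same as the Lambda slice above.
decoded = Cropping2D(cropping=((3, 4), (3, 4)))(x)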

Improving accuracy of my CNN for pixel wise segmentation

I am trying to design a CNN that can do pixel-wise segmentation of cell images, such as these:
With segmentation masks such as this (except more than one segmentation mask for each raw image, e.g. interior of cell, border of cell, background):
I have mostly copied the U-Net design from here: https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/
However, even with 10 annotated images (over 300 cells), I still get quite bad Dice coefficient scores and not great predictions. According to the U-Net paper, this number of annotated cells should be sufficient for a good prediction.
This is the code for the model I am using.
from keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate
from keras.models import Model
from keras.optimizers import Adam

def get_unet():
    inputs = Input((img_rows, img_cols, 1))
    conv1 = Conv2D(16, window_size, activation='relu', padding='same')(inputs)
    conv1 = Conv2D(16, window_size, activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(64, window_size, activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, window_size, activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(128, window_size, activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, window_size, activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(128, window_size, activation='relu', padding='same')(pool3)
    conv4 = Conv2D(128, window_size, activation='relu', padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    conv5 = Conv2D(512, window_size, activation='relu', padding='same')(pool4)
    conv5 = Conv2D(512, window_size, activation='relu', padding='same')(conv5)
    up6 = concatenate([Conv2DTranspose(512, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=3)
    conv6 = Conv2D(128, window_size, activation='relu', padding='same')(up6)
    conv6 = Conv2D(128, window_size, activation='relu', padding='same')(conv6)
    up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=3)
    conv7 = Conv2D(128, window_size, activation='relu', padding='same')(up7)
    conv7 = Conv2D(128, window_size, activation='relu', padding='same')(conv7)
    up8 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=3)
    conv8 = Conv2D(64, window_size, activation='relu', padding='same')(up8)
    conv8 = Conv2D(64, window_size, activation='relu', padding='same')(conv8)
    up9 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=3)
    conv9 = Conv2D(16, window_size, activation='relu', padding='same')(up9)
    conv9 = Conv2D(16, window_size, activation='relu', padding='same')(conv9)
    conv10 = Conv2D(f_num, (1, 1), activation='softmax')(conv9)  # change to N, (1, 1) for more classes and softmax
    model = Model(inputs=[inputs], outputs=[conv10])
    model.compile(optimizer=Adam(lr=1e-5), loss=dice_coef_loss, metrics=[dice_coef])
    return model
I have tried many different hyper-parameters for the model, all with no success. Dice scores hover around 0.25 and my loss barely decreases between epochs.
I feel I am doing something fundamentally wrong here. Any suggestions?
EDIT: Sigmoid activation improves the Dice score from 0.25 to 0.33 (again, however, 1 epoch reaches this score and subsequent epochs only improve very slightly, from 0.33 to 0.331, etc.)
dice_coef_loss is defined as below:
from keras import backend as K

smooth = 1.

def dice_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def dice_coef_loss(y_true, y_pred):
    return -dice_coef(y_true, y_pred)
Also, in case it's useful, here is the model.summary() output:
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) (None, 64, 64, 1) 0
_________________________________________________________________
conv2d_20 (Conv2D) (None, 64, 64, 16) 32
_________________________________________________________________
conv2d_21 (Conv2D) (None, 64, 64, 16) 272
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 32, 32, 16) 0
_________________________________________________________________
conv2d_22 (Conv2D) (None, 32, 32, 64) 1088
_________________________________________________________________
conv2d_23 (Conv2D) (None, 32, 32, 64) 4160
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 16, 16, 64) 0
_________________________________________________________________
conv2d_24 (Conv2D) (None, 16, 16, 128) 8320
_________________________________________________________________
conv2d_25 (Conv2D) (None, 16, 16, 128) 16512
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 8, 8, 128) 0
_________________________________________________________________
conv2d_26 (Conv2D) (None, 8, 8, 128) 16512
_________________________________________________________________
conv2d_27 (Conv2D) (None, 8, 8, 128) 16512
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 4, 4, 128) 0
_________________________________________________________________
conv2d_28 (Conv2D) (None, 4, 4, 512) 66048
_________________________________________________________________
conv2d_29 (Conv2D) (None, 4, 4, 512) 262656
_________________________________________________________________
conv2d_transpose_5 (Conv2DTr (None, 8, 8, 512) 1049088
_________________________________________________________________
concatenate_5 (Concatenate) (None, 8, 8, 640) 0
_________________________________________________________________
conv2d_30 (Conv2D) (None, 8, 8, 128) 82048
_________________________________________________________________
conv2d_31 (Conv2D) (None, 8, 8, 128) 16512
_________________________________________________________________
conv2d_transpose_6 (Conv2DTr (None, 16, 16, 128) 65664
_________________________________________________________________
concatenate_6 (Concatenate) (None, 16, 16, 256) 0
_________________________________________________________________
conv2d_32 (Conv2D) (None, 16, 16, 128) 32896
_________________________________________________________________
conv2d_33 (Conv2D) (None, 16, 16, 128) 16512
_________________________________________________________________
conv2d_transpose_7 (Conv2DTr (None, 32, 32, 128) 65664
_________________________________________________________________
concatenate_7 (Concatenate) (None, 32, 32, 192) 0
_________________________________________________________________
conv2d_34 (Conv2D) (None, 32, 32, 64) 12352
_________________________________________________________________
conv2d_35 (Conv2D) (None, 32, 32, 64) 4160
_________________________________________________________________
conv2d_transpose_8 (Conv2DTr (None, 64, 64, 64) 16448
_________________________________________________________________
concatenate_8 (Concatenate) (None, 64, 64, 80) 0
_________________________________________________________________
conv2d_36 (Conv2D) (None, 64, 64, 16) 1296
_________________________________________________________________
conv2d_37 (Conv2D) (None, 64, 64, 16) 272
_________________________________________________________________
conv2d_38 (Conv2D) (None, 64, 64, 4) 68
=================================================================
Total params: 1,755,092.0
Trainable params: 1,755,092.0
Non-trainable params: 0.0