pixel wise softmax with crossentropy for multiclass segmentation - tensorflow

I am trying to implement a multiclass semantic segmentation model with 2
classes (human, car). Here is my modified implementation of the U-Net architecture. I set the number of output channels to 3 (3 classes: human, car, background). How do I get pixel-wise classification?
Here are 2 examples from my ground-truth masks.
I am using one channel for each object class, i.e.:
channel 1 for class=car
channel 2 for class=background
channel 3 for class=human
def conv_block(tensor, nfilters, size=3, padding='same', initializer="he_normal"):
    """Apply two consecutive Conv2D -> BatchNormalization -> ReLU stages.

    Args:
        tensor: Input tensor fed to the first convolution.
        nfilters: Number of filters used by both convolutions.
        size: Side length of the square convolution kernel.
        padding: Padding mode forwarded to ``Conv2D``.
        initializer: Kernel initializer forwarded to ``Conv2D``.

    Returns:
        The tensor produced by the second ReLU activation.
    """
    out = tensor
    for _ in range(2):
        out = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(out)
        out = BatchNormalization()(out)
        out = Activation("relu")(out)
    return out
def deconv_block(tensor, residual, nfilters, size=3, padding='same', strides=(2, 2)):
    """Upsample ``tensor``, merge it with a skip connection and refine it.

    Args:
        tensor: Input tensor passed through ``Conv2DTranspose``.
        residual: Skip-connection tensor concatenated on axis 3.
        nfilters: Number of filters for the transposed convolution and for
            the following ``conv_block``.
        size: Side length of the square transposed-convolution kernel.
        padding: Padding mode of the transposed convolution.
        strides: Strides of the transposed convolution.

    Returns:
        The output of ``conv_block`` applied to the concatenated tensor.
    """
    upsampled = Conv2DTranspose(nfilters, kernel_size=(size, size), strides=strides, padding=padding)(tensor)
    merged = concatenate([upsampled, residual], axis=3)
    # conv_block is called with its own default size/padding/initializer.
    return conv_block(merged, nfilters)
def Unet(img_height, img_width, nclasses=3, filters=64):
    """Build a U-Net style model with a softmax output over ``nclasses``.

    Args:
        img_height: Height of the 3-channel input image.
        img_width: Width of the 3-channel input image.
        nclasses: Number of output channels of the final 1x1 convolution.
        filters: Filter count of the first encoder stage; later stages use
            multiples of it (x2, x4, x8, x16).

    Returns:
        A Keras ``Model`` named ``'Unet'`` whose output is reshaped to
        ``(img_height * img_width, nclasses)`` before the softmax activation.
    """
    input_layer = Input(shape=(img_height, img_width, 3), name='image_input')

    # Encoder: four conv blocks, each followed by 2x2 max pooling. The
    # un-pooled outputs are kept as skip connections for the decoder.
    skips = []
    x = input_layer
    for multiplier in (1, 2, 4, 8):
        skip = conv_block(x, nfilters=filters * multiplier)
        skips.append(skip)
        x = MaxPooling2D(pool_size=(2, 2))(skip)
    skip1, skip2, skip3, skip4 = skips
    x = Dropout(0.5)(x)

    # Bottleneck.
    x = conv_block(x, nfilters=filters * 16)
    x = Dropout(0.5)(x)

    # Decoder: dropout is applied only after the two deepest stages.
    x = deconv_block(x, residual=skip4, nfilters=filters * 8)
    x = Dropout(0.5)(x)
    x = deconv_block(x, residual=skip3, nfilters=filters * 4)
    x = Dropout(0.5)(x)
    x = deconv_block(x, residual=skip2, nfilters=filters * 2)
    x = deconv_block(x, residual=skip1, nfilters=filters)

    # Classification head: 1x1 conv, batch norm, flatten pixels, softmax.
    scores = Conv2D(filters=nclasses, kernel_size=(1, 1))(x)
    scores = BatchNormalization()(scores)
    scores = Reshape((img_height*img_width, nclasses), input_shape=(img_height, img_width, nclasses))(scores)
    probabilities = Activation('softmax')(scores)

    return Model(inputs=input_layer, outputs=probabilities, name='Unet')

You are almost done, now backpropagate the network error with:
# Mean sparse softmax cross-entropy between `output_layer` and integer `labels`.
# NOTE(review): per the TensorFlow docs this op expects un-normalized logits and
# applies softmax itself, while the Unet above already ends in
# Activation('softmax') -- confirm `output_layer` is the pre-softmax tensor.
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_layer, labels=labels))
You don't have to convert your ground truth into one-hot format; sparse_softmax will do it for you.


keras custom layer unknown output shape

I am trying to build a custom keras layer that does Canny edge detection with OpenCV. Here's my code:
class CannyEdgeDetectorLayer(layers.Layer):
    """Keras layer that runs OpenCV Canny edge detection on a batch of images.

    The detection runs in NumPy/OpenCV through ``tf.py_function``; the layer
    defines no weights of its own.

    Args:
        threshold1: First threshold forwarded to ``cv2.Canny``.
        threshold2: Second threshold forwarded to ``cv2.Canny``.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, threshold1=60, threshold2=120, **kwargs):
        super(CannyEdgeDetectorLayer, self).__init__(**kwargs)
        self.threshold1 = threshold1
        self.threshold2 = threshold2

    def call(self, inputs):
        """Return the edge maps for ``inputs`` as a float32 tensor."""
        out = tf.py_function(func=self.canny_edge_detector, inp=[inputs], Tout=tf.float32)
        # The tensor returned by py_function carries no static shape, which
        # makes shape-dependent downstream layers fail; set it explicitly.
        out.set_shape((inputs.shape[0], inputs.shape[1], inputs.shape[2], 1))
        return out

    def canny_edge_detector(self, inputs):
        """Run ``cv2.Canny`` on every image of an eager batch.

        Returns a float32 tensor of shape (batch, height, width, 1) with the
        OpenCV output divided by 255.
        """
        # NOTE(review): confirm the dtype cv2.Canny accepts for the images
        # passed here (the batch is forwarded as-is after .numpy()).
        inputs = inputs.numpy()
        batch, height, width = inputs.shape[0], inputs.shape[1], inputs.shape[2]
        edges = [
            cv2.Canny(img, self.threshold1, self.threshold2).reshape(height, width, -1) / 255
            for img in inputs
        ]
        return tf.reshape(tf.convert_to_tensor(edges, dtype=tf.float32), (batch, height, width, 1))

    def compute_output_shape(self, input_shape):
        """Same batch/height/width as the input, with a single channel."""
        return (input_shape[0], input_shape[1], input_shape[2], 1)

    def get_config(self):
        """Return the base config extended with both thresholds."""
        config = super().get_config().copy()
        config.update({
            'threshold1': self.threshold1,
            'threshold2': self.threshold2,
        })
        return config

    def build(self, input_shape):
        return super().build(input_shape)
And my model is as follows:
# Edge detection -> augmentation -> conv stack -> dense head with a single
# sigmoid output.
inputs = keras.Input(shape=(255, 255, 3))
x = CannyEdgeDetectorLayer(60, 120)(inputs)
x = layers.RandomFlip('horizontal')(x)
x = layers.RandomRotation(1./12)(x)

# Each stage is (filters, number of 3x3 convolutions) followed by 2x2 pooling.
for n_filters, n_convs in ((32, 1), (64, 2), (128, 2), (256, 2)):
    for _ in range(n_convs):
        x = layers.Conv2D(filters=n_filters, kernel_size=(3, 3), activation='gelu')(x)
    x = layers.MaxPooling2D(pool_size=2)(x)

x = layers.Flatten()(x)
for n_units in (128, 64):
    x = layers.Dense(n_units, activation='gelu')(x)
    x = layers.Dropout(0.5)(x)

outputs = layers.Dense(1, activation='sigmoid')(x)
model = Model(inputs=inputs, outputs=outputs)
I have tried my custom layers on some test images, and it worked fine and successfully outputted a batch of tensors with shape (n, h, w, 1). But when I am trying to build my model I get the following error:
Image augmentation layers are expecting inputs to be rank 3 (HWC) or 4D (NHWC) tensors. Got shape: <unknown>
Call arguments received by layer "random_flip_25" (type RandomFlip):
• inputs=tf.Tensor(shape=<unknown>, dtype=float32)
• training=True
What went wrong and how should I properly specify the output shape of my custom layer?
I solved the problem by specifying the output's shape in call:
def call(self, inputs):
    """Run the edge detector via py_function and attach a static shape."""
    edges = tf.py_function(func=self.canny_edge_detector, inp=[inputs], Tout=tf.float32)
    batch, height, width = inputs.shape[0], inputs.shape[1], inputs.shape[2]
    # Same batch/height/width as the input, one output channel.
    edges.set_shape((batch, height, width, 1))
    return edges
It turns out the issue comes from py_function and EagerTensor: the tensor returned by tf.py_function has no static shape, so it has to be set explicitly.

In Gradient Tape Tensorflow gradients are coming as 0.s

Here is part of a model I am working on. Being new to calculating gradients in TF, I got confused when I found that all the gradient values come out as 0. Here is the code:
class A:
    """Two-input Keras model wrapper with a helper to differentiate w.r.t. input B.

    Args:
        inputA_dim: Currently ignored; the image input shape is fixed below.
        inputB_dim: Currently ignored; the vector input size is fixed below.
    """

    def __init__(self, inputA_dim, inputB_dim):
        # NOTE(review): the constructor arguments are not used; the shapes are
        # hard-coded. Confirm whether they should come from the arguments.
        self.inputA_dim = (35, 35, 1)
        self.inputB_dim = 2
        self.model = self.nn_model()
        self.opt = tf.keras.optimizers.Adam()

    def nn_model(self):
        """Build the model mapping ``[inputA, inputB]`` to one linear output."""
        inputA = Input(self.inputA_dim)
        conv1 = Conv2D(10, 3, padding="same", activation="relu")(inputA)
        pool1 = MaxPool2D(padding='same')(conv1)
        conv2 = Conv2D(10, 3, padding="same", activation="relu")(pool1)
        pool2 = MaxPool2D(padding='same')(conv2)
        conv3 = Conv2D(10, 3, padding="same", activation="relu")(pool2)
        pool3 = MaxPool2D(padding='same')(conv3)
        flatten = Flatten()(pool3)
        s2 = Dense(32, activation="relu")(flatten)
        # NOTE(review): s3 and s4 are never connected to the output (the
        # concatenation below uses s2) -- confirm which tensor was intended.
        s3 = Dense(32, activation="relu")(s2)
        s4 = Dense(2, activation="relu")(s3)

        inputB = Input((self.inputB_dim,))
        a1 = Dense(2, activation="relu")(inputB)

        c1 = concatenate([s2, a1], axis=-1)
        c2 = Dense(4, activation="relu")(c1)
        outputs = Dense(1, activation="linear")(c2)
        return tf.keras.Model([inputA, inputB], outputs)

    def predict(self, inputs):
        """Forward ``inputs`` to ``self.model.predict``."""
        return self.model.predict(inputs)

    def gradients(self, inputA, inputB):
        """Return the gradient of the model output with respect to ``inputB``.

        Args:
            inputA: Batch for the first model input.
            inputB: Batch for the second model input; converted to a tensor.

        Returns:
            The result of ``tape.gradient`` for the squeezed model output
            with respect to the ``inputB`` tensor.
        """
        inputB = tf.convert_to_tensor(inputB)
        with tf.GradientTape() as tape:
            # A plain tensor is not tracked automatically (only trainable
            # variables are), so it must be watched explicitly.
            tape.watch(inputB)
            values = tf.squeeze(self.model([inputA, inputB]))
        # NOTE(review): if the result is still all zeros, the ReLU units on
        # the inputB path (Dense(2), Dense(4)) may be inactive for these
        # inputs -- verify their pre-activations.
        return tape.gradient(values, inputB)
Later I found there is another method called jacobian, which I also tried here; it still gives 0s as gradient values. Can anyone tell me what to do? Thanks.

How to define custom loss function with multi-outputs in tf-keras?

I want to train a multi-output model, with outputs named ctr (click-through rate) and cvr, in TensorFlow Keras.
The outputs should be ctr and cvr, but the losses should be a ctr loss and a (ctr * cvr) loss.
So, if the click label is zero, the (ctr * cvr) loss should be zero.
def _tower(shared):
    """Build one BatchNorm -> Dense(100) -> BatchNorm -> Dense(50) -> Dense(1) head."""
    hidden = BatchNormalization()(shared)
    hidden = Dense(100, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01))(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = Dense(50, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01))(hidden)
    return Dense(1, activation=activation)(hidden)


d = concatenate(inp_embed, axis=-1, name='concat')  # shared embeddings
d = Flatten()(d)

# Two structurally identical heads on top of the shared representation.
d_ctr = _tower(d)
d_cvr = _tower(d)

# Product of both heads; note that it is not among the model outputs below.
d_ivr = multiply([d_ctr, d_cvr])
deep = Model(inputs=inp_layer, outputs=[d_ctr, d_cvr])
This is how you can create a custom loss from multiple outputs :
def custom_loss(y_true, y_pred):
    """Return the product of the binary cross-entropies of both heads.

    Both terms compare ``y_true`` against the module-level tensors ``d_ctr``
    and ``d_cvr``; the ``y_pred`` argument is not used.
    """
    return (
        losses.binary_crossentropy(y_true, d_ctr)
        * losses.binary_crossentropy(y_true, d_cvr)
    )
And how to use it :
# Compile with the custom loss defined above and track accuracy.
deep.compile(
    optimizer=sgd,
    loss=custom_loss,
    metrics=['accuracy'],
)
Feel free to add comments so that I can refine my answer.

TensorFlow Keras Optimise prediction

I'm using TensorFlow and Keras to recognize handwritten digits. For training I'm using the MNIST dataset.
The accuracy is about 98.8% after training, but sometimes at test time the model confuses 4 with 9 and 7 with 3. I have already cleaned up the input images with OpenCV (noise removal, rescaling, thresholding, etc.).
What should I do next to improve the prediction accuracy?
My plan is to add more samples and to resize the sample images from 28x28 to 56x56.
Will this affect accuracy?
This is my model for training:
epochs=15, batch size=64
def build_model():
    """Build and compile a small CNN classifier with a 10-way softmax output.

    Reads the module-level ``input_shape`` for the first convolution.

    Returns:
        A compiled ``Sequential`` model (Adam, categorical cross-entropy,
        accuracy metric).
    """
    model = Sequential()
    # Convolutional layers
    model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=input_shape))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
    # Collapse the (H, W, C) feature maps into a vector; without this the
    # Dense layers act per pixel and the output is not one vector of 10
    # class probabilities per sample.
    model.add(Flatten())
    # Densely connected layers
    model.add(Dense(128, activation='relu'))
    # Output layer
    model.add(Dense(10, activation='softmax'))
    # Compile with the Adam optimizer and categorical cross-entropy loss
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model
You can try to add regularization:
def conv2d_bn(x,
              kernel_size=(3, 3),
              units=32,
              activation='relu',
              dropout=.5):
    """Dropout -> bias-free Conv2D -> BatchNormalization -> activation.

    NOTE(review): the original signature was truncated after ``kernel_size``.
    The remaining parameters follow the names used in the body and the
    positional order of the ``conv2d_bn(y, kernel_size, kernels, activation,
    dropout)`` call; their default values are assumptions -- confirm.

    Args:
        x: Input tensor.
        kernel_size: Convolution kernel size.
        units: Number of convolution filters.
        activation: Activation applied after batch normalization.
        dropout: Dropout rate applied to the input.

    Returns:
        The activated output tensor.
    """
    # Layers are constructed first and then called on the tensor.
    y = Dropout(dropout)(x)
    # The bias is disabled on the convolution that feeds BatchNormalization.
    y = Conv2D(units, kernel_size=kernel_size, use_bias=False)(y)
    y = BatchNormalization()(y)
    y = Activation(activation)(y)
    return y
def build_model(..., dropout=.5):
x = Input(shape=[...])
y = conv2d_bn(x, 32)
y = MaxPooling2D(y)
y = Dropout(dropout)(y)
y = Dense(10, activation='softmax')
model = Model(x, y)
return model
You can tweak the class weights to force the model to pay more attention to classes 3, 4, 7 and 9 during training:
# Classes 3, 4, 7 and 9 get twice the weight of the others. The Keras keyword
# is `class_weight` (singular).
model.fit(..., class_weight={0: 1, 1: 1, 2: 1, 3: 2, 4: 2, 5: 1, 6: 1, 7: 2, 8: 1, 9: 2})
If you have some time to burn, you can also try a grid or random search over the model's hyperparameters. Something along these lines:
def build(conv_layers, dense_layers, dense_units, activation, dropout):
    """Build a CNN classifier parameterized for hyperparameter search.

    Args:
        conv_layers: Number of ``conv2d_bn`` blocks.
        dense_layers: Number of Dropout + Dense blocks before the output.
        dense_units: Units of each hidden Dense layer.
        activation: Activation forwarded to ``conv2d_bn``.
        dropout: Dropout rate used in the conv blocks and the dense head.

    Returns:
        An uncompiled ``Model`` with a 10-way softmax output. The original
        snippet built the graph but returned nothing.
    """
    y = x = Input(shape=[...])  # placeholder kept from the original sketch
    kernels = 32
    kernel_size = (2, 2)
    for i in range(conv_layers):
        y = conv2d_bn(y, kernel_size, kernels, activation, dropout)
        # NOTE(review): the original indentation was lost; it is assumed that
        # pooling, filter doubling and kernel growth all belong to this
        # branch -- confirm.
        if i % 2 == 0:  # or 3 or 4.
            # The layer is constructed first and then called on the tensor.
            y = MaxPooling2D()(y)
            kernels *= 2
            kernel_size = tuple(k+1 for k in kernel_size)
    y = GlobalAveragePooling2D()(y)
    for i in range(dense_layers):
        y = Dropout(dropout)(y)
        y = Dense(dense_units)(y)
    y = Dense(10, activation='softmax')(y)
    return Model(x, y)
# Grid search over the builder's hyperparameters.
# NOTE(review): this snippet is truncated in this excerpt -- the trailing
# arguments and closing parentheses of all three calls are missing, and
# `dense_units` has no entry in the visible part of the grid.
model = KerasClassifier(build_model,
params = dict(conv_layers=[2, 3, 4],
dense_layers=[0, 1],
activation=['relu', 'selu'],
dropout=[.2, .3, .5],
grid = GridSearchCV(model, params,
Now, combining hyperparams searching with the NumpyArrayIterator is a little tricky, because the latter assumes we have all training samples (and targets) at hand before the training steps. It's still doable, though:
g = ImageDataGenerator(...)
cv = StratifiedKFold(n_splits=3)
results = dict(params=[], valid_score=[])
for params in ParameterGrid(params):
fold_scores = []
for t, v in cv.split(train_data, train_labels):
train = g.flow(train_data[t], train_labels[t], subset='training')
nn_valid = g.flow(train_data[t], train_labels[t], subset='validation')
fold_valid = g.flow(train_data[v], train_labels[v])
nn = build_model(**params)
nn.fit_generator(train, validation_data=nn_valid, ...)
probabilities = nn.predict_generator(fold_valid, steps=...)
p = np.argmax(probabilities, axis=1)
fold_scores += [metrics.accuracy_score(valid.classes_, p)]
results['params'] += [params]
results['valid_score'] += [fold_scores]
best_ix = np.argmax(np.mean(results['valid_score'], axis=1))
best_params = results['params'][best_ix]
nn = build_model(**best_params)

training output is not in the valid range by using CNN with LSTM

I use keras with tf as the backend.
The goal of the simulation is to use a geo-spatial time series dataset to build a classifier. The target Y is labeled with -1, 0, 1 and 2, where -1 indicates the measured data at that grid point, 0 means the data is of good quality, 1 is middle quality and 2 is the worst.
Right now, I have two inputs. One input holds atmospheric surface variables, such as wind, wind speed, and rain. The second input holds oceanic surface variables, such as sea surface temperature and sea surface salinity. All the input datasets have dimensions of the form (n_samples, n_timesteps, n_variables, n_xpoints: longitude, n_ypoints: latitude). The target dataset is 3-dimensional: (n_samples, n_xpoints: longitude, n_ypoints: latitude).
In addition, all of the input variables are normalized by their value range. For example, the sea surface current velocity is normalized to the range (-1, 1) from (-2, 2) [m/s], and the surface wind speed is normalized to the range (-1, 1) from (-20, 20) [m/s].
The model configuration is designed as described below.
def cnn():
    """Build the per-timestep CNN feature extractor ending in Dense(128).

    Returns:
        A ``Sequential`` model: two channels-first convolutions with pooling,
        batch normalization and dropout, flattened into a 128-unit ReLU layer.
    """
    # NOTE(review): the convolutions use data_format='channels_first' while
    # the pooling and batch-norm layers are created without a data_format or
    # axis argument -- confirm that this is intended.
    stack = [
        Conv2D(64, (3, 3), activation='relu',
               data_format='channels_first', kernel_initializer='he_normal',
               name='conv1'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        BatchNormalization(),
        Conv2D(32, (3, 3), activation='relu',
               kernel_initializer='he_normal', data_format='channels_first',
               name='conv2'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.2),
        BatchNormalization(),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)
    return model
def cnn2lstm(Input_shape, premo, name):
    """Wrap a per-timestep model in a TimeDistributed + LSTM branch.

    Args:
        Input_shape: Shape of one sample of the branch input.
        premo: Model applied to every timestep through ``TimeDistributed``.
        name: Prefix used to name the two LSTM layers.

    Returns:
        A ``(branch_input, branch_output)`` tuple of Keras tensors.
    """
    branch_in = Input(shape=Input_shape, dtype='float32')
    features = TimeDistributed(premo)(branch_in)
    features = LSTM(256, return_sequences=True, name=name+'_lstm1')(features)
    features = TimeDistributed(Dense(4096, activation='relu'))(features)
    features = TimeDistributed(Dropout(0.3))(features)
    features = LSTM(256, return_sequences = True, name=name+'_lstm2')(features)
    features = Dense(101, activation='sigmoid')(features)
    branch_out = Dropout(0.3)(features)
    return branch_in, branch_out
# Build one CNN+LSTM branch per input dataset.
atm_in, atm = cnn2lstm(Train_atm.shape[1:], cnn(), 'atm')
ocn_in, ocn = cnn2lstm(Train_ocn.shape[1:], cnn(), 'ocn')

#--- two inputs into one output
x = keras.layers.concatenate([atm, ocn], axis=1)
for lstm_units in (150, 200):
    x = LSTM(lstm_units, return_sequences=True)(x)
    x = Dropout(0.2)(x)
x = LSTM(500)(x)

# NOTE(review): the last layer is a softmax over all 3355 (= 61 * 55)
# outputs, so every predicted value lies between 0 and 1 and they sum to 1;
# it cannot reproduce targets labeled -1, 0, 1, 2.
for dense_units, dense_activation in (
    (1001, 'relu'),
    (2001, 'relu'),
    (2501, 'tanh'),
    (2701, 'relu'),
    (3355, 'softmax'),
):
    x = Dense(dense_units, activation=dense_activation)(x)
x = Reshape((61, 55), input_shape=(3355,))(x)

# NOTE(review): `bio_in` / `Train_bio` are not created in this excerpt --
# confirm they are defined elsewhere.
model_inputs = [atm_in, ocn_in, bio_in]
model2 = Model(inputs=model_inputs, outputs=x)
plot_model(model2, show_shapes = True, to_file='model_way4_2.png')

model2.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# Keep only the weights with the best validation accuracy.
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

train_inputs = [Train_atm, Train_ocn, Train_bio]
hist = model2.fit(train_inputs, Train_Y,
                  epochs=150, batch_size=3, validation_split=0.1,
                  shuffle=True, callbacks=callbacks_list, verbose=0)

# Evaluation is done on the training data itself.
scores = model2.evaluate([Train_atm, Train_ocn, Train_bio], Train_Y)
print("MODEL 2 %s: %.2f%%" % (model2.metrics_names[1], scores[1]*100))
The evaluation score here is mostly around 83% or higher. But the output of model2.predict is not in the valid range of my target dataset. Instead, the model output gives values between 0 and 1, with a pattern similar to the one the target dataset shows.
Could anyone point out any major issue in my DL algorithm?