How to reduce the bottleneck in an autoencoder as much as possible? - TensorFlow

Dear all,
I have the following code:
inpt = Input(shape=(160,1))
# Input is 160 samples, 20 ms for sampling rate of 8 kHz
# Of course speech can be wide-band. One should take care then
conv1 = Convolution1D(512,3,activation='relu',padding='same',strides=1)(inpt)
conv2 = Convolution1D(128,3,activation='relu',padding='same',strides=1)(conv1)
pool1 = MaxPooling1D(pool_size=2, strides=None, padding='valid')(conv2)
conv3 = Convolution1D(256,3,activation='relu',padding='same',strides=1)(pool1)
conv4 = Convolution1D(256,3,activation='relu',padding='same',strides=1)(conv3)
pool2 = MaxPooling1D(pool_size=2, strides=None, padding='valid')(conv4)
conv5 = Convolution1D(256,3,activation='relu',padding='same',strides=1)(pool2)
conv6 = Convolution1D(128,3,activation='relu',padding='same',strides=1)(conv5)
pool3 = MaxPooling1D(pool_size=2, strides=None, padding='valid')(conv6)
conv7 = Convolution1D(128,3,activation='relu',padding='same',strides=1)(pool3)
conv8 = Convolution1D(64,3,activation='relu',padding='same',strides=1)(conv7)
pool4 = MaxPooling1D(pool_size=2, strides=None, padding='valid')(conv8)
conv9 = Convolution1D(32,3,activation='relu',padding='same',strides=1)(pool4)
conv10 = Convolution1D(16,3,activation='relu',padding='same',strides=1)(conv9)
############################# EXTRA
conv10 = Convolution1D(8, 3, activation='relu', padding='same')(conv10)
pool4 = MaxPooling1D(pool_size=2, padding='same')(conv10)
conv10 = Convolution1D(8, 3, activation='relu', padding='same')(pool4)
encoded = Convolution1D(8, 3, activation='relu', padding='same')(conv10)
#############
The bottleneck here has length 6920 if the input is a 27000-sample signal.
I want to reduce the bottleneck to only 400. How can I do that? The amendment should start from the EXTRA part.
I tried adding extra conv and pool layers, but the length can't get below 6920.

There are many different ways you could get the length you want:
Increase the pooling sizes along the way:
pool = MaxPooling1D(pool_size=4)(prev)  # or you could use higher numbers
Use 'valid' padding in Conv and Pool layers:
pool = MaxPooling1D(pool_size=4, padding='valid')(prev)
conv10 = Convolution1D(8, 3, activation='relu', padding='valid')(prev)
You could also use higher stride sizes in Pool and Conv layers:
pool = MaxPooling1D(pool_size=4, strides=4, padding='valid')(prev)
conv10 = Convolution1D(8, 3, strides=4, activation='relu', padding='valid')(prev)
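To see how far each option takes you, it helps to compute the output length layer by layer: with 'same' padding the output length is ceil(n / stride), and with 'valid' padding it is floor((n - pool_size) / stride) + 1. Here is a minimal sketch (the helper out_len and the pool sizes 4, 4, 4, 2 are just an illustration, not part of your model):
import math

def out_len(n, size, stride, padding):
    # Output length of a Conv1D/MaxPooling1D layer for a length-n input.
    if padding == 'same':
        return math.ceil(n / stride)
    return (n - size) // stride + 1  # 'valid'

n = 27000
for pool in (4, 4, 4, 2):  # total downsampling factor 128
    n = out_len(n, pool, pool, 'same')
print(n)  # 211, well below your target of 400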

I created a draft for you as follows:
- an encoder taking inputs of shape (batch_size, 160, 1) and outputting vectors of shape (batch_size, 1, 4)
- a decoder taking inputs of shape (batch_size, 1, 4), matching the encoder output
- a combined encoder_decoder model
The encoder:
from tensorflow.keras.layers import Input, Convolution1D, MaxPooling1D, GlobalAveragePooling1D, UpSampling1D
import tensorflow as tf
inpt = Input(shape=(160,1))
# Input is 160 samples, 20 ms for sampling rate of 8 kHz
# Of course speech can be wide-band. One should take care then
conv1 = Convolution1D(512,3,activation='relu',padding='same',strides=1)(inpt)
conv2 = Convolution1D(128,3,activation='relu',padding='same',strides=1)(conv1)
pool1 = MaxPooling1D(pool_size=2, strides=None, padding='valid')(conv2)
conv3 = Convolution1D(256,3,activation='relu',padding='same',strides=1)(pool1)
conv4 = Convolution1D(256,3,activation='relu',padding='same',strides=1)(conv3)
pool2 = MaxPooling1D(pool_size=2, strides=None, padding='valid')(conv4)
conv5 = Convolution1D(256,3,activation='relu',padding='same',strides=1)(pool2)
conv6 = Convolution1D(128,3,activation='relu',padding='same',strides=1)(conv5)
pool3 = MaxPooling1D(pool_size=2, strides=None, padding='valid')(conv6)
conv7 = Convolution1D(128,3,activation='relu',padding='same',strides=1)(pool3)
conv8 = Convolution1D(64,3,activation='relu',padding='same',strides=1)(conv7)
pool4 = MaxPooling1D(pool_size=6, strides=None, padding='valid')(conv8)  # length 20 -> 3
conv9 = Convolution1D(32,3,activation='relu',padding='same',strides=1)(pool4)
conv10 = Convolution1D(4,3,activation='relu',padding='same',strides=1)(conv9)
encoded = MaxPooling1D(pool_size=3)(conv10)  # length 3 -> 1, so encoded has shape (batch_size, 1, 4)
encoder = tf.keras.Model(inputs=inpt, outputs=encoded)
encoder.summary()
The decoder:
input_decoder = Input(shape=(1, 4))
upsmp1 = UpSampling1D(size=2)(input_decoder)  # length 1 -> 2
conv11 = Convolution1D(4, 3, activation='relu', padding='same')(upsmp1)
upsmp2 = UpSampling1D(size=8)(conv11)  # length 2 -> 16
conv12 = Convolution1D(8, 3, activation='relu', padding='same')(upsmp2)
conv13 = Convolution1D(8, 3, activation='relu', padding='same')(conv12)
upsmp3 = UpSampling1D(size=10)(conv13)  # length 16 -> 160
decoded = Convolution1D(1, 3, activation='relu', padding='same')(upsmp3)
decoder = tf.keras.Model(inputs=input_decoder, outputs=decoded)
decoder.summary()
The combined encoder-decoder:
encoder_decoder = tf.keras.Model(inputs=inpt, outputs=decoder(encoded))
encoder_decoder.summary()
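As a quick sanity check, you can train the combined model as a plain reconstruction autoencoder. A minimal sketch on random data (the array x and the mse loss are placeholders for your actual signal and objective):
import numpy as np

encoder_decoder.compile(optimizer='adam', loss='mse')
x = np.random.randn(32, 160, 1).astype('float32')  # placeholder batch of 32 frames
encoder_decoder.fit(x, x, epochs=1)
codes = encoder.predict(x)
print(codes.shape)  # (32, 1, 4)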

Related

How to make the CNN prediction function output as binary number(0 or 1)?

I used a CNN model with Keras for binary image classification. For the final prediction step, I defined the function below to output the prediction result:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing import image

model = keras.Sequential()
model.add(Conv2D(filters = 64, kernel_size = (3, 3), activation = 'relu', input_shape = (256, 256, 3)))
model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2)))
model.add(Conv2D(filters = 128, kernel_size = (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2)))
model.add(Conv2D(filters = 256, kernel_size = (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2)))
model.add(Flatten())
model.add(Dense(units = 512, activation = 'relu'))
model.add(Dense(units = 1, activation = 'sigmoid'))
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])
history = model.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=10)

def testing_image(image_directory):
    test_image = image.load_img(image_directory, target_size = (256, 256))
    test_image = image.img_to_array(test_image)
    test_image = np.expand_dims(test_image, axis = 0)
    result = model.predict(test_image)
    print(result)

testing_image('/content/drive/MyDrive/testing/01.jpg')
The output is:
[[0.4733843]]
The output is always a decimal number, but I want the result to be only 0 or 1, without the array representation.
Any help is appreciated.
The sigmoid activation function returns values between 0 and 1, where values < 0.5 map to category zero (0) and values > 0.5 to category one (1) in binary classification.
To get these binary numbers, you need to add one more line of code in testing_image() as below:
Fixed code:
def testing_image(image_directory):
    test_image = image.load_img(image_directory, target_size = (256, 256))
    test_image = image.img_to_array(test_image)
    test_image = np.expand_dims(test_image, axis = 0)
    # Changes in code
    pred = model.predict(test_image)
    result = np.where(pred > 0.5, 1, 0)  # <-- to get the binary category
    print(result)

testing_image('/content/drive/MyDrive/testing/01.jpg')
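Note that np.where still returns a NumPy array, so this prints [[1]] rather than a bare number. If you want a plain Python 0 or 1 with no array representation, convert the single prediction explicitly:
result = int(pred[0][0] > 0.5)  # plain Python int: 0 or 1
print(result)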

How to extract the bottleneck layer from the below architecture?

I have created a model (shown below). After training, I want to get the output tensor from the bottleneck layers of this model, so I am trying to create a model of the extracted layers and use it for predicting.
from tensorflow.keras import layers
from tensorflow.keras.models import Model

nstrides = (1,1)
inputs = layers.Input(imshape)
conv01 = layers.Conv2D(32, 4, activation = 'relu',
                       strides = nstrides, padding="same")(inputs)
conv1 = layers.Conv2D(32, 4, activation = 'relu',
                      strides = nstrides, padding="same")(conv01)
pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)
.
.
#block4
conv04 = layers.Conv2D(256, 4, activation = 'relu',
                       strides = nstrides, padding="same")(pool3)
conv4 = layers.Conv2D(256, 4, activation = 'relu',
                      strides = nstrides, padding="same")(conv04)
pool4 = layers.MaxPooling2D(pool_size=(2, 2))(conv4)
#bottleneck
conv05 = layers.Conv2D(512, 4, activation = 'relu',
                       strides = nstrides, padding="same")(pool4)
conv5 = layers.Conv2D(512, 4, activation = 'relu',
                      strides = nstrides, padding="same")(conv05)
upconv5 = layers.Conv2DTranspose(256, kernel_size=(2, 2),
                                 strides = (2,2))(conv5)
#upblock 1
conc6 = layers.concatenate([upconv5, conv4])
conv06 = layers.Conv2D(256, 4, activation = 'relu',
                       strides = nstrides, padding="same")(conc6)
conv6 = layers.Conv2D(256, 4, activation = 'relu',
                      strides = nstrides, padding="same")(conv06)
up7 = layers.Conv2DTranspose(126, kernel_size=(2, 2),
                             strides = (2,2))(conv6)
.
.
.
#combine the model together
model = Model(inputs, outputs)
First, in order to locate the desired layer, which will become the new output tensor, you can list the layers:
for i, layer in enumerate(model.layers):
    print(i, layer.name)
...
...
12 max_pooling2d_15
13 conv2d_65
14 conv2d_66
15 conv2d_transpose_12
16 concatenate_12
17 conv2d_67
...
...
Here, the layers at indices 13 to 15 form the bottleneck of your model. If you want to get the output tensor from this bottleneck, you can do:
new_model = Model(model.input,
                  model.get_layer(index=15).output)
# or,
new_model = Model(model.input,
                  model.get_layer(name='conv2d_transpose_12').output)
Both are equivalent: the first selects the layer by index, the second by name.
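You can then call the extracted model to get the bottleneck features directly. A minimal sketch, where x_batch is a hypothetical input batch with the same imshape dimensions as the original model:
features = new_model.predict(x_batch)  # bottleneck activations for the batch
print(features.shape)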

ValueError: Shape (None, 17) must have rank 1

I am working on a hand character recognition model. I created a CNN + BiLSTM + CTC loss model, but I get an error when I run model.fit(). Please help me fix this error.
My Model
# input with shape of height=32 and width=128
inputs = Input(shape=(32,128,1))
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# pooling layer with kernel size (2,2)
pool_1 = MaxPooling2D(pool_size=(2, 2), strides=2)(conv_1)
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPooling2D(pool_size=(2, 2), strides=2)(conv_2)
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# pooling layer with kernel size (2,1)
pool_4 = MaxPooling2D(pool_size=(2, 1))(conv_4)
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPooling2D(pool_size=(2, 1))(batch_norm_6)
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)
outputs = Dense(len(char_dict)+1, activation = 'softmax')(blstm_2)
act_model = Model(inputs, outputs)
Define a CTC loss model that takes the outputs of the previous model as inputs:
labels = Input(name='the_labels', shape=[max_length], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels, input_length,
                                                                   label_length])
model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = 'adam')
model.fit(x=[input_array,
             output_array,
             train_input_length,
             train_label_length],
          y=np.zeros(input_array.shape[0]),
          batch_size=256,
          epochs = 100,
          validation_data = ([test_input_array, test_output_array, valid_input_length,
                              valid_label_length], [np.zeros(test_input_array.shape[0])]),
          verbose = 1,
          callbacks = callbacks_list)
The error I am getting is
ValueError: Shape (None, 17) must have rank 1

Distribute Tensor over Multiple GPUs

I am attempting to train a model in which the input exceeds the memory limits for a single GPU on the system (16 GB P100). The size of the input is (1,256,256,64,2). However, I have access to 4 identical GPUs on the system. I know I can distribute processes with tf.distribute but I am unsure how to do this with a batch size of 1. Is it possible to distribute a single sample over multiple GPUs so I don't receive OOM errors?
Edit:
Here is the code used to build the model.
def dice_loss(y_true, y_pred):
    numerator = 2 * tf.reduce_sum(y_true * y_pred, axis=(1,2,3))
    denominator = tf.reduce_sum(y_true + y_pred, axis=(1,2,3))
    return tf.reshape(1 - numerator / denominator, (-1, 1, 1))

class ResidualUnitEncode(keras.layers.Layer):
    def __init__(self, filters=1, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.activation = keras.activations.get(activation)
        self.main_layers = [
            keras.layers.Conv3D(filters, (3, 3, 3), strides=strides,
                                padding="same", use_bias=False),
            keras.layers.BatchNormalization(),
            self.activation,
            keras.layers.Conv3D(filters, (3, 3, 3), strides=1,
                                padding="same", use_bias=False),
            keras.layers.BatchNormalization()]
        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [
                keras.layers.Conv3D(filters, (1, 1, 1), strides=strides,
                                    padding="same", use_bias=False),
                keras.layers.BatchNormalization()]

    def call(self, inputs):
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)

    def get_config(self):
        base_config = super(ResidualUnitEncode, self).get_config()
        return base_config

class ResidualUnitDecode(keras.layers.Layer):
    def __init__(self, filters=1, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.activation = keras.activations.get(activation)
        self.main_layers = [
            keras.layers.Conv3DTranspose(filters, (3, 3, 3), strides=1,
                                         padding="same", use_bias=False),
            keras.layers.BatchNormalization(),
            self.activation,
            keras.layers.Conv3DTranspose(filters, (3, 3, 3), strides=strides,
                                         padding="same", use_bias=False),
            keras.layers.BatchNormalization()]
        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [
                keras.layers.Conv3DTranspose(filters, (3, 3, 3), strides=strides,
                                             padding="same", use_bias=False),
                keras.layers.BatchNormalization()]

    def call(self, inputs):
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)

    def get_config(self):
        base_config = super(ResidualUnitDecode, self).get_config()
        return base_config
def build_unet(image_shape, batch_size):
    inputs = keras.layers.Input(shape=image_shape, batch_size=batch_size)
    conv1 = keras.layers.Conv3D(64, (7, 7, 7), strides=(2, 2, 1), padding="same", use_bias=False, input_shape=image_shape)(inputs)
    conv1 = keras.layers.BatchNormalization()(conv1)
    conv1 = keras.layers.Activation("relu")(conv1)
    pool1 = keras.layers.MaxPool3D(pool_size=(3, 3, 3), strides=1, padding="same")(conv1)
    conv2 = ResidualUnitEncode(filters=128, strides=2)(pool1)
    pool2 = keras.layers.MaxPool3D(pool_size=(3, 3, 3), strides=1, padding="same")(conv2)
    conv3 = ResidualUnitEncode(filters=256, strides=2)(pool2)
    pool3 = keras.layers.MaxPool3D(pool_size=(3, 3, 3), strides=1, padding="same")(conv3)
    conv4 = ResidualUnitEncode(filters=512, strides=2)(pool3)
    pool4 = keras.layers.MaxPool3D(pool_size=(3, 3, 3), strides=1, padding="same")(conv4)
    conv5 = ResidualUnitEncode(filters=1024, strides=2)(pool4)
    drop5 = keras.layers.Dropout(0.5)(conv5)
    up6 = ResidualUnitDecode(filters=512, strides=2)(drop5)
    merge6 = keras.layers.concatenate([conv4, up6], axis=4)
    conv6 = ResidualUnitEncode(filters=512, strides=2)(merge6)
    conv6 = keras.layers.UpSampling3D(size=(2,2,2))(conv6)
    up7 = ResidualUnitDecode(filters=256, strides=2)(conv6)
    merge7 = keras.layers.concatenate([conv3, up7], axis=4)
    conv7 = ResidualUnitEncode(filters=256, strides=2)(merge7)
    conv7 = keras.layers.UpSampling3D(size=(2, 2, 2))(conv7)
    up8 = ResidualUnitDecode(filters=128, strides=2)(conv7)
    merge8 = keras.layers.concatenate([conv2, up8], axis=4)
    conv8 = ResidualUnitEncode(filters=128, strides=2)(merge8)
    conv8 = keras.layers.UpSampling3D(size=(2, 2, 2))(conv8)
    up9 = ResidualUnitDecode(filters=64, strides=2)(conv8)
    merge9 = keras.layers.concatenate([conv1, up9], axis=4)
    conv9 = ResidualUnitDecode(filters=64, strides=2)(merge9)
    conv10 = keras.layers.Conv3D(1, 1, strides=(1,1,2), activation="sigmoid")(conv9)
    model = keras.Model(inputs, conv10)
    model.compile(optimizer=keras.optimizers.Adam(lr=0.001), loss=dice_loss)
    model.summary()
    return model
Here is the code to run the training using Kfold CV:
image_shape = [256,256,64,2]
dataset = tf.data.TFRecordDataset('train.tfrecord').map(parse_record).batch(69)
nx = tf.compat.v1.data.make_one_shot_iterator(dataset)
x, y = nx.get_next()
x_test = x[55:69, ...]
y_test = y[55:69, ...]
x_train = x[0:54, ...]
y_train = y[0:54, ...]
kfold = KFold(n_splits=10, shuffle=True)
fold_no = 1
acc_per_fold = []
loss_per_fold = []
for train, test in kfold.split(x_train, y_train):
    model = build_unet(image_shape=image_shape, batch_size=1)
    early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss')
    model_file_name = './Fold_' + str(fold_no) + '_best_model.h5'
    model_checkpoint = keras.callbacks.ModelCheckpoint(model_file_name, monitor='val_loss')
    log_dir_name = './Fold_' + str(fold_no) + '_log_dir'
    tb = keras.callbacks.TensorBoard(log_dir_name)
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')
    train_id_rows = tf.constant(train.reshape(-1,1))
    test_id_rows = tf.constant(test.reshape(-1,1))
    x_train_train = tf.gather_nd(x_train, train_id_rows)
    y_train_train = tf.gather_nd(y_train, train_id_rows)
    x_train_test = tf.gather_nd(x_train, test_id_rows)
    y_train_test = tf.gather_nd(y_train, test_id_rows)
    history = model.fit(x_train_train, y_train_train, epochs=N_EPOCHS, callbacks=[tb, model_checkpoint, early_stopping], batch_size=1)
    scores = model.evaluate(x_train_test, y_train_test, verbose=0)
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])
    fold_no = fold_no + 1
There are 69 total samples in the dataset, 54 used for the training/validation loop.
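For reference, the standard tf.distribute route is data parallelism: tf.distribute.MirroredStrategy replicates the whole model on every GPU and splits each batch across the replicas, so it does not split a single sample. A minimal sketch, assuming one sample still fits per GPU so a global batch of 4 feeds one sample to each of the 4 GPUs:
strategy = tf.distribute.MirroredStrategy()
print('Number of replicas:', strategy.num_replicas_in_sync)  # expect 4
with strategy.scope():
    # Build and compile inside the scope so variables are mirrored across GPUs.
    model = build_unet(image_shape=image_shape, batch_size=4)  # global batch size
Splitting one sample's activations across devices is model parallelism, which MirroredStrategy does not provide; if even a single sample causes OOM, the usual workarounds are reducing activation memory instead (fewer filters, patch-based training, or mixed precision).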

How to get internal variables for a custom loss function in tensorflow keras api?

I am trying to add an L1 loss on the batch normalization scaling factors for network slimming.
For example, let's take a simple MNIST classifier:
import tensorflow as tf
from tensorflow import keras

inputs = keras.Input(shape=(28, 28, 1))
conv_1 = keras.layers.Conv2D(
    32, kernel_size=(3, 3), padding='same', activation=tf.nn.relu)(inputs)
bn_1 = keras.layers.BatchNormalization()(conv_1)
conv_2 = keras.layers.Conv2D(
    32, kernel_size=(3, 3), padding='same', activation=tf.nn.relu)(bn_1)
bn_2 = keras.layers.BatchNormalization()(conv_2)
conv_3 = keras.layers.Conv2D(
    32, kernel_size=(3, 3), padding='same', activation=tf.nn.relu)(bn_2)
bn_3 = keras.layers.BatchNormalization()(conv_3)
conv_4 = keras.layers.Conv2D(
    32, kernel_size=(3, 3), padding='same', activation=tf.nn.relu)(bn_3)
bn_4 = keras.layers.BatchNormalization()(conv_4)
conv_5 = keras.layers.Conv2D(
    10, kernel_size=(3, 3), padding='same')(bn_4)
bn_5 = keras.layers.BatchNormalization()(conv_5)
gap = keras.layers.GlobalAveragePooling2D()(bn_5)
outputs = keras.layers.Activation('softmax')(gap)
model = keras.Model(inputs=inputs, outputs=outputs)
My goal is to find the relative importance of each convolution filter channel via the scaling factors of the bn_* layers so that I can remove them.
Is there a way to do this?
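One option that stays within the built-in Keras API (a sketch, not the only approach): BatchNormalization accepts a gamma_regularizer, so the L1 penalty can be attached to each layer's scale factors when the model is built, and the learned gammas can be read back after training to rank channels. The 1e-5 strength below is an illustrative placeholder:
# When building the model, penalize the BN scale factors directly:
bn_1 = keras.layers.BatchNormalization(
    gamma_regularizer=keras.regularizers.l1(1e-5))(conv_1)
# ... same for bn_2 through bn_5

# After training, read the learned scale factors per BN layer:
for layer in model.layers:
    if isinstance(layer, keras.layers.BatchNormalization):
        gammas = layer.gamma.numpy()  # one scaling factor per channel
        print(layer.name, abs(gammas))  # channels with small |gamma| are candidates to prune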