Different seeds lead to extremely different results with modified U-Net - tensorflow

I am using a modified U-Net architecture to perform auto-segmentation on a dataset of biomedical images. Although I have achieved some decent results, I have noticed that the training of the model is highly dependent on the seed that I set at the beginning. Using the same seeds and performing multiple runs of my code with those seeds results in very repeatable results. However, with the exact same code, hyperparameters, and training/test set images, the results vary wildly with different seeds. My loss function is the dice coefficient loss (the primary outcome metric that I am concerned with is the dice coefficient) and with some seeds my loss will level off at about 0.95 and only go down around 0.01 over the course of many many epochs and with other seeds my loss won't start to level off until about 0.10. The only difference is the seed. Since the total range for dice coefficient loss is only 0-1 these values represent extremely different results.
As additional information, this phenomenon has occurred for different training set sizes ranging from a few hundred images to a few thousand images. I have double checked and do not believe there to be an issues with my data. Additionally, my dataset is highly unbalanced (only about 3% of my pixels are the region I am trying to segment).
Things I have already tried:
Using alternative loss functions such as binary cross entropy, focal loss, tversky loss, and combined binary cross entropy and dice loss
Adjusting hyperparameters: learning rate (I am using adam optimizer), batch size, filter sizes, model depth
Trying different kernel initializers
Different activations functions (relu vs leaky relu)
Gradient clipping
Batch normalization
Dropout
Any suggestions of how I can solve this issue would be greatly appreciated. This issue has stalled my progress significantly and as I add to my training set the issue seems to exacerbated further by causing me to have to test quite a few seed options before finding one that allows my model to train correctly.
Below is my code starting after I import my images and modules, crop the images and masks, and put them into arrays:
Please note that in my full code setting the seeds and hyperparameters goes at the top.
from numpy.random import seed
seed(3)
from tensorflow import set_random_seed
set_random_seed(4)
# Define Parameters
batch_size = 16
batch_size_test = 1
filter_size = 8
kernel_dimension = 5
learning_rate = 1e-4
num_epochs = 25
# these are functions for pairing the image to its respective mask
def get_dataset(images, mask, batch_size):
dataset_input = tf.data.Dataset.from_tensor_slices(tf.constant(images, dtype=tf.float32)) #converts to tf type
dataset_mask = tf.data.Dataset.from_tensor_slices(tf.constant(mask, dtype=tf.float32)) #converts to tf type
dataset_input = dataset_input.map(lambda x: tf.image.per_image_standardization(x)) #standardizes the image
dataset_input = dataset_input.map(lambda x: tf.image.adjust_contrast(x,1.2)) #adds some contrast
dataset = tf.data.Dataset.zip((dataset_input, dataset_mask)) #pairs the images to the masks into one tf array
dataset = dataset.shuffle(len(images)).repeat() #randomly shuffles dataset and repeats the dataset
dataset = dataset.batch(batch_size).prefetch(batch_size) # set the batch size
print('image shape: ', dataset.output_shapes[0])
print('label shape: ', dataset.output_shapes[1])
print('types: ', dataset.output_types)
print()
print(dataset)
return dataset
def get_dataset_noshuffle(images, mask, batch_size):
dataset_input = tf.data.Dataset.from_tensor_slices(tf.constant(images, dtype=tf.float32))
dataset_mask = tf.data.Dataset.from_tensor_slices(tf.constant(mask, dtype=tf.float32))
dataset_input = dataset_input.map(lambda x: tf.image.per_image_standardization(x))
dataset_input = dataset_input.map(lambda x: tf.image.adjust_contrast(x,1.2))
dataset = tf.data.Dataset.zip((dataset_input, dataset_mask))
dataset = dataset.batch(batch_size).prefetch(batch_size)
print('image shape: ', dataset.output_shapes[0])
print('label shape: ', dataset.output_shapes[1])
print('types: ', dataset.output_types)
print()
print(dataset)
return dataset
X_train, X_test, y_train, y_test = train_test_split(images, mask, test_size=0.0001, random_state=42)
X_test = testimages # if you want to use a separate set of images that you imported earlier then use this
y_test = testmask # and this
# use the get_dataset function to pair the X_train with y_train and X_test with y_test. adjust batch size as needed
train_dataset = get_dataset(X_train, y_train, batch_size)
test_dataset = get_dataset(X_test, y_test, batch_size_test)
test_dataset_noshuffle = get_dataset_noshuffle(X_test, y_test, batch_size_test)
def dice_coef(y_true, y_pred):
smooth = 1.
y_true_f = tf.keras.backend.flatten(y_true)
y_pred_f = tf.keras.backend.flatten(y_pred)
intersection = tf.keras.backend.sum(y_true_f * y_pred_f)
return (2. * intersection + smooth) / (tf.keras.backend.sum(y_true_f) + tf.keras.backend.sum(y_pred_f) + smooth)
def dice_coef_loss(y_true, y_pred):
return 1. - dice_coef(y_true, y_pred)
def unet(pretrained_weights = None,input_size = (size,size,1), df=filter_size, kernel_size = kernel_dimension):
inputs = Input(input_size)
conv1 = Conv2D(df, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(inputs)
conv1 = BatchNormalization()(conv1)
conv1 = Conv2D(df, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv1)
conv1 = BatchNormalization()(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(df*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool1)
conv2 = BatchNormalization()(conv2)
conv2 = Conv2D(df*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv2)
conv2 = BatchNormalization()(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = Conv2D(df*2*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool2)
conv3 = BatchNormalization()(conv3)
conv3 = Conv2D(df*2*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv3)
conv3 = BatchNormalization()(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
conv4 = Conv2D(df*2*2*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool3)
conv4 = BatchNormalization()(conv4)
conv4 = Conv2D(df*2*2*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv4)
conv4 = BatchNormalization()(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
conv5 = Conv2D(df*2*2*2*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool4)
conv5 = BatchNormalization()(conv5)
conv5 = Conv2D(df*2*2*2*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv5)
conv5 = BatchNormalization()(conv5)
up6 = Conv2D(df*2*2*2, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv5))
merge6 = concatenate([conv4,up6], axis = 3)
conv6 = Conv2D(df*2*2*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge6)
conv6 = BatchNormalization()(conv6)
conv6 = Conv2D(df*2*2*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv6)
conv6 = BatchNormalization()(conv6)
up7 = Conv2D(df*2*2, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv6))
merge7 = concatenate([conv3,up7], axis = 3)
conv7 = Conv2D(df*2*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge7)
conv7 = BatchNormalization()(conv7)
conv7 = Conv2D(df*2*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv7)
conv7 = BatchNormalization()(conv7)
up8 = Conv2D(df*2, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv7))
merge8 = concatenate([conv2,up8], axis = 3)
conv8 = Conv2D(df*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge8)
conv8 = BatchNormalization()(conv8)
conv8 = Conv2D(df*2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv8)
conv8 = BatchNormalization()(conv8)
up9 = Conv2D(df, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv8))
merge9 = concatenate([conv1,up9], axis = 3)
conv9 = Conv2D(df, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge9)
conv9 = BatchNormalization()(conv9)
conv9 = Conv2D(df, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
conv9 = BatchNormalization()(conv9)
conv9 = Conv2D(2, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
conv10 = Conv2D(1, (1,1), activation = 'sigmoid')(conv9)
model = Model(inputs, conv10)
model.compile(optimizer = Adam(lr = learning_rate), loss = dice_coef_loss, metrics = ['accuracy','binary_accuracy', 'mae',
tf.keras.metrics.Precision(name='precision'), dice_coef, jacard_coef,
tf.keras.metrics.FalseNegatives(thresholds=0.5, name='FN', dtype=None),
tf.keras.metrics.FalsePositives(thresholds=0.5, name='FP', dtype=None),
tf.keras.metrics.TrueNegatives(thresholds=0.5, name='TN', dtype=None),
tf.keras.metrics.TruePositives(thresholds=0.5, name='TP', dtype=None)])
model.summary() #if you want to printout all the parameters and model summary
if(pretrained_weights):
model.load_weights(pretrained_weights)
return model
model = unet(pretrained_weights=None, df=filter_size, input_size=(size, size, 1), kernel_size=kernel_dimension)
steps_epoch = np.int16(np.ceil(len(X_train)/batch_size)) # determines your steps per epoch
steps_val = np.int16(np.ceil(len(X_test)/batch_size_test)) # determines your steps for the test set
model_checkpoint = ModelCheckpoint(weights_name, monitor='val_loss',verbose=1, save_best_only=True)
#This is the actual training part of the code
history = model.fit(train_dataset, validation_data=test_dataset, steps_per_epoch=steps_epoch,
validation_steps=steps_val, batch_size=batch_size, epochs=num_epochs, verbose=1, callbacks=[model_checkpoint])

This problem is sometimes noticed when you do not have enough training data. Get more training data. If you cannot get additional data, you can increase the data through augmentation techniques. Generally with enough data points the model converges to a minima - if not stuck in local or saddle. Another approach is to use a pre-trained model and fine tune on it. As I see you are initializing the model from scratch -
model = unet(pretrained_weights=None, df=filter_size, input_size=(size, size, 1), kernel_size=kernel_dimension)

Related

All predictions returning same value after update to tensorflow-gpu

everything was going well with these cnn to classify MarkovTransitionField sequences, when I was using tensorflow, but then I changed to tensorflow-gpu and all predictions return same value, so model isn't learning (but is fast)
#model 1
def model_1_signal():
model = Sequential()
model.add(Conv2D(73, (5,5), strides = (2,2), activation = 'relu',
padding = 'same', input_shape = (145,5,5),
kernel_initializer = 'he_normal',
bias_initializer = 'zeros'))
model.add(Conv2D(73, (5,5), strides = (2,2), activation = 'relu',
padding = 'same', kernel_initializer = 'he_normal',
bias_initializer = 'zeros'))
model.add(Flatten())
model.add(Dense(2, activation = 'sigmoid',
kernel_initializer = 'glorot_uniform',
bias_initializer = 'zeros'))
model.compile(loss = 'categorical_crossentropy',
optimizer = 'adam',
metrics = ['accuracy'])
return model
def model_2_signal():
model = Sequential()
model.add(Conv2D(73, (5,5), activation = 'relu',
padding = 'same', input_shape = (145,5,5)))
model.add(Dropout(0.2))
model.add(Conv2D(73, (5,5), strides = (2,2), activation = 'relu',
padding = 'same'))
model.add(Dropout(0.2))
model.add(Conv2D(73, (5,5), strides = (2,2), activation = 'relu',
padding = 'same'))
model.add(Dropout(0.2))
model.add(Conv2D(73, (5,5), strides = (2,2), activation = 'relu',
padding = 'same'))
model.add(MaxPooling2D(pool_size = (1,1),strides = 3))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(2, activation = 'sigmoid'))
model.compile(loss = 'categorical_crossentropy',
optimizer = 'adam',
metrics = ['accuracy'])
return model
EPOCHS = 300
BATCH_SIZE = 15
train_X, val_X, train_y, val_y, train_date, val_date = train_test_split(bullish_episodes_img,y,date,test_size = 0.13, shuffle = False)
val_X, test_X, val_y, test_y, val_date, test_date = train_test_split(val_X, val_y, val_date, test_size = 0.38, shuffle = False)
model_1 = model_1_signal()
model_1.fit(train_X,train_y, validation_data=(val_X,val_y),
epochs = EPOCHS, batch_size = BATCH_SIZE, verbose = 2,
shuffle = True)
yhat_model1 = model_1.predict(test_X)
yhat_model1 = np.where(yhat_model1 >= 0.5, 1, 0)
df1 = pd.DataFrame({'signal':yhat_model1[:,1],'test':test_y[:,1],'time':test_date})
df1 = df1.sort_values(by='time')
#model_1.save('/home/f320x/Documents/AT/Py (1)/final_project/new_standard/spyder/spyder/EURCAD/signal_model/signal_model1')
model_2 = model_2_signal()
model_2.fit(train_X,train_y, validation_data=(val_X,val_y),
epochs = EPOCHS, batch_size = BATCH_SIZE, verbose = 2,
shuffle = True)
yhat_model2 = model_2.predict(test_X)
yhat_model2 = np.where(yhat_model2 >= 0.5, 1, 0)
df2 = pd.DataFrame({'signal':yhat_model2[:,1],'test':test_y[:,1],'time':test_date})
df2 = df2.sort_values(by='time')
before transform data to MarkovTransitionField, i scaled all values between 0 and 1 and there's no nan in dataset.
does somebody has a hint?

CNN trained on MNIST data set always show same output for all inputs

I am using a Keras CNN for handwritten digit recognition. I downloaded dataset from Kaggle.The preprocessing is :
train = pd.read_csv("./input/train.csv")
test = pd.read_csv("./input/test.csv")
Y_train = train["label"]
X_train = train.drop(labels = ["label"],axis = 1)
X_train = X_train / 255.0
test = test / 255.0
X_train = X_train.values.reshape(-1,28,28,1)
test = test.values.reshape(-1,28,28,1)
Y_train = to_categorical(Y_train, num_classes = 10)
random_seed = 2
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size = 0.1, random_state=random_seed)
My model looks like:
model = Sequential()
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same',
activation ='relu', input_shape = (28,28,1)))
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same',
activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same',
activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same',
activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation = "softmax"))
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
patience=3,
verbose=1,
factor=0.5,
min_lr=0.00001)
epochs = 1
batch_size = 86
datagen = ImageDataGenerator(
featurewise_center=False, # set input mean to 0 over the dataset
samplewise_center=False, # set each sample mean to 0
featurewise_std_normalization=False, # divide inputs by std of the dataset
samplewise_std_normalization=False, # divide each input by its std
zca_whitening=False, # apply ZCA whitening
rotation_range=10, # randomly rotate images in the range (degrees, 0 to 180)
zoom_range = 0.1, # Randomly zoom image
width_shift_range=0.1, # randomly shift images horizontally (fraction of total width)
height_shift_range=0.1, # randomly shift images vertically (fraction of total height)
horizontal_flip=False, # randomly flip images
vertical_flip=False) # randomly flip images
datagen.fit(X_train)
history = model.fit(datagen.flow(X_train,Y_train, batch_size=batch_size),
epochs = epochs,
validation_data = (X_val,Y_val),
verbose = 2,
steps_per_epoch=X_train.shape[0] // batch_size,
callbacks=[learning_rate_reduction])
Predicting my input:
def predict(image):
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = cv2.resize(image, (28, 28))
image = image.astype('float32')
image = image.reshape(1, 28, 28, 1)
image /= 255
model = load_model('./model.h5')
pred = model.predict(image, batch_size=1)
print("Predicted Number: ", pred.argmax())
predict(cv2.imread('./testImages/1.png'))
What am I doing wrong?
Desired result is the digit that is provided as an input image, instead I get the same output (i.e. digit 8) for every input.

How to extract the bottleneck layer from the below architecture?

I have created an model (down below). And after training, I want to get the output tensor from the bottleneck layers of this model.
So I am trying to create a model of the extracted layers and use this model for predicting.
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.layers import Dense, Activation
nstrides = (1,1)
inputs = layers.Input(imshape)
conv01 = layers.Conv2D(32, 4, activation = 'relu',
strides = nstrides, padding="same")(inputs)
conv1 = layers.Conv2D(32, 4, activation = 'relu',
strides = nstrides, padding="same")(conv01)
pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)
.
.
#block4
conv04 = layers.Conv2D(256, 4, activation = 'relu',
strides = nstrides, padding="same")(pool3)
conv4 = layers.Conv2D(256, 4, activation = 'relu',
strides = nstrides, padding="same")(conv04)
pool4 = layers.MaxPooling2D(pool_size=(2, 2))(conv4)
#bottlneck
conv05 = layers.Conv2D(512, 4, activation = 'relu',
strides = nstrides, padding="same")(pool4)
conv5 = layers.Conv2D(512, 4, activation = 'relu',
strides = nstrides, padding="same")(conv05)
upconv5 = layers.Conv2DTranspose(256, kernel_size=(2, 2),
strides = (2,2))(conv5)
#upblock 1
conc6 = layers.concatenate([upconv5, conv4])
conv06 = layers.Conv2D(256, 4, activation = 'relu',
strides = nstrides, padding="same")(conc6)
conv6 = layers.Conv2D(256, 4, activation = 'relu',
strides = nstrides, padding="same")(conv06)
up7 = layers.Conv2DTranspose(126, kernel_size=(2, 2),
strides = (2,2))(conv7)
.
.
.
#combine the model together
model = Model(inputs, outputs)
First, in order to locate the desired layer which would be the new output tensor, you can first do
for i, layer in enumerate(model.layers):
print(i, layer.name)
...
...
12 max_pooling2d_15
13 conv2d_65
14 conv2d_66
15 conv2d_transpose_12
16 concatenate_12
17 conv2d_67
...
...
Here, the layer index from 13 to 15 is from the bottleneck layer of your model. If you want to get the output tensor from this bottleneck layer, you can do:
new_model = Model(model.input,
model.get_layer(index=15).output)
# or,
new_model = Model(model.input,
model.get_layer(name='conv2d_transpose_12').output)
Both are the same, the first one is by index and the second one is by layer name.

How to add dropout in CNN and LSTM architecture to handle overfitting problem, TensorFlow

My network architecture is the combination of 7 layers of CNN and 2 layers of BiLSTM, when i trained my model it shows overfitting, one of the solution to deal with this problem is Dropout in the architecture. How we can add dropout in this network architecture.
# input with shape of height=42 and width=600
inputs = Input(shape=(42,600,1))
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# poolig layer with kernel size (2,2)
pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# poolig layer with kernel size (2,1)
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.5))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.5))(blstm_1)
outputs = Dense(len(char_list)+1, activation = 'softmax')(blstm_2)
# model to be used at test time
act_model = Model(inputs, outputs)
The accuracy and loss graph of trained model is:

ValueError: Shape (None, 17) must have rank 1

I am working on a hand character recognition model. I created a CNN+BiLSTM+CTC Loss model. But getting error when I run model.fit(). Please help me fix this error.
My Model
# input with shape of height=32 and width=128
inputs = Input(shape=(32,128,1))
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# poolig layer with kernel size (2,2)
pool_1 = MaxPooling2D(pool_size=(2, 2), strides=2)(conv_1)
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPooling2D(pool_size=(2, 2), strides=2)(conv_2)
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# poolig layer with kernel size (2,1)
pool_4 = MaxPooling2D(pool_size=(2, 1))(conv_4)
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPooling2D(pool_size=(2, 1))(batch_norm_6)
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)
outputs = Dense(len(char_dict)+1, activation = 'softmax')(blstm_2)
act_model = Model(inputs, outputs)
Define a CTC loss model that takes the outputs of previous model as inputs
labels = Input(name='the_labels', shape=[max_length], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
def ctc_lambda_func(args):
y_pred, labels, input_length, label_length = args
return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels, input_length,
label_length])
model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = 'adam')
model.fit(x=[input_array,
output_array,
train_input_length,
train_label_length],
y=np.zeros(input_array.shape[0]),
batch_size=256,
epochs = 100,
validation_data = ([test_input_array, test_output_array, valid_input_length,
valid_label_length], [np.zeros(test_input_array.shape[0])]),
verbose = 1,
callbacks = callbacks_list)
The error I am getting is
ValueError: Shape (None, 17) must have rank 1