I have the following model:
embedding_matrix = np.zeros((vocab_size, 100))
for word, i in text_tokenizer.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

embedding_layer = Embedding(vocab_size,
                            100,
                            weights=[embedding_matrix],
                            input_length=50,
                            trainable=False)
sequence_input = Input(shape=(50,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
text_cnn = Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')(embedded_sequences)
text_cnn = Conv1D(filters=32, kernel_size=5, strides=1, padding='same', activation='relu')(text_cnn)
text_cnn = Conv1D(filters=16, kernel_size=5, strides=1, padding='same', activation='relu')(text_cnn)
text_lstm = Bidirectional(LSTM(256, return_sequences=True))(text_cnn)
text_lstm = Dense(128, activation='relu')(text_lstm)
text_lstm = Dropout(0.3)(text_lstm)
char_in = Input(shape=(50, 18))
char_cnn = Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')(char_in)
char_cnn = Conv1D(filters=32, kernel_size=5, strides=1, padding='same', activation='relu')(char_cnn)
char_cnn = Conv1D(filters=16, kernel_size=5, strides=1, padding='same', activation='relu')(char_cnn)
char_lstm = Bidirectional(LSTM(256, return_sequences=True))(char_cnn)
char_lstm = GaussianNoise(0.50)(char_lstm)
char_lstm = Dense(128, activation='relu')(char_lstm)
char_lstm = Dropout(0.3)(char_lstm)
merged = concatenate([char_lstm, text_lstm])
merged_d1 = Dense(512, activation='relu')(merged)
merged_d1 = Dropout(0.3)(merged_d1)
merged_d1 = Dense(256, activation='relu')(merged_d1)
merged_d1 = GaussianNoise(0.30)(merged_d1)
text_class = Dense(len(y_unique), activation='softmax')(merged_d1)
model = Model([sequence_input, char_in], text_class)
It is trained on multiple GPUs like so:
parallel_model = multi_gpu_model(model, gpus=4)
parallel_model.compile(loss='binary_crossentropy',
                       optimizer='adam', metrics=['accuracy'])
parallel_model.fit([x_train, x_train_char], y_train,
                   validation_data=([x_test, x_test_char], y_test),
                   epochs=1, batch_size=512)
I serialize it with the model.save() method.
When I try to load it on the same machine in a different kernel, I get the following error:
ValueError: Initializer for variable conv1d_4_1/kernel/ is from inside a control-flow construct, such as a loop or conditional. When creating a variable inside a loop or conditional, use a lambda as the initializer.
I'm not sure what loop or conditional it could be referring to; maybe it's something to do with the Bidirectional wrapper? I've serialized other, similar models without any issues.
Any help would be greatly appreciated!
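For what it's worth, a workaround often suggested for multi_gpu_model serialization problems (an assumption on my part, not something stated in the question) is to save only the weights of the single-GPU template model and rebuild the architecture before loading in the new kernel; a minimal sketch, with a hypothetical file name:

# Hypothetical sketch: persist weights from the template model ('model',
# not parallel_model), then rebuild the graph and reload the weights.
model.save_weights('text_char_model_weights.h5')

# In the new kernel: re-run the model-definition code above, then
model.load_weights('text_char_model_weights.h5')
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])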
I'm trying to implement a machine learning model (a variational autoencoder) whose architecture is:
start_filter_num = 32
kernel_size = 3
latent_dim = 16
x = tf.keras.Input(shape=(width,1))
conv_seq1 = conv_block_seq_res(x, start_filter_num, kernel_size, 1, "conv_seq1")
pool1 = tf.keras.layers.MaxPooling1D(name="pool1")(conv_seq1)
conv_seq2 = conv_block_seq_res(pool1, int(start_filter_num*1.5), kernel_size, 1, "conv_seq2")
pool2 = tf.keras.layers.MaxPooling1D(name="pool2")(conv_seq2)
conv_seq3 = conv_block_seq_res(pool2, start_filter_num*3, kernel_size, 1, "conv_seq3")
pool3 = tf.keras.layers.MaxPooling1D(name="pool3")(conv_seq3)
conv_seq4 = conv_block_seq_res(pool3, int(start_filter_num*4.5), kernel_size, 1, "conv_seq4")
pool4 = tf.keras.layers.MaxPooling1D(name="pool4")(conv_seq4)
conv_seq5 = conv_block_seq_res(pool4, start_filter_num*6, kernel_size, 1, "conv_seq5")
pool5 = tf.keras.layers.MaxPooling1D(name="pool5")(conv_seq5)
conv_seq6 = conv_block_seq_res(pool5, int(start_filter_num*7.5), kernel_size, 1, "conv_seq6", In=False)
pool6 = tf.keras.layers.MaxPooling1D(name="pool6")(conv_seq6)
conv_seq7 = conv_block_seq_res(pool6, start_filter_num*9, kernel_size, 1, "conv_seq7", In=False)
flatten1 = tf.keras.layers.Flatten()(conv_seq7)
z_mu = tf.keras.layers.Dense(latent_dim, name="z_mu")(flatten1)
z_log_var = tf.keras.layers.Dense(latent_dim, name="z_log_var")(flatten1)
###############################################################################
# normalize log variance to std dev
z_sigma = tf.keras.layers.Lambda(lambda t: K.exp(.5*t), name="z_sigma")(z_log_var)
eps = tf.keras.Input(tensor=K.random_normal(shape=(K.shape(x)[0], latent_dim)), name="eps")
z_eps = tf.keras.layers.Multiply(name="z_eps")([z_sigma, eps])
z = tf.keras.layers.Add(name="z")([z_mu, z_eps])
#latent_conv = tf.keras.layers.Dense(width//64, name="latent_conv")(z)
reshape1 = tf.keras.layers.Reshape([width//64,1], name="reshape1")(z)
###############################################################################
#New for conditional VAE
dconv_seq4 = conv_block_seq_res(reshape1, start_filter_num*9, kernel_size, 1, "dconv_seq4", In=False)
dconc5 = tf.keras.layers.concatenate([dconv_seq4, conv_seq7], name="dconc5")
deconv1 = Conv1DTranspose(dconc5, start_filter_num*2, kernel_size=3, strides=2, padding='same')
dconv_seq5 = conv_block_seq_res(deconv1, int(start_filter_num*7.5), kernel_size, 1, "dconv_seq5", In=False)
dconc7 = tf.keras.layers.concatenate([dconv_seq5, conv_seq6], name="dconc7")
deconv2 = Conv1DTranspose(dconc7, start_filter_num, kernel_size=3, strides=2, padding='same')
dconv_seq6 = conv_block_seq_res(deconv2, start_filter_num*6, kernel_size, 1, "dconv_seq6", In=False)
dconc9 = tf.keras.layers.concatenate([dconv_seq6, conv_seq5], name="dconc9")
deconv3 = Conv1DTranspose(dconc9, start_filter_num, kernel_size=3, strides=2, padding='same')
dconv_seq7 = conv_block_seq_res(deconv3, int(start_filter_num*4.5), kernel_size, 1, "dconv_seq7", In=False)
dconc11 = tf.keras.layers.concatenate([dconv_seq7, conv_seq4], name="dconc11")
deconv4 = Conv1DTranspose(dconc11, start_filter_num, kernel_size=3, strides=2, padding='same')
dconv_seq8 = conv_block_seq_res(deconv4, start_filter_num*3, kernel_size, 1, "dconv_seq8", In=False)
dconc13 = tf.keras.layers.concatenate([dconv_seq8, conv_seq3], name="dconc13")
deconv5 = Conv1DTranspose(dconc13, start_filter_num, kernel_size=3, strides=2, padding='same')
dconv_seq9 = conv_block_seq_res(deconv5, int(start_filter_num*1.5), kernel_size, 1, "dconv_seq9", In=False)
dconc15 = tf.keras.layers.concatenate([dconv_seq9, conv_seq2], name="dconc15")
deconv6 = Conv1DTranspose(dconc15, start_filter_num, kernel_size=3, strides=2, padding='same')
dconv_seq10 = conv_block_seq_res(deconv6, start_filter_num, kernel_size, 1, "dconv_seq10", In=False)
dconc17 = tf.keras.layers.concatenate([dconv_seq10, conv_seq1], name="dconc17")
x_pred = tf.keras.layers.Conv1D(1, 3, padding="same", activation="relu", name="x_pred")(dconc17)
model = tf.keras.Model(inputs=[x, eps], outputs=x_pred)
model.summary()
When I try to fit the model with this:
history = model.fit((x_train-main_mean)/main_std, (y_train-app_mean)/app_std, shuffle=True,
                    validation_split=nilm["training"]["validation_split"],
                    epochs=epochs, batch_size=batch_size, callbacks=list_callbacks, verbose=1, initial_epoch=0)
it gives me this error:
ValueError: Layer "model" expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 1024, 1) dtype=float32>]
I tried this:
latent_dim = 16
eps = tf.keras.Input(tensor=K.random_normal(shape=(K.shape(x_train)[0], latent_dim)))
history = model.fit([(x_train-main_mean)/main_std, eps], (y_train-app_mean)/app_std, shuffle=True,
                    # validation_split=nilm["training"]["validation_split"],
                    epochs=epochs, batch_size=batch_size, callbacks=list_callbacks, verbose=1, initial_epoch=0)
but it gives me this error:
TypeError: You are passing KerasTensor(type_spec=TensorSpec(shape=(), dtype=tf.float32, name=None), name='Placeholder:0', description="created by layer 'tf.cast_4'"), an intermediate Keras symbolic input/output, to a TF API that does not allow registering custom dispatchers, such as tf.cond, tf.function, gradient tapes, or tf.map_fn. Keras Functional model construction only supports TF API calls that do support dispatching, such as tf.math.add or tf.reshape. Other APIs cannot be called directly on symbolic Keras inputs/outputs. You can work around this limitation by putting the operation in a custom Keras layer call and calling that layer on this symbolic input/output.
I don't know how to solve this; any help would be appreciated. Thanks in advance.
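One way around both errors (a minimal sketch under my own assumptions, not the poster's code) is to drop the eps Input entirely and draw the noise inside a Lambda layer, so the model takes a single input at fit() time:

def sampling(args):
    # Reparameterization trick: z = mu + sigma * eps, with eps drawn per batch row
    z_mu, z_log_var = args
    eps = K.random_normal(shape=(K.shape(z_mu)[0], latent_dim))
    return z_mu + K.exp(0.5 * z_log_var) * eps

z = tf.keras.layers.Lambda(sampling, name="z")([z_mu, z_log_var])
# ... decoder as above, then build the model with a single input:
# model = tf.keras.Model(inputs=x, outputs=x_pred)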
BATCH_SIZE = 32 # ADVISED NOT TO CHANGE THIS
N_PAST = 10 # DO NOT CHANGE THIS
N_FUTURE = 10 # DO NOT CHANGE THIS
SHIFT = 1
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=32, kernel_size=5,
                           strides=1, padding="causal",
                           activation="relu",
                           input_shape=[None, 1]),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dense(30, activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(N_FEATURES)
])
I built a time-series forecasting model, but I can't figure out how to satisfy these constraints. Which layers should I use?
Model input shape must be (BATCH_SIZE, N_PAST = 10, N_FEATURES = 1)
Model output shape must be (BATCH_SIZE, N_FUTURE = 10, N_FEATURES = 1)
The batch size is not specified in the model.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=32, kernel_size=5,
                           strides=1, padding="causal",
                           activation="relu",
                           input_shape=[N_PAST, 1]),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dense(30, activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(N_FEATURES)
])
You don't need to specify batch_size in the input layer. Just change the input shape as follows:
input_shape=[N_PAST, 1]
With return_sequences=True the LSTM keeps all N_PAST timesteps, and since N_PAST equals N_FUTURE here, ending with Dense(N_FEATURES) gives the required (BATCH_SIZE, N_FUTURE, N_FEATURES) output.
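As a quick check (my own sketch, assuming N_PAST = N_FUTURE = 10 and N_FEATURES = 1), you can push a dummy batch through the model and confirm the output shape:

import numpy as np
dummy = np.zeros((32, N_PAST, 1), dtype="float32")  # any batch size works
print(model(dummy).shape)  # expected: (32, 10, 1)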
The callback saves checkpoint files, but not the SavedModel model.pb file. Additionally, when I load the model from the checkpoints, it does not restore the 'val_loss' value that I'm conditioning save_best_only on.
I tried calling model.save() only on the best iteration, but I had trouble getting that to work correctly, and it would be more convenient to use the ModelCheckpoint callback.
Here is the relevant code:
LOSS = tf.keras.losses.MeanSquaredError()
# multi-output: 3 categories, each from 0 to 1
model = ImgToClassSimpleContinuous(img_height, img_width)
checkpoint_filename = "../chkpts/ImgToClassSimpleContinuous/checkpoint_dir"
model.load_weights(checkpoint_filename)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filename,
                                                 verbose=1, mode='min', monitor="val_loss",
                                                 save_best_only=True, save_weights_only=False)
model.compile(
    optimizer='adam',
    loss=[LOSS, LOSS, LOSS],
    metrics=['mse'])
model.fit(
    dataset_to_use,
    validation_data=dataset_validation_batched,
    # validation_steps=50,
    epochs=MAX_EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[cp_callback]
)
class ImgToClassSimpleContinuous(Model):
    '''
    pair with loss = categorical_crossentropy
    '''
    in_types = [DataType.d]
    out_types = [DataType.tlc, DataType.tls, DataType.tll]

    def __init__(self, img_height, img_width, *args, **kwargs):
        super().__init__(*args, **kwargs)
        initializer = 'he_normal'
        input_shape = (img_height, img_width, 1)
        inputs = tf.keras.Input(shape=input_shape)
        flat_pix = layers.Flatten()(inputs)
        x = layers.Conv2D(8, 3, padding='same', kernel_initializer=initializer)(inputs)
        x = layers.PReLU()(x)
        x = layers.Conv2D(8, 3, padding='same', kernel_initializer=initializer)(x)
        x = layers.PReLU()(x)
        x = layers.MaxPooling2D(pool_size=(2, 2))(x)
        x = layers.BatchNormalization()(x)
        x = layers.Conv2D(16, 3, padding='same', kernel_initializer=initializer)(x)
        x = layers.PReLU()(x)
        x = layers.Conv2D(16, 3, padding='same', kernel_initializer=initializer)(x)
        x = layers.PReLU()(x)
        x = layers.MaxPooling2D(pool_size=(2, 2))(x)
        x = layers.BatchNormalization()(x)
        t = layers.Conv2D(32, 3, padding='same', kernel_initializer=initializer)(x)
        t = layers.PReLU()(t)
        t = layers.Conv2D(32, 3, padding='same', kernel_initializer=initializer)(t)
        t = layers.PReLU()(t)
        t = layers.MaxPooling2D(pool_size=(2, 2))(t)
        t = layers.BatchNormalization()(t)
        t = tf.keras.layers.GlobalAveragePooling2D()(t)
        t = layers.Flatten()(t)
        s = layers.Conv2D(32, 3, padding='same', kernel_initializer=initializer)(x)
        s = layers.PReLU()(s)
        s = layers.Conv2D(32, 3, padding='same', kernel_initializer=initializer)(s)
        s = layers.PReLU()(s)
        s = layers.MaxPooling2D(pool_size=(2, 2))(s)
        s = layers.BatchNormalization()(s)
        s = tf.keras.layers.GlobalAveragePooling2D()(s)
        s = layers.Flatten()(s)
        l = layers.Conv2D(32, 3, padding='same', kernel_initializer=initializer)(x)
        l = layers.PReLU()(l)
        l = layers.Conv2D(32, 3, padding='same', kernel_initializer=initializer)(l)
        l = layers.PReLU()(l)
        l = layers.MaxPooling2D(pool_size=(2, 2))(l)
        l = layers.BatchNormalization()(l)
        l = tf.keras.layers.GlobalAveragePooling2D()(l)
        l = layers.Flatten()(l)
        t = layers.Dense(1, activation='sigmoid')(t)
        s = layers.Dense(1, activation='sigmoid')(s)
        l = layers.Dense(1, activation='sigmoid')(l)
        # A Dense classifier with a single unit (binary classification)
        self.model = tf.keras.Model(inputs, [t, s, l])
        tf.keras.utils.plot_model(self.model, to_file="...", show_shapes=True)

    def call(self, x):
        return self.model(x)
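In case it is useful, here is a sketch of a commonly suggested workaround (my assumptions, not a confirmed fix): subclassed models checkpoint more reliably with save_weights_only=True, a full SavedModel can still be written manually with model.save(path) after training, and newer TF releases let you seed the save_best_only comparison across restarts via initial_value_threshold; best_val_loss_so_far below is a hypothetical value you would persist yourself:

cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filename,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    save_weights_only=True,  # weights-only checkpoints work for subclassed models
    initial_value_threshold=best_val_loss_so_far,  # hypothetical: resumes the 'best' comparison
    verbose=1)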
I've been trying to translate some PyTorch code to TensorFlow 2, but the TF2 code is around 10 times slower. I've tried to track down where the difference comes from, and as far as I can tell it's the tape.gradient call (performance was the same with Keras's .fit function). I've tried different data loaders, ways of declaring the model, installations, etc., and the results have been consistent.
Any explanation / solution as to why this is happening would be much appreciated.
Here is a minimalist version of the TF2 code:
import time
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 120, 18, 1)
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
train_dataset = train_dataset.batch(256)
# Create a small model
model = tf.keras.Sequential([
    layers.Conv1D(64, kernel_size=7, strides=3, padding="same", activation="relu"),
    layers.Conv1D(64, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(256, kernel_size=1, strides=1, padding="same", activation="relu"),
    layers.GlobalAveragePooling2D(),
    layers.Flatten(),
    layers.Dense(128, use_bias=True, activation="relu"),
    layers.Dense(32, use_bias=True, activation="relu"),
    layers.Dense(1, activation='sigmoid', use_bias=True),
])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=5e-4)
@tf.function
def train_step(data_batch, labels_batch):
    with tf.GradientTape() as tape:
        y_pred = model(data_batch)
        loss = tf.keras.losses.MSE(labels_batch, y_pred)
    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
step_times = []
for epoch in range(20):
    for data_batch, labels_batch in train_dataset:
        step_start_time = time.perf_counter()
        train_step(data_batch, labels_batch)
        if epoch != 0:
            step_times.append(time.perf_counter() - step_start_time)
print(f"Average training step time: {np.mean(step_times):.3f}s.")
And the PyTorch equivalent:
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.backends.cudnn.benchmark = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 18, 120)
# Create a small model
class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(18, 64, kernel_size=7, stride=3, padding=3)
        self.conv2 = nn.Conv1d(64, 64, kernel_size=5, stride=2, padding=2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=5, stride=2, padding=2)
        self.conv4 = nn.Conv1d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv1d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, inputs):
        x = F.relu(self.conv1(inputs))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = x.mean(2)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x
model = Model()
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)
loss_fn = torch.nn.MSELoss()
batch_size = 256
train_steps_per_epoch = train_data.shape[0] // batch_size
step_times = []
for epoch in range(20):
    for step in range(train_steps_per_epoch):
        batch_start, batch_end = step * batch_size, (step + 1) * batch_size
        data_batch = torch.FloatTensor(train_data[batch_start:batch_end]).to(device)
        labels_batch = torch.FloatTensor(train_labels[batch_start:batch_end]).to(device)
        step_start_time = time.perf_counter()
        optimizer.zero_grad()
        y_pred = model(data_batch)
        loss = loss_fn(labels_batch, torch.squeeze(y_pred))
        loss.backward()
        optimizer.step()
        if epoch != 0:
            step_times.append(time.perf_counter() - step_start_time)
print(f"Average training step time: {np.mean(step_times):.3f}s.")
You're using tf.GradientTape correctly, but the models and data differ between your two snippets: the TF data has an extra trailing dimension (1000, 120, 18, 1) while the PyTorch data is (1000, 18, 120), the last Conv1D uses kernel_size=1 instead of 3, and the pooling is GlobalAveragePooling2D instead of 1D.
Here is TF code that uses the same data and model architecture as your PyTorch model.
import time
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 120, 18)
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
train_dataset = train_dataset.batch(256)
model = tf.keras.Sequential([
    layers.Conv1D(64, kernel_size=7, strides=3, padding="same", activation="relu"),
    layers.Conv1D(64, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(256, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, use_bias=True, activation="relu"),
    layers.Dense(32, use_bias=True, activation="relu"),
    layers.Dense(1, activation='sigmoid', use_bias=True),
])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=5e-4)
@tf.function
def train_step(data_batch, labels_batch, model):
    with tf.GradientTape() as tape:
        y_pred = model(data_batch, training=True)
        loss = tf.keras.losses.MSE(labels_batch, y_pred)
    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
step_times = []
for epoch in range(20):
    for data_batch, labels_batch in train_dataset:
        step_start_time = time.perf_counter()
        train_step(data_batch, labels_batch, model)
        if epoch != 0:
            step_times.append(time.perf_counter() - step_start_time)
print(f"Average training step time: {np.mean(step_times):.3f}s.")
So, in reality, TF is about 3 times faster than PyTorch here: 0.035s vs. 0.112s per training step.
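One caveat I would add (my own note, not part of the original answer): GPU execution is asynchronous in both frameworks, so stopping the timer right after the call can under-count the real step cost. Fetching a result to the host forces the pending work to finish; a sketch:

@tf.function
def train_step(data_batch, labels_batch, model):
    with tf.GradientTape() as tape:
        y_pred = model(data_batch, training=True)
        loss = tf.keras.losses.MSE(labels_batch, y_pred)
    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
    return loss  # returning the loss lets the caller block on it

# In the timing loop, fetch the loss before stopping the timer;
# the PyTorch equivalent is torch.cuda.synchronize().
loss = train_step(data_batch, labels_batch, model)
_ = loss.numpy()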
I am working on a handwritten character recognition model. I created a CNN+BiLSTM+CTC-loss model, but I get an error when I run model.fit(). Please help me fix it.
My model:
# input with shape of height=32 and width=128
inputs = Input(shape=(32,128,1))
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# pooling layer with kernel size (2,2)
pool_1 = MaxPooling2D(pool_size=(2, 2), strides=2)(conv_1)
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPooling2D(pool_size=(2, 2), strides=2)(conv_2)
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# pooling layer with kernel size (2,1)
pool_4 = MaxPooling2D(pool_size=(2, 1))(conv_4)
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPooling2D(pool_size=(2, 1))(batch_norm_6)
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)
outputs = Dense(len(char_dict)+1, activation = 'softmax')(blstm_2)
act_model = Model(inputs, outputs)
Then I define a CTC loss model that takes the outputs of the previous model as its inputs:
labels = Input(name='the_labels', shape=[max_length], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
    [outputs, labels, input_length, label_length])
model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = 'adam')
model.fit(x=[input_array,
             output_array,
             train_input_length,
             train_label_length],
          y=np.zeros(input_array.shape[0]),
          batch_size=256,
          epochs=100,
          validation_data=([test_input_array, test_output_array,
                            valid_input_length, valid_label_length],
                           [np.zeros(test_input_array.shape[0])]),
          verbose=1,
          callbacks=callbacks_list)
The error I am getting is:
ValueError: Shape (None, 17) must have rank 1
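For what it's worth, K.ctc_batch_cost expects labels of shape (batch, max_length) and the two length tensors of shape (batch, 1), so a rank mismatch in one of the four arrays fed to fit() is a common cause of this error. A quick shape audit (my own debugging sketch, not a confirmed fix):

import numpy as np

for name, arr in [("input_array", input_array),
                  ("output_array", output_array),
                  ("train_input_length", train_input_length),
                  ("train_label_length", train_label_length)]:
    print(name, np.asarray(arr).shape)

# If the length arrays arrive flat as (N,), reshape them to (N, 1):
train_input_length = np.asarray(train_input_length).reshape(-1, 1)
train_label_length = np.asarray(train_label_length).reshape(-1, 1)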