Multi-input Model - TensorFlow

I'm trying to implement a machine learning model (a variational autoencoder) whose architecture is:
import tensorflow as tf
from tensorflow.keras import backend as K

# (width, conv_block_seq_res and Conv1DTranspose are defined elsewhere)
start_filter_num = 32
kernel_size = 3
latent_dim = 16
x = tf.keras.Input(shape=(width,1))
conv_seq1 = conv_block_seq_res(x, start_filter_num, kernel_size, 1, "conv_seq1")
pool1 = tf.keras.layers.MaxPooling1D(name="pool1")(conv_seq1)
conv_seq2 = conv_block_seq_res(pool1, int(start_filter_num*1.5), kernel_size, 1, "conv_seq2")
pool2 = tf.keras.layers.MaxPooling1D(name="pool2")(conv_seq2)
conv_seq3 = conv_block_seq_res(pool2, start_filter_num*3, kernel_size, 1, "conv_seq3")
pool3 = tf.keras.layers.MaxPooling1D(name="pool3")(conv_seq3)
conv_seq4 = conv_block_seq_res(pool3, int(start_filter_num*4.5), kernel_size, 1, "conv_seq4")
pool4 = tf.keras.layers.MaxPooling1D(name="pool4")(conv_seq4)
conv_seq5 = conv_block_seq_res(pool4, start_filter_num*6, kernel_size, 1, "conv_seq5")
pool5 = tf.keras.layers.MaxPooling1D(name="pool5")(conv_seq5)
conv_seq6 = conv_block_seq_res(pool5, int(start_filter_num*7.5), kernel_size, 1, "conv_seq6", In=False)
pool6 = tf.keras.layers.MaxPooling1D(name="pool6")(conv_seq6)
conv_seq7 = conv_block_seq_res(pool6, start_filter_num*9, kernel_size, 1, "conv_seq7", In=False)
flatten1 = tf.keras.layers.Flatten()(conv_seq7)
z_mu = tf.keras.layers.Dense(latent_dim, name="z_mu")(flatten1)
z_log_var = tf.keras.layers.Dense(latent_dim, name="z_log_var")(flatten1)
###############################################################################
# convert log variance to standard deviation
z_sigma = tf.keras.layers.Lambda(lambda t: K.exp(.5*t), name="z_sigma")(z_log_var)
eps = tf.keras.Input(tensor=K.random_normal(shape=(K.shape(x)[0], latent_dim)), name="eps")
z_eps = tf.keras.layers.Multiply(name="z_eps")([z_sigma, eps])
z = tf.keras.layers.Add(name="z")([z_mu, z_eps])
#latent_conv = tf.keras.layers.Dense(width//64, name="latent_conv")(z)
reshape1 = tf.keras.layers.Reshape([width//64,1], name="reshape1")(z)
###############################################################################
#New for conditional VAE
dconv_seq4 = conv_block_seq_res(reshape1, start_filter_num*9, kernel_size, 1, "dconv_seq4", In=False)
dconc5 = tf.keras.layers.concatenate([dconv_seq4, conv_seq7], name="dconc5")
deconv1 = Conv1DTranspose(dconc5, start_filter_num*2, kernel_size=3, strides=2, padding='same')
dconv_seq5 = conv_block_seq_res(deconv1, int(start_filter_num*7.5), kernel_size, 1, "dconv_seq5", In=False)
dconc7 = tf.keras.layers.concatenate([dconv_seq5, conv_seq6], name="dconc7")
deconv2 = Conv1DTranspose(dconc7, start_filter_num, kernel_size=3, strides=2, padding='same')
dconv_seq6 = conv_block_seq_res(deconv2, start_filter_num*6, kernel_size, 1, "dconv_seq6", In=False)
dconc9 = tf.keras.layers.concatenate([dconv_seq6, conv_seq5], name="dconc9")
deconv3 = Conv1DTranspose(dconc9, start_filter_num, kernel_size=3, strides=2, padding='same')
dconv_seq7 = conv_block_seq_res(deconv3, int(start_filter_num*4.5), kernel_size, 1, "dconv_seq7", In=False)
dconc11 = tf.keras.layers.concatenate([dconv_seq7, conv_seq4], name="dconc11")
deconv4 = Conv1DTranspose(dconc11, start_filter_num, kernel_size=3, strides=2, padding='same')
dconv_seq8 = conv_block_seq_res(deconv4, start_filter_num*3, kernel_size, 1, "dconv_seq8", In=False)
dconc13 = tf.keras.layers.concatenate([dconv_seq8, conv_seq3], name="dconc13")
deconv5 = Conv1DTranspose(dconc13, start_filter_num, kernel_size=3, strides=2, padding='same')
dconv_seq9 = conv_block_seq_res(deconv5, int(start_filter_num*1.5), kernel_size, 1, "dconv_seq9", In=False)
dconc15 = tf.keras.layers.concatenate([dconv_seq9, conv_seq2], name="dconc15")
deconv6 = Conv1DTranspose(dconc15, start_filter_num, kernel_size=3, strides=2, padding='same')
dconv_seq10 = conv_block_seq_res(deconv6, start_filter_num, kernel_size, 1, "dconv_seq10", In=False)
dconc17 = tf.keras.layers.concatenate([dconv_seq10, conv_seq1], name="dconc17")
x_pred = tf.keras.layers.Conv1D(1, 3, padding="same", activation="relu", name="x_pred")(dconc17)
model = tf.keras.Model(inputs=[x, eps], outputs=x_pred)
model.summary()
When I try to fit the model with:
history = model.fit((x_train-main_mean)/main_std, (y_train-app_mean)/app_std, shuffle=True, validation_split=nilm["training"]["validation_split"],
                    epochs=epochs, batch_size=batch_size, callbacks=list_callbacks, verbose=1, initial_epoch=0)
which, of course, gave me this error:
ValueError: Layer "model" expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 1024, 1) dtype=float32>]
I tried this:
latent_dim = 16
eps = tf.keras.Input(tensor=K.random_normal(shape=(K.shape(x_train)[0], latent_dim)))
history = model.fit([(x_train-main_mean)/main_std, eps], (y_train-app_mean)/app_std, shuffle=True,  # validation_split=nilm["training"]["validation_split"],
                    epochs=epochs, batch_size=batch_size, callbacks=list_callbacks, verbose=1, initial_epoch=0)
but it gives me this error:
TypeError: You are passing KerasTensor(type_spec=TensorSpec(shape=(), dtype=tf.float32, name=None), name='Placeholder:0', description="created by layer 'tf.cast_4'"), an intermediate Keras symbolic input/output, to a TF API that does not allow registering custom dispatchers, such as tf.cond, tf.function, gradient tapes, or tf.map_fn. Keras Functional model construction only supports TF API calls that do support dispatching, such as tf.math.add or tf.reshape. Other APIs cannot be called directly on symbolic Keras inputs/outputs. You can work around this limitation by putting the operation in a custom Keras layer call and calling that layer on this symbolic input/output.
I don't know how to solve it. I'd appreciate any help.
Thanks in advance.
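The TypeError's own suggestion points at the usual fix here: instead of feeding eps as a second Input, draw it inside a custom layer, much like the Keras VAE example does for the reparameterization trick. A minimal sketch (assuming the encoder/decoder code above stays unchanged):

class Sampling(tf.keras.layers.Layer):
    """Reparameterization trick: z = z_mu + exp(0.5 * z_log_var) * eps."""
    def call(self, inputs):
        z_mu, z_log_var = inputs
        eps = tf.random.normal(shape=tf.shape(z_mu))  # drawn per batch, inside the layer
        return z_mu + tf.exp(0.5 * z_log_var) * eps

# replaces the z_sigma / eps / z_eps / z lines above
z = Sampling(name="z")([z_mu, z_log_var])

# the model now has a single input, so fit() can be called
# with just the (normalized) x_train array
model = tf.keras.Model(inputs=x, outputs=x_pred)

Because eps is no longer a model input, both the "expects 2 input(s)" error and the symbolic-tensor TypeError go away.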

Related

TF2 code 10 times slower than equivalent PyTorch code for a Conv1D network

I've been trying to translate some PyTorch code to TensorFlow 2, but the TF2 code is around 10 times slower. I've tried to track down where this comes from, and as far as I can tell it comes from the tape.gradient call (performance was the same with Keras's .fit function). I've tried different data loaders, ways of declaring the model, installations, etc., and the results have been consistent.
Any explanation / solution as to why this is happening would be much appreciated.
Here is a minimalist version of the TF2 code:
import time
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np

# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 120, 18, 1)
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
train_dataset = train_dataset.batch(256)

# Create a small model
model = tf.keras.Sequential([
    layers.Conv1D(64, kernel_size=7, strides=3, padding="same", activation="relu"),
    layers.Conv1D(64, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(256, kernel_size=1, strides=1, padding="same", activation="relu"),
    layers.GlobalAveragePooling2D(),
    layers.Flatten(),
    layers.Dense(128, use_bias=True, activation="relu"),
    layers.Dense(32, use_bias=True, activation="relu"),
    layers.Dense(1, activation='sigmoid', use_bias=True),
])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=5e-4)

#tf.function
def train_step(data_batch, labels_batch):
    with tf.GradientTape() as tape:
        y_pred = model(data_batch)
        loss = tf.keras.losses.MSE(labels_batch, y_pred)
    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))

step_times = []
for epoch in range(20):
    for data_batch, labels_batch in train_dataset:
        step_start_time = time.perf_counter()
        train_step(data_batch, labels_batch)
        if epoch != 0:
            step_times.append(time.perf_counter()-step_start_time)
print(f"Average training step time: {np.mean(step_times):.3f}s.")
And the PyTorch equivalent:
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.backends.cudnn.benchmark = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Generate some fake data
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 18, 120)

# Create a small model
class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(18, 64, kernel_size=7, stride=3, padding=3)
        self.conv2 = nn.Conv1d(64, 64, kernel_size=5, stride=2, padding=2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=5, stride=2, padding=2)
        self.conv4 = nn.Conv1d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv1d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, inputs):
        x = F.relu(self.conv1(inputs))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = x.mean(2)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

model = Model()
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)
loss_fn = torch.nn.MSELoss()

batch_size = 256
train_steps_per_epoch = train_data.shape[0] // batch_size
step_times = []
for epoch in range(20):
    for step in range(train_steps_per_epoch):
        batch_start, batch_end = step * batch_size, (step+1) * batch_size
        data_batch = torch.FloatTensor(train_data[batch_start:batch_end]).to(device)
        labels_batch = torch.FloatTensor(train_labels[batch_start:batch_end]).to(device)
        step_start_time = time.perf_counter()
        optimizer.zero_grad()
        y_pred = model(data_batch)
        loss = loss_fn(labels_batch, torch.squeeze(y_pred))
        loss.backward()
        optimizer.step()
        if epoch != 0:
            step_times.append(time.perf_counter()-step_start_time)
print(f"Average training step time: {np.mean(step_times):.3f}s.")
You're using tf.GradientTape correctly, but both your models and data are different in the snippets you provided.
Here is the TF code that uses the same data and model architecture as your PyTorch model.
import time
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np

# Generate some fake data (same shape as the PyTorch version)
train_labels = np.random.randint(10, size=1000)
train_data = np.random.rand(1000, 120, 18)
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
train_dataset = train_dataset.batch(256)

model = tf.keras.Sequential([
    layers.Conv1D(64, kernel_size=7, strides=3, padding="same", activation="relu"),
    layers.Conv1D(64, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=5, strides=2, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(128, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.Conv1D(256, kernel_size=3, strides=1, padding="same", activation="relu"),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, use_bias=True, activation="relu"),
    layers.Dense(32, use_bias=True, activation="relu"),
    layers.Dense(1, activation='sigmoid', use_bias=True),
])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=5e-4)

#tf.function
def train_step(data_batch, labels_batch, model):
    with tf.GradientTape() as tape:
        y_pred = model(data_batch, training=True)
        loss = tf.keras.losses.MSE(labels_batch, y_pred)
    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))

step_times = []
for epoch in range(20):
    for data_batch, labels_batch in train_dataset:
        step_start_time = time.perf_counter()
        train_step(data_batch, labels_batch, model)
        if epoch != 0:
            step_times.append(time.perf_counter()-step_start_time)
print(f"Average training step time: {np.mean(step_times):.3f}s.")
So, in reality, TF is 3 times faster than PyTorch: 0.035s vs 0.112s.
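One further note (untested here, so treat it as a sketch): in both snippets the tf.function decorator is commented out, which means every training step runs eagerly. Decorating the step usually trims per-step Python overhead once the function has been traced:

@tf.function
def train_step(data_batch, labels_batch, model):
    # same body as above; after the first call TF replays the traced graph
    with tf.GradientTape() as tape:
        y_pred = model(data_batch, training=True)
        loss = tf.keras.losses.MSE(labels_batch, y_pred)
    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))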

ValueError: Shape (None, 17) must have rank 1

I am working on a handwritten character recognition model. I created a CNN+BiLSTM+CTC loss model, but I get an error when I run model.fit(). Please help me fix this error.
My Model
# input with shape of height=32 and width=128
inputs = Input(shape=(32,128,1))
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# pooling layer with kernel size (2,2)
pool_1 = MaxPooling2D(pool_size=(2, 2), strides=2)(conv_1)
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPooling2D(pool_size=(2, 2), strides=2)(conv_2)
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# pooling layer with kernel size (2,1)
pool_4 = MaxPooling2D(pool_size=(2, 1))(conv_4)
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPooling2D(pool_size=(2, 1))(batch_norm_6)
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)
outputs = Dense(len(char_dict)+1, activation = 'softmax')(blstm_2)
act_model = Model(inputs, outputs)
Then I define a CTC loss model that takes the outputs of the previous model as inputs:
labels = Input(name='the_labels', shape=[max_length], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels, input_length, label_length])
model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = 'adam')
model.fit(x=[input_array,
             output_array,
             train_input_length,
             train_label_length],
          y=np.zeros(input_array.shape[0]),
          batch_size=256,
          epochs=100,
          validation_data=([test_input_array, test_output_array, valid_input_length,
                            valid_label_length], [np.zeros(test_input_array.shape[0])]),
          verbose=1,
          callbacks=callbacks_list)
The error I am getting is:
ValueError: Shape (None, 17) must have rank 1
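It's hard to pin down which tensor trips the rank check without the full traceback, but a first debugging step (a sketch, not a fix) is to print the shapes entering K.ctc_batch_cost, whose documented expectations are labels (samples, max_string_length), y_pred (samples, time_steps, num_categories), and both lengths (samples, 1):

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # temporary shape check against K.ctc_batch_cost's documented inputs
    print("labels:", labels.shape, "y_pred:", y_pred.shape,
          "input_length:", input_length.shape, "label_length:", label_length.shape)
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

A mismatch there (e.g. a length array of shape (samples,) instead of (samples, 1)) is a common source of this kind of rank error.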

TensorFlow Keras(v2.2) model fit with multiple outputs and losses failed

I want to use TensorFlow Keras (v2.2) model.fit on MNIST with multiple outputs and losses, but it fails.
My custom model returns a list [logits, embedding]: logits is a 2D tensor [batch, 10] and embedding is a 2D tensor [batch, 64].
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.reshape = tf.keras.layers.Reshape((28, 28, 1))
        self.conv2D1 = tf.keras.layers.Conv2D(filters=8, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')
        self.maxPool1 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="same")
        self.conv2D2 = tf.keras.layers.Conv2D(filters=8, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')
        self.maxPool2 = tf.keras.layers.MaxPooling2D(pool_size=2)
        self.flatten = tf.keras.layers.Flatten(data_format="channels_last")
        self.dropout = tf.keras.layers.Dropout(tf.compat.v1.placeholder_with_default(0.25, shape=[], name="dropout"))
        self.dense1 = tf.keras.layers.Dense(64, activation=None)
        self.dense2 = tf.keras.layers.Dense(10, activation=None)

    def call(self, inputs, training):
        x = self.reshape(inputs)
        x = self.conv2D1(x)
        x = self.maxPool1(x)
        if training:
            x = self.dropout(x)
        x = self.conv2D2(x)
        x = self.maxPool2(x)
        if training:
            x = self.dropout(x)
        x = self.flatten(x)
        x = self.dense1(x)
        embedding = tf.math.l2_normalize(x, axis=1)
        logits = self.dense2(embedding)
        return [logits, embedding]
loss_0 is a normal cross-entropy:
def loss_0(y_true, y_pred):
    loss_0 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred[0]))
    return loss_0
loss_1 is a triplet semi-hard loss:
def loss_1(y_true, y_pred):
    loss_1 = tfa.losses.triplet_semihard_loss(y_true=y_true, y_pred=y_pred[1], distance_metric="L2")
    return loss_1
When I use model.fit, I can only get the logits tensor in each loss; I can't get the embedding tensor. y_pred[0] and y_pred[1] do not work. Any suggestions?
model = MyModel()
model.compile(optimizer=tf.keras.optimizers.Adam(lr=1e-3), loss=[loss_0, loss_1], loss_weights=[0.1, 0.1])
history = model.fit(train_dataset, epochs=5)
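A likely culprit (stated as an assumption, since this is what the y_pred[0]/y_pred[1] symptom suggests): with loss=[loss_0, loss_1], Keras pairs each loss function with one output positionally, so each function already receives only its own output tensor, and indexing y_pred[0] slices the batch dimension of that tensor instead of selecting the logits. Under that reading, the losses would drop the indexing:

def loss_0(y_true, y_pred):
    # paired with the first output, so y_pred is already the logits
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred))

def loss_1(y_true, y_pred):
    # paired with the second output, so y_pred is already the embedding
    return tfa.losses.triplet_semihard_loss(y_true=y_true, y_pred=y_pred, distance_metric="L2")

The dataset would then need to yield one label tensor per output, e.g. (images, (labels, labels)), so that both losses receive the class labels.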

You must feed value for placeholder *_sample_weights while training UNET from VGG16

I am trying to create a UNET using VGG16 as the first layers.
def BuildUNet2():
    keras.backend.set_learning_phase(1)
    inputs = keras.layers.Input(shape=(PATCH_SIZE, PATCH_SIZE, 3), name="inputs")
    vggModel = keras.applications.VGG16(include_top=False, input_tensor=inputs)
    layers = dict([(layer.name, layer) for layer in vggModel.layers])
    print("Layers", len(layers), layers)
    block1_conv2 = layers["block1_conv2"].output
    block2_conv2 = layers["block2_conv2"].output
    block3_conv3 = layers["block3_conv3"].output
    block4_conv3 = layers["block4_conv3"].output
    vggTop = layers["block5_conv3"].output
    up6 = keras.layers.concatenate([keras.layers.Conv2DTranspose(256, (2,2), strides=(2,2), padding="same")(vggTop), block4_conv3], axis=3)
    conv61 = keras.layers.Conv2D(256, 3, activation="relu", padding="same", kernel_initializer="he_normal")(up6)
    conv62 = keras.layers.Conv2D(256, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv61)
    up7 = keras.layers.concatenate([keras.layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding="same")(conv62), block3_conv3], axis=3)
    conv71 = keras.layers.Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal")(up7)
    conv72 = keras.layers.Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv71)
    up8 = keras.layers.concatenate([keras.layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding="same")(conv72), block2_conv2], axis=3)
    conv81 = keras.layers.Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal")(up8)
    conv82 = keras.layers.Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv81)
    up9 = keras.layers.concatenate([keras.layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding="same")(conv82), block1_conv2], axis=3)
    conv91 = keras.layers.Conv2D(32, 3, activation="relu", padding="same", kernel_initializer="he_normal")(up9)
    conv92 = keras.layers.Conv2D(32, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv91)
    conv93 = keras.layers.Conv2D(1, (1, 1), activation="sigmoid")(conv92)
    model = keras.models.Model(input=[inputs], output=[conv93])
    for layer in model.layers[:19]:
        layer.trainable = False
    model.compile(optimizer=keras.optimizers.Adam(lr=1e-5), loss=metric.dice_coef_loss,
                  metrics=[metric.dice_coef, "accuracy"])
    model.summary()
    return model
I am training with:
with h5py.File(parms.training, "r") as trainingsFile:
    wrk = trainingsFile["work"].value

np.random.seed(42)
np.random.shuffle(wrk)
limit = int(wrk.shape[0]*0.8)
trainData = wrk[:limit]
valData = wrk[limit:]
trainGen = DataGenerator(trainData, parms.batchSize)
valGen = DataGenerator(valData, parms.batchSize)
bestCheckpoint = keras.callbacks.ModelCheckpoint("best.h5",
                                                 monitor="val_loss",
                                                 save_best_only=True,
                                                 save_weights_only=False)
regCheckpoint = keras.callbacks.ModelCheckpoint("checkpoint-{epoch:04d}.h5", period=10)
csvLog = keras.callbacks.CSVLogger("log.csv", append=True)
runName = datetime.datetime.now().isoformat("#")[:19].replace(":", "-")
tensorBoard = keras.callbacks.TensorBoard(log_dir="./logs/%s/" % runName)
lrPlateau = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=10, cooldown=5)
model.fit_generator(trainGen,
                    epochs=parms.epochs,
                    steps_per_epoch=trainGen.__len__(),
                    validation_data=valGen,
                    validation_steps=valGen.__len__(),
                    callbacks=[bestCheckpoint, regCheckpoint, csvLog, tensorBoard, lrPlateau],
                    use_multiprocessing=False,
                    )
The DataGenerator is defined as:
class DataGenerator(keras.utils.Sequence):
    def __init__(self, data, batchSize):
        self.data = data
        self.batchSize = batchSize

    def __len__(self):
        return int((self.data.shape[0]+self.batchSize-1)/(self.batchSize))

    def __getitem__(self, item):
        X = np.zeros((self.batchSize, self.data.shape[1], self.data.shape[2], 3), dtype=np.float32)
        Y = np.zeros((self.batchSize, self.data.shape[1], self.data.shape[2]), dtype=np.float32)
        j = 0
        wrk = np.zeros((self.data.shape[1], self.data.shape[2], self.data.shape[3]), dtype=np.float32)
        for i in range(item*self.batchSize, min((item+1)*self.batchSize, self.data.shape[0])):
            wrk = self.data[i, :, :, :]
            if random.random() < 0.5:
                wrk = wrk[:, ::-1, :]
            if random.random() < 0.5:
                wrk = wrk[::-1, :, :]
            direction = int(random.random() * 4) * 90
            if direction:
                wrk = imutils.rotate(wrk, direction)
            X[j, :, :, :] = wrk[:, :, 0:3]
            Y[j, :, :] = wrk[:, :, 3]
            j += 1
        X = X.resize((j, X.shape[1], X.shape[2], X.shape[3]))
        Y = Y.resize((j, Y.shape[1], Y.shape[2]))
        return X, Y
Trying to train the model results in:
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'conv2d_9_sample_weights' with dtype float and shape [?]
Even explicitly returning a sample_weight (an additional np.ones((j), dtype=np.float32)) from the DataGenerator does not solve the problem.
What's wrong?
How do I correct it?
The problem was with DataGenerator.__getitem__():
resize does not return a new numpy array; it changes the original array in place and returns nothing. Therefore the __getitem__ method returned None, None.
The Keras error message is misleading.
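Concretely, the fix is to slice instead of resizing in place. A minimal sketch of the corrected tail of __getitem__ (variable names as in the generator above):

# ndarray.resize modifies the array in place and returns None, so
# `X = X.resize(...)` used to set X (and Y) to None
X = X[:j]   # keep only the j rows that were actually filled
Y = Y[:j]
return X, Y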

Conditional variable error when trying to open serialized Keras model

I have the following model:
embedding_matrix = np.zeros((vocab_size, 100))
for word, i in text_tokenizer.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

embedding_layer = Embedding(vocab_size,
                            100,
                            weights=[embedding_matrix],
                            input_length=50,
                            trainable=False)
sequence_input = Input(shape=(50,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
text_cnn = Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')(embedded_sequences)
text_cnn = Conv1D(filters=32, kernel_size=5, strides=1, padding='same', activation='relu')(text_cnn)
text_cnn = Conv1D(filters=16, kernel_size=5, strides=1, padding='same', activation='relu')(text_cnn)
text_lstm = Bidirectional(LSTM(256, return_sequences=True))(text_cnn)
text_lstm = Dense(128, activation='relu')(text_lstm)
text_lstm = Dropout(0.3)(text_lstm)
char_in = Input(shape=(50, 18, ))
char_cnn = Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')(char_in)
char_cnn = Conv1D(filters=32, kernel_size=5, strides=1, padding='same', activation='relu')(char_cnn)
char_cnn = Conv1D(filters=16, kernel_size=5, strides=1, padding='same', activation='relu')(char_cnn)
char_lstm = Bidirectional(LSTM(256, return_sequences=True))(char_cnn)
char_lstm = GaussianNoise(0.50)(char_lstm)
char_lstm = Dense(128, activation='relu')(char_lstm)
char_lstm = Dropout(0.3)(char_lstm)
merged = concatenate([char_lstm, text_lstm])
merged_d1 = Dense(512, activation='relu')(merged)
merged_d1 = Dropout(0.3)(merged_d1)
merged_d1 = Dense(256, activation='relu')(merged_d1)
merged_d1 = GaussianNoise(0.30)(merged_d1)
text_class = Dense(len(y_unique), activation='softmax')(merged_d1)
model = Model([sequence_input,char_in], text_class)
Trained on GPUs like so:
parallel_model = multi_gpu_model(model, gpus=4)
parallel_model.compile(loss='binary_crossentropy',
                       optimizer='adam', metrics=['accuracy'])
parallel_model.fit([x_train, x_train_char], y_train, validation_data=([x_test, x_test_char], y_test), epochs=1, batch_size=512)
I serialize it with the model.save() method.
When I try to load it on the same machine in a different kernel, I get the following error:
ValueError: Initializer for variable conv1d_4_1/kernel/ is from inside a control-flow construct, such as a loop or conditional. When creating a variable inside a loop or conditional, use a lambda as the initializer.
I'm not sure what loop or conditional it could be referring to; maybe something to do with the Bidirectional wrapper? I've serialized other similar models without any issues.
Any help would be greatly appreciated!
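One workaround worth trying (a sketch, under the assumption that the architecture can be rebuilt from the code above; build_model() below is a hypothetical helper, not an existing function): save and load only the weights of the template model instead of serializing the whole multi-GPU graph with model.save():

# on the training machine: persist the template model's weights,
# not the multi_gpu_model wrapper
model.save_weights('model_weights.h5')

# in the new kernel: re-run the layer definitions above, then load weights
model = build_model()   # hypothetical helper that rebuilds the architecture
model.load_weights('model_weights.h5')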