GAN Discriminator model input shape issue - tensorflow

I am trying to create a simple GAN model but I am getting an input error in Discriminator. Any suggestions or help is welcome
seed(33)
tf.random.set_seed(432)
Generator
Input size (32,) output size (9,)
# Define the generator model
def build_generator():
generator_input = Input(shape=(32,))
x = Dense(16, activation='relu')(generator_input)
x = Dense(9, activation='linear')(x)
generator = Model(generator_input, x)
return generator
Discriminator
The model has 2 embeddings for batsman & bowler and 7 other predictors for a total of 9 predictors
# Define the discriminator model
def build_discriminator():
bowlerIdx_input = Input(shape=(1,), name='bowlerIdx')
batsmanIdx_input = Input(shape=(1,), name='batsmanIdx')
ballNum_input = Input(shape=(1,), name='ballNum')
ballsRemaining_input = Input(shape=(1,), name='ballsRemaining')
runs_input = Input(shape=(1,), name='runs')
runRate_input = Input(shape=(1,), name='runRate')
numWickets_input = Input(shape=(1,), name='numWickets')
runsMomentum_input = Input(shape=(1,), name='runsMomentum')
perfIndex_input = Input(shape=(1,), name='perfIndex')
no_of_unique_batman=len(df1["batsmanIdx"].unique())
print(no_of_unique_batman)
no_of_unique_bowler=len(df1["bowlerIdx"].unique())
print(no_of_unique_bowler)
embedding_size_bat = no_of_unique_batman ** (1/4)
print(embedding_size_bat)
embedding_size_bwl = no_of_unique_bowler ** (1/4)
print(embedding_size_bwl)
# create embedding layer for the categorical predictor
batsmanIdx_embedding = Embedding(input_dim=4742, output_dim=16,input_length=1)(batsmanIdx_input)
print(batsmanIdx_embedding)
batsmanIdx_flatten = Flatten()(batsmanIdx_embedding)
print(batsmanIdx_flatten)
bowlerIdx_embedding = Embedding(input_dim=3492, output_dim=16,input_length=1)(bowlerIdx_input)
bowlerIdx_flatten = Flatten()(bowlerIdx_embedding)
print(bowlerIdx_flatten)
# concatenate all the predictors
discriminator_input = keras.layers.concatenate([batsmanIdx_flatten,bowlerIdx_flatten, ballNum_input, ballsRemaining_input, runs_input, runRate_input, numWickets_input, runsMomentum_input, perfIndex_input])
print(discriminator_input.shape)
# add hidden layers
x = Dense(64, activation='relu')(discriminator_input)
x = Dense(32, activation='relu')(x)
x = Dense(16, activation='relu')(x)
x = Dense(8, activation='relu')(x)
# add output layer
output = Dense(1, activation='sigmoid', name='output')(x)
# create model
discriminator = Model(inputs=[batsmanIdx_input, bowlerIdx_input, ballNum_input, ballsRemaining_input, runs_input, runRate_input, numWickets_input, runsMomentum_input, perfIndex_input], outputs=output)
return discriminator
GAN model
The generator output is fed to discriminator. Concatenate synthetic and real data and run discriminator. Minimize loss
# Define the GAN model
def build_gan(generator, discriminator):
gan_input = Input(shape=(1024,))
x = generator(gan_input)
gan_output = discriminator(x)
gan = Model(gan_input, gan_output)
return gan
# Instantiate the generator, discriminator, and GAN models
generator = build_generator()
discriminator = build_discriminator()
discriminator.summary()
gan = build_gan(generator, discriminator)
# Compile the generator and discriminator models
discriminator.compile(optimizer=Adam(learning_rate=.0002, beta_1=0.5), loss='binary_crossentropy', metrics=['accuracy'])
gan.compile(optimizer=Adam(learning_rate=.0005, beta_1=0.5), loss='binary_crossentropy',metrics=['accuracy'])
# Set the batch size and number of epochs
batch_size = 1024
num_epochs = 20
# Store the losses over time
gen_losses = []
dis_losses = []
gen_acc = []
dis_acc = []
# Train the GAN
for epoch in range(num_epochs):
# Generate synthetic data
synthetic_data = generator.predict(np.random.randn(batch_size, 1024))
# Concatenate synthetic data with real data
real_data = train_dataset1.values
data = np.concatenate((synthetic_data, real_data))
# Create labels for synthetic and real data
labels = np.concatenate((np.zeros(batch_size), np.ones(len(real_data))))
# Train the discriminator on synthetic and real data
d_loss = discriminator.fit([data['batsmanIdx'],data['bowlerIdx'],data['ballNum'],data['ballsRemaining'],data['runs'],
data['runRate'],data['numWickets'],data['runsMomentum'],data['perfIndex']], labels)
#d_loss = discriminator.train_on_batch(data, labels)
dis_losses.append(d_loss[0])
dis_acc.append(d_loss[1])
# Generate random noise for the generator
random_noise = np.random.randn(batch_size, 1024)
# Create labels for the generator (all ones, since we want the generator to fool the discriminator)
# Create labels for the generator (all ones, since we want the generator to fool the discriminator)
generator_labels = np.ones(batch_size)
# Train the generator
g_loss = gan.train_on_batch(random_noise, generator_labels)
gen_losses.append(g_loss[0])
gen_acc.append(g_loss[1])
# Print loss values for each epoch
print(f'Epoch: {epoch+1}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}')
The output and error I get is
Model: "model_56"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
batsmanIdx (InputLayer) [(None, 1)] 0 []
bowlerIdx (InputLayer) [(None, 1)] 0 []
embedding_2 (Embedding) (None, 1, 16) 75872 ['batsmanIdx[0][0]']
embedding_3 (Embedding) (None, 1, 16) 55872 ['bowlerIdx[0][0]']
flatten_2 (Flatten) (None, 16) 0 ['embedding_2[0][0]']
flatten_3 (Flatten) (None, 16) 0 ['embedding_3[0][0]']
ballNum (InputLayer) [(None, 1)] 0 []
ballsRemaining (InputLayer) [(None, 1)] 0 []
runs (InputLayer) [(None, 1)] 0 []
runRate (InputLayer) [(None, 1)] 0 []
numWickets (InputLayer) [(None, 1)] 0 []
runsMomentum (InputLayer) [(None, 1)] 0 []
perfIndex (InputLayer) [(None, 1)] 0 []
concatenate_28 (Concatenate) (None, 39) 0 ['flatten_2[0][0]',
'flatten_3[0][0]',
'ballNum[0][0]',
'ballsRemaining[0][0]',
'runs[0][0]',
'runRate[0][0]',
'numWickets[0][0]',
'runsMomentum[0][0]',
'perfIndex[0][0]']
dense_228 (Dense) (None, 64) 2560 ['concatenate_28[0][0]']
dropout_111 (Dropout) (None, 64) 0 ['dense_228[0][0]']
dense_229 (Dense) (None, 32) 2080 ['dropout_111[0][0]']
dropout_112 (Dropout) (None, 32) 0 ['dense_229[0][0]']
dense_230 (Dense) (None, 16) 528 ['dropout_112[0][0]']
dropout_113 (Dropout) (None, 16) 0 ['dense_230[0][0]']
dense_231 (Dense) (None, 8) 136 ['dropout_113[0][0]']
dropout_114 (Dropout) (None, 8) 0 ['dense_231[0][0]']
output (Dense) (None, 1) 9 ['dropout_114[0][0]']
==================================================================================================
Total params: 137,057
Trainable params: 137,057
Non-trainable params: 0
__________________________________________________________________________________________________
WARNING:tensorflow:Model was constructed with shape (None, 1) for
input KerasTensor(type_spec=TensorSpec(shape=(None, 1),
dtype=tf.float32, name='batsmanIdx'), name='batsmanIdx',
description="created by layer 'batsmanIdx'"), but it was called
on an input with incompatible shape (None, 9).
(None, 9)
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-30-8cbe40cd27bc> in <module>
94 print("111")
95 discriminator.summary()
---> 96 gan = build_gan(generator, discriminator)
97
98 # Compile the generator and discriminator models
2 frames
/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py in _run_internal_graph(self, inputs, training, mask)
677 for x in self.outputs:
678 x_id = str(id(x))
--> 679 assert x_id in tensor_dict, "Could not compute output " + str(x)
680 output_tensors.append(tensor_dict[x_id].pop())
681
AssertionError: Exception encountered when calling layer "model_56" (type Functional).
Could not compute output KerasTensor(type_spec=TensorSpec(shape=(None, 1), dtype=tf.float32, name=None), name='output/Sigmoid:0', description="created by layer 'output'")
Call arguments received by layer "model_56" (type Functional):
• inputs=tf.Tensor(shape=(None, 9), dtype=float32)
• training=None
• mask=None
I don't understand what this message means. I am supposed to input all 9 predictors but for some reason it only picks the first
WARNING:tensorflow:Model was constructed with shape (None, 1)
for input KerasTensor(type_spec=TensorSpec(shape=(None, 1),
dtype=tf.float32, name='batsmanIdx'), name='batsmanIdx',
description="created by layer 'batsmanIdx'"), but it was called
on an input with incompatible shape (None, 9).
(. None, 9)
All thoughts, suggestions are welcome

Related

with `with strategy.scope():` BERT output loses it's shape from tf-hub and `encoder_output` is missing

Code:
!pip install tensorflow-text==2.7.0
import tensorflow_text as text
import tensorflow_hub as hub
# ... other tf imports....
strategy = tf.distribute.MirroredStrategy()
print('Number of GPU: ' + str(strategy.num_replicas_in_sync)) # 1 or 2, shouldn't matter
NUM_CLASS=2
with strategy.scope():
bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4")
def get_model():
text_input = Input(shape=(), dtype=tf.string, name='text')
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)
output_sequence = outputs['sequence_output']
x = Dense(NUM_CLASS, activation='sigmoid')(output_sequence)
model = Model(inputs=[text_input], outputs = [x])
return model
optimizer = Adam()
model = get_model()
model.compile(loss=CategoricalCrossentropy(from_logits=True),optimizer=optimizer,metrics=[Accuracy(), ],)
model.summary() # <- look at the output 1
tf.keras.utils.plot_model(model, show_shapes=True, to_file='model.png') # <- look at the figure 1
with strategy.scope():
optimizer = Adam()
model = get_model()
model.compile(loss=CategoricalCrossentropy(from_logits=True),optimizer=optimizer,metrics=[Accuracy(), ],)
model.summary() # <- compare with output 1, it has already lost it's shape
tf.keras.utils.plot_model(model, show_shapes=True, to_file='model_scoped.png') # <- compare this figure too, for ease
With scope, BERT loses seq_length, and it becomes None.
Model summary withOUT scope: (See there is 128 at the very last layer, which is seq_length)
Model: "model_6"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
text (InputLayer) [(None,)] 0 []
keras_layer_2 (KerasLayer) {'input_mask': (Non 0 ['text[0][0]']
e, 128),
'input_word_ids':
(None, 128),
'input_type_ids':
(None, 128)}
keras_layer_3 (KerasLayer) multiple 109482241 ['keras_layer_2[6][0]',
'keras_layer_2[6][1]',
'keras_layer_2[6][2]']
dense_6 (Dense) (None, 128, 2) 1538 ['keras_layer_3[6][14]']
==================================================================================================
Total params: 109,483,779
Trainable params: 1,538
Non-trainable params: 109,482,241
__________________________________________________________________________________________________
Model with scope:
Model: "model_7"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
text (InputLayer) [(None,)] 0 []
keras_layer_2 (KerasLayer) {'input_mask': (Non 0 ['text[0][0]']
e, 128),
'input_word_ids':
(None, 128),
'input_type_ids':
(None, 128)}
keras_layer_3 (KerasLayer) multiple 109482241 ['keras_layer_2[7][0]',
'keras_layer_2[7][1]',
'keras_layer_2[7][2]']
dense_7 (Dense) (None, None, 2) 1538 ['keras_layer_3[7][14]']
==================================================================================================
Total params: 109,483,779
Trainable params: 1,538
Non-trainable params: 109,482,241
__________________________________________________________________________________________________
If these image helps:
Another notable thing encoder_outputs is also missing if you take a look at the 2nd keras layer or 3rd layer of both model.

Tensorflow music generation with lstm - model.fit not working

I am trying to train a lstm model for music generation, but something seems to cause an error when calling model.fit().
# Compile the model
lstm.compile(loss='categorical_crossentropy', optimizer='rmsprop')
tf.keras.utils.plot_model(lstm, show_shapes=True)
# Train the model
lstm.fit([trainChords, trainDurations], [targetChords, targetDurations], epochs=500)
Model: "model_6"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_19 (InputLayer) [(None, 32)] 0 []
input_20 (InputLayer) [(None, 32)] 0 []
embedding_18 (Embedding) (None, 32, 64) 2048 ['input_19[0][0]']
embedding_19 (Embedding) (None, 32, 64) 2048 ['input_20[0][0]']
concatenate_9 (Concatenate) (None, 64, 64) 0 ['embedding_18[0][0]',
'embedding_19[0][0]']
lstm_8 (LSTM) (None, 64, 512) 1181696 ['concatenate_9[0][0]']
dense_18 (Dense) (None, 64, 256) 131328 ['lstm_8[0][0]']
dense_19 (Dense) (None, 64, 32) 8224 ['dense_18[0][0]']
dense_20 (Dense) (None, 64, 32) 8224 ['dense_18[0][0]']
==================================================================================================
Total params: 1,333,568
Trainable params: 1,333,568
Non-trainable params: 0
I get the error that some shapes are not compatible, but I don't know how to fix this.
ValueError Traceback (most recent call last)
<ipython-input-63-8785c106bc4b> in <module>
5
6 # Train the model
----> 7 lstm.fit([trainChords, trainDurations], [targetChords, targetDurations], epochs=500)
ValueError: Shapes (None,) and (None, 64, 32) are incompatible
Any help would be appreciated.
Update on building the model. Maybe the error is with the output layers(?)
# Define input layers
chordInput = tf.keras.layers.Input(shape = (nChords))
durationInput = tf.keras.layers.Input(shape = (nDurations))
# Define embedding layers
chordEmbedding = tf.keras.layers.Embedding(nChords, embedDim, input_length = sequenceLength)(chordInput)
durationEmbedding = tf.keras.layers.Embedding(nDurations, embedDim, input_length = sequenceLength)(durationInput)
# Merge embedding layers using a concatenation layer
mergeLayer = tf.keras.layers.Concatenate(axis=1)([chordEmbedding, durationEmbedding])
# Define LSTM layer
lstmLayer = tf.keras.layers.LSTM(512, return_sequences=True)(mergeLayer)
# Define dense layer
denseLayer = tf.keras.layers.Dense(256)(lstmLayer)
# Define output layers
chordOutput = tf.keras.layers.Dense(nChords, activation = 'softmax')(denseLayer)
durationOutput = tf.keras.layers.Dense(nDurations, activation = 'softmax')(denseLayer)
# nChords and nDurations are both 32
# Define model
lstm = tf.keras.Model(inputs = [chordInput, durationInput], outputs = [chordOutput, durationOutput])

Error on custom dataset dimensions feeding transfer model in TensorFLow

Can someone explain this TensorFlow error for me, I'm having trouble understanding what I am doing wrong.
I have a dataset in Tensorflow constructed with a generator. When I test the output of the generator, output dimensions look correct (224 x 224 x 1). But when I try to train the model, I get an error:
WARNING:tensorflow:Model was constructed with shape (None, 224, 224, 1) for input
KerasTensor(type_spec=TensorSpec(shape=(None, 224, 224, 1), dtype=tf.float32,
name='input_2'), name='input_2', description="created by layer 'input_2'"),
but it was called on an input with incompatible shape (224, 224, 1, 1).
I'm unsure why the dimension of this output has an extra 1 at the end.
Here is the code to create the generator and model. df is a dataframe with file-paths to data and labels. The data are 2D matrices of variable dimensions. I'm using cv2.resize to make them 224x224 and then np.reshape to transform dimensions to (224x224x1). Then I yield the result.
def datagen_row():
# ======================== #
# Import data
# ======================== #
df = get_data()
rowsize = 224
colsize = 224
# ======================== #
#
# ======================== #
for row in range(len(df)):
data = get_data_from_filepath(df.iloc[row].file_path)
data = cv2.resize(data, dsize=(rowsize, colsize), interpolation=cv2.INTER_CUBIC)
labels = df.iloc[row].label
data = data.reshape( 224, 224, 1)
yield data, labels
dataset = tf.data.Dataset.from_generator(
datagen_row,
output_signature=(
tf.TensorSpec(shape = (int(os.getenv('rowsize')), int(os.getenv('colsize')), 1), dtype=tf.float32, name=None),
tf.TensorSpec(shape=(), dtype=tf.int64, name=None)
)
)
Testing the following I get what I expected:
iterator = iter(dataset.batch(8))
x = iterator.get_next()
x[0].shape # TensorShape([8, 224, 224, 1])
x[1].shape # TensorShape([8])
x[0] # <tf.Tensor: shape=(8, 224, 224, 1), dtype=float32, numpy=array(...
x[1] # <tf.Tensor: shape=(8,), dtype=int64, numpy=array([1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)>
I'm trying to plug this into InceptionV3 model to do a classification
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import Input
from tensorflow.keras import layers
origModel = InceptionV3(weights = 'imagenet', include_top = False)
inputs = layers.Input(shape = (224, 224, 1))
modified_inputs = layers.Conv2D(3, 3, padding = 'same', activation='relu')(inputs)
x = origModel(modified_inputs)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(1024, activation = 'relu')(x)
x = layers.Dense(512, activation = 'relu')(x)
x = layers.Dense(256, activation = 'relu')(x)
x = layers.Dense(128, activation = 'relu')(x)
x = layers.Dense(64, activation = 'relu')(x)
x = layers.Dense(32, activation = 'relu')(x)
outputs = layers.Dense(2)(x)
model = tf.keras.Model(inputs, outputs)
model.summary() # 24.6 M trainable params
for layer in origModel.layers:
layer.trainable = False
model.summary() # now shows 2.8 M trainable params
model.compile(
optimizer = 'adam',
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
metrics = ['accuracy']
)
model.fit(dataset, epochs = 1, verbose = True, batch_size = 32)
Here is the output of model.summary
model.summary()
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 224, 224, 1)] 0
conv2d_94 (Conv2D) (None, 224, 224, 3) 30
inception_v3 (Functional) (None, None, None, 2048) 21802784
global_average_pooling2d (G (None, 2048) 0
lobalAveragePooling2D)
dense (Dense) (None, 1024) 2098176
dense_1 (Dense) (None, 512) 524800
dense_2 (Dense) (None, 256) 131328
dense_3 (Dense) (None, 128) 32896
dense_4 (Dense) (None, 64) 8256
dense_5 (Dense) (None, 32) 2080
dense_6 (Dense) (None, 2) 66
=================================================================
Total params: 24,600,416
Trainable params: 2,797,632
Non-trainable params: 21,802,784
_________________________________________________________________
This code worked after changing
model.fit(dataset, epochs = 1, verbose = True, batch_size = 32)
to
model.fit(dataset.batch(2), epochs = 1, verbose = True, batch_size = 32)
So... I will have to look into using dataset.batch versus batch_size in model.fit

Stacking ensemble with two different inputs for image segmenatation

I an stacking two models trained on different inputs from two data collections as shown below using Tensorflow Keras 2.6.2. The stacking is performed with a convolutional meta-learner to predict on a common hold out test set. Given below is the code and he model architecture.
#load data
#datase-1
X_tr1 = np.load('data/X_tr1.npy') #shape (200, 224,224,3)
Y_tr1 = np.load('data/Y_tr1.npy') #shape (200, 224,224,1)
X_val1 = np.load('data/X_val1.npy') #shape (100, 224,224,3)
Y_val1 = np.load('data/Y_val1.npy') #shape (100, 224,224,1)
#dataset-2
X_tr2 = np.load('data/X_tr2.npy') #shape (200, 224,224,3)
Y_tr2 = np.load('data/Y_tr2.npy') #shape (200, 224,224,1)
X_val2 = np.load('data/X_val2.npy') #shape (100, 224,224,3)
Y_val2 = np.load('data/Y_val2.npy') #shape (100, 224,224,1)
#common hold-out test set
X_ts = np.load('data/X_ts.npy') #shape (50, 224,224,3)
Y_ts = np.load('data/Y_ts.npy') #shape (50, 224,224,1)
#%%
#instantiate the models
img_width, img_height = 224,224
input_shape = (img_width, img_height, 3) #RGB inputs
model_input1 = Input(shape=input_shape) #input to model1
model_input2 = Input(shape=input_shape) #input to model2
n_classes=1 #grayscale mask output
activation='sigmoid'
batch_size = 8
n_epochs = 256
BACKBONE = 'vgg16'
# define model
model1 = sm.Unet(BACKBONE, encoder_weights='imagenet',
classes=n_classes, activation=activation)
model2 = sm.Unet(BACKBONE, encoder_weights='imagenet',
classes=n_classes, activation=activation)
#%%
# constructing a stacking ensemble of the two models
# A second-level fully-convolutional meta-learner is used to learn
# the features extracted from the penultimate layers of the models
n_models = 2
def load_all_models(n_models):
all_models = list()
model1.load_weights('weights/vgg16_1.hdf5') # path to model1
model_loss1a=Model(inputs=model1.input,
outputs=model1.get_layer('decoder_stage4b_relu').output) #name of the penultimate layer
x1 = model_loss1a.output
model1a = Model(inputs=model1.input, outputs=x1, name='model1')
all_models.append(model1a)
model2.load_weights('weights/vgg16_2.hdf5') #path to model2
model_loss2a=Model(inputs=model2.input,
outputs=model2.get_layer('decoder_stage4b_relu').output)
x2 = model_loss2a.output
model2a = Model(inputs=model2.input, outputs=x2, name='model2')
all_models.append(model2a)
return all_models
# load models
n_members = 2
members = load_all_models(n_members)
print('Loaded %d models' % len(members))
def define_stacked_model(members):
# update all layers in all models to not be trainable
for i in range(len(members)):
model = members[i]
for layer in model.layers [1:]:
# make not trainable
layer.trainable = False
layer._name = 'ensemble_' + str(i+1) + '_' + layer.name
ensemble_outputs = [model(model_input1, model_input2) for model in members]
merge = Concatenate()(ensemble_outputs)
# meta-learner, fully-convolutional
x4 = Conv2D(128, (3,3), activation='relu',
name = 'NewConv1', padding='same')(merge)
x5 = Conv2D(1, (1,1), activation='sigmoid',
name = 'NewConvfinal')(x4)
model= Model(inputs=[model_input1,model_input2],
outputs=x4)
return model
print("Creating Ensemble")
ensemble = define_stacked_model(members)
print("Ensemble architecture: ")
print(ensemble.summary())
Shown below is the architecture of the stacked model:
Model: "model_4"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 224, 224, 3) 0
__________________________________________________________________________________________________
input_2 (InputLayer) [(None, 224, 224, 3) 0
__________________________________________________________________________________________________
model1 (Functional) (None, None, None, 1 23752128 input_1[0][0]
input_2[0][0]
__________________________________________________________________________________________________
model2 (Functional) (None, None, None, 1 23752128 input_1[0][0]
input_2[0][0]
__________________________________________________________________________________________________
concatenate (Concatenate) (None, 224, 224, 32) 0 model1[0][0]
model2[0][0]
__________________________________________________________________________________________________
NewConv1 (Conv2D) (None, 224, 224, 128 36992 concatenate[0][0]
__________________________________________________________________________________________________
NewConv2 (Conv2D) (None, 224, 224, 64) 73792 NewConv1[0][0]
__________________________________________________________________________________________________
NewConv3 (Conv2D) (None, 224, 224, 32) 18464 NewConv2[0][0]
__________________________________________________________________________________________________
NewConvfinal (Conv2D) (None, 224, 224, 1) 33 NewConv3[0][0]
==================================================================================================
Total params: 47,633,537
Trainable params: 129,281
Non-trainable params: 47,504,256
I compile and train the model as shown below:
opt = keras.optimizers.Adam(lr=0.001)
loss_func='binary_crossentropy'
ensemble.compile(optimizer=opt,
loss=loss_func,
metrics=['binary_accuracy'])
results_ensemble = ensemble.fit((X_tr1, Y_tr1, X_tr2, Y_tr2),
batch_size=batch_size,
epochs=n_epochs,
verbose=1,
validation_data=(X_val1, Y_val1, X_val2, Y_val2))
I get the following error:
Traceback (most recent call last):
File "/home/codes/untitled5.py", line 563, in <module>
validation_data=(X_val1, Y_val1, X_val2, Y_val2))
File "/home/anaconda3/envs/tf262/lib/python3.7/site-packages/keras/engine/training.py", line 1125, in fit
data_adapter.unpack_x_y_sample_weight(validation_data))
File "/home/anaconda3/envs/tf262/lib/python3.7/site-packages/keras/engine/data_adapter.py", line 1574, in unpack_x_y_sample_weight
raise ValueError(error_msg)
ValueError: Data is expected to be in format `x`, `(x,)`, `(x, y)`, or `(x, y, sample_weight)`, found: (array([[[[0.09803922, 0.09803922, 0.09803922],
[0.09803922, 0.09803922, 0.09803922],
[0.09803922, 0.09803922, 0.09803922],
...,
[0.08627451, 0.08627451, 0.08627451],
[0.08627451, 0.08627451, 0.08627451],
[0.05098039, 0.05098039, 0.05098039]],...
Also how do I predict with a single X_ts provided the ensemble model now has two separate inputs?
New error after trying to implement the suggestions:
File "/home/codes/untitled5.py", line 595, in <module>
validation_data=outputs)
File "/home/anaconda3/envs/tf262/lib/python3.7/site-packages/keras/engine/training.py", line 1184, in fit
tmp_logs = self.train_function(iterator)
ValueError: Layer model_4 expects 2 input(s), but it received 4 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 224, 224, 3) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(None, 224, 224, 1) dtype=float32>, <tf.Tensor 'IteratorGetNext:2' shape=(None, 224, 224, 3) dtype=float32>, <tf.Tensor 'IteratorGetNext:3' shape=(None, 224, 224, 1) dtype=float32>]
Answer based on comment. Multi-inputs need to be passed as a list, not a tuple.
Change:
results_ensemble = ensemble.fit((X_tr1, Y_tr1, X_tr2, Y_tr2),
batch_size=batch_size,
epochs=n_epochs,
verbose=1,
validation_data=(X_val1, Y_val1, X_val2, Y_val2))
To:
inputs = [X_tr1, Y_tr1, X_tr2, Y_tr2] # you can pass the list itself or the variable
results_ensemble = ensemble.fit(inputs,
batch_size=batch_size,
epochs=n_epochs,
verbose=1,
validation_data=([X_val1, X_val2], y_val))
# test_inputs_diff = [x_test1, x_test2] # different input
# test_inputs_same = [x_test1, x_test1] # same input
# preds_diff = ensemble.predict(test_inputs_diff)
# preds_same = ensemble.predict(test_inputs_same)

ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 50), found shape=(None, 1, 512)

Learning to use bert-base-cased and a classification model... the code for the model is the following:
def mao_func(input_ids, masks, labels):
return {'input_ids':input_ids, 'attention_mask':masks}, labels
dataset = dataset.map(mao_func)
BATCH_SIZE = 32
dataset = dataset.shuffle(100000).batch(BATCH_SIZE)
split = .8
ds_len = len(list(dataset))
train = dataset.take(round(ds_len * split))
val = dataset.skip(round(ds_len * split))
from transformers import TFAutoModel
bert = TFAutoModel.from_pretrained('bert-base-cased')
Model: "tf_bert_model"
Layer (type) Output Shape Param #
bert (TFBertMainLayer) multiple 108310272
=================================================================
Total params: 108,310,272
Trainable params: 108,310,272
Non-trainable params: 0
then the NN builduing:
input_ids = tf.keras.layers.Input(shape=(50,), name='input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape=(50,), name='attention_mask', dtype='int32')
embeddings = bert(input_ids, attention_mask=mask)[0]
X = tf.keras.layers.GlobalMaxPool1D()(embeddings)
X = tf.keras.layers.BatchNormalization()(X)
X = tf.keras.layers.Dense(128, activation='relu')(X)
X = tf.keras.layers.Dropout(0.1)(X)
X = tf.keras.layers.Dense(32, activation='relu')(X)
y = tf.keras.layers.Dense(3, activation='softmax',name='outputs')(X)
model = tf.keras.Model(inputs=[input_ids, mask], outputs=y)
model.layers[2].trainable = False
the model.summary is:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_ids (InputLayer) [(None, 50)] 0 []
attention_mask (InputLayer) [(None, 50)] 0 []
tf_bert_model (TFBertModel) TFBaseModelOutputWi 108310272 ['input_ids[0][0]',
thPoolingAndCrossAt 'attention_mask[0][0]']
tentions(last_hidde
n_state=(None, 50,
768),
pooler_output=(Non
e, 768),
past_key_values=No
ne, hidden_states=N
one, attentions=Non
e, cross_attentions
=None)
global_max_pooling1d (GlobalMa (None, 768) 0 ['tf_bert_model[0][0]']
xPooling1D)
batch_normalization (BatchNorm (None, 768) 3072 ['global_max_pooling1d[0][0]']
alization)
dense (Dense) (None, 128) 98432 ['batch_normalization[0][0]']
dropout_37 (Dropout) (None, 128) 0 ['dense[0][0]']
dense_1 (Dense) (None, 32) 4128 ['dropout_37[0][0]']
outputs (Dense) (None, 3) 99 ['dense_1[0][0]']
==================================================================================================
Total params: 108,416,003
Trainable params: 104,195
Non-trainable params: 108,311,808
__________________________________________________________________________________________________
finally the model fitting is
optimizer = tf.keras.optimizers.Adam(0.01)
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
model.compile(optimizer,loss=loss, metrics=[acc])
history = model.fit(
train,
validation_data = val,
epochs=140
)
with execution error in line 7 -> the model.fit(...):
ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 50), found shape=(None, 1, 512)
Can any one be so kind of helping me on what I did wrong and why... thanks:)
update: here is the git with the codes https://github.com/CharlieArreola/OnlinePosts
It seems, that your shape of the train data doen't match the expected input shape of your input layer.
You can check your shape of the train data with train.shape()
You input layer Input_ids = tf.keras.layers.Input(shape=(50,), name='input_ids', dtype='int32') expects train data with 50 columns, but you most likely have 512 if we look at your error.
So to fix this, you could simply change your input shape.
Input_ids = tf.keras.layers.Input(shape=(512,), name='input_ids', dtype='int32')
If you split your x and y in your dataset you can make it more flexible with:
Input_ids = tf.keras.layers.Input(shape=(train_x.shape[0],), name='input_ids', dtype='int32')
Also don't forget, that you have to do this change to all of your input layers!