I am trying to create a simple GAN model, but I am getting an input error in the Discriminator. Any suggestions or help are welcome.
seed(33)
tf.random.set_seed(432)
Generator
Input size (32,) output size (9,)
# Define the generator model
def build_generator():
    generator_input = Input(shape=(32,))
    x = Dense(16, activation='relu')(generator_input)
    x = Dense(9, activation='linear')(x)
    generator = Model(generator_input, x)
    return generator
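As a quick shape sanity check (my own sketch, not part of the original post), the generator can be called on a small batch of noise:

import tensorflow as tf

g = build_generator()
noise = tf.random.normal((4, 32))   # a batch of 4 noise vectors of length 32
print(g(noise).shape)               # (4, 9): one synthetic row of 9 predictors per noise vector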
Discriminator
The model has 2 embeddings (for batsman and bowler) and 7 other predictors, for a total of 9 predictors.
# Define the discriminator model
def build_discriminator():
    bowlerIdx_input = Input(shape=(1,), name='bowlerIdx')
    batsmanIdx_input = Input(shape=(1,), name='batsmanIdx')
    ballNum_input = Input(shape=(1,), name='ballNum')
    ballsRemaining_input = Input(shape=(1,), name='ballsRemaining')
    runs_input = Input(shape=(1,), name='runs')
    runRate_input = Input(shape=(1,), name='runRate')
    numWickets_input = Input(shape=(1,), name='numWickets')
    runsMomentum_input = Input(shape=(1,), name='runsMomentum')
    perfIndex_input = Input(shape=(1,), name='perfIndex')

    no_of_unique_batsman = len(df1["batsmanIdx"].unique())
    print(no_of_unique_batsman)
    no_of_unique_bowler = len(df1["bowlerIdx"].unique())
    print(no_of_unique_bowler)

    embedding_size_bat = no_of_unique_batsman ** (1/4)
    print(embedding_size_bat)
    embedding_size_bwl = no_of_unique_bowler ** (1/4)
    print(embedding_size_bwl)

    # create embedding layers for the categorical predictors
    batsmanIdx_embedding = Embedding(input_dim=4742, output_dim=16, input_length=1)(batsmanIdx_input)
    print(batsmanIdx_embedding)
    batsmanIdx_flatten = Flatten()(batsmanIdx_embedding)
    print(batsmanIdx_flatten)
    bowlerIdx_embedding = Embedding(input_dim=3492, output_dim=16, input_length=1)(bowlerIdx_input)
    bowlerIdx_flatten = Flatten()(bowlerIdx_embedding)
    print(bowlerIdx_flatten)

    # concatenate all the predictors
    discriminator_input = keras.layers.concatenate([batsmanIdx_flatten, bowlerIdx_flatten, ballNum_input, ballsRemaining_input, runs_input, runRate_input, numWickets_input, runsMomentum_input, perfIndex_input])
    print(discriminator_input.shape)

    # add hidden layers
    x = Dense(64, activation='relu')(discriminator_input)
    x = Dense(32, activation='relu')(x)
    x = Dense(16, activation='relu')(x)
    x = Dense(8, activation='relu')(x)

    # add output layer
    output = Dense(1, activation='sigmoid', name='output')(x)

    # create model
    discriminator = Model(inputs=[batsmanIdx_input, bowlerIdx_input, ballNum_input, ballsRemaining_input, runs_input, runRate_input, numWickets_input, runsMomentum_input, perfIndex_input], outputs=output)
    return discriminator
GAN model
The generator output is fed to the discriminator. Synthetic and real data are concatenated and run through the discriminator, and the loss is minimized.
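For context (a hedged sketch of my own, not part of the original code): a functional model built from nine separate one-column inputs has to be called with a list (or dict) of nine tensors in the same order as its inputs argument, rather than with a single (batch, 9) tensor. One way to split a 9-column tensor into such a list is:

import tensorflow as tf

def split_into_columns(t, n_cols=9):
    # hypothetical helper: returns a list of n_cols tensors of shape (batch, 1);
    # the order must match the order of the discriminator's `inputs` list
    return tf.split(t, num_or_size_splits=n_cols, axis=1)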
# Define the GAN model
def build_gan(generator, discriminator):
    gan_input = Input(shape=(1024,))
    x = generator(gan_input)
    gan_output = discriminator(x)
    gan = Model(gan_input, gan_output)
    return gan
# Instantiate the generator, discriminator, and GAN models
generator = build_generator()
discriminator = build_discriminator()
discriminator.summary()
gan = build_gan(generator, discriminator)

# Compile the generator and discriminator models
discriminator.compile(optimizer=Adam(learning_rate=.0002, beta_1=0.5), loss='binary_crossentropy', metrics=['accuracy'])
gan.compile(optimizer=Adam(learning_rate=.0005, beta_1=0.5), loss='binary_crossentropy', metrics=['accuracy'])

# Set the batch size and number of epochs
batch_size = 1024
num_epochs = 20

# Store the losses over time
gen_losses = []
dis_losses = []
gen_acc = []
dis_acc = []

# Train the GAN
for epoch in range(num_epochs):
    # Generate synthetic data
    synthetic_data = generator.predict(np.random.randn(batch_size, 1024))

    # Concatenate synthetic data with real data
    real_data = train_dataset1.values
    data = np.concatenate((synthetic_data, real_data))

    # Create labels for synthetic and real data
    labels = np.concatenate((np.zeros(batch_size), np.ones(len(real_data))))

    # Train the discriminator on synthetic and real data
    d_loss = discriminator.fit([data['batsmanIdx'], data['bowlerIdx'], data['ballNum'], data['ballsRemaining'], data['runs'],
                                data['runRate'], data['numWickets'], data['runsMomentum'], data['perfIndex']], labels)
    #d_loss = discriminator.train_on_batch(data, labels)
    dis_losses.append(d_loss[0])
    dis_acc.append(d_loss[1])

    # Generate random noise for the generator
    random_noise = np.random.randn(batch_size, 1024)

    # Create labels for the generator (all ones, since we want the generator to fool the discriminator)
    generator_labels = np.ones(batch_size)

    # Train the generator
    g_loss = gan.train_on_batch(random_noise, generator_labels)
    gen_losses.append(g_loss[0])
    gen_acc.append(g_loss[1])

    # Print loss values for each epoch
    print(f'Epoch: {epoch+1}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}')
The output and the error I get are:
Model: "model_56"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
batsmanIdx (InputLayer) [(None, 1)] 0 []
bowlerIdx (InputLayer) [(None, 1)] 0 []
embedding_2 (Embedding) (None, 1, 16) 75872 ['batsmanIdx[0][0]']
embedding_3 (Embedding) (None, 1, 16) 55872 ['bowlerIdx[0][0]']
flatten_2 (Flatten) (None, 16) 0 ['embedding_2[0][0]']
flatten_3 (Flatten) (None, 16) 0 ['embedding_3[0][0]']
ballNum (InputLayer) [(None, 1)] 0 []
ballsRemaining (InputLayer) [(None, 1)] 0 []
runs (InputLayer) [(None, 1)] 0 []
runRate (InputLayer) [(None, 1)] 0 []
numWickets (InputLayer) [(None, 1)] 0 []
runsMomentum (InputLayer) [(None, 1)] 0 []
perfIndex (InputLayer) [(None, 1)] 0 []
concatenate_28 (Concatenate) (None, 39) 0 ['flatten_2[0][0]',
'flatten_3[0][0]',
'ballNum[0][0]',
'ballsRemaining[0][0]',
'runs[0][0]',
'runRate[0][0]',
'numWickets[0][0]',
'runsMomentum[0][0]',
'perfIndex[0][0]']
dense_228 (Dense) (None, 64) 2560 ['concatenate_28[0][0]']
dropout_111 (Dropout) (None, 64) 0 ['dense_228[0][0]']
dense_229 (Dense) (None, 32) 2080 ['dropout_111[0][0]']
dropout_112 (Dropout) (None, 32) 0 ['dense_229[0][0]']
dense_230 (Dense) (None, 16) 528 ['dropout_112[0][0]']
dropout_113 (Dropout) (None, 16) 0 ['dense_230[0][0]']
dense_231 (Dense) (None, 8) 136 ['dropout_113[0][0]']
dropout_114 (Dropout) (None, 8) 0 ['dense_231[0][0]']
output (Dense) (None, 1) 9 ['dropout_114[0][0]']
==================================================================================================
Total params: 137,057
Trainable params: 137,057
Non-trainable params: 0
__________________________________________________________________________________________________
WARNING:tensorflow:Model was constructed with shape (None, 1) for
input KerasTensor(type_spec=TensorSpec(shape=(None, 1),
dtype=tf.float32, name='batsmanIdx'), name='batsmanIdx',
description="created by layer 'batsmanIdx'"), but it was called
on an input with incompatible shape (None, 9).
(None, 9)
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-30-8cbe40cd27bc> in <module>
94 print("111")
95 discriminator.summary()
---> 96 gan = build_gan(generator, discriminator)
97
98 # Compile the generator and discriminator models
2 frames
/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py in _run_internal_graph(self, inputs, training, mask)
677 for x in self.outputs:
678 x_id = str(id(x))
--> 679 assert x_id in tensor_dict, "Could not compute output " + str(x)
680 output_tensors.append(tensor_dict[x_id].pop())
681
AssertionError: Exception encountered when calling layer "model_56" (type Functional).
Could not compute output KerasTensor(type_spec=TensorSpec(shape=(None, 1), dtype=tf.float32, name=None), name='output/Sigmoid:0', description="created by layer 'output'")
Call arguments received by layer "model_56" (type Functional):
• inputs=tf.Tensor(shape=(None, 9), dtype=float32)
• training=None
• mask=None
I don't understand what this message means. I am supposed to input all 9 predictors, but for some reason it only picks up the first.
All thoughts and suggestions are welcome.
Consider
import tensorflow as tf
units=11
entrada=tf.keras.Input(name="entrada", shape=(units,))
unidad= tf.Variable([[1.0]]) # + 0.0* entrada[:,:1]
denseSoftmax=tf.keras.layers.Dense(units,name="denseSoftmax",activation="softmax")
softMaxOutput=denseSoftmax(unidad)
finalproduct=tf.keras.layers.Multiply()([entrada,softMaxOutput])
modelo=tf.keras.Model(entrada,finalproduct)
modelo.summary()
This example produces a model without trainable parameters, because the denseSoftmax layer does not act on the input. If I fake it by uncommenting + 0.0 * entrada[:,:1], then it produces the expected graph:
Layer (type) Output Shape Param # Connected to
==================================================================================================
entrada (InputLayer) [(None, 11)] 0 []
tf.__operators__.getitem (Slic (None, 1) 0 ['entrada[0][0]']
ingOpLambda)
tf.math.multiply (TFOpLambda) (None, 1) 0 ['tf.__operators__.getitem[0][0]'
tf.__operators__.add (TFOpLamb (None, 1) 0 ['tf.math.multiply[0][0]']
denseSoftmax (Dense) (None, 11) 22 ['tf.__operators__.add[0][0]']
multiply (Multiply) (None, 11) 0 ['entrada[0][0]',
'denseSoftmax[0][0]']
But faking a zero-valued link to an input seems as bad as adding a constant branch to the set of input layers.
Is there a way to tell Keras that it should follow a subgraph of layers that will be merged with the resulting output but do not depend on the input?
Is the following what you want?
class CustomModel(tf.keras.Model):
    def __init__(self, units) -> None:
        super().__init__()
        self.entrada = tf.keras.layers.InputLayer(input_shape=(units,))
        self.unidad = tf.Variable([[1.0]])
        self.denseSoftmax = tf.keras.layers.Dense(units, name="denseSoftmax", activation="softmax")
        self.finalproduct = tf.keras.layers.Multiply()

    def call(self, inputs):
        x = self.entrada(inputs)
        softMaxOutput = self.denseSoftmax(self.unidad)
        y = self.finalproduct([x, softMaxOutput])
        return y

units = 11
modelo = CustomModel(units=units)
modelo.build(input_shape=(None, units))
modelo.summary()
Model: "custom_model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 11)] 0
denseSoftmax (Dense) multiple 22
multiply (Multiply) multiple 0
=================================================================
Total params: 23
Trainable params: 23
Non-trainable params: 0
_________________________________________________________________
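As a quick usage check (my own sketch, not from the original answer), the subclassed model can be called on a random batch, and the softmax branch takes part in the forward pass even though it does not depend on the input:

import tensorflow as tf

x = tf.random.normal((2, units))        # a batch of 2 samples with 11 features
y = modelo(x)                           # forward pass through the custom model
print(y.shape)                          # (2, 11): input multiplied by the broadcast softmax weights
print(len(modelo.trainable_variables))  # 3: the Dense kernel and bias plus the tf.Variable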
I am learning to use bert-base-cased with a classification model... the code for the model is the following:
def mao_func(input_ids, masks, labels):
    return {'input_ids': input_ids, 'attention_mask': masks}, labels

dataset = dataset.map(mao_func)

BATCH_SIZE = 32
dataset = dataset.shuffle(100000).batch(BATCH_SIZE)

split = .8
ds_len = len(list(dataset))
train = dataset.take(round(ds_len * split))
val = dataset.skip(round(ds_len * split))

from transformers import TFAutoModel
bert = TFAutoModel.from_pretrained('bert-base-cased')
Model: "tf_bert_model"
Layer (type) Output Shape Param #
bert (TFBertMainLayer) multiple 108310272
=================================================================
Total params: 108,310,272
Trainable params: 108,310,272
Non-trainable params: 0
Then the NN building:
input_ids = tf.keras.layers.Input(shape=(50,), name='input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape=(50,), name='attention_mask', dtype='int32')
embeddings = bert(input_ids, attention_mask=mask)[0]
X = tf.keras.layers.GlobalMaxPool1D()(embeddings)
X = tf.keras.layers.BatchNormalization()(X)
X = tf.keras.layers.Dense(128, activation='relu')(X)
X = tf.keras.layers.Dropout(0.1)(X)
X = tf.keras.layers.Dense(32, activation='relu')(X)
y = tf.keras.layers.Dense(3, activation='softmax',name='outputs')(X)
model = tf.keras.Model(inputs=[input_ids, mask], outputs=y)
model.layers[2].trainable = False
the model.summary is:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_ids (InputLayer) [(None, 50)] 0 []
attention_mask (InputLayer) [(None, 50)] 0 []
tf_bert_model (TFBertModel) TFBaseModelOutputWi 108310272 ['input_ids[0][0]',
thPoolingAndCrossAt 'attention_mask[0][0]']
tentions(last_hidde
n_state=(None, 50,
768),
pooler_output=(Non
e, 768),
past_key_values=No
ne, hidden_states=N
one, attentions=Non
e, cross_attentions
=None)
global_max_pooling1d (GlobalMa (None, 768) 0 ['tf_bert_model[0][0]']
xPooling1D)
batch_normalization (BatchNorm (None, 768) 3072 ['global_max_pooling1d[0][0]']
alization)
dense (Dense) (None, 128) 98432 ['batch_normalization[0][0]']
dropout_37 (Dropout) (None, 128) 0 ['dense[0][0]']
dense_1 (Dense) (None, 32) 4128 ['dropout_37[0][0]']
outputs (Dense) (None, 3) 99 ['dense_1[0][0]']
==================================================================================================
Total params: 108,416,003
Trainable params: 104,195
Non-trainable params: 108,311,808
__________________________________________________________________________________________________
Finally, the model fitting is:
optimizer = tf.keras.optimizers.Adam(0.01)
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
model.compile(optimizer,loss=loss, metrics=[acc])
history = model.fit(
train,
validation_data = val,
epochs=140
)
with execution error in line 7 -> the model.fit(...):
ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 50), found shape=(None, 1, 512)
Can anyone be so kind as to help me understand what I did wrong and why... thanks :)
update: here is the git with the codes https://github.com/CharlieArreola/OnlinePosts
It seems that the shape of your training data doesn't match the expected input shape of your input layer.
You can check the shape of your training data with train.shape.
Your input layer input_ids = tf.keras.layers.Input(shape=(50,), name='input_ids', dtype='int32') expects training data with 50 columns, but judging by your error you most likely have 512.
So to fix this, you could simply change your input shape:
input_ids = tf.keras.layers.Input(shape=(512,), name='input_ids', dtype='int32')
If you split your x and y in your dataset, you can make it more flexible with:
input_ids = tf.keras.layers.Input(shape=(train_x.shape[1],), name='input_ids', dtype='int32')
Also don't forget that you have to apply this change to all of your input layers!
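Since train here is a batched tf.data.Dataset rather than a plain array, one way to inspect the shapes it actually yields (a small sketch of my own, not from the original answer) is:

# The element spec shows the structure the dataset feeds the model:
# a dict of inputs plus the labels.
print(train.element_spec)

# Or look at one concrete batch:
for features, labels in train.take(1):
    print(features['input_ids'].shape)       # e.g. (32, 1, 512) would explain the error above
    print(features['attention_mask'].shape)
    print(labels.shape)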
When adding an aggregation function on the batch axis (axis=0), everything is fine as long as the batch size is <= 32; with a batch size > 32, the output shape changes.
def model_test(samples, parameters):
    input_shape = (parameters,)
    in_ = keras.layers.Input(input_shape, batch_size=samples)
    l1_size1 = parameters // 2
    l2_size2 = parameters // 3
    l1 = keras.layers.Dense(l1_size1, activation='relu')(in_)
    l2 = keras.layers.Dense(l2_size2, activation='relu')(l1)
    out_max = keras.backend.max(l2, axis=0)
    model = keras.models.Model(in_, out_max)
    return model
creating the model for 32 samples:
Model: "functional_246"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_146 (InputLayer) [(32, 10)] 0
_________________________________________________________________
dense_66 (Dense) (32, 5) 55
_________________________________________________________________
dense_67 (Dense) (32, 3) 18
_________________________________________________________________
tf_op_layer_Max_138 (TensorF [(3,)] 0
=================================================================
Total params: 73
Trainable params: 73
Non-trainable params: 0
Running the model and displaying the actual output shape:
x = tf.random.normal((orders ,parameters))
result = model.predict(x)
print(result.shape)
(3,)
Creating it for 33 samples creates a similar model with the same output shape and the same number of parameters, as expected:
orders = 33
parameters = 10
model = model_test(orders, parameters)
model.summary()
Model: "functional_248"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_147 (InputLayer) [(33, 10)] 0
_________________________________________________________________
dense_68 (Dense) (33, 5) 55
_________________________________________________________________
dense_69 (Dense) (33, 3) 18
_________________________________________________________________
tf_op_layer_Max_139 (TensorF [(3,)] 0
=================================================================
Total params: 73
Trainable params: 73
Non-trainable params: 0
_________________________________________________________________
but the real output shape doubles:
x = tf.random.normal((orders ,parameters))
result = model.predict(x)
print(result.shape)
(6,)
The observed output size is always the expected size multiplied by batchsize // 32 + 1, and hence does not match the actually expected output.
This was tested with TensorFlow 2.3.1, with the same behaviour on GPU and CPU.
Adding a further dimension and reducing over axis=1 instead gives consistent output, so this really only happens on axis=0 (the batch axis):
def model_test(samples, parameters):
    input_shape = (samples, parameters)
    in_ = keras.layers.Input(input_shape, batch_size=None)
    l1_size1 = parameters // 2
    l2_size2 = parameters // 3
    l1 = keras.layers.Dense(l1_size1, activation='relu')(in_)
    l2 = keras.layers.Dense(l2_size2, activation='relu')(l1)
    out_max = keras.backend.max(l2, axis=1)
    model = keras.models.Model(in_, out_max)
    return model
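One hedged way to check whether this comes from model.predict splitting the data into batches of 32 by default (so the axis=0 reduction runs once per batch and the per-batch results are concatenated) is to control the prediction batch size explicitly; this is my own sketch, not part of the original post:

# Reusing the 33-sample, axis=0 model and x from above:
print(model.predict(x).shape)                     # (6,): two batches of <=32, each reduced to (3,), then concatenated
print(model.predict(x, batch_size=len(x)).shape)  # (3,): a single batch
print(model(x).shape)                             # (3,): calling the model directly avoids predict's batching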
In WaveNet, dilated convolution is used to increase the receptive field of the layers above.
From the illustration, you can see that layers of dilated convolution with kernel size 2 and dilation rates that are powers of 2 create a tree-like structure of receptive fields. I tried to (very simply) replicate the above in Keras.
import tensorflow.keras as keras
nn = input_layer = keras.layers.Input(shape=(200, 2))
nn = keras.layers.Conv1D(5, 5, padding='causal', dilation_rate=2)(nn)
nn = keras.layers.Conv1D(5, 5, padding='causal', dilation_rate=4)(nn)
nn = keras.layers.Dense(1)(nn)
model = keras.Model(input_layer, nn)
opt = keras.optimizers.Adam(lr=0.001)
model.compile(loss='mse', optimizer=opt)
model.summary()
And the output:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 200, 2)] 0
_________________________________________________________________
conv1d_5 (Conv1D) (None, 200, 5) 55
_________________________________________________________________
conv1d_6 (Conv1D) (None, 200, 5) 130
_________________________________________________________________
dense_2 (Dense) (None, 200, 1) 6
=================================================================
Total params: 191
Trainable params: 191
Non-trainable params: 0
_________________________________________________________________
I was expecting axis=1 to shrink after each conv1d layer, similar to the gif. Why is this not the case?
The model summary is as expected. As you note, using dilated convolutions results in an increase in the receptive field. However, dilated convolution actually preserves the output shape of our input image/activation, as we are just changing the convolutional kernel. A regular kernel could be the following:
0 1 0
1 1 1
0 1 0
A kernel with a dilation rate of 2 would add zeros in between each entry in our original kernel as below.
0 0 1 0 0
0 0 0 0 0
1 0 1 0 1
0 0 0 0 0
0 0 1 0 0
In fact, you can see that our original kernel is also a dilated kernel with a dilation rate of 1. Alternative ways to increase the receptive field result in a downsizing of the input image; max pooling and strided convolution are two such methods.
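To make the shape-preservation point concrete, here is a small sketch of my own comparing 'causal' and 'valid' padding for the same dilated kernel; only the padding mode, not the dilation, decides whether the time axis shrinks:

import tensorflow.keras as keras

inp = keras.layers.Input(shape=(200, 2))
causal = keras.layers.Conv1D(5, 5, padding='causal', dilation_rate=4)(inp)
valid = keras.layers.Conv1D(5, 5, padding='valid', dilation_rate=4)(inp)
print(causal.shape)  # (None, 200, 5): causal padding preserves the time axis
print(valid.shape)   # (None, 184, 5): 'valid' shrinks it by (5 - 1) * 4 steps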
For example, if you want to increase the receptive field by decreasing the size of your output shape, you could use strided convolution as below. I replace the dilated convolutions with strided convolutions, and you will see that the output shape reduces at every layer.
import tensorflow.keras as keras
nn = input_layer = keras.layers.Input(shape=(200, 2))
nn = keras.layers.Conv1D(5, 5, padding='causal', strides=2)(nn)
nn = keras.layers.Conv1D(5, 5, padding='causal', strides=4)(nn)
nn = keras.layers.Dense(1)(nn)
model = keras.Model(input_layer, nn)
opt = keras.optimizers.Adam(lr=0.001)
model.compile(loss='mse', optimizer=opt)
model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 200, 2)] 0
_________________________________________________________________
conv1d_3 (Conv1D) (None, 100, 5) 55
_________________________________________________________________
conv1d_4 (Conv1D) (None, 25, 5) 130
_________________________________________________________________
dense_1 (Dense) (None, 25, 1) 6
=================================================================
Total params: 191
Trainable params: 191
Non-trainable params: 0
_________________________________________________________________
To summarize, dilated convolution is just another way to increase the receptive field of your model. It has the benefit of preserving the output shape of your input image.
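For a rough sense of how much the receptive field grows in the question's two-layer model (my own back-of-the-envelope sketch, assuming stride 1 throughout):

# Receptive field of stacked stride-1 convolutions:
# rf = 1 + sum((kernel_size - 1) * dilation_rate) over the layers
kernel_size = 5
dilations = [2, 4]    # the two Conv1D layers in the question
rf = 1 + sum((kernel_size - 1) * d for d in dilations)
print(rf)             # 25 time steps, while the output length stays at 200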
Here's an example of this dilation with 1D convolutional layers; the output has size 14:
https://github.com/jwallbridge/translob/blob/master/python/LobFeatures.py
def lob_dilated(x):
    """TransLOB dilated 1-D convolution module"""
    x = layers.Conv1D(14, kernel_size=2, strides=1, activation='relu', padding='causal')(x)
    x = layers.Conv1D(14, kernel_size=2, dilation_rate=2, activation='relu', padding='causal')(x)
    x = layers.Conv1D(14, kernel_size=2, dilation_rate=4, activation='relu', padding='causal')(x)
    x = layers.Conv1D(14, kernel_size=2, dilation_rate=8, activation='relu', padding='causal')(x)
    y = layers.Conv1D(14, kernel_size=2, dilation_rate=16, activation='relu', padding='causal')(x)
    return y
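A minimal way to wire this module into a model and confirm the shapes (my own sketch; the sequence length of 100 and the 40 input features are placeholder values, not taken from the linked repo):

from tensorflow.keras import layers, Input, Model

inp = Input(shape=(100, 40))   # placeholder sequence length and feature count
out = lob_dilated(inp)
model = Model(inp, out)
model.summary()                # every Conv1D keeps the time axis at 100; the channel count becomes 14
# Receptive field: 1 + (2 - 1) * (1 + 2 + 4 + 8 + 16) = 32 time steps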