I'm building a convolutional variational autoencoder in two different ways (for anomaly detection). One is based on TensorFlow Probability (method 1) and the other on plain TensorFlow layers (method 2). Both models have the same layers in the encoder and decoder phases; only the sampling method and the loss function differ.
Method 2 uses a custom function to sample from the two vectors (mean and log-variance) and a custom loss function.
Method 1 uses tensorflow_probability layers, which simplify the code (no custom sampling function is needed).
In both methods I used 'mse' as a metric to compare the models, and the mse values of model 1 and model 2 turn out to be very different from each other. I attached the last 3 epochs of both models.
Can anyone explain whether the layers are used correctly in model 1 (the model that uses TensorFlow Probability)? What is the reason for the big difference in mse values between the two models? Have I done everything correctly?
Thanks.
model 1:

import tensorflow
import tensorflow_probability as tfp

tfd = tfp.distributions
tfpl = tfp.layers
tfkl = tensorflow.keras.layers

latent_dim = 256

# standard-normal prior over the latent code
prior = tfd.MultivariateNormalDiag(loc=tensorflow.zeros([latent_dim]), scale_identity_multiplier=1.0)

input_data = tensorflow.keras.layers.Input(shape=(8, 8, 9))
encoder = tensorflow.keras.layers.Conv2D(256, (3, 3), activation='LeakyReLU', strides=1, padding="same")(input_data)
encoder = tensorflow.keras.layers.MaxPooling2D((2, 2), padding="same")(encoder)
encoder = tensorflow.keras.layers.Conv2D(256, (3, 3), activation='LeakyReLU', strides=1, padding="same")(encoder)
encoder = tensorflow.keras.layers.MaxPooling2D((2, 2), padding="same")(encoder)
encoder = tensorflow.keras.layers.Flatten()(encoder)
encoder = tensorflow.keras.layers.Dense(512)(encoder)
encoder = tensorflow.keras.layers.Dense(256)(encoder)
# parameters of a full-covariance Gaussian posterior; the KL penalty against the
# prior is added to the loss through the activity regularizer
encoder = tfkl.Dense(tfpl.MultivariateNormalTriL.params_size(latent_dim), activation=None)(encoder)
encoder = tfpl.MultivariateNormalTriL(event_size=latent_dim, activity_regularizer=tfpl.KLDivergenceRegularizer(prior))(encoder)
encoder_model = tensorflow.keras.Model(input_data, encoder)

decoder_input = tensorflow.keras.layers.Input(shape=(latent_dim,))
decoder = tensorflow.keras.layers.Dense(512)(decoder_input)
decoder = tensorflow.keras.layers.Reshape((2, 2, 128))(decoder)
decoder = tensorflow.keras.layers.Conv2DTranspose(256, (3, 3), activation='LeakyReLU', padding="same")(decoder)
decoder = tensorflow.keras.layers.UpSampling2D((2, 2))(decoder)
decoder = tensorflow.keras.layers.Conv2DTranspose(256, (3, 3), activation='LeakyReLU', padding="same")(decoder)
decoder = tensorflow.keras.layers.UpSampling2D((2, 2))(decoder)
decoder = tensorflow.keras.layers.Conv2DTranspose(9, (3, 3), activation='sigmoid', padding="same")(decoder)
decoder = tfkl.Flatten()(decoder)
# the decoder output is a Bernoulli distribution over each pixel/channel
decoder_output = tfpl.IndependentBernoulli((8, 8, 9), tfd.Bernoulli.logits)(decoder)
decoder_model = tensorflow.keras.Model(decoder_input, decoder_output)

encoded = encoder_model(input_data)
decoded = decoder_model(encoded)

# loss: negative log-likelihood of the data under the decoder distribution
negloglik = lambda x, rv_x: -rv_x.log_prob(x)

autoencoder = tensorflow.keras.models.Model(input_data, decoded)
autoencoder.compile(metrics=['accuracy', 'mse', 'mae'], optimizer='adam', loss=negloglik)
history = autoencoder.fit(loaded_array, loaded_array, shuffle=True, epochs=250, batch_size=64)
[screenshot: last 3 training epochs of model 1]
model 2:
input_data = tensorflow.keras.layers.Input(shape=(8, 8, 9))
encoder = tensorflow.keras.layers.Conv2D(256, (3, 3), activation='LeakyReLU', strides=1, padding="same")(input_data)
encoder = tensorflow.keras.layers.MaxPooling2D((2, 2), padding="same")(encoder)
encoder = tensorflow.keras.layers.Conv2D(256, (3, 3), activation='LeakyReLU', strides=1, padding="same")(encoder)
encoder = tensorflow.keras.layers.MaxPooling2D((2, 2), padding="same")(encoder)
encoder = tensorflow.keras.layers.Flatten()(encoder)
encoder = tensorflow.keras.layers.Dense(512)(encoder)
encoder = tensorflow.keras.layers.Dense(256)(encoder)

def sample_latent_features(distribution):
    # reparameterization trick: z = mu + sigma * epsilon, with sigma = exp(0.5 * log_var)
    distribution_mean, distribution_variance = distribution
    batch_size = tensorflow.shape(distribution_variance)[0]
    random = tensorflow.keras.backend.random_normal(shape=(batch_size, tensorflow.shape(distribution_variance)[1]), seed=13)
    return distribution_mean + tensorflow.exp(0.5 * distribution_variance) * random

distribution_mean = tensorflow.keras.layers.Dense(256, name='mean')(encoder)
distribution_variance = tensorflow.keras.layers.Dense(256, name='log_variance')(encoder)
latent_encoding = tensorflow.keras.layers.Lambda(sample_latent_features)([distribution_mean, distribution_variance])
encoder_model = tensorflow.keras.Model(input_data, latent_encoding)

decoder_input = tensorflow.keras.layers.Input(shape=(256,))
decoder = tensorflow.keras.layers.Dense(512)(decoder_input)
decoder = tensorflow.keras.layers.Reshape((2, 2, 128))(decoder)
decoder = tensorflow.keras.layers.Conv2DTranspose(256, (3, 3), activation='LeakyReLU', padding="same")(decoder)
decoder = tensorflow.keras.layers.UpSampling2D((2, 2))(decoder)
decoder = tensorflow.keras.layers.Conv2DTranspose(256, (3, 3), activation='LeakyReLU', padding="same")(decoder)
decoder = tensorflow.keras.layers.UpSampling2D((2, 2))(decoder)
decoder_output = tensorflow.keras.layers.Conv2DTranspose(9, (3, 3), activation='sigmoid', padding="same")(decoder)
decoder_model = tensorflow.keras.Model(decoder_input, decoder_output)

encoded = encoder_model(input_data)
decoded = decoder_model(encoded)

def get_loss(distribution_mean, distribution_variance):
    def get_reconstruction_loss(y_true, y_pred):
        reconstruction_loss = tensorflow.keras.losses.mse(y_true, y_pred)
        reconstruction_loss_batch = tensorflow.reduce_mean(reconstruction_loss)
        # scale by the spatial size (8 x 8) so the reconstruction term behaves like a sum over pixels
        return reconstruction_loss_batch * 8 * 8

    def get_kl_loss(distribution_mean, distribution_variance):
        kl_loss = 1 + distribution_variance - tensorflow.square(distribution_mean) - tensorflow.exp(distribution_variance)
        kl_loss_batch = tensorflow.reduce_mean(kl_loss)
        return kl_loss_batch * (-0.5)

    def total_loss(y_true, y_pred):
        reconstruction_loss_batch = get_reconstruction_loss(y_true, y_pred)
        kl_loss_batch = get_kl_loss(distribution_mean, distribution_variance)
        return reconstruction_loss_batch + kl_loss_batch

    return total_loss
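For reference, the KL term above is the standard closed form for the KL divergence between a diagonal Gaussian posterior N(mu, sigma^2) and a standard normal prior, with distribution_variance holding log(sigma^2):

KL = -0.5 * sum_j (1 + log(sigma_j^2) - mu_j^2 - sigma_j^2)

Note that the code takes a mean over the latent dimensions instead of a sum, which rescales the KL term relative to this formula (and relative to the KL penalty that model 1's KLDivergenceRegularizer adds).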
autoencoder = tensorflow.keras.models.Model(input_data, decoded)
autoencoder.compile(loss=get_loss(distribution_mean, distribution_variance), metrics=['accuracy', 'mse', 'mae'], optimizer='adam')
history = autoencoder.fit(loaded_array, loaded_array, shuffle=True, epochs=250, batch_size=64)
[screenshot: last 3 training epochs of model 2]
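(A note in the context of the question's mse comparison: in model 1 the model output is a tfpl.IndependentBernoulli distribution, and when Keras evaluates metrics such as 'mse' it first converts that distribution to a tensor using the layer's convert_to_tensor_fn, which is tfd.Bernoulli.logits here, so the metric compares the inputs against raw logits rather than against reconstructed pixel values in [0, 1]. In model 2 the output is already a sigmoid reconstruction, so the two mse numbers are not measuring the same quantity. A sketch of a more like-for-like check, assuming the models above and an evaluation array x_eval, an illustrative name that is not in the original code:

z = encoder_model(x_eval)          # latent code (a distribution; coerced to a sample when used as a tensor)
recon = decoder_model(z).mean()    # expected pixel values in [0, 1], same scale as model 2's sigmoid output
mse_model1 = tensorflow.reduce_mean(tensorflow.square(x_eval - recon)))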
Related
I have two functional Keras models at the same level (same input and output shape); one of them is pre-trained. I would like to combine them horizontally and then retrain the whole model. That is, I want to initialize the pretrained branch with its weights and the other one randomly. How can I combine them horizontally by adding them as branches (not concatenating)? See the sketch after the code below.
def define_model_a(input_shape, initializer, outputs=1):
    input_layer = Input(shape=(input_shape))
    # first path
    path10 = input_layer
    path11 = Conv1D(filters=1, kernel_size=3, strides=1, padding="same", use_bias=True, kernel_initializer=initializer)(path10)
    path12 = Lambda(lambda x: abs(x))(path11)
    output = Add()([path10, path12])
    define_model_a = Model(inputs=input_layer, outputs=output)
    define_model_a._name = 'model_a'
    return define_model_a

def define_model_b(input_shape, initializer, outputs=1):
    input_layer = Input(shape=(input_shape))
    # first path
    path10 = input_layer
    path11 = Conv1D(filters=1, kernel_size=3, strides=1, padding="same", use_bias=True, kernel_initializer=initializer)(path10)
    path12 = ReLU()(path11)
    path13 = Dense(1, use_bias=True)(path12)
    output = path13
    define_model_b = Model(inputs=input_layer, outputs=output)
    define_model_b._name = 'model_b'
    return define_model_b
def define_merge_interpretation():
    ????
    ????
    output = Add()([model_a, model_b])
    model = Model(inputs=input_layer, outputs=output)
    return model

initializer = tf.keras.initializers.HeNormal()
model_a = define_model_a(input_shape, initializer, outputs=1)
model_b = define_model_b(input_shape, initializer, outputs=1)
model_a.load_weights(load_path)
merge_interpretation = define_merge_interpretation()
history = merge_interpretation.fit(......
For reference, I am looking for a final structure like the one in the image, but with some pretrained branches.
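A minimal sketch of the kind of merge I have in mind follows; it assumes a shared input layer and that both branches produce the same output shape (the function signature and wiring are my guess at the missing pieces, not a confirmed solution):

from tensorflow.keras.layers import Input, Add
from tensorflow.keras.models import Model

def define_merge_interpretation(model_a, model_b, input_shape):
    # one shared input feeds both branches
    input_layer = Input(shape=input_shape)
    out_a = model_a(input_layer)  # pretrained branch (weights loaded beforehand)
    out_b = model_b(input_layer)  # randomly initialized branch
    # element-wise addition of the two branch outputs (not concatenation)
    output = Add()([out_a, out_b])
    return Model(inputs=input_layer, outputs=output)

Setting model_a.trainable = False before compiling would keep the pretrained branch frozen while the other branch trains.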
I am trying to build a classifier for mixed data consisting of text and categorical data. I want to process the text data using an embedding layer followed by an LSTM cell and a dense network.
For the categorical data, I created a dense network.
I want to concatenate the outputs of the two networks and pass them through another dense network.
Here is my code:
def create_model():
    seq = tf.keras.layers.Input(shape=(100,))
    embeddings = tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length)(seq)
    x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128))(embeddings)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dense(32, activation='relu')(x)

    categorical_input = tf.keras.layers.Input(shape=(4198,))
    y = tf.keras.layers.Dense(128, activation='relu')(categorical_input)
    y = tf.keras.layers.Dense(64, activation='relu')(y)

    combined = tf.keras.layers.concatenate([x, y])
    z = tf.keras.layers.Dense(32, activation='relu')(combined)
    z = tf.keras.layers.Dense(16, activation='relu')(z)
    outs = tf.keras.layers.Dense(3, activation='softmax')
    return tf.keras.Model(inputs=[seq, categorical_input], outputs=outs)

model = create_model()
model.summary()
And here is the full traceback:

/tmp/ipykernel_33/1585487845.py in create_model()
     14     z = tf.keras.layers.Dense(16, activation = 'relu')(z)
     15     outs = tf.keras.layers.Dense(3, activation = 'softmax')
---> 16     return tf.keras.Model(inputs = [seq, categorical_input], outputs = outs)
     17

ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` (thus holding past layer metadata). Found: <keras.layers.core.Dense object at 0x7f30ecaee790>
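The traceback points at the problem: the final Dense layer is created but never called on a tensor, so outs is a layer object instead of an output tensor. The likely fix, going by the error message applied to the code above, is to call the layer on z:

outs = tf.keras.layers.Dense(3, activation='softmax')(z)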
I am trying to create a CNN model using hyperparameter tuning for image classification. When I run the code I receive the following error:
ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 32, 32, 32, 3), found shape=(32, 32, 32, 3)
How do I fix the error? Here is the whole code:
# first we create our model-building function, which takes the arguments units, activation, dropout, lr
# (imports assumed: import keras_tuner; from tensorflow import keras as ks; from tensorflow.keras import layers)
def build_model(hp):
    model = ks.Sequential([
        # adding first conv2d layer
        ks.layers.Conv2D(
            # let's tune the filters, kernel_size, activation function
            filters=hp.Int("conv_1_filter", min_value=1, max_value=100, step=16),
            kernel_size=hp.Choice("conv_1_kernel", values=[3, 5]),
            activation=hp.Choice("conv_1_activation", ["relu", "tanh", "softmax"]),
            input_shape=(32, 32, 32, 3)
        ),
        # adding second conv2d layer
        ks.layers.Conv2D(
            # let's tune the filters, kernel_size, activation function
            filters=hp.Int("conv_2_filter", min_value=1, max_value=50, step=16),
            kernel_size=hp.Choice("conv_2_kernel", values=[3, 5]),
            activation=hp.Choice("conv_2_activation", ["relu", "tanh", "softmax"]),
            input_shape=(32, 32, 32, 3)
        )])
    model.add(layers.Flatten())
    # let's tune the number of Dense layers
    for i in range(hp.Int("num_dense_layers", 1, 3)):
        model.add(
            layers.Dense(
                # let's tune the number of units separately
                units=hp.Int(f"units_{i}", min_value=1, max_value=100, step=16),
                activation=hp.Choice("activation", ["relu", "tanh", "softmax"])
            ))
    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=0.25))
    model.add(layers.Dense(10, activation="softmax"))
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=ks.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

build_model(keras_tuner.HyperParameters())
You are getting this error due to an input shape mismatch.
Here I have implemented the hypermodel on the Fashion-MNIST dataset, which contains images of shape (28, 28, 1).
def build_model(hp):
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(28, 28, 1)),
        # adding first conv2d layer
        tf.keras.layers.Conv2D(
            # let's tune the filters, kernel_size, activation function
            filters=hp.Int("conv_1_filter", min_value=1, max_value=100, step=16),
            kernel_size=hp.Choice("conv_1_kernel", values=[3, 5]),
            activation=hp.Choice("conv_1_activation", ["relu", "tanh", "softmax"])
        ),
        tf.keras.layers.MaxPooling2D(
            pool_size=hp.Choice('pooling_1', values=[2, 3])),
        # adding second conv2d layer
        tf.keras.layers.Conv2D(
            # let's tune the filters, kernel_size, activation function
            filters=hp.Int("conv_2_filter", min_value=1, max_value=50, step=16),
            kernel_size=hp.Choice("conv_2_kernel", values=[3, 5]),
            activation=hp.Choice("conv_2_activation", ["relu", "tanh", "softmax"])
        ),
        tf.keras.layers.MaxPooling2D(
            pool_size=hp.Choice('pooling_2', values=[2, 3]))])
    model.add(tf.keras.layers.Flatten())
    if hp.Boolean("dropout"):
        model.add(tf.keras.layers.Dropout(rate=0.25))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model
By providing the correct shape you will not get any error.
For more details, please refer to this gist and this documentation. Thank you!
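As a usage sketch for the hypermodel above (assuming the keras_tuner package and one-hot-encoded Fashion-MNIST arrays x_train/y_train; the tuner settings are illustrative, not prescribed):

import keras_tuner

tuner = keras_tuner.RandomSearch(
    build_model,                 # the hypermodel function defined above
    objective="val_accuracy",
    max_trials=5,
)
tuner.search(x_train, y_train, epochs=3, validation_split=0.2)
best_model = tuner.get_best_models(num_models=1)[0]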
I want to train a multi-output model, with outputs named ctr (click-through rate) and cvr (conversion rate), in TensorFlow Keras.
The outputs should be ctr and cvr, but the losses should be a ctr loss and a (ctr * cvr) loss.
So, if the click label is zero, the (ctr * cvr) loss should be zero.
d = concatenate(inp_embed, axis=-1, name='concat')  # shared embeddings
d = Flatten()(d)
d_ctr = BatchNormalization()(d)
d_ctr = Dense(100, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01))(d_ctr)
d_ctr = BatchNormalization()(d_ctr)
d_ctr = Dense(50, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01))(d_ctr)
d_ctr = Dense(1, activation=activation)(d_ctr)

d_cvr = BatchNormalization()(d)
d_cvr = Dense(100, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01))(d_cvr)
d_cvr = BatchNormalization()(d_cvr)
d_cvr = Dense(50, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01))(d_cvr)
d_cvr = Dense(1, activation=activation)(d_cvr)

d_ivr = multiply([d_ctr, d_cvr])

deep = Model(inputs=inp_layer, outputs=[d_ctr, d_cvr])
This is how you can create a custom loss from multiple outputs:

def custom_loss(y_true, y_pred):
    ctr_loss = losses.binary_crossentropy(y_true, d_ctr)
    cvr_loss = losses.binary_crossentropy(y_true, d_cvr)
    return ctr_loss * cvr_loss

And how to use it:

deep.compile(optimizer=sgd, loss=custom_loss, metrics=['accuracy'])

Feel free to add comments so that I can refine my answer.
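For the loss structure described in the question (a ctr loss plus a (ctr * cvr) loss), one common alternative is the ctr/ctcvr formulation; here is a sketch of it, where the label arrays click_labels and click_times_conversion_labels are assumptions about how the data is prepared:

# sketch: expose ctr and the product ctr * cvr as the two trained outputs,
# and give each head its own binary cross-entropy loss
d_ctcvr = multiply([d_ctr, d_cvr], name='ctcvr')
deep = Model(inputs=inp_layer, outputs=[d_ctr, d_ctcvr])
deep.compile(optimizer=sgd, loss=['binary_crossentropy', 'binary_crossentropy'], metrics=['accuracy'])
# the second target is click * conversion, so it is zero whenever the click label is zero
# deep.fit(x, [click_labels, click_times_conversion_labels], ...)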
I want to classify pictures with different input sizes. I would like to use the ideas from the following paper:
'Fully Convolutional Networks for Semantic Segmentation'
https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf
I changed the dense layers to Conv2D layers like this:
def FullyCNN(input_shape, n_classes):
    inputs = Input(shape=(None, None, 1))
    first_layer = Conv2D(filters=16, kernel_size=(12, 16), strides=1, activation='relu', kernel_initializer='he_normal', name='conv1')(inputs)
    first_layer = BatchNormalization()(first_layer)
    first_layer = MaxPooling2D(pool_size=2)(first_layer)
    second_layer = Conv2D(filters=24, kernel_size=(8, 12), strides=1, activation='relu', kernel_initializer='he_normal', name='conv2')(first_layer)
    second_layer = BatchNormalization()(second_layer)
    second_layer = MaxPooling2D(pool_size=2)(second_layer)
    third_layer = Conv2D(filters=32, kernel_size=(5, 7), strides=1, activation='relu', kernel_initializer='he_normal', name='conv3')(second_layer)
    third_layer = BatchNormalization()(third_layer)
    third_layer = MaxPooling2D(pool_size=2)(third_layer)
    # "dense" part of the network, re-expressed as convolutions
    fully_layer = Conv2D(64, kernel_size=8, activation='relu', kernel_initializer='he_normal')(third_layer)
    fully_layer = BatchNormalization()(fully_layer)
    fully_layer = Dropout(0.5)(fully_layer)
    fully_layer = Conv2D(n_classes, kernel_size=1)(fully_layer)
    output = Conv2DTranspose(n_classes, kernel_size=1, activation='softmax')(fully_layer)
    model = Model(inputs=inputs, outputs=output)
    return model
and I made a generator for use with fit_generator():
def data_generator(x_train, y_train):
    while True:
        index = np.asscalar(np.random.choice(len(x_train), 1))
        feature = np.expand_dims(x_train[index], -1)
        feature = np.resize(feature, (-1, feature.shape))
        feature = np.expand_dims(feature, 0)  # make (1, input_height, input_width, 1)
        label = y_train[index]
        yield (feature, label)
These are images of my data.
However, there are some problems with the dimensions: since the output layer has 4 dimensions, unlike the original CNN model, the output shape does not match the labels.
Model summary:
Original CNN model summary:
How can I handle this problem? I tried to change the label's dimensions by expanding them:
label = np.expand_dims(label,0)
label = np.expand_dims(label,0)
label = np.expand_dims(label,0)
I think there is a better way. I also wonder: is the Conv2DTranspose necessary? And should the batch size be 1?
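A minimal sketch of one alternative (an assumption, not something from the paper): collapse the spatial dimensions with a global pooling layer so the model emits one fixed-length class vector per image, whatever the input size; then the labels can stay ordinary one-hot vectors and the Conv2DTranspose is not needed:

from tensorflow.keras.layers import Input, Conv2D, GlobalMaxPooling2D, Activation
from tensorflow.keras.models import Model

def fully_cnn_classifier(n_classes):
    inputs = Input(shape=(None, None, 1))               # variable-size grayscale input
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(inputs)
    x = Conv2D(n_classes, (1, 1))(x)                    # per-location class scores
    x = GlobalMaxPooling2D()(x)                         # collapse H and W -> (batch, n_classes)
    outputs = Activation('softmax')(x)
    return Model(inputs, outputs)

Batches larger than 1 then work as long as the images within a batch share a size (or are padded to one).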