I want to train a multi-output model in TensorFlow Keras with two heads, ctr (click-through rate) and cvr (conversion rate).
The model should output ctr and cvr, but the losses should be a ctr loss and a (ctr * cvr) loss.
So, if the click label is zero, the (ctr * cvr) loss should be zero.
d = concatenate(inp_embed, axis=-1, name='concat')  # shared embeddings
d = Flatten()(d)
d_ctr = BatchNormalization()(d)
d_ctr = Dense(100, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01))(d_ctr)
d_ctr = BatchNormalization()(d_ctr)
d_ctr = Dense(50, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01))(d_ctr)
d_ctr = Dense(1, activation=activation)(d_ctr)
d_cvr = BatchNormalization()(d)
d_cvr = Dense(100, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01))(d_cvr)
d_cvr = BatchNormalization()(d_cvr)
d_cvr = Dense(50, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01))(d_cvr)
d_cvr = Dense(1, activation=activation)(d_cvr)
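# product head: p(click AND conversion) = pCTR * pCVR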
d_ivr = multiply([d_ctr, d_cvr])
deep = Model(inputs=inp_layer, outputs=[d_ctr, d_cvr])
This is how you can create a custom loss from multiple outputs. Keras applies one loss per output, and a loss function can only see its own y_true/y_pred (referencing the graph tensors d_ctr and d_cvr inside the loss will not work), so expose the product d_ivr as a second output and give each head its own loss:
def ctr_loss(y_true, y_pred):
    # y_true is the click label, y_pred is the ctr head
    return losses.binary_crossentropy(y_true, y_pred)
def ctcvr_loss(y_true, y_pred):
    # y_true is the click * conversion label, y_pred is the ctr * cvr head
    return losses.binary_crossentropy(y_true, y_pred)
And how to use it:
deep = Model(inputs=inp_layer, outputs=[d_ctr, d_ivr])
deep.compile(optimizer=sgd, loss=[ctr_loss, ctcvr_loss], metrics=['accuracy'])
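If you additionally want the (ctr * cvr) term to vanish entirely for non-clicked samples, one option (a sketch, assuming click_labels and ctcvr_labels are NumPy arrays you already built; the names are placeholders) is to mask that head with per-sample weights:
import numpy as np
# weight 1.0 for every sample on the ctr head, and the 0/1 click label on the
# product head, so unclicked samples contribute nothing to the second loss
deep.fit(x_train, [click_labels, ctcvr_labels],
         sample_weight=[np.ones(len(click_labels)), click_labels.astype('float32')])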
Feel free to add comments so that I can refine my answer.
I have two functional Keras models at the same level (same input and output shape); one of them is pre-trained. I would like to combine them horizontally and then retrain the whole model: the pretrained branch is initialized with its weights and the other one randomly. How can I combine them horizontally by adding them as branches (not concatenating)?
def define_model_a(input_shape, initializer, outputs = 1):
input_layer = Input(shape=(input_shape))
# first path
path10 = input_layer
path11 = Conv1D(filters=1, kernel_size=3, strides=1, padding="same", use_bias = True, kernel_initializer=initializer)(path10)
path12 = Lambda(lambda x: abs(x))(path11)
output = Add()([path10, path12])
define_model_a = Model(inputs=input_layer, outputs=output)
define_model_a._name = 'model_a'
return define_model_a
def define_model_b(input_shape, initializer, outputs = 1):
input_layer = Input(shape=(input_shape))
# first path
path10 = input_layer
path11 = Conv1D(filters=1, kernel_size=3, strides=1, padding="same", use_bias = True, kernel_initializer=initializer)(path10)
path12 = ReLU()(path11)
path13 = Dense(1, use_bias = True)(path12)
output = path13
define_model_b = Model(inputs=input_layer, outputs=output)
define_model_b._name = 'model_b'
return define_model_b
def define_merge_interpretation():
    ????
    ????
    output = Add()([model_a, model_b])
    model = Model(inputs=input_layer, outputs=output)
    return model
initializer = tf.keras.initializers.HeNormal()
model_a = define_model_a(input_shape, initializer, outputs = 1)
model_b = define_model_b(input_shape, initializer, outputs = 1)
model_a.load_weights(load_path)
merge_interpretation = define_merge_interpretation()
history = merge_interpretation.fit(......
For reference, I am looking for a final structure like the one in the image, but with some pretrained branches.
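For what it's worth, here is a minimal sketch of the missing merge function (an assumption about the intended wiring, not the poster's final code): call both sub-models on one shared Input and add their outputs element-wise, so the pretrained branch keeps its loaded weights and the other branch stays randomly initialized.
def define_merge_interpretation(model_a, model_b, input_shape):
    input_layer = Input(shape=input_shape)
    out_a = model_a(input_layer)  # pretrained branch
    out_b = model_b(input_layer)  # randomly initialized branch
    output = Add()([out_a, out_b])
    return Model(inputs=input_layer, outputs=output)
merge_interpretation = define_merge_interpretation(model_a, model_b, input_shape)
merge_interpretation.compile(optimizer='adam', loss='mse')  # loss is a placeholder
If the pretrained branch should stay frozen during retraining, set model_a.trainable = False before compiling.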
I'm building a convolutional variational autoencoder in two different ways (for anomaly detection). One is based on tensorflow_probability (method 1) and the other on plain TensorFlow layers (method 2). Both models have the same layers in the encoder and decoder; only the sampling method and loss function differ.
Method 2: uses a custom function for sampling from the two vectors (mean and log-variance) and a custom loss function.
Method 1: uses tensorflow_probability layers, which simplify the code (no custom sampling function needed).
In both methods I used 'mse' as a metric to compare the models, and the mse values of model 1 and model 2 are very different from each other. I attached the last 3 epochs of both models.
Can anyone explain whether the layers are used correctly in model 1 (the model that uses tensorflow_probability)? And what is the reason for the big difference in mse between the two models? Have I done everything correctly?
Thanks.
model 1:
import tensorflow
import tensorflow as tf
import tensorflow_probability as tfp
tfkl = tf.keras.layers
tfpl = tfp.layers
tfd = tfp.distributions
latent_dim = 256
prior = tfd.MultivariateNormalDiag(loc=tf.zeros([latent_dim]), scale_identity_multiplier=1.0)
input_data = tensorflow.keras.layers.Input(shape=(8, 8, 9))
encoder = tensorflow.keras.layers.Conv2D(256, (3,3), activation='LeakyReLU', strides=1, padding="same")(input_data)
encoder = tensorflow.keras.layers.MaxPooling2D((2,2), padding="same")(encoder)
encoder = tensorflow.keras.layers.Conv2D(256, (3,3), activation='LeakyReLU', strides=1, padding="same")(encoder)
encoder = tensorflow.keras.layers.MaxPooling2D((2,2), padding="same")(encoder)
encoder = tensorflow.keras.layers.Flatten()(encoder)
encoder = tensorflow.keras.layers.Dense(512)(encoder)
encoder = tensorflow.keras.layers.Dense(256)(encoder)
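# The Dense layer below outputs the parameters of a full-covariance Gaussian
# posterior; MultivariateNormalTriL turns them into a distribution, and the
# KLDivergenceRegularizer adds the KL(posterior || prior) term to the loss.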
encoder = tfkl.Dense(tfpl.MultivariateNormalTriL.params_size(latent_dim), activation=None)(encoder)
encoder = tfpl.MultivariateNormalTriL(event_size=latent_dim, activity_regularizer=tfpl.KLDivergenceRegularizer(prior))(encoder)
encoder_model = tensorflow.keras.Model(input_data, encoder)
decoder_input = tensorflow.keras.layers.Input(shape=(latent_dim))
decoder = tensorflow.keras.layers.Dense(512)(decoder_input)
decoder = tensorflow.keras.layers.Reshape((2, 2, 128))(decoder)
decoder = tensorflow.keras.layers.Conv2DTranspose(256, (3,3), activation='LeakyReLU', padding="same")(decoder)
decoder = tensorflow.keras.layers.UpSampling2D((2,2))(decoder)
decoder = tensorflow.keras.layers.Conv2DTranspose(256, (3,3), activation='LeakyReLU', padding="same")(decoder)
decoder = tensorflow.keras.layers.UpSampling2D((2,2))(decoder)
decoder = tensorflow.keras.layers.Conv2DTranspose(9, (3,3), activation='sigmoid',padding="same")(decoder)
decoder = tfkl.Flatten()(decoder)
decoder_output = tfpl.IndependentBernoulli((8, 8, 9), tfd.Bernoulli.logits)(decoder)
decoder_model = tensorflow.keras.Model(decoder_input, decoder_output)
encoded = encoder_model(input_data)
decoded = decoder_model(encoded)
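# loss: negative log-likelihood of the input under the decoder's
# IndependentBernoulli output distribution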
negloglik = lambda x, rv_x: -rv_x.log_prob(x)
autoencoder = tensorflow.keras.models.Model(input_data, decoded)
autoencoder.compile(metrics=['accuracy', 'mse', 'mae'], optimizer='adam', loss=negloglik)
history = autoencoder.fit(loaded_array, loaded_array, shuffle=True, epochs=250, batch_size=64)
[screenshot: last training epochs of model 1]
model 2:
input_data = tensorflow.keras.layers.Input(shape=(8, 8, 9))
encoder = tensorflow.keras.layers.Conv2D(256, (3,3), activation='LeakyReLU', strides=1, padding="same")(input_data)
encoder = tensorflow.keras.layers.MaxPooling2D((2,2), padding="same")(encoder)
encoder = tensorflow.keras.layers.Conv2D(256, (3,3), activation='LeakyReLU', strides=1, padding="same")(encoder)
encoder = tensorflow.keras.layers.MaxPooling2D((2,2), padding="same")(encoder)
encoder = tensorflow.keras.layers.Flatten()(encoder)
encoder = tensorflow.keras.layers.Dense(512)(encoder)
encoder = tensorflow.keras.layers.Dense(256)(encoder)
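# Reparameterization trick: z = mean + exp(0.5 * log_var) * eps, with eps ~ N(0, I)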
def sample_latent_features(distribution):
distribution_mean, distribution_variance = distribution
batch_size = tensorflow.shape(distribution_variance)[0]
random = tensorflow.keras.backend.random_normal(shape=(batch_size, tensorflow.shape(distribution_variance)[1]),seed=13)
return distribution_mean + tensorflow.exp(0.5 * distribution_variance) * random
distribution_mean = tensorflow.keras.layers.Dense(256, name='mean')(encoder)
distribution_variance = tensorflow.keras.layers.Dense(256, name='log_variance')(encoder)
latent_encoding = tensorflow.keras.layers.Lambda(sample_latent_features)([distribution_mean, distribution_variance])
encoder_model = tensorflow.keras.Model(input_data, latent_encoding)
decoder_input = tensorflow.keras.layers.Input(shape=(256))
decoder = tensorflow.keras.layers.Dense(512)(decoder_input)
decoder = tensorflow.keras.layers.Reshape((2, 2, 128))(decoder)
decoder = tensorflow.keras.layers.Conv2DTranspose(256, (3,3), activation='LeakyReLU', padding="same")(decoder)
decoder = tensorflow.keras.layers.UpSampling2D((2,2))(decoder)
decoder = tensorflow.keras.layers.Conv2DTranspose(256, (3,3), activation='LeakyReLU', padding="same")(decoder)
decoder = tensorflow.keras.layers.UpSampling2D((2,2))(decoder)
decoder_output = tensorflow.keras.layers.Conv2DTranspose(9, (3,3), activation='sigmoid',padding="same")(decoder)
decoder_model = tensorflow.keras.Model(decoder_input, decoder_output)
encoded = encoder_model(input_data)
decoded = decoder_model(encoded)
def get_loss(distribution_mean, distribution_variance):
def get_reconstruction_loss(y_true, y_pred):
reconstruction_loss = tensorflow.keras.losses.mse(y_true, y_pred)
reconstruction_loss_batch = tensorflow.reduce_mean(reconstruction_loss)
return reconstruction_loss_batch*8*8
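    # Analytic KL between N(mean, exp(log_var)) and N(0, I):
    # KL = -0.5 * mean(1 + log_var - mean^2 - exp(log_var))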
def get_kl_loss(distribution_mean, distribution_variance):
kl_loss = 1 + distribution_variance - tensorflow.square(distribution_mean) - tensorflow.exp(distribution_variance)
kl_loss_batch = tensorflow.reduce_mean(kl_loss)
return kl_loss_batch*(-0.5)
def total_loss(y_true, y_pred):
reconstruction_loss_batch = get_reconstruction_loss(y_true, y_pred)
kl_loss_batch = get_kl_loss(distribution_mean, distribution_variance)
return reconstruction_loss_batch + kl_loss_batch
return total_loss
autoencoder = tensorflow.keras.models.Model(input_data, decoded)
autoencoder.compile(loss=get_loss(distribution_mean, distribution_variance), metrics=['accuracy', 'mse', 'mae'], optimizer='adam')
history = autoencoder.fit(loaded_array, loaded_array, shuffle=True, epochs=epochs, batch_size=64)
[screenshot: last training epochs of model 2]
Here is part of a model I am working on. Being new to calculating gradients in tf, I got confused when I found that all the gradient values come out as 0. Here is the code:
class A:
def __init__(self, inputA_dim, inputB_dim):
self.inputA_dim = (35, 35, 1)
self.inputB_dim = 2
self.model = self.nn_model()
self.opt = tf.keras.optimizers.Adam()
# print(self.model.summary())
def nn_model(self):
inputA = Input(self.inputA_dim)
conv1 = Conv2D(10, 3, padding="same", activation="relu")(inputA)
pool1 = MaxPool2D(padding='same')(conv1)
conv2 = Conv2D(10, 3, padding="same", activation="relu")(pool1)
pool2 = MaxPool2D(padding='same')(conv2)
conv3 = Conv2D(10, 3, padding="same", activation="relu")(pool2)
pool3 = MaxPool2D(padding='same')(conv3)
flatten = Flatten()(pool3)
s2 = Dense(32, activation="relu")(flatten)
s3 = Dense(32, activation="relu")(s2)
s4 = Dense(2, activation="relu")(s3)
inputB = Input((self.inputB_dim,))
a1 = Dense(2, activation="relu")(inputB)
c1 = concatenate([s2, a1], axis=-1)
c2 = Dense(4, activation="relu")(c1)
outputs = Dense(1, activation="linear")(c2)
return tf.keras.Model([inputA, inputB], outputs)
def predict(self, inputs):
return self.model.predict(inputs)
def gradients(self, inputA, inputB):
inputB = tf.convert_to_tensor(inputB)
with tf.GradientTape() as tape:
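    # inputB is a plain tensor, not a tf.Variable, so the tape has to be
    # told explicitly to record operations on it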
tape.watch(inputB)
values = self.model([inputA, inputB])
values = tf.squeeze(values)
g = tape.gradient(values, inputB)
print(g)
return g
Later I found there is another method called jacobian, which I also tried here; it still gives 0s as gradient values. Can anyone tell me what to do? Thanks.
I'm using TensorFlow and Keras to predict handwritten digits. For training I'm using the MNIST dataset.
The accuracy is about 98.8% after training, but sometimes in testing it confuses 4 with 9 and 7 with 3, even though I have already optimized the input image with OpenCV (remove noise, rescale, threshold, etc.).
What should I do next to improve this prediction accuracy?
My plan is to add more samples and resize the sample images from 28x28 to 56x56.
Will this affect accuracy?
This is my model for training:
epochs=15, batch_size=64
def build_model():
model = Sequential()
# add Convolutional layers
model.add(Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
# Densely connected layers
model.add(Dense(128, activation='relu'))
# output layer
model.add(Dense(10, activation='softmax'))
# compile with adam optimizer & categorical_crossentropy loss function
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
return model
You can try to add regularization:
def conv2d_bn(x,
              units,
              kernel_size=(3, 3),
              activation='relu',
              dropout=.5):
    y = Dropout(dropout)(x)
    y = Conv2D(units, kernel_size=kernel_size, use_bias=False)(y)
    y = BatchNormalization()(y)
    y = Activation(activation)(y)
    return y
def build_model(..., dropout=.5):
    x = Input(shape=[...])
    y = conv2d_bn(x, 32)
    y = MaxPooling2D()(y)
    ...
    y = Dropout(dropout)(y)
    y = Dense(10, activation='softmax')(y)
    model = Model(x, y)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model
You can tweak the class weights to force the model to pay more attention to classes 3, 4, 7 and 9 during training (each sample's loss is scaled by its class weight, so the confused digits contribute roughly twice as much gradient):
model.fit(..., class_weight={0: 1, 1: 1, 2: 1, 3: 2, 4: 2, 5: 1, 6: 1, 7: 2, 8: 1, 9: 2})
If you have some time to burn, you can also try to grid- or random-search the model's hyperparameters. Something along the lines of:
def build(conv_layers, dense_layers, dense_units, activation, dropout):
    y = x = Input(shape=[...])
    kernels = 32
    kernel_size = (2, 2)
    for i in range(conv_layers):
        y = conv2d_bn(y, kernels, kernel_size, activation, dropout)
        if i % 2 == 0:  # or 3 or 4.
            y = MaxPooling2D()(y)
            kernels *= 2
            kernel_size = tuple(k + 1 for k in kernel_size)
    y = GlobalAveragePooling2D()(y)
    for i in range(dense_layers):
        y = Dropout(dropout)(y)
        y = Dense(dense_units, activation=activation)(y)
    y = Dense(10, activation='softmax')(y)
    model = Model(x, y)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model
model = KerasClassifier(build,
epochs=epochs,
validation_split=validation_split,
verbose=0,
...)
params = dict(conv_layers=[2, 3, 4],
dense_layers=[0, 1],
activation=['relu', 'selu'],
dropout=[.2, .3, .5],
callbacks=[callbacks.EarlyStopping(patience=10,
restore_best_weights=True)])
grid = GridSearchCV(model, params,
scoring='balanced_accuracy',
verbose=2,
n_jobs=1)
Now, combining hyperparameter searching with the NumpyArrayIterator is a little tricky, because the latter assumes we have all training samples (and targets) at hand before the training steps. It's still doable, though:
g = ImageDataGenerator(...)
cv = StratifiedKFold(n_splits=3)
results = dict(params=[], valid_score=[])
for params in ParameterGrid(params):
fold_scores = []
for t, v in cv.split(train_data, train_labels):
train = g.flow(train_data[t], train_labels[t], subset='training')
nn_valid = g.flow(train_data[t], train_labels[t], subset='validation')
fold_valid = g.flow(train_data[v], train_labels[v], shuffle=False)
nn = build_model(**params)
nn.fit_generator(train, validation_data=nn_valid, ...)
probabilities = nn.predict_generator(fold_valid, steps=...)
p = np.argmax(probabilities, axis=1)
fold_scores += [metrics.accuracy_score(train_labels[v], p)]
results['params'] += [params]
results['valid_score'] += [fold_scores]
best_ix = np.argmax(np.mean(results['valid_score'], axis=1))
best_params = results['params'][best_ix]
nn = build_model(**best_params)
nn.fit_generator(...)
I am trying to implement a multiclass semantic segmentation model with 2
classes (human, car). Here is my modified implementation of the U-Net architecture. I set the number of output channels to 3 (3 classes: human, car, background). How do I get pixel-wise classification?
Here are 2 examples from my ground truth masks.
I am using 1 channel for each object class, i.e.:
channel 1 for class=car
channel 2 for class=background
channel 3 for class=human
def conv_block(tensor, nfilters, size=3, padding='same', initializer="he_normal"):
x = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(tensor)
x = BatchNormalization()(x)
x = Activation("relu")(x)
x = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(x)
x = BatchNormalization()(x)
x = Activation("relu")(x)
return x
def deconv_block(tensor, residual, nfilters, size=3, padding='same', strides=(2, 2)):
y = Conv2DTranspose(nfilters, kernel_size=(size, size), strides=strides, padding=padding)(tensor)
y = concatenate([y, residual], axis=3)
y = conv_block(y, nfilters)
return y
def Unet(img_height, img_width, nclasses=3, filters=64):
input_layer = Input(shape=(img_height, img_width, 3), name='image_input')
conv1 = conv_block(input_layer, nfilters=filters)
conv1_out = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = conv_block(conv1_out, nfilters=filters*2)
conv2_out = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = conv_block(conv2_out, nfilters=filters*4)
conv3_out = MaxPooling2D(pool_size=(2, 2))(conv3)
conv4 = conv_block(conv3_out, nfilters=filters*8)
conv4_out = MaxPooling2D(pool_size=(2, 2))(conv4)
conv4_out = Dropout(0.5)(conv4_out)
conv5 = conv_block(conv4_out, nfilters=filters*16)
conv5 = Dropout(0.5)(conv5)
deconv6 = deconv_block(conv5, residual=conv4, nfilters=filters*8)
deconv6 = Dropout(0.5)(deconv6)
deconv7 = deconv_block(deconv6, residual=conv3, nfilters=filters*4)
deconv7 = Dropout(0.5)(deconv7)
deconv8 = deconv_block(deconv7, residual=conv2, nfilters=filters*2)
deconv9 = deconv_block(deconv8, residual=conv1, nfilters=filters)
output_layer = Conv2D(filters=nclasses, kernel_size=(1, 1))(deconv9)
output_layer = BatchNormalization()(output_layer)
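    # flatten spatial dims: the output becomes (H*W, nclasses), so each row is
    # one pixel's class distribution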
output_layer = Reshape((img_height*img_width, nclasses), input_shape=(img_height, img_width, nclasses))(output_layer)
output_layer = Activation('softmax')(output_layer)
model = Model(inputs=input_layer, outputs=output_layer, name='Unet')
return model
You are almost done; now backpropagate the network error with:
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_layer, labels=labels))
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
Note that sparse_softmax_cross_entropy_with_logits expects raw logits, so pass it the tensor from before the Activation('softmax') layer. You don't have to convert your ground truth into one-hot format; sparse_softmax will do it for you.
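Equivalently, you can stay in Keras (a sketch, assuming your labels are integer class ids flattened to shape (batch, img_height*img_width) to match the model's Reshape; images and masks are placeholder names): keep the softmax output, compile with sparse categorical cross-entropy, and take an argmax over the class axis to get a per-pixel class map.
model = Unet(img_height, img_width, nclasses=3)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(images, masks, epochs=50, batch_size=8)  # placeholder data/settings
probs = model.predict(images)                         # (batch, H*W, nclasses)
classes = probs.argmax(axis=-1)                       # per-pixel class ids
classes = classes.reshape(-1, img_height, img_width)  # back to image layout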