I am using a CNN to solve a regression problem in a supervised manner. I have input data (X_train) and target data (y_train).
from keras import backend as K

def custom_loss(loss1, loss2):
    # Returns a loss function that sums the two given losses.
    def loss(y_true, y_pred):
        return loss1(y_true, y_pred) + loss2(y_true, y_pred)
    return loss

Example usage:

from keras.losses import mean_squared_error, mean_absolute_error
model.compile(loss=custom_loss(mean_squared_error, mean_absolute_error), optimizer='adam')
After you declare your model, you can define a custom loss function and pass it to the model.compile step. Here's a snippet for the case where you need to sum MSE and MAE:
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError

model = keras.Sequential([
    Dense(32, input_shape=(10,), activation='softmax')
])

def custom_loss_function(y_true, y_pred):
    # Sum the built-in MSE and MAE losses.
    mse = MeanSquaredError()
    mae = MeanAbsoluteError()
    return mse(y_true, y_pred) + mae(y_true, y_pred)

model.compile(loss=custom_loss_function, optimizer='adam')
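For reference, a minimal end-to-end sketch of this approach, assuming tf.keras and some random dummy data (the layer sizes and epoch count are arbitrary, purely for illustration):

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError

def custom_loss_function(y_true, y_pred):
    # Combined objective: MSE + MAE.
    return MeanSquaredError()(y_true, y_pred) + MeanAbsoluteError()(y_true, y_pred)

# Dummy regression data, purely for illustration.
X = np.random.random((256, 10)).astype('float32')
y = np.random.random((256, 1)).astype('float32')

model = tf.keras.Sequential([
    Dense(32, activation='relu', input_shape=(10,)),
    Dense(1),  # linear output for regression
])
model.compile(loss=custom_loss_function, optimizer='adam')
model.fit(X, y, epochs=2, verbose=0)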
Below is a simple program I wrote. Its job is to learn the parameters of a simple linear function Ax+B.
When training "manually" it converges after a few thousand epochs, but when I try to do the same using a Dense(1) layer+model it converges to a loss of 500000, and the trained parameters are nowhere near the correct ones (-2, 34).
I thought a Dense(1) layer is just like Ax+B, but apparently it's not?
from tensorflow.keras import layers, models
from tensorflow.keras import optimizers
from tensorflow.keras import initializers
import tensorflow as tf
import random
import numpy as np

x_train = np.linspace(1, 100, 100)
y_train = -2*x_train + 34

def manual_train():
    optimizer = optimizers.Adam(lr=1e-2)
    vars = [tf.Variable(random.random(), trainable=True) for i in range(2)]
    for epoch in range(1000000):
        with tf.GradientTape() as tape:
            y_pred = vars[0]*x_train + vars[1]
            loss = tf.reduce_sum(tf.abs(y_train - y_pred))
        model_gradients = tape.gradient(loss, vars)
        optimizer.apply_gradients(zip(model_gradients, vars))
        print(epoch, 'parameters', vars[0].numpy(), vars[1].numpy(), 'loss', loss.numpy())

def nn_train():
    input_layer = layers.Input(shape=(1,))
    output_layer = layers.Dense(1, kernel_initializer=initializers.RandomUniform(0, 1), bias_initializer=initializers.RandomUniform(0, 1))(input_layer)
    model = models.Model(inputs=input_layer, outputs=output_layer)
    optimizer = optimizers.Adam(lr=1e-2)
    #model.compile(optimizer=optimizer, loss=None)
    for epoch in range(1000000):
        with tf.GradientTape() as tape:
            y_pred = model(x_train.reshape((-1, 1)))
            loss = tf.reduce_sum(tf.abs(y_train - y_pred))
        model_gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(model_gradients, model.trainable_variables))
        print(epoch, 'parameters', model.trainable_variables[0].numpy(), model.trainable_variables[1].numpy(), 'loss', loss.numpy())

# uncomment one:
manual_train()
#nn_train()
You have a shape problem in your Keras example.
Because of that problem, the operation y_train - y_pred does not do exactly what you think it does.
At that point, y_train has a shape of (100,) and y_pred a shape of (100, 1), so when you do the subtraction TensorFlow performs a "smart" broadcast, and the end result is an array of shape (100, 100).
Reshape your ground truth to the same shape as your output (or the other way around) to get the correct value of your loss:
loss = tf.reduce_sum(tf.abs(y_train.reshape((-1,1)) - y_pred))
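To see the broadcast in action, here is a small NumPy sketch of the same shapes (illustrative only, not part of the original code):

import numpy as np

y_train = np.linspace(1, 100, 100)               # shape (100,)
y_pred = np.zeros((100, 1))                      # shape (100, 1), like the Dense(1) output

print((y_train - y_pred).shape)                  # (100, 100) -- broadcast, not elementwise
print((y_train.reshape((-1, 1)) - y_pred).shape) # (100, 1) -- what the loss actually needs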
I have created a custom loss (weighted absolute error) in Keras, but the implementation doesn't work - I get the error ValueError: No gradients provided for any variable: ['my_model/conv2d/kernel:0', 'my_model/conv2d/bias:0'].
I want to apply a different weight to each pixel.
class WeightedMeanAbsoluteError(tf.keras.metrics.Metric):

    def __init__(self, name='weighted_mean_absolute_error'):
        super(WeightedMeanAbsoluteError, self).__init__(name=name)
        self.wmae = self.add_weight(name='wmae', initializer='zeros')

    def update_state(self, y_true, y_pred, loss_weights):
        values = tf.math.abs(y_true - y_pred) * loss_weights
        return self.wmae.assign_add(tf.reduce_sum(values))

    def result(self):
        return self.wmae

    def reset_states(self):
        # The state of the metric will be reset at the start of each epoch.
        self.wmae.assign(0.)


loss_object = WeightedMeanAbsoluteError()
train_loss = WeightedMeanAbsoluteError()
I use the following code to implement a training step:
@tf.function
def train_step(input_images, output_images):
    with tf.GradientTape() as tape:
        # training=True is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        result_images = model(input_images, training=True)
        loss = loss_object(output_images, result_images)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
Also my code works just fine if I use
loss_object = tf.keras.losses.MeanAbsoluteError()
train_loss = tf.keras.metrics.MeanAbsoluteError()
The best and simplest way to minimize a weighted standard loss (such as MAE) is to use the sample_weight parameter of the fit method, where we pass an array with the desired weight for each sample:

import numpy as np
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

X = np.random.uniform(0, 1, (1000, 50))
y = np.random.uniform(0, 1, 1000)
W = np.random.randint(1, 10, 1000)  # one weight per sample

inp = Input((50,))
x = Dense(64, activation='relu')(inp)
out = Dense(1)(x)  # scalar output to match the scalar targets

model = Model(inp, out)
model.compile('adam', 'mae')

model.fit(X, y, epochs=100, sample_weight=W)
I get different results from Tensorflow and Keras with the same network structure.
The loss function looks like:

class MaskedMultiCrossEntropy(object):

    def loss(self, y_true, y_pred):
        vec = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true, dim=1)
        mask = tf.equal(y_true[:, 0, :], -1)
        zer = tf.zeros_like(vec)
        loss = tf.where(mask, x=zer, y=vec)
        return loss
The network layer I used is called CrowdsClassification, which is implemented in Keras. Then I build the network as follows:
x = Dense(128, input_shape=(input_dim,), activation='relu')(inputs)
x = Dropout(0.5)(x)
x = Dense(N_CLASSES)(x)
x = Activation("softmax")(x)
crowd = CrowdsClassification(num_classes, num_oracles, conn_type="MW")
x = crowd(x)
Train the model with Keras
model = Model(inputs=inputs, outputs=x)
model.compile(optimizer='adam', loss=loss)
model.fit(inputs, true_class, epochs=100, shuffle=False, verbose=2, validation_split=0.1)
Train the model with TensorFlow

optimizer = tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opt_op = optimizer.minimize(loss, global_step=global_step)

sess.run(tf.global_variables_initializer())
for epoch in range(100):
    sess.run([loss, opt_op], feed_dict=train_feed_dict)
The TensorFlow version gives a wrong prediction. It seems that the issue comes from the loss function, i.e. that TensorFlow cannot backpropagate the masked loss. Can anyone give some advice? Thanks a lot.
I want to create a custom Keras layer that does something during training and something else during validation or testing.
from tensorflow import keras
K = keras.backend
from keras import Sequential
from keras.layers import Layer, Dense, ReLU
from keras.regularizers import l2
import tensorflow as tf

class MyCustomLayer(Layer):

    def __init__(self, ratio=0.5, **kwargs):
        self.ratio = ratio
        super(MyCustomLayer, self).__init__(**kwargs)

    @tf.function
    def call(self, x, is_training=None):
        is_training = K.learning_phase()
        tf.print("training: ", is_training)
        if is_training is 1 or is_training is True:
            xs = x * 4
            return xs
        else:
            xs = x * 0
            return xs

model = Sequential()
model.add(Dense(16, input_dim=input_dim))
model.add(MyCustomLayer(0.5))
model.add(ReLU())
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(output_dim, activation='softmax', kernel_regularizer=l2(0.01)))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, validation_split=0.05, epochs=5)
In the output I always get:
training: 0
training: 0
training: 0
training: 0
training: 0
training: 0
training: 0
training: 0
Does anyone know how to fix this?
There are some issues and misconceptions here. First, you are mixing imports between keras and tf.keras; you should use only one of them. Second, the parameter of call is called training, not is_training.
I think the issue is that tf.print does not really print the value of the training variable, as it is a TensorFlow symbolic variable and might change value indirectly. There are other ways to check whether the layer behaves differently during inference and training, for example:
class MyCustomLayer(Layer):

    def __init__(self, ratio=0.5, **kwargs):
        super(MyCustomLayer, self).__init__(**kwargs)

    def call(self, inputs, training=None):
        train_x = inputs * 4
        test_x = inputs * 0
        return K.in_train_phase(train_x,
                                test_x,
                                training=training)
Then, using this model:
model = Sequential()
model.add(Dense(1, input_dim=10))
model.add(MyCustomLayer(0.5))
model.compile(loss='mse', optimizer='adam')
And making an instance of a function that explicitly receives the K.learning_phase() variable:
fun = K.function([model.input, K.learning_phase()], [model.output])
If you call it with K.learning_phase() set to 1 or 0, you do see different outputs:
d = np.random.random(size=(2,10))
print(fun([d, 1]))
print(fun([d, 0]))
Result:
[array([[4.1759257], [3.9988194]], dtype=float32)]
[array([[0.], [0.]], dtype=float32)]
And this indicates that the layer has different behavior during training and inference/testing.
So, I just figured out what was going wrong. I was mixing two different types of classes:
from keras import Sequential
from tensorflow import keras
K = keras.backend
So, the model is using keras and I was calling the flag from tensorflow.keras. For this reason K.learning_phase() was not working as expected.
To fix it I used
from tensorflow.keras import Sequential
from tensorflow import keras
K = keras.backend
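If you use tf.keras consistently (as in the fix above), you can also verify the behavior by calling the model with the training argument, which is forwarded to the layer's call. A minimal sketch under that assumption, for TF 2.x where K.in_train_phase is available:

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Layer, Dense
K = keras.backend

class MyCustomLayer(Layer):
    def __init__(self, ratio=0.5, **kwargs):
        self.ratio = ratio
        super(MyCustomLayer, self).__init__(**kwargs)

    def call(self, inputs, training=None):
        # Scale by 4 in training, zero out at inference.
        return K.in_train_phase(inputs * 4, inputs * 0, training=training)

model = Sequential([Dense(1, input_dim=10), MyCustomLayer(0.5)])
model.compile(loss='mse', optimizer='adam')

d = np.random.random(size=(2, 10)).astype('float32')
print(model(d, training=True))   # non-zero outputs (train branch)
print(model(d, training=False))  # zeros (inference branch)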
I have a loss function built in TensorFlow; it needs logits and labels as inputs:
def median_weight_class_loss(labels, logits):
    epsilon = tf.constant(value=1e-10)
    logits = logits + epsilon
    softmax = tf.nn.softmax(logits)
    # This is just the number of samples in each class in my dataset,
    # divided by the total number of samples, 10015.
    weight_sample = np.array([1113, 6705, 514, 327, 1099, 115, 142]) / 10015
    weight_sample = 0.05132302 / weight_sample
    xent = -tf.reduce_sum(tf.multiply(labels * tf.log(softmax + epsilon), weight_sample), axis=1)
    return xent
The problem is that Keras loss functions have a different format:

custom_loss(y_true, y_pred)

They take y_true and y_pred as inputs.
I found a way to get logits in Keras, by using a linear activation instead of softmax in the last layer of my model:

model.add(Activation('linear'))

But I need my model to have a softmax activation in the last layer. What do you think the solution is?
Thank you.
Strictly speaking, this loss does not need logits; you can input softmax probabilities directly by modifying the loss like this:

def median_weight_class_loss(y_true, y_pred):
    epsilon = tf.constant(value=1e-10)
    weight_sample = np.array([1113, 6705, 514, 327, 1099, 115, 142]) / 10015
    weight_sample = 0.05132302 / weight_sample
    xent = -tf.reduce_sum(tf.multiply(y_true * tf.log(y_pred + epsilon), weight_sample), axis=1)
    return xent
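For completeness, a minimal sketch of how the modified loss could be plugged into a model that keeps softmax in the last layer. The hidden layer size, the input shape, and the 7-unit output (matching the 7 class weights above) are assumptions for illustration; also note that tf.log from the original snippet is tf.math.log in TF 2.x:

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

def median_weight_class_loss(y_true, y_pred):
    # Same weighting as above, applied to softmax probabilities.
    epsilon = tf.constant(value=1e-10)
    weight_sample = np.array([1113, 6705, 514, 327, 1099, 115, 142]) / 10015
    weight_sample = 0.05132302 / weight_sample
    xent = -tf.reduce_sum(tf.multiply(y_true * tf.math.log(y_pred + epsilon), weight_sample), axis=1)
    return xent

# Hypothetical model: the last layer keeps its softmax activation.
model = Sequential([
    Dense(64, activation='relu', input_shape=(32,)),
    Dense(7),
    Activation('softmax'),
])
model.compile(optimizer='adam', loss=median_weight_class_loss)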