Customized loss in tensorflow with keras - tensorflow

OS platform and distribution: Linux Ubuntu 16.04
TensorFlow version: 1.4.0
The following code runs properly:
import tensorflow as tf
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.backend import categorical_crossentropy
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Input
# Load the MNIST data set from the 'MNIST_data' directory with one-hot labels.
mnist_data = input_data.read_data_sets('MNIST_data', one_hot=True)
# Length of one flattened 28x28 image; used as the model's input width.
img_size_flat = 28*28
# Batch size passed to the generator and used to derive steps_per_epoch.
batch_size = 64
def gen(batch_size=32):
    """Yield an endless stream of (images, labels) training batches.

    Every iteration asks the module-level `mnist_data.train` for its next
    `batch_size` examples and yields them as a pair.

    Args:
        batch_size: number of examples requested per batch.
    """
    while True:
        images, labels = mnist_data.train.next_batch(batch_size)
        yield images, labels
# Symbolic input: one flattened image per row.
inputs = Input(shape=(img_size_flat,))
x = Dense(128, activation='relu')(inputs) # fully-connected layer with 128 units and ReLU activation
x = Dense(128, activation='relu')(x)
preds = Dense(10, activation='softmax')(x) # output layer with 10 units and a softmax activation
model = Model(inputs=inputs, outputs=preds)
# Built-in cross-entropy loss; `preds` already holds softmax outputs.
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy'])
# steps_per_epoch = number of training labels divided by the batch size.
model.fit_generator(gen(batch_size), steps_per_epoch=len(mnist_data.train.labels)//batch_size, epochs=2)
But what if I want to write the loss function myself, like this:
# NOTE(review): `preds` is already produced by a softmax-activated Dense layer,
# so this applies softmax a second time — confirm that is intended.
preds_softmax = tf.nn.softmax(preds)
# Weight the log-probabilities by the targets (y_true is cast to float32 first).
step1 = tf.cast(y_true, tf.float32) * tf.log(preds_softmax)
# Sum over axis 1 and negate: one cross-entropy value per row.
step2 = -tf.reduce_sum(step1, reduction_indices=[1])
loss = tf.reduce_mean(step2) # loss
Can I use a customized loss function and still train with Keras's model.fit_generator?
Would it look something like the following TensorFlow code?
# Same network as above, but fed from a raw TensorFlow placeholder
# (rows of 784 float32 values) instead of a Keras Input.
inputs = tf.placeholder(tf.float32, shape=(None, 784))
x = Dense(128, activation='relu')(inputs) # fully-connected layer with 128 units and ReLU activation
x = Dense(128, activation='relu')(x)
preds = Dense(10, activation='softmax')(x) # output layer with 10 units and a softmax activation
# Placeholder for the targets: 10 float32 values per row.
y_true = tf.placeholder(tf.float32, shape=(None, 10))
How can I do this based on the code above (part I)? Thanks for any help!

Just wrap your loss into a function, and provide it to model.compile.
def custom_loss(y_true, y_pred):
    """Categorical cross-entropy, returning one loss value per example.

    Args:
        y_true: target tensor with one row per example (one-hot in the
            question's setup).
        y_pred: model output. The question's model ends in a
            softmax-activated Dense layer, so these are already
            probabilities and must not be passed through softmax again.

    Returns:
        A tensor with the cross-entropy of each example (summed over axis 1).
        Keras averages these values itself.
    """
    # Clip away exact zeros so tf.log never produces -inf.
    probabilities = tf.clip_by_value(y_pred, 1e-7, 1.0)
    return -tf.reduce_sum(y_true * tf.log(probabilities), axis=1)


model.compile(optimizer='rmsprop',
              loss=custom_loss,
              metrics=['accuracy'])
Also note that,
you don't need to cast y_true into float32. It is done automatically by Keras.
you don't need to take the final reduce_mean. Keras will also take care of that.

Related

Implementing TensorFlow Triplet Loss

I would like to implement the built in TensorFlow addons version of triplet loss with a tutorial here for a siamese network, however I can't seem to get it quite right. No matter how I wrangle the code another error pops up, currently
TypeError: Could not build a TypeSpec for <KerasTensor: shape=(3, None, 256) dtype=float32 (created by layer 'tf.math.l2_normalize_4')> with type KerasTensor.
Note, this is just a token implementation kept simple in order to understand how to implement Triplet Loss. I don't expect the model to actually learn anything.
Code:
!pip install -U tensorflow-addons
import io
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.datasets import fashion_mnist
# Dummy data to pass to the model
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# Cut the training images and labels into three consecutive slices
# (presumably one per triplet branch — the slices are not matched by class).
train_data = [x_train[:20000],x_train[20000:40000],x_train[40000:]]
train_labels = [y_train[:20000],y_train[20000:40000],y_train[40000:]]
# Converting the 3-element lists stacks the slices into single tensors.
train_data = tf.convert_to_tensor(train_data)
train_labels = tf.convert_to_tensor(train_labels)
#train_data = np.asarray(train_data)
#train_labels = np.asarray(train_labels)
def create_model(input_shape):
    """Build the convolutional embedding network.

    Two Conv2D -> MaxPooling2D -> Dropout stages are followed by Flatten and
    a Dense layer with 256 units and no activation.

    Args:
        input_shape: shape of one input image, forwarded to `Input(shape=...)`.

    Returns:
        A `tf.keras.Model` mapping the image input to the Dense output.
    """
    layers = tf.keras.layers
    image = layers.Input(shape=input_shape)
    stack = [
        layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1)),
        layers.MaxPooling2D(pool_size=2),
        layers.Dropout(0.3),
        layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'),
        layers.MaxPooling2D(pool_size=2),
        layers.Dropout(0.3),
        layers.Flatten(),
        # No activation on final dense layer
        layers.Dense(256, activation=None),
    ]
    embedding = image
    for layer in stack:
        embedding = layer(embedding)
    # Disabled L2-normalisation experiment, kept verbatim:
    #x = tf.keras.layers.Lambda(lambda y: tf.math.l2_normalize(x, axis=1))(x)
    return tf.keras.Model(image, embedding)
def get_siamese_model(input_shape):
    """Build the embedding network and the three-input model around it.

    Args:
        input_shape: shape of one input image.

    Returns:
        A pair `(embedding_model, siamese_triplet)`: the single-image
        network from `create_model`, and a model that applies that same
        network instance to the anchor, positive and negative inputs and
        outputs the three results as a list.
    """
    branch_names = ("anchor_input", "positive_input", "negative_input")
    # One input tensor per member of the triplet
    triplet_inputs = [
        tf.keras.layers.Input(input_shape, name=branch_name)
        for branch_name in branch_names
    ]
    # A single network instance is reused for all three branches
    embedding_model = create_model(input_shape)
    triplet_embeddings = [embedding_model(tensor) for tensor in triplet_inputs]
    # Disabled L2-normalisation experiment, kept verbatim:
    #x = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(outputs, axis=1))(outputs)
    siamese_triplet = tf.keras.Model(inputs=triplet_inputs, outputs=triplet_embeddings)
    return embedding_model, siamese_triplet
# Build both models for 28x28 single-channel images.
emb_mod, model = get_siamese_model([28,28,1])
# Compile the model
model.compile(
optimizer=tf.keras.optimizers.Adam(0.001),
loss=tfa.losses.TripletSemiHardLoss())
# Train the network
#train_dataset = tf.convert_to_tensor(train_dataset)
# NOTE(review): only `train_data` is passed to fit(); `train_labels` is built
# above but never supplied as targets — confirm what the loss expects.
history = model.fit(
train_data,
epochs=5)
I am not sure what exactly you are trying to do, but you also have to incorporate your labels into your training dataset when using the tfa.losses.TripletSemiHardLoss(). Here is a working example:
import io
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.datasets import fashion_mnist
# Dummy data to pass to the model
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# Zip three consecutive image slices so each element is a 3-tuple of images,
# one per model input.
train_data = tf.data.Dataset.zip((tf.data.Dataset.from_tensor_slices(x_train[:20000]),
tf.data.Dataset.from_tensor_slices(x_train[20000:40000]),
tf.data.Dataset.from_tensor_slices(x_train[40000:])))
# The matching label slices, zipped the same way (one label per model output).
train_labels = tf.data.Dataset.zip((tf.data.Dataset.from_tensor_slices(y_train[:20000]),
tf.data.Dataset.from_tensor_slices(y_train[20000:40000]),
tf.data.Dataset.from_tensor_slices(y_train[40000:])))
# Pair inputs with labels and batch 32 elements at a time.
dataset = tf.data.Dataset.zip((train_data, train_labels)).batch(32)
def create_model(input_shape):
    """Build the convolutional embedding network.

    Two Conv2D -> MaxPooling2D -> Dropout stages are followed by Flatten and
    a Dense layer with 256 units and no activation.

    Args:
        input_shape: shape of one input image, forwarded to `Input(shape=...)`.

    Returns:
        A `tf.keras.Model` mapping the image input to the Dense output.
    """
    layers = tf.keras.layers
    image = layers.Input(shape=input_shape)
    stack = [
        layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1)),
        layers.MaxPooling2D(pool_size=2),
        layers.Dropout(0.3),
        layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'),
        layers.MaxPooling2D(pool_size=2),
        layers.Dropout(0.3),
        layers.Flatten(),
        # No activation on final dense layer
        layers.Dense(256, activation=None),
    ]
    embedding = image
    for layer in stack:
        embedding = layer(embedding)
    # Disabled L2-normalisation experiment, kept verbatim:
    #x = tf.keras.layers.Lambda(lambda y: tf.math.l2_normalize(x, axis=1))(x)
    return tf.keras.Model(image, embedding)
def get_siamese_model(input_shape):
    """Build the embedding network and the three-input model around it.

    Args:
        input_shape: shape of one input image.

    Returns:
        A pair `(embedding_model, siamese_triplet)`: the single-image
        network from `create_model`, and a model that applies that same
        network instance to the anchor, positive and negative inputs and
        outputs the three results as a list.
    """
    branch_names = ("anchor_input", "positive_input", "negative_input")
    # One input tensor per member of the triplet
    triplet_inputs = [
        tf.keras.layers.Input(input_shape, name=branch_name)
        for branch_name in branch_names
    ]
    # A single network instance is reused for all three branches
    embedding_model = create_model(input_shape)
    triplet_embeddings = [embedding_model(tensor) for tensor in triplet_inputs]
    # Disabled L2-normalisation experiment, kept verbatim:
    #x = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(outputs, axis=1))(outputs)
    siamese_triplet = tf.keras.Model(inputs=triplet_inputs, outputs=triplet_embeddings)
    return embedding_model, siamese_triplet
# Build both models for 28x28 single-channel images.
emb_mod, model = get_siamese_model([28,28,1])
# Compile the model
model.compile(
optimizer=tf.keras.optimizers.Adam(0.001),
loss=tfa.losses.TripletSemiHardLoss())
# Train the network
# `dataset` yields (inputs, labels) pairs, so the loss receives targets.
history = model.fit(
dataset,
epochs=1)
625/625 [==============================] - 76s 120ms/step - loss: 0.1354 - model_79_loss: 0.0572 - model_79_1_loss: 0.0453 - model_79_2_loss: 0.0330

Different results from Tensorflow and Keras

I get different results from Tensorflow and Keras with the same network structure.
The loss function looks like
class MaskedMultiCrossEntropy(object):
    """Softmax cross-entropy that is zeroed wherever the label is -1."""

    def loss(self, y_true, y_pred):
        """Return the cross-entropy with masked positions set to zero.

        Args:
            y_true: rank-3 label tensor; positions whose entry at index 0
                of axis 1 equals -1 are treated as missing.
            y_pred: tensor passed as `logits` to the cross-entropy op.

        Returns:
            The cross-entropy computed along axis 1, with zeros at the
            masked positions.
        """
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true, dim=1)
        is_missing = tf.equal(y_true[:,0,:], -1)
        return tf.where(is_missing, x=tf.zeros_like(cross_entropy), y=cross_entropy)
The network layer I used is called CrowdsClassification, which is implemented by Keras. Then I build the network by
# Base classifier: one hidden ReLU layer, dropout, then a softmax over N_CLASSES.
x = Dense(128, input_shape=(input_dim,), activation='relu')(inputs)
x = Dropout(0.5)(x)
x = Dense(N_CLASSES)(x)
x = Activation("softmax")(x)
# NOTE(review): this uses `num_classes` while the Dense layer above uses
# `N_CLASSES` — presumably the same value; confirm.
crowd = CrowdsClassification(num_classes, num_oracles, conn_type="MW")
x = crowd(x)
Train the model with Keras
# Wrap the graph from `inputs` to the crowd layer's output in a Keras model.
model = Model(inputs=inputs, outputs=x)
# `loss` is presumably MaskedMultiCrossEntropy().loss — confirm against caller.
model.compile(optimizer='adam', loss=loss)
# The original call ended with an extra ')', which is a syntax error.
model.fit(inputs,
          true_class, epochs=100, shuffle=False, verbose=2, validation_split=0.1)
Train the model with tensorflow
# NOTE(review): the learning rate is set to 0.01 here, while the Keras run
# passes the string 'adam' and so uses Keras's default rate — the two
# trainings may not be comparable; confirm.
optimizer = tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opt_op = optimizer.minimize(loss, global_step=global_step)
sess.run(tf.global_variables_initializer())
# Fetch the loss together with the update op on every step.
train_fetches = [loss, opt_op]
for epoch in range(100):
    sess.run(train_fetches, feed_dict=train_feed_dict)
The TensorFlow version produces wrong predictions. The issue seems to come from the loss function: TensorFlow apparently cannot backpropagate through the masked loss. Can anyone give some advice? Thanks a lot.

Concatenate an input before Dense layer. [Keras with TF backend]

So, I need to concatenate an input to the flattened layer before going in the dense layer.
I'm using Keras with TF as backend.
model.add(Flatten())
# Auxiliary input with one value per sample.
aux_input = Input(shape=(1, ))
# NOTE(review): the list is passed to the Concatenate constructor rather than
# to a call on the layer, and it mixes the model object with a tensor —
# this is the part the question asks about.
model.add(Concatenate([model, aux_input]))
model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay)))
My data looks like this: X_train, y_train, aux_train. y_train and aux_train have the same shape, (1,): each image has one ground-truth label and one aux_input value.
How do I feed this aux_input to the model when calling model.fit?
As suggested in the answers, I switched my model to the functional API. However, I now get the following error.
ValueError: Layer dense_1 was called with an input that isn't a
symbolic tensor. Received type: . Full input:
[]. All
inputs to the layer should be tensors.
Here's the code for that part.
flatten = Flatten()(drop_5)
aux_rand = Input(shape=self.aux_shape)
# NOTE(review): the input above is bound to `aux_rand`, but `aux_input` is
# used below. Also, Concatenate(...) here only constructs the layer with the
# list as its argument; it is never called on the tensors, so `concat` is a
# layer object, not a tensor — consistent with the "isn't a symbolic tensor"
# error raised by the Dense call.
concat = Concatenate([flatten, aux_input])
fc1 = Dense(512, kernel_regularizer=regularizers.l2(weight_decay))(concat)
Shape of aux input
# One auxiliary value per sample.
aux_shape = (1,)
And then calling the model as follow
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# One random auxiliary value per training sample (1-D array).
aux_rand = np.random.rand(y_train.shape[0])
model_inst = cifar10vgg()
x_train_input = Input(shape=(32,32,3))
aux_input = Input(shape=(1,))
# NOTE(review): these Input tensors are created here, separately from whatever
# build_model() uses internally — confirm the returned output is connected to them.
model = Model(inputs=[x_train_input, aux_input], output=model_inst.build_model())
# NOTE(review): validation_data supplies only x_test, although the model
# declares two inputs — confirm.
model.fit(x=[x_train, aux_rand], y=y_train, batch_size=batch_size, steps_per_epoch=x_train.shape[0] // batch_size,
epochs=maxepoches, validation_data=(x_test, y_test),
callbacks=[reduce_lr, tensorboard], verbose=2)
model_inst.build_model() returns Activation('softmax')(fc2) which is the output to be fed into the Model (as far as I understood)
Judging from your code, you built the model with the Sequential API, which is not a good fit for this case. If you have auxiliary inputs, the best way to implement them is the functional API.
Here is an example from the Keras website:
# `concatenate` is imported explicitly: the snippet never imports the `keras`
# package itself, so `keras.layers.concatenate` would raise NameError.
from keras.layers import Input, Embedding, LSTM, Dense, concatenate
from keras.models import Model
# Main input: sequences of 100 integer indices.
main_input = Input(shape=(100,), dtype='int32', name='main_input')
x = Embedding(output_dim=512, input_dim=10000, input_length=100)(main_input)
lstm_out = LSTM(32)(x)
# Auxiliary output attached directly to the LSTM output.
auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_out)
# Auxiliary input, merged with the LSTM output before the dense layers.
auxiliary_input = Input(shape=(5,), name='aux_input')
x = concatenate([lstm_out, auxiliary_input])
x = Dense(64, activation='relu')(x)
main_output = Dense(1, activation='sigmoid', name='main_output')(x)
model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output, auxiliary_output])
Based on your description, I think the following code can give you some intuition:
# First branch: a small image that is simply flattened.
x1 = Input(shape=(32, 32, 3))
flatten1 = Flatten()(x1)
# Second branch: an image passed through VGG19 (without its top) and flattened.
x2 = Input(shape=(244, 244, 3))
vgg = VGG19(weights='imagenet', include_top=False)(x2)
flatten2 = Flatten()(vgg)
# The Concatenate layer is instantiated first, then called on the list of tensors.
concat = Concatenate()([flatten1, flatten2])
d = Dense(10)(concat)
model = Model(inputs=[x1, x2], outputs=[d])
model.compile('adam', 'categorical_crossentropy')
# Targets are passed as `y`; fit() has no `outputs` argument.
model.fit(x=[x_train1, x_train2], y=y_labels)

the same model converged in keras but not tensorflow, how is that possible?

I'm trying to work with an LSTM in TensorFlow, but I can't get even a simple IMDB sentiment model to converge.
I took a Keras model and tried to duplicate exactly the same model in TensorFlow. In Keras it trains and converges, but in TensorFlow it gets stuck at some point (loss of 0.69).
I tried to make them as equal as possible. The only difference I can think of is that in Keras the padding comes before the sequence, while in TensorFlow I use 'post' padding, following TensorFlow conventions.
Any idea what's wrong with my TensorFlow model?
from __future__ import print_function
import random
import numpy as np
from tensorflow.contrib.keras.python.keras.preprocessing import sequence
from tensorflow.contrib.keras.python.keras.models import Sequential
from tensorflow.contrib.keras.python.keras.layers import Dense, Dropout, Activation
from tensorflow.contrib.keras.python.keras.layers import Embedding
from tensorflow.contrib.keras.python.keras.layers import LSTM
from tensorflow.contrib.keras.python.keras.layers import Conv1D, MaxPooling1D
from tensorflow.contrib.keras.python.keras.datasets import imdb
import tensorflow as tf
# Embedding
# Vocabulary size: passed as num_words to imdb.load_data and used as the
# number of rows of the embedding matrix.
max_features = 30000
# Sequence length expected by the input placeholder / Embedding layer.
maxlen = 2494
# Width of each embedding vector.
embedding_size = 128
# Convolution
# (these three values are not used by the code visible in this file)
kernel_size = 5
filters = 64
pool_size = 4
# LSTM
# Number of units in the LSTM layer.
lstm_output_size = 70
# Training
batch_size = 30
epochs = 2
class TrainData:
    """Sequential mini-batch feeder for the IMDB training set.

    Batches are sliced off the front of the remaining data. Once fewer than
    one batch is left, the data set is reloaded and iteration starts over.
    """

    def __init__(self, batch_sz=batch_size):
        """Load and pad the training set.

        Args:
            batch_sz: number of examples returned per batch.
        """
        (x_train, y_train), (_, _) = imdb.load_data(num_words=max_features)
        # Two-column labels: column 0 is 1 when the label equals 1,
        # column 1 is 1 otherwise.
        y_train = [[int(x == 1), int(x != 1)] for x in y_train]
        self._batch_size = batch_sz
        self._train_data = sequence.pad_sequences(x_train, padding='pre')
        self._train_labels = y_train

    def next_batch(self):
        """Return the next `(batch_x, batch_y)` pair, reloading when exhausted."""
        if len(self._train_data) < self._batch_size:
            # Reload with the configured batch size; a bare self.__init__()
            # would silently fall back to the default.
            self.__init__(self._batch_size)
        size = self._batch_size
        batch_x, batch_y = self._train_data[:size], self._train_labels[:size]
        self._train_data = self._train_data[size:]
        self._train_labels = self._train_labels[size:]
        return batch_x, batch_y

    def batch_generator(self):
        """Yield `(batch_x, batch_y)` pairs forever (for `fit_generator`)."""
        while True:
            yield self.next_batch()

    def get_num_batches(self):
        """Return how many full batches remain in the current pass."""
        return len(self._train_data) // self._batch_size
def length(sequence):
    """Count the non-zero entries in each row of `sequence`.

    Args:
        sequence: integer tensor; zeros are treated as padding.

    Returns:
        An int32 tensor holding, per row, the number of non-zero entries.
    """
    # sign(abs(v)) is 1 for every non-zero entry and 0 for zeros
    nonzero_mask = tf.sign(tf.abs(sequence))
    counts = tf.reduce_sum(nonzero_mask, reduction_indices=1)
    return tf.cast(counts, tf.int32)
def get_model(x, y):
    """Build the TensorFlow graph: embedding -> LSTM -> 2-way softmax.

    Args:
        x: integer tensor of token ids, one padded sequence per row.
        y: two-column label tensor.

    Returns:
        A pair `(loss, optimize_step)`: the softmax cross-entropy loss and
        the Adam update op that minimises it.
    """
    embedding_matrix = tf.get_variable("embedding", [max_features, embedding_size], dtype=tf.float32)
    embedded_x = tf.nn.embedding_lookup(embedding_matrix, x)
    print(x)
    print(embedded_x)
    print(length(x))
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(lstm_output_size)
    rnn_outputs, _final_state = tf.nn.dynamic_rnn(
        lstm_cell,
        embedded_x,
        dtype=tf.float32,
        scope="rnn_layer1",
        sequence_length=length(x),
    )
    # Select the output at the final time index.
    # NOTE(review): with sequence_length given, dynamic_rnn zeroes outputs past
    # each row's length, so this slice is likely all zeros for every sequence
    # shorter than the padded length — a plausible cause of the loss staying
    # at ln(2) ~= 0.69. Confirm.
    final_index = tf.shape(rnn_outputs)[1] - 1
    # Time-major layout: [seq_length, batch_size, num_units]
    last = tf.gather(tf.transpose(rnn_outputs, [1, 0, 2]), final_index)
    # Softmax layer
    with tf.name_scope('fc_layer'):
        weight = tf.get_variable(name="weights", shape=[lstm_output_size, 2])
        bias = tf.get_variable(shape=[2], name="bias")
        logits = tf.matmul(last, weight) + bias
    loss = tf.losses.softmax_cross_entropy(y, logits=logits)
    optimize_step = tf.train.AdamOptimizer().minimize(loss=loss)
    return loss, optimize_step
def tf_model():
    """Train the raw-TensorFlow model and print the mean cost of each epoch."""
    x_holder = tf.placeholder(tf.int32, shape=[None, maxlen])
    y_holder = tf.placeholder(tf.int32, shape=[None, 2])
    loss, opt_step = get_model(x_holder, y_holder)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(10):
            # A fresh feeder per epoch restarts from the beginning of the data
            train_data = TrainData()
            epoch_costs = []
            for _batch in range(train_data.get_num_batches()):
                batch_x, batch_y = train_data.next_batch()
                feed = {x_holder: batch_x, y_holder: batch_y}
                _, batch_cost = sess.run([opt_step, loss], feed_dict=feed)
                epoch_costs.append(batch_cost)
            print("Epoch: " + str(epoch))
            print("\tcost: " + str(np.mean(epoch_costs)))
def keras_model():
    """Train the Keras reference model: Embedding -> LSTM -> Dense(2) -> softmax."""
    # print('Loading data...')
    # Only the test split is used here; training batches come from TrainData.
    (_, _), (x_test, y_test) = imdb.load_data(num_words=max_features)
    y_test = [[int(label == 1), int(label != 1)] for label in y_test]
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen, padding='pre')

    model = Sequential([
        Embedding(max_features, embedding_size, input_length=maxlen),
        LSTM(lstm_output_size),
        Dense(2),
        Activation('softmax'),
    ])
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    print('Train...')
    data = TrainData()
    model.fit_generator(
        data.batch_generator(),
        steps_per_epoch=data.get_num_batches(),
        epochs=epochs,
        validation_data=(x_test, y_test),
    )
if __name__ == "__main__":
    # Swap the two calls to train the Keras version instead.
    # keras_model()
    tf_model()
EDIT
When I limit the sequence length to 100, both models converge, so I assume something differs in the LSTM layer.
Check the initial values of your operations. In my case the Adadelta optimizer in Keras had an initial learning rate of 1.0, while in tf.keras it was 0.001, so on the MNIST dataset it converged much more slowly.

Tensorflow Autoencoder - How To Calculate Reconstruction Error?

I've implemented the following Autoencoder in Tensorflow as shown below. It basically takes MNIST digits as inputs, learns the structure of the data and reproduces the input at its output.
from __future__ import division, print_function, absolute_import
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from the "MNIST_data" directory (labels one-hot encoded).
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# Parameters
learning_rate = 0.01
training_epochs = 20
batch_size = 256
# Print the cost every `display_step` epochs.
display_step = 1
# Number of test images reconstructed and plotted after training.
examples_to_show = 10
# Network Parameters
n_hidden_1 = 256 # 1st layer num features
n_hidden_2 = 128 # 2nd layer num features
n_input = 784 # MNIST data input (img shape: 28*28)
# tf Graph input (only pictures)
X = tf.placeholder("float", [None, n_input])
# Weight matrices, randomly initialised; the decoder mirrors the encoder's
# layer sizes (n_input -> n_hidden_1 -> n_hidden_2 and back).
weights = {
'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'decoder_h1': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
'decoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
}
# One bias vector per layer, sized to that layer's output width.
biases = {
'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
'decoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
'decoder_b2': tf.Variable(tf.random_normal([n_input])),
}
# Building the encoder
def encoder(x):
    """Encode `x` with two sigmoid layers.

    Uses the module-level `weights`/`biases` entries 'encoder_h1'/'encoder_b1'
    and 'encoder_h2'/'encoder_b2'.

    Args:
        x: input tensor with n_input columns.

    Returns:
        The output of the second encoder layer (n_hidden_2 columns).
    """
    # Encoder hidden layer #1 with sigmoid activation
    pre_activation_1 = tf.add(tf.matmul(x, weights['encoder_h1']), biases['encoder_b1'])
    hidden_1 = tf.nn.sigmoid(pre_activation_1)
    # Encoder hidden layer #2 with sigmoid activation
    pre_activation_2 = tf.add(tf.matmul(hidden_1, weights['encoder_h2']), biases['encoder_b2'])
    return tf.nn.sigmoid(pre_activation_2)
# Building the decoder
def decoder(x):
    """Decode `x` with two sigmoid layers.

    Uses the module-level `weights`/`biases` entries 'decoder_h1'/'decoder_b1'
    and 'decoder_h2'/'decoder_b2'.

    Args:
        x: encoded tensor with n_hidden_2 columns.

    Returns:
        The output of the second decoder layer (n_input columns).
    """
    # Decoder hidden layer #1 with sigmoid activation
    pre_activation_1 = tf.add(tf.matmul(x, weights['decoder_h1']), biases['decoder_b1'])
    hidden_1 = tf.nn.sigmoid(pre_activation_1)
    # Decoder hidden layer #2 with sigmoid activation
    pre_activation_2 = tf.add(tf.matmul(hidden_1, weights['decoder_h2']), biases['decoder_b2'])
    return tf.nn.sigmoid(pre_activation_2)
# Construct model
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
# Prediction
y_pred = decoder_op
# Targets (Labels) are the input data.
y_true = X
# Define loss and optimizer, minimize the squared error
cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    total_batch = int(mnist.train.num_examples/batch_size)
    # Training cycle
    for epoch in range(training_epochs):
        # Loop over all batches; the labels are not needed by an autoencoder
        for _ in range(total_batch):
            batch_xs, _labels = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
        # Display logs per epoch step (cost of the epoch's last batch)
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1),
                  "cost=", "{:.9f}".format(c))
    print("Optimization Finished!")
    # Applying encode and decode over test set
    test_images = mnist.test.images[:examples_to_show]
    encode_decode = sess.run(y_pred, feed_dict={X: test_images})
    # Compare original images (top row) with their reconstructions (bottom row)
    fig, axes = plt.subplots(2, 10, figsize=(10, 2))
    for idx in range(examples_to_show):
        axes[0][idx].imshow(np.reshape(mnist.test.images[idx], (28, 28)))
        axes[1][idx].imshow(np.reshape(encode_decode[idx], (28, 28)))
    fig.show()
    plt.draw()
    plt.waitforbuttonpress()
When I am encoding and decoding over the test set, how do I calculate the reconstruction error (i.e. the Mean Squared Error/Loss) for each sample?
In other words I'd like to see how well the Autoencoder is able to reconstruct its input so that I can use the Autoencoder as a single-class classifier.
Many thanks in advance.
Barry
You can take the output of the decoder, subtract the true image from it, square the difference and take the average.
Say y is the output of the decoder and x is the original test image; then for each example you can compute something like the following and average over it:
# Mean squared error per example: average the squared differences over axis 1
# (the pixel axis when x and y have one flattened image per row).
tf.reduce_mean(tf.square(y - x), axis=1)
This will be your reconstruction cost for the test set.