I am trying to create a custom macro-averaged recall metric: recall = (recall of class 1 + recall of class 2) / 2. I came up with the following code, but I am not sure how to calculate the true positives of class 0.
def unweightedRecall():
    def recall(y_true, y_pred):
        # recall of class 1
        true_positives1 = K.sum(K.round(K.clip(y_pred * y_true, 0, 1)))
        possible_positives1 = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall1 = true_positives1 / (possible_positives1 + K.epsilon())
        # --- get true positives of class 0 in true_positives0 here ---
        # Also, is there a cleaner way to get possible_positives0?
        possible_positives0 = K.int_shape(y_true)[0] - possible_positives1
        recall0 = true_positives0 / (possible_positives0 + K.epsilon())
        return (recall0 + recall1) / 2
    return recall
It seems I will have to use keras.backend.equal(x, y), but how do I create a tensor with shape K.int_shape(y_true)[0] and all values equal to, say, x?
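(For reference, a minimal sketch of both pieces using backend ops; this is my own rendering, assuming binary 0/1 labels:)

# a tensor shaped like y_true with every entry equal to a constant x
x_tensor = K.ones_like(y_true) * x
# true positives of class 0: flip both labels and predictions
true_positives0 = K.sum(K.round(K.clip((1 - y_pred) * (1 - y_true), 0, 1)))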
Edit 1
Based on Marcin's comments, I wanted to create a custom metric based on a callback in Keras. While browsing issues in Keras, I came across the following code for an f1 metric:
class Metrics(keras.callbacks.Callback):
    def on_epoch_end(self, batch, logs={}):
        predict = np.asarray(self.model.predict(self.validation_data[0]))
        targ = self.validation_data[1]
        self.f1s = f1(targ, predict)
        return

metrics = Metrics()
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
          validation_data=[X_test, y_test],
          verbose=1, callbacks=[metrics])
But how is the callback returning the accuracy? I wanted to implement unweighted recall = (recall of class 1 + recall of class 2) / 2. I can think of the following code, but would appreciate any help completing it:
from sklearn.metrics import recall_score

class Metrics(keras.callbacks.Callback):
    def on_epoch_end(self, batch, logs={}):
        predict = np.asarray(self.model.predict(self.validation_data[0]))
        targ = self.validation_data[1]
        # --- what to store the result in?? ---
        self.XXXX = recall_score(targ, predict, average='macro')
        # we really don't need to return anything??
        return

metrics = Metrics()
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
          validation_data=[X_test, y_test],
          verbose=1, callbacks=[metrics])
Edit 2: model:
def createModelHelper(numNeurons=40, optimizer='adam'):
    inputLayer = Input(shape=(data.shape[1],))
    denseLayer1 = Dense(numNeurons)(inputLayer)
    outputLayer = Dense(1, activation='sigmoid')(denseLayer1)
    model = Model(inputs=inputLayer, outputs=outputLayer)
    model.compile(loss=unweightedRecall, optimizer=optimizer)
    return model
Keras version (with the mean problem):
Are your two classes actually a one-dimensional output (0 or 1)? If so:
def recall(y_true, y_pred):
    # recall of class 1
    # do not use "round" here if you're going to use this as a loss function
    true_positives = K.sum(K.round(y_pred) * y_true)
    possible_positives = K.sum(y_true)
    return true_positives / (possible_positives + K.epsilon())

def unweightedRecall(y_true, y_pred):
    return (recall(y_true, y_pred) + recall(1 - y_true, 1 - y_pred)) / 2.
Now, if your two classes are actually a 2-element output:
def unweightedRecall(y_true, y_pred):
    return (recall(y_true[:,0], y_pred[:,0]) + recall(y_true[:,1], y_pred[:,1])) / 2.
Callback version:
For the callback, you can use a LambdaCallback and manually print or store the results:
stored_metrics = []

def unweightedRecall(epoch, logs):
    # uses the validation data passed to fit (X_test, y_test above)
    predict = model.predict(X_test)
    targ = y_test
    result = (recall(targ, predict) + recall(1 - targ, 1 - predict)) / 2.
    print("recall for epoch " + str(epoch) + ": " + str(result))
    stored_metrics.append(result)

myCallBack = LambdaCallback(on_epoch_end=unweightedRecall)
Where recall is a function using np instead of K, and epsilon = np.finfo(float).eps (or epsilon = np.finfo(np.float32).eps).
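For concreteness, a NumPy version of that recall might look like this (my sketch, mirroring the Keras version above):

import numpy as np

def recall(y_true, y_pred):
    # recall of class 1, counting with rounded predictions
    true_positives = np.sum(np.round(y_pred) * y_true)
    possible_positives = np.sum(y_true)
    return true_positives / (possible_positives + np.finfo(float).eps)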
Related
I have a dataset of German news articles that I need to classify for my job. Since it is imbalanced, I am currently focusing on only 12 of the 30 labels. I therefore tried to balance the dataset by oversampling enhanced with data augmentation. Each sample can belong to multiple categories, so it is a multi-label problem.
The train dataset contains about 127,000 samples.
I am using a German BERT model with TensorFlow, but despite fine-tuning and even adding new layers, my validation accuracy is always around 65%, sometimes 67 to 68%, but never higher. I wondered if my code is perhaps broken or if it is due to the dataset.
Here is what I have right now:
tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")
transformer_model = TFAutoModel.from_pretrained("dbmdz/bert-base-german-cased", output_hidden_states=False)
def multi_label_accuracy(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    """For multi-label classification, one has to define a custom
    accuracy function because neither tf.keras.metrics.Accuracy nor
    tf.keras.metrics.CategoricalAccuracy evaluates the number of
    exact matches.

    :Example:
    >>> from tensorflow.keras import metrics
    >>> y_true = tf.convert_to_tensor([[1., 1.]])
    >>> y_pred = tf.convert_to_tensor([[1., 0.]])
    >>> metrics.Accuracy()(y_true, y_pred).numpy()
    0.5
    >>> metrics.CategoricalAccuracy()(y_true, y_pred).numpy()
    1.0
    >>> multi_label_accuracy(y_true, y_pred).numpy()
    0.0
    """
    y_pred = tf.math.sigmoid(y_pred)
    y_pred = tf.math.round(y_pred)
    exact_matches = tf.math.reduce_all(y_pred == y_true, axis=1)
    exact_matches = tf.cast(exact_matches, tf.float32)
    return tf.math.reduce_mean(exact_matches)
def f1_score(y_true, y_logit):
    '''
    Calculate F1 score
    y_true: true value
    y_logit: predicted value
    '''
    y_logit = tf.math.sigmoid(y_logit)
    true_positives = K.sum(K.round(K.clip(y_true * y_logit, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    predicted_positives = K.sum(K.round(K.clip(y_logit, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return (2 * precision * recall) / (precision + recall + K.epsilon())
for l in transformer_model.layers:
    l.trainable = True

bert = transformer_model.layers[0]
input_ids = tf.keras.layers.Input(shape=(60,), name='input_ids', dtype=np.int32)
attention_masks = tf.keras.layers.Input(shape=(60,), name='attention_masks', dtype=np.int32)
bert_model = bert(input_ids, attention_mask=attention_masks)[0][:, 0, :]
dropout = tf.keras.layers.Dropout(0.2, name="pooled_output")
pooled_output = dropout(bert_model)
dense = tf.keras.layers.Dense(units=256, activation="sigmoid")(pooled_output)
dropout2 = tf.keras.layers.Dropout(0.2)(dense)
dense2 = tf.keras.layers.Dense(units=64, activation="relu")(dropout2)
output = tf.keras.layers.Dense(units=12, name="output")(dense2)
model = tf.keras.models.Model(inputs=[input_ids, attention_masks], outputs=output)

print("Compile model...", flush=True)
optimizer = Adam(learning_rate=1e-5, decay=1e-6)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=optimizer,
              metrics=[f1_score, multi_label_accuracy])

history = model.fit([dataset['train']['bert'], dataset['train']['bert2']],
                    dataset['train']['outputs'],
                    epochs=4, batch_size=64,
                    validation_data=([dataset['val']['bert'], dataset['val']['bert2']],
                                     dataset['val']['outputs']))
I would expect the validation accuracy to change much more when changing the architecture of the model.
I've got a custom model, called vae, that I pre-trained in a separate notebook and saved using vae.save().
I'm now looking to implement a model which is a full training pipeline containing the pre-trained vae and some other models (including a ResNet-50 from the TensorFlow gallery).
The TensorFlow documentation on Making new Layers and Models via subclassing doesn't mention anything about including models in subclassed models.
My question is: how do I include my pre-trained VAE in a new subclassed model?
I've already tried writing the code below, which works fine for training, but I haven't seen implementations like it elsewhere, and I'm getting errors when trying to do reid.save() or reid.summary() on the model, so I presume there is a better way to do it:
class ReId(keras.Model):
    def __init__(self, vae, num_ids, **kwargs):
        super(ReId, self).__init__(**kwargs)
        self.vae = vae
        self.convex_combination = ConvexCombination()
        self.resnet_50 = resnet50
        self.glob_avg_pool = keras.layers.GlobalAveragePooling2D()
        self.bnneck = keras.layers.BatchNormalization()
        self.num_ids = num_ids
        self.final_fc_layer = keras.layers.Dense(self.num_ids)
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.vae_loss_tracker = keras.metrics.Mean(name="vae_loss")
        self.classification_loss_tracker = keras.metrics.Mean(name="classification_loss")
        self.triplet_loss_tracker = keras.metrics.Mean(name="triplet_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.vae_loss_tracker,
            # Classification loss is cross-entropy loss here
            self.classification_loss_tracker,
            self.triplet_loss_tracker,
        ]

    def train_step(self, data):
        x, y = data
        y = tf.cast(y, tf.int32)
        with tf.GradientTape() as tape:
            # Calculate VAE loss
            z_mean, z_log_var, z = self.vae.encoder(x)
            reconstruction = self.vae.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                keras.losses.mean_squared_error(x, reconstruction)
            )
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            vae_loss = reconstruction_loss + (LAMBDA * kl_loss)
            embedding_output = self.call(x)
            bnneck_output = self.bnneck(embedding_output)
            training_output = self.final_fc_layer(bnneck_output)
            # Calculate triplet loss
            triplet_loss = tfa.losses.triplet_semihard_loss(y_true=y, y_pred=embedding_output)
            # Calculate cross-entropy loss
            ce_loss = keras.losses.sparse_categorical_crossentropy(y, training_output)
            # Sum up the losses
            total_loss = triplet_loss + ce_loss + (TOTAL_LOSS_VAE_MULTIPLIER * vae_loss)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.classification_loss_tracker.update_state(ce_loss)
        self.triplet_loss_tracker.update_state(triplet_loss)
        self.vae_loss_tracker.update_state(vae_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "classification loss": self.classification_loss_tracker.result(),
            "triplet loss": self.triplet_loss_tracker.result(),
            "vae loss": self.vae_loss_tracker.result(),
        }

    def get_config(self):
        config = super(ReId, self).get_config()
        config.update({"num_ids": self.num_ids})
        return config

    def call(self, data):
        recon_img = self.vae(data)
        convex_combo_input = [data, (data - recon_img)]
        convex_combo_output = self.convex_combination(convex_combo_input)
        final_output = self.glob_avg_pool(self.resnet_50(convex_combo_output))
        return final_output
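(For reference, a usage sketch of how this class might be wired up from the saved model; the load_model usage and the num_ids value are my assumptions, not code from the post:)

# Sketch: load the VAE saved earlier with vae.save(), then wrap it in ReId
vae = keras.models.load_model("vae")  # a subclassed VAE may need custom_objects here
reid = ReId(vae=vae, num_ids=100)     # num_ids is a placeholder
reid.compile(optimizer=keras.optimizers.Adam())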
I have created a custom loss (weighted absolute error) in Keras, but the implementation doesn't work: I get the error ValueError: No gradients provided for any variable: ['my_model/conv2d/kernel:0', 'my_model/conv2d/bias:0'].
I want to apply a different weight to each pixel.
class WeightedMeanAbsoluteError(tf.keras.metrics.Metric):
    def __init__(self, name='weighted_mean_absolute_error'):
        super(WeightedMeanAbsoluteError, self).__init__(name=name)
        self.wmae = self.add_weight(name='wmae', initializer='zeros')

    def update_state(self, y_true, y_pred, loss_weights):
        values = tf.math.abs(y_true - y_pred) * loss_weights
        return self.wmae.assign_add(tf.reduce_sum(values))

    def result(self):
        return self.wmae

    def reset_states(self):
        # The state of the metric will be reset at the start of each epoch.
        self.wmae.assign(0.)

loss_object = WeightedMeanAbsoluteError()
train_loss = WeightedMeanAbsoluteError()
I use the following code to implement a training step:
@tf.function
def train_step(input_images, output_images):
    with tf.GradientTape() as tape:
        # training=True is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        result_images = model(input_images, training=True)
        loss = loss_object(output_images, result_images)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
Also, my code works just fine if I use:
loss_object = tf.keras.losses.MeanAbsoluteError()
train_loss = tf.keras.metrics.MeanAbsoluteError()
The best and simplest way to minimize a weighted standard loss (such as MAE) is to use the sample_weight parameter of the fit method, where we pass an array with the desired weight of each sample:
X = np.random.uniform(0, 1, (1000, 50))
y = np.random.uniform(0, 1, 1000)
W = np.random.randint(1, 10, 1000)

inp = Input((50,))
x = Dense(64, activation='relu')(inp)
out = Dense(1)(x)
model = Model(inp, out)
model.compile('adam', 'mae')
model.fit(X, y, epochs=100, sample_weight=W)
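To weight each pixel individually (the original goal), a plain loss function can be used instead of a tf.keras.metrics.Metric subclass; a Metric updates its state through variable assignments, so no gradient flows through it, which is presumably what triggers the "No gradients provided" error. A minimal sketch, where make_weighted_mae and the weight shapes are my own illustration, not code from the original post:

import tensorflow as tf

def make_weighted_mae(weight_map):
    # weight_map: a tensor broadcastable to y_true/y_pred, e.g. shape (H, W, C)
    def weighted_mae(y_true, y_pred):
        # per-pixel absolute error, scaled by the fixed weight map
        return tf.reduce_mean(tf.abs(y_true - y_pred) * weight_map)
    return weighted_mae

# placeholder uniform weights; replace with the desired per-pixel weights
loss_object = make_weighted_mae(tf.ones((64, 64, 1)))
model.compile(optimizer='adam', loss=loss_object)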
I have tried to implement a multi-layer perceptron with sigmoid activations. Below is the code:
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1.0 - sigmoid(x))

class MLP:
    def __init__(self, layers, x_train, y_train):
        self.layers = layers
        self.inputs = x_train
        self.outputs = y_train

    def forward(self, input):
        output = input
        for layer in self.layers:
            layer.activations = output
            output = layer.feedforward(output)
        return output

    def backward(self, output, predicted):
        error = np.multiply(2 * np.subtract(output, predicted), sigmoid_derivative(predicted))
        for layer in self.layers[::-1]:
            # recursively backpropagate the error
            error = layer.backpropagate(error)

    def train(self):
        for i in range(1, 500):
            predicted = self.forward(self.inputs)
            self.backward(self.outputs, predicted)

    def test(self, input):
        return self.forward(input)

class Layer:
    def __init__(self, prevNodes, selfNodes):
        self.weights = np.random.rand(prevNodes, selfNodes)
        self.biases = np.zeros(selfNodes)
        self.activations = np.array([])

    def feedforward(self, input):
        return sigmoid(np.dot(input, self.weights) + self.biases)

    def backpropagate(self, error):
        delPropagate = np.dot(error, self.weights.transpose())
        dw = np.dot(self.activations.transpose(), error)
        db = error.mean(axis=0) * self.activations.shape[0]
        self.weights = self.weights + 0.1 * dw
        self.biases = self.biases + 0.1 * db
        return np.multiply(delPropagate, sigmoid_derivative(self.activations))

layer1 = Layer(3, 4)
layer2 = Layer(4, 1)
x_train = np.array([[0,0,1],[0,1,1],[1,0,1],[1,1,1]])
y_train = np.array([[0],[1],[1],[0]])
x_test = np.array([[0,0,1]])
mlp = MLP([layer1, layer2], x_train, y_train)
mlp.train()
mlp.test(x_test)
However, the problem is that the network saturates to output the average of all training outputs for any input. For example, in the above case the y_train average is about 0.5, and no matter what x_test value I feed to the network, the output is always around the 0.5 mark.
Where could the problem be in the code? Am I missing something in the algorithm? Help is appreciated.
The issue seems to be too few iterations: increasing them from 500 to 50,000 works, and raising the learning rate to 0.5 also works with fewer iterations. The matrix manipulations and all the mathematics seem consistent.
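As a sketch of those two tweaks (my own parameterization; the original hard-codes 0.1 in Layer.backpropagate and 500 iterations in MLP.train), patched onto the classes above:

LEARNING_RATE = 0.5   # was 0.1
ITERATIONS = 50000    # was 500; with lr=0.5, far fewer also work

def train(self, iterations=ITERATIONS):
    for _ in range(iterations):
        predicted = self.forward(self.inputs)
        self.backward(self.outputs, predicted)

def backpropagate(self, error):
    delPropagate = np.dot(error, self.weights.transpose())
    dw = np.dot(self.activations.transpose(), error)
    db = error.mean(axis=0) * self.activations.shape[0]
    self.weights = self.weights + LEARNING_RATE * dw
    self.biases = self.biases + LEARNING_RATE * db
    return np.multiply(delPropagate, sigmoid_derivative(self.activations))

# patch the parameterized versions onto the classes defined above
MLP.train = train
Layer.backpropagate = backpropagate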
I am using TensorFlow 1.7.0 in eager execution mode. I have the model working, but none of the examples that I have found for saving the model work.
This is the code that I am using:
checkpoint_directory = './JokeWords/'
checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
checkpoint = tfe.Checkpoint(model=model, optimizer=optimizer)  # save as "x"
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))
evaluate(model, jokes, 2, 32)
....
checkpoint.save(file_prefix=checkpoint_prefix)
I have trained the model and use evaluate to check the results when loading after a restart. Each time I get a random result from evaluate, meaning that the model is not being loaded from the checkpoint data but instead has only random weights.
How do I save the model? It can take days to train one of these.
Edit. Here is the model:
class EagerRNN(tfe.Network):
    def __init__(self, embedding, hidden_dim, num_layers, keep_ratio):
        super(EagerRNN, self).__init__()
        self.keep_ratio = keep_ratio
        self.cells = self._add_cells([
            tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)
            for _ in range(num_layers)
        ])
        self.backcells = self._add_cells([
            tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)
            for _ in range(num_layers)
        ])
        self.linear = layers.Dense(embedding.vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
        self.backlinear = layers.Dense(embedding.vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
        self.attension = layers.Dense(hidden_dim, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))

    def call(self, input_seq, seq_lengths, training):
        lengths = [i[0] for i in seq_lengths]
        nRotations = max(lengths)
        batchSize = input_seq.shape[0]
        input_seq2 = tf.unstack(input_seq, num=int(input_seq.shape[1]), axis=1)
        atten = None
        state = self.cells[0].zero_state(batchSize, tf.float32)
        for i in range(0, nRotations):
            for j in range(0, len(self.cells)):
                c = self.cells[j]
                inp = input_seq2[i]
                output, state = c(inp, state)
                #input_seq2[i] = (output)
                if atten is None:
                    atten = self.linear(output)
                else:
                    atten = atten + self.linear(output)
        for i in range(nRotations - 1, -1, -1):
            for j in range(0, len(self.backcells)):
                c = self.backcells[j]
                inp = input_seq2[i]
                output, state = c(inp, state)
                #input_seq2[i] = (output)
                atten = atten + self.backlinear(output)
        #input_seq = tf.stack(input_seq2[0:nRotations], axis=1)
        atten = self.attension(atten)
        if training:
            input_seq = tf.nn.dropout(input_seq, self.keep_ratio)
        # Returning a list instead of a single tensor so that the line:
        #   y = self.rnn(y, ...)[0]
        # in PTBModel.call works for both this RNN and CudnnLSTM (which returns a
        # tuple (output, output_states)).
        return input_seq, state, atten

    def _add_cells(self, cells):
        # "Magic" required for keras.Model classes to track all the variables in
        # a list of Layer objects.
        # TODO(ashankar): Figure out API so user code doesn't have to do this.
        for i, c in enumerate(cells):
            setattr(self, "cell-%d" % i, c)
        return cells

class EagerLSTM_Model(tfe.Network):
    """LSTM for word language modeling.

    Model described in:
    (Zaremba, et. al.) Recurrent Neural Network Regularization
    http://arxiv.org/abs/1409.2329

    See also:
    https://github.com/tensorflow/models/tree/master/tutorials/rnn/ptb
    """

    def __init__(self,
                 embedding,
                 hidden_dim,
                 num_layers,
                 dropout_ratio,
                 use_cudnn_rnn=True):
        super(EagerLSTM_Model, self).__init__()
        self.keep_ratio = 1 - dropout_ratio
        self.use_cudnn_rnn = use_cudnn_rnn
        self.embedding = embedding
        if self.use_cudnn_rnn:
            self.rnn = cudnn_rnn.CudnnLSTM(
                num_layers, hidden_dim, dropout=dropout_ratio)
        else:
            self.rnn = EagerRNN(embedding, hidden_dim, num_layers, self.keep_ratio)
        self.unrnn = EagerUnRNN(embedding, hidden_dim, num_layers, self.keep_ratio)

    def callRNN(self, input_seq, seq_lengths, training):
        y = self.embedding.callbatchword(input_seq)
        if training:
            y = tf.nn.dropout(y, self.keep_ratio)
        y, state, atten = self.rnn.call(y, seq_lengths, training=training)
        return state, atten

    def callUnRNN(self, state, atten, seq_lengths, training):
        x, state = self.unrnn(state, atten, seq_lengths, training=training)
        #b = tf.reshape(y, self._output_shape)
        #c = self.linear(b)
        return x
tfe.Network is not (easily) checkpointable, and it will soon be deprecated; prefer to subclass tf.keras.Model instead. If you change class EagerRNN(tfe.Network) to class EagerRNN(tf.keras.Model) and class EagerLSTM_Model(tfe.Network) to class EagerLSTM_Model(tf.keras.Model), then checkpoint.save(file_prefix=checkpoint_prefix) should actually save all your variables and checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory)) should restore them.
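A minimal sketch of the pattern with a toy model (TinyModel is mine, purely to illustrate; the checkpoint calls are the ones from the question):

class TinyModel(tf.keras.Model):
    def __init__(self, hidden_dim=8):
        super(TinyModel, self).__init__()
        self.dense = tf.keras.layers.Dense(hidden_dim)

    def call(self, x):
        return self.dense(x)

model = TinyModel()
model(tf.zeros([1, 4]))  # run once so the variables exist before saving
checkpoint = tfe.Checkpoint(model=model, optimizer=optimizer)
checkpoint.save(file_prefix=checkpoint_prefix)
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))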