Is there a way to make a layer behave differently during forward pass for model.fit() and model.evaluate() in a customised Keras model? - tensorflow

I am trying to reproduce the incomplete convolution layer with a linear profile (gamma) proposed in this paper, where an operation called incomplete dot product (IDP) is introduced, which decides the percentage of the input channels to be used for inference. I customised the incomplete convolutional layer as below:
class IncompleteConv2D(layers.Conv2D):
    def __init__(self, IDP=1, **kwargs):
        self.IDP = IDP
        super(IncompleteConv2D, self).__init__(**kwargs)

    def call(self, inputs, training=None):
        def linear_profile(N, training):
            g_list = []
            num_channels_in_used = []
            if training != False:
                for i in range(1, N+1):
                    g_list.append(1 - i/N)
                g = tf.stack([g_list])
                return g, num_channels_in_used
            num_channels_in_used = np.floor(N*self.IDP).astype(np.int32)
            if num_channels_in_used == 0:
                for i in range(1, N+1):
                    g_list.append(1 - i/N)
            else:
                for i in range(1, num_channels_in_used+1):
                    g_list.append(1 - i/N)
            g = tf.stack([g_list])
            return g, num_channels_in_used

        num_channels_in = inputs.shape[-1]
        num_channels_out = self.kernel.shape[-1]
        gamma, num_channels_in_used = linear_profile(num_channels_in, training)

        g_converted_list = []
        if training != False:
            for j in range(num_channels_out):
                g_converted = tf.reshape(
                    tf.broadcast_to(gamma, [self.kernel[:,:,:,j].shape[0]*self.kernel[:,:,:,j].shape[1], self.kernel[:,:,:,j].shape[2]]),
                    tf.shape(self.kernel[:,:,:,j])
                )
                g_converted = tf.expand_dims(g_converted, 3)
                g_converted_list.append(g_converted)
        else:
            for j in range(num_channels_out):
                if (num_channels_in_used == 0):
                    g_converted = tf.reshape(
                        tf.broadcast_to(gamma, [self.kernel[:,:,:,j].shape[0]*self.kernel[:,:,:,j].shape[1], self.kernel[:,:,:,j].shape[2]]),
                        tf.shape(self.kernel[:,:,:,j])
                    )
                else:
                    g_converted = tf.reshape(
                        tf.broadcast_to(gamma, [self.kernel[:,:,:,j].shape[0]*self.kernel[:,:,:,j].shape[1], num_channels_in_used]),
                        [self.kernel[:,:,:,j].shape[0], self.kernel[:,:,:,j].shape[1], num_channels_in_used]
                    )
                g_converted = tf.expand_dims(g_converted, 3)
                g_converted_list.append(g_converted)

            if (num_channels_in_used > 0):
                self.kernel = self.kernel[:,:,:num_channels_in_used,:]
                inputs = inputs[:,:,:,:num_channels_in_used]

        gamma_converted = tf.concat(g_converted_list, 3)

        mean, var = tf.nn.moments(gamma_converted * self.kernel, axes=[0, 1, 2], keepdims=True)
        result = self.convolution_op(
            inputs, (gamma_converted * self.kernel - mean) / tf.sqrt(var + 1e-10)
        )
        if self.use_bias:
            result = result + self.bias
        return result
Basically, a linear profile gamma is learned during training, and an IDP between 0 and 1 should be set only during inference. In other words, IDP should remain 1 at all times during training. With this IncompleteConv2D layer defined, an example model initialisation is as below:
x = keras.Input(shape=input_shape)
output = IncompleteConv2D(filters=32, kernel_size=(3, 3), activation="relu", IDP=1)(x)
output = layers.MaxPooling2D(pool_size=(2, 2))(output)
output = IncompleteConv2D(filters=64, kernel_size=(3, 3), activation="relu", IDP=1)(output)
output = layers.MaxPooling2D(pool_size=(2, 2))(output)
output = layers.Flatten()(output)
output = layers.Dropout(0.5)(output)
output = layers.Dense(10, activation="softmax")(output)
model = Model(x, output)
model.summary()
where IDP=1 is set as a parameter of the layer (which I believe is not the best way, since IDP is only relevant to the inference phase...). Note that IDP has to be set to 1 here because during the validation passes of the training phase all channels should be utilised.
After compilation, the code for training phase is as below:
model_history = model.fit(training_generator,
                          validation_data=validation_generator,
                          verbose=1,
                          epochs=1)
where the validation accuracy during the training phase is evaluated with training=False inside the call method of the IncompleteConv2D layer, with IDP=1.
Now comes the tricky part, for evaluation:
test_loss, test_accuracy = model.evaluate(X_test_data, y_test, IDP=0.8, verbose=1)
An IDP argument between 0 and 1 should be fed into the evaluation phase. However, IDP is not recognised.
TypeError: Invalid keyword arguments: ['IDP']
Problem: I looked into customising model.evaluate() using the test_step method of a custom Model subclass, but it seems you cannot add your own argument to model.evaluate().
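For reference, this is roughly what the default test_step looks like (adapted from the Keras custom-training guide, not from the original post): evaluate() only hands test_step the data batch, so there is no slot for a user-defined keyword such as IDP.

import tensorflow as tf
from tensorflow import keras

class CustomModel(keras.Model):
    def test_step(self, data):
        # evaluate() passes only the data batch here; there is no hook
        # for a custom keyword argument such as IDP.
        x, y = data
        y_pred = self(x, training=False)
        self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}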
If this can be solved, I expect that during model.evaluate(IDP=0.8) this new IDP will be passed to the call method during the forward pass for inference, thus overriding the original IDP (which was set to 1).
Is there any way I can achieve this? Any help is highly appreciated.
Updated: Following the useful pointers from @Innat, I inherited the keras.Model class with IDP initialised as follows:
class CustomModel(keras.Model):
    def __init__(self, IDP=1, **kwargs):
        super(CustomModel, self).__init__(**kwargs)
        self.IDP = tf.Variable(IDP, dtype=tf.float32)

model = CustomModel(inputs=x, outputs=output, IDP=1)
With the help of a custom callback, I managed to change the IDP attribute in the CustomModel class to 0.8 at the beginning of model.evaluate().
class CustomCallback(keras.callbacks.Callback):
    def on_test_begin(self, logs=None):
        # self.model.IDP.assign(0.8)
        for layer in self.model.layers:
            if isinstance(layer, IncompleteConv2D):
                layer.IDP = 0.8
test_loss, test_accuracy = model.evaluate(X_test_data, y_test, verbose=1, callbacks=[CustomCallback()])
However, from the observation that the test accuracy is similar to the validation accuracy, I believe the IDP attribute in the IncompleteConv2D layer class remained 1 for model.evaluate().
What should I do next to link IDP in CustomModel to IDP in IncompleteConv2D, as in, how to pass a model attribute to its layer?

In essence, you've created a custom layer (IncompleteConv2D) and initialized a scalar attribute that is later used in the call method. Now you want to change this value at test or inference time. In Keras, one way to achieve this is the callback API. Here I'm showing a general solution, not using your IncompleteConv2D layer.
Let's import some necessary libraries and define a simple custom layer. Here, in the call method, we will print out the predefined scalar value.
import tensorflow as tf
import tensorflow.keras as keras
import keras.layers as layers
import numpy as np

class StandardizedConv2DWithCall(layers.Conv2D):
    def __init__(self, some_var, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.some_var = tf.Variable(
            some_var, dtype=tf.float32, trainable=False
        )

    def call(self, inputs):
        print('printing from call fn: ', self.some_var)
        mean, var = tf.nn.moments(
            self.kernel, axes=[0, 1, 2], keepdims=True
        )
        result = self.convolution_op(
            inputs, (self.kernel - mean) / tf.sqrt(var + 1e-10)
        )
        if self.use_bias:
            result = result + self.bias
        return result
Prepare some data to train and test.
# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

def prepare_ds(x, y, take=None):
    if take:
        x, y = x[:take], y[:take]
    x = tf.expand_dims(tf.cast(x, dtype=tf.float32), axis=3)
    x = tf.repeat(x, repeats=3, axis=3)
    x = tf.divide(x, 255.)
    x = tf.image.resize(x, [28, 28])
    y = tf.one_hot(y, depth=10)
    return x, y

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, y_train = prepare_ds(x_train, y_train, take=100)
x_test, y_test = prepare_ds(x_test, y_test, take=50)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

(TensorShape([100, 28, 28, 3]),
 TensorShape([100, 10]),
 TensorShape([50, 28, 28, 3]),
 TensorShape([50, 10]))
Let's define a dummy model, which contains the above custom layer.
num_classes = 10  # MNIST

model = keras.Sequential(
    [
        keras.layers.InputLayer(input_shape=(28, 28, 3)),
        StandardizedConv2DWithCall(
            some_var=1., filters=32, kernel_size=(3, 3), activation="relu"
        ),
        layers.MaxPooling2D(pool_size=(2, 2)),
        StandardizedConv2DWithCall(
            some_var=1., filters=64, kernel_size=(3, 3), activation="relu"
        ),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dense(num_classes, activation="softmax"),
    ]
)
model.summary()
Run the model in eager mode to see the printed value from the custom layer's call method.
batch_size = 128

model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
    run_eagerly=True
)
model.fit(x_train, y_train, batch_size=batch_size, epochs=1)
printing from call fn: <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.0>
printing from call fn: <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.0>
1/1 [==============================] - 0s 179ms/step - loss: 40.5805 - accuracy: 0.0400
<keras.callbacks.History at 0x7f653355bd00>
The printed value is 1.0, as set. Now let's define a custom callback that will be used to change this value at test time.
class UpdateSomeVariableCallback(keras.callbacks.Callback):
    def __init__(self, update_value):
        super().__init__()
        self.update_value = update_value

    def on_test_begin(self, logs=None):
        for layer in self.model.layers:
            if isinstance(layer, StandardizedConv2DWithCall):
                layer.some_var.assign(self.update_value)
Run the test
# test 1
loss, acc = model.evaluate(
    x_test,
    y_test,
    callbacks=[
        UpdateSomeVariableCallback(update_value=0.8)
    ]
)
printing from call fn: <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.8>
2/2 [==============================] - 0s 41ms/step - loss: 26.0635 - accuracy: 0.3600
# test 2
loss, acc = model.evaluate(
    x_test,
    y_test,
    callbacks=[
        UpdateSomeVariableCallback(update_value=0.2)
    ]
)
printing from call fn: <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.2>
2/2 [==============================] - 0s 31ms/step - loss: 26.0635 - accuracy: 0.3600
As we can see, the variable is updated and printed accordingly in the call method.
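Applied back to the question's IncompleteConv2D, the same pattern would look roughly like the sketch below. This is an assumption-laden adaptation, not code from the original post: it presumes the layer stores IDP as a non-trainable tf.Variable and reads it with TensorFlow ops inside call() (the NumPy/Python-float branching in the original call() gets baked into the traced test function, which is most likely why rebinding layer.IDP = 0.8 had no visible effect).

import tensorflow as tf
from tensorflow import keras

# Hedged sketch: assumes IncompleteConv2D.__init__ does
#     self.IDP = tf.Variable(IDP, dtype=tf.float32, trainable=False)
# and that call() derives num_channels_in_used from self.IDP with TF ops,
# e.g. tf.cast(tf.floor(N * self.IDP), tf.int32), instead of np.floor.
class SetIDPCallback(keras.callbacks.Callback):
    def __init__(self, idp):
        super().__init__()
        self.idp = idp

    def on_test_begin(self, logs=None):
        for layer in self.model.layers:
            if isinstance(layer, IncompleteConv2D):  # the layer class from the question
                layer.IDP.assign(self.idp)           # assign(), not `layer.IDP = ...`

# test_loss, test_accuracy = model.evaluate(
#     X_test_data, y_test, verbose=1, callbacks=[SetIDPCallback(0.8)]
# )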

Related

sklearn classification_report ValueError: Found input variables with inconsistent numbers of samples: [18, 576]

I'm working on a CNN classification problem. I used Keras and a pre-trained model. Now I want to evaluate my model and need the precision, recall and F1-score. When I use sklearn.metrics classification_report I get the above error. I know where the numbers are coming from: the first is the number of batches in my test dataset and the second is the number of actual samples (predictions) in it. However, I don't know how to "convert" them.
See my code down below:
# load train_ds
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory='/gdrive/My Drive/Flies_dt/224x224',
    image_size=(224, 224),
    validation_split=0.40,
    subset="training",
    seed=123,
    shuffle=True)

# load val_ds
val_ds = tf.keras.utils.image_dataset_from_directory(
    directory='/gdrive/My Drive/Flies_dt/224x224',
    image_size=(224, 224),
    validation_split=0.40,
    subset="validation",
    seed=123,
    shuffle=True)

# move some batches of val_ds to test_ds
test_ds = val_ds.take((1*len(val_ds)) // 2)
print('test_ds =', len(test_ds))
val_ds = val_ds.skip((1*len(val_ds)) // 2)
print('val_ds =', len(val_ds))  # test_ds = 18, val_ds = 18

# Load Model
base_model = keras.applications.vgg19.VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3)
)

# Freeze base_model
base_model.trainable = False

#
inputs = keras.Input(shape=(224, 224, 3))
x = data_augmentation(inputs)  # apply data augmentation

# Preprocessing
x = tf.keras.applications.vgg19.preprocess_input(x)

# The base model contains batchnorm layers. We want to keep them in inference mode
# when we unfreeze the base model for fine-tuning, so we make sure that the
# base_model is running in inference mode here.
x = base_model(x, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.2)(x)  # Regularize with dropout
outputs = keras.layers.Dense(5, activation="softmax")(x)
model = keras.Model(inputs, outputs)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="Adam",
    metrics=['acc']
)
model.fit(train_ds, epochs=8, validation_data=val_ds, callbacks=[tensorboard_callback])

# Unfreeze the base_model. Note that it keeps running in inference mode
# since we passed `training=False` when calling it. This means that
# the batchnorm layers will not update their batch statistics.
# This prevents the batchnorm layers from undoing all the training
# we've done so far.
base_model.trainable = True
model.summary()

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.000001),  # Low learning rate
    loss="sparse_categorical_crossentropy",
    metrics=['acc']
)
model.fit(train_ds, epochs=5, validation_data=val_ds)

# Evaluate
from sklearn.metrics import classification_report

y_pred = model.predict(test_ds, batch_size=64, verbose=1)
y_pred_bool = np.argmax(y_pred, axis=1)

print(classification_report(test_ds, y_pred_bool))
I also tried something like this, but I'm not sure if this gives me the correct values for multiclass classification.
from keras import backend as K

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc', f1_m, precision_m, recall_m])

# fit the model
history = model.fit(Xtrain, ytrain, validation_split=0.3, epochs=10, verbose=0)

# evaluate the model
loss, accuracy, f1_score, precision, recall = model.evaluate(Xtest, ytest, verbose=0)
This is a lot, sorry. Hope somebody can help.
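Not part of the original post, but one hedged way to make the shapes line up (assuming test_ds yields (images, integer_labels) batches, the default of image_dataset_from_directory): collect labels and predictions in a single pass over the batched dataset so classification_report receives one label and one prediction per sample rather than one entry per batch.

import numpy as np
from sklearn.metrics import classification_report

y_true, y_pred = [], []
for images, labels in test_ds:  # one pass, so a reshuffling dataset stays aligned
    y_true.append(labels.numpy())
    y_pred.append(np.argmax(model.predict_on_batch(images), axis=1))

y_true = np.concatenate(y_true)  # one entry per sample, not per batch
y_pred = np.concatenate(y_pred)
print(classification_report(y_true, y_pred))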

"No gradients provided for any variable" error when trying to use GradientTape mechanism

I'm trying to use the GradientTape mechanism for the first time. I've looked at some examples, but I'm getting the "No gradients provided for any variable" error and was wondering how to overcome it.
I want to define some complex loss functions, so I tried using GradientTape to produce their gradients for the CNN training. What am I doing wrong, and how can I fix it?
Attached is a run-able example code that demonstrates my problem:
# imports
import numpy as np
import tensorflow as tf
import sklearn
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

tf.config.run_functions_eagerly(True)

# my loss function
def my_loss_fn(y_true, y_pred):
    # train SVM classifier
    VarC = 1E6
    VarGamma = 'scale'
    clf = SVC(kernel='rbf', C=VarC, gamma=VarGamma, probability=True)
    clf.fit(y_pred, y_true)
    y_pred = clf.predict_proba(y_pred)
    scce = tf.keras.losses.SparseCategoricalCrossentropy()
    return scce(y_true, y_pred)
# creating inputs for demonstration
X0 = 0.5*np.ones((12, 12))
X0[2:12:4, :] = 0
X0[3:12:4, :] = 0
X1 = 0.5*np.ones((12, 12))
X1[1:12:4, :] = 0
X1[2:12:4, :] = 0
X1 = np.transpose(X1)
X = np.zeros((2000, 12, 12))
for i in range(0, 1000):
    X[i] = X0 + np.random.rand(12, 12)
for i in range(1000, 2000):
    X[i] = X1 + np.random.rand(12, 12)
y = np.zeros(2000, dtype=int)
y[1000:2000] = 1

x_train, x_val, y_train, y_val = train_test_split(X, y, train_size=0.5)
x_val, x_test, y_val, y_test = train_test_split(x_val, y_val, train_size=0.5)

x_train = tf.convert_to_tensor(x_train)
x_val = tf.convert_to_tensor(x_val)
x_test = tf.convert_to_tensor(x_test)
y_train = tf.convert_to_tensor(y_train)
y_val = tf.convert_to_tensor(y_val)
y_test = tf.convert_to_tensor(y_test)

inputs = keras.Input((12, 12, 1), name='images')
x0 = tf.keras.layers.Conv2D(8, 4, strides=4)(inputs)
x0 = tf.keras.layers.AveragePooling2D(pool_size=(3, 3), name='pooling')(x0)
outputs = tf.keras.layers.Flatten(name='predictions')(x0)
model = keras.Model(inputs=inputs, outputs=outputs)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Instantiate a loss function.
loss_fn = my_loss_fn

# Prepare the training dataset.
batch_size = 256
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

epochs = 100
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))
    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Open a GradientTape to record the operations run
        # during the forward pass, which enables autodifferentiation.
        with tf.GradientTape() as tape:
            tape.watch(model.trainable_weights)
            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = model(x_batch_train, training=True)  # Logits for this minibatch
            # Compute the loss value for this minibatch.
            loss_value = loss_fn(y_batch_train, logits)
        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)
        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        # Log every 200 batches.
        if step % 200 == 0:
            print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
            print('Seen so far: %s samples' % ((step + 1) * 64))
And when running, I get:
ValueError: No gradients provided for any variable: (['conv2d_2/kernel:0', 'conv2d_2/bias:0'],). Provided grads_and_vars is ((None, <tf.Variable 'conv2d_2/kernel:0' shape=(4, 4, 1, 8) dtype=float32, nump
If I use a standard loss function instead, for example with the following model and loss function:
inputs = keras.Input((12,12,1), name='images')
x0 = tf.keras.layers.Conv2D(8,4,strides=4)(inputs)
x0 = tf.keras.layers.AveragePooling2D(pool_size=(3, 3), name='pooling')(x0)
x0 = tf.keras.layers.Flatten(name='features')(x0)
x0 = layers.Dense(16, name='meta_features')(x0)
outputs = layers.Dense(2, name='predictions')(x0)
model = keras.Model(inputs=inputs, outputs=outputs)
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
Everything works fine and converges well.
What am I doing wrong and can I fix it?
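Not from the original post, but a minimal sketch of what is going on: the custom loss routes y_pred through scikit-learn (clf.fit / clf.predict_proba), and those are not TensorFlow ops, so the tape cannot trace from the loss back to the model weights and returns None for every gradient. A tiny reproduction of the same effect:

import tensorflow as tf

x = tf.Variable(2.0)
with tf.GradientTape(persistent=True) as tape:
    y = x * 3.0                      # stays inside TensorFlow: differentiable
    z = tf.constant(float(y) + 1.0)  # leaves TensorFlow (plain Python), link to x is lost
print(tape.gradient(y, x))  # tf.Tensor(3.0, ...)
print(tape.gradient(z, x))  # None -- analogous to the SVC-based loss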

TypeError: Expected keras.losses.Loss, found function

I want to build a TFF model for speech recognition systems. For this, I use a CNN-GRU model architecture with a CTC loss function, but I get an error when I call build_federated_averaging_process. I think it's about the CTC loss function, but I can't fix it.
Part of my code is:
def CTCLoss(y_true, y_pred):
    # Compute the training-time loss value
    batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
    input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
    label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
    input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
    label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
    loss = keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)
    return loss

def create_compiled_keras_model():
    """Model similar to DeepSpeech2."""
    # Model's input
    input_spectrogram = layers.Input((None, fft_length // 2 + 1), name="input")
    # Expand the dimension to use 2D CNN.
    x = layers.Reshape((-1, fft_length // 2 + 1, 1), name="expand_dim")(input_spectrogram)
    # Convolution layer 1
    x = layers.Conv2D(
        filters=32,
        kernel_size=[11, 41],
        strides=[2, 2],
        padding="same",
        use_bias=False,
        name="conv_1",
    )(x)
    x = layers.BatchNormalization(name="conv_1_bn")(x)
    x = layers.ReLU(name="conv_1_relu")(x)
    # Convolution layer 2
    x = layers.Conv2D(
        filters=32,
        kernel_size=[11, 21],
        strides=[1, 2],
        padding="same",
        use_bias=False,
        name="conv_2",
    )(x)
    x = layers.BatchNormalization(name="conv_2_bn")(x)
    x = layers.ReLU(name="conv_2_relu")(x)
    # Reshape the resulting volume to feed the RNN layers
    x = layers.Reshape((-1, x.shape[-2] * x.shape[-1]))(x)
    # RNN layers
    for i in range(1, 2 + 1):
        recurrent = layers.GRU(
            units=128,
            activation="tanh",
            recurrent_activation="sigmoid",
            use_bias=True,
            return_sequences=True,
            reset_after=True,
            name=f"gru_{i}",
        )
        x = layers.Bidirectional(
            recurrent, name=f"bidirectional_{i}", merge_mode="concat"
        )(x)
        if i < 2:
            x = layers.Dropout(rate=0.5)(x)
    # Dense layer
    x = layers.Dense(units=128 * 2, name="dense_1")(x)
    x = layers.ReLU(name="dense_1_relu")(x)
    x = layers.Dropout(rate=0.5)(x)
    # Classification layer
    output = layers.Dense(units=output_dim + 1, activation="softmax")(x)
    # Model
    model = keras.Model(input_spectrogram, output, name="DeepSpeech_2")
    return model

def model_fn():
    # We _must_ create a new model here, and _not_ capture it from an external
    # scope. TFF will call this within different graph contexts.
    keras_model = create_compiled_keras_model()
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=layers.Input((None, fft_length // 2 + 1)),
        loss=CTCLoss)
and I get the error at this step:
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: keras.optimizers.Adam(learning_rate=1e-4))
TypeError: Expected keras.losses.Loss, found function.
How do I fix it?
class Customloss(tf.keras.losses.Loss):
    def __init__(self):
        super().__init__()

    #tf.function
    def CTCLoss(self, y_true, y_pred):
        ...#
        return loss
Try to use tf.keras.losses.Loss for the custom loss in TFF. It will work.
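A slightly fuller sketch of that idea, reusing the CTC computation from the question and overriding call() as tf.keras.losses.Loss expects (a hypothetical adaptation, not code from the original answer):

import tensorflow as tf
from tensorflow import keras

class CTCLoss(keras.losses.Loss):
    # Wraps the function-style CTC loss from the question in a Loss subclass,
    # so tff.learning.from_keras_model receives a keras.losses.Loss instance.
    def call(self, y_true, y_pred):
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        return keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)

# then, inside model_fn, pass an instance instead of the bare function:
# return tff.learning.from_keras_model(keras_model, input_spec=..., loss=CTCLoss())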

Tensorflow 2: How to fit a subclassed model that returns multiple values in the call method?

I built the following model via Model Subclassing in TensorFlow 2:
from tensorflow.keras import Model, Input
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.layers import Flatten, Dense

class Detector(Model):
    def __init__(self, num_classes=3, name="DenseNet201"):
        super(Detector, self).__init__(name=name)
        self.feature_extractor = DenseNet201(
            include_top=False,
            weights="imagenet",
        )
        self.feature_extractor.trainable = False
        self.flatten_layer = Flatten()
        self.prediction_layer = Dense(num_classes, activation=None)

    def call(self, inputs):
        x = preprocess_input(inputs)
        extracted_feature = self.feature_extractor(x, training=False)
        x = self.flatten_layer(extracted_feature)
        y_hat = self.prediction_layer(x)
        return extracted_feature, y_hat
The subsequent steps are compiling and fitting the model. The model compiled as normal, but when fitting on my image generator (built from ImageDataGenerator), I encountered the error: InvalidArgumentError: Incompatible shapes: [64,18,18] vs. [64,1] [[node Equal (defined at :19) ]] [Op:__inference_train_function_32187] Function call stack: train_function.
history = detector.fit(
    train_generator,
    epochs=1,
    validation_data=val_generator,
    callbacks=callbacks
)
This is obvious because TensorFlow does not know whether the prediction is y_hat or extracted_feature during detector.fit() and thus throws an error. So, what is the right implementation of detector.fit for my case?
Following this question-answer, you should first train your model with (let's say) one input and one output. Later, if you want to compute Grad-CAM, you would pick some intermediate layer of your base model (not the final output of the base model), and in that case you need to build your feature extractor separately. For example:
# (let's say: one input and one output)
# used for training
base_model = keras.application(...)
x = base_model(..)
dese_drop_bn_[whatever] = x
out = dese_drop_bn_[whatever]
model = Model(base_model.input, out)

# inference / we need to compute grad cam
new_model = tf.keras.models.Model(model.input,
                                  [model.layers[15].output, model.output])
In the above, the model is used for training, and later, at inference time, if you need to compute Grad-CAM based on some layer (for example layer number 15), you need to build new_model with the appropriate outputs. Hope this makes things clear. For more information about feature extraction, see the official doc, Extract and reuse nodes in the graph of layers. FYI, the exact same thing is happening here as I mentioned earlier. Also, check this official code example; you will see the exact same thing there.
However, there is another way that I think might work for you easily. That is, as you're using a custom model, we can take advantage of the training argument in the call() method. Normally it is True at training time and False at inference time. So, based on this, we can return the desired output accordingly. Here is the complete code example:
import tensorflow as tf

# get some data
data_dir = tf.keras.utils.get_file(
    'flower_photos',
    'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
    untar=True)

datagen_kwargs = dict(rescale=1./255, validation_split=.20)
dataflow_kwargs = dict(target_size=(64, 64),
                       batch_size=16,
                       interpolation="bilinear")

train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=40,
    horizontal_flip=True,
    width_shift_range=0.2, height_shift_range=0.2,
    shear_range=0.2, zoom_range=0.2,
    **datagen_kwargs)

train_generator = train_datagen.flow_from_directory(
    data_dir, subset="training", shuffle=True, **dataflow_kwargs)

for image, label in train_generator:
    print(image.shape, image.dtype)
    print(label.shape, label.dtype)
    print(label[:4])
    break

(16, 64, 64, 3) float32
(16, 5) float32
[[0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
Here we do that trick based on the boolean value of training in the call method.
class Detector(Model):
    def __init__(self, num_classes=5, name="DenseNet201"):
        super(Detector, self).__init__(name=name)
        self.feature_extractor = DenseNet201(
            include_top=False,
            weights="imagenet",
        )
        self.feature_extractor.trainable = False
        self.flatten_layer = Flatten()
        self.prediction_layer = Dense(num_classes, activation='softmax')

    def call(self, inputs, training):
        x = preprocess_input(inputs)
        extracted_feature = self.feature_extractor(x, training=False)
        x = self.flatten_layer(extracted_feature)
        y_hat = self.prediction_layer(x)

        if training:
            return y_hat
        else:
            return [y_hat, extracted_feature]
Train
det = Detector()
det.compile(loss='categorical_crossentropy',
            optimizer='adam', metrics=['acc'])

train_step = train_generator.samples // train_generator.batch_size
det.fit(train_generator,
        steps_per_epoch=train_step,
        validation_data=train_generator,
        validation_steps=train_step,
        epochs=2, verbose=2)
Epoch 1/2
37s 139ms/step - loss: 1.7543 - acc: 0.2650 - val_loss: 1.5310 - val_acc: 0.3764
Epoch 2/2
21s 115ms/step - loss: 1.4913 - acc: 0.3915 - val_loss: 1.3066 - val_acc: 0.4667
<tensorflow.python.keras.callbacks.History at 0x7fa2890b1790>
Evaluate
det.evaluate(train_generator,
             steps=train_step)
4s 76ms/step - loss: 1.3066 - acc: 0.4667
[1.3065541982650757, 0.46666666865348816]
Inference
Here, we will get two outputs from the model (unlike the single output we got at training time).
y_hat, base_feature = det.predict(train_generator,
                                  steps=train_step)
y_hat.shape, base_feature.shape
((720, 5), (720, 2, 2, 1920))
Now you can do Grad-CAM or whatever else requires such feature maps.

how to get an array of predictions from tensor flow classification model

I have the following classification model.
I would like to get a NumPy array similar to y_t, which is the test labels one-hot encoded. However, I keep getting a variable error.
# Construct placeholders
with graph.as_default():
    inputs_ = tf.placeholder(tf.float32, [None, seq_len, n_channels], name='inputs')
    labels_ = tf.placeholder(tf.float32, [None, n_classes], name='labels')
    keep_prob_ = tf.placeholder(tf.float32, name='keep')
    learning_rate_ = tf.placeholder(tf.float32, name='learning_rate')

with graph.as_default():
    # (batch, 100, 3) --> (batch, 50, 6)
    conv1 = tf.layers.conv1d(inputs=inputs_, filters=6, kernel_size=2, strides=1,
                             padding='same', activation=tf.nn.relu)
    max_pool_1 = tf.layers.max_pooling1d(inputs=conv1, pool_size=2, strides=2, padding='same')

with graph.as_default():
    # Flatten and add dropout
    flat = tf.reshape(max_pool_1, (-1, 6*6))
    flat = tf.nn.dropout(flat, keep_prob=keep_prob_)

    # Predictions
    logits = tf.layers.dense(flat, n_classes)

    # Cost function and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_))
    optimizer = tf.train.AdamOptimizer(learning_rate_).minimize(cost)

    # Accuracy
    correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')
Then I use the test set
with tf.Session(graph=graph) as sess:
    # Restore
    saver.restore(sess, tf.train.latest_checkpoint('bschkpnt-cnn'))

    for x_t, y_t in get_batches(X_test, y_test, batch_size):
        feed = {inputs_: x_t,
                labels_: y_t,
                keep_prob_: 1}
        batch_acc = sess.run(accuracy, feed_dict=feed)
        test_acc.append(batch_acc)
    print("Test accuracy: {:.6f}".format(np.mean(test_acc)))
y_t is an n x 3 NumPy array.
I want to get y_pred in a similar format.
Thanks
soft = tf.nn.softmax(logits)
This will be your probability distribution, such that sum(soft) = 1. Every value in this array indicates how sure the model is about that class.
pred = sess.run(soft, feed_dict=feed)
print(pred)
So basically all I do is add an extra softmax: since it is built into the loss you calculate, you have to apply it again to predict. Then I ask for that output and feed the same feed_dict again.
Hope this helped!
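If you need the result in the same one-hot layout as y_t, a small follow-up sketch (assuming n_classes from the question's graph and pred from the snippet above):

import numpy as np

class_ids = np.argmax(pred, axis=1)           # most likely class per sample
y_pred_onehot = np.eye(n_classes)[class_ids]  # n x n_classes, same layout as y_t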