Model performance on test set fluctuates wildly from epoch to epoch - tensorflow

I have been trying to train a binary classifier for photos with the following architecture:
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Activation, BatchNormalization, Concatenate,
                                     Conv2D, Dense, Flatten, InputLayer,
                                     MaxPooling2D, Reshape)

class PatchNDepthBasedCNN:
    @staticmethod
    def build(width=256, height=256, depth=3):
        # initialize the model along with the input shape to be
        # "channels last" and the channels dimension itself
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
        # 1
        model.add(Conv2D(32, (3, 3), strides=1, padding="same", input_shape=inputShape))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(MaxPooling2D(pool_size=(3, 3), strides=2, padding="same"))
        # 2
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(25, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        x1 = MaxPooling2D(pool_size=(3, 3), strides=2, padding="same")
        model.add(x1)
        # the reshaped tensor will be used later in concatenate
        print(x1.output.shape)
        x1r = Reshape((int(width / 8), int(height / 8), 128))(x1.output)
        # 3
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(25, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        x2 = MaxPooling2D(pool_size=(3, 3), strides=2, padding="same")
        model.add(x2)
        # 4
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(25, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        x3 = Conv2D(32, (3, 3), strides=1, padding="same")
        model.add(x3)
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        # 5 - concat
        c = Concatenate()([x1r, x2.output, x3.output])
        model.add(InputLayer(input_tensor=c))
        # 6
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(25, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        # 7
        final_layer = Conv2D(2, (3, 3), strides=1, padding="same")
        model.add(final_layer)
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        # * first (and only) set of FC => RELU layers
        model.add(Flatten())
        model.add(Dense(64))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        # model.add(Dropout(0.5))
        # softmax classifier
        model.add(Dense(2))
        model.add(Activation("softmax"))
        return model
new_model = PatchNDepthBasedCNN.build(width=IMG_DIM, height=IMG_DIM, depth=3)
new_model.compile(
    optimizer="rmsprop",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
During training, I save the model at each epoch (for the purposes of the experiment). I had always assumed that the latest model (the latest epoch) should be the preferred one, as long as the model hasn't started to overfit. Yet when I assess each variant (epoch) of the trained model on the test set (which comes from a different data distribution), I get randomly fluctuating results from epoch to epoch: the test accuracy at epoch 60 can be around 72%, at epoch 61 it jumps to 97%, and at epoch 63 it drops to 80%.
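To be concrete, the saving and the per-epoch evaluation are done roughly like the sketch below (the checkpoint path, train_ds/test_ds and the epoch count are illustrative placeholders, not my exact code):
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model

# save a snapshot of the weights after every epoch, not just the best one
checkpoint = ModelCheckpoint(filepath="snapshots/model_epoch_{epoch:03d}.h5",
                             save_best_only=False)
new_model.fit(train_ds, epochs=80, callbacks=[checkpoint])

# score every saved epoch on the (differently distributed) test set
for epoch in range(1, 81):
    m = load_model(f"snapshots/model_epoch_{epoch:03d}.h5")
    loss, acc = m.evaluate(test_ds, verbose=0)
    print(f"epoch {epoch}: test accuracy {acc:.3f}")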
At the same time, if I replace the last two layers of the model and change the loss function to approximate an SVM, I get overall worse results, but the tendency from epoch to epoch is clear: test accuracy slowly rises from the 50% baseline to around 78%, and then fluctuates within a small margin:
from tensorflow.keras.regularizers import l2

class PatchNDepthBasedCNN:
    @staticmethod
    def build(width=256, height=256, depth=3):
        # initialize the model along with the input shape to be
        # "channels last" and the channels dimension itself
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
        # 1
        model.add(Conv2D(32, (3, 3), strides=1, padding="same", input_shape=inputShape))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(MaxPooling2D(pool_size=(3, 3), strides=2, padding="same"))
        # 2
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(25, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        x1 = MaxPooling2D(pool_size=(3, 3), strides=2, padding="same")
        model.add(x1)
        # the reshaped tensor will be used later in concatenate
        print(x1.output.shape)
        x1r = Reshape((int(width / 8), int(height / 8), 128))(x1.output)
        # 3
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(25, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        x2 = MaxPooling2D(pool_size=(3, 3), strides=2, padding="same")
        model.add(x2)
        # 4
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(25, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        x3 = Conv2D(32, (3, 3), strides=1, padding="same")
        model.add(x3)
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        # 5 - concat
        c = Concatenate()([x1r, x2.output, x3.output])
        model.add(InputLayer(input_tensor=c))
        # 6
        model.add(Conv2D(32, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Conv2D(25, (3, 3), strides=1, padding="same"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        # 7
        final_layer = Conv2D(2, (3, 3), strides=1, padding="same")
        model.add(final_layer)
        model.add(BatchNormalization(axis=chanDim))
        model.add(Activation("relu"))
        model.add(Flatten())
        model.add(Dense(256))
        model.add(Activation("relu"))
        model.add(Dense(2, kernel_regularizer=l2(0.0001)))
        model.add(Activation('linear'))
        return model
new_model = PatchNDepthBasedCNN.build(width=IMG_DIM, height=IMG_DIM, depth=3)
new_model.compile(loss='hinge',
                  optimizer='adadelta',
                  metrics=['accuracy'])
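One caveat about this variant (an assumption on my part, since the label encoding isn't shown): Keras' hinge loss computes mean(maximum(1 - y_true * y_pred, 0)) and expects targets encoded as -1/+1, so if the one-hot labels from the softmax variant are reused, they need remapping first, e.g.:
# remap one-hot {0, 1} targets to the {-1, +1} encoding hinge loss expects
y_train_svm = 2.0 * y_train - 1.0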
What are the possible reasons/explanations for this behavior?
What advice could you give me to try to achieve better results (if that can be inferred from the data provided)?
Thank you for considering my question!
EDIT: Removed unused code (the learning rate and other unused parameters in the model; they weren't actually taken into account while training, I had just forgotten to remove them).

Related

Why is a tensorflow model with Spatial Pyramid Pooling layers slower than a sequential, vanilla model?

I'm training a U-net type model with a minor variation in the architecture: an Atrous Spatial Pyramid Pooling (ASPP) layer at the bottleneck, after the encoder. I profiled the model during one forward pass and used TensorBoard's tracer view to see which part of the model has the highest latency.
Profiler tracer view with ASPP layer
This revealed a lot of idle GPU time during the ASPP computation. I double-checked by removing the ASPP layer and connecting the encoder directly to the decoder; in that experiment, the idle time disappeared.
Profiler tracer view without ASPP layer
I understand that the second model is a bit smaller than the first.
This is what my model looks like with the ASPP layer; to profile the model without it, I simply commented those layers out.
With ASPP
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.constraints import max_norm

def get_custom_deeplab(image_size: tuple, num_classes: int):
    """
    This model uses a vanilla CNN backbone. It also uses UpSampling2D
    in place of Conv2DTranspose.
    """
    input_layer = keras.Input(shape=(image_size[0], image_size[1], 3))
    conv1 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(input_layer)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(pool1)
    conv2 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(128, (1, 1), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(pool2)
    conv3 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(128, (1, 1), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(pool3)
    conv4 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    # ASPP layers
    out_1 = Conv2D(256, (1, 1), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), dilation_rate=1, padding='same')(pool4)
    out_6 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), dilation_rate=6, padding='same')(pool4)
    out_12 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), dilation_rate=10, padding='same')(pool4)
    out_14 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), dilation_rate=14, padding='same')(pool4)
    x = layers.Concatenate(axis=-1)([out_1, out_6, out_12, out_14])
    # ASPP's output
    x = Conv2D(256, (1, 1), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), dilation_rate=1, padding='same')(x)
    x = layers.UpSampling2D((2, 2), interpolation="bilinear")(x)
    skip_connection_1 = pool3
    x = layers.Concatenate(axis=-1)([x, skip_connection_1])
    x = Conv2D(128, (1, 1), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(x)
    x = Conv2D(256, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(x)
    x = layers.UpSampling2D((2, 2), interpolation="bilinear")(x)
    skip_connection_2 = pool2
    x = layers.Concatenate(axis=-1)([x, skip_connection_2])
    x = Conv2D(128, (1, 1), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(x)
    x = Conv2D(256, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(x)
    x = layers.UpSampling2D((2, 2), interpolation="bilinear")(x)
    x = Conv2D(64, (3, 3), activation='relu', kernel_initializer='lecun_uniform', kernel_constraint=max_norm(3), padding='same')(x)
    x = layers.UpSampling2D((2, 2), interpolation="bilinear")(x)
    x = Conv2D(
        num_classes,
        kernel_size=1,
        padding="same",
        use_bias=True,
        kernel_initializer=keras.initializers.HeNormal(),
    )(x)
    return tf.keras.Model(inputs=input_layer, outputs=x)
I would like to know whether there is any workaround to mitigate the GPU idle time when the model has layers like ASPP.
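For reference, a setup like the following sketch should reproduce the tracer view described above (log_dir, train_ds and the traced batch range are placeholders):
import tensorflow as tf

# trace a few training batches so the tracer view shows per-op GPU timing
tb_callback = tf.keras.callbacks.TensorBoard(log_dir="logs/aspp_profile",
                                             profile_batch=(10, 15))
model.fit(train_ds, epochs=1, callbacks=[tb_callback])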

Keras FER-2013 model predict for a single image

I'm pretty new to machine learning. I followed a tutorial to classify whether the user is smiling or not, and created this code:
def get_model(input_size, classes=7):
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu', input_shape=input_size))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2, 2))
    model.add(Dropout(0.25))
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Conv2D(256, kernel_size=(3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(classes, activation='softmax'))
    # Compiling the model
    model.compile(optimizer=Adam(lr=0.0001, decay=1e-6),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
If I try to predict an array from flow_from_directory it works fine, but I would like to predict using the following code:
final_image = cv2.imread('./tesimg.jpeg')
final_image = np.expand_dims(final_image, axis=0)
final_image = final_image/255.0
The problem is that I'm getting this error:
UnimplementedError: Graph execution error:
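A likely cause (an assumption, since the training pipeline isn't shown) is that the single image doesn't match the input the model was trained on: FER-2013 images are 48x48 grayscale, while cv2.imread returns a 3-channel BGR image at its native size. A preprocessing sketch along these lines would make the shapes match:
import cv2
import numpy as np

final_image = cv2.imread('./tesimg.jpeg')
final_image = cv2.cvtColor(final_image, cv2.COLOR_BGR2GRAY)  # match the 1-channel input
final_image = cv2.resize(final_image, (48, 48))              # match the training size
final_image = final_image.astype('float32') / 255.0
final_image = np.expand_dims(final_image, axis=-1)           # add the channel dimension
final_image = np.expand_dims(final_image, axis=0)            # add the batch dimension
prediction = model.predict(final_image)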

Tensorflow returns 10% validation accuracy for VGG model (irrespective of number of epochs)?

I am trying to train a neural network on CIFAR-10 using the keras package in tensorflow. The network is VGG-16, which I borrowed directly from the official keras models.
The definition is:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Model

def cnn_model(nb_classes=10):
    # VGG-16 official keras model
    img_input = Input(shape=(32, 32, 3))
    vgg_layer = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
    vgg_layer = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(vgg_layer)
    vgg_layer = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(vgg_layer)
    # Block 2
    vgg_layer = Conv2D(64, (3, 3), activation='relu', padding='same', name='block2_conv1')(vgg_layer)
    vgg_layer = Conv2D(64, (3, 3), activation='relu', padding='same', name='block2_conv2')(vgg_layer)
    vgg_layer = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(vgg_layer)
    # Block 3
    vgg_layer = Conv2D(128, (3, 3), activation='relu', padding='same', name='block3_conv1')(vgg_layer)
    vgg_layer = Conv2D(128, (3, 3), activation='relu', padding='same', name='block3_conv2')(vgg_layer)
    vgg_layer = Conv2D(128, (3, 3), activation='relu', padding='same', name='block3_conv3')(vgg_layer)
    vgg_layer = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(vgg_layer)
    # Block 4
    vgg_layer = Conv2D(256, (3, 3), activation='relu', padding='same', name='block4_conv1')(vgg_layer)
    vgg_layer = Conv2D(256, (3, 3), activation='relu', padding='same', name='block4_conv2')(vgg_layer)
    vgg_layer = Conv2D(256, (3, 3), activation='relu', padding='same', name='block4_conv3')(vgg_layer)
    vgg_layer = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(vgg_layer)
    # Classification block
    vgg_layer = Flatten(name='flatten')(vgg_layer)
    vgg_layer = Dense(1024, activation='relu', name='fc1')(vgg_layer)
    vgg_layer = Dense(1024, activation='relu', name='fc2')(vgg_layer)
    vgg_layer = Dense(nb_classes, activation='softmax', name='predictions')(vgg_layer)
    return Model(inputs=img_input, outputs=vgg_layer)
However, during training I always get both train and validation accuracy of 0.1, i.e. 10%:
validation accuracy for adv. training of model for epoch 1= 0.1
validation accuracy for adv. training of model for epoch 2= 0.1
validation accuracy for adv. training of model for epoch 3= 0.1
validation accuracy for adv. training of model for epoch 4= 0.1
validation accuracy for adv. training of model for epoch 5= 0.1
As a debugging step, whenever I swap in any other model (e.g. a simple CNN), it works perfectly well, which shows that the rest of the script is fine.
For example, the following CNN model works perfectly well and achieves an accuracy of 75% after 30 epochs:
def cnn_model(nb_classes=10, num_hidden=1024, weight_decay=0.0001, cap_factor=4):
    model = Sequential()
    input_shape = (32, 32, 3)
    model.add(Conv2D(32*cap_factor, kernel_size=(3,3), strides=(1,1), kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal", activation='relu', padding='same', input_shape=input_shape))
    model.add(Conv2D(32*cap_factor, kernel_size=(3,3), strides=(1,1), kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal", activation="relu", padding="same"))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    model.add(Conv2D(64*cap_factor, kernel_size=(3,3), strides=(1,1), kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal", activation="relu", padding="same"))
    model.add(Conv2D(64*cap_factor, kernel_size=(3,3), strides=(1,1), kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal", activation="relu", padding="same"))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(num_hidden, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes, activation='softmax'))
    return model
Both models appear to be correctly defined, yet one works fine while the other doesn't learn at all. I also tried writing the VGG model as a Sequential model, i.e. similar to the second one, but it still gave me 10% accuracy.
Even if the model doesn't update any weights, the "he_normal" initializer should easily obtain much better accuracy than pure chance. It appears that somehow tensorflow is computing output logits that amount to pure chance.
I would be grateful if someone could point out my mistake.
Your 10% accuracy corresponds suspiciously well with the number of classes (10). That makes me think that, regardless of the training, your model always gives the same answer for all categories, which constantly yields 10% accuracy on 10 classes.
Check whether the output of the untrained model is always the same class.
If so, check the initial weights of the model; it is probably wrongly initialized, the gradients are zero, and it can't converge.
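A quick way to run that check (a sketch; x_batch stands for any small batch of training images):
import numpy as np

# inspect the untrained model's softmax outputs on a small batch
probs = model.predict(x_batch)
print(probs[:3])                      # near-uniform rows (~0.1 each) look healthy
print(np.argmax(probs, axis=1))       # the same class for every input is the bad sign

# inspect the initial weights of the first conv layer
w, b = model.layers[1].get_weights()  # layers[0] is the Input layer in the VGG definition
print(w.std(), b.std())               # a std of ~0 would mean a degenerate initialization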

Conv neural network to tell standard 52-card deck apart

I'm using the keras model below to train a neural network to tell the 52 playing cards apart (ranks 23456789TJQKA, each in Clubs, Diamonds, Hearts and Spades).
The model works quite well but occasionally has trouble telling Clubs and Diamonds apart, as they are the most similar (and the difference is quite granular). Does anybody have suggestions on how I can improve the model below?
I've tried different things, like converting everything to black and white, grayscale, smoothing, augmentation (see the sketch after the model below), etc., but nothing seems to solve the problem.
The pictures are all 15x50 pixels, with 1 channel, so the input shape is (15, 50, 1).
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape, activation='relu', padding='same'))
model.add(Dropout(0.2))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(Dropout(0.2))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(1024, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))
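For reference, the augmentation I tried was along these lines (a sketch with illustrative ranges, not my exact pipeline; the crops are only 15x50, so anything aggressive destroys the fine suit detail):
from keras.preprocessing.image import ImageDataGenerator

# mild geometric jitter only, to preserve the granular Club/Diamond difference
datagen = ImageDataGenerator(width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.05,
                             rescale=1.0 / 255)
model.fit(datagen.flow(X_train, y_train, batch_size=32), epochs=50)
# (on older standalone Keras versions, use model.fit_generator instead of model.fit)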

Obtaining output of an Intermediate layer in TensorFlow/Keras

I'm trying to obtain the output of an intermediate layer in Keras. Following is my code:
XX = model.input # Keras Sequential() model object
YY = model.layers[0].output
F = K.function([XX], [YY]) # K refers to keras.backend
Xaug = X_train[:9]
Xresult = F([Xaug.astype('float32')])
Running this, I got an error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'dropout_1/keras_learning_phase' with dtype bool
I came to know that, because I'm using a dropout layer in my model, I have to specify a learning_phase() flag in my function, as per the Keras documentation.
I changed my code to the following:
XX = model.input
YY = model.layers[0].output
F = K.function([XX, K.learning_phase()], [YY])
Xaug = X_train[:9]
Xresult = F([Xaug.astype('float32'), 0])
Now I'm getting a new error that I'm unable to figure out:
TypeError: Cannot interpret feed_dict key as Tensor: Can not convert a int into a Tensor.
Any help would be appreciated.
PS: I'm new to TensorFlow and Keras.
Edit 1:
Following is the complete code that I'm using. I'm using the Spatial Transformer Network as discussed in this NIPS paper and its Keras implementation here.
input_shape = X_train.shape[1:]
# initial weights
b = np.zeros((2, 3), dtype='float32')
b[0, 0] = 1
b[1, 1] = 1
W = np.zeros((100, 6), dtype='float32')
weights = [W, b.flatten()]
locnet = Sequential()
locnet.add(Convolution2D(64, (3, 3), input_shape=input_shape, padding='same'))
locnet.add(Activation('relu'))
locnet.add(Convolution2D(64, (3, 3), padding='same'))
locnet.add(Activation('relu'))
locnet.add(MaxPooling2D(pool_size=(2, 2)))
locnet.add(Convolution2D(128, (3, 3), padding='same'))
locnet.add(Activation('relu'))
locnet.add(Convolution2D(128, (3, 3), padding='same'))
locnet.add(Activation('relu'))
locnet.add(MaxPooling2D(pool_size=(2, 2)))
locnet.add(Convolution2D(256, (3, 3), padding='same'))
locnet.add(Activation('relu'))
locnet.add(Convolution2D(256, (3, 3), padding='same'))
locnet.add(Activation('relu'))
locnet.add(MaxPooling2D(pool_size=(2, 2)))
locnet.add(Dropout(0.5))
locnet.add(Flatten())
locnet.add(Dense(100))
locnet.add(Activation('relu'))
locnet.add(Dense(6, weights=weights))
model = Sequential()
model.add(SpatialTransformer(localization_net=locnet,
                             output_size=(128, 128), input_shape=input_shape))
model.add(Convolution2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(128, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Convolution2D(128, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(256, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Convolution2D(256, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(256, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Convolution2D(256, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
#==============================================================================
# Start Training
#==============================================================================
#define training results logger callback
csv_logger = keras.callbacks.CSVLogger(training_logs_path+'.csv')
model.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=20,
          validation_data=(X_valid, y_valid),
          shuffle=True,
          callbacks=[SaveModelCallback(), csv_logger])
#==============================================================================
# Visualize what Transformer layer has learned
#==============================================================================
XX = model.input
YY = model.layers[0].output
F = K.function([XX, K.learning_phase()], [YY])
Xaug = X_train[:9]
Xresult = F([Xaug.astype('float32'), 0])
# input images
for i in range(9):
    plt.subplot(3, 3, i+1)
    plt.imshow(np.squeeze(Xaug[i]))
    plt.axis('off')
# output of the transformer layer
for i in range(9):
    plt.subplot(3, 3, i + 1)
    plt.imshow(np.squeeze(Xresult[0][i]))
    plt.axis('off')
The easiest way is to create a new model in Keras, without calling the backend. You'll need the functional model API for this:
from keras.models import Model
XX = model.input
YY = model.layers[0].output
new_model = Model(XX, YY)
Xaug = X_train[:9]
Xresult = new_model.predict(Xaug)
You could try:
model1 = tf.keras.models.Sequential(base_model.layers[:1])
model2 = tf.keras.models.Sequential(base_model.layers[1:])
Xaug = X_train[:9]
out = model1(Xaug)
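Here base_model stands for your trained Sequential model; out then holds the first layer's activations, and (assuming the split preserves the input specification) chaining model2(out) should reproduce the full model's predictions, since both wrappers reuse the original layers and their weights.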