Loss when starting fine tuning is higher than loss from transfer learning - tensorflow

Since I start fine tuning with the weights learned by transfer learning, I would expect the loss to be the same or less. However it looks like it starts fine tuning using a different set of starting weights.
Code to start transfer learning:
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
base_model.trainable = False
model = tf.keras.Sequential([
base_model,
tf.keras.layers.GlobalAveragePooling2D(),
tf.keras.layers.Dense(units=3, activation='sigmoid')
])
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
epochs = 1000
callback = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
history = model.fit(train_generator,
steps_per_epoch=len(train_generator),
epochs=epochs,
validation_data=val_generator,
validation_steps=len(val_generator),
callbacks=[callback],)
Output from last epoch:
Epoch 29/1000
232/232 [==============================] - 492s 2s/step - loss: 0.1298 - accuracy: 0.8940 - val_loss: 0.1220 - val_accuracy: 0.8937
Code to start fine tuning:
model.trainable = True
# Fine-tune from this layer onwards
fine_tune_at = -20
# Freeze all the layers before the `fine_tune_at` layer
for layer in model.layers[:fine_tune_at]:
layer.trainable = False
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),
loss='binary_crossentropy',
metrics=['accuracy'])
history_fine = model.fit(train_generator,
steps_per_epoch=len(train_generator),
epochs=epochs,
validation_data=val_generator,
validation_steps=len(val_generator),
callbacks=[callback],)
But this is what I see for the first few epochs:
Epoch 1/1000
232/232 [==============================] - ETA: 0s - loss: 0.3459 - accuracy: 0.8409/usr/local/lib/python3.7/dist-packages/PIL/Image.py:960: UserWarning: Palette images with Transparency expressed in bytes should be converted to RGBA images
"Palette images with Transparency expressed in bytes should be "
232/232 [==============================] - 509s 2s/step - loss: 0.3459 - accuracy: 0.8409 - val_loss: 0.7755 - val_accuracy: 0.7262
Epoch 2/1000
232/232 [==============================] - 502s 2s/step - loss: 0.1889 - accuracy: 0.9066 - val_loss: 0.5628 - val_accuracy: 0.8881
Eventually the loss drops and passes the transfer learning loss:
Epoch 87/1000
232/232 [==============================] - 521s 2s/step - loss: 0.0232 - accuracy: 0.8312 - val_loss: 0.0481 - val_accuracy: 0.8563
Why was the loss in the first epoch of fine tuning higher than the last loss from transfer learning?

Related

Transfer learning model not learning much, validation plateau at 45% while train go up to 90%

So it's been days i've been working on this model on image classification. I have 70000 images and 375 classes. I've tried training it with Vgg16, Xception, Resnet & Mobilenet ... and I always get the same limit of 45% on the validation.
As you can see here
I've tried adding dropout layers and regularization and it gets the same result for validation.
Data augmentation didn't do much to help either
Any ideas why this isn't working ?
Here's a snipped of the code of the last model I used:
from keras.models import Sequential
from keras.layers import Dense
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import regularizers
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
validation_datagen = ImageDataGenerator(rescale=1./255)
target_size = (height, width)
datagen = ImageDataGenerator(rescale=1./255,
validation_split=0.2)
train_generator = datagen.flow_from_directory(
path,
target_size=(height, width),
batch_size=batchSize,
shuffle=True,
class_mode='categorical',
subset='training')
validation_generator = datagen.flow_from_directory(
path,
target_size=(height, width),
batch_size=batchSize,
class_mode='categorical',
subset='validation')
num_classes = len(train_generator.class_indices)
xception_model = Xception(weights='imagenet',input_shape=(width, height, 3), include_top=False,classes=num_classes)
x = xception_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
out = Dense(num_classes, activation='softmax')(x)
opt = Adam()
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
n_epochs = 15
history = model.fit(
train_generator,
steps_per_epoch = train_generator.samples // batchSize,
validation_data = validation_generator,
validation_steps = validation_generator.samples // batchSize,
verbose=1,
epochs = n_epochs)
Yes, you may need a balanced dataset among each category in your dataset for better model training performance. Please try again by changing class_mode='sparse' and loss='sparse_categorical_crossentropy' because you are using the image dataset. Also freeze the pretrained model layers 'xception_model.trainable = False'.
Check the below code: (I have used a flower dataset of 5 classes)
xception_model = tf.keras.applications.Xception(weights='imagenet',input_shape=(width, height, 3), include_top=False,classes=num_classes)
xception_model.trainable = False
x = xception_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(32, activation='relu')(x)
out = Dense(num_classes, activation='softmax')(x)
opt = tf.keras.optimizers.Adam()
model = keras.Model(inputs=xception_model.input, outputs=out)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_generator, epochs=10, validation_data=validation_generator)
Output:
Epoch 1/10
217/217 [==============================] - 23s 95ms/step - loss: 0.5945 - accuracy: 0.7793 - val_loss: 0.4610 - val_accuracy: 0.8337
Epoch 2/10
217/217 [==============================] - 20s 91ms/step - loss: 0.3439 - accuracy: 0.8797 - val_loss: 0.4550 - val_accuracy: 0.8419
Epoch 3/10
217/217 [==============================] - 20s 93ms/step - loss: 0.2570 - accuracy: 0.9150 - val_loss: 0.4437 - val_accuracy: 0.8384
Epoch 4/10
217/217 [==============================] - 20s 91ms/step - loss: 0.2040 - accuracy: 0.9340 - val_loss: 0.4592 - val_accuracy: 0.8477
Epoch 5/10
217/217 [==============================] - 20s 91ms/step - loss: 0.1649 - accuracy: 0.9494 - val_loss: 0.4686 - val_accuracy: 0.8512
Epoch 6/10
217/217 [==============================] - 20s 92ms/step - loss: 0.1301 - accuracy: 0.9589 - val_loss: 0.4805 - val_accuracy: 0.8488
Epoch 7/10
217/217 [==============================] - 20s 93ms/step - loss: 0.0966 - accuracy: 0.9754 - val_loss: 0.4993 - val_accuracy: 0.8442
Epoch 8/10
217/217 [==============================] - 20s 91ms/step - loss: 0.0806 - accuracy: 0.9806 - val_loss: 0.5488 - val_accuracy: 0.8372
Epoch 9/10
217/217 [==============================] - 20s 91ms/step - loss: 0.0623 - accuracy: 0.9864 - val_loss: 0.5802 - val_accuracy: 0.8360
Epoch 10/10
217/217 [==============================] - 22s 100ms/step - loss: 0.0456 - accuracy: 0.9896 - val_loss: 0.6005 - val_accuracy: 0.8360

How to get more accuracy in imbalanced binary time series classifier

I am new to machine learning and I am working on a problem of sequence classification.
The data in the dataset consists of sequences of shape(20,9)
9 features of length 20.
I have tried below model but I have failed to get good accuracy.
f = 32
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(units = 4*f, activation='tanh', return_sequences = True, input_shape = (window_length, 9)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.LSTM(units = 4*f, activation='tanh', return_sequences = True))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.LSTM(units = 4*f, activation='tanh'))
model.add(tf.keras.layers.Dense(units = 256, activation = 'tanh'))
model.add(tf.keras.layers.Dense(units = 64, activation = 'tanh'))
model.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', TP, TN, FP, FN])
model.build()
model.summary()
history = model.fit(X_train, y_train, epochs=256, batch_size=256, callbacks=[WeightsSaver(model,1)], validation_data=(X_test, y_test))
I have also tried more deep network, CNN_LSTMs and CNN_LSTM with dense layers at the end.
I am not getting good confustion matrix. It is mostly giving me false negatives even on train set.
I have also tried to balance the data using SMOTE and SVM_SMOTE still it is giving me low accuracy.
The problems I am facing:
1)Without resampling: majority class prevails and very less minority class.
2)With resampling: I am getting many false positives on train and test data.
I have pasted the data on below kaggle page.
https://www.kaggle.com/jgranth/binary-seqclassification-of-input-shape209
Can someone expert in this field help to share experiences and possible solution or point out mistakes that I could be making.
Below is the output I am getting on train without resampling.
Epoch 1/256
2602/2602 [==============================] - 143s 48ms/step - loss: 0.4011 - accuracy: 0.8606 - TP: 4.3987e-04 - TN: 0.8601 - FP: 7.5963e-04 - FN: 0.1387 - val_loss: 0.4525 - val_accuracy: 0.8285 - val_TP: 5.8129e-04 - val_TN: 0.8281 - val_FP: 5.8710e-04 - val_FN: 0.1708
Epoch 2/256
2602/2602 [==============================] - 122s 47ms/step - loss: 0.3956 - accuracy: 0.8609 - TP: 3.2877e-04 - TN: 0.8605 - FP: 3.4228e-04 - FN: 0.1388 - val_loss: 0.4554 - val_accuracy: 0.8285 - val_TP: 0.0000e+00 - val_TN: 0.8286 - val_FP: 5.2316e-05 - val_FN: 0.1713
Epoch 3/256
2602/2602 [==============================] - 123s 47ms/step - loss: 0.3937 - accuracy: 0.8609 - TP: 4.1434e-04 - TN: 0.8605 - FP: 3.8132e-04 - FN: 0.1387 - val_loss: 0.4495 - val_accuracy: 0.8285 - val_TP: 8.7193e-05 - val_TN: 0.8286 - val_FP: 6.9754e-05 - val_FN: 0.1713
Epoch 4/256
2602/2602 [==============================] - 122s 47ms/step - loss: 0.3926 - accuracy: 0.8610 - TP: 5.7198e-04 - TN: 0.8604 - FP: 4.8640e-04 - FN: 0.1385 - val_loss: 0.4479 - val_accuracy: 0.8289 - val_TP: 0.0016 - val_TN: 0.8275 - val_FP: 0.0012 - val_FN: 0.1698

Why accuracy stays zero in Keras LSTM while other metrics improve when training?

I have constructed a LSTM model for binary text classification problem. Here the labels are one hot encoded. Following is my constructed model
model = Sequential()
model.add(Embedding(input_dim=dictionary_length, output_dim=60, input_length=max_word_count))
model.add(LSTM(600))
model.add(Dense(units=max_word_count, activation='tanh', kernel_regularizer=regularizers.l2(0.04), activity_regularizer=regularizers.l2(0.015)))
model.add(Dense(units=max_word_count, activation='relu', kernel_regularizer=regularizers.l2(0.01), bias_regularizer=regularizers.l2(0.01)))
model.add(Dense(2, activation='softmax', kernel_regularizer=regularizers.l2(0.001)))
adam_optimizer = Adam(lr=0.001, decay=0.0001)
model.compile(loss='categorical_crossentropy', optimizer=adam_optimizer,
metrics=[tf.keras.metrics.Accuracy(), metrics.AUC(), metrics.Precision(), metrics.Recall()])
When I fit this model the accuracy stays 0 all the time but other matrices get improved. What is the issue here?
Epoch 1/3
94/94 [==============================] - 5s 26ms/step - loss: 3.4845 - accuracy: 0.0000e+00 - auc_4: 0.7583 - precision_4: 0.7251 - recall_4: 0.7251
Epoch 2/3
94/94 [==============================] - 2s 24ms/step - loss: 0.4772 - accuracy: 0.0000e+00 - auc_4: 0.9739 - precision_4: 0.9249 - recall_4: 0.9249
Epoch 3/3
94/94 [==============================] - 3s 27ms/step - loss: 0.1786 - accuracy: 0.0000e+00 - auc_4: 0.9985 - precision_4: 0.9860 - recall_4: 0.9860
Because you need CategoricalAccuracy : https://www.tensorflow.org/api_docs/python/tf/keras/metrics/CategoricalAccuracy to mesuring the accuracy of this problem.
model.compile(loss='categorical_crossentropy', optimizer=adam_optimizer,
metrics=[tf.keras.metrics.CategoricalAccuracy(), metrics.AUC(), metrics.Precision(), metrics.Recall()])

Saving and loading of Keras model not working

I've been trying to save and reupload a model and whenever I do that the accuracy always goes down.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu', input_shape=(IMG_SIZE,IMG_SIZE,3)))
model.add(tf.keras.layers.Conv2D(32, kernel_size=3, activation='relu'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(len(SURFACE_TYPES), activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy',
optimizer='adam',
metrics=['acc'])
history = model.fit(
train_ds,
validation_data=val_ds,
epochs=EPOCHS,
validation_steps=10)
Output:
Epoch 1/3
84/84 [==============================] - 2s 19ms/step - loss: 1.9663 - acc: 0.6258 - val_loss: 0.8703 - val_acc: 0.6867
Epoch 2/3
84/84 [==============================] - 1s 18ms/step - loss: 0.2865 - acc: 0.9105 - val_loss: 0.4494 - val_acc: 0.8667
Epoch 3/3
84/84 [==============================] - 1s 18ms/step - loss: 0.1409 - acc: 0.9574 - val_loss: 0.3614 - val_acc: 0.9000
This followed by running these commands to produce outputs result in the same training loss but different training accuracies. The weights and structures of the models are also identical.
model.save("my_model2.h5")
model2 = load_model("my_model2.h5")
model2.evaluate(train_ds)
model.evaluate(train_ds)
Output:
84/84 [==============================] - 1s 9ms/step - loss: 0.0854 - acc: 0.0877
84/84 [==============================] - 1s 9ms/step - loss: 0.0854 - acc: 0.9862
[0.08536089956760406, 0.9861862063407898]
i have shared reference link click here
it has all formats to save & load your model

Training CNN with Keras was fast, now it is many times slower

I was training a CNN with 120 thousand pictures, and it was ok. About 320 seconds per epoch.
3073/3073 [==============================] - 340s 110ms/step - loss: 0.4146 - accuracy: 0.8319 - val_loss: 0.3776 - val_accuracy: 0.8489
Epoch 2/20
3073/3073 [==============================] - 324s 105ms/step - loss: 0.3462 - accuracy: 0.8683 - val_loss: 0.3241 - val_accuracy: 0.8770
Epoch 3/20
3073/3073 [==============================] - 314s 102ms/step - loss: 0.3061 - accuracy: 0.8878 - val_loss: 0.2430 - val_accuracy: 0.9052
Epoch 4/20
3073/3073 [==============================] - 327s 107ms/step - loss: 0.2851 - accuracy: 0.8977 - val_loss: 0.2236 - val_accuracy: 0.9149
Epoch 5/20
3073/3073 [==============================] - 318s 104ms/step - loss: 0.2725 - accuracy: 0.9033 - val_loss: 0.2450 - val_accuracy: 0.9119
Epoch 6/20
3073/3073 [==============================] - 309s 101ms/step - loss: 0.2642 - accuracy: 0.9065 - val_loss: 0.2168 - val_accuracy: 0.9218
Epoch 7/20
3073/3073 [==============================] - 311s 101ms/step - loss: 0.2589 - accuracy: 0.9083 - val_loss: 0.1996 - val_accuracy: 0.9286
Epoch 8/20
3073/3073 [==============================] - 317s 103ms/step - loss: 0.2538 - accuracy: 0.9110 - val_loss: 0.2653 - val_accuracy: 0.9045
Epoch 9/20
3073/3073 [==============================] - 1346s 438ms/step - loss: 0.2497 - accuracy: 0.9116 - val_loss: 0.2353 - val_accuracy: 0.9219
Epoch 10/20
3073/3073 [==============================] - 1434s 467ms/step - loss: 0.2457 - accuracy: 0.9141 - val_loss: 0.1943 - val_accuracy: 0.9326`
Then, after a few tests, it became 12x (times) slower. With the same parameters. I thought it was because the CPU got too hot, but it never worked like before again. I tried reseting my keras session, tried reinstalling Ubuntu 18.04, tried setting up my GPU, tried installing on conda, but none of these worked. Also tried other codes, with different datasets, which are also slower. Quite frustrating.
No other symptom of a burnt CPU problem. Everything runs as always did.
Tensorflow 2.2.0
I would appreciate some help.
Edit: actually, if I wait one hour, it gets back to normal speed! Now I think it could be some problem with the first import from the disk into the memory.
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.models import model_from_json
from keras.preprocessing import image
import os
#======= GPU
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" #disables GPU
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '4' #verbose
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
from tensorflow.compat.v1.keras import backend as K
K.clear_session()
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
K.set_session(session)
#======= GPU
print(tf.__version__)
callbacks = [
tf.keras.callbacks.EarlyStopping(
# Stop training when `val_loss` is no longer improving
monitor='val_accuracy',
# "no longer improving" being defined as "no better than 1e-2 less"
min_delta=1e-3,
# "no longer improving" being further defined as "for at least 2 epochs"
patience=2,
verbose=1)
]
train_datagen = ImageDataGenerator(rescale = 1./255,
shear_range = 0.2,
zoom_range = 0.2,
horizontal_flip = True)
#flow_from_directory knows categories are divided by folders
training_set = train_datagen.flow_from_directory('dataset/training_set', #applies a method to the object
target_size = (64, 64),
batch_size = 32, #minibatch
class_mode = 'categorical')
print(training_set.class_indices)
test_datagen = ImageDataGenerator(rescale = 1./255)
test_set = test_datagen.flow_from_directory('dataset/test_set',
target_size = (64, 64), #has to be the same as that of the training set
batch_size = 32,
class_mode = 'categorical')
cnn = tf.keras.models.Sequential() #an ANN base
#convolution
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[64, 64, 3])) #convolutional layer
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2)) #maxPooling
#new convolution
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu')) #remove input_shape
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
cnn.add(tf.keras.layers.Flatten())
cnn.add(tf.keras.layers.Dense(units=128, activation='relu'))
cnn.add(tf.keras.layers.Dense(units=2, activation='softmax'))
cnn.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
print("Got here.")
cnn.fit(x = training_set, validation_data = test_set, epochs = 20)
# serialize model to JSON
model_json = cnn.to_json()
with open("model.json", "w") as json_file:
json_file.write(model_json)
# serialize weights to HDF5
cnn.save_weights("model.h5")
print("Saved model to disk.")