Why does my class-weighted model perform so strangely/bad? - tensorflow

Here are the accuracy and loss plots for the class-weighted version:
Here are the accuracy and loss plots for the unweighted version:
Here is the code. The only difference in the above two versions is that one calls the class weights dictionary and one doesn't. (General advice about how this is set up is also welcome -- as you can see I am very new to this!)
from tensorflow import keras
from keras import optimizers
from keras.applications.resnet_v2 import ResNet50V2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Rescaling, Conv2D, MaxPool2D, Flatten
#Create datasets
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
'/content/drive/MyDrive/Colab Notebooks/train/All classes/',
labels="inferred",
label_mode="int",
validation_split=0.2,
seed=1337,
subset="training",
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
'/content/drive/MyDrive/Colab Notebooks/train/All classes/',
labels="inferred",
label_mode="int",
validation_split=0.2,
seed=1337,
subset="validation",
)
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
'/content/drive/MyDrive/Colab Notebooks/test/All classes/',
labels="inferred",
label_mode="int",
)
#Import ResNet
base_model = ResNet50V2(weights='imagenet', include_top=False)
#Create basic network to append to ResNet above
x = base_model.output
x = Rescaling(1.0 / 255)(x)
x = Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(256,256,3), padding="same")(x)
x = MaxPool2D(pool_size=(2, 2), strides=2)(x)
x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2), strides=2)(x)
x = GlobalAveragePooling2D()(x)
predictions = Dense(units=5, activation='softmax')(x)
#Merge the models
model = Model(inputs=base_model.input, outputs=predictions)
#Freeze ResNet layers
for layer in base_model.layers:
layer.trainable = False
#Compile
model.compile(optimizer=keras.optimizers.Adam(1e-3), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
#These are the weights. They are derived here from their numbers in the train dataset -- there are 25,811
#files in class 0, 2444 files in class 1, etc. This dictionary was not called for the unweighted version.
class_weight = {0: 1.0,
1: 25811.0/2444.0,
2: 25811.0/5293.0,
3: 25811.0/874.0,
4: 25811.0/709.0}
#Training the model
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath='/content/drive/MyDrive/Colab Notebooks/ResNet/',
save_weights_only=False,
mode='auto',
save_best_only=True,
save_freq= 'epoch')
history = model.fit(
x=train_ds,
epochs=30,
class_weight=class_weight,
validation_data=val_ds,
callbacks=[model_checkpoint_callback]
)
#Evaluating
loss, acc = model.evaluate(test_ds)
print("Accuracy", acc)
Also, some other questions:
Should the metrics=['accuracy'] actually be metrics=['sparse_categorical_accuracy']?
Should class_weight=class_weight actually be sample_weight=sample_weight? I couldn't tell the difference in the documentation, although most examples seem to use class_weight.
I only used padding in one Conv2D layer, and this was a bodge to force the whole thing to actually compile. Should I have been more consistent and used it for the other one too?
On that note, are there other ways my simple appended CNN model (it was called 'predictions') could be laid out to make better sense?
Ah yes, before I forget -- as you can see from the above code, I didn't preprocess the data in accordance with the keras guidance for ResNet. I figured it probably wouldn't make that much of a difference (but also because I was having trouble trying to implement it). Would that be worth looking into? I suppose the unweighted model shows a very high accuracy... probably too high now that I'm looking at it... oh, dear.
I shall be so very thankful for any advice!

Related

Great validation accuracy but terrible prediction

I have got the following CNN:
import os
import numpy as np
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
# Load the data
data_dir = PATH_DIR
x_train = []
y_train = []
total_files = 0
for subdir in os.listdir(data_dir):
subdir_path = os.path.join(data_dir, subdir)
if os.path.isdir(subdir_path):
total_files += len([f for f in os.listdir(subdir_path) if f.endswith('.npy')])
with tqdm(total=total_files, unit='file') as pbar:
for subdir in os.listdir(data_dir):
subdir_path = os.path.join(data_dir, subdir)
if os.path.isdir(subdir_path):
for image_file in os.listdir(subdir_path):
if image_file.endswith('.npy'):
image_path = os.path.join(subdir_path, image_file)
image = np.load(image_path)
x_train.append(image)
y_train.append(subdir)
pbar.update()
x_train = np.array(x_train)
y_train = np.array(y_train)
# Preprocess the labels
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_train = to_categorical(y_train)
# Create the model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(57, 57, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(8, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10)
model.save('GeneratedModels/units_model_np.h5')
And then the following function that is called within a loop about 15 times a second. Where image is a numpy array.
def guess_unit(image, classList):
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
model = tf.keras.models.load_model(MODEL_PATH)
image = np.expand_dims(image, axis=0)
prediction = model.predict(image, verbose=0)
index = np.argmax(prediction)
# Return the predicted unit
return classList[index]
The problem is that when i train the model the accuracy is very high (99,99976%) but when I am using the predict the output is terribily wrong, to the point it does not make any sense. Sometimes the image received will be the same but the predict will return 2 different things.
I have no idea what am I doing wrong. It's the first time I am tinkering with Neural Networks.
I have tried to use the model.predict with the images that it was trained on and it's always getting them right. Is just when it receives dynamic images that it's terribly wrong.
NOTE: I have 8 classes and it was trained using about 13000 images.
Generally to get performance on your training data you have to split your data into training, testing and validation (which I see you haven't done). This can be done manually or done via adding validation_split into your fit function.
Without seeing any curves on how your loss and accuracy it's behaving it's difficult to make any suggestions. However it might be the case that your are underfitting or overfitting to your data (I would assume that your facing overfitting in your case). In case you are overfitting to your data, I would suggest you to add some regularization or change your model architecture as the one used might not be appropriate. Options that one could think of would be to add regularization via Dropout or adding regularization to your weights.

Keras Sequential Model accuracy is bad. Model is Ignoring/neglecting a class

little background: I'm making a simple rock, paper, scissors image classifier program. Basically, I want the image classifier to be able to distinguish between a rock, paper, or scissor image.
problem: The program works amazing for two of the classes, rock and paper, but completely fails whenever given a scissors test image. I've tried increasing my training data and a few other things but no luck. I was wondering if anyone has any ideas on how to offset this.
sidenote: I suspect it also has something to do with overfitting. I say this because the model has about a 92% accuracy with the training data but 55% accuracy on test data.
import numpy as np
import os
import cv2
import random
import tensorflow as tf
from tensorflow import keras
CATEGORIES = ['rock', 'paper', 'scissors']
IMG_SIZE = 400 # The size of the images that your neural network will use
CLASS_SIZE = len(CATEGORIES)
TRAIN_DIR = "../Train/"
def loadData( directoryPath ):
data = []
for category in CATEGORIES:
path = os.path.join(directoryPath, category)
class_num = CATEGORIES.index(category)
for img in os.listdir(path):
try:
img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
data.append([new_array, class_num])
except Exception as e:
pass
return data
training_data = loadData(TRAIN_DIR)
random.shuffle(training_data)
X = [] #features
y = [] #labels
for i in range(len(training_data)):
features = training_data[i][0]
label = training_data[i][1]
X.append(features)
y.append(label)
X = np.array(X)
y = np.array(y)
X = X/255.0
model = keras.Sequential([
keras.layers.Flatten(input_shape=(IMG_SIZE, IMG_SIZE)),
keras.layers.Dense(128, activation='relu'),
keras.layers.Dense(CLASS_SIZE)
])
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
model.fit(X, y, epochs=25)
TEST_DIR = "../Test/"
test_data = loadData( TEST_DIR )
random.shuffle(test_data)
test_images = []
test_labels = []
for i in range(len(test_data)):
features = test_data[i][0]
label = test_data[i][1]
test_images.append(features)
test_labels.append(label)
test_images = np.array(test_images)
test_images = test_images/255.0
test_labels = np.array(test_labels)
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print('\nTest accuracy:', test_acc)
# Saving the model
model_json = model.to_json()
with open("model.json", "w") as json_file :
json_file.write(model_json)
model.save_weights("model.h5")
print("Saved model to disk")
model.save('CNN.model')
If you want to create a massive amount of training data fast: https://github.com/ThomasStuart/RockPaperScissorsMachineLearning/blob/master/source/0.0-collectMassiveData.py
Thanks in advance to any help or ideas :)
You can simply test overfitting by adding 2 additional layers, one dropout layer and one dense layer. Also be sure to shuffle your train_data after each epoch, so the model keeps the learning general. Also, if I see this correctly, you are doing a multi class classification but do not have a softmax activation in the last layer. I would recommend you, to use it.
With drouput and softmax your model would look like this:
model = keras.Sequential([
keras.layers.Flatten(input_shape=(IMG_SIZE, IMG_SIZE)),
keras.layers.Dense(128, activation='relu'),
keras.layers.Dropout(0.4), #0.4 means 40% of the neurons will be randomly unused
keras.layers.Dense(CLASS_SIZE, activation="softmax")
])
As last advice: Cnns perform in general way better with tasks like this. You might want to switch to a CNN network, for having even better performance.

My loss is "nan" and accuracy is " 0.0000e+00 " in Transfer learning: InceptionV3

I am working on transfer learning. My use case is to classify two categories of images. I used InceptionV3 to classify images. When training my model, I am getting nan as loss and 0.0000e+00 as accuracy in every epoch. I am using 20 epochs because my data amount is small: I got 1000 images for training and 100 for testing and per batch 5 records.
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False)
# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
x = Dense(512, activation='relu')(x)
x = Dense(32, activation='relu')(x)
# and a logistic layer -- we have 2 classes
predictions = Dense(1, activation='softmax')(x)
# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
layer.trainable = False
# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest:
for layer in model.layers[:249]:
layer.trainable = False
for layer in model.layers[249:]:
layer.trainable = True
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)
training_set = train_datagen.flow_from_directory(
'C:/Users/Desktop/Transfer/train/',
target_size=(64, 64),
batch_size=5,
class_mode='binary')
test_set = test_datagen.flow_from_directory(
'C:/Users/Desktop/Transfer/test/',
target_size=(64, 64),
batch_size=5,
class_mode='binary')
model.fit_generator(
training_set,
steps_per_epoch=1000,
epochs=20,
validation_data=test_set,
validation_steps=100)
It sounds like your gradient is exploding. There could be a few reasons for that:
Check that your input is generated correctly. For example use the save_to_dir parameter of flow_from_directory
Since you have a batch size of 5, fix the steps_per_epoch from 1000 to 1000/5=200
Use sigmoid activation instead of softmax
Set a lower learning rate in Adam; to do that you need to create the optimizer separately like adam = Adam(0.0001) and pass it in model.compile(..., optimizer=adam)
Try VGG16 instead of InceptionV3
Let us know when you tried all of the above.
Using Softmax for the activation does not make sense in case of single class. Your output value will always be normed by itself, thus equals to 1. The purpose of softmax is to make the values sum up to 1. In case of single value you will get it == 1. I believe at some moment in time you got 0 as predicted value, which resulted in zero division and NaN loss value.
You should either change the number of classes to 2 by:
predictions = Dense(2, activation='softmax')(x)
class_mode='categorical' in flow_from_directory
loss="categorical_crossentropy"
or use the sigmoid activation function for the last layer.

Writing the following CNN in tensorflow

I am new to this Deep Learning. I have learnt the basics through reading and trying to implement a real network to see how/if it'll really work. I chose Tensorflow in digits and the following network because they give out the exact architecture with training materiel. Steganalysis with DL
I wrote the following code for the architecture in Steganalysis with DL by looking at networks existing networks in digits and Tensorflow documentation.
from model import Tower
from utils import model_property
import tensorflow as tf
import tensorflow.contrib.slim as slim
import utils as digits
class UserModel(Tower):
#model_property
def inference(self):
x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
with slim.arg_scope([slim.conv2d, slim.fully_connected],
weights_initializer=tf.contrib.layers.xavier_initializer(),
weights_regularizer=slim.l2_regularizer(0.0001)):
conv1 = tf.layers.conv2d(inputs=x, filters=64, kernel_size=7, padding='same', strides=2, activation=tf.nn.relu)
rnorm1 = tf.nn.local_response_normalization(input=conv1)
conv2 = tf.layers.conv2d(inputs=rnorm1, filters=16, kernel_size=5, padding='same', strides=1, activation=tf.nn.relu)
rnorm2 = tf.nn.local_response_normalization(input=conv2)
flatten = tf.contrib.layers.flatten(rnorm2)
fc1 = tf.contrib.layers.fully_connected(inputs=flatten, num_outputs=1000, activation_fn=tf.nn.relu)
fc2 = tf.contrib.layers.fully_connected(inputs=fc1, num_outputs=1000, activation_fn=tf.nn.relu)
fc3 = tf.contrib.layers.fully_connected(inputs=fc2, num_outputs=2)
sm = tf.nn.softmax(fc3)
return fc3
#model_property
def loss(self):
model = self.inference
loss = digits.classification_loss(model, self.y)
accuracy = digits.classification_accuracy(model, self.y)
self.summaries.append(tf.summary.scalar(accuracy.op.name, accuracy))
return loss
I tried running it but the accuracy is pretty low. Could someone tell me if I've done it completely wrong or what's wrong with it and tell me how to properly code it?
UPDATE: Thank you Nessuno! With the fix you mentioned I came up with this code:
from model import Tower
from utils import model_property
import tensorflow as tf
import tensorflow.contrib.slim as slim
import utils as digits
class UserModel(Tower):
#model_property
def inference(self):
x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
with slim.arg_scope([slim.conv2d, slim.fully_connected],
weights_initializer=tf.contrib.layers.xavier_initializer(),
weights_regularizer=slim.l2_regularizer(0.00001)):
conv1 = tf.layers.conv2d(inputs=x, filters=64, kernel_size=7, padding='Valid', strides=2, activation=tf.nn.relu)
rnorm1 = tf.nn.local_response_normalization(input=conv1)
conv2 = tf.layers.conv2d(inputs=rnorm1, filters=16, kernel_size=5, padding='Valid', strides=1, activation=tf.nn.relu)
rnorm2 = tf.nn.local_response_normalization(input=conv2)
flatten = tf.contrib.layers.flatten(rnorm2)
fc1 = tf.contrib.layers.fully_connected(inputs=flatten, num_outputs=1000, activation_fn=tf.nn.relu)
fc2 = tf.contrib.layers.fully_connected(inputs=fc1, num_outputs=1000, activation_fn=tf.nn.relu)
fc3 = tf.contrib.layers.fully_connected(inputs=fc2, num_outputs=2, activation_fn=None)
return fc3
#model_property
def loss(self):
model = self.inference
loss = digits.classification_loss(model, self.y)
accuracy = digits.classification_accuracy(model, self.y)
self.summaries.append(tf.summary.scalar(accuracy.op.name, accuracy))
return loss
Solver type is SGD. Learning rate is 0.001. I am shuffling training data.I have increased training data to 6000 (3000 per category, 20% from that is reserved for validation). I downloaded the training data from this link. But I am only getting the following graph. I think this is overfitting. Do you have any suggestions to improve the validation accuracy?
In NVIDIA digits, classification_loss, exactly as in tensorflow tf.nn.softmax_cross_entropy_with_logits expects as input a linear layer of neuron.
Instead, you're passing as input sm = tf.nn.softmax(fc3), hence you're applying the softmax operation 2 times and this is the reasong of your low accuracy.
In order to solve this issue, just change the model output layer to
fc3 = slim.fully_connected(fc2, 2, activation_fn=None, scope='fc3')
return fc3

Tensorboard event file size is growing after consecutive model training

I'm training 8 models in a for loop and saving each tensorboard log file into a seperate directory. Folder structure is like Graph is my main directory for graphs and directories under Graph such as net01 net02... net08 are the ones I'm outputting my event files. By doing this I can visualize training logs in Tensorboard in that fancy fashion with every single training process gets its own colour.
My problem is the growing sizes of eventfiles. The first event file is apporoximately 300KB's, but the second event file have a size of 600KB's, third is 900 KB and so on. They each reside in their own seperate directory and each of them are different training sessions from each other but somehow tensorboard appends the earlier sessions into last one. In the end I should've a total size of 12*300Kb= 3600 KB of session files, but I endup with something like 10800KB of session files. As the nets are getting deeper I endup with session file sizes of like 600 MB. So clearly I'm missing something out.
I tried to visualize last file with the biggest size to check whether it includes all the previous training sessions and can draw like 8 nets but it failed. SO a big bunch of irrelevant information is stored in this session file.
I'm using Anaconda3-Spyder on Win7-64. Database is divided into 8 and for each run I'm leaving one out for validation and using the rest as training. Here is a simplified version of my code:
from keras.models import Model
from keras.layers import Dense, Flatten, Input, Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard, ModelCheckpoint, CSVLogger
import os.path
import shutil
import numpy
# ------------------------------------------------------------------
img_width, img_height = 48, 48
num_folds=8
folds_path= "8fold_folds"
nets_path = "8fold_nets_simplenet"
csv_logpath = 'simplenet_log.csv'
nets_string = "simplenet_nets0"
nb_epoch = 50
batch_size = 512
cvscores = []
#%%
def foldpath(foldnumber):
pathbase= os.path.join(folds_path,'F')
train_data_dir = os.path.join(pathbase+str(foldnumber),"train")
valid_data_dir = os.path.join(pathbase+str(foldnumber),"test")
return train_data_dir,valid_data_dir
#%%
for i in range(1, num_folds+1):
modelpath= os.path.join(nets_path,nets_string+str(i))
if os.path.exists(modelpath):
shutil.rmtree(modelpath)
os.makedirs(modelpath)
[train_data_dir, valid_data_dir]=foldpath(i)
img_input = Input(shape=(img_width,img_height,1),name='input')
x = Conv2D(32, (3,3), activation='relu', padding='same', name='conv1-'+str(i))(img_input)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1-'+str(i))(x)
x = Conv2D(64, (3,3), activation='relu', padding='same', name='conv2-'+str(i))(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2-'+str(i))(x)
x = Conv2D(128, (3,3), activation='relu', padding='same', name='conv3-'+str(i))(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool3-'+str(i))(x)
x = Flatten()(x)
x = Dense(512, name='dense1-'+str(i))(x)
#x = Dropout(0.5)(x)
x = Dense(512, name='dense2-'+str(i))(x)
#x = Dropout(0.5)(x)
predictions = Dense(6, activation='softmax', name='predictions-'+str(i))(x)
model = Model(inputs=img_input, outputs=predictions)
# compile model-----------------------------------------------------------
model.compile(optimizer='Adam', loss='binary_crossentropy',
metrics=['accuracy'])
# ----------------------------------------------------------------
# prepare data augmentation configuration
train_datagen = ImageDataGenerator(rescale=1./255,
featurewise_std_normalization=True,
featurewise_center=True)
valid_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
color_mode='grayscale',
classes = ['1','3','4','5','6','7'],
class_mode='categorical',
shuffle='False'
)
validation_generator = valid_datagen.flow_from_directory(
valid_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
color_mode='grayscale',
classes = ['1','3','4','5','6','7'],
class_mode='categorical',
shuffle='False'
)
# --------------------callbacks---------------------------
csv_logger = CSVLogger(csv_logpath, append=True, separator=';')
graph_path = os.path.join('Graphs',modelpath)
os.makedirs(graph_path)
tensorboard = TensorBoard(log_dir= graph_path, write_graph=True, write_images=False)
callbacks_list=[csv_logger,tensorboard]
# ------------------
print("Starting to fit the model")
model.fit_generator(train_generator,
steps_per_epoch = train_generator.samples/batch_size,
validation_data = validation_generator,
validation_steps = validation_generator.samples/batch_size,
epochs = nb_epoch, verbose=1, callbacks=callbacks_list)
Not sure about this one but my guess would be that it has to do with your graphs being stored after each loop iteration. To check if your graphs are responsible for this, you could try write_graph = False, and see if you still have the same problem. To make sure the graph is reset, you could try to clear the tensorflow graph at the end of each iteration using this:
keras.backend.clear_session()
The problem is that with training of each model, the next model still contains all the graph elements of previous trainings. Thus before training each model, reset the Tensorflow graph and then continue with the training.