I'm working on an image classification task for diabetic retinopathy using fundus images. There are 5 classes. The class distribution is 1805 images (class 1), 370 images (class 2), 999 images (class 3), 193 images (class 4), and 295 images (class 5).
Here are the steps that I have tried to run:
Preprocessing (resized 224 * 224)
The train/test split is 85% : 15%
x_train, xtest, y_train, ytest = train_test_split(
x_train, y_train,
test_size = 0.15,
stratify = y_train
Data augmentation
Training with the ResNet-50 model and cross-validation
def getResNet():
    """Build a ResNet-50 classifier with a 5-way softmax head.

    The backbone is created without pretrained weights (``weights=None``)
    and without its stock top; global average pooling and a 5-unit softmax
    ``Dense`` layer are stacked on the backbone output.

    Returns:
        A Keras ``Model`` mapping the backbone input to the softmax output.
    """
    # Both spatial dimensions of the input are taken from IMAGE_HEIGHT.
    backbone = ResNet50(
        weights=None,
        include_top=False,
        input_shape=(IMAGE_HEIGHT, IMAGE_HEIGHT, 3),
    )
    pooled = GlobalAveragePooling2D()(backbone.output)
    probabilities = Dense(5, activation='softmax')(pooled)
    return Model(inputs=backbone.input, outputs=probabilities)
# Stratified 5-fold cross-validation: a fresh ResNet is built for every fold
# and its score on xtest/ytest is collected in cvscores.
# NOTE(review): num_folds is not referenced below; n_splits is hard-coded to 5.
num_folds = 5
skf = StratifiedKFold(n_splits = 5, shuffle=True, random_state=2021)
cvscores = []
fold = 1
# argmax(1) turns y_train into per-sample class indices for stratification
# (presumably y_train is one-hot encoded; confirm).
for train, val in skf.split(x_train, y_train.argmax(1)):
print('Fold: ', fold)
Xtrain = x_train[train]
Xval = x_train[val]
Ytrain = y_train[train]
Yval = y_train[val]
# Augmented batches are drawn from this fold's training split only.
data_generator = create_datagen().flow(Xtrain, Ytrain, batch_size=32, seed=2021)
model = getResNet()
with tf.compat.v1.device('/device:GPU:0'):
# NOTE(review): the training call below is truncated in this snippet;
# presumably it is a fit call that consumes data_generator (confirm
# against the original script). No compile call is visible either.
model_train =,
validation_data=(Xval, Yval),
epochs=30, batch_size = 32, verbose=1)
# NOTE(review): model_name is built, but no save call is visible here.
model_name = 'cnn_keras_aug_Fold_'+str(fold)+'.h5'
# Each fold's model is scored on the held-out test set, not on Xval/Yval.
scores = model.evaluate(xtest, ytest, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
cvscores.append(scores[1] * 100)
fold = fold +1
The maximum results I got from this method were training accuracy of 81.2%, validation accuracy of 72.2%, and test accuracy of 70.73%.
Can anyone suggest how to improve the model so that I can get the test accuracy above 90%, if possible?
Later, I will use this model as a pre-trained model to train diabetic retinopathy data as well but from other sources.
BTW, I've tried replacing my preprocessing with this method:
def preprocessing(path):
    """Load an image, contrast-enhance its green channel and resize it.

    Args:
        path: Path of the image file handed to ``cv2.imread``.

    Returns:
        The 224x224 image with CLAHE applied to channel index 1; the other
        two channels are left exactly as produced by
        ``crop_image_from_gray``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``path``.
    """
    image = cv2.imread(path)
    # cv2.imread reports failure by returning None rather than raising, so
    # fail here with a clear message instead of deep inside the cropping.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    image = crop_image_from_gray(image)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    # Only channel 1 (green) is equalised; channels 0 and 2 need no
    # assignment because they are unchanged.
    green = image[:, :, 1]
    image[:, :, 1] = clahe.apply(green)
    return cv2.resize(image, (224, 224))
I've also tried replacing my model with VGG16 and EfficientNetB0. However, neither had much effect on my results. I'm still stuck at about 70% accuracy.
Please help me come up with ideas to improve my modeling results.

Your training accuracy is 81.2%. It is rare for test accuracy to be higher than training accuracy, i.e. with the current setup you will most likely not reach 90%.
However, your validation (and also testing) accuracy is about 70-72%. I can suggest that on your small dataset your model is overfitting. So if you add model regularization (e.g. dropout), it is possible that the gap between your training and your validation (and test) will decrease. This way you can improve your validation score.
To further increase the score, you need to check your data manually and try to understand which classes contribute the most to the errors and figure out how those errors can be reduced (e.g. updating your preprocessing pipeline).


Large variation in loss and accuracy validation values during training Resnet50 on binary class image classification

I am using Resnet50 for binary image classification and the model shows a high variation in loss and accuracy on validation data during the epochs.
This is what I get after 40 epochs.
Here is my model code:
# Builds a single-output sigmoid classifier on top of an ImageNet-pretrained
# ResNet50, re-using the network up to its second-to-last layer.
def build_model():
model = ResNet50(include_top=True ,input_shape=(224,224,3) , weights="imagenet")
# NOTE(review): the body of this loop is not visible in the snippet;
# presumably it sets each layer's trainable flag (confirm).
for layer in model.layers:
base_input= model.layers[0].input
# layers[-2] is the layer just before the original top classification layer.
base_output= model.layers[-2].output
# NOTE(review): `l` is read here before any visible assignment and
# base_output is never used; a line connecting the two was presumably
# dropped from the snippet (confirm).
l =Dense(units = 512 ,activation='sigmoid')(l)
final_output= Dense(units = 1 ,activation='sigmoid')(l)
new_model= Model(inputs=base_input,outputs= final_output)
return new_model
# Compiles `model` with a default Adam optimizer and binary cross-entropy,
# then trains it for 40 epochs and returns the training history.
def train_model(model, train_generator, valid_generator):
model.compile(optimizer = Adam(), loss = 'binary_crossentropy', metrics = ['accuracy'])
# NOTE(review): the start of the fit call is truncated in this snippet;
# presumably train_generator is its training input (confirm).
history= ,validation_data=(valid_generator) ,epochs=40)
return history
I need to know what's the problem & how to fix it
thanks in advance

Completely different results using Tensorflow and Pytorch for MobilenetV3 Small

I am using transfer learning from MobileNetV3 Small to predict 5 different points on an image. I am doing this as a regression task.
For both models:
Setting the last 50 layers trainable and adding the same fully connected layers to the end.
Learning rate 3e-2
Batch size 32
Adam optimizer with the same betas
100 epochs
The inputs consist of RGB unscaled images
def _init_weights(m):
if type(m) == nn.Linear:
def get_mob_v3_small():
    """Load pretrained MobileNetV3-Small and freeze all but its last 50 children.

    Returns:
        The torchvision model. Every parameter belonging to an entry of
        ``get_children(model)`` other than the final 50 entries has
        ``requires_grad`` set to ``False``.
    """
    model = torchvision.models.mobilenet_v3_small(pretrained=True)
    frozen_modules = get_children(model)[:-50]
    for module in frozen_modules:
        for param in module.parameters():
            param.requires_grad = False
    return model
class TransferMobileNetV3_v2(nn.Module):
    """MobileNetV3-Small backbone with a keypoint-regression head.

    The backbone's ``classifier`` attribute is replaced by three linear
    layers (1024 -> 1024 -> 512 -> ``num_keypoints * 2``), i.e. two output
    values per keypoint.

    Args:
        num_keypoints: Number of keypoints to regress.
    """

    def __init__(self,
                 num_keypoints: int = 5):
        super().__init__()
        self.classifier_neurons = num_keypoints * 2
        self.base_model = get_mob_v3_small()
        # NOTE(review): there is no non-linearity between these Linear
        # layers, whereas the Keras head shown in this file applies relu
        # after its 1024- and 512-unit Dense layers. Confirm whether the
        # two heads are meant to match.
        self.base_model.classifier = nn.Sequential(
            nn.Linear(in_features=1024, out_features=1024),
            nn.Linear(in_features=1024, out_features=512),
            nn.Linear(in_features=512, out_features=self.classifier_neurons),
        )

    def forward(self, x):
        """Run ``x`` through the backbone and the replaced classifier head."""
        return self.base_model(x)
Training Script
# Training loop: for every epoch, iterates the batches of `trainloader`,
# runs the forward pass under autocast and computes the training loss.
# NOTE(review): the snippet ends after the loss computation; the backward
# pass and optimizer/scaler steps are not visible here.
def train(net, trainloader, testloader, train_loss_fn, optimizer, scaler, args):
len_dataloader = len(trainloader)
for epoch in range(1, args.epochs+1):
for batch_idx, sample in enumerate(trainloader):
inputs, labels = sample
# NOTE(review): right-hand side truncated in the snippet; presumably
# this moves inputs and labels to args.device (confirm).
inputs, labels =,
# Mixed precision is switched on or off by args.use_amp.
with torch.cuda.amp.autocast(args.use_amp):
prediction = net(inputs)
loss = train_loss_fn(prediction, labels)
# Entry point: parses arguments, selects the device, configures cuDNN,
# creates the MSE loss, Adam optimizer and AMP grad scaler, then calls train().
# NOTE(review): `net`, `train_loader` and `test_loader` are not defined in
# the visible snippet; presumably they are created in lines that were dropped.
def main():
args = make_args_parser()
args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): seed is read, but no seeding call is visible in this snippet.
seed = args.seed
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
loss_fn = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=3e-2,
betas=(0.9, 0.999))
scaler = torch.cuda.amp.GradScaler(enabled=args.use_amp)
train(net, train_loader, test_loader, loss_fn, optimizer, scaler, args)
# Keras counterpart: ImageNet-pretrained MobileNetV3Small with a
# 1024 -> 512 -> 10 dense head attached to the output of layers[-6].
# NOTE(review): the constructor call below is truncated in this snippet
# (remaining arguments and the closing parenthesis are missing).
base_model = tf.keras.applications.MobileNetV3Small(weights='imagenet',
x_in = base_model.layers[-6].output
x = Dense(units=1024, activation="relu")(x_in)
x = Dense(units=512, activation="relu")(x)
# Linear output with 10 units.
x = Dense(units=10, activation="linear")(x)
model = Model(inputs=base_model.input, outputs=x)
# NOTE(review): loop body not visible; presumably it marks every layer except
# the last 50 as non-trainable (confirm).
for layer in model.layers[:-50]:
Training Script
model.compile(loss = "mse",
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-2))
history =, output_numpy,
batch_size=32, epochs=100,validation_split = 0.2)
The PyTorch model predicts one single point around the center for all 5 different points.
The Tensorflow model predicts the points quite well and are quite accurate.
The loss in the Pytorch model is much higher than the Tensorflow model.
Please let me know what is going wrong, as I am trying my best to shift to PyTorch for this work and I need this model to give me similar/identical results.
Note: I also noticed that the MobileNetV3 Small model seems to be different in PyTorch and different in Tensorflow. I do not know if am interpreting it wrong, but I'm putting it here just in case.

Getting constant accuracies for training and validation sets despite their losses are changing during CNN training?

As the title describes, the issue I've been experiencing while training my CNN model is that the training and validation accuracies stay constant even though their losses are changing. I have included the details of the model and its training setup below. What may cause this issue?
Here is the data that was used by training (X_train & y_train), validation, and test sets (X_test and y_test):
# Load the CSV: the last column holds the label, all other columns the features.
df = pd.read_csv(CSV_PATH, sep=',', header=None)
print(f'Shape of all data: {df.shape}')
y = df.iloc[:, -1].values
X = df.iloc[:, :-1].values
# The encoder must be fitted before it can transform; calling transform() on
# an unfitted LabelEncoder raises NotFittedError, so fit and transform together.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(y)
# One-hot encode the integer class indices.
dummy_y = to_categorical(encoded_Y)
X_train, X_test, y_train, y_test = train_test_split(X, dummy_y, test_size=0.3, random_state=RANDOM_STATE)
# Append a trailing axis of size 1 so each sample has shape (n_features, 1).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
Here are the shapes of training and test sets:
Shape of X_train: (1322, 10800, 1)
Shape of Y_train: (1322, 3)
Shape of X_test: (567, 10800, 1)
Shape of y_test: (567, 3)
Here is my CNN model:
# Hyper-parameters.
# NOTE(review): n_dropout, n_momentum, n_reg, batch_size and num_epochs are
# defined here but not referenced by the model construction below.
activation_fn = 'relu'
n_lr = 1e-4
weight_decay = 1e-4
batch_size = 64
num_epochs = 200 * 10 * 10
num_classes = 3
n_dropout = 0.6
n_momentum = 0.5
n_kernel = 5
n_reg = 1e-5

# Two Conv1D + max-pooling stages, global average pooling, then a dense head
# ending in a softmax over num_classes.
model = Sequential([
    Conv1D(128, n_kernel, input_shape=(10800, 1)),
    MaxPooling1D(pool_size=2, strides=2),
    Conv1D(256, n_kernel),
    MaxPooling1D(pool_size=2, strides=2),
    GlobalAveragePooling1D(),  # have tried Flatten() as well
    Dense(256, activation=activation_fn),
    Dense(64, activation=activation_fn),
    Dense(num_classes, activation='softmax'),
])

adam = Adam(
    lr=n_lr,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    decay=weight_decay,
)
model.compile(
    loss='categorical_crossentropy',
    optimizer=adam,
    metrics=['acc'],
)
Here is how I have evaluated the model:
# Class scores for the test set, reduced to class indices for both the
# predictions and the one-hot ground truth.
Y_pred = model.predict(X_test, verbose=0)
y_pred = Y_pred.argmax(axis=1)
y_test_int = y_test.argmax(axis=1)
And, my model always predicts the same class of three classes during the model evaluation as you can see from the classification result below (via classification_result(y_test_int, y_pred) function):
precision recall f1-score support
normal 0.743 1.000 0.852 421
apb 0.000 0.000 0.000 45
pvc 0.000 0.000 0.000 101
The model was trained using the EarlyStopping callback of Keras. Thus, the training has continued for 4,173 epochs. Here is the obtained losses during the training for training and validation sets:
Here are the obtained accuracies during the training for training and validation sets:
The model was implemented using Keras and hosted on Google Colab.
Although such issues are difficult to resolve without the data, there are a couple of general rules applicable.
The very first thing we do when the model does not seem to learn anything, like here (despite the mild drop in the loss), is to remove all dropout.
In fact, dropout is not supposed to be used by default; its nominal function is to guard against overfitting - but of course, before starting to worry about overfitting, you must first have some success with fitting, something that is clearly not happening here. The fact that, with a dropout rate of n_dropout = 0.6, you also seem to be rather too aggressive in its use, does not help, either.

Good training accuracy but bad evaluation

I trained a DNN model and get good training accuracy but bad evaluation accuracy.
# Builds and returns a tf.keras Sequential model.
# NOTE(review): as shown, `shape` and `dropout` are never used and the loop
# only adds two Dense(1, sigmoid) layers; the snippet appears to be truncated
# (confirm against the original model definition).
def DNN_Metrix(shape, dropout):
model = tf.keras.Sequential()
for i in range(0,2):
model.add(tf.keras.layers.Dense(1, activation=tf.nn.sigmoid))
return model
model_dnn = DNN_Metrix(shape=(28,20,1), dropout=0.1)
Here is my training process, and result:
Epoch 10/10
- 55s - loss: 0.4763 - acc: 0.7807
But when I evaluate with the test dataset, I get:
result = model_dnn.evaluate(np.array(X_test), np.array(y_test), batch_size=len(X_test))
loss, accuracy = [0.9485417604446411, 0.3649936616420746]
It's a binary classification problem; the ratio of positive labels to negative labels is about
0.37 : 0.63
I don't think this results from overfitting: I have 700k training instances, each of shape 28 * 20, and my DNN model is simple and has few parameters.
Here is my code when generating the test data and training data:
def parse_function(example_proto):
    """Decode one serialized example into a ``(feature, label)`` pair.

    Args:
        example_proto: Serialized example handed to
            ``tf.parse_single_example``.

    Returns:
        A tuple ``(feature, label)``: the raw ``feature`` bytes decoded as
        float64 and reshaped to ``[28, 20, 1]``, and element 1 of the
        two-element float32 ``label`` vector.
    """
    dics = {
        'feature': tf.FixedLenFeature(shape=(), dtype=tf.string, default_value=None),
        'label': tf.FixedLenFeature(shape=(2), dtype=tf.float32),
        'shape': tf.FixedLenFeature(shape=(2), dtype=tf.int64),
    }
    parsed_example = tf.parse_single_example(example_proto, dics)
    feature = tf.decode_raw(parsed_example['feature'], tf.float64)
    feature = tf.reshape(feature, [28, 20, 1])
    # Only the second entry of the stored label vector is used as the target.
    label = parsed_example['label'][1]
    return feature, label
# Builds the training dataset: shuffled with a 10000-element buffer,
# repeated 100 times and batched in groups of 670.
def read_tfrecord(train_tfrecord):
# NOTE(review): the right-hand sides of the next two assignments are missing
# in this snippet; presumably they open train_tfrecord and map a parsing
# function over it (confirm).
dataset =
dataset =
dataset = dataset.shuffle(buffer_size=10000)
dataset = dataset.repeat(100)
dataset = dataset.batch(670)
return dataset
# Builds the test dataset; no shuffle, repeat or batch call is visible here.
def read_tfrecord_test(test_tfrecord):
# NOTE(review): the right-hand sides of both assignments are missing in this
# snippet; presumably they open test_tfrecord and map a parsing function
# over it (confirm).
dataset =
dataset =
return dataset
# tf_record_target = 'train_csv_temp_norm_vx.tfrecords'
train_files = 'train_baseline.tfrecords'
test_files = 'test_baseline.tfrecords'
train_dataset = read_tfrecord(train_files)
test_dataset = read_tfrecord_test(test_files)
it_test_dts = test_dataset.make_one_shot_iterator()
it_train_dts = train_dataset.make_one_shot_iterator()
X_test = []
y_test = []
el = it_test_dts.get_next()
count = 1
with tf.Session() as sess:
while True:
x_t, y_t =
except tf.errors.OutOfRangeError:
Judging from the fact that your data distribution in your test set is [37%-63%] and your final accuracy is 0.365, I would first check the labels predicted on the test set.
Most probably, all your predictions are of class 0, provided that class 0 amounts for 37% of your dataset. In this case, it means that your neural network is not able to learn anything on the training set, and you have a massive scenario of overfitting.
I recommend that you always use a validation set, so that at the end of each epoch, you would check to see if your neural network has learnt anything. In such a situation(like yours), you would see very fast the overfitting issue.
Training accuracy doesn't mean much. A NN can fit any random set of inputs and outputs, even if they're unrelated. That's why you want to use validation data.
After training look at your loss curves, this will give you a better idea of where things are going wrong.
NN's default to just guessing the most popular class it's seen in training data for classification problems. This is usually what happens when you haven't setup your experiment correctly.
And since you're dealing with binary classification, you might want to look at things like StratifiedKFold, which will provide you with train/test folds where the class proportions are preserved.

ways to improve training speed

I'm new to tensorflow and I'm trying to adopt transfer learning for feature extraction. I have a large image dataset of 600k images stored in a gzip compressed hdf5 file of 100GB. I'm using a generator to load the images into the vgg16 model. However, it is going to take me 2000+ hours to complete 1 epoch. Is there any way to optimize the code so that I can have a faster training speed?
# TensorBoard logging and a TF1 session capped at 75% of GPU memory.
NAME = "vgg16-CNN"
tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True,gpu_options=gpu_options))
# ImageNet-pretrained VGG16; a new softmax layer over num_classes is attached
# to the output of its 'fc2' layer.
image_input = Input(shape=(224, 224, 3))
model = VGG16(input_tensor=image_input,include_top=True, weights='imagenet')
output_vgg16_conv = model.get_layer('fc2').output
x = Dense(num_classes, activation='softmax', name='predictions') (output_vgg16_conv)
pretrained_model = Model(inputs=image_input, outputs=x)
# NOTE(review): the loop body and the start of the compile call are missing in
# this snippet; presumably every layer but the last is frozen before
# compiling (confirm).
for layer in pretrained_model.layers[:-1]:
optimizer='adam', metrics=['accuracy'])
def generator():
    """Yield ``(image, one_hot_label)`` pairs from ``npx_train.hdf5``.

    Yields:
        One image of the file's ``'dataset'`` entry at a time, paired with
        the one-hot label at the same index.
    """
    # Use a separate local name for the encoded labels: assigning to
    # ``y_train`` inside this function would make it a local variable, and
    # reading it on the right-hand side would raise UnboundLocalError.
    labels = to_categorical(np.array(y_train), len_class)
    # The context manager closes the HDF5 file once iteration ends.
    with h5py.File('npx_train.hdf5', 'r') as hdf5_file:
        for index, image in enumerate(hdf5_file['dataset']):
            yield (image, labels[index])
#Dataset from generator
ds =
(tf.float32, tf.float32),
ds = ds.prefetch(
ds = ds.batch(10)
#Model compile
with sess:,epochs=10,steps_per_epoch=66662,
I've managed to cut the training time to 60 hours per epoch by passing the generator directly to `fit_generator`:
hdf5_path = "npx_train.hdf5"
extendable_hdf5_file = h5py.File(hdf5_path,'r')['dataset']
def train_loader(files, y_train, batch_size, num_samples=553292):
    """Yield normalised ``(X, Y)`` batches forever, cycling over the data.

    Args:
        files: Sliceable array-like of images (e.g. an HDF5 dataset).
        y_train: Sliceable labels aligned with ``files``.
        batch_size: Number of samples per batch.
        num_samples: Number of leading samples covered in one pass before
            wrapping around to the start. Defaults to 553292, the value
            that used to be hard-coded.

    Yields:
        ``(X, Y)`` where ``X`` is ``files[start:stop] / 255`` cast to
        float32 and ``Y`` is the matching slice of ``y_train``. The last
        batch of a pass may hold fewer than ``batch_size`` samples.
    """
    while True:
        batch_start = 0
        while batch_start < num_samples:
            # Clamp the end of the slice so it never runs past num_samples.
            limit = min(batch_start + batch_size, num_samples)
            X = np.float32(files[batch_start:limit] / 255)
            Y = y_train[batch_start:limit]
            yield (X, Y)
            batch_start += batch_size
# Train on the GPU from the batch generators; workers=0 is passed so the
# generators are consumed without worker threads.
with tf.device('/gpu:0'):
    pretrained_model.fit_generator(
        generator=train_loader(extendable_hdf5_file, y_train, 32),
        steps_per_epoch=16666,
        epochs=10,
        verbose=1,
        callbacks=[tensorboard],
        validation_data=val_loader(extendable_hdf5_file, y_train, 32),
        validation_steps=4167,
        workers=0,
    )
However, it is still a long time to spend to train a single layer. Would appreciate help to speed up the process.
Graphics card: gtx1070