i want to apply oversampling to the minority classes.but it displayes an error code - tensorflow2.0

I have a datasets which is imbalanced between normal and abnormal ultrasound liver images.I want to balance the datasets using imageDatagenerator packages but it displays an error
TypeError: init() got an unexpected keyword argument 'oversample'
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
rescale=1./255,
samplewise_center=True,
samplewise_std_normalization=True,
horizontal_flip=True,
vertical_flip=True,
rotation_range=90,
brightness_range=[0.5, 1.5],
zoom_range=0.2,
shear_range=0.2,
oversample='majority')

Related

Model predict giving same results after fitting

Hi I am a beginner in ML and Tensorflow so do pardon me for not understanding complex theories.
I was building an image classifier CNN as a form of practice. The model is trained using MobileNetv2 and its supposed to classify images of cats, dogs and pandas. After training my model (with decent accuracy of 92%), I tried using model.predict() to evaluate how it does with new images but I noticed that all my outputs were 1. This happens even if I used the same previous training data. By the way, I used 2700 (900 from each class) images for training and 300 for validation.
Here is my code
%tensorflow_version 2.x # this line is not required unless you are in a notebook
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow_datasets as tfds
import pathlib
from google.colab import drive
drive.mount('/content/gdrive')
IMAGE_SIZE=[150,150]
train_path = "/content/gdrive/MyDrive/Colab Notebooks/Cat_Dog_Panda_CNN/train"
test_path = "/content/gdrive/MyDrive/Colab Notebooks/Cat_Dog_Panda_CNN/test"
IMAGE_SHAPE=[150,150,3]
base_model = tf.keras.applications.MobileNetV2(input_shape=IMAGE_SHAPE,
include_top=False,
weights='imagenet')
base_model.trainable = False
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
prediction_layer = tf.keras.layers.Dense(3)
model = tf.keras.Sequential([
base_model,
global_average_layer,
prediction_layer
])
model.summary()
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=base_learning_rate),
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
metrics=['accuracy'])
# creates a data generator object that transforms images
train_datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest')
test_datagen = ImageDataGenerator (rescale=1./255)
training_set = train_datagen.flow_from_directory(train_path, target_size=IMAGE_SIZE, batch_size=32, class_mode='categorical')
testing_set = test_datagen.flow_from_directory(test_path, target_size=IMAGE_SIZE, batch_size=32, class_mode='categorical')
model.fit(
training_set,
epochs=3,
validation_data=testing_set)
img = Image.open("/content/gdrive/MyDrive/Colab Notebooks/Cat_Dog_Panda_CNN/test/panda/panda_00094.jpg").convert('RGB').resize((150, 150), Image.ANTIALIAS)
img = np.array(img)
predictions = model.predict(img[None,:,:])
np.argmax(predictions[0])
in model.compile you have loss=tf.keras.losses.BinaryCrossentropy(from_logits=True). Since you have 3 classes you should have
loss=tf.keras.losses.CategoricalCrossentropy()
The Mobilenet model was trained with pixel values scaled within the range -1 to +1. So you should set rescale to
rescale=1/127.5-1
When you want to feed an image into model.predict you must perform the same preprocessing
of the image as you did for the training images namely rescale the image and resize the image to be the same size you used in training, namely (150,150).

Why save_to_dir dont work in flow_from_directory In Keras

Here i want to save the augmented pictures to a directory.but that didn't happend using the below code.I am beginner .Please Help.Thanks in advance.
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
validation_split=0.2,
zoom_range=0.2,
horizontal_flip=True,
brightness_range=[0.2,1.0],
)
train_generator = train_datagen.flow_from_directory(
r'C:\Users\Mahmudul Hasan\Desktop\practice',
target_size=(150, 150),
batch_size=32,
class_mode='categorical',
seed=42,
subset="training",
save_to_dir=r"C:\Users\Mahmudul Hasan\Desktop\Augmented data" )
train_datagen.flow_from_directory gives you a generator. To really generate the image, use this example code:
i = 0
for batch in train_datagen.flow_from_directory(input_dir, target_size=(150, 150),
class_mode='binary', shuffle=False,
save_to_dir=output_dir, save_prefix='aug'):
i += 1
if i > 20:
break

Transfer learning InceptionV3 show poor validation accuracy with training

## MODEL IMPORTING ##
import tensorflow
import pandas as pd
import numpy as np
import os
import keras
import random
import cv2
import math
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense,GlobalAveragePooling2D,Convolution2D,BatchNormalization
from tensorflow.keras.layers import Flatten,MaxPooling2D,Dropout
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
import warnings
warnings.filterwarnings("ignore")
WIDTH = 299
HEIGHT = 299
CLASSES = 4
base_model = InceptionV3(weights='imagenet', include_top=False)
for layer in base_model.layers:
layer.trainable = False
x = base_model.output
x = GlobalAveragePooling2D(name='avg_pool')(x)
x = Dropout(0.4)(x)
predictions = Dense(CLASSES, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
model.summary()
model.compile(optimizer='adam', ##also tried other optimiser --> similar poor accuracy found
loss='categorical_crossentropy',
metrics=['accuracy'])
## IMAGE DATA GENERATOR ##
from keras.applications.inception_v3 import preprocess_input
train_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest',
validation_split=0.2)
train_generator = train_datagen.flow_from_directory(
TRAIN_DIR,
target_size=(HEIGHT, WIDTH),
batch_size=BATCH_SIZE,
class_mode='categorical',
subset="training")
validation_generator = train_datagen.flow_from_directory(
TRAIN_DIR,
target_size=(HEIGHT, WIDTH),
batch_size=BATCH_SIZE,
class_mode='categorical',
subset="validation")
test_datagen = ImageDataGenerator(rescale=1./255)
generator_test = test_datagen.flow_from_directory(directory=TEST_DIR,
target_size=(HEIGHT, WIDTH),
batch_size=BATCH_SIZE,
class_mode='categorical',
shuffle=False)
## MODEL training ##
EPOCHS = 20
STEPS_PER_EPOCH = 320 #train_generator.n//train_generator.batch_size
VALIDATION_STEPS = 64 #validation_generator.n//validation_generator.batch_size
history = model.fit_generator(
train_generator,
epochs=EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,
validation_data=validation_generator,
validation_steps=VALIDATION_STEPS)
Result found:
VALIDATION ACCURACY around 0.55-0.67 fluctuating..
TRAINING ACCURACY 0.99
Questions:
What/Where is the problem in transfer learning process?
are train, validate and test data generator function parameter are chosen correctly?
Well I think you would be better off to train the entire model. So remove the code that makes the base model layers not trainable. If you look at the documentation for Inceptionv3 located here you can set pooling='max' which puts a GlobalMaxPooling2d layer as the output layer so if you do that you do not need to add your own layer as you did. Now I noticed you imported the callbacks ModelCheckpoint and ReduceLROnPlateau but you did not use them in model.fit. Using an adjustable learning rate will be beneficial to achieving a lower validation loss. ModelCheckpoint is useful to save the best model for use in predictions. See code below for implementations. save_loc is the directory where you want to store the results from ModelCheckpoint. NOTE in ModelCheckpoint I set save_weights_only=True. Reason is this is far faster than saving the entire model on each epoch for which the validation loss decreases.
checkpoint=tf.keras.callbacks.ModelCheckpoint(filepath=save_loc, monitor='val_loss', verbose=1, save_best_only=True,
save_weights_only=True, mode='auto', save_freq='epoch', options=None)
lr_adjust=tf.keras.callbacks.ReduceLROnPlateau( monitor="val_loss", factor=0.5, patience=1, verbose=1, mode="auto",
min_delta=0.00001, cooldown=0, min_lr=0)
callbacks=[checkpoint, lr_adjust]
history = model.fit_generator( train_generator, epochs=EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,validation_data=validation_generator,
validation_steps=VALIDATION_STEPS, callbacks=callbacks)
model.load_weights(save_loc) # load the saved weights
# after this use the model to evaluate or predict on the test set.
# if you are satisfied with the results you can then save the entire model with
model.save(save_loc)
Be careful on the test set generator. Ensure you do the same preprocessing on the test data as you did on the training data. I noticed you only rescaled the pixels. Not sure what the preprocess function does but I would use that.
I would also remove the dropout layer initially. Monitor the training loss and validation loss on each epoch and plot the results. If the training loss continues to decrease and the validation loss trends toward increasing then you are over fitting if so then restore the dropout layer if needed. If you do evaluation or prediction on the test set, you only want to go through the test set once. So select the test batch size to that no. of test samples/test batch size is an integer and use that integer as the number of test steps. Here is a
handy function that will determine that for you where length is the number of test samples and b_max is the maximum batch size you will allow based on your memory capacity.
def get_bs(length,b_max):
batch_size=sorted([int(length/n) for n in range(1,length+1) if length % n ==0 and length/n<=b_max],reverse=True)[0]
return batch_size,int(length/batch_size)
# example of use
batch_size, step=get_bs(10000,70)
#result is batch_size= 50 steps=200

What's wrong with my ResNet50 on two machines?

Firstly, I trained a ResNet50 to be a six-class classifier from scratch on Kaggle, and got like this.
As you can see, the accuracy of training set and validation set improved steadily.
And after that, I rented a cloud host on the internet for a better GPU(1080ti), and copied my code (I uploaded my Jupyter notebook). And then I runned it. But strange things happened. My validation accuracy is extremely unsteady and always fluctuated widely (around 0.3). Here's the screenshot.
And also, the training on the host is much more difficult than on Kaggle kernel.
Here are the screenshots after some epochs.(actually the host's one is trained over much more epochs than the Kaggle's one)
And here's my codes of ImageDataGenerator.
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.1,
zoom_range=0.1,
width_shift_range=0.1,
height_shift_range=0.1,
horizontal_flip=True,
vertical_flip=True,
validation_split=0.1
)
test_datagen = ImageDataGenerator(
rescale=1./255,
validation_split=0.1
)
train_generator = train_datagen.flow_from_directory(
base_path,
target_size=(300, 300),
batch_size=16,
class_mode='categorical',
subset='training',
seed=0
)
validation_generator = test_datagen.flow_from_directory(
base_path,
target_size=(300, 300),
batch_size=16,
class_mode='categorical',
subset='validation',
seed=0
)

Keras, training with an empty category

Maybe it is a naive question.
I want to try a small experiment for research: train the model with an extra and empty category from the one that I have in the training and validation and see how the prediction for this extra category goes down with the number of samples and epochs.
In particular I added a 5th phantom category in the pandas dataframe.
I am also using an ImageDataGenerator.
train_datagen = ImageDataGenerator(
rotation_range=0,
rescale=1./255,
shear_range=0.0,
zoom_range=0.2,
horizontal_flip=False,
width_shift_range=0.0,
height_shift_range=0.0
)
train_generator = train_datagen.flow_from_dataframe(
train_df,
"/mypath/",
x_col='filename',
y_col='category',
target_size=IMAGE_SIZE,
class_mode='categorical',
batch_size=batch_size
)
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
validate_df,
"/mypath/",
x_col='filename',
y_col='category',
target_size=IMAGE_SIZE,
class_mode='categorical',
batch_size=batch_size
)
history = model.fit_generator(
train_generator,
epochs=epochs,
validation_data=validation_generator,
validation_steps=total_validate//batch_size,
steps_per_epoch=total_train//batch_size,
callbacks=callbacks
)
However when I a try to train the CNN I got the following error:
Error when checking target: expected dense_2 to have shape (5,) but got array with shape (4,)
Someone can suggest a workaround?