"Already exists" error in ImageDataGenerator - TensorFlow

# Image generator from the question.  `rescale` is the only option given a
# non-trivial value; every other argument is passed as False, 0, 0.0 or None.
generator = tf.keras.preprocessing.image.ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=0,
    width_shift_range=0.0,
    height_shift_range=0.0,
    brightness_range=None,
    shear_range=0.0,
    zoom_range=0.0,
    channel_shift_range=0.0,
    cval=0.0,
    horizontal_flip=False,
    vertical_flip=False,
    rescale=1.0 / 255,  # scale pixel values by 1/255
    preprocessing_function=None,
    data_format=None,
    validation_split=0.0,
    dtype=None,
)
I'm getting an "already exists" error on this line. It only works in the base Anaconda environment; it doesn't work in other environments, even though the required libraries are installed.

Related

I want to apply oversampling to the minority classes, but it displays an error

I have a dataset that is imbalanced between normal and abnormal ultrasound liver images. I want to balance the dataset using the ImageDataGenerator class, but it displays an error:
TypeError: __init__() got an unexpected keyword argument 'oversample'
from keras.preprocessing.image import ImageDataGenerator
# Augmenting generator exactly as posted in the question.  The last keyword,
# `oversample`, is the argument named in the TypeError the question reports.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    samplewise_center=True,
    samplewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    brightness_range=[0.5, 1.5],
    zoom_range=0.2,
    shear_range=0.2,
    oversample='majority',
)

Data augmentation does not generate images

I am working on image classification (12 classes of image data) using a CNN model. I was facing overfitting, so I applied data augmentation. Although it works pretty well, I have run into the following error:
Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.
/usr/local/lib/python3.7/dist-packages/keras_preprocessing/image/dataframe_iterator.py:282: UserWarning: Found 4985 invalid image filename(s) in x_col="Filepath". These filename(s) will be ignored.
.format(n_invalid, x_col)
/usr/local/lib/python3.7/dist-packages/keras_preprocessing/image/dataframe_iterator.py:282: UserWarning: Found 4985 invalid image filename(s) in x_col="Filepath". These filename(s) will be ignored.
.format(n_invalid, x_col)
/usr/local/lib/python3.7/dist-packages/keras_preprocessing/image/dataframe_iterator.py:282: UserWarning: Found 554 invalid image filename(s) in x_col="Filepath". These filename(s) will be ignored.
.format(n_invalid, x_col)
As a result, no new images are generated.
My code:
import shutil
# NOTE(review): the nesting below is reconstructed from a paste whose
# indentation was flattened -- confirm against the original notebook.

# Root directory for augmented images (one sub-directory per label).
aug_path = r'/content/dataset'

# Start from an empty directory: wipe any previous run, then recreate it.
if os.path.isdir(aug_path):
    shutil.rmtree(aug_path)
os.mkdir(aug_path)

# Create the per-label output directories.
for label in image_df['Label'].unique():
    subpath = os.path.join(aug_path, label)
    os.mkdir(subpath)

target = 110  # classes with fewer rows than this get extra images
image_shape = (256, 256)
gen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
)
groups = image_df.groupby('Label')  # rows grouped by class label

for label in image_df['Label'].unique():
    group = groups.get_group(label)  # rows belonging to this label only
    sample_count = len(group)
    if sample_count >= target:
        continue  # this class already has at least `target` rows

    aug_img_count = 0
    delta = target - sample_count  # how many augmented images are needed
    target_dir = os.path.join(aug_path, label)
    aug_gen = gen.flow_from_dataframe(
        group,
        x_col='Filepath',
        y_col=None,
        target_size=image_shape,
        class_mode=None,
        batch_size=1,
        shuffle=False,
        save_to_dir=target_dir,
        save_prefix='aug-',
        save_format='jpg',
    )
    # Pull batches until `delta` images have been drawn; the counter grows
    # by the size of each batch returned by the iterator.
    while aug_img_count < delta:
        images = next(aug_gen)
        aug_img_count += len(images)
def create_gen():
    """Build the image generators and the train/validation/test iterators.

    Reads the module-level ``train_df`` and ``test_df`` dataframes.

    Returns:
        A 5-tuple ``(train_generator, test_generator, train_images,
        val_images, test_images)``.
    """
    # Both generators apply MobileNetV2 preprocessing; only the training
    # generator reserves a 10% validation split.
    train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
        validation_split=0.1,
    )
    test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
    )

    # NOTE(review): rotation_range ... fill_mode below are passed to
    # flow_from_dataframe, not to the ImageDataGenerator constructor;
    # presumably they have no augmentation effect here -- confirm against
    # the Keras documentation.
    train_images = train_generator.flow_from_dataframe(
        dataframe=train_df,
        x_col='Filepath',
        y_col='Label',
        target_size=(128, 128),
        color_mode='rgb',
        class_mode='categorical',
        batch_size=64,
        shuffle=True,
        seed=0,
        subset='training',
        rotation_range=30,  # Uncomment to use data augmentation
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="nearest",
    )

    # Same settings as the training iterator, but on the 'validation' subset.
    val_images = train_generator.flow_from_dataframe(
        dataframe=train_df,
        x_col='Filepath',
        y_col='Label',
        target_size=(128, 128),
        color_mode='rgb',
        class_mode='categorical',
        batch_size=64,
        shuffle=True,
        seed=0,
        subset='validation',
        rotation_range=30,  # Uncomment to use data augmentation
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="nearest",
    )

    # Test iterator: no shuffling, so batches follow the dataframe order.
    test_images = test_generator.flow_from_dataframe(
        dataframe=test_df,
        x_col='Filepath',
        y_col='Label',
        target_size=(128, 128),
        color_mode='rgb',
        class_mode='categorical',
        batch_size=64,
        shuffle=False,
    )

    return train_generator, test_generator, train_images, val_images, test_images
from sklearn.model_selection import train_test_split
# Split image_df 90/10 into train and test frames (shuffled, fixed seed).
train_df, test_df = train_test_split(
    image_df,
    train_size=0.9,
    shuffle=True,
    random_state=101,
)

# Build the generators and iterators from the split frames.
(
    train_generator,
    test_generator,
    train_images,
    val_images,
    test_images,
) = create_gen()
Output:
Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.
/usr/local/lib/python3.7/dist-packages/keras_preprocessing/image/dataframe_iterator.py:282: UserWarning: Found 4985 invalid image filename(s) in x_col="Filepath". These filename(s) will be ignored.
.format(n_invalid, x_col)
/usr/local/lib/python3.7/dist-packages/keras_preprocessing/image/dataframe_iterator.py:282: UserWarning: Found 4985 invalid image filename(s) in x_col="Filepath". These filename(s) will be ignored.
.format(n_invalid, x_col)
/usr/local/lib/python3.7/dist-packages/keras_preprocessing/image/dataframe_iterator.py:282: UserWarning: Found 554 invalid image filename(s) in x_col="Filepath". These filename(s) will be ignored.
.format(n_invalid, x_col)
You have put parameters in flow_from_dataframe that actually belong in the call to ImageDataGenerator. For example in your code below I have marked what does not belong in flow_from_dataframe. See documentation here.
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(128, 128),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=64,
    shuffle=True,
    seed=0,
    subset='training',
    rotation_range=30,       # remove
    zoom_range=0.15,         # remove
    width_shift_range=0.2,   # remove
    height_shift_range=0.2,  # remove
    shear_range=0.15,        # remove
    horizontal_flip=True,    # remove
    fill_mode="nearest"      # remove
)
P.S. It looks like you used some of my augmentation code from a Kaggle notebook. Look at the notebook to see how to set up your generators.

What is the python code to load dataset from my hard disk

I try to learn machine learning from the TensorFlow official tutorial.
But most tutorials do the download in command prompt.
I can't find any tutorial about loading my own image dataset from my own disk.
Would be great if you can give me a direct answer.
I put the image dataset on my Windows 10 desktop:
C:\Users\User\Desktop\DataSet\coins\data
\test (label 1-211)
\train (label 1-211)
\validation (label 1-211)
You can use image_dataset_from_directory for this where you just have to pass in the path to the files in the argument directory.
from tensorflow.keras.preprocessing import image_dataset_from_directory
# TRAIN_DIR, VALIDATION_DIR, TEST_DIR, SIZE and SEED are not defined in this
# snippet; set them to your own paths / image size / seed before running.
# The three calls differ only in the directory they read from.

# Training split.
train_dataset = image_dataset_from_directory(
    directory=TRAIN_DIR,
    labels="inferred",
    label_mode="categorical",
    image_size=SIZE,
    seed=SEED,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
)

# Validation split.
validation_dataset = image_dataset_from_directory(
    directory=VALIDATION_DIR,
    labels="inferred",
    label_mode="categorical",
    image_size=SIZE,
    seed=SEED,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
)

# Test split.
test_dataset = image_dataset_from_directory(
    directory=TEST_DIR,
    labels="inferred",
    label_mode="categorical",
    image_size=SIZE,
    seed=SEED,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
)
You can use flow_from_directory in Keras.
here is a pretty good tutorial
flow from disk in keras

What's wrong with my ResNet50 on two machines?

Firstly, I trained a ResNet50 to be a six-class classifier from scratch on Kaggle, and got like this.
As you can see, the accuracy of training set and validation set improved steadily.
After that, I rented a cloud host for a better GPU (1080 Ti) and copied my code over (I uploaded my Jupyter notebook), then ran it. But strange things happened: my validation accuracy is extremely unstable and fluctuates widely (around 0.3). Here's the screenshot.
And also, the training on the host is much more difficult than on Kaggle kernel.
Here are the screenshots after some epochs.(actually the host's one is trained over much more epochs than the Kaggle's one)
And here's my ImageDataGenerator code.
# Training generator: rescaling plus light geometric augmentation, with 10%
# of the data reserved as a validation split.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.1,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.1,
)

# Validation generator: rescaling only, same 10% split fraction.
test_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.1,
)

# Both iterators read from the same base_path with the same seed and differ
# only in which subset ('training' / 'validation') they select.
train_generator = train_datagen.flow_from_directory(
    base_path,
    target_size=(300, 300),
    batch_size=16,
    class_mode='categorical',
    subset='training',
    seed=0,
)

validation_generator = test_datagen.flow_from_directory(
    base_path,
    target_size=(300, 300),
    batch_size=16,
    class_mode='categorical',
    subset='validation',
    seed=0,
)

Keras, training with an empty category

Maybe it is a naive question.
I want to try a small experiment for research: train the model with an extra and empty category from the one that I have in the training and validation and see how the prediction for this extra category goes down with the number of samples and epochs.
In particular I added a 5th phantom category in the pandas dataframe.
I am also using an ImageDataGenerator.
# Training generator: rescaling and zoom are the only active settings; the
# other options are passed as 0, 0.0 or False.
train_datagen = ImageDataGenerator(
    rotation_range=0,
    rescale=1.0 / 255,
    shear_range=0.0,
    zoom_range=0.2,
    horizontal_flip=False,
    width_shift_range=0.0,
    height_shift_range=0.0,
)

# Labels come from the 'category' column and file names from 'filename',
# resolved relative to "/mypath/".
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    "/mypath/",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
)

# Validation data is only rescaled.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    "/mypath/",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
)

# Step counts are the sample totals floor-divided by the batch size.
history = model.fit_generator(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=total_validate // batch_size,
    steps_per_epoch=total_train // batch_size,
    callbacks=callbacks,
)
However, when I try to train the CNN, I get the following error:
Error when checking target: expected dense_2 to have shape (5,) but got array with shape (4,)
Can someone suggest a workaround?