Stuck in first epoch when training a CNN model in Google Colab - TensorFlow

I created a model to identify plant diseases; it is expected to identify 10 diseases. In a Jupyter notebook it worked fine, although it was slow due to GPU constraints. Then I decided to run the model in Google Colab, but it does not run there: it gets stuck at the first epoch.
The code I use to construct the model is given below:
BATCH_SIZE = 64
IMAGE_SIZE = 256
CHANNELS = 3
EPOCHS = 10

dataset = tf.keras.preprocessing.image_dataset_from_directory(
    "/content/drive/MyDrive/google-colab-files/PlantVillage",
    seed=123,
    shuffle=True,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE
)
def get_dataset_partisions_tf(ds, trains_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10000):
    ds_size = len(ds)
    if shuffle:
        ds = ds.shuffle(shuffle_size, seed=12)
    train_size = int(trains_split * ds_size)
    val_size = int(val_split * ds_size)
    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)
    return train_ds, val_ds, test_ds

train_ds, val_ds, test_ds = get_dataset_partisions_tf(dataset)

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = val_ds.cache().shuffle(1000).prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.cache().shuffle(1000).prefetch(buffer_size=tf.data.AUTOTUNE)
resize_and_rescales = Sequential([
    layers.experimental.preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE),
    layers.experimental.preprocessing.Rescaling(1.0 / 255)
])

data_agmetation = Sequential([
    layers.experimental.preprocessing.RandomFlip('horizontal_and_vertical'),
    layers.experimental.preprocessing.RandomRotation(0.2),
])
input_shape = (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
n_classes = 10

model = Sequential([
    resize_and_rescales,
    data_agmetation,
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(n_classes, activation='softmax'),
])

model.build(input_shape=input_shape)
model.summary()
(Screenshot of the model summary omitted.) The model is compiled with:
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)
When I use the following code to train the model:
model.fit(
    train_ds,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=2,
    validation_data=val_ds
)
it stays stuck at the first epoch.

Check whether TensorFlow is actually using a GPU. You can also try reducing the batch size.
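As a quick check (a minimal sketch, assuming a standard Colab runtime), you can list the devices TensorFlow sees; if no GPU shows up, training falls back to the CPU, which can make the first epoch look like it is hanging:

import tensorflow as tf

# An empty list means no GPU is visible and training runs on the CPU.
print(tf.config.list_physical_devices('GPU'))

# Optionally log which device each op is placed on.
tf.debugging.set_log_device_placement(True)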

My assumption is that this is because of your verbose setting: set verbose to 1 to see the progress of the steps within the current epoch.
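For example, the same fit call from the question with verbose=1 (a sketch; batch_size is omitted here since train_ds is already batched):

model.fit(
    train_ds,
    epochs=EPOCHS,
    verbose=1,  # per-step progress bar instead of one summary line per epoch
    validation_data=val_ds
)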

Related

HIGH Resolution Image and Colab PRO Crash

I have 1700 images, each 1000 x 1000 pixels in height and width. They contain fine details, so I prefer to keep this size. However, my Google Colab Pro session crashes. Please help.
'''
##title IMAGE TO DATA, NORMALIZATION AND AUGMENTATION
# Directories with subdirectories as classes for the training and validation datasets
%%capture
train_dir = '/content/Dataset/Training'
validation_dir = '/content/Dataset/Validation'

# Set batch size and image height and width
batch_size = 32
IMG_HEIGHT, IMG_WIDTH = (1000, 1000)

# Image-to-data transform using Keras ImageDataGenerator
# Training data
Dataset_Image_Training = ImageDataGenerator(rescale=1./255, zoom_range=[0.8, 1.5], brightness_range=[0.8, 2.0])
train_data_gen = Dataset_Image_Training.flow_from_directory(
    batch_size=batch_size,
    directory=train_dir,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary')

# Validation data
validation_image_generator = ImageDataGenerator(rescale=1./255, zoom_range=[0.8, 1.5], brightness_range=[0.8, 2.0])
val_data_gen = validation_image_generator.flow_from_directory(
    batch_size=batch_size,
    directory=validation_dir,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary')

# Check classes in dataset
train_data_gen.class_indices
##title Deep Learning CNN Model with Keras Sequential with **Dropout**
#%%capture
model = Sequential([
    Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    MaxPool2D(2, 2),
    Dropout(0.5),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPool2D(2, 2),
    Dropout(0.5),
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    MaxPool2D(2, 2),
    Dropout(0.5),
    Conv2D(256, (3, 3), padding='same', activation='relu'),
    MaxPool2D(2, 2),
    Dropout(0.5),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')])

# Model compilation
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
# TensorBoard set up
import tensorflow as tf
import datetime
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Checkpoint and early-stopping settings
filepath = '/content/drive/My Drive/DL_Model.hdf5'
checkpoint = [tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_accuracy', mode='max',
                                                 save_best_only=True, save_weights_only=False, verbose=1),
              tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=15, verbose=1),
              tensorboard_callback]

# Model fitting
hist = model.fit(
    train_data_gen,
    steps_per_epoch=None,
    epochs=500,
    validation_data=val_data_gen,
    validation_steps=None,
    callbacks=checkpoint
)
# Accuracy print
train_acc = max(hist.history['accuracy'])
val_acc = max(hist.history['val_accuracy'])
train_loss = min(hist.history['loss'])
val_loss = min(hist.history['val_loss'])
print('Training accuracy is')
print(train_acc)
print('Validation accuracy is')
print(val_acc)
print('Training loss is')
print(train_loss)
print('Validation loss is')
print(val_loss)

# Load TensorBoard
%load_ext tensorboard
%tensorboard --logdir logs
'''

How to prevent overfitting in transfer learning with VGG16

I'm trying to train a model to recognize facial expressions, so basically a classification problem with 7 classes:
img_size = 48
batch_size = 64

datagen_train = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
    preprocessing_function=preprocess_input
)
train_generator = datagen_train.flow_from_directory(
    train_path,
    target_size=(img_size, img_size),
    # color_mode='grayscale',
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

datagen_validation = ImageDataGenerator(horizontal_flip=True, preprocessing_function=preprocess_input)
validation_generator = datagen_train.flow_from_directory(
    valid_path,
    target_size=(img_size, img_size),
    # color_mode='grayscale',
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
)
I'm using ImageDataGenerator, and I built my model with VGG16 as a headless (no top) transfer-learning base, like so:
ptm = PretrainedModel(
    input_shape=[48, 48, 3],
    weights='imagenet',
    include_top=False)

x = Flatten()(ptm.output)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
x = Dense(7, activation='softmax', kernel_initializer='random_uniform',
          bias_initializer='random_uniform', bias_regularizer=regularizers.l2(0.01),
          name='predictions')(x)

opt = optimizers.RMSprop(learning_rate=0.0001)
model = Model(inputs=ptm.input, outputs=x)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)
model.summary()
I used callbacks for early stopping and learning-rate reduction and ran 100 epochs:
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.00005,
    patience=11,
    verbose=1,
    restore_best_weights=True,
)
lr_scheduler = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=7,
    min_lr=1e-7,
    verbose=1,
)
callbacks = [
    early_stopping,
    lr_scheduler,
]
After 61 epochs training stopped early. I got decent training accuracy, but the validation accuracy was much lower:
loss: 0.6081 - accuracy: 0.7910 - val_loss: 1.4658 - val_accuracy: 0.5608
Any suggestions on how I can fix this overfitting? Thanks!
In your validation generator, remove horizontal_flip=True and set shuffle=False. Also, you have the code
validation_generator = datagen_train.flow_from_directory( etc
You want to change it to
validation_generator = datagen_validation.flow_from_directory( etc
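Putting it together, a minimal sketch of the corrected validation pipeline, reusing valid_path, img_size, and batch_size from the question:

# No augmentation and no shuffling for validation data, so evaluation is deterministic
datagen_validation = ImageDataGenerator(preprocessing_function=preprocess_input)

validation_generator = datagen_validation.flow_from_directory(
    valid_path,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)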

TensorFlow Keras shape mismatch

While trying to implement a standard MNIST digit recognizer that many tutorials use to introduce you to neural networks, I'm encountering the error
ValueError: Shape mismatch: The shape of labels (received (1,)) should equal the shape of logits except for the last dimension (received (28, 10)).
I would like to use from_tensor_slices to process the data, since I want to apply the code to another problem where the data comes from a CSV file. Anyway, here is the code producing the error in the line model.fit(...)
import tensorflow as tf

train_dataset, test_dataset = tf.keras.datasets.mnist.load_data()
train_images, train_labels = train_dataset
train_images = train_images / 255.0

train_dataset_tensor = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
num_of_validation_data = 10000
validation_data = train_dataset_tensor.take(num_of_validation_data)
train_data = train_dataset_tensor.skip(num_of_validation_data)

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(100, activation='sigmoid'),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

model.fit(train_data, batch_size=50, epochs=5)
performance = model.evaluate(validation_data)
I don't understand where the shape (28, 10) of the logits comes from; I thought I was flattening the image, essentially making a 1D vector out of the 2D image. How can I prevent the error?
You can use the following code. The key change is that the dataset is batched with .batch(32): the batch_size argument to model.fit does not batch a tf.data.Dataset, and with an unbatched dataset Keras treats the first dimension of each 28x28 image as the batch dimension, so the logits come out with shape (28, 10) while each label is a single scalar, which is exactly the mismatch reported in the error.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(100, activation='sigmoid'),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

model.fit(train_ds)
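For completeness, the evaluation step from the question works the same way once the dataset is batched (a sketch using the test_ds defined above):

# Evaluating on an unbatched dataset would trigger the same shape mismatch.
loss, accuracy = model.evaluate(test_ds)
print(loss, accuracy)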

How to minimize the loss?

This should be a regression problem.
I would like the neural network to estimate the length of a line, in pixels, from an image. Three example images (a, b, c) were attached; each is 200 x 200 pixels.
There are 6000 training images and 1000 validation images.
The labels are the distances in pixels:
a) 1.205404496424333018e+02
b) 1.188780888137086436e+02
c) 1.110180165558725918e+02
Here is my training code:
img_size = 200

def preprocess_image(image):
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [img_size, img_size])
    image /= 255.0  # normalize to [0,1] range
    return image

def load_and_preprocess_image(path):
    image = tf.read_file(path)
    return preprocess_image(image)

AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 16

train_labels = np.loadtxt("train_labels.txt")
val_labels = np.loadtxt("test_labels.txt")
train_images = sorted(glob.glob("train_img/img_*.jpg"))
val_images = sorted(glob.glob("test_img/img_*.jpg"))

steps_per_epoch_count = tf.ceil(len(train_images) / BATCH_SIZE)

train_path_ds = tf.data.Dataset.from_tensor_slices(train_images)
val_path_ds = tf.data.Dataset.from_tensor_slices(val_images)

train_image_ds = train_path_ds.map(load_and_preprocess_image,
                                   num_parallel_calls=AUTOTUNE)
train_label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(train_labels, tf.float32))
train_image_label_ds = tf.data.Dataset.zip((train_image_ds, train_label_ds))

val_image_ds = val_path_ds.map(load_and_preprocess_image,
                               num_parallel_calls=AUTOTUNE)
val_label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(val_labels, tf.float32))
val_image_label_ds = tf.data.Dataset.zip((val_image_ds, val_label_ds))
model = tf.keras.models.Sequential([
    tf.keras.layers.Convolution2D(16, 3, 3, input_shape=(img_size, img_size, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Convolution2D(32, 3, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    # tf.keras.layers.Convolution2D(64, 3, 3, activation='relu'),
    # tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(400, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(200, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(100, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.05),
    tf.keras.layers.Dense(1, activation=tf.nn.relu)
])

model.compile(optimizer=tf.keras.optimizers.RMSprop(0.01),
              loss="mean_squared_error",
              metrics=["mean_absolute_error", "mean_squared_error"]
              )

train_ds = train_image_label_ds.apply(
    tf.data.experimental.shuffle_and_repeat(buffer_size=len(train_images)))
train_ds = train_ds.batch(BATCH_SIZE)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

val_ds = val_image_label_ds.apply(
    tf.data.experimental.shuffle_and_repeat(buffer_size=len(val_images)))
val_ds = val_ds.batch(BATCH_SIZE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

history = model.fit(
    train_ds,
    epochs=80,
    validation_data=val_ds,
    steps_per_epoch=374,
    validation_steps=62
)
However, looking at the plot of training vs. validation mean_squared_error (plot omitted):
Questions:
Why is the validation loss not stable?
The average mean squared error is about 400 in training, which seems too high. What modifications can I make to improve the estimation?
EDIT:
This is my latest model:
Learning rate = 0.01
Batch size = 16
model = tf.keras.models.Sequential([
    tf.keras.layers.Convolution2D(16, 3, 3, input_shape=(img_size, img_size, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Convolution2D(32, 3, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(2, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(2, activation=tf.nn.relu),  # kernel_regularizer=tf.keras.regularizers.l2(0.001)
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(2, activation=tf.nn.relu),  # kernel_regularizer=tf.keras.regularizers.l2(0.001)
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(2, activation=tf.nn.relu),  # kernel_regularizer=tf.keras.regularizers.l2(0.001)
    tf.keras.layers.Dense(1, activation="linear")
])
The output looks like this (plot omitted):
As you can see, the training and validation losses are almost identical. The MSE loss stabilizes around 2393 for both, whose square root corresponds to an error of about 48.91 pixels, which is still quite high.
Any advice on lowering it further? Is this normal?

CoreMLtools and Keras ValueError: need more than 1 value to unpack

I'm fine-tuning the Inception V3 model with Keras, in order to convert it with coremltools into a .mlmodel file.
However, when converting the model, coremltools throws the following error once the converter reaches the last layer of the model:
coremltools/models/neural_network.py", line 2501, in set_pre_processing_parameters
channels, height, width = array_shape
ValueError: need more than 1 value to unpack
I used the code from the Keras documentation on applications found here: https://keras.io/applications/#fine-tune-inceptionv3-on-a-new-set-of-classes
And added a piece of code loading my dataset from the VGG example found here: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
My final script looks like this, using TensorFlow as the backend:
# LOAD THE DATA
from keras.preprocessing.image import ImageDataGenerator

img_width, img_height = 299, 299
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 358
nb_validation_samples = 21
epochs = 1
batch_size = 15

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')
# TRAIN THE MODEL
base_model = InceptionV3(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(7, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

model.save('finetuned_inception.h5')
I'm writing here in response to #SwimBikeRun's request (as I need a bit more space)
I was converting YOLO to Keras and then Keras to CoreML. For conversion I was using this script https://github.com/qqwweee/keras-yolo3/blob/master/convert.py
In the conversion process, the model was eventually created like this:
input_layer = Input(shape=(None, None, 3))
...
model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index])
And those "None"-inputs was what made CoreML conversion fail. For CoreML the input-size to your model must be known. So I changed it to this:
input_layer = Input(shape=(416, 416, 3)
Your input-size will probably vary.
For your original question:
Maybe check your base_model.input size for the same problem.
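A minimal sketch of that check against the fine-tuning script above; if the spatial dimensions come back as None, passing an explicit input_shape when creating the base model is the analogous fix (299 x 299 matches the generators used in the question):

# Inspect the input shape; None values mean the spatial size is unspecified.
print(base_model.input_shape)

# Recreate the base model with a fixed input size so coremltools can infer
# (channels, height, width) for its pre-processing parameters.
base_model = InceptionV3(weights='imagenet', include_top=False,
                         input_shape=(299, 299, 3))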