I'm trying to build a Conv2D network for image classification. I generated my own dataset by scraping images for 9 classes: airplanes, animals, birds, cars, flowers, people, ships, traffic_signs, trains. The images are roughly 612 x 400+ in resolution (the second dimension varies between 430 and 480). I resized the images to 100x100 to stay within my system's resources. I designed the network, and the training accuracy is above 90%, but the test accuracy won't rise above 45%.
Any idea what I'm doing wrong?
Total images in each class - 500
Total images - 4479
Resolution - 612 x 400+ (varies between 400 and 440)
Below is the code:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import PIL
import tensorflow as tf
import pathlib
import requests
import urllib
import time
from bs4 import BeautifulSoup
from tensorflow import keras
# import consistently from tensorflow.keras; mixing plain keras and tf.keras objects in one model is a common source of errors
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.losses import sparse_categorical_crossentropy
data_dir = pathlib.Path('D:/mixed images')
data_dir
len(list(data_dir.glob('*/*.jpg')))
# planes = list(data_dir.glob('airplanes/*.jpg'))
# PIL.Image.open(planes[10])
img_list = [list(data_dir.glob('airplanes/*')),
            list(data_dir.glob('animals/*')),
            list(data_dir.glob('birds/*')),
            list(data_dir.glob('cars/*')),
            list(data_dir.glob('flowers/*')),
            list(data_dir.glob('people/*')),
            list(data_dir.glob('ships/*')),
            list(data_dir.glob('traffic_signs/*')),
            list(data_dir.glob('trains/*'))]
obj_list = os.listdir(data_dir)
obj_img_dict = dict(zip(obj_list,img_list))
obj_label_dict = dict(zip(obj_list,[0,1,2,3,4,5,6,7,8]))
obj_label_dict
X = []
y = []
for image_name, images in obj_img_dict.items():
    for image in images:
        img = cv2.imread(str(image))
        resized_img = cv2.resize(img, (100, 100))
        X.append(resized_img)
        y.append(obj_label_dict[image_name])
X = np.array(X)
y = np.array(y)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)
X_train_scaled = X_train/255
X_test_scaled = X_test/255
obj_classes = 9
model = keras.Sequential()
# building the convolution layers
model.add(keras.layers.Conv2D(32,(3,3),input_shape=(100,100,3),padding='same',activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.Conv2D(64,(3,3), padding='same',activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.Conv2D(128,(3,3), padding='same',activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.Conv2D(256,(3,3), padding='same',activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.Conv2D(512,(3,3), padding='same',activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.Flatten())
# building the dense layers
model.add(keras.layers.Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(keras.layers.Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(keras.layers.Dense(256, activation='relu'))
# model.add(Dropout(0.6))
model.add(keras.layers.Dense(128, activation='relu'))
# model.add(Dropout(0.6))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(obj_classes,activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])
model.fit(X_train_scaled, y_train, batch_size=64, epochs=50, verbose=2)
model.evaluate(X_test_scaled,y_test)
Try making the following modifications to the model:

- reducing the number of parameters, in order not to over-fit
- spreading the number of parameters from layer to layer as evenly as possible
- using batch normalization between convolutions
- using only 3 dense layers at the end
- keeping a ratio of roughly 50/50 in the number of parameters between the convolutions and the final dense layers
A good candidate model would be:
model = keras.Sequential()
# building the convolution layers
model.add(keras.layers.Conv2D(32,(3,3),input_shape=(100,100,3),padding='same',activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(32,(3,3), padding='same',activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(64,(3,3), padding='same',activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(64,(3,3), padding='same',activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(128,(3,3), padding='same',activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Flatten())
# building the dense layers
model.add(keras.layers.Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(keras.layers.Dense(128, activation='relu'))
# model.add(Dropout(0.5)) # optional
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(obj_classes,activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])
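To watch the train/test gap while this trains, you can also pass the held-out split as validation data. A minimal sketch using the variables already defined above (same batch size and epoch count as the original run):

model.fit(X_train_scaled, y_train,
          validation_data=(X_test_scaled, y_test),
          batch_size=64, epochs=50, verbose=2)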
Related
I made an image classification system which detects plant leaf diseases using the PlantVillage dataset. I built the whole process, from preprocessing to model building, but when I try to run the program, the error shown below pops up. I have tried a lot of things, and frankly I do not want to mess with the dataset in Colab, so could anyone please help me out with this? I would be ever so grateful.
This is the preprocessing part of my code.
import numpy as np
import pickle
import cv2
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from os import listdir
from sklearn.preprocessing import LabelBinarizer
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation, Flatten, Dropout, Dense
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.preprocessing import image
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from google.colab import drive
drive.mount('/content/drive')
#Resize the image to match the input shape of the layer
default_image_size = tuple((256, 256))
image_size = 0
#dataset directory
directory_root = '/content/drive/MyDrive/proj/PlantVillage'
width = 256
height = 256
depth = 3
def convert_image_to_array(image_dir):
    # loads an image from the directory if the image exists
    image = cv2.imread(image_dir)
    if image is not None:
        # resizes the image to the fixed default size (note: resizing to a
        # fixed width and height does not preserve the original aspect ratio)
        image = cv2.resize(image, default_image_size)
        return img_to_array(image)
    else:
        # if the image could not be read, return an empty array
        return np.array([])
image_list, label_list = [], []
print("[INFO] Loading Images...")
root_dir = listdir(directory_root)
for plant_folder in root_dir:
    plant_disease_folderlist = listdir(f"{directory_root}/{plant_folder}")
    for plant_disease_folder in plant_disease_folderlist:
        print(f"[INFO] Processing {plant_disease_folder} ...")
        plant_disease_image_list = listdir(f"{directory_root}/{plant_folder}/")
        for image in plant_disease_image_list[:200]:
            image_directory = f"{directory_root}/{plant_folder}/{plant_disease_folder}/{image}"
            if image_directory.endswith(".jpg") or image_directory.endswith(".JPG"):
                image_list.append(convert_image_to_array(image_directory))
                label_list.append(plant_disease_folder)
print("[INFO] Image Loading Complete!")
#transforms the resized image data into numpy array
np_image_list = np.array(image_list, dtype = np.float16) / 255.0
#checks for the number of images loaded for training
image_size = len(image_list)
print(f"Total number of images: {image_size}")
#each class or label is assigned a unique value for training
label_binarizer = LabelBinarizer()
image_labels = label_binarizer.fit_transform(label_list)
#dumping the labels in the pkl file so it can be used for predictions
pickle.dump(label_binarizer,open('plantlabel.pkl', 'wb'))
n_classes = len(label_binarizer.classes_)
print("Total number of classes: ", n_classes)
print("Labels: ", label_binarizer.classes_)
print("[INFO] Splitting Data Into Training and Testing Set...")
#splitting the data with a 0.2 split ratio
x_train, x_test, y_train, y_test = train_test_split(np_image_list, image_labels, test_size=0.2, random_state = 42)
#data augmentation generates additional variations of the images; the different operations diversify the dataset so the model performs better on unseen images
#only the generator object is created here; it will be used later during training
aug = ImageDataGenerator(rotation_range=25,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         shear_range=0.2,
                         zoom_range=0.2,
                         horizontal_flip=True,
                         fill_mode="nearest")
Now I built the model with Keras and added the layers; everything ran correctly until this part.
EPOCHS = 10
LR = 1e-3
BATCH_SIZE = 32
WIDTH = 256
HEIGHT = 256
DEPTH = 3
#creating the model
inputShape = (HEIGHT, WIDTH, DEPTH)
chanDim = -1
if K.image_data_format() == "channels_first":
    inputShape = (DEPTH, HEIGHT, WIDTH)
    chanDim = 1  # the channels axis is first in this format
model = Sequential()
model.add(Conv2D(32, (3, 3), padding = "same", input_shape = inputShape))
model.add(Activation("relu"))
model.add(BatchNormalization(axis = chanDim))
model.add(MaxPooling2D(pool_size = (3, 3)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding = "same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis = chanDim))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), padding = "same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis = chanDim))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(n_classes))
model.add(Activation("softmax"))
model.summary()
opt = Adam(lr = LR, decay = LR/EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,metrics=["accuracy"])
print("[INFO] Training Begins...")
history = model.fit_generator(
    aug.flow(x_train, y_train, batch_size=BATCH_SIZE),
    validation_data=(x_test, y_test),
    steps_per_epoch=len(x_train) // BATCH_SIZE,
    epochs=EPOCHS, verbose=1
)
print("[INFO] Training Complete...")
Here at the aug.flow(x_train, y_train, batch_size=BATCH_SIZE, ...) part, the error occurs. The error is as follows.
ValueError Traceback (most recent call last)
<ipython-input-13-a2fb6e262c72> in <module>()
4 print("[INFO] Training Begins...")
5 history = model.fit_generator(
----> 6 aug.flow(x_train, y_train, batch_size=BATCH_SIZE),
7 validation_data=(x_test, y_test),
8 steps_per_epoch=len(x_train) // BATCH_SIZE,
2 frames
/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/numpy_array_iterator.py in __init__(self, x, y, image_data_generator, batch_size, shuffle, sample_weight, seed, data_format, save_to_dir, save_prefix, save_format, subset, dtype)
124 raise ValueError('Input data in `NumpyArrayIterator` '
125 'should have rank 4. You passed an array '
--> 126 'with shape', self.x.shape)
127 channels_axis = 3 if data_format == 'channels_last' else 1
128 if self.x.shape[channels_axis] not in {1, 3, 4}:
ValueError: ('Input data in `NumpyArrayIterator` should have rank 4. You passed an array with shape', (120000, 0))
I am training on only 1500 images because the purpose of my project is only to build a model, so I just need to get the training done. I hope someone can help me with this. Thank you.
I am new to machine learning and Keras.
I am trying to use a trained model to increase accuracy; in my case I followed Jerry Kurata on Pluralsight to use InceptionV3, modifying only the last layer to train it to recognize birds.
The dataset I have is the Keras built-in CIFAR10, and here is the official tutorial.
Here is the error message:
F tensorflow/stream_executor/cuda/cuda_dnn.cc:516] Check failed:
cudnnSetTensorNdDescriptor(handle_.get(), elem_type, nd, dims.data(),
strides.data()) == CUDNN_STATUS_SUCCESS (3 vs. 0)batch_descriptor:
{count: 32 feature_map_count: 288 spatial: %d 0%d 0 value_min:
0.000000 value_max: 0.000000 layout: BatchDepthYX} Aborted (core dumped)
I saw one possible cause here:
The image samples in CIFAR10 (32x32) are too small, which causes this issue.
But I cannot figure out how to fix it.
Here is my code:
import matplotlib.pyplot as plt
import keras
from keras import backend as K
with K.tf.device("/device:GPU:0"):
    config = K.tf.ConfigProto(intra_op_parallelism_threads=4,
                              inter_op_parallelism_threads=4,
                              allow_soft_placement=True,
                              device_count={'CPU': 1, 'GPU': 1})
    session = K.tf.Session(config=config)
    K.set_session(session)
from keras.callbacks import EarlyStopping
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.datasets import cifar10
# "/device:GPU:0"
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def create_generator():
    return ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)
Training_Epochs = 1
Batch_Size = 32
Number_FC_Neurons = 1024
Num_Classes = 10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, Num_Classes)
y_test = keras.utils.to_categorical(y_test, Num_Classes)
# load cifar10 data here https://keras.io/datasets/
datagen = create_generator()
datagen.fit(x_train)
Inceptionv3_model = InceptionV3(weights='imagenet', include_top=False)
print('Inception v3 model without last FC loaded')
x = Inceptionv3_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(Number_FC_Neurons, activation='relu')(x)
predictions = Dense(Num_Classes, activation='softmax')(x)
model = Model(inputs=Inceptionv3_model.input, outputs=predictions)
# print(model.summary())
print('\nFine tuning existing model')
Layers_To_Freeze = 172
for layer in model.layers[:Layers_To_Freeze]:
    layer.trainable = False
for layer in model.layers[Layers_To_Freeze:]:
    layer.trainable = True
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
cbk_early_stopping = EarlyStopping(monitor='val_acc', mode='max')
print(len(x_train))
history_transfer_learning = model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=Batch_Size),
    epochs=Training_Epochs,
    validation_data=(x_test, y_test),
    workers=4,
    steps_per_epoch=len(x_train)//Batch_Size,
    callbacks=[cbk_early_stopping]
)
model.save('incepv3_transfer_cifar10.h5', overwrite=True, include_optimizer=True)
# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
Your error, as you said, comes from the input size difference: the pre-trained ImageNet model takes larger images than CIFAR-10's (32, 32).
You need to specify the input_shape of the model beforehand, like this:
Inceptionv3_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(32, 32, 3))
For more explanation you can check this tutorial.
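If your Keras build rejects an input_shape this small for the ImageNet weights (some releases enforce InceptionV3's 75x75 minimum input size; treat that as an assumption about your install), an alternative sketch is to upsample the CIFAR-10 arrays before training:

import numpy as np
import cv2

def upscale_batch(images, size=(96, 96)):
    # hypothetical helper, not part of the original code: resize each
    # 32x32 CIFAR-10 image to `size`, which exceeds the 75x75 minimum
    return np.stack([cv2.resize(img, size) for img in images])

x_train = upscale_batch(x_train)
x_test = upscale_batch(x_test)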
I have created a project using Keras and TensorFlow. I used the NSL-KDD dataset and coded my project in Python. I also used the SGD optimizer.
I would like to fit the model, then evaluate it and check its accuracy (so I can compare it to the results from classical machine learning).
My complete code is below; please review it.
import tensorflow as tf
from keras import backend as K
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import tag_constants, signature_constants, signature_def_utils_impl
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import numpy as np
sess = tf.Session()
K.set_session(sess)
K.set_learning_phase(0)
model_version = "2"
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('KDD_Dataset.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 41:42].values
# Encoding categorical data X
from sklearn.preprocessing import LabelEncoder
labelencoder_X = LabelEncoder()
X[:,0] = labelencoder_X.fit_transform(X[:,0])
X[:,1] = labelencoder_X.fit_transform(X[:,1])
X[:,2] = labelencoder_X.fit_transform(X[:,2])
#
from sklearn.preprocessing import OneHotEncoder
onehotencoder_0 = OneHotEncoder(categorical_features=[0])
onehotencoder_1 = OneHotEncoder(categorical_features=[1])
onehotencoder_2 = OneHotEncoder(categorical_features=[2])
X = onehotencoder_0.fit_transform(X).toarray()
X = onehotencoder_1.fit_transform(X).toarray()
X = onehotencoder_2.fit_transform(X).toarray()
# Encoding categorical data y
from sklearn.preprocessing import LabelEncoder
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)
max(y)
# Splitting the dataset into the Training set and Test set
#from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=0)
# create the model
model = Sequential()
model.add(Dense(41, input_dim=8, init='uniform', activation='relu'))
model.add(Dense(20, init='uniform', activation='relu'))
model.add(Dense(1, init='uniform', activation='sigmoid'))
# compile the model
model.compile(loss='binary_crossentropy', optimizer=sgd,metrics=['accuracy'])
model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=200, batch_size=5, verbose=0)
See Dense(41, input_dim=8, init='uniform', activation='relu').
The model you defined takes 8 input features, but your inputs have 45 features; they do not match. You have to either build the model with enough input features to match the data, or cut the input down to 8 features to match your model.
Change the line
model.add(Dense(41, input_dim=8, init='uniform', activation='relu'))
to
model.add(Dense(42, input_dim=42, init='uniform', activation='relu'))
and change optimizer=sgd to optimizer='sgd' (as written, sgd is an undefined name).
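Putting both fixes together, a minimal sketch of the corrected definition (assuming, as above, that the encoded input ends up with 42 features):

model = Sequential()
model.add(Dense(42, input_dim=42, init='uniform', activation='relu'))
model.add(Dense(20, init='uniform', activation='relu'))
model.add(Dense(1, init='uniform', activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=200, batch_size=5, verbose=0)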
I am using the Inception v3 model to identify the disease present in a chest X-ray image. For training I am using the NIH Chest X-ray dataset, which has 14 different disease classes; I have also reduced the original image resolution to shrink the dataset's size on disk. As I don't have a GPU, I am using Google Colab to train my model, taking only 300 images per class for all minority classes and 400 images for the 'No Finding' (majority) class. Please point out any bugs in my code, and please suggest other approaches so that I can achieve better accuracy.
import numpy as np
import tensorflow as tf
import random as rn
import os
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(42)
rn.seed(12345)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
from keras import backend as K
tf.set_random_seed(1234)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
# from keras import backend as K
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard
from keras.layers.core import Flatten, Dense, Dropout, Reshape, Lambda
from keras.layers.normalization import BatchNormalization
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
# import os.path
'''F1 score calculation class'''
# import numpy as np
# from keras.callbacks import Callback
# from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
# class Metrics(Callback):
#     def on_train_begin(self, logs={}):
#         self.val_f1s = []
#         self.val_recalls = []
#         self.val_precisions = []
#     def on_epoch_end(self, epoch, logs={}):
#         val_predict = (np.asarray(self.model.predict(self.model.validation_data[0]))).round()
#         val_targ = self.model.validation_data[1]
#         _val_f1 = f1_score(val_targ, val_predict)
#         _val_recall = recall_score(val_targ, val_predict)
#         _val_precision = precision_score(val_targ, val_predict)
#         self.val_f1s.append(_val_f1)
#         self.val_recalls.append(_val_recall)
#         self.val_precisions.append(_val_precision)
#         print(" — val_f1: %f — val_precision: %f — val_recall %f" % (_val_f1, _val_precision, _val_recall))
#         return
# metrics = Metrics()
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False)
# dimensions of our images.
#Inception input size
img_width, img_height = 299, 299
top_layers_checkpoint_path = 'cp.top.best.hdf5'
fine_tuned_checkpoint_path = 'cp.fine_tuned.best.hdf5'
new_extended_inception_weights = 'final_weights.hdf5'
train_data_dir = 'drive/My Drive/Colab Notebooks/Sample-300-XRay-Dataset/train'
validation_data_dir = 'drive/My Drive/Colab Notebooks/Sample-300-XRay-Dataset/test'
nb_train_samples = 3528
nb_validation_samples = 896
top_epochs = 50
fit_epochs = 50
batch_size = 24
# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
x = BatchNormalization()(x)
#x =Dropout(0.2)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
#x= Dropout(0.3)(x)
# and a logistic layer -- we have 15 classes
predictions = Dense(15, activation='softmax')(x)
# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
if os.path.exists(top_layers_checkpoint_path):
    model.load_weights(top_layers_checkpoint_path)
    print("Checkpoint '" + top_layers_checkpoint_path + "' loaded.")
# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False
# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
# prepare data augmentation configuration
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')
#Save the model after every epoch.
mc_top = ModelCheckpoint(top_layers_checkpoint_path, monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
#Save the TensorBoard logs.
tb = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=True)
# train the model on the new data for a few epochs
#model.fit_generator(...)
model.fit_generator(
    train_generator,
    samples_per_epoch=nb_train_samples // batch_size,
    epochs=top_epochs,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples // batch_size,
    callbacks=[mc_top, tb])
# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from inception V3. We will freeze the bottom N layers
# and train the remaining top layers.
# let's visualize layer names and layer indices to see how many layers
# we should freeze:
# for i, layer in enumerate(base_model.layers):
#     print(i, layer.name)
#Save the model after every epoch.
mc_fit = ModelCheckpoint(fine_tuned_checkpoint_path, monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
if os.path.exists(fine_tuned_checkpoint_path):
    model.load_weights(fine_tuned_checkpoint_path)
    print("Checkpoint '" + fine_tuned_checkpoint_path + "' loaded.")
# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 172 layers and unfreeze the rest:
for layer in model.layers[:172]:
    layer.trainable = False
for layer in model.layers[172:]:
    layer.trainable = True
# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])
# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers)
#model.fit_generator(...)
model.fit_generator(
    train_generator,
    samples_per_epoch=nb_train_samples // batch_size,
    epochs=fit_epochs,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples // batch_size,
    callbacks=[mc_fit, tb])
model.save_weights(new_extended_inception_weights)
Running a single-hidden-layer MLP on MNIST, I get extremely different results for Keras and sklearn.
import numpy as np
np.random.seed(5)
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras import regularizers
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn.neural_network import MLPClassifier
(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10
batch_data = x_train[:2000]
batch_labels = y_train[:2000]
# flat 2d images
batch_data_flat = batch_data.reshape(2000, 784)
# one-hot encoding
batch_labels_one_hot = np_utils.to_categorical(batch_labels, num_classes)
num_hidden_nodes = 100
alpha = 0.0001
batch_size = 128
beta_1 = 0.9
beta_2 = 0.999
epsilon = 1e-08
learning_rate_init = 0.001
epochs = 200
# keras
keras_model = Sequential()
keras_model.add(Dense(num_hidden_nodes, activation='relu',
                      kernel_regularizer=regularizers.l2(alpha),
                      kernel_initializer='glorot_uniform',
                      bias_initializer='glorot_uniform'))
keras_model.add(Dense(num_classes, activation='softmax',
                      kernel_regularizer=regularizers.l2(alpha),
                      kernel_initializer='glorot_uniform',
                      bias_initializer='glorot_uniform'))
keras_optim = Adam(lr=learning_rate_init, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon)
keras_model.compile(optimizer=keras_optim, loss='categorical_crossentropy', metrics=['accuracy'])
keras_model.fit(batch_data_flat, batch_labels_one_hot, batch_size=batch_size, epochs=epochs, verbose=0)
# sklearn
sklearn_model = MLPClassifier(hidden_layer_sizes=(num_hidden_nodes,), activation='relu', solver='adam',
                              alpha=alpha, batch_size=batch_size, learning_rate_init=learning_rate_init,
                              max_iter=epochs, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon)
sklearn_model.fit(batch_data_flat, batch_labels_one_hot)
# evaluate both on their training data
score_keras = keras_model.evaluate(batch_data_flat, batch_labels_one_hot)
score_sklearn = sklearn_model.score(batch_data_flat, batch_labels_one_hot)
print("Acc: keras %f, sklearn %f" % (score_keras[1], score_sklearn))
Output: Acc: keras 0.182500, sklearn 1.000000
The only difference I see is that scikit-learn computes the Glorot initialization of the final layer with sqrt(2 / (fan_in + fan_out)) vs. sqrt(6 / (fan_in + fan_out)) in Keras, but I don't think that should cause such a difference. Am I forgetting something here?
scikit-learn 0.19.1, Keras 2.2.0 (TensorFlow backend 1.9.0)
You should probably initialize the biases with 'zeros' and not with 'glorot_uniform'.
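For example, a minimal sketch of the two Keras layers from the code above with zero-initialized biases (everything else unchanged):

keras_model = Sequential()
keras_model.add(Dense(num_hidden_nodes, activation='relu',
                      kernel_regularizer=regularizers.l2(alpha),
                      kernel_initializer='glorot_uniform',
                      bias_initializer='zeros'))
keras_model.add(Dense(num_classes, activation='softmax',
                      kernel_regularizer=regularizers.l2(alpha),
                      kernel_initializer='glorot_uniform',
                      bias_initializer='zeros'))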