Low GPU usage in machine learning computing - tensorflow

My friend and I entered a Kaggle competition called "Plant Seedlings Classification". He built a Keras kernel as follows:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from os import listdir, makedirs
from os.path import join, exists, expanduser
from keras import models
from keras.preprocessing import image
from keras.applications import xception
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard
from keras.callbacks import ModelCheckpoint
from keras import optimizers
from keras import layers
from keras.applications.xception import Xception
train_dir = './Plant_Seedlings_Classification/train/'
valid_dir = './Plant_Seedlings_Classification/validation/'
im_size = 299
batch_size = 10
train_num = 3803
valid_num = 947
conv_base = Xception(weights='imagenet',
                     include_top=False,
                     input_shape=(im_size, im_size, 3))
conv_base.trainable = False
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten()) # Flatten
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(12, activation='softmax'))
model.summary()
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=30,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest')
valid_datagen = ImageDataGenerator(rescale=1./255)
print("train gen")
train_generator = train_datagen.flow_from_directory(
    train_dir,
    class_mode='categorical',
    target_size=(im_size, im_size),
    color_mode='rgb',
    batch_size=batch_size)
print("validation gen")
validation_generator = valid_datagen.flow_from_directory(
    valid_dir,
    class_mode='categorical',
    target_size=(im_size, im_size),
    color_mode='rgb',
    batch_size=batch_size)
print("train indices",train_generator.class_indices)
print("validation indices", validation_generator.class_indices)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.Nadam(lr=1e-4,
                                         beta_1=0.9,
                                         beta_2=0.999,
                                         epsilon=1e-08,
                                         schedule_decay=0.004),
              metrics=['acc'])
steps_per_epoch = int(train_num/batch_size)+50
validation_step = int(valid_num/batch_size)+1
print("steps_per_epoch", steps_per_epoch)
print("validation_step", validation_step)
model_save_path = 'xception_299_0304_nomask_epoch{epoch:02d}_vacc{val_acc:.4f}.h5'
history = model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=50,
    validation_data=validation_generator,
    validation_steps=validation_step,
    callbacks=[TensorBoard(log_dir='/tmp/tensorflow/log/1'),
               ModelCheckpoint(filepath=model_save_path,
                               monitor='val_acc',
                               save_best_only=True,
                               mode='max')])
The code runs for 50 epochs.
When the kernel ran on my friend's PC (i7-7700, GTX 1060, 8 GB DDR4-2400), it took about 90 seconds per epoch.
It took about 120 seconds per epoch when it ran on my PC (i5-7400, GTX 1070 Ti, 16 GB DDR4-2400).
We both used tensorflow_gpu to run this kernel. My question is: why is GPU computation so much slower on my PC than on my friend's?
We checked the GPU usage using nvidia-smi. While the kernel was running, the Volatile GPU-Util was only 30-60% on my GPU, and above 95% on my friend's.
The Volatile GPU-Util can reach ~100% on my GPU when I run other ML frameworks such as Caffe, or TensorFlow without the Keras API.
Any pointers? Thanks.

Related

Keras CNN predicts 2 classes out of 4

I have a problem with my CNN model, built using TensorFlow. The goal is to predict the classes of satellite images, corresponding to the type of clouds (data extracted from the Kaggle competition "Planet: Understanding the Amazon from Space"). There are 4 classes: clear, cloudy, partly cloudy and haze.
Everything works fine until I try to test the model on individual images. Then it always predicts the same 2 classes and nothing else. I noticed that if I run the model again, it may predict 2 other classes among the 4. The model was trained for 10 epochs, which gave an accuracy of 0.8717.
Here is my code:
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
import h5py
import os
os.listdir("/kaggle/input/")
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,MaxPooling2D,Conv2D,Flatten,Dropout,Activation
from tensorflow.keras.layers import BatchNormalization
from sklearn import svm
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from oauth2client.client import GoogleCredentials
import csv
#from keras.optimizers import RMSprop
from tensorflow.keras import Input, Model
batch_size = 128
img_width = 256
img_height = 256
train_data = ImageDataGenerator(
rescale = 1./255,
validation_split = 0.25)
train_generator = train_data.flow_from_directory(
'../input/clouds',
target_size=(img_height, img_width),
color_mode='rgb',
batch_size=batch_size,
shuffle = True,
class_mode="categorical",
subset = 'training'
)
valid_generator = train_data.flow_from_directory(
'../input/clouds',
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='categorical',
subset = 'validation'
)
num_classes = 4
model = Sequential([
Input(shape = [img_width, img_height, 3]),
Conv2D(128,4,activation = 'relu'),
MaxPooling2D(),
Conv2D(64,4,activation = 'relu'),
MaxPooling2D(),
Conv2D(32,4, activation = 'relu'),
MaxPooling2D(),
Conv2D(16,4,activation = 'relu'),
MaxPooling2D(),
Flatten(),
Dense(64, activation = 'relu'),
Dense(num_classes, activation = 'softmax')
])
model.compile(optimizer = "adam",
loss = 'categorical_crossentropy',
metrics=['accuracy'])
model.fit(train_generator, validation_data = valid_generator, epochs = 10)
img_to_predict = cv2.imread('/kaggle/input/clouds-test/clouds_test/test_3877_6013089.jpg') #an augmented image from original dataset
img_to_predict = cv2.cvtColor(img_to_predict, cv2.COLOR_BGR2RGB)
img_to_predict = np.expand_dims(cv2.resize(img_to_predict, (256,256)), axis = 0)
res = model.predict(img_to_predict)
label_map = (train_generator.class_indices)
print(label_map)
print(list(label_map)[np.argmax(res, axis = -1)[0]])
Thank you for your help.

Model predict giving same results after fitting

Hi, I am a beginner in ML and TensorFlow, so please pardon me for not understanding complex theories.
I was building an image classifier CNN as a form of practice. The model is trained using MobileNetV2 and it's supposed to classify images of cats, dogs and pandas. After training my model (with a decent accuracy of 92%), I tried using model.predict() to evaluate how it does on new images, but I noticed that all my outputs were 1. This happens even when I use images from the training data. By the way, I used 2700 images (900 from each class) for training and 300 for validation.
Here is my code
%tensorflow_version 2.x # this line is not required unless you are in a notebook
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow_datasets as tfds
import pathlib
from google.colab import drive
drive.mount('/content/gdrive')
IMAGE_SIZE=[150,150]
train_path = "/content/gdrive/MyDrive/Colab Notebooks/Cat_Dog_Panda_CNN/train"
test_path = "/content/gdrive/MyDrive/Colab Notebooks/Cat_Dog_Panda_CNN/test"
IMAGE_SHAPE=[150,150,3]
base_model = tf.keras.applications.MobileNetV2(input_shape=IMAGE_SHAPE,
include_top=False,
weights='imagenet')
base_model.trainable = False
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
prediction_layer = tf.keras.layers.Dense(3)
model = tf.keras.Sequential([
base_model,
global_average_layer,
prediction_layer
])
model.summary()
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=base_learning_rate),
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
metrics=['accuracy'])
# creates a data generator object that transforms images
train_datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest')
test_datagen = ImageDataGenerator (rescale=1./255)
training_set = train_datagen.flow_from_directory(train_path, target_size=IMAGE_SIZE, batch_size=32, class_mode='categorical')
testing_set = test_datagen.flow_from_directory(test_path, target_size=IMAGE_SIZE, batch_size=32, class_mode='categorical')
model.fit(
training_set,
epochs=3,
validation_data=testing_set)
img = Image.open("/content/gdrive/MyDrive/Colab Notebooks/Cat_Dog_Panda_CNN/test/panda/panda_00094.jpg").convert('RGB').resize((150, 150), Image.ANTIALIAS)
img = np.array(img)
predictions = model.predict(img[None,:,:])
np.argmax(predictions[0])
In model.compile you have loss=tf.keras.losses.BinaryCrossentropy(from_logits=True). Since you have 3 classes you should use
loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True)
(keep from_logits=True, because your final Dense layer has no softmax activation).
The MobileNet model was trained with pixel values scaled to the range -1 to +1, so your inputs should be scaled the same way. Note that the rescale argument of ImageDataGenerator is purely multiplicative, so it cannot express x/127.5 - 1 on its own; the usual way is to pass preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input instead of rescale=1./255.
When you feed an image into model.predict, you must apply the same preprocessing to it as you did to the training images: scale it the same way and resize it to the size used in training, namely (150, 150).
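A minimal sketch of both fixes, assuming the model from the question and that the training/validation generators are also switched to the MobileNetV2 preprocessing (the image path and the 150x150 size come from the question):
import numpy as np
import tensorflow as tf
from PIL import Image

# loss that matches a 3-class output layer with no softmax (logits)
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=base_learning_rate),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# at prediction time, repeat the training preprocessing: RGB, 150x150, scaled to [-1, 1]
img = Image.open("/content/gdrive/MyDrive/Colab Notebooks/Cat_Dog_Panda_CNN/test/panda/panda_00094.jpg")
img = img.convert('RGB').resize((150, 150))
x = tf.keras.applications.mobilenet_v2.preprocess_input(np.array(img, dtype=np.float32))
predictions = model.predict(x[None, ...])
print(np.argmax(predictions[0]))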

Transfer learning InceptionV3 show poor validation accuracy with training

## MODEL IMPORTING ##
import tensorflow
import pandas as pd
import numpy as np
import os
import keras
import random
import cv2
import math
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense,GlobalAveragePooling2D,Convolution2D,BatchNormalization
from tensorflow.keras.layers import Flatten,MaxPooling2D,Dropout
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
import warnings
warnings.filterwarnings("ignore")
WIDTH = 299
HEIGHT = 299
CLASSES = 4
base_model = InceptionV3(weights='imagenet', include_top=False)
for layer in base_model.layers:
    layer.trainable = False
x = base_model.output
x = GlobalAveragePooling2D(name='avg_pool')(x)
x = Dropout(0.4)(x)
predictions = Dense(CLASSES, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
model.summary()
model.compile(optimizer='adam', ##also tried other optimiser --> similar poor accuracy found
loss='categorical_crossentropy',
metrics=['accuracy'])
## IMAGE DATA GENERATOR ##
from keras.applications.inception_v3 import preprocess_input
train_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest',
validation_split=0.2)
train_generator = train_datagen.flow_from_directory(
TRAIN_DIR,
target_size=(HEIGHT, WIDTH),
batch_size=BATCH_SIZE,
class_mode='categorical',
subset="training")
validation_generator = train_datagen.flow_from_directory(
TRAIN_DIR,
target_size=(HEIGHT, WIDTH),
batch_size=BATCH_SIZE,
class_mode='categorical',
subset="validation")
test_datagen = ImageDataGenerator(rescale=1./255)
generator_test = test_datagen.flow_from_directory(directory=TEST_DIR,
target_size=(HEIGHT, WIDTH),
batch_size=BATCH_SIZE,
class_mode='categorical',
shuffle=False)
## MODEL training ##
EPOCHS = 20
STEPS_PER_EPOCH = 320 #train_generator.n//train_generator.batch_size
VALIDATION_STEPS = 64 #validation_generator.n//validation_generator.batch_size
history = model.fit_generator(
train_generator,
epochs=EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,
validation_data=validation_generator,
validation_steps=VALIDATION_STEPS)
Results found:
Validation accuracy: fluctuates around 0.55-0.67
Training accuracy: 0.99
Questions:
What/where is the problem in the transfer learning process?
Are the train, validation and test data generator parameters chosen correctly?
I think you would be better off training the entire model, so remove the code that makes the base model layers non-trainable. If you look at the documentation for InceptionV3 located here, you can set pooling='max', which puts a GlobalMaxPooling2D layer at the output, so you would not need to add your own pooling layer as you did.
I also noticed you imported the callbacks ModelCheckpoint and ReduceLROnPlateau but did not use them in model.fit. Using an adjustable learning rate will be beneficial to achieving a lower validation loss, and ModelCheckpoint is useful for saving the best model for use in predictions. See the code below for implementations; save_loc is the directory where you want to store the results from ModelCheckpoint. NOTE: in ModelCheckpoint I set save_weights_only=True, because this is far faster than saving the entire model on each epoch for which the validation loss decreases.
checkpoint=tf.keras.callbacks.ModelCheckpoint(filepath=save_loc, monitor='val_loss', verbose=1, save_best_only=True,
save_weights_only=True, mode='auto', save_freq='epoch', options=None)
lr_adjust=tf.keras.callbacks.ReduceLROnPlateau( monitor="val_loss", factor=0.5, patience=1, verbose=1, mode="auto",
min_delta=0.00001, cooldown=0, min_lr=0)
callbacks=[checkpoint, lr_adjust]
history = model.fit_generator( train_generator, epochs=EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,validation_data=validation_generator,
validation_steps=VALIDATION_STEPS, callbacks=callbacks)
model.load_weights(save_loc) # load the saved weights
# after this use the model to evaluate or predict on the test set.
# if you are satisfied with the results you can then save the entire model with
model.save(save_loc)
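For the pooling='max' suggestion above, a minimal sketch of how the model could be built (CLASSES as defined in the question):
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model

# include_top=False with pooling='max' already ends in a GlobalMaxPooling2D layer,
# so only the classification head needs to be added; all layers are left trainable
base_model = InceptionV3(weights='imagenet', include_top=False, pooling='max')
predictions = Dense(CLASSES, activation='softmax')(base_model.output)
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])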
Be careful with the test set generator. Ensure you apply the same preprocessing to the test data as you did to the training data; I noticed you only rescaled the pixels. I am not sure what the preprocess function does, but I would use it for the test data as well.
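For example, a minimal sketch of a test generator that reuses the same preprocessing function (TEST_DIR, HEIGHT, WIDTH and BATCH_SIZE as in the question):
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator

# same Inception preprocessing as the training generator, no extra rescale
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
generator_test = test_datagen.flow_from_directory(directory=TEST_DIR,
                                                  target_size=(HEIGHT, WIDTH),
                                                  batch_size=BATCH_SIZE,
                                                  class_mode='categorical',
                                                  shuffle=False)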
I would also remove the dropout layer initially. Monitor the training loss and validation loss on each epoch and plot the results. If the training loss continues to decrease while the validation loss trends upward, you are overfitting; if so, restore the dropout layer as needed. If you do evaluation or prediction on the test set, you only want to go through the test set once. So select the test batch size such that the number of test samples divided by the batch size is an integer, and use that quotient as the number of test steps. Here is a
handy function that will determine that for you, where length is the number of test samples and b_max is the maximum batch size you will allow based on your memory capacity.
def get_bs(length, b_max):
    batch_size = sorted([int(length/n) for n in range(1, length+1) if length % n == 0 and length/n <= b_max], reverse=True)[0]
    return batch_size, int(length/batch_size)

# example of use
batch_size, step = get_bs(10000, 70)
# result is batch_size=50, step=200

Why are my results still not reproducible?

I want to get reproducible results for a CNN. I use Keras and Google Colab with a GPU.
In addition to following recommendations to insert certain code snippets, which should allow reproducibility, I also added seeds to the layers.
###### This is the first code snippet to run #####
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
###### This is the second code snippet to run #####
from __future__ import print_function
import numpy as np
import tensorflow as tf
print(tf.test.gpu_device_name())
import random as rn
import os
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(1)
rn.seed(1)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
###### This is the third code snippet to run #####
from keras import backend as K
tf.set_random_seed(1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
###### This is the fourth code snippet to run #####
def model_cnn():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=1), input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25, seed=1))
    model.add(Flatten())
    model.add(Dense(512, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5, seed=1))
    model.add(Dense(10, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(Activation('softmax'))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return model

def split_data(X, y):
    X_train_val, X_val, y_train_val, y_val = train_test_split(X, y, random_state=42, test_size=1/5, stratify=y)
    return (X_train_val, X_val, y_train_val, y_val)

def train_model_with_EarlyStopping(model, X, y):
    # make train and validation data
    X_tr, X_val, y_tr, y_val = split_data(X, y)
    es = EarlyStopping(monitor='val_loss', patience=20, mode='min', restore_best_weights=True)
    history = model.fit(X_tr, y_tr,
                        batch_size=64,
                        epochs=200,
                        verbose=1,
                        validation_data=(X_val, y_val),
                        callbacks=[es])
    return history
###### This is the fifth code snippet to run #####
train_model_with_EarlyStopping(model_cnn(), X, y)
Every time I run the above code, I get different results.
Does the reason lie in the code, or is it simply not possible to obtain reproducible results in Google Colab with GPU support?
The complete code (there are unnecessary parts, such as libraries which are not used):
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
from __future__ import print_function # NEU
import numpy as np
import tensorflow as tf
import random as rn
import os
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(1)
rn.seed(1)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
from keras import backend as K
tf.set_random_seed(1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
import os
local_root_path = os.path.expanduser("~/data/data")
print(local_root_path)
try:
    os.makedirs(local_root_path, exist_ok=True)
except: pass
def ListFolder(google_drive_id, destination):
    file_list = drive.ListFile({'q': "'%s' in parents and trashed=false" % google_drive_id}).GetList()
    counter = 0
    for f in file_list:
        # If it is a directory, create the directory and upload the files inside it
        if f['mimeType'] == 'application/vnd.google-apps.folder':
            folder_path = os.path.join(destination, f['title'])
            os.makedirs(folder_path, exist_ok=True)
            print('creating directory {}'.format(folder_path))
            ListFolder(f['id'], folder_path)
        else:
            fname = os.path.join(destination, f['title'])
            f_ = drive.CreateFile({'id': f['id']})
            f_.GetContentFile(fname)
            counter += 1
    print('{} files were uploaded in {}'.format(counter, destination))
ListFolder("1DyM_D2ZJ5UHIXmXq4uHzKqXSkLTH-lSo", local_root_path)
import glob
import h5py
from time import time
from keras import initializers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, merge
from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.optimizers import SGD, Adam, RMSprop, Adagrad, Adadelta, Adamax, Nadam
from keras.utils import np_utils
from keras.callbacks import LearningRateScheduler, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras.regularizers import l2
from keras.layers.advanced_activations import LeakyReLU, ELU
from keras import backend as K
import numpy as np
import pickle as pkl
from matplotlib import pyplot as plt
%matplotlib inline
import gzip
import numpy as np
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
from keras.datasets import fashion_mnist
from numpy import mean, std
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, StratifiedKFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.optimizers import SGD, Adam
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import auc, average_precision_score, f1_score
import time
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from google.colab import files
from PIL import Image
def model_cnn():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=1), input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25, seed=1))
    model.add(Flatten())
    model.add(Dense(512, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5, seed=1))
    model.add(Dense(10, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(Activation('softmax'))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return model

def train_model_with_EarlyStopping(model, X, y):
    X_tr, X_val, y_tr, y_val = split_train_val_data(X, y)
    es = EarlyStopping(monitor='val_loss', patience=20, mode='min', restore_best_weights=True)
    history = model.fit(X_tr, y_tr,
                        batch_size=64,
                        epochs=200,
                        verbose=1,
                        validation_data=(X_val, y_val),
                        callbacks=[es])
    evaluate_model(model, history, X_tr, y_tr)
    return history
The problem isn't limited to Colab, and is reproducible locally. The behavior, however, may be inevitable.
The code at the bottom is a minimally reproducible version of your code, with fit parameters tweaked for faster testing. What I observed: the maximum difference in loss is only 0.0144% across 5 runs of 468 iterations each. This is pretty good. With batch_size=64, 60000 samples, and 20 epochs, you'll have 18,750 iterations per run, which will amplify this figure substantially.
Regardless, GPU parallelism is the most likely culprit driving the randomness, and the small differences do accumulate over time to yield a substantial difference (demo below). If 1e-8 seems small, try adding random noise to half your weights with magnitude clipped at 1e-8, and witness its life philosophy change.
The role of the seeds becomes dramatically pronounced if you don't use them: try it, and all your metrics will fly rampant within the first 10 iterations. Also, loss is better for measuring runtime differences, as accuracy is a lot more sensitive to numeric precision errors: the difference between 60% and 70% accuracy on a 10-sample batch is a prediction that differs by 0.000001 w.r.t. 0.5, but the loss will barely budge.
Lastly, note that your hyperparameter choice will have a far greater impact upon model performance than randomness; no matter how many seeds you throw, they won't magic a model into SOTA. -- I recommend this fine clip.
Your code is fine. You've taken all practical steps to ensure reproducibility, with one exception: PYTHONHASHSEED must be set before your Python kernel starts.
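For example (a hedged illustration; train.py is a hypothetical script name), the variable has to be in the environment before the interpreter launches rather than assigned inside the running kernel:
import os
# Hash randomization is decided when the interpreter starts, so export the variable first,
# e.g. from a shell:
#     PYTHONHASHSEED=0 python train.py
# Assigning os.environ['PYTHONHASHSEED'] = '0' inside an already running kernel is too late
# for the current process (it only affects subprocesses launched afterwards).
print(os.environ.get('PYTHONHASHSEED'))  # prints '0' only if it was set before startup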
What can you do to reduce randomness?
Repeat runs, average results (see the sketch after this list). Understandably that's expensive, but note that even a perfectly reproducible run isn't perfectly informative, as model variance w.r.t. train & validation sets is likely to be much greater than noise-induced randomness
K-Fold Cross-Validation: can mitigate both data & noise variance significantly
Larger validation set: extracted features can differ only so much due to noise; the larger the validation set, the less small perturbations in weights should reflect in metrics
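A hedged sketch of the first item, repeating training and averaging a metric (model_cnn and split_data are the helpers from your question; 5 runs, 20 epochs and val_loss are arbitrary choices):
import numpy as np

X_tr, X_val, y_tr, y_val = split_data(X, y)
scores = []
for run in range(5):
    model = model_cnn()                              # fresh weights each run
    history = model.fit(X_tr, y_tr, batch_size=64, epochs=20, verbose=0,
                        validation_data=(X_val, y_val))
    scores.append(min(history.history['val_loss']))  # best validation loss of the run
print('val_loss: %.4f +/- %.4f' % (np.mean(scores), np.std(scores)))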
GPU Parallelism: amplifying float error
print(2. * 11. / 9.) # 2.4444444444444446
print(2. / 9. * 11.) # 2.444444444444444
Order of operations matters, and by exploiting multithreading, GPU parallelism gives no guarantee whatsoever of operations being executed in the same order. On a first look, the difference may look innocent - but give it enough iterations ...
one = 1
for _ in range(int(1e8)):
    one *= (2. / 9. * 11.) / (2. * 11. / 9.)
print(one)      # 0.9999999777955395
print(1 - one)  # 1.8167285897874308e-08
... and a "one" is a typical small weight value of 1e-08 away from being its original self. If 100 million iterations seems to be a stretch, consider that the operation completed in ~half a minute, whereas your model can train over an hour, and former runs entirely on CPU.
Minimal reproducible experimentation:
import tensorflow as tf
import random as rn
import numpy as np
np.random.seed(1)
rn.seed(2)
tf.set_random_seed(3)
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import MaxPooling2D, Conv2D
from keras.optimizers import Adam
def model_cnn():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3,3),
                     kernel_initializer='he_uniform', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(512, kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, kernel_initializer='he_uniform'))
    model.add(Activation('softmax'))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001),
                  metrics=['accuracy'])
    return model
np.random.seed(1)
rn.seed(2)
tf.set_random_seed(3)
X_train = np.random.randn(30000, 28, 28, 1)
y_train = np.random.randint(0, 2, (30000, 10))
X_val = np.random.randn(30000, 28, 28, 1)
y_val = np.random.randint(0, 2, (30000, 10))
model = model_cnn()
np.random.seed(1)
rn.seed(2)
tf.set_random_seed(3)
history = model.fit(X_train, y_train, batch_size=64,shuffle=True,
epochs=1, verbose=1, validation_data=(X_val,y_val))
Run differences:
loss: 12.5044 - acc: 0.0971 - val_loss: 11.5389 - val_acc: 0.1051
loss: 12.5047 - acc: 0.0958 - val_loss: 11.5369 - val_acc: 0.1018
loss: 12.5055 - acc: 0.0955 - val_loss: 11.5382 - val_acc: 0.0980
loss: 12.5042 - acc: 0.0961 - val_loss: 11.5382 - val_acc: 0.1179
loss: 12.5062 - acc: 0.0960 - val_loss: 11.5366 - val_acc: 0.1082

MLP totally different results for Keras and scikit-learn

Running a single-hidden-layer MLP on MNIST, I get extremely different results for Keras and sklearn.
import numpy as np
np.random.seed(5)
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras import regularizers
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn.neural_network import MLPClassifier
(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10
batch_data = x_train[:2000]
batch_labels = y_train[:2000]
# flat 2d images
batch_data_flat = batch_data.reshape(2000, 784)
# one-hot encoding
batch_labels_one_hot = np_utils.to_categorical(batch_labels, num_classes)
num_hidden_nodes = 100
alpha = 0.0001
batch_size = 128
beta_1 = 0.9
beta_2 = 0.999
epsilon = 1e-08
learning_rate_init = 0.001
epochs = 200
# keras
keras_model = Sequential()
keras_model.add(Dense(num_hidden_nodes, activation='relu',
kernel_regularizer=regularizers.l2(alpha),
kernel_initializer='glorot_uniform',
bias_initializer='glorot_uniform'))
keras_model.add(Dense(num_classes, activation='softmax',
kernel_regularizer=regularizers.l2(alpha),
kernel_initializer='glorot_uniform',
bias_initializer='glorot_uniform'))
keras_optim = Adam(lr=learning_rate_init, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon)
keras_model.compile(optimizer=keras_optim, loss='categorical_crossentropy', metrics=['accuracy'])
keras_model.fit(batch_data_flat, batch_labels_one_hot, batch_size=batch_size, epochs=epochs, verbose=0)
# sklearn
sklearn_model = MLPClassifier(hidden_layer_sizes=(num_hidden_nodes,), activation='relu', solver='adam',
alpha=alpha, batch_size=batch_size, learning_rate_init=learning_rate_init,
max_iter=epochs, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon)
sklearn_model.fit(batch_data_flat, batch_labels_one_hot)
# evaluate both on their training data
score_keras = keras_model.evaluate(batch_data_flat, batch_labels_one_hot)
score_sklearn = sklearn_model.score(batch_data_flat, batch_labels_one_hot)
print("Acc: keras %f, sklearn %f" % (score_keras[1], score_sklearn))
Outputs: Acc: keras 0.182500, sklearn 1.000000
The only difference I see is that, for the Glorot initialization of the final layer, scikit-learn uses sqrt(2 / (fan_in + fan_out)) vs. sqrt(6 / (fan_in + fan_out)) in Keras. But I don't think that should cause such a large difference. Am I forgetting something here?
scikit-learn 0.19.1, Keras 2.2.0 (Backend Tensorflow 1.9.0)
You should probably initialize the biases with 'zeros' and not with 'glorot_uniform'.
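A minimal sketch of that change, keeping everything else from the question as-is ('zeros' is also the Keras default for bias_initializer):
keras_model = Sequential()
keras_model.add(Dense(num_hidden_nodes, activation='relu',
                      kernel_regularizer=regularizers.l2(alpha),
                      kernel_initializer='glorot_uniform',
                      bias_initializer='zeros'))   # biases start at 0 instead of glorot_uniform
keras_model.add(Dense(num_classes, activation='softmax',
                      kernel_regularizer=regularizers.l2(alpha),
                      kernel_initializer='glorot_uniform',
                      bias_initializer='zeros'))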