Accuracy drops when using ImageDataGenerator - tensorflow

I am working on skin disease classification and trying to build a CNN model. I have approximitly 200 images. I have splited my data using by importing train_test_split from sklearn.model_selection.
code snippet:
data = [] #this is where I will store all the data
for category in categories:
path = os.path.join(data_dir,category)
# print(path)
class_num = categories.index(category)
# print(class_num)
for img in os.listdir(path):
# print(img)
try:
img_array = cv2.imread(os.path.join(path,img))
new_array = cv2.resize(img_array,img_size)
data.append([new_array,class_num])
except Exception as e:
pass
X = []
Y = []
for features,label in data:
X.append(features)
Y.append(label)
X = np.array(X)
X = X.astype('float32')/255.0
X = X.reshape(-1,height, width,3)
Y = np.array(Y)
from keras.utils.np_utils import to_categorical
Y = to_categorical(Y, num_classes = 4)
from sklearn.model_selection import train_test_split
# train_ratio = 0.75
# validation_ratio = 0.15
# test_ratio = 0.10
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42, stratify=Y)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=1, stratify=y_train)
import pickle
pickle.dump(X_train, open('/content/drive/MyDrive/Data/X_train', 'wb'))
pickle.dump(y_train, open('/content/drive/MyDrive/Data/y_train', 'wb'))
pickle.dump(X_test, open('/content/drive/MyDrive/Data/X_test', 'wb'))
pickle.dump(y_test, open('/content/drive/MyDrive/Data/y_test', 'wb'))
pickle.dump(X_val, open('/content/drive/MyDrive/Data/X_val', 'wb'))
pickle.dump(y_val, open('/content/drive/MyDrive/Data/y_val', 'wb'))
The accuracy drastically drops when I use ImageDataGenerator for data augmentation.
code snippet:
Adam(learning_rate=0.00001, name='Adam')
model.compile(optimizer = 'Adam',loss = 'categorical_crossentropy',metrics = ['accuracy'])
epochs = 80
from tensorflow.keras import callbacks
import time
import keras
from keras.callbacks import EarlyStopping
es_callback = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=20)
datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=30,
shear_range=0.3,
zoom_range=0.3,
width_shift_range=0.4,
height_shift_range=0.4,
horizontal_flip=True,
fill_mode='nearest'
)
checkpoint = callbacks.ModelCheckpoint(
filepath='/content/drive/MyDrive/Model1/model.{epoch:02d}-{accuracy:.2f}-{val_accuracy:.2f}.h5',
monitor='val_accuracy',
verbose=1,
save_best_only=True,
mode='auto'
)
history5 = model.fit(datagen.flow(X_train,
y_train,
batch_size=32),
epochs = epochs,
validation_data = (X_val,y_val)
)
without data sugmentation the validation accuracy is 55%
with data augmentation the validation accuracy is 30%

Related

how to make a soft accuracy and loss curves in deep learning models

There is an imbalance two class classification problem with 12750 samples for class 0 and 2550 samples for class 1. I've gotten class weights using class_weight.compute_class_weight and fed them to model.fit. I've tested many loss and optimizer functions. The accuracy on test data is reasonable but loss and accuracy curves aren't normal, which are shown as below. I was wonder if some one give me a suggestion that how can I smooth the curves and fix this problem.
Thank you
import tensorflow as tf
import keras
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D, UpSampling2D,Dropout, Conv1D
from sklearn.utils import class_weight
import scipy.io
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import sklearn.metrics as metrics
from sklearn.utils import class_weight
#General Variables
batch_size = 32
epochs = 100
num_classes = 2
#Load Data
# X_p300 = scipy.io.loadmat('D:/P300_challenge/BCI data- code 2005/code2005/p300Cas.mat',variable_names='p300Cas').get('p300Cas')
# X_np300 = scipy.io.loadmat('D:/P300_challenge/BCI data- code 2005/code2005/np300Cas.mat',variable_names='np300Cas').get('np300Cas')
X_p300 = scipy.io.loadmat('/content/drive/MyDrive/p300/p300Cas.mat',variable_names='p300Cas').get('p300Cas')
X_np300 = scipy.io.loadmat('/content/drive/MyDrive/p300/np300Cas.mat',variable_names='np300Cas').get('np300Cas')
X_np300=X_np300[:,:]
X_p300=X_p300[:,:]
X=np.concatenate((X_p300,X_np300))
X = np.expand_dims(X,2)
Y=np.zeros((15300,))
Y[0:2550]=1
#Shuffle data as it is now in order by row colunm index
print('Shuffling...')
X, Y = shuffle(X, Y)
#Split data between 80% Training and 20% Testing
print('Splitting...')
x_train, x_test, y_train, y_test = train_test_split(
X, Y, train_size=.8, test_size=.2, shuffle=True)
# determine the weight of each class
class_weights = class_weight.compute_class_weight('balanced',
np.unique(y_train),
y_train)
class_weights = {i:class_weights[i] for i in range(2)}
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv1D(256,kernel_size=3,activation='relu', input_shape =(1680, 1)))
# model.add(Dropout(.5))
model.add(Flatten())
model.add(Dense(200, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(loss='mse',
optimizer='sgd',
metrics= ['acc'])
## use it when you want to apply weight of the classes
history = model.fit(x_train, y_train,class_weight=class_weights, validation_split = 0.3, epochs = epochs, verbose = 1)
#model.fit(x_train, y_train,batch_size=32,validation_split = 0.1, epochs = epochs, verbose = 1)
import matplotlib.pyplot as plt
history_dict = history.history
history_dict.keys()
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
acc = history_dict.get('acc')
epochs = range(1, len(acc) + 1)
plt.plot(epochs, loss_values, 'r--', label = 'Training loss')
plt.plot(epochs, val_loss_values, 'b', label = 'Validation_loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
acc_values = history_dict['acc']
val_acc_values = history_dict['val_acc']
plt.plot(epochs, acc, 'r--', label = 'Training acc')
plt.plot(epochs, val_acc_values, 'b', label = 'Validation acc')
plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()
model.summary()
test_loss, test_acc = model.evaluate(x_test, y_test)
print('test_acc:', test_acc)

Feed a scikit classifier with Tensorflow dataset

Is there any simple way to connect a Tensorflow dataset to a scikit classifier (such as SVM), such that the dataset records are read automatically during training by scikit fit function?
Here is an example using TensorFlow with both a Classifier algo and a Regressor algo.
Classification with TensorFlow 2.0
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns
sns.set(style="darkgrid")
cols = ['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety','output']
cars = pd.read_csv(r'C:/your_path_here/car_evaluation.csv', names=cols, header=None)
cars.head()
plot_size = plt.rcParams["figure.figsize"]
plot_size [0] = 8
plot_size [1] = 6
plt.rcParams["figure.figsize"] = plot_size
cars.output.value_counts().plot(kind='pie', autopct='%0.05f%%', colors=['lightblue', 'lightgreen', 'orange', 'pink'], explode=(0.05, 0.05, 0.05,0.05))
price = pd.get_dummies(cars.price, prefix='price')
maint = pd.get_dummies(cars.maint, prefix='maint')
doors = pd.get_dummies(cars.doors, prefix='doors')
persons = pd.get_dummies(cars.persons, prefix='persons')
lug_capacity = pd.get_dummies(cars.lug_capacity, prefix='lug_capacity')
safety = pd.get_dummies(cars.safety, prefix='safety')
labels = pd.get_dummies(cars.output, prefix='condition')
X = pd.concat([price, maint, doors, persons, lug_capacity, safety] , axis=1)
labels.head()
y = labels.values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
#Model Training
from tensorflow.keras.layers import Input, Dense, Activation,Dropout
from tensorflow.keras.models import Model
input_layer = Input(shape=(X.shape[1],))
dense_layer_1 = Dense(15, activation='relu')(input_layer)
dense_layer_2 = Dense(10, activation='relu')(dense_layer_1)
output = Dense(y.shape[1], activation='softmax')(dense_layer_2)
model = Model(inputs=input_layer, outputs=output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
print(model.summary())
history = model.fit(X_train, y_train, batch_size=8, epochs=50, verbose=1, validation_split=0.2)
score = model.evaluate(X_test, y_test, verbose=1)
print("Test Score:", score[0])
print("Test Accuracy:", score[1])
Regression with TensorFlow 2.0
petrol_cons = pd.read_csv(r'C:/your_path_here/petrol_consumption.csv')
petrol_cons.head()
X = petrol_cons.iloc[:, 0:4].values
y = petrol_cons.iloc[:, 4].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
input_layer = Input(shape=(X.shape[1],))
dense_layer_1 = Dense(100, activation='relu')(input_layer)
dense_layer_2 = Dense(50, activation='relu')(dense_layer_1)
dense_layer_3 = Dense(25, activation='relu')(dense_layer_2)
output = Dense(1)(dense_layer_3)
model = Model(inputs=input_layer, outputs=output)
model.compile(loss="mean_squared_error" , optimizer="adam", metrics=["mean_squared_error"])
history = model.fit(X_train, y_train, batch_size=2, epochs=100, verbose=1, validation_split=0.2)
from sklearn.metrics import mean_squared_error
from math import sqrt
pred_train = model.predict(X_train)
print(np.sqrt(mean_squared_error(y_train,pred_train)))
pred = model.predict(X_test)
print(np.sqrt(mean_squared_error(y_test,pred)))
Path to data set:
https://www.kaggle.com/elikplim/car-evaluation-data-set

A question about simple Keras and Tensorflow code performance

I wrote simple Sin function predictors using Keras and Tensorflow with LSTM, but found the performance of Keras code is much slower which runs about 5 min while Tensorflow code runs the model just in 20 seconds. Moreover, the Keras prediction performance is less precide as Keras one. Could anyone help me find the code difference between the 2 model?
I hacked the code online and intend to train the model with the same hyper parameters. But the performance is not as expected. Tried searching many materials online, but found no reasons.
Keras Code:
import numpy as np
import os
import sys
import time
from tqdm._tqdm_notebook import tqdm_notebook
import pickle
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from keras import optimizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
stime = time.time()
BATCH_SIZE = 20
TIME_STEPS = 10
LN = 410
DIFF = 2
OUTPUT_PATH = '/Users/xiachang/Documents/RNN/test_outputs'
SCALER_COL_IDX = 0
params = {
"batch_size": BATCH_SIZE, # 20<16<10, 25 was a bust
"epochs": 500,
"lr": 0.00010000,
"time_steps": TIME_STEPS
}
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01
HIDDEN_UNITS = 20
# data = np.array([[i * (DIFF)] for i in range(LN)])
#
# min_max_scaler = MinMaxScaler()
# data = min_max_scaler.fit_transform(data)
def generate_data(seq):
X = []
y = []
for i in range(len(seq) - TIME_STEPS):
X.append([[e] for e in seq[i: i + TIME_STEPS]])
y.append([seq[i + TIME_STEPS]])
return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
test_start = (TRAINING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
0, test_start, TRAINING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
test_start, test_end, TESTING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
x_val, x_test = np.split(test_X, 2)
y_val, y_test = np.split(test_y, 2)
def print_time(text, stime):
seconds = (time.time()-stime)
print(text, seconds//60,"minutes : ",np.round(seconds%60),"seconds")
def create_model():
lstm_model = Sequential()
lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
lstm_model.add(LSTM(HIDDEN_UNITS))
lstm_model.add(Dense(1, activation=None))
lstm_model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
return lstm_model
model = create_model()
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=30, min_delta=0.0001)
mcp = ModelCheckpoint(os.path.join(OUTPUT_PATH,
"best_model.h5"), monitor='val_loss', verbose=1,
save_best_only=True, save_weights_only=False, mode='min', period=1)
# Not used here. But leaving it here as a reminder for future
r_lr_plat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=30,
verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
csv_logger = CSVLogger(os.path.join(OUTPUT_PATH, 'training_log_' + time.ctime().replace(" ","_") + '.log'), append=True)
history = model.fit(train_X, train_y, epochs=params["epochs"], verbose=2, batch_size=BATCH_SIZE,
shuffle=False, validation_data=(x_val, y_val), callbacks=[es, mcp, csv_logger])
print("saving model...")
pickle.dump(model, open("test_outputs/lstm_model", "wb"))
# Visualize the training data
from matplotlib import pyplot as plt
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
#plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'train_vis_BS_'+str(BATCH_SIZE)+"_"+time.ctime()+'.png'))
# load the saved best model from above
saved_model = load_model(os.path.join(OUTPUT_PATH, 'best_model.h5')) # , "lstm_best_7-3-19_12AM",
print(saved_model)
y_pred = saved_model.predict(x_test, batch_size=BATCH_SIZE)
y_pred = y_pred.flatten()
y_test_t = y_test
error = mean_squared_error(y_test_t, y_pred)
print("Error is", error, y_pred.shape, y_test_t.shape)
print(y_pred[0:15])
print(y_test_t[0:15])
y_pred_org = y_pred
y_test_t_org = y_test_t
print(y_pred_org[0:15])
print(y_test_t_org[0:15])
# Visualize the prediction
from matplotlib import pyplot as plt
plt.figure()
plt.plot(y_pred_org)
plt.plot(y_test_t_org)
plt.title('Prediction vs Real Value')
plt.ylabel('Y')
plt.xlabel('X')
plt.legend(['Prediction', 'Real'], loc='upper left')
# plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'pred_vs_real_BS'+str(BATCH_SIZE)+"_"+time.ctime()+'.png'))
print_time("program completed ", stime)
Tensorflow code:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
NUM_EPOCH = 1000
HIDDEN_SIZE = 30
NUM_LAYERS = 2
TIMESTEPS = 10
TRAINING_STEPS = 10000
BATCH_SIZE = 20
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01
def generate_data(seq):
X = []
y = []
for i in range(len(seq) - TIMESTEPS):
X.append([seq[i: i + TIMESTEPS]])
y.append([seq[i + TIMESTEPS]])
return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
def lstm_model(X, y, is_training):
cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.LSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)])
outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
output = outputs[:, -1, :]
predictions = tf.contrib.layers.fully_connected(output, 1, activation_fn=None)
if not is_training:
return predictions, None, None
loss = tf.losses.mean_squared_error(labels=y, predictions=predictions)
train_op = tf.contrib.layers.optimize_loss(
loss, tf.train.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
return predictions, loss, train_op
def train(sess, train_X, train_Y):
ds = tf.data.Dataset.from_tensor_slices((train_X, train_Y))
ds = ds.repeat().shuffle(1000).batch(BATCH_SIZE)
X, y = ds.make_one_shot_iterator().get_next()
losses = np.array([])
with tf.variable_scope('model'):
predictions, loss, train_op = lstm_model(X, y, True)
sess.run(tf.global_variables_initializer())
for i in range(TRAINING_STEPS):
_, l = sess.run([train_op, loss])
losses = np.append(losses, l)
if i % NUM_EPOCH == 0:
print('train step: ' + str(i) + ', loss: ' + str(l))
plt.figure()
plt.plot(losses, label='loss')
plt.legend()
# plt.show()
plt.savefig('./test_outputs/loss.png')
def run_eval(sess, test_X, test_y):
ds = tf.data.Dataset.from_tensor_slices((test_X, test_y))
ds = ds.batch(1)
X, y = ds.make_one_shot_iterator().get_next()
with tf.variable_scope('model', reuse=True):
prediction, _, _ = lstm_model(X, [0, 0], False)
predictions = []
labels = []
for i in range(int(TESTING_EXAMPLES / 2)):
p, l = sess.run([prediction, y])
predictions.append(p)
labels.append(l)
predictions = np.array(predictions).squeeze()
labels = np.array(labels).squeeze()
rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
print('Mean Square Error is: %f' % rmse)
plt.figure()
print(predictions[:15])
print(labels[:15])
plt.plot(predictions, label='predictions')
plt.plot(labels, label='real_val')
plt.legend()
# plt.show()
plt.savefig('./test_outputs/test.png')
test_start = (TRAINING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
0, test_start, TRAINING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
test_start, test_end, TESTING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
x_val, test_X = np.split(test_X, 2)
y_val, test_y = np.split(test_y, 2)
with tf.Session() as sess:
train(sess, train_X, train_y)
run_eval(sess, test_X, test_y)
You maybe should try to use CuDNNLSTM instead of LSTM. They are CUDA accelerated.
Fast LSTM implementation with CuDNN.
See here: https://github.com/keras-team/keras/blob/master/keras/layers/cudnn_recurrent.py#L328
Your model structure is not same, first has 3 layers of LSTM, other has 2.
Tensorflow data API is highly optimized, It preparing the data-set, without wasting any resources.
Note that: You can even more accelerate the training in tensorflow using parallelization in dynamic_rnn cell. check out this.

Keras Model Always Predicts the Same Result

I've written the following Keras model:
import configuration_reader
# Import libraries
import pandas
import keras
from pprint import pprint
# Import TensorFlow
import tensorflow as tf
# Import keras dependencies
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Embedding
from keras.layers import Activation
from keras.layers import Dropout
from keras.layers import GRU
# Read configuration
configuration = configuration_reader.read_configuration()
training_configuration = configuration['train']
MAX_DOCUMENT_LENGTH = training_configuration['max_document_length']
MAX_SIZE = training_configuration['max_size']
LOSS_FUNCTION = training_configuration['loss']
TENSORBOARD_DIR = configuration['tensorboard']['dir']
ACTIVATION_FUNCTION = training_configuration['activation_function']
EMBEDDING_SIZE = training_configuration['embedding_size']
LSTM_UNITS = training_configuration['lstm_units']
RECURRENT_DROPOUT = training_configuration['recurrent_dropout']
DROPOUT = training_configuration['dropout']
MAX_LABEL = §training_configuration['max_label']
BATCH_SIZE = training_configuration['batchSize']
EPOCHS = training_configuration['epochs']
WORDS_FEATURE = training_configuration['wordsFeature']
MODEL_FILE_NAME = configuration['modelFileName']
LEARNING_RATE = training_configuration['learning_rate']
def get_data():
tf.logging.set_verbosity(tf.logging.INFO)
dbpedia = tf.contrib.learn.datasets.load_dataset('dbpedia')
x_train = pandas.Series(dbpedia.train.data[:, 1])
y_train = pandas.Series(dbpedia.train.target)
x_test = pandas.Series(dbpedia.test.data[:, 1])
y_test = pandas.Series(dbpedia.test.target)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(x_train)
x_train_sequences = tokenizer.texts_to_sequences(x_train)
x_test_sequences = tokenizer.texts_to_sequences(x_test)
tokenizer = Tokenizer(num_words=MAX_DOCUMENT_LENGTH)
X_train = tokenizer.sequences_to_matrix(x_train_sequences, mode='binary')
X_test = tokenizer.sequences_to_matrix(x_test_sequences, mode='binary')
num_classes = len(y_test)
num_classes_unique = len(y_test.unique())
print(u'Number of classes: {}'.format(num_classes_unique))
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)
return (X_train, y_train, X_test, y_test, num_classes, tokenizer)
if __name__ == '__main__':
(X_train, y_train, X_test, y_test, num_classes, tokenizer) = get_data()
model = Sequential()
model.add(Embedding(MAX_SIZE, EMBEDDING_SIZE))
model.add(GRU(LSTM_UNITS, recurrent_dropout=RECURRENT_DROPOUT))
model.add(Dropout(DROPOUT))
model.add(Dense(num_classes))
model.add(Activation(ACTIVATION_FUNCTION))
model.compile(loss=LOSS_FUNCTION,
optimizer=keras.optimizers.Adam(LEARNING_RATE),
metrics=['accuracy'])
print(model.summary())
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=TENSORBOARD_DIR,
histogram_freq=1,
write_graph=True)
history = model.fit(X_train, y_train,
batch_size=BATCH_SIZE,
epochs=EPOCHS,
shuffle=True,
validation_data=(X_test, y_test),
callbacks=[tensorboard_callback])
score = model.evaluate(X_test, y_test,
batch_size=BATCH_SIZE, verbose=1)
model.save(MODEL_FILE_NAME)
print(u'Loss: {}'.format(score[0]))
print(u'Accuracy: {}'.format(score[1]))
def _predict(text):
text_to_predict = pandas.Series([text])
tokenizer.fit_on_texts(text_to_predict)
text_to_predict_sequences = tokenizer.texts_to_sequences(text_to_predict)
to_predict = tokenizer.sequences_to_matrix(text_to_predict_sequences, mode='binary')
prediction = model.predict(to_predict, steps=1000)
predicted_class_index = prediction.argmax(axis=1)
pprint(predicted_class_index)
while True:
text_input = input('Enter: ')
_predict(text_input)
This is the configuration I'm using:
{
{
"logDir": "graph",
"port": 9001,
"modelFileName": "model.h5",
"tensorboard": {
"dir": "./graph"
}
},
"train": {
"wordsFeature": "words",
"epochs": 3,
"batchSize": 32,
"dropout": 0.5,
"recurrent_dropout": 0.5,
"max_label": 15,
"lstm_units": 128,
"embedding_size": 200,
"activation_function": "sigmoid",
"loss": "binary_crossentropy",
"max_size": 1000,
"max_document_length": 50,
"learning_rate": 0.001
}
}
The model trains as expected and gets an accuracy of about 98% when evaluating.
But there are two things, that are quite unusual there.
The predictions of the model are inaccurate
After I predict the first label, all of the next predictions are exactly the same.
As for the first point, there might be something wrong with my model. But for the second, I'm really confused as to why this is happening?
Does anybody have an idea, why this is happening and what the solution might be?
I'm using Keras with the TensorFlow backend (CUDA version).

Neural Network loss drastically jumps during training

During training of my LSTM network the loss/acc went from 0.75/85% to 4.97/17% from the 42 epoch to the 44 epoch.
Why would this happen?
I only have 1500 training examples currently and am overfitting heavily. Would this be a cause?
For context I am using keras and a lstm network to predict reactions to text on slack. My training data are label encoded sentences and I am predicting the reaction class which is just a one hot representation of all the possible classes.
Here is my model in Keras
# create the model
embedding_vecor_length = 128
model = Sequential()
model.add(Embedding(max_words, embedding_vecor_length, input_length=max_message_length))
model.add(LSTM(1024))
model.add(Dense(classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
The loss and accuracy end up recovering by the 100th epoch. Is this something to worry about?
# coding: utf-8
# In[1]:
import pandas as pd
import re
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import os
pd.options.display.max_columns = 999
pd.options.display.max_colwidth = 999
np.random.seed(7)
# In[2]:
raw_data = pd.DataFrame()
for file in os.listdir('data/random'):
temp_df = pd.read_json(path_or_buf='data/random/' + file, orient='values', dtype=False)
raw_data = pd.concat([raw_data, temp_df])
for file in os.listdir('data/company-wide'):
temp_df = pd.read_json(path_or_buf='data/company-wide/' + file, orient='values', dtype=False)
raw_data = pd.concat([raw_data, temp_df])
for file in os.listdir('data/politics'):
temp_df = pd.read_json(path_or_buf='data/politics/' + file, orient='values', dtype=False)
raw_data = pd.concat([raw_data, temp_df])
# In[3]:
raw_data.shape
# In[4]:
# Only selected messages with reactions
data = raw_data.loc[(raw_data['reactions'].isnull() == False) & (raw_data['text'] != '')][['reactions', 'text']]
# In[5]:
data.shape
# In[6]:
def extractEmojiName(x):
max_count = 0
result = ''
for emoji in x:
if (emoji['count'] > max_count):
result = emoji['name']
return result
def removeUrls(x):
line = re.sub(r"<(http|https).*>", "", x)
return line
def removeUsername(x):
line = re.sub(r"<#.*>", "", x)
return line
# In[7]:
data['reactions_parsed'] = data['reactions'].apply(lambda x: extractEmojiName(x))
# In[8]:
data['text'] = data['text'].apply(lambda x: removeUrls(x))
data['text'] = data['text'].apply(lambda x: removeUsername(x))
# In[9]:
max_words = 10000
tokenizer = Tokenizer(nb_words=max_words)
tokenizer.fit_on_texts(data['text'])
text_vectors = tokenizer.texts_to_sequences(data['text'])
data['text_vector'] = text_vectors
# In[10]:
encoder = LabelEncoder()
data['reactions_encoded'] = encoder.fit_transform(data['reactions_parsed'])
# In[11]:
data
# In[12]:
classes = len(data['reactions_parsed'].unique())
target_vector = data['reactions_encoded'].values
reactions_vector = np.eye(classes)[target_vector]
data['reactions_vector'] = reactions_vector.tolist()
# In[13]:
max_message_length = data['text_vector'].apply(lambda x: len(x)).max()
# In[14]:
X_train, X_test, y_train, y_test = train_test_split(text_vectors, reactions_vector, test_size=.2, stratify=reactions_vector)
# In[15]:
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
# In[17]:
X_train = sequence.pad_sequences(X_train, maxlen=max_message_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_message_length)
# In[18]:
# create the model
embedding_vecor_length = 128
model = Sequential()
model.add(Embedding(max_words, embedding_vecor_length, input_length=max_message_length))
model.add(Dropout(0.2))
model.add(LSTM(1024))
model.add(Dropout(0.2))
model.add(Dense(classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
# In[ ]:
model.fit(X_train, y_train, nb_epoch=35, batch_size=64)
# In[ ]:
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))
# In[45]:
scores
# In[56]:
def show_predictions(model, X_test, y_test):
predictions = model.predict(X_test)
index = 0
for prediction in predictions:
print('Prediction -> ' + encoder.inverse_transform(prediction.argmax()))
print('Actual -> ' + encoder.inverse_transform(y_test[index].argmax()))
index+=1
# In[57]:
show_predictions(model, X_test, y_test)
# In[58]:
show_predictions(model, X_train[0:100], y_train[0:100])
# In[ ]: