How to plot training loss and accuracy curves for an MLP model in Keras? - tensorflow

I am modeling a neural network using Keras and I am trying to evaluate it with a graph of acc and val_acc. I have 3 errors in the following lines of code:
In print(history.keys()) the error is: 'function' object has no attribute 'keys'
In y_pred = classifier.predict(X_test) the error is: name 'classifier' is not defined
In plt.plot(history.history['acc']) the error is: 'History' object is not subscriptable
I'm also trying to graph the ROC curve. How could I do it?
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense
from matplotlib import pyplot
from keras.utils import plot_model
dataset = pd.read_csv('Data_BP.csv')
X = dataset.iloc[:, 0:11].values
y = dataset.iloc[:, -1].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
def Model():
    classifier = Sequential()
    classifier.add(Dense(units = 12, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['mse', 'acc'])
    return classifier
classifier = Model()
history = classifier.fit(X_train, y_train, validation_split=0.25, batch_size = 10, epochs = 5)
print('\n', history.history.keys())
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
from sklearn.metrics import confusion_matrix, recall_score, classification_report, auc, roc_curve
cm = confusion_matrix(y_test, y_pred)
print(cm)
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()
What functions should be added?

Change history to classifier in the following lines (the History object is actually the return value of the fit method called on the model object), like this:
classifier = Model()
history = classifier.fit(...)
Don't confuse the return value of the fit method with your model. The History object, as its name suggests, only contains the history of training. Your model, however, is classifier, and it is the one that has methods like fit(), predict(), evaluate(), compile(), etc.
Plus, the History object has an attribute called history, which is a dictionary containing the values of the loss and metrics during training. Therefore you need to use print(history.history.keys()) instead.
Now, if you would like to, for example, plot the loss curve during training (i.e. the loss at the end of each epoch), you can do it like this:
loss_values = history.history['loss']
epochs = range(1, len(loss_values)+1)
plt.plot(epochs, loss_values, label='Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
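As for the ROC curve the question also asks about, here is a minimal sketch using sklearn.metrics.roc_curve, assuming classifier, X_test and y_test are defined as in the question. Note that it needs the raw sigmoid outputs of predict(), not the thresholded y_pred:
from sklearn.metrics import roc_curve, auc

# use the continuous sigmoid outputs, not the thresholded class labels
y_prob = classifier.predict(X_test).ravel()
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, label='ROC curve (AUC = %.3f)' % roc_auc)
plt.plot([0, 1], [0, 1], 'k--', label='Chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve')
plt.legend(loc='lower right')
plt.show()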

Related

ValueError: Input 0 of layer “Discriminator” is incompatible with the layer: expected shape=(None, 3), found shape=(100, 2)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score,\
accuracy_score, balanced_accuracy_score,classification_report,\
plot_confusion_matrix, confusion_matrix
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, Concatenate
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D, LeakyReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import RandomNormal
import tensorflow.keras.backend as K
from sklearn.utils import shuffle
import pickle
from tqdm import tqdm
import numpy as np
from scipy import stats
import pandas as pd
np.random.seed(1635848)
def get_data_XYZ_one_dimensional(n, a=-2, c=1/2, random_state=None, verbose=True):
    """
    Generates pseudo-random data distributed according to the distribution defined in section 2.1 of the document
    "Math/Confounders and data generation.pdf".
    :param n: Number of data points to generate.
    :param a: Mean of X.
    :param c: Shape parameter for the Weibull distribution.
    :param random_state: Used to set the seed of numpy.random before generation of random numbers.
    :param verbose: If True, displays a progress bar. If False, it does not.
    :return: Pandas DataFrame with three columns (corresponding to X, Y and Z) and n rows (corresponding to the n
        generated pseudo-random samples).
    """
    np.random.seed(random_state)
    output = []
    iterator = tqdm(range(n)) if verbose else range(n)
    for _ in iterator:
        X = stats.norm.rvs(loc=-2, scale=1)
        Y = stats.bernoulli.rvs(p=1/(1+np.exp(-X)))
        if Y == 0:
            Z = stats.expon.rvs(scale=np.exp(-X))  # note: np.exp(-X) could be cached for more computational efficiency but would render the code less useful
        elif Y == 1:
            Z = stats.weibull_min.rvs(c=c, scale=np.exp(-X))
        else:
            assert False
        output.append((X, Y, Z))
    return pd.DataFrame(output, columns=["Personal information", "Treatment", "Time to event"])
data = get_data_XYZ_one_dimensional(n=100, random_state=0)
print(data)
# The Architecture of CGAN
class cGAN():
    """
    Class containing 3 methods (and __init__): generator, discriminator and train.
    Generator is trained using random noise and label as inputs. Discriminator is trained
    using real/fake samples and labels as inputs.
    """
    def __init__(self, latent_dim=100, out_shape=3):
        self.latent_dim = latent_dim
        self.out_shape = out_shape
        self.num_classes = 2
        # using Adam as our optimizer
        optimizer = Adam(0.0002, 0.5)
        # building the discriminator
        self.discriminator = self.discriminator()
        self.discriminator.compile(loss=['binary_crossentropy'],
                                   optimizer=optimizer,
                                   metrics=['accuracy'])
        # building the generator
        self.generator = self.generator()
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,))
        gen_samples = self.generator([noise, label])
        # we don't train the discriminator when training the generator
        self.discriminator.trainable = False
        valid = self.discriminator([gen_samples, label])
        # combining both models
        self.combined = Model([noise, label], valid)
        self.combined.compile(loss=['binary_crossentropy'],
                              optimizer=optimizer,
                              metrics=['accuracy'])
    def generator(self):
        init = RandomNormal(mean=0.0, stddev=0.02)
        model = Sequential()
        model.add(Dense(128, input_dim=self.latent_dim))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(256))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(self.out_shape, activation='tanh'))
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.num_classes, self.latent_dim)(label))
        model_input = multiply([noise, label_embedding])
        gen_sample = model(model_input)
        model.summary()
        return Model([noise, label], gen_sample, name="Generator")
    def discriminator(self):
        init = RandomNormal(mean=0.0, stddev=0.02)
        model = Sequential()
        model.add(Dense(512, input_dim=self.out_shape, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(256, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.4))
        model.add(Dense(128, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.4))
        model.add(Dense(1, activation='sigmoid'))
        gen_sample = Input(shape=(self.out_shape,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.num_classes, self.out_shape)(label))
        model_input = multiply([gen_sample, label_embedding])
        validity = model(model_input)
        model.summary()
        return Model(inputs=[gen_sample, label], outputs=validity, name="Discriminator")
    def train(self, X_train, y_train, pos_index, neg_index, epochs, sampling=False, batch_size=32, sample_interval=100, plot=True):
        # though not recommended, defining losses as global helps in analysing our cgan outside of the class
        global G_losses
        global D_losses
        G_losses = []
        D_losses = []
        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))
        for epoch in range(epochs):
            # if sampling==True --> train discriminator with 3 samples from the positive class and the rest from the negative class
            if sampling:
                idx1 = np.random.choice(pos_index, 3)
                idx0 = np.random.choice(neg_index, batch_size-3)
                idx = np.concatenate((idx1, idx0))
            # if sampling!=True --> train discriminator using random instances in batches of 32
            else:
                idx = np.random.choice(len(y_train), batch_size)
            samples, labels = X_train[idx], y_train[idx]
            samples, labels = shuffle(samples, labels)
            # Sample noise as generator input
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_samples = self.generator.predict([noise, labels])
            # label smoothing
            if epoch < epochs//1.5:
                valid_smooth = (valid+0.1)-(np.random.random(valid.shape)*0.1)
                fake_smooth = (fake-0.1)+(np.random.random(fake.shape)*0.1)
            else:
                valid_smooth = valid
                fake_smooth = fake
            # Train the discriminator
            self.discriminator.trainable = True
            d_loss_real = self.discriminator.train_on_batch([samples, labels], valid_smooth)
            d_loss_fake = self.discriminator.train_on_batch([gen_samples, labels], fake_smooth)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            # Train Generator
            self.discriminator.trainable = False
            sampled_labels = np.random.randint(0, 2, batch_size).reshape(-1, 1)
            # Train the generator
            g_loss = self.combined.train_on_batch([noise, sampled_labels], valid)
            if (epoch+1) % sample_interval == 0:
                print('[%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f'
                      % (epoch, epochs, d_loss[0], g_loss[0]))
            G_losses.append(g_loss[0])
            D_losses.append(d_loss[0])
            if plot:
                if epoch+1 == epochs:
                    plt.figure(figsize=(10,5))
                    plt.title("Generator and Discriminator Loss")
                    plt.plot(G_losses, label="G")
                    plt.plot(D_losses, label="D")
                    plt.xlabel("iterations")
                    plt.ylabel("Loss")
                    plt.legend()
                    plt.show()
data.Treatment.value_counts()
scaler = StandardScaler()
X = scaler.fit_transform(data.drop('Treatment', 1))
y = data['Treatment'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
lgb_1 = lgb.LGBMClassifier()
lgb_1.fit(X_train, y_train)
y_pred = lgb_1.predict(X_test)
# evaluation
print(classification_report(y_test, y_pred))
plot_confusion_matrix(lgb_1, X_test, y_test)
plt.show()
le = preprocessing.LabelEncoder()
for i in ['Personal information', 'Treatment', 'Time to event']:
    data[i] = le.fit_transform(data[i].astype(str))
y_train = y_train.reshape(-1,1)
pos_index = np.where(y_train==1)[0]
neg_index = np.where(y_train==0)[0]
cgan.train(X_train, y_train, pos_index, neg_index, epochs=500)
Here, the training gives the error ValueError: Input 0 of layer "Discriminator" is incompatible with the layer: expected shape=(None, 3), found shape=(100, 2). I understand that I have to fix the shape by changing the input, but where and how do I do it?
Also, there are 3 columns in data, so how do I go about making this work?
I think the fix is out_shape=2 and not 3, because the samples fed to the discriminator have 2 features, and you stated the number of classes to be 2 as well. Unless there is something else I am missing.
def __init__(self, latent_dim=100, out_shape=2):

How to get smooth accuracy and loss curves in deep learning models

There is an imbalanced two-class classification problem with 12750 samples for class 0 and 2550 samples for class 1. I've obtained class weights using class_weight.compute_class_weight and fed them to model.fit. I've tested many loss and optimizer functions. The accuracy on the test data is reasonable, but the loss and accuracy curves aren't normal, as shown below. I was wondering if someone could give me a suggestion on how to smooth the curves and fix this problem.
Thank you
import tensorflow as tf
import keras
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D, UpSampling2D,Dropout, Conv1D
from sklearn.utils import class_weight
import scipy.io
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import sklearn.metrics as metrics
from sklearn.utils import class_weight
#General Variables
batch_size = 32
epochs = 100
num_classes = 2
#Load Data
# X_p300 = scipy.io.loadmat('D:/P300_challenge/BCI data- code 2005/code2005/p300Cas.mat',variable_names='p300Cas').get('p300Cas')
# X_np300 = scipy.io.loadmat('D:/P300_challenge/BCI data- code 2005/code2005/np300Cas.mat',variable_names='np300Cas').get('np300Cas')
X_p300 = scipy.io.loadmat('/content/drive/MyDrive/p300/p300Cas.mat',variable_names='p300Cas').get('p300Cas')
X_np300 = scipy.io.loadmat('/content/drive/MyDrive/p300/np300Cas.mat',variable_names='np300Cas').get('np300Cas')
X_np300=X_np300[:,:]
X_p300=X_p300[:,:]
X=np.concatenate((X_p300,X_np300))
X = np.expand_dims(X,2)
Y=np.zeros((15300,))
Y[0:2550]=1
#Shuffle data as it is now in order by row colunm index
print('Shuffling...')
X, Y = shuffle(X, Y)
#Split data between 80% Training and 20% Testing
print('Splitting...')
x_train, x_test, y_train, y_test = train_test_split(
X, Y, train_size=.8, test_size=.2, shuffle=True)
# determine the weight of each class
class_weights = class_weight.compute_class_weight('balanced',
np.unique(y_train),
y_train)
class_weights = {i:class_weights[i] for i in range(2)}
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv1D(256,kernel_size=3,activation='relu', input_shape =(1680, 1)))
# model.add(Dropout(.5))
model.add(Flatten())
model.add(Dense(200, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(loss='mse',
optimizer='sgd',
metrics= ['acc'])
## use it when you want to apply weight of the classes
history = model.fit(x_train, y_train,class_weight=class_weights, validation_split = 0.3, epochs = epochs, verbose = 1)
#model.fit(x_train, y_train,batch_size=32,validation_split = 0.1, epochs = epochs, verbose = 1)
import matplotlib.pyplot as plt
history_dict = history.history
history_dict.keys()
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
acc = history_dict.get('acc')
epochs = range(1, len(acc) + 1)
plt.plot(epochs, loss_values, 'r--', label = 'Training loss')
plt.plot(epochs, val_loss_values, 'b', label = 'Validation_loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
acc_values = history_dict['acc']
val_acc_values = history_dict['val_acc']
plt.plot(epochs, acc, 'r--', label = 'Training acc')
plt.plot(epochs, val_acc_values, 'b', label = 'Validation acc')
plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()
model.summary()
test_loss, test_acc = model.evaluate(x_test, y_test)
print('test_acc:', test_acc)
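One simple way to make the plotted curves look smoother is to overlay a moving average of the per-epoch values on the raw ones; this only changes the presentation, not the training itself. A minimal sketch, assuming history_dict from the code above (the window size of 5 is an arbitrary choice):
import numpy as np
import matplotlib.pyplot as plt

def moving_average(values, window=5):
    # simple running mean; the output is window-1 points shorter than the input
    return np.convolve(values, np.ones(window) / window, mode='valid')

loss_values = history_dict['loss']
epochs = range(1, len(loss_values) + 1)
smoothed = moving_average(loss_values, window=5)

plt.plot(epochs, loss_values, 'r--', alpha=0.4, label='Training loss (raw)')
plt.plot(range(5, len(loss_values) + 1), smoothed, 'r', label='Training loss (smoothed)')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()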

keras model prediction did not return probability when using load_model

I have Covid-19 X-ray dataset from Kaggle. I split and resize image in to the following dimension.
X_train (675, 256, 256, 3), X_test (225, 256, 256, 3) and X_val (225, 256, 256, 3). My code to train a densenet121 is the following
import numpy as np
import os
import random
from sklearn.utils import class_weight
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input, Activation, BatchNormalization
from keras.applications import DenseNet121
from keras.models import Model
from keras import applications as A
from tensorflow.keras.models import load_model
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.optimizers import SGD
seed_value = 1234
os.environ['PYTHONHASHSEED']=str(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)
X_train = A.densenet.preprocess_input(X_train)
X_test = A.densenet.preprocess_input(X_test)
X_val = A.densenet.preprocess_input(X_val)
def get_model(hparams):
    input_tensor = Input(shape=(256, 256, 3))
    pretrain = DenseNet121(weights='imagenet', input_tensor=input_tensor, include_top=False)
    idx = 52
    x = pretrain.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(64, use_bias=False)(x)
    x = Dropout(0.25)(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation("relu")(x)
    predictions = Dense(hparams["nclass"], activation="softmax")(x)
    model = Model(inputs=pretrain.input, outputs=predictions)
    for layer in model.layers:
        if "BatchNormalization" in layer.__class__.__name__:
            layer.trainable = True
        else:
            layer.trainable = False
    for i in range(len(model.layers)):
        if i > idx:
            model.layers[i].trainable = True
    model.compile(optimizer=SGD(lr=hparams["lr"]), loss="categorical_crossentropy", metrics=["accuracy"])
    return model
weights = class_weight.compute_class_weight("balanced", classes=np.unique(y_train_labels), y=y_train_labels)
class_weights = dict(zip(np.unique(y_train_labels), weights))
es = EarlyStopping(monitor="val_loss",
mode="min",
patience=20,
verbose=1,
restore_best_weights=True)
mc = ModelCheckpoint(filepath="../models/mymodel.h5",
monitor="val_loss",
mode="min",
verbose=1,
save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor="val_loss",
factor=0.9,
patience=5,
min_lr=0.000001,
verbose=1)
history = model.fit(x=X_train,
y=y_train,
class_weight=class_weights,
validation_data=(X_val, y_val),
epochs=500,
batch_size=8,
callbacks=[es, mc, reduce_lr])
Prediction shows probabilities for the 3 classes (e.g. [0.1, 0.6, 0.3]), but then I load the model later using this command:
classifier = load_model("mymodel.h5", compile=False)
probs = classifier.predict(X_test)
It seems that the prediction result is no longer a probability but a class label (and also an incorrect one if we refer to the previous prediction [0.1, 0.6, 0.3] ... I got [0, 0, 1] as the output of the loaded model). I'm using Keras version 2.3.1 and TensorFlow 2.1.0. May I know what went wrong and how to fix it?
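One hedged way to narrow this down is to compare the in-memory model against the re-loaded checkpoint on the same inputs: if the two disagree, the difference comes from what was saved (for example, the best checkpoint picked by ModelCheckpoint rather than the final weights), not from predict() itself. A minimal sketch, assuming model, X_test and the checkpoint path from the code above:
import numpy as np
from tensorflow.keras.models import load_model

# predictions of the model still in memory after fit()
probs_in_memory = model.predict(X_test[:5])
# predictions of the checkpoint written by ModelCheckpoint
reloaded = load_model("../models/mymodel.h5", compile=False)
probs_reloaded = reloaded.predict(X_test[:5])
print(probs_in_memory)
print(probs_reloaded)
# both should be rows of softmax probabilities summing to ~1
print(np.allclose(probs_in_memory, probs_reloaded, atol=1e-5))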

Getting a non-broadcastable error in my LSTM

So, I have been trying to apply an LSTM to this CSV file (the CSV file that I'm trying to train on).
However, it seems to train itself, but after the training it causes an issue with my test file with either
Error 1
or, if I modify it a little bit, I get another error which says "ValueError: cannot reshape array of size 1047835 into shape".
Here is the code I'm implementing:
import math
import matplotlib.pyplot as plt
import keras
import pandas as pd
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" #Had to use CPU because of gpus capability was 3.0
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
df=pd.read_csv(r'C:\Users\LambertThePrick\Desktop\Databysir\LSTM.csv')
print(df.shape)
print(df.head(5))
#df.head(5)
TrainPart=df.iloc[:800,1:3].values
test_set=df.iloc[800:,1:3].values
scaler=MinMaxScaler(feature_range=(0,1))
Trainpart_scaled=scaler.fit_transform(TrainPart)
print(Trainpart_scaled)
X_Train=[]
Y_Train=[]
for i in range(60, 800):
    X_Train.append(Trainpart_scaled[i-60:i, 0])
    Y_Train.append(Trainpart_scaled[i, 0])
X_Train,Y_Train=np.array(X_Train),np.array(Y_Train)
X_Train = np.reshape(X_Train, (X_Train.shape[0], X_Train.shape[1], 1))
# print(X_train = np.reshape(X_Train, (X_Train.shape[0], X_Train.shape[1], 1)))
#(740, 60, 1)
model = Sequential()
#Adding the first LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True, input_shape = (X_Train.shape[1], 1)))
model.add(Dropout(0.2))
# Adding a second LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
# Adding a third LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
# Adding a fourth LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50))
model.add(Dropout(0.2))
# Adding the output layer
model.add(Dense(units = 1))
# Compiling the RNN
model.compile(optimizer = 'adam', loss = 'mean_squared_error')
# Fitting the RNN to the Training set
model.fit(X_Train, Y_Train, epochs = 100, batch_size = 32)
#THIS IS EXPT AFTER THIS
dataset_train = df.iloc[:800, 1:3]
dataset_test = df.iloc[800:, 1:3]
dataset_total = pd.concat((dataset_train, dataset_test), axis = 0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = scaler.transform(inputs)
X_Test = []
for i in range(60, 800):
    X_Test.append(inputs[i-60:i, 0])
X_Test = np.array(X_Test)
X_Test = np.reshape(X_Test, (X_Test.shape[0], X_Test.shape[1], 1))
print(X_Test.shape)
predicted_stock_price = model.predict(X_Test)
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)
plt.plot(df.loc[800:, 'Date'],dataset_test.values, color = 'red', label = 'Real ASTL Stock Price')
plt.plot(df.loc[800:, 'Date'],predicted_stock_price, color = 'blue', label = 'Predicted ASTL Stock Price')
plt.xticks(np.arange(0,459,50))
plt.title('ASTL Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('ASTL Stock Price')
plt.legend()
plt.show()
At some point in your reshaping you end up with a non-integer division. Take this example:
import numpy as np
data = np.zeros(3936)
out = data.reshape((-1,1,24,2))
This works well because 3936/24/2 results in an integer, 82.
But in this example
import numpy as np
data = np.zeros(34345)
out = data.reshape((-1,1,24,2))
you end up with the error message ValueError: cannot reshape array of size 34345 into shape (1,24,2), because the division does not result in an integer.
So, looping the way you do is bound to produce errors of that type.
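A sketch of one way to avoid that, under the assumption that you want the same single feature column used in the training loop: size the test windows from the actual length of inputs instead of the hard-coded 800. Note also that the scaler was fitted on two columns, so whatever is passed to scaler.transform has to keep that same number of features.
# build test windows from however many rows `inputs` actually has,
# instead of assuming 800, so every later reshape divides cleanly
X_Test = []
for i in range(60, len(inputs)):
    X_Test.append(inputs[i-60:i, 0])
X_Test = np.array(X_Test)
X_Test = np.reshape(X_Test, (X_Test.shape[0], X_Test.shape[1], 1))
print(X_Test.shape)  # (len(inputs) - 60, 60, 1)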

ValueError: in case of LSTM with `stateful=True`

I tried to use LSTM network with stateful=True as follows:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.callbacks import LambdaCallback
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
raw = np.sin(2*np.pi*np.arange(1024)/float(1024/2))
data = pd.DataFrame(raw)
window_size = 3
data_s = data.copy()
for i in range(window_size):
    data = pd.concat([data, data_s.shift(-(i+1))], axis = 1)
data.dropna(axis=0, inplace=True)
print (data)
ds = data.values
n_rows = ds.shape[0]
ts = int(n_rows * 0.8)
train_data = ds[:ts,:]
test_data = ds[ts:,:]
train_X = train_data[:,:-1]
train_y = train_data[:,-1]
test_X = test_data[:,:-1]
test_y = test_data[:,-1]
print (train_X.shape)
print (train_y.shape)
print (test_X.shape)
print (test_y.shape)
(816, 3)
(816,)
(205, 3)
(205,)
batch_size = 3
n_feats = 1
train_X = train_X.reshape(train_X.shape[0], batch_size, n_feats)
test_X = test_X.reshape(test_X.shape[0], batch_size, n_feats)
print(train_X.shape, train_y.shape)
regressor = Sequential()
regressor.add(LSTM(units = 64, batch_input_shape=(train_X.shape[0], batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=True))
regressor.add(Dense(units = 1))
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
resetCallback = LambdaCallback(on_epoch_begin=lambda epoch,logs: regressor.reset_states())
regressor.fit(train_X, train_y, batch_size=7, epochs = 1, callbacks=[resetCallback])
previous_inputs = test_X
regressor.reset_states()
previous_predictions = regressor.predict(previous_inputs).reshape(-1)
test_y = test_y.reshape(-1)
plt.plot(test_y, color = 'blue')
plt.plot(previous_predictions, color = 'red')
plt.show()
However, I got:
ValueError: Error when checking target: expected dense_1 to have 3 dimensions, but got array with shape (816, 1)
PS this code has been adapted from https://github.com/danmoller/TestRepo/blob/master/testing%20the%20blog%20code%20-%20train%20and%20pred.ipynb
Two minor bugs:
Here you have
regressor.add(LSTM(units = 64, batch_input_shape=(train_X.shape[0], batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=True))
This LSTM will return a 3D tensor, but your y is 2D, which throws a ValueError. You can fix this with return_sequences=False. I'm not sure why you initially had train_X.shape[0] inside of your batch_input_shape; the number of samples in your entire set shouldn't affect the size of each batch.
regressor.add(LSTM(units = 64, batch_input_shape=(1, batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=False))
After this you have
regressor.fit(train_X, train_y, batch_size=7, epochs = 1, callbacks=[resetCallback])
In a stateful network you can only use a batch size that evenly divides the total number of inputs. Since 7 doesn't divide 816, we change this to 1:
regressor.fit(train_X, train_y, batch_size=1, epochs = 1, callbacks=[resetCallback])
The same goes in your predict. You must specify batch_size=1:
previous_predictions = regressor.predict(previous_inputs, batch_size=1).reshape(-1)
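Putting the three fixes together, a sketch of the corrected model definition, fit and predict calls (everything else from the question unchanged):
regressor = Sequential()
# fixed batch size of 1, and the layer no longer returns sequences
regressor.add(LSTM(units=64, batch_input_shape=(1, batch_size, n_feats),
                   activation='sigmoid',
                   stateful=True, return_sequences=False))
regressor.add(Dense(units=1))
regressor.compile(optimizer='adam', loss='mean_squared_error')
resetCallback = LambdaCallback(on_epoch_begin=lambda epoch, logs: regressor.reset_states())
# batch_size=1 evenly divides the 816 training samples
regressor.fit(train_X, train_y, batch_size=1, epochs=1, callbacks=[resetCallback])
previous_inputs = test_X
regressor.reset_states()
# the same batch size must be used at prediction time
previous_predictions = regressor.predict(previous_inputs, batch_size=1).reshape(-1)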