Getting non-brodcastable error in my LSTM - pandas

So, I have been trying to apply LSTM on this csv file CSV File that im trying to train
However, it seems to train it self but after the training, its causing issue on my test file with either
Error 1
Or if I modify it a little pit then I get another error which says "Value Error: cannot reshape array of size 1047835 into shape"
Here is the code im implementing:-
import math
import matplotlib.pyplot as plt
import keras
import pandas as pd
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" #Had to use CPU because of gpus capability was 3.0
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
df=pd.read_csv(r'C:\Users\LambertThePrick\Desktop\Databysir\LSTM.csv')
print(df.shape)
print(df.head(5))
#df.head(5)
TrainPart=df.iloc[:800,1:3].values
test_set=df.iloc[800:,1:3].values
scaler=MinMaxScaler(feature_range=(0,1))
Trainpart_scaled=scaler.fit_transform(TrainPart)
print(Trainpart_scaled)
X_Train=[]
Y_Train=[]
for i in range(60,800):
X_Train.append(Trainpart_scaled[i-60:i,0])
Y_Train.append(Trainpart_scaled[i,0])
X_Train,Y_Train=np.array(X_Train),np.array(Y_Train)
X_Train = np.reshape(X_Train, (X_Train.shape[0], X_Train.shape[1], 1))
# print(X_train = np.reshape(X_Train, (X_Train.shape[0], X_Train.shape[1], 1)))
#(740, 60, 1)
model = Sequential()
#Adding the first LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True, input_shape = (X_Train.shape[1], 1)))
model.add(Dropout(0.2))
# Adding a second LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
# Adding a third LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
# Adding a fourth LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50))
model.add(Dropout(0.2))
# Adding the output layer
model.add(Dense(units = 1))
# Compiling the RNN
model.compile(optimizer = 'adam', loss = 'mean_squared_error')
# Fitting the RNN to the Training set
model.fit(X_Train, Y_Train, epochs = 100, batch_size = 32)
#THIS IS EXPT AFTER THIS
dataset_train = df.iloc[:800, 1:3]
dataset_test = df.iloc[800:, 1:3]
dataset_total = pd.concat((dataset_train, dataset_test), axis = 0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = scaler.transform(inputs)
X_Test = []
for i in range(60, 800):
X_Test.append(inputs[i-60:i, 0])
X_Test = np.array(X_Test)
X_Test = np.reshape(X_Test, (X_Test.shape[0], X_Test.shape[1], 1))
print(X_Test.shape)
predicted_stock_price = model.predict(X_Test)
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)
plt.plot(df.loc[800:, 'Date'],dataset_test.values, color = 'red', label = 'Real ASTL Stock Price')
plt.plot(df.loc[800:, 'Date'],predicted_stock_price, color = 'blue', label = 'Predicted ASTL Stock Price')
plt.xticks(np.arange(0,459,50))
plt.title('ASTL Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('ASTL Stock Price')
plt.legend()
plt.show()

You have a moment in your reshaping where you end up with a non-integer division. Take this example:
import numpy as np
data = np.zeros(3936)
out = data.reshape((-1,1,24,2))
works well because 3936/24/2 results in an integer, 82 .
But in this example
import numpy as np
data = np.zeros(34345)
out = data.reshape((-1,1,24,2))
you end up with the error message ValueError: cannot reshape array of size 34345 into shape (1,24,2) because the division does not result in an integer.
So, looping the way you do is bound to result in events of that type.

Related

Feature scaling for LSTM time series classification

I am trying to prepare input data for LSTM time series classification model. I wanted to scale the features using sklearn MaxAbsScaler(), but I was having a hard time to incorporate scaling in my following code. I am using tensorflow implementation of LSTM network.
import keras
from keras.layers import LSTM, Dropout, Dense
import tensorflow as tf
import numpy as np
import pandas as pd
df = pd.read_excel('/content/hdds.xlsx')
def generate_data(X, y, sequence_length=2, step = 1):
X_local = []
y_local = []
for start in range(0, len(df) - sequence_length, step):
end = start + sequence_length
X_local.append(X[start:end])
y_local.append(y[end-1])
return np.array(X_local), np.array(y_local)
X_sequence, y = generate_data(df.loc[:, "V1":"V3"].values, df.Class)
X_sequence.shape, y.shape
((16, 2, 3), (16,))
training_size = int(len(X_sequence) * 0.7)
X_train, y_train = X_sequence[:training_size], y[:training_size]
X_test, y_test = X_sequence[training_size:], y[training_size:]
X_train.shape, X_test.shape
((11, 2, 3), (5, 2, 3))
I intended to use the following codes to scale the input data, but it is not working. I appreciate your suggestions. Thanks!
from sklearn.preprocessing import MaxAbsScaler
scaler = MaxAbsScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_train = np.clip(X_train, -5, 5)
X_test = np.clip(X_test, -5, 5)
Sample data:

ValueError: Input 0 of layer sequential_2 is incompatible with the layer:

i have a error in following code , error in 2st part on code and on first part i am declaring my dataset , layers etc.
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
data=pd.read_excel('/content/dataset.xlsx')
data.head()
data.plot(kind='scatter', x='fiyat', y='yil',alpha = 0.5,color = 'red')
plt.xlabel('price') # label = name of label
plt.ylabel('year')
plt.title('Fiyat ve yil Scatter Plot')
data.plot(kind='scatter', x='fiyat', y='km',alpha = 0.5,color = 'grey')
plt.xlabel('price') # label = name of label
plt.ylabel('km')
plt.title('Fiyat ve km Scatter Plot')
data.plot(kind='scatter', x='fiyat', y='motor_gucu_hp',alpha = 0.5,color = 'green')
plt.xlabel('price') # label = name of label
plt.ylabel('machine power')
plt.title('fiyat ve motor_gucu_hp Scatter Plot')
# Importing the dataset
X = data.iloc[:, data.columns != 'fiyat']
y = data.fiyat
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
# define base model
def baseline_model():
# create model
model = Sequential()
model.add(Dense(30, input_dim=120, kernel_initializer='normal', activation='relu'))
model.add(Dense(120, activation = 'relu'))
model.add(Dense(120, activation = 'relu'))
model.add(Dense(1, kernel_initializer='normal'))
# Compile model
model.compile(loss='mse',
optimizer='adam',
metrics=['mae'] )
return model
model = baseline_model()
model.summary()
And getting error in here ; on model.fit location
import tensorflow as tf
from tensorflow import keras
import numpy as np
# Display training progress by printing a single dot for each completed epoch
EPOCHS = 500
# Store training stats
history = model.fit(X_train, y_train, epochs=EPOCHS,
batch_size=16, verbose=0)
ValueError: Input 0 of layer sequential_2 is incompatible with the layer: expected axis -1 of input shape to have value 120 but received input with shape (None, 47)
And error like this , can you help? What can i do.

Strange results from a neural network build using Keras

I build an sentiment classifier using Keras to predict if a sentence has a sentiment score of 1, 2, 3, 4 or 5. However I am getting some strange results. I will first show my code:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
import numpy as np
# the data only reflects the structure of the actual data
# the real data has way larger text and more entries
X_train = ['i am glad i heard about that', 'that is one ugly bike']
y_train = pd.Series(np.array([1, 4])) # pandas series
X_test = ['that hurted me']
y_test = pd.Series(np.array([1, 4])) # pandas series
# tokenizing
tokenizer = Tokenizer(num_words = 5)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
# performing some padding
padding_len = 4
X_train_seq_padded = pad_sequences(X_train_seq, maxlen = padding_len)
X_test_seq_padded = pad_sequences(X_test_seq, maxlen = padding_len)
# building the model
model = Sequential()
model.add(Dense(16, input_dim = padding_len, activation = 'relu', name = 'hidden-1'))
model.add(Dense(16, activation = 'relu', name = 'hidden-2'))
model.add(Dense(16, activation = 'relu', name = 'hidden-3'))
model.add(Dense(6, activation='softmax', name = 'output_layer'))
# compiling the model
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])
# training the model
callbacks = [EarlyStopping(monitor = 'accuracy', patience = 5, mode = 'max')]
my_model = model.fit(X_train_seq_padded, to_categorical(y_train), epochs = 100, batch_size = 1000, callbacks = callbacks, validation_data = (X_test, to_categorical(y_test)))
Using the actual data I keep getting results around 0.67xx (xx random numbers) which are reached after 1/2 epochs, no matter what changes to the code I introduce (and some are extreme).
I tried changing the padding to 1, 10, 100, 1000.
I tried removing the layer hidden-2 and hidden-3.
I tried adding stop word removal before tokenizing.
I tried using the tahn activation function in the hidden layers.
I used the sgd optimizer.
Example output of one setup:
Now my question is, is there something wrong with my code or are these actual possible results?

LSTM predict stock market is hard to converge in Keras

I am working on a stock prediction project and I just want to predict the gain and drop labels from the LSTM net. It is a binary classification problem.
However, my LSTM net is hard to converge even I reduce the training set a lot. Technically, it should overfit easily. But my prediction accuracy is still only 60% and loss is around 0.7 even I just feed 90 samples for training. So, I was thinking I probably made some mistakes in building the neural net. However, due to my limited ability, I cannot find the reason. Therefore, I really hope someone can take a look at my code and point out the reason! I will appreciate a lot!
My code is given below.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import collections
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import Dense, LSTM, Dropout, Activation, Flatten ,BatchNormalization
from keras.utils import to_categorical, np_utils
from keras.optimizers import SGD
data = pd.read_csv("EURUSD_M5_201910210000_201910251140.csv", sep="\t")
train_cut = int(data.shape[0] * 0.8)
dataset_train = data[0:train_cut]
training_set = dataset_train["<OPEN>"].values
sc = MinMaxScaler(feature_range=(0, 1))
train_sec_scaled = sc.fit_transform(training_set.reshape(-1, 1))
X_train = []
y_train = []
step_size = 60
predic_days = 1
for i in range(step_size, len(train_sec_scaled) - predic_days):
X_train.append(train_sec_scaled[i - step_size : i, 0])
y_value = train_sec_scaled[i : i + predic_days, 0]
last_day_value = train_sec_scaled[i - 1, 0]
# 1 ==> up, 0 ==> down
if y_value[0] > last_day_value:
y_train.append([1])
else:
y_train.append([0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
y_train = y_train.reshape(y_train.shape[0], y_train.shape[1])
y_train = np_utils.to_categorical(y_train, 2)
print(y_train)
print("train data generated!")
print(X_train.shape, y_train.shape)
def train():
model = Sequential()
model.add(Dropout(0.2))
model.add(LSTM(40))
model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.02))
model.add(Dropout(0.2))
model.add(Dense(30, kernel_initializer='glorot_normal'))
model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.02))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
sgd = SGD(lr=0.01, decay=1e-4, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.fit(X_train,
y_train,batch_size=32,epochs=10000)
model.save("trend_analysis.h5")
print("model saved!")
if __name__ == "__main__":
train()
Also, here is the dataset I used:
https://drive.google.com/open?id=1r_0Ko1F6i0F1pToTSsQF1xGt_FTtpUux
Thanks in advance!

How to plot training loss and accuracy curves for a MLP model in Keras?

I am modeling a neural network using Keras and I am trying to evaluate it with a graph of acc and val_acc. I have 3 errors in the following lines of code:
In print(history.keys()) The error is function' object has not attribute 'keys'
In y_pred = classifier.predict(X_test) The error is name 'classifier' is not defined
In plt.plot(history.history['acc']) The error is 'History' object is not subscriptable
I'm also trying to graph the ROC curve, how could I do it?
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn import cross_validation
from matplotlib import pyplot
from keras.utils import plot_model
dataset = pd.read_csv('Data_BP.csv')
X = dataset.iloc[:, 0:11].values
y = dataset.iloc[:, -1].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size = 0.2, random_state = 0)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
def Model():
classifier = Sequential()
classifier.add(Dense(units = 12, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['mse', 'acc'])
return classifier
classifier = Model()
history = classifier.fit(X_train, y_train, validation_split=0.25, batch_size = 10, epochs = 5)
print('\n', history.history.keys())
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
from sklearn.metrics import recall_score, classification_report, auc, roc_curve
cm = confusion_matrix(y_test, y_pred)
print(cm)
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()
What functions should be added?
Change history to classifier in the following lines (actually History object is the return value of fit method called on Model object) like this:
classifier = Model()
history = classifier.fit(...)
Don't confuse the return value of fit method with your model. The History object, as its name suggests, only contains the history of training. However, your model is classifier and it is the one that has methods like fit(), predict(), evaluate(), compile(), etc.
Plus, the History object has an attribute called history which is a dictionary containing the values of loss and metrics during the training. Therefore you need to use print(history.history.keys()) instead.
Now, if you would like to for example plot loss curve during training (i.e. loss at the end of each epoch) you can do it like this:
loss_values = history.history['loss']
epochs = range(1, len(loss_values)+1)
plt.plot(epochs, loss_values, label='Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()