Strange results from a neural network build using Keras - tensorflow

I build an sentiment classifier using Keras to predict if a sentence has a sentiment score of 1, 2, 3, 4 or 5. However I am getting some strange results. I will first show my code:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
import numpy as np
# the data only reflects the structure of the actual data
# the real data has way larger text and more entries
X_train = ['i am glad i heard about that', 'that is one ugly bike']
y_train = pd.Series(np.array([1, 4])) # pandas series
X_test = ['that hurted me']
y_test = pd.Series(np.array([1, 4])) # pandas series
# tokenizing
tokenizer = Tokenizer(num_words = 5)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
# performing some padding
padding_len = 4
X_train_seq_padded = pad_sequences(X_train_seq, maxlen = padding_len)
X_test_seq_padded = pad_sequences(X_test_seq, maxlen = padding_len)
# building the model
model = Sequential()
model.add(Dense(16, input_dim = padding_len, activation = 'relu', name = 'hidden-1'))
model.add(Dense(16, activation = 'relu', name = 'hidden-2'))
model.add(Dense(16, activation = 'relu', name = 'hidden-3'))
model.add(Dense(6, activation='softmax', name = 'output_layer'))
# compiling the model
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])
# training the model
callbacks = [EarlyStopping(monitor = 'accuracy', patience = 5, mode = 'max')]
my_model = model.fit(X_train_seq_padded, to_categorical(y_train), epochs = 100, batch_size = 1000, callbacks = callbacks, validation_data = (X_test, to_categorical(y_test)))
Using the actual data I keep getting results around 0.67xx (xx random numbers) which are reached after 1/2 epochs, no matter what changes to the code I introduce (and some are extreme).
I tried changing the padding to 1, 10, 100, 1000.
I tried removing the layer hidden-2 and hidden-3.
I tried adding stop word removal before tokenizing.
I tried using the tahn activation function in the hidden layers.
I used the sgd optimizer.
Example output of one setup:
Now my question is, is there something wrong with my code or are these actual possible results?

Related

Getting non-brodcastable error in my LSTM

So, I have been trying to apply LSTM on this csv file CSV File that im trying to train
However, it seems to train it self but after the training, its causing issue on my test file with either
Error 1
Or if I modify it a little pit then I get another error which says "Value Error: cannot reshape array of size 1047835 into shape"
Here is the code im implementing:-
import math
import matplotlib.pyplot as plt
import keras
import pandas as pd
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" #Had to use CPU because of gpus capability was 3.0
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
df=pd.read_csv(r'C:\Users\LambertThePrick\Desktop\Databysir\LSTM.csv')
print(df.shape)
print(df.head(5))
#df.head(5)
TrainPart=df.iloc[:800,1:3].values
test_set=df.iloc[800:,1:3].values
scaler=MinMaxScaler(feature_range=(0,1))
Trainpart_scaled=scaler.fit_transform(TrainPart)
print(Trainpart_scaled)
X_Train=[]
Y_Train=[]
for i in range(60,800):
X_Train.append(Trainpart_scaled[i-60:i,0])
Y_Train.append(Trainpart_scaled[i,0])
X_Train,Y_Train=np.array(X_Train),np.array(Y_Train)
X_Train = np.reshape(X_Train, (X_Train.shape[0], X_Train.shape[1], 1))
# print(X_train = np.reshape(X_Train, (X_Train.shape[0], X_Train.shape[1], 1)))
#(740, 60, 1)
model = Sequential()
#Adding the first LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True, input_shape = (X_Train.shape[1], 1)))
model.add(Dropout(0.2))
# Adding a second LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
# Adding a third LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
# Adding a fourth LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50))
model.add(Dropout(0.2))
# Adding the output layer
model.add(Dense(units = 1))
# Compiling the RNN
model.compile(optimizer = 'adam', loss = 'mean_squared_error')
# Fitting the RNN to the Training set
model.fit(X_Train, Y_Train, epochs = 100, batch_size = 32)
#THIS IS EXPT AFTER THIS
dataset_train = df.iloc[:800, 1:3]
dataset_test = df.iloc[800:, 1:3]
dataset_total = pd.concat((dataset_train, dataset_test), axis = 0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = scaler.transform(inputs)
X_Test = []
for i in range(60, 800):
X_Test.append(inputs[i-60:i, 0])
X_Test = np.array(X_Test)
X_Test = np.reshape(X_Test, (X_Test.shape[0], X_Test.shape[1], 1))
print(X_Test.shape)
predicted_stock_price = model.predict(X_Test)
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)
plt.plot(df.loc[800:, 'Date'],dataset_test.values, color = 'red', label = 'Real ASTL Stock Price')
plt.plot(df.loc[800:, 'Date'],predicted_stock_price, color = 'blue', label = 'Predicted ASTL Stock Price')
plt.xticks(np.arange(0,459,50))
plt.title('ASTL Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('ASTL Stock Price')
plt.legend()
plt.show()
You have a moment in your reshaping where you end up with a non-integer division. Take this example:
import numpy as np
data = np.zeros(3936)
out = data.reshape((-1,1,24,2))
works well because 3936/24/2 results in an integer, 82 .
But in this example
import numpy as np
data = np.zeros(34345)
out = data.reshape((-1,1,24,2))
you end up with the error message ValueError: cannot reshape array of size 34345 into shape (1,24,2) because the division does not result in an integer.
So, looping the way you do is bound to result in events of that type.

Keras predicting different output for same input image

am working on a classification problem for binary classes, I have finished the training and testing the model in single images now using the below code
import warnings
import time
from urllib.request import urlopen
import os
import urllib.request
start_time = time.time()
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=FutureWarning)
import numpy as np
from keras.preprocessing.image import img_to_array, load_img
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.applications.vgg16 import VGG16
import tensorflow as tf
import logging
logging.getLogger('tensorflow').disabled = True
img_size = 224
class PersonPrediction:
def __init__(self):
self.class_dictionary = np.load(
'class_indices_vgg.npy',
allow_pickle=True).item()
self.top_model_weights_path = 'v2/weights/bottleneck_fc_model_2020-10-10-05.h5'
self.num_classes = len(self.class_dictionary)
self.model = self.create_model(self.num_classes)
self.graph = tf.compat.v1.get_default_graph()
def create_model(self, num_of_cls):
model = Sequential()
vgg_model = VGG16(include_top=False, weights='imagenet', input_shape=(img_size, img_size, 3))
for layer in vgg_model.layers[:-4]:
layer.trainable = False
model.add(vgg_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
return model
def predict(self, path=None, file_name=None):
if path:
image_path = path
path = self.url_to_image(image_path)
else:
path = os.path.join('imgs', file_name)
print("[INFO] loading and preprocessing image...")
image = load_img(path, target_size=(224, 224))
image = img_to_array(image)
# important! otherwise the predictions will be '0'
image = image / 255
image = np.expand_dims(image, axis=0)
label_idx = self.model.predict_classes(image)[0][0]
probability = self.model.predict(image)[0]
inv_map = {v: k for k, v in self.class_dictionary.items()}
label = inv_map[label_idx]
return label, probability[0]
path = 'temp.jpg'
tax_model = PersonPrediction()
label, proba = tax_model.predict(
file_name='frame303.jpg')
print(label, proba)
Problem is I keep getting chaning predictions of both label and accuracy every time I rerun the code, am not sure what is causing that
There are a number of sources that create randomness in the results when training a model. First the weights are randomly initialized so your model is starting from a different point in N space (N is the number of trainable parameters). Second layers like dropout have randomness in terms of which nodes will be nodes will be selected. Some GPU processes particularly with multi-processing can also have some degree of randomness. I have seen a number of posts on getting repeatable results in tensorflow but I have not found one that seems to really work. In general though the results should be reasonably close if your model is working correctly and you run enough epochs. Now once the model is trained and you use it for predictions as long as you use the same trained model you should get identical prediction results.

Prediction in non classified answer

I have create neuronetwork in Kerars, program is runing but there is problem of result, it is Forexforcast network in forcast it should return 0 or 1 , as provided in traing dataset but result is showing in between 0 and 1 in float like "[[0.47342286]]"
I have tried to use numpy athmax but it only result in 1 answer
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime
from sklearn.preprocessing import MinMaxScaler
from ta import *
dataset = pd.read_csv('C:/Users/SIGMA COM/PycharmProjects/deep/GBP_JPY Historical Data.csv',index_col="Date",parse_dates=True)
dataset = dataset[::-1]
print(dataset.head())
print(dataset.isna().any())
print(dataset.info())
dataset['Open'].plot(figsize=(16,6))
# initial value
step_size = 4
batch_sizes = 1
dataset['Diff'] = dataset['Open'] - dataset['Price']
dataset['Range'] = dataset['High'] - dataset['Low']
dataset['Rsi'] = rsi(close=dataset['Price'],n=4,fillna=True)
dataset['Macd'] = macd(close=dataset['Price'],n_fast=12,n_slow=26,fillna=True)
dataset['Cci'] = cci(high=dataset['High'],low=dataset['Low'],close=dataset['Price'],n=20,fillna=True)
# dataset['Rsi'] = dataset['Rsi'] /100.0
# # dataset['Macd'] = dataset['Macd'] /2.0
# dataset['Cci'] = dataset['Cci'] / 500.0
training_set = dataset[['Rsi','Macd','Cci','Price','Low','High','Open','Signal']]
sc = MinMaxScaler()
training_set_scaled = sc.fit_transform(training_set)
# Creating a data structure with 60 timesteps and 1 output
X_train = []
y_train = []
for i in range(60, 1258):
X_train.append(training_set_scaled[i-60:i, 0])
y_train.append(training_set_scaled[i, -1:])
X_train, y_train = np.array(X_train), np.array(y_train)
# Reshaping
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
print(X_train.shape)
print(X_train)
plt.show()
# Part 2 - Building the RNN
# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
print((X_train.shape[1], 1))
print(X_train.shape)
# Initialising the RNN
regressor = Sequential()
# Adding the first LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1)))
regressor.add(Dropout(0.2))
# Adding a second LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))
# Adding a third LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))
# Adding a fourth LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50))
regressor.add(Dropout(0.2))
# Adding the output layer
regressor.add(Dense(units = 1,activation='sigmoid'))
# Compiling the RNN
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
# Fitting the RNN to the Training set
regressor.fit(X_train, y_train, epochs = 10, batch_size = 32)
result = regressor.predict(np.reshape(X_train[100],(1,60,1)))
print(result)
I want to make model to make predication in class 0 and 1
This behavior is expected, because the sigmoid function is going to return a number between zero and one, like so:
So if your class labels are either 0 or 1, which seems to be the case here, for a binary classification problem you can just round the resultant output for your class prediction. Let's make a distinction between a classification vs. a regression problem here: regression is like finding the "line of best fit;" that is, the model is being trained to approximate the data. This appears to be what you're doing here: you're minimizing the mean squared error and searching for the model that best approximates your data, but that doesn't make a prediction.
If you want to actually make a classification, you can just round all elements of the result of regressor.predict to 0 or 1, and then compare your predictions with the true labels. This can actually be done easily in numpy like so: numpy.around(your_predictions, decimals=0). Note the decimals argument is not strictly required since it defaults to a value of 0, it's nice for clarity.
As for using numpy.argmax (I'm going to assume that's what you meant by athmax since I can't find a function with that spelling), it will give you the same label for everything because it returns the index of the largest element in an array. Since your output array has length one (because it's simply a single neuron that calculates the logistic function), it will always return index zero! However, you're sort of on the right track: if your last layer was instead Dense(units=n_classes, activation='softmax') — softmax outputs a probability distribution that a particular row of data will produce each label. In that case, numpy.argmax is correct.
Here's a Tensorflow tutorial on classification that I found super helpful when I was just learning it myself. It uses softmax instead of sigmoid like you, but I think it's fairly adaptable to your needs: https://www.tensorflow.org/tutorials/keras/basic_classification
Hope this helps!

why is this model giving me a "Value Error"

Its giving me a value error that I don't understand. Here is what it says :
Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 2 array(s), but instead got the following list of 1 arrays" my data has 8 columns and i m trying to predict the last 2 for output.
This is a ranking algorithm that I'm experimenting with my own data with :
import pandas as pd
import keras
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras import backend
from keras.layers import Activation, Dense, Input, Subtract
from keras.models import Model
INPUT_DIM = 7
# Model.
h_1 = Dense(128, activation="relu")
h_2 = Dense(64, activation="relu")
h_3 = Dense(32, activation="relu")
s = Dense(1)
# Relevant document score.
rel_doc = Input(shape=(INPUT_DIM,), dtype="float32")
h_1_rel = h_1(rel_doc)
h_2_rel = h_2(h_1_rel)
h_3_rel = h_3(h_2_rel)
rel_score = s(h_3_rel)
# Irrelevant document score.
irr_doc = Input(shape=(INPUT_DIM,), dtype="float32")
h_1_irr = h_1(irr_doc)
h_2_irr = h_2(h_1_irr)
h_3_irr = h_3(h_2_irr)
irr_score = s(h_3_irr)
# Subtract scores.
diff = Subtract()([rel_score, irr_score])
# Pass difference through sigmoid function.
prob = Activation("sigmoid")(diff)
# Build model.
model = Model(inputs=[rel_doc, irr_doc], outputs=prob)
model.compile(optimizer="adadelta", loss="binary_crossentropy")
# data.
data=pd.read_csv('ranking_dataset_remastered.csv')
print (data.head())
X = data.iloc[:, 1:7]
y = data.iloc[:, 6:7]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size =
0.2)
)
# Train model.
NUM_EPOCHS = 20
BATCH_SIZE = 512
history = model.fit(X_train, y_train, batch_size=BATCH_SIZE,
epochs=NUM_EPOCHS, verbose=1)
# Generate scores from document/query features.
get_score = backend.function([rel_doc], [rel_score])
get_score([X_train])
get_score([y_train])
When you defined your model with this line:
model = Model(inputs=[rel_doc, irr_doc], outputs=prob)
You created what keras refers to as a multi-input model, which essentially means that your model is expecting more than one inputs (in your case 2: rel_doc and irr_doc).
However during training you are just passing 1 input, X_train:
history = model.fit(X_train, y_train, batch_size=BATCH_SIZE,
epochs=NUM_EPOCHS, verbose=1)
What you should do in order to work is to have two arrays, one representing relevant and one irrelevant documents and feed them both to the model during training like this:
history = model.fit([X_rel_train, X_irr_train], y_train, batch_size=BATCH_SIZE,
epochs=NUM_EPOCHS, verbose=1)

Categorical crossentropy and label encoding

I'm trying to code multiclass output and classes are ['A','B','C','D','E','F','G'].
Could someone elaborate more next error message:
"ValueError: You are passing a target array of shape (79, 1) while using as loss categorical_crossentropy. categorical_crossentropy expects targets to be binary matrices (1s and 0s) of shape (samples, classes). If your targets are integer classes, you can convert them to the expected format via:
from keras.utils.np_utils import to_categorical
y_binary = to_categorical(y_int)
Alternatively, you can use the loss function sparse_categorical_crossentropy instead, which does expect integer targets."
My code:
# Part 1 - Data Preprocessing
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataa = pd.read_csv('test_out.csv')
XX = dataa.iloc[:, 0:4].values
yy = dataa.iloc[:, 4].values
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_Y_1 = LabelEncoder()
yy = labelencoder_Y_1.fit_transform(yy)
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(XX, yy, test_size = 0.2,
random_state = 0)
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Part 2 - Now let's make the ANN!
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
# Initialising the ANN
classifier = Sequential()
# Adding the input layer and the first hidden layer
classifier.add(Dense(output_dim = 6, init = 'uniform', activation = 'relu',
input_dim = 4))
# Adding the second hidden layer
classifier.add(Dense(output_dim = 6, init = 'uniform', activation = 'relu'))
# Adding the output layer
classifier.add(Dense(output_dim = 1, init = 'uniform', activation =
'softmax'))
# Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'categorical_crossentropy',
metrics = ['accuracy'])
# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size = 10, nb_epoch = 50)
# Part 3 - Making the predictions and evaluating the model
# Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
The problem lies in this portion of your code,
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_Y_1 = LabelEncoder()
yy = labelencoder_Y_1.fit_transform(yy)
You forgot to one-hot encode the yy, please take note that LabelEncoder only transforms your categorical data to numerical one, i.e. [A, B, C, D, E, F, G] to [1, 2, 3, 4, 5, 6, 7]. You have to one-hot encode it since you want to use softmax activation, and categorical_crossentropy (I'm over-simplifying, but it's the gist).
So, it should have been like this,
# Encoding categorical data
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
labelencoder_Y_1 = LabelEncoder()
yy = labelencoder_Y_1.fit_transform(yy)
yy = to_categorical(yy)
I assume your target class that you are going to predict is binary i.e there are only 2 possible values that could occur
If your target is binary then, the last layer of the model should be activated with sigmoid activation function. Also, the model should be compiled with binary_crossentropy or sparse_categorical_crossentropy.
If the target is multi-class i.e more than 2 possible values, you must convert the target to categorical with the help of to_categorical from keras. Then you should compile your model with categorical_crossentropy and the last layer in the model should be activated with softmax activation function.!!