I'm not able to plot y_train data - matplotlib

idx = random.randint(0, len(X_train))
preds_train = model.predict(X_train[:int(X_train.shape[0]*0.9)], verbose=1)
preds_val = model.predict(X_train[int(X_train.shape[0]*0.9):], verbose=1)
preds_test = model.predict(X_test, verbose=1)
preds_train_t = (preds_train > 0.5).astype(np.uint8)
preds_val_t = (preds_val > 0.5).astype(np.uint8)
preds_test_t = (preds_test > 0.5).astype(np.uint8)
# Perform a sanity check on some random training samples
ix = random.randint(0, len(preds_train_t))
imshow(X_train[ix])
plt.show()
imshow(np.squeeze(Y_train[ix]))
plt.show()
imshow(np.squeeze(preds_train_t[ix]))
plt.show()
# Perform a sanity check on some random validation samples
ix = random.randint(0, len(preds_val_t))
imshow(X_train[int(X_train.shape[0]*0.9):][ix])
plt.show()
imshow(np.squeeze(Y_train[int(Y_train.shape[0]*0.9):][ix]))
plt.show()
imshow(np.squeeze(preds_val_t[ix]))
plt.show()
I'm not able to plot the y_train data; I'm getting this error: 'numpy boolean subtract, the - operator, is not supported, use the bitwise_xor, the ^ operator, or the logical_xor function instead.'
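This error often comes from passing a boolean array to imshow, whose intensity rescaling ends up subtracting boolean values. A possible workaround sketch (an assumption on my part: it presumes Y_train is a boolean mask of shape (N, H, W, 1)):
# Cast the boolean mask to uint8 (or float) before plotting so the
# rescaling inside imshow never subtracts boolean values.
imshow(np.squeeze(Y_train[ix]).astype(np.uint8) * 255)
plt.show()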

Related

LSTM model has lower than expected accuracy

Hello, I am working on a problem that involves time series.
I am plotting y = sin(x) with 10000 values.
Then, to each value (y), I associate an index (between 0 and 1) calculated from the next values:
if the next 150 values are lower than the current one, the index is set to 1;
if the next 150 values are higher than the current one, the index is set to 0.
Then I try to set up an LSTM network using tensorflow/keras in order to predict this index based on the last 150 values, which should be fairly trivial for a sine function.
Here is the code and the explanation:
I make an array with 10000 values of sin(x)
import numpy as np
import math
from matplotlib import pyplot as plt
n = 10000
array = np.array([math.sin(i*0.02) for i in range(1, n)])
fig, ax = plt.subplots()
ax.plot([(i*0.02) for i in range(1, n)], array, linewidth=0.75)
plt.show()
Calculate the associated index, here SELL_INDEX
SELL_INDEX = np.zeros((len(array), 1))
for index, row in enumerate(array):
    if index > len(array) - 150:
        continue
    max_price = np.amax(array[index:index + 150])
    min_price = np.amin(array[index:index + 150])
    current_sell_index = (row - min_price) / (max_price - min_price)
    SELL_INDEX[index][0] = current_sell_index
data_with_sell_index = np.hstack((array.reshape(-1,1), SELL_INDEX))
data_final = np.hstack( (data_with_sell_index, np.arange(len(data_with_sell_index)).reshape(-1, 1)) )
fig, ax = plt.subplots()
ax.scatter(data_final[:,2], data_final[:,0] , c = data_final[:,1], s = .5)
plt.show()
Here is the plot (sin(x), SELL_INDEX : 1 being yellow, 0 being purple )
Here is the creation of the model
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.python.keras import models, Input, Model
from tensorflow.python.keras.layers import LSTM, Dense, Dropout
# from neural_intelligence.batches_generator import generate_smart_lstm_batch, get_smart_lstm_data
class LearningRateReducerCb(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        old_lr = self.model.optimizer.lr.read_value()
        new_lr = old_lr * 0.99
        print("\nEpoch: {}. Reducing Learning Rate from {} to {}".format(epoch, old_lr, new_lr))
        self.model.optimizer.lr.assign(new_lr)
# Model creation
input_layer = Input(shape=(150, 1))
layer_1_lstm = LSTM(100, return_sequences=True)(input_layer)
dropout_1 = Dropout(0.0)(layer_1_lstm)
layer_2_lstm = LSTM(200, return_sequences=True)(dropout_1)
dropout_2 = Dropout(0.0)(layer_2_lstm)
layer_3_lstm = LSTM(100)(dropout_2)
output_sell_index_proba = Dense(1, activation='sigmoid')(layer_3_lstm)
model = Model(inputs=input_layer, outputs=output_sell_index_proba)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
Training the model
def generate_batch(dataset_x, dataset_y, sequence_length):
    x_data, y_data = [], []
    for i in range(len(list(zip(dataset_x, dataset_y))) - sequence_length - 1):
        x_data.append(dataset_x[i:i + sequence_length])
        y_data.append(dataset_y[i + sequence_length])
    return np.array(x_data), np.array(y_data)
x, y = generate_batch(data_final[:,0], data_final[:,1], sequence_length=150)
x = x.reshape(x.shape[0], x.shape[1], 1)
y = y.reshape(x.shape[0], 1, 1)
print(x.shape, y.shape)
model.fit(x, y, callbacks=[LearningRateReducerCb()], epochs=2,
          validation_split=0.1, batch_size=64, verbose=1)
Here is my issue: the accuracy never goes above 0.52 and I don't understand why; everything seems fine to me.
This should be very simple for a tool as powerful as an LSTM, but it can't figure out what the index should be.
If you could help me in any way, it would be welcome, thank you.
EDIT : To plot the result, use
data = np.array(data_final[:,0])
results = np.array([])
for i in range(150, 1000):
    result = model.predict(data[i - 150 : i].reshape(1, 150, 1))
    results = np.append(result, results)
data = data[150:1000]
fig, ax = plt.subplots()
ax.scatter([range(len(data))], data.flatten() , c = results.flatten(), s= 1)
plt.show()
It seems to be working; the question remains: why does the accuracy never go up while training?
This led me to investigate what the problem was instead of just trying to predict.
This may be simplistic, but to my mind you are only accurately predicting half your curve.
This is where the blue and yellow lines overlap in your fit chart. The accuracy measure will be computed over all of the rows unless you tell it otherwise.
This intuitively explains why your accuracy is c. 50%. You should be able to confirm this by splitting your data into these two portions and calculating the accuracy on each.
I suggest playing around with your features and transformations to understand which type of shapes predict your sine curve with a higher accuracy (and give a fuller overlap between the lines).
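As a concrete sketch of the split-accuracy check suggested above (hypothetical, assuming x and y are the arrays passed to model.fit and the model has already been trained):
# Evaluate the trained model on the first and second halves of the sequence
# data to see whether the ~50% accuracy comes from one half only.
half = len(x) // 2
loss_a, acc_a = model.evaluate(x[:half], y[:half], verbose=0)
loss_b, acc_b = model.evaluate(x[half:], y[half:], verbose=0)
print("first half accuracy:", acc_a)
print("second half accuracy:", acc_b)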

Plot Confusion Matrix from Roberta Model

I wrote text classification code with two classes using the Roberta model, and now I want to draw the confusion matrix.
How should I go about plotting the confusion matrix for a Roberta model?
RobertaTokenizer = RobertaTokenizer.from_pretrained('roberta-base',do_lower_case=False)
roberta_model = TFRobertaForSequenceClassification.from_pretrained('roberta-base',num_labels=2)
input_ids=[]
attention_masks=[]
for sent in sentences:
    bert_inp = RobertaTokenizer.encode_plus(sent, add_special_tokens=True, max_length=128, pad_to_max_length=True, return_attention_mask=True)
    input_ids.append(bert_inp['input_ids'])
    attention_masks.append(bert_inp['attention_mask'])
input_ids=np.asarray(input_ids)
attention_masks=np.array(attention_masks)
labels=np.array(labels)
#split
train_inp,val_inp,train_label,val_label,train_mask,val_mask=train_test_split(input_ids,labels,attention_masks,test_size=0.5)
print('Train inp shape {} Val input shape {}\nTrain label shape {} Val label shape {}\nTrain attention mask shape {} Val attention mask shape {}'.format(train_inp.shape,val_inp.shape,train_label.shape,val_label.shape,train_mask.shape,val_mask.shape))
#
log_dir='tensorboard_data/tb_roberta'
model_save_path='/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/callbacks.py'
callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath=model_save_path,save_weights_only=True,monitor='val_loss',mode='min',save_best_only=True),keras.callbacks.TensorBoard(log_dir=log_dir)]
print('\nBert Model',roberta_model.summary())
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5,epsilon=1e-08)
roberta_model.compile(loss=loss,optimizer=optimizer,metrics=[metric])
history=roberta_model.fit([train_inp,train_mask],train_label,batch_size=16,epochs=2,validation_data=([val_inp,val_mask],val_label),callbacks=callbacks)
trained_model = TFRobertaForSequenceClassification.from_pretrained('roberta-base',num_labels=2)
trained_model.compile(loss=loss,optimizer=optimizer, metrics=[metric])
trained_model.load_weights(model_save_path)
preds = trained_model.predict([val_inp,val_mask],batch_size=16)
pred_labels = np.argmax(preds.logits, axis=1)
conf_matrix = confusion_matrix(labels2,pred_labels)
print('conf_matrix ',conf_matrix)
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        ax.text(x=j, y=i, s=conf_matrix[i, j], va='center', ha='center', size='xx-large')
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix(without preprocessing)', fontsize=18)
plt.show()
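As an alternative to drawing the matrix by hand with matshow, scikit-learn's ConfusionMatrixDisplay can plot the same matrix. A sketch, assuming the true labels for the validation split are in val_label and the predictions in pred_labels:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Build the matrix from the validation labels and the predicted labels,
# then let scikit-learn handle the plotting.
conf_matrix = confusion_matrix(val_label, pred_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=[0, 1])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix (ConfusionMatrixDisplay)')
plt.show()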

How to feed my network with the correct array size in tensorflow

I have the following code, in which I am trying to train the network I built on Belgian traffic signs; here is the code below:
import tensorflow as tf
import os
import skimage.io
from skimage import transform
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
config=tf.ConfigProto(log_device_placement=True)
#config_soft = tf.ConfigProto(allow_soft_placement =True)
def load_data(data_directory):
    directories = [d for d in os.listdir(data_directory)
                   if os.path.isdir(os.path.join(data_directory, d))]
    labels = []
    images = []
    for d in directories:
        label_directory = os.path.join(data_directory, d)
        file_names = [os.path.join(label_directory, f)
                      for f in os.listdir(label_directory)
                      if f.endswith(".ppm")]
        for f in file_names:
            images.append(skimage.io.imread(f))
            labels.append(int(d))
    return images, labels
Root_Path = "/home/raed/Dropbox/Thesis/Codes/Tensorflow"
training_Directory = os.path.join(Root_Path,"Training")
testing_Directory = os.path.join(Root_Path,"Testing")
images, labels = load_data(training_Directory)
# Convert lists to array in order to retrieve to facilitate information retrieval
images_array = np.asarray(images)
labels_array = np.asanyarray(labels)
#print some information about the datasets
print(images_array.ndim)
print(images_array.size)
print(labels_array.ndim)
print(labels_array.nbytes)
print(len(labels_array))
# plotting the distribution of different signs
sns.set(palette="deep")
plt.hist(labels,62)
# Selecting couple of images based on their indices
traffic_signs = [300,2250,3650,4000]
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i+1)
    plt.imshow(images_array[traffic_signs[i]])
    plt.show()
# Fill out the subplots with the random images and add shape, min and max values
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i+1)
    plt.imshow(images_array[traffic_signs[i]])
    plt.axis('off')
    plt.show()
    print("Shape:{0},max:{1}, min:{2}".format(images_array[traffic_signs[i]].shape,
                                              images_array[traffic_signs[i]].max(),
                                              images_array[traffic_signs[i]].min()))
# Get unique labels
unique_labels = set(labels_array)
# initialize the figure
plt.figure(figsize=(15,15))
i=1
for label in unique_labels:
    image = images_array[labels.index(label)]
    plt.subplot(8, 8, i)
    plt.axis('off')
    plt.title('label:{0} ({1})'.format(label, labels.count(label)))
    i = i + 1
    plt.imshow(image)
plt.show()
images28 = [transform.resize(image, (28, 28)) for image in images]
images28_array = np.asanyarray(images28)
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i+1)
    plt.imshow(images_array[traffic_signs[i]])
    plt.axis('off')
    plt.show()
    print("Shape:{0},max:{1}, min:{2}".format(images28_array[i].shape,
                                              images28_array[i].max(),
                                              images28_array[i].min()))
#convert to grayscale
gray_images = skimage.color.rgb2gray(images28_array)
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i+1)
    plt.axis('off')
    plt.imshow(gray_images[traffic_signs[i]], cmap="gray")
    plt.subplots_adjust(wspace=0.5)
# Show the plot
plt.show()
# prepare placeholders
x = tf.placeholder(dtype=tf.float32, shape =[None, 28,28])
y = tf.placeholder(dtype= tf.int32, shape=[None])
#Flatten the input data
images_flat = tf.layers.flatten(x)
#Fully connected layer , Multi-layer Perceptron (MLP)
logits = tf.contrib.layers.fully_connected(images_flat,62, tf.nn.relu)
#Define loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
#define an optimizer (Stochastic Gradient Descent )
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
#convert logits to label indices
correct_prediction = tf.arg_max(logits,1)
#define an accuracy metric
accuracy =tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#########################################
print('######### Main Program #########')
#########################################
print("images_flat: ", images_flat)
print("logits: ", logits)
print("loss: ", loss)
print("Optimizer:",optimizer)
print("predicted_labels: ", correct_prediction)
tf.set_random_seed(1235)
#images28 = np.asanyarray(images28).reshape(-1, 28, 28,1)
#with tf.Session() as training_session:
# training_session.run(tf.global_variables_initializer())
# for i in range(201):
# print('Epoch', i)
# _,accuracy_value = training_session([optimizer, accuracy],feed_dict={x:images28, y:labels})
# if i%10 ==0:
# print("Loss", loss)
# print('Epochs Done!!')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(201):
        _, loss_value = sess.run([optimizer, loss], feed_dict={x: gray_images, y: labels})
        if i % 10 == 0:
            print("Loss: ", loss_value)
I also did a series of transformations before feeding the network, as follows:
images28 = [transform.resize(image, (28, 28)) for image in images]
images28_array = np.asanyarray(images28)
But on execution I am getting the following error:
ValueError: Cannot feed value of shape (4575, 28, 28, 3) for Tensor 'Placeholder_189:0', which has shape '(?, 28, 28)'
Could you please help me figure out where I am going wrong in training this network? Please refer to the following link for more information:
https://www.datacamp.com/community/tutorials/tensorflow-tutorial
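The error says a batch of shape (4575, 28, 28, 3) is being fed into a placeholder that expects (None, 28, 28), i.e. the colour images are reaching a placeholder built for single-channel 28x28 images. Two possible fixes as a sketch (assuming images28_array holds the resized colour images, as in the code above):
# Option 1: convert to grayscale so the channel dimension is dropped and the
# shape matches the (None, 28, 28) placeholder (this is what gray_images does).
gray_images = skimage.color.rgb2gray(images28_array)  # shape (N, 28, 28)
# Option 2: keep the colour images and widen the placeholder instead.
# x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 3])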

Tensorflow value error: Variable already exists, disallowed

I am predicting financial time series with different time periods using tensorflow. In order to divide the input data, I made sub-samples and used a for loop.
However, I got a ValueError like this:
ValueError: Variable rnn/basic_lstm_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
Without the sub-samples this code works well.
Below is my code.
import tensorflow as tf
import numpy as np
import matplotlib
import os
import matplotlib.pyplot as plt
class lstm:
    def __init__(self, x, y):
        # train Parameters
        self.seq_length = 50
        self.data_dim = x.shape[1]
        self.hidden_dim = self.data_dim*2
        self.output_dim = 1
        self.learning_rate = 0.0001
        self.iterations = 5  # originally 500

    def model(self, x, y):
        # build a dataset
        dataX = []
        dataY = []
        for i in range(0, len(y) - self.seq_length):
            _x = x[i:i + self.seq_length]
            _y = y[i + self.seq_length]
            dataX.append(_x)
            dataY.append(_y)
        train_size = int(len(dataY) * 0.7977)
        test_size = len(dataY) - train_size
        trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
        trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
        print(train_size, test_size)
        # input place holders
        X = tf.placeholder(tf.float32, [None, self.seq_length, self.data_dim])
        Y = tf.placeholder(tf.float32, [None, 1])
        # build a LSTM network
        cell = tf.contrib.rnn.BasicLSTMCell(num_units=self.hidden_dim, state_is_tuple=True, activation=tf.tanh)
        outputs, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
        self.Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1], self.output_dim, activation_fn=None)
        # We use the last cell's output
        # cost/loss
        loss = tf.reduce_sum(tf.square(self.Y_pred - Y))  # sum of the squares
        # optimizer
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train = optimizer.minimize(loss)
        # RMSE
        targets = tf.placeholder(tf.float32, [None, 1])
        predictions = tf.placeholder(tf.float32, [None, 1])
        rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
        # training
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            # Training step
            for i in range(self.iterations):
                _, step_loss = sess.run([train, loss], feed_dict={X: trainX, Y: trainY})
            # prediction
            train_predict = sess.run(self.Y_pred, feed_dict={X: trainX})
            test_predict = sess.run(self.Y_pred, feed_dict={X: testX})
        return train_predict, test_predict
# variables definition
tsx = []
tsy = []
tsr = []
trp = []
tep = []
x = np.loadtxt('data.csv', delimiter=',') # data for analysis
y = x[:,[-1]]
z = np.loadtxt('rb.csv', delimiter=',') # data for time series
z1 = z[:,0] # start cell
z2 = z[:,1] # end cell
for i in range(1):  # need to change to len(z)
    globals()['x_%s' % i] = x[int(z1[i]):int(z2[i]), :]  # definition of x
    tsx.append(globals()["x_%s" % i])
    globals()['y_%s' % i] = y[int(z1[i])+1:int(z2[i])+1, :]  # definition of y
    tsy.append(globals()["y_%s" % i])
    globals()['a_%s' % i] = lstm(tsx[i], tsy[i])  # definition of class
    globals()['trp_%s' % i], globals()['tep_%s' % i] = globals()["a_%s" % i].model(tsx[i], tsy[i])
    trp.append(globals()["trp_%s" % i])
    tep.append(globals()["tep_%s" % i])
Every time the model method is called, you are building the computational graph of your LSTM. The second time the model method is called, tensorflow discovers that you have already created variables with the same name. If the reuse flag of the scope in which the variables are created is set to False, a ValueError is raised.
To solve this problem you have to set the reuse flag to True by calling tf.get_variable_scope().reuse_variables() at the end of your loop.
Note that you can't add this at the beginning of your loop, because then you would be trying to reuse variables that have not yet been created.
You can find more info in the tensorflow docs here.
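A minimal sketch of what that looks like applied to the loop in the question (variable names follow the question's code; this assumes TF 1.x):
for i in range(len(z)):
    x_i = x[int(z1[i]):int(z2[i]), :]
    y_i = y[int(z1[i]) + 1:int(z2[i]) + 1, :]
    a_i = lstm(x_i, y_i)
    trp_i, tep_i = a_i.model(x_i, y_i)
    trp.append(trp_i)
    tep.append(tep_i)
    # let the next iteration reuse the LSTM variables created in this one
    tf.get_variable_scope().reuse_variables()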
You define some variables in the "model" function.
Try this when you want to call the "model" function multiple times:
with tf.variable_scope("model_fn") as scope:
    train_predict, test_predict = model(input1)
with tf.variable_scope(scope, reuse=True):
    train_predict, test_predict = model(input2)

TensorFlow Linear Regression gives 'NaN' result

I am currently running a TensorFlow model with linear regression. However, I don't understand why, even when I decrease the learning_rate from 0.01 to 0.001 and increase the training iterations from 1000 to 50000, I still obtain 'nan' results for the cost function, as well as for the two coefficients. Could anyone please help me detect the problem in the following code?
from __future__ import print_function
import tensorflow as tf
import numpy
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import random
rng = numpy.random
# Parameters
learning_rate = 0.001
training_epochs = 20000 #number of iterations
display_step = 400
#read csv file
datapath = [directory path]
Ha_Noi = pd.read_csv(datapath+"HaNoi_1month_LW_WeatherTest.csv")
#Add an additional column into the table
sLength = len(Ha_Noi['accept_rate'])
Ha_Noi['accept_rate_timeT'] = pd.Series(Ha_Noi['accept_rate'], index=Ha_Noi.index)
#Shift the entries in the accept_rate column upward
Ha_Noi.accept_rate = Ha_Noi.accept_rate.shift(-1)
Ha_Noi = Ha_Noi.dropna(subset = ["longwait_percent4"])
Ha_Noi = Ha_Noi.dropna(subset=["accept_rate"])
Ha_Noi = Ha_Noi.dropna(subset = ["longwait_percent2"])
df2 = pd.DataFrame(Ha_Noi)
#split the dataset into training and testing sets
train_set, test_set = train_test_split(Ha_Noi, test_size=0.2, random_state = random.randint(20, 200))
Xtrain = train_set['longwait_percent2'].reshape(-1,1)
Ytrain = train_set['accept_rate'].reshape(-1,1)
Xtrain2 = train_set['Weather Weight_Longwait_percent2'].reshape(-1,1)
Xtest2 = test_set['Weather Weight_Longwait_percent2'].reshape(-1,1)
# Xtest = test_set['longwait_percent2'].reshape(-1,1)
# Ytest = test_set['accept_rate'].reshape(-1,1)
# Training Data
train_X = Xtrain
train_Y = Ytrain
n_samples = train_X.shape[0]
#Testing Data
Xtest = numpy.asarray(test_set['longwait_percent2'])
Ytest = numpy.asarray(test_set['accept_rate'])
# tf Graph Input
X = tf.placeholder("float")
Y = tf.placeholder("float")
# Set model weights
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")
# Construct a linear model
pred = tf.add(tf.multiply(X, W), b)
# Mean squared error
cost = tf.sqrt(tf.reduce_sum(tf.pow(pred-Y, 2))/(n_samples))
# Gradient descent method
# Note, minimize() knows to modify W and b because Variable objects are "trained" (trainable=True by default)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
saver = tf.train.Saver() #save all the initialized data
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Fit all training data
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: x, Y: y})
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:  # checkpoint every 50 epochs
            c = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c),
                  "W=", sess.run(W), "b=", sess.run(b))
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
    # Graphic display
    plt.plot(train_X, train_Y, 'ro', label='Original data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
    testing_cost = sess.run(
        tf.reduce_sum(tf.pow(pred - Y, 2)) / (Xtest.shape[0]),
        feed_dict={X: Xtest, Y: Ytest})  # square root of function cost above
    print("Root Mean Square Error =", tf.sqrt(testing_cost))
    print("Absolute mean square loss difference:", abs(
        training_cost - testing_cost))
    plt.plot(Xtest, Ytest, 'bo', label='Testing data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
I don't have your data, so it's hard to tell whether the problem is caused by the data or by the training setup. You can make the learning rate and the number of training iterations much smaller, such as 0.00005 and 100, to see whether you still get NaN.
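A quick diagnostic sketch (an assumption on my part, not from the question: it presumes train_X and train_Y are numeric numpy arrays): NaNs or very different value scales in the inputs are a common cause of a NaN cost, so it is worth checking them before touching the optimizer settings:
# Check the training data for NaNs and extreme value ranges.
print("NaNs in X:", numpy.isnan(train_X.astype(float)).any())
print("NaNs in Y:", numpy.isnan(train_Y.astype(float)).any())
print("X range:", train_X.min(), "-", train_X.max())
print("Y range:", train_Y.min(), "-", train_Y.max())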