LSTM to predict sine wave - mxnet

Here I would like to generate a tutorial usage of LSTM in MxNet, with the example for Tensorflow. (location at https://github.com/mouradmourafiq/tensorflow-lstm-regression/blob/master/lstm_sin.ipynb"
Here is my major code
import mxnet as mx
import numpy as np
import pandas as pd
import argparse
import os
import sys
from data_processing import generate_data
import logging
head = '%(asctime)-15s %(message)s'
logging.basicConfig(level=logging.DEBUG, format=head)
TIMESTEPS = 3
BATCH_SIZE = 100
X, y = generate_data(np.sin, np.linspace(0, 100, 10000), TIMESTEPS, seperate=False)
train_iter = mx.io.NDArrayIter(X['train'], y['train'], batch_size=BATCH_SIZE, shuffle=True, label_name='lro_label')
eval_iter = mx.io.NDArrayIter(X['val'], y['val'], batch_size=BATCH_SIZE, shuffle=False)
test_iter = mx.io.NDArrayIter(X['test'], batch_size=BATCH_SIZE, shuffle=False)
num_layers = 3
num_hidden = 50
data = mx.sym.Variable('data')
label = mx.sym.Variable('lro_label')
stack = mx.rnn.SequentialRNNCell()
for i in range(num_layers):
stack.add(mx.rnn.LSTMCell(num_hidden=num_hidden, prefix='lstm_l%d_'%i))
#stack.reset()
outputs, states = stack.unroll(length=TIMESTEPS,
inputs=data,
layout='NTC',
merge_outputs=True)
outputs = mx.sym.reshape(outputs, shape=(BATCH_SIZE, -1))
# purpose of fc1 was to make shape change to (batch_size, *), or label shape won't match LSTM unrolled output shape.
outputs = mx.sym.FullyConnected(data=outputs, num_hidden=1, name='fc1')
label = mx.sym.reshape(label, shape=(-1,))
outputs = mx.sym.LinearRegressionOutput(data=outputs,
label=label,
name='lro')
contexts = mx.cpu(0)
model = mx.mod.Module(symbol = outputs,
data_names = ['data'],
label_names = ['lro_label'])
model.fit(train_iter, eval_iter,
optimizer_params = {'learning_rate':0.005},
num_epoch=4,
batch_end_callback=mx.callback.Speedometer(BATCH_SIZE, 2))
This code runs but the train_accuracy is Nan.
The question is how to make it correct?
And since unrolled out shape has sequence_length, how can it match to label shape? Did my FC1 net make sense?

pass auto_reset=False to Speedometer callback, say, batch_end_callback=mx.callback.Speedometer(BATCH_SIZE, 2, auto_reset=False), should fix the NaN train-acc.

Related

How to match dimensions in CNN

I'm trying to build a CNN, where the goal is from 3 features to predict the label, but is giving an error of dimension.
Could someone help me?
updated after comments from #M.Innat
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential, load_model
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
import tensorflow as tf
import random
# Create data
n = 8500
l = [2, 3, 4, 5,6]
k = int(np.ceil(n/len(l)))
labels = [item for item in l for i in range(k)]
random.shuffle(labels,random.random)
labels =np.array(labels)
label_unique = np.unique(labels)
x = np.linspace(613000, 615000, num=n) + np.random.uniform(-5, 5, size=n)
y = np.linspace(7763800, 7765800, num=n) + np.random.uniform(-5, 5, size=n)
z = np.linspace(1230, 1260, num=n) + np.random.uniform(-5, 5, size=n)
X = np.column_stack((x,y,z))
Y = labels
# Split the dataset into training and testing.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1234)
seq_len=len(X_train)
n_features=len(X_train[0])
droprate=0.1
exit_un=len(label_unique)
seq_len=len(X_train)
n_features=len(X_train[0])
droprate=0.1
exit_un=len(label_unique)
print('n_features: {} \n seq_len: {} \n exit_un: {}'.format(n_features,seq_len,exit_un))
X_train = X_train[..., None][None, ...] # add channel axis+batch aix
Y_train = pd.get_dummies(Y_train) # transform to one-hot encoded
drop_prob = 0.5
my_model = Sequential()
my_model.add(Conv2D(input_shape=(seq_len,n_features,1),filters=32,kernel_size=(3,3),padding='same',activation="relu")) # 1 channel of grayscale.
my_model.add(MaxPooling2D(pool_size=(2,1)))
my_model.add(Conv2D(filters=64,kernel_size=(5,5), padding='same',activation="relu"))
my_model.add(MaxPooling2D(pool_size=(2,1)))
my_model.add(Flatten())
my_model.add(Dense(units = 1024, activation="relu"))
my_model.add(Dropout(rate=drop_prob))
my_model.add(Dense(units = exit_un, activation="softmax"))
n_epochs = 100
batch_size = 10
learn_rate = 0.005
# Define the optimizer and then compile.
my_optimizer=Adam(lr=learn_rate)
my_model.compile(loss = "categorical_crossentropy", optimizer = my_optimizer, metrics=['categorical_crossentropy','accuracy'])
my_summary = my_model.fit(X_train, Y_train, epochs=n_epochs, batch_size = batch_size, verbose = 1)
The error I have is:
ValueError: Data cardinality is ambiguous:
x sizes: 1
y sizes: 5950
Make sure all arrays contain the same number of samples.
You're passing the input sample without the channel axis and also the batch axis. Also, according to your loss function, you should transform your integer label to one-hot encoded.
exit_un=len(label_unique)
drop_prob = 0.5
X_train = X_train[..., None][None, ...] # add channel axis+batch aix
X_train = np.repeat(X_train, repeats=100, axis=0) # batch-ing
Y_train = np.repeat(Y_train, repeats=100, axis=0) # batch-ing
Y_train = pd.get_dummies(Y_train) # transform to one-hot encoded
print(X_train.shape, Y_train.shape)
my_model = Sequential()
...
update
Based on the discussion, it seems like you need the conv1d operation in the modeling time and need to reshape your sample as mentioned in the comment. Here is the colab, it should work now.

Tensorflow Custom Dataset - Add metadata as additional input to an image input processed by a CNN

I've got a working CNN model that classifies images from a custom dataset that is loaded with a csv file. The dataset is split up into training, validation and test dataset after being shuffled. Now I want to expand the image input by four extra input classes containing info / metadata about the images.
I've already learnt that I should split up my cnn model into two branches, one for the images and one for the extra input. My question is, how must I modify my data input so that the model can correctly process both images and additional input?
I'm very new to creating neural networks in tensorflow. My entire code is basically from this website. However, none of the topics could solve the problem for my code.
This is my code: (additional metadata are called usages, completions, heights, constructions)
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from keras.callbacks import History
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
import io
# READ IMAGES, METADATA AND LABELS
df = pd.read_csv('dataset.csv')
df = df.sample(frac=1)
file_paths = df['file_name'].values
labels = df['label'].values
usages = df['usage'].values
completions = df['completion'].values
heights = df['height'].values
constructions = df['construction'].values
# SPLITTING THE DATASET INTO 80 % TRAINING DATA, 10 % VALIDATION DATA, 10 % TEST DATA
dataset_size = len(df.index)
train_size = int(0.8 * dataset_size)
val_size = int(0.1 * dataset_size)
test_size = int(0.1 * dataset_size)
img_height = 350
img_width = 350
batch_size = 16
autotune = tf.data.experimental.AUTOTUNE
# FUNCTION TO READ AND NORMALIZE THE IMAGES
def read_image(image_file, label, usg, com, hei, con):
image = tf.io.read_file(image_file)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.resize(image, (img_width, img_height))
return tf.cast(image, tf.float32) / 255.0, label, \
tf.cast(usg, tf.float32), tf.cast(com, tf.float32), \
tf.cast(hei, tf.float32), tf.cast(con, tf.float32)
# FUNCTION FOR DATA AUGMENTATION
def augment(image, labeL, usg, com, hei, con):
if tf.random.uniform((), minval=0, maxval=1) < 0.1:
image = tf.tile(tf.image.rgb_to_grayscale(image), [1, 1, 3])
image = tf.image.random_brightness(image, max_delta=0.25)
image = tf.image.random_contrast(image, lower=0.75, upper=1.25)
image = tf.image.random_saturation(image, lower=0.75, upper=1.25)
image = tf.image.random_flip_left_right(image)
return image, label, usg, com, hei, con
# SETUP FOR TRAINING, VALIDATION & TEST DATASET
ds_train = ds_train.map(read_image, num_parallel_calls=autotune)
ds_train = ds_train.cache()
ds_train = ds_train.map(augment, num_parallel_calls=autotune)
ds_train = ds_train.batch(batch_size)
ds_train = ds_train.prefetch(autotune)
ds_val = ds_val.map(read_image, num_parallel_calls=autotune)
ds_val = ds_val.batch(batch_size)
ds_val = ds_val.prefetch(autotune)
ds_test = ds_test.map(read_image, num_parallel_calls=autotune)
ds_test = ds_test.batch(batch_size)
ds_test = ds_test.prefetch(autotune)
## HOW TO SPLIT UP THE DATASET FOR THE MODEL FROM HERE? ##
# DEFINING FUNCTIONAL MODEL
input_img = keras.Input(shape=(img_width, img_height, 3))
input_dat = keras.Input(shape=(4,)) # how is this shape supposed to be?
x = layers.Conv2D(16, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(input_img)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
out1 = layers.Flatten()(x)
out2 = layers.Dense(128, activation='relu')(input_dat)
merge = layers.concatenate([out1, out2])
x = layers.Dense(256, activation='relu')(merge)
x = layers.Dropout(0.35)(x)
output = layers.Dense(8, activation='sigmoid')(x)
model = keras.Model(inputs=[input_img, input_dat], outputs=output)
history = History()
no_overfit = keras.callbacks.EarlyStopping(monitor='val_loss', # stop training when overfitting occurs
min_delta=0.015, patience=1,
verbose=2, mode='auto')
# TRAINING STEP
model.compile(
optimizer=keras.optimizers.Adam(3e-5),
loss=[keras.losses.SparseCategoricalCrossentropy()],
metrics=["accuracy"])
model.fit(ds_train, epochs=30, callbacks=[no_overfit, history],
verbose=1, validation_data=ds_val)
So far I've only added the extra inputs to the dataset tensor and changed the model structure. How exactly do I split my dataset into input_img and input_dat so that each model branch will receive their proper input?
Also I have a custom test step in order to plot a confusion matrix. How is this supposed to be modified? Here the working code, for just the image input:
y_true = []
y_pred = []
for x, y in ds_test:
y_true.append(y)
predicts = model.predict(x) # compute model predictions for test step
y_pred.append(np.argmax(predicts, axis=-1))
true = tf.concat([item for item in y_true], axis=0)
pred = tf.concat([item for item in y_pred], axis=0)
cm = confusion_matrix(true, pred) # confusion matrix from seaborn
testacc = np.trace(cm) / float(np.sum(cm)) # calculating test accuracy
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
fig, ax = plt.subplots(figsize=(10, 10))
color = sns.light_palette("seagreen", as_cmap=False)
sns.heatmap(cm, annot=True, square=True, cmap=color, fmt=".3f",
linewidths=0.6, linecolor='k', cbar_kws={"shrink": 0.8})
plt.yticks(rotation=0)
plt.xlabel('\nPredicted Labels', fontsize=18)
plt.ylabel('True Labels\n', fontsize=18)
plt.title('Multiclass Model - Confusion Matrix (Test Step)\n', fontsize=24)
plt.text(10, 1.1, 'Accuracy = {:0.4f}'.format(testacc), fontsize=20)
ax.axhline(y=8, color='k', linewidth=1.5) # depending on amount of classes
ax.axvline(x=8, color='k', linewidth=1.5)
plt.show()
print('\naccuracy: {:0.4f}'.format(testacc))
Any help is greatly appreciated!!

Value Error due to Numpy returning an object

I'm trying to make the following code piece at the end run.
However, i'm getting the following error when i try to fit my model:
"ValueError: setting an array element with a sequence."
I'm trying to use a RNN to predict the next 5 days of prices. So, in the function create_ts I'm trying to create two time series, one with the first X items and another with X+1, X+2, X+3, X+4, and X+5 - these five items being the next five days of prices i'd like to predict.
I suspect the problem is here somewhere:
def create_ts(ds, series, day_gap):
x, y = [], []
for i in range(len(ds) - series - 1):
item = ds[i:(i+series),0]
x.append(item)
next_item = ds[i+series:(i+series+day_gap), 0]
y.append(next_item)
#print(type(np.array(x)), type(np.array(y)))
return np.array(x), np.array(y).reshape(-1,1)
series = 5
predict_days = 5
train_x, train_y = create_ts(stock_train, series, predict_days)
test_x, test_y = create_ts(stock_test, series, predict_days)
#reshape into LSTM format - samples, steps, features
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
#build model
model = Sequential()
model.add(LSTM(4,input_shape = (series, 1)))
model.add(Dense(1))
model.compile(loss='mse', optimizer = 'adam')
#fit model
model.fit(train_x, train_y, epochs = 100, batch_size = 32)
Thanks in advance for any help!
Below is the full code piece:
from keras import backend as k
import os
from importlib import reload
def set_keras_backend(backend):
if k.backend() != backend:
os.environ['KERAS_BACKEND'] = backend
reload(k)
assert k.backend() == backend
set_keras_backend("cntk")
import numpy as np
import pandas as pd
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import math
np.random.seed(7)
#load dataset
fileloc = "C:\\Stock Data\\CL1.csv"
stock_data = pd.read_csv(fileloc)
stock_data.head()
stock_data.dtypes
stock_data['Date'] = pd.to_datetime(stock_data['Date'])
stock_data['Price'] = pd.to_numeric(stock_data['Price'], downcast = 'float')
stock_data.set_index('Date', inplace=True)
stock_close = stock_data['Price']
stock_close = stock_close.values.reshape(len(stock_close), 1)
plt.plot(stock_close)
#normalize data
scaler = MinMaxScaler(feature_range = (0,1))
stock_close = scaler.fit_transform(stock_close)
#split data into a train, test set
train_size = int(len(stock_close)*0.7)
test_size = len(stock_close) - train_size
stock_train, stock_test = stock_close[0:train_size, :], stock_close[train_size:len(stock_close), :]
#convert the data into a time series looking back over a period fo days
def create_ts(ds, series, day_gap):
x, y = [], []
for i in range(len(ds) - series - 1):
item = ds[i:(i+series),0]
x.append(item)
next_item = ds[i+series:(i+series+day_gap), 0]
y.append(next_item)
#print(type(np.array(x)), type(np.array(y)))
return np.array(x), np.array(y).reshape(-1,1)
series = 5
predict_days = 5
train_x, train_y = create_ts(stock_train, series, predict_days)
test_x, test_y = create_ts(stock_test, series, predict_days)
#reshape into LSTM format - samples, steps, features
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
#build model
model = Sequential()
model.add(LSTM(4,input_shape = (series, 1)))
model.add(Dense(1))
model.compile(loss='mse', optimizer = 'adam')
#fit model
model.fit(train_x, train_y, epochs = 100, batch_size = 32)

IndexError: LSTM with "stateful=True"

I tried to use LSTM network using reset callback for expected future
predictions as follows:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.callbacks import LambdaCallback
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
raw = np.sin(2*np.pi*np.arange(1024)/float(1024/2)).reshape(-1,1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(raw)
data = pd.DataFrame(scaled)
window_size = 3
data_s = data.copy()
for i in range(window_size):
data = pd.concat([data, data_s.shift(-(i+1))], axis = 1)
data.dropna(axis=0, inplace=True)
ds = data.values
n_rows = ds.shape[0]
ts = int(n_rows * 0.8)
train_data = ds[:ts,:]
test_data = ds[ts:,:]
train_X = train_data[:,:-1]
train_y = train_data[:,-1]
test_X = test_data[:,:-1]
test_y = test_data[:,-1]
print (train_X.shape)
print (train_y.shape)
print (test_X.shape)
print (test_y.shape)
batch_size = 3
n_feats = 1
train_X = train_X.reshape(train_X.shape[0], batch_size, n_feats)
test_X = test_X.reshape(test_X.shape[0], batch_size, n_feats)
print(train_X.shape, train_y.shape)
regressor = Sequential()
regressor.add(LSTM(units = 64, batch_input_shape=(1, batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=False))
regressor.add(Dense(units = 1))
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
resetCallback = LambdaCallback(on_epoch_begin=lambda epoch,logs: regressor.reset_states())
regressor.fit(train_X, train_y, batch_size=1, epochs = 1, callbacks=[resetCallback])
previous_inputs = test_X
regressor.reset_states()
previous_predictions = regressor.predict(previous_inputs, batch_size=1)
previous_predictions = scaler.inverse_transform(previous_predictions).reshape(-1)
test_y = scaler.inverse_transform(test_y.reshape(-1,1)).reshape(-1)
plt.plot(test_y, color = 'blue')
plt.plot(previous_predictions, color = 'red')
plt.show()
inputs = test_X
future_predicitons = regressor.predict(inputs, batch_size=1)
n_futures = 7
regressor.reset_states()
predictions = regressor.predict(previous_inputs, batch_size=1)
print (predictions)
future_predicts = []
currentStep = predictions[:,-1:,:]
for i in range(n_futures):
currentStep = regressor.predict(currentStep, batch_size=1)
future_predicts.append(currentStep)
regressor.reset_states()
future_predicts = np.array(future_predicts, batch_size=1).reshape(-1,1)
future_predicts = scaler.inverse_transform(future_predicts).reshape(-1)
all_predicts = np.concatenate([predicts, future_predicts])
plt.plot(all_predicts, color='red')
plt.show()
but i got the following error. I could not figure out how to solve it for expected predictions.
currentStep = predictions[:,-1:,:]
IndexError: too many indices for array
PS this code has been adapted from https://github.com/danmoller/TestRepo/blob/master/testing%20the%20blog%20code%20-%20train%20and%20pred.ipynb
When you defined the regressor, you used return_sequences=False.
So, the regressor is returning 2D, tensors (without the steps), not 3D.
So you can't get elements from predictions using three indices as you did.
Possibilities:
With return_sequences=False, every prediction will be only the last step.
With return_sequences=True, every prediction will contain steps, even if only one step.

Making simple rnn code with scan function in Tensorflow

I recently started to learn Tensorflow and try to make simple rnn code using scan function.
What I'm trying to do is to make The RNN predict sine function.
It gets input of 1 dim. and outputs also 1 dim in batch as follow.
import tensorflow as tf
from tensorflow.examples.tutorials import mnist
import numpy as np
import matplotlib.pyplot as plt
import os
import time
# FLAGS (options)
tf.flags.DEFINE_string("data_dir", "", "")
#tf.flags.DEFINE_boolean("read_attn", True, "enable attention for reader")
#tf.flags.DEFINE_boolean("write_attn",True, "enable attention for writer")
opt = tf.flags.FLAGS
#Parameters
time_step = 10
num_rnn_h = 16
batch_size = 2
max_epoch=10000
learning_rate=1e-3 # learning rate for optimizer
eps=1e-8 # epsilon for numerical stability
#temporary sinusoid data
x_tr = np.zeros([batch_size,time_step])
y_tr = np.zeros([batch_size,time_step])
ptrn = 0.7*np.sin(np.arange(time_step+1)/(2*np.pi))
x_tr[0] = ptrn[0:time_step]
y_tr[0] = ptrn[1:time_step+1]
x_tr[1] = ptrn[0:time_step]
y_tr[1] = ptrn[1:time_step+1]
#Build model
x = tf.placeholder(tf.float32,shape=[batch_size,time_step,1], name= 'input')
y = tf.placeholder(tf.float32,shape=[None,time_step,1], name= 'target')
cell = tf.nn.rnn_cell.BasicRNNCell(num_rnn_h)
#cell = tf.nn.rnn_cell.LSTMCell(num_h, state_is_tuple=True)
with tf.variable_scope('output'):
W_o = tf.get_variable('W_o', shape=[num_rnn_h, 1])
b_o = tf.get_variable('b_o', shape=[1], initializer=tf.constant_initializer(0.0))
init_state = cell.zero_state(batch_size, tf.float32)
#make graph
#rnn_outputs, final_states = tf.scan(cell, xx1, initializer= tf.zeros([num_rnn_h]))
scan_outputs = tf.scan(lambda a, xi: cell(xi, a), tf.transpose(x, perm=[1,0,2]), initializer= init_state)
rnn_outputs, rnn_states = tf.unpack(tf.transpose(scan_outputs,perm=[1,2,0,3]))
print rnn_outputs, rnn_states
with tf.variable_scope('predictions'):
weighted_sum = tf.reshape(tf.matmul(tf.reshape(rnn_outputs, [-1, num_rnn_h]), W_o), [batch_size, time_step, 1])
predictions = tf.add(weighted_sum, b_o, name='predictions')
with tf.variable_scope('loss'):
loss = tf.reduce_mean((y - predictions) ** 2, name='loss')
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
But It gives an error at the last line (optimizer) like ,
ValueError: Shapes (2, 16) and (2, 2, 16) are not compatible
Please someone knows the reason, tell me how to fix it...
I assume your error is not on the last line (the optimizer) but rather on some operation you are doing earlier. Perhaps in the reduce_mean with this y - prediction? I will not go over your code in details but I will tell you that this error comes when you do an operation between two tensors which require the same shape (usually math operations).