How can I compile batched training of a gpflow GPR into a tf.function?

I need to train a GPR model in multiple batches per epoch using a custom loss function. I would like to do this using GPflow, and I would like to compile my training with tf.function to increase efficiency. However, gpflow.models.GPR must be re-instantiated each time new data is supplied, so tf.function has to re-trace on every batch. This makes the code slower rather than faster.
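To make the retracing issue concrete, here is a minimal sketch (my own illustration, not from the original post; DummyModel is just a stand-in for a freshly constructed GPR) showing that a tf.function traces again whenever it is called with a new Python object:
import tensorflow as tf

@tf.function
def show_tracing(model):
    print("tracing for", type(model))  # Python print only runs while tracing
    return tf.constant(0.0)

class DummyModel:  # stands in for a freshly constructed gpflow.models.GPR
    pass

show_tracing(DummyModel())  # prints: traced the first time
show_tracing(DummyModel())  # prints again: a new Python object forces a retrace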
This is the initial setup:
import numpy as np
from itertools import islice
import tensorflow as tf
import tensorflow_probability as tfp
tfb = tfp.bijectors
from sklearn.model_selection import train_test_split
import gpflow
from gpflow.kernels import SquaredExponential
import time
data_size = 1000
train_fract = 0.8
batch_size = 250
n_epochs = 3
iterations_per_epoch = int(train_fract * data_size/batch_size)
tf.random.set_seed(3)
# Generate dummy data
x = np.arange(data_size)
y = np.arange(data_size) + np.random.rand(data_size)
# Slice into train and validate sets
x_train, x_validate, y_train, y_validate = train_test_split(x, y, random_state = 1, test_size = 1-train_fract )
# Convert data into tensorflow constants
x_train = tf.constant(x_train[:, np.newaxis], dtype=np.float64)
x_validate = tf.constant(x_validate[:, np.newaxis], dtype=np.float64)
y_train = tf.constant(y_train[:, np.newaxis], dtype=np.float64)
y_validate = tf.constant(y_validate[:, np.newaxis], dtype=np.float64)
# Batch data
batched_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=len(x_train), seed=1)
    .repeat(count=None)
    .batch(batch_size)
)
# Create kernel
constrain_positive = tfb.Shift(np.finfo(np.float64).tiny)(tfb.Exp())
amplitude = tfp.util.TransformedVariable(initial_value=1, bijector=constrain_positive, dtype=np.float64, name="amplitude")
len_scale = tfp.util.TransformedVariable(initial_value=10, bijector=constrain_positive, dtype=np.float64, name="len_scale")
kernel = SquaredExponential(variance=amplitude, lengthscales=len_scale, name="squared_exponential_kernel")
obs_noise = tfp.util.TransformedVariable(initial_value=1e-3, bijector=constrain_positive, dtype=np.float64, name="observation_noise")
# Define custom loss function
#tf.function(autograph=False, experimental_compile=False)
def my_custom_loss(y_predict, y_true):
return tf.math.reduce_mean(tf.math.squared_difference(y_predict, y_true))
#optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
This is how I train without a tf.function:
gpr_model_j_i = gpflow.models.GPR(data=(x_train, y_train), kernel=kernel, noise_variance=obs_noise)
# Start training loop
for j in range(n_epochs):
    for i, (x_train_j_i, y_train_j_i) in enumerate(islice(batched_dataset, iterations_per_epoch)):
        with tf.GradientTape() as tape:
            gpr_model_j_i = gpflow.models.GPR(data=(x_train_j_i, y_train_j_i), kernel=kernel, noise_variance=gpr_model_j_i.likelihood.variance)
            y_predict_j_i = gpr_model_j_i.predict_f(x_validate)[0]
            loss_j_i = my_custom_loss(y_predict_j_i, y_validate)
        grads_j_i = tape.gradient(loss_j_i, gpr_model_j_i.trainable_variables)
        optimizer.apply_gradients(zip(grads_j_i, gpr_model_j_i.trainable_variables))
This is how I train with a tf.function:
@tf.function(autograph=False, experimental_compile=False)
def tf_function_attempt_3(model):  # , optimizer):
    with tf.GradientTape() as tape:
        y_predict_j_i = model.predict_f(x_validate)[0]
        loss_j_i = my_custom_loss(y_predict_j_i, y_validate)
    grads_j_i = tape.gradient(loss_j_i, model.trainable_variables)
    optimizer.apply_gradients(zip(grads_j_i, model.trainable_variables))
    print("TRACING...", end="")

for j in range(n_epochs):
    for i, (x_train_j_i, y_train_j_i) in enumerate(islice(batched_dataset, iterations_per_epoch)):
        gpr_model_j_i = gpflow.models.GPR(data=(x_train_j_i, y_train_j_i), kernel=kernel, noise_variance=gpr_model_j_i.likelihood.variance)
        tf_function_attempt_3(gpr_model_j_i)  # , optimizer)
The tf.function retraces for each batch and is significantly slower than the normal training.
Is there a way to speed up the batched training of my GPR model with tf.function while using a custom loss function and GPflow? If not, I am open to suggestions for an alternative approach.

You don't have to re-instantiate GPR each time. You can construct tf.Variable holders with unconstrained shape and then .assign to them:
import gpflow
import numpy as np
import tensorflow as tf
input_dim = 1
initial_x, initial_y = np.zeros((0, input_dim)), np.zeros((0, 1)) # or your first batch
x_var = tf.Variable(initial_x, shape=(None, input_dim), dtype=tf.float64)
y_var = tf.Variable(initial_y, shape=(None,1), dtype=tf.float64)
# in principle you could also set shape=(None, None)...
m = gpflow.models.GPR((x_var, y_var), gpflow.kernels.SquaredExponential())
loss = m.training_loss_closure() # compile=True default wraps in tf.function()
N1 = 3
x1, y1 = np.random.randn(N1, input_dim), np.random.randn(N1, 1)
m.data[0].assign(x1)
m.data[1].assign(y1)
loss() # traces the first time
N2 = 7
x2, y2 = np.random.randn(N2, input_dim), np.random.randn(N2, 1)
m.data[0].assign(x2)
m.data[1].assign(y2)
loss() # does not trace again
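Applied to the batched setup in the question, a rough sketch (reusing the kernel, optimizer, obs_noise, validation tensors, my_custom_loss, and batched_dataset defined there; you may need to check that predict_f compiles cleanly with the dynamic-shaped holders) could look like this: build the GPR once on variable holders, wrap the step in tf.function, and only assign new batches inside the loop:
x_holder = tf.Variable(np.zeros((0, 1)), shape=(None, 1), dtype=tf.float64)
y_holder = tf.Variable(np.zeros((0, 1)), shape=(None, 1), dtype=tf.float64)
gpr_model = gpflow.models.GPR(data=(x_holder, y_holder), kernel=kernel, noise_variance=obs_noise)

@tf.function
def train_step():
    with tf.GradientTape() as tape:
        y_predict = gpr_model.predict_f(x_validate)[0]
        loss = my_custom_loss(y_predict, y_validate)
    grads = tape.gradient(loss, gpr_model.trainable_variables)
    optimizer.apply_gradients(zip(grads, gpr_model.trainable_variables))
    return loss

for j in range(n_epochs):
    for x_batch, y_batch in islice(batched_dataset, iterations_per_epoch):
        gpr_model.data[0].assign(x_batch)  # swap in the new batch without rebuilding the model
        gpr_model.data[1].assign(y_batch)
        train_step()  # traced once, reused for every batch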

Related

Training `tfd.MixtureSameFamily` gets 'NaN'

I want to train a mixture density model using tfd.MixtureSameFamily, but after several thousand epochs of training the results become NaN. Here's fully functioning code to replicate this.
import section
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
import numpy as np
data generation
number_of_instances = 1000
x_data = np.linspace(-5.5,5.5,number_of_instances)
r_data = np.random.randn(number_of_instances)
y_data = 7*np.sin(x_data*0.75)+ x_data + r_data
x_data = x_data.astype("float32")
x_data = x_data.reshape(x_data.size,1)
y_data = y_data.astype("float32")
y_data = y_data.reshape(y_data.size,1)
model building section
hidden_units = 100
k_mixt = 5
l2_reg = 1e-3
learning_rate = 1e-3
hidden_dense = Dense(units=hidden_units,
                     input_dim=y_data,
                     activation=tf.nn.relu,
                     kernel_regularizer=regularizers.l2(l2_reg),
                     name=f'Dense',
                     trainable=True)
alpha_dense = Dense(units=k_mixt,
                    activation=tf.nn.softmax,
                    name='alpha',
                    trainable=True)
mu_dense = Dense(units=k_mixt,
                 activation=None,
                 name='mu',
                 trainable=True)
sigma_dense = Dense(k_mixt,
                    activation=tf.nn.softplus,
                    name='sigma',
                    trainable=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
training section
for epoch in range(int(5e4)):
    with tf.GradientTape() as tape:
        hidden = hidden_dense(y_data)
        alpha = alpha_dense(hidden)
        mu = mu_dense((hidden))
        sigma = sigma_dense(hidden)
        gm = tfd.MixtureSameFamily(mixture_distribution=tfd.Categorical(probs=alpha), components_distribution=tfd.Normal(loc=mu, scale=sigma))
        loss = -tf.reduce_sum(gm.log_prob(tf.reshape(x_data, (-1,))))
    grads = tape.gradient(loss, tape.watched_variables())
    (grads_clipped, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
    optimizer.apply_gradients(zip(grads_clipped, tape.watched_variables()))
    if epoch % 5e2 == 0:
        print(epoch, loss)
What I found is that the NaN first appears in the sigma_dense and hidden_dense layers in some epoch and then spreads to all other layers. It seems that the cause of the NaN is computing the gradient of the loss with respect to sigma.
As I learnt from a YouTube video, the gradient of the loss with respect to sigma is:
d(ln(loss))/d(sigma) = -n/sigma + (1/sigma^3) * ((x1 - mu)^2 + ... + (xn - mu)^2)
Could this derivative formula be the cause of the NaN? Does anyone have any idea?
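As a side note, here is a small sketch (my own illustration, not from the post) of why that 1/sigma^3 term is numerically dangerous: as sigma approaches zero, the gradient of the Normal log-density explodes, which can produce inf/NaN updates:
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

x = tf.constant([0.5, -0.3, 1.2])
for sigma_value in [1.0, 1e-2, 1e-4]:
    sigma = tf.Variable(sigma_value)
    with tf.GradientTape() as tape:
        nll = -tf.reduce_sum(tfd.Normal(loc=0.0, scale=sigma).log_prob(x))
    # the gradient magnitude grows roughly like 1/sigma^3 as sigma shrinks
    print(sigma_value, tape.gradient(nll, sigma).numpy())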

Loss function with derivative in TensorFlow 2

I am using a TF2 (2.3.0) NN to approximate the function y which solves the ODE y' + 3y = 0.
I have defined a custom loss class and function in which I am trying to differentiate the single output with respect to the single input so that the equation holds, provided that y_true is zero:
from tensorflow.keras.losses import Loss
import tensorflow as tf
class CustomLossOde(Loss):
    def __init__(self, x, model, name='ode_loss'):
        super().__init__(name=name)
        self.x = x
        self.model = model

    def call(self, y_true, y_pred):
        with tf.GradientTape() as tape:
            tape.watch(self.x)
            y_p = self.model(self.x)
        dy_dx = tape.gradient(y_p, self.x)
        loss = tf.math.reduce_mean(tf.square(dy_dx + 3 * y_pred - y_true))
        return loss
but running the following NN:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
from custom_loss_ode import CustomLossOde
num_samples = 1024
x_train = 4 * (tf.random.uniform((num_samples, )) - 0.5)
y_train = tf.zeros((num_samples, ))
inputs = Input(shape=(1,))
x = Dense(16, 'tanh')(inputs)
x = Dense(8, 'tanh')(x)
x = Dense(4)(x)
y = Dense(1)(x)
model = Model(inputs=inputs, outputs=y)
loss = CustomLossOde(model.input, model)
model.compile(optimizer=Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.99),loss=loss)
model.run_eagerly = True
model.fit(x_train, y_train, batch_size=16, epochs=30)
For now I am getting 0 loss from the first epoch, which doesn't make any sense.
I have printed both y_true and y_pred from within the function and they seem OK, so I suspect that the problem is in the gradient, which I didn't succeed in printing.
Appreciate any help.
Defining a custom loss with the high-level Keras API is a bit difficult in that case. I would instead write the training loop from scratch, as it allows finer-grained control over what you can do.
I took inspiration from these two guides:
Advanced Automatic Differentiation
Writing a training loop from scratch
Basically, I used the fact that multiple tapes can interact seamlessly. I use one to compute the loss function, the other to calculate the gradients to be propagated by the optimizer.
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
num_samples = 1024
x_train = 4 * (tf.random.uniform((num_samples, )) - 0.5)
y_train = tf.zeros((num_samples, ))
inputs = Input(shape=(1,))
x = Dense(16, 'tanh')(inputs)
x = Dense(8, 'tanh')(x)
x = Dense(4)(x)
y = Dense(1)(x)
model = Model(inputs=inputs, outputs=y)
# using the high level tf.data API for data handling
x_train = tf.reshape(x_train,(-1,1))
dataset = tf.data.Dataset.from_tensor_slices((x_train,y_train)).batch(1)
opt = Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.99)
for step, (x, y_true) in enumerate(dataset):
    # we need to convert x to a variable if we want the tape to be
    # able to compute the gradient according to x
    x_variable = tf.Variable(x)
    with tf.GradientTape() as model_tape:
        with tf.GradientTape() as loss_tape:
            loss_tape.watch(x_variable)
            y_pred = model(x_variable)
        dy_dx = loss_tape.gradient(y_pred, x_variable)
        loss = tf.math.reduce_mean(tf.square(dy_dx + 3 * y_pred - y_true))
    grad = model_tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(grad, model.trainable_variables))
    if step % 20 == 0:
        print(f"Step {step}: loss={loss.numpy()}")

Exploding LOSS in TensorFlow 2.0 Linear Regression Example using GradientTape

I'm trying to construct a little educational example for multivariate linear regression, but the loss keeps increasing until it explodes rather than getting smaller. Any idea why?
import tensorflow as tf
tf.__version__
import numpy as np
data = np.array(
    [
        [100,35,35,12,0.32],
        [101,46,35,21,0.34],
        [130,56,46,3412,12.42],
        [131,58,48,3542,13.43]
    ]
)
x = data[:,1:-1]
y_target = data[:,-1]
def loss_function(y, pred):
    return tf.reduce_mean(tf.square(y - pred))

def train(b, w, x, y, lr=0.012):
    with tf.GradientTape() as t:
        current_loss = loss_function(y, linear_model(x))
    lr_weight, lr_bias = t.gradient(current_loss, [w, b])
    w.assign_sub(lr * lr_weight)
    b.assign_sub(lr * lr_bias)

epochs = 80
for epoch_count in range(epochs):
    real_loss = loss_function(y_target, linear_model(x))
    train(b, w, x, y_target, lr=0.12)
    print(f"Epoch count {epoch_count}: Loss value: {real_loss.numpy()}")
This even happens if I initialize the weights with the "correct" values (found out via a scikit-learn regressor)
w = tf.Variable([-1.76770250e-04,3.46688912e-01,2.43827475e-03],dtype=tf.float64)
b = tf.Variable(-11.837184241807234,dtype=tf.float64)
Here's how you might use a TF2 optimizer for a toy example (as per the comment). I know this is not the answer but I didn't want to post this in the comments section, as it will mess up the indentation and all that.
tf_x = tf.Variable(tf.constant(2.0,dtype=tf.float32),name='x')
optimizer = tf.optimizers.SGD(learning_rate=0.1)
# Optimizing tf_x using gradient tape
x_series, y_series = [],[]
for step in range(5):
    x_series.append(tf_x.numpy().item())
    with tf.GradientTape() as tape:
        tf_y = tf_x**2
    gradients = tape.gradient(tf_y, tf_x)
    optimizer.apply_gradients(zip([gradients], [tf_x]))
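For reference, with a starting value of 2.0 and SGD at learning rate 0.1 on y = x^2, each step multiplies x by 0.8, so the recorded values decay toward the minimum:
print(x_series)  # approximately [2.0, 1.6, 1.28, 1.024, 0.8192], up to float32 rounding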
Based on @thushv89's input, I'm providing here an intermediate solution using a TF2 optimizer which is working, although this is not 100% answering my question.
import tensorflow as tf
tf.__version__
import numpy as np
data = np.array(
    [
        [100,35,35,12,0.32],
        [101,46,35,21,0.34],
        [130,56,46,3412,12.42],
        [131,58,48,3542,13.43]
    ]
)
x = data[:,1:-1]
y_target = data[:,-1]
w = tf.Variable([1,1,1],dtype=tf.float64)
b = tf.Variable(1,dtype=tf.float64)
def linear_model(x):
    return b + tf.tensordot(x, w, axes=1)

optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.MeanSquaredLogarithmicError()

def train_step(x, y):
    with tf.GradientTape() as tape:
        predicted = linear_model(x)
        loss_value = loss_object(y, predicted)
        print(f"Loss Value:{loss_value}")
    grads = tape.gradient(loss_value, [b, w])
    optimizer.apply_gradients(zip(grads, [b, w]))

def train(epochs):
    for epoch in range(epochs):
        train_step(x, y_target)
        print('Epoch {} finished'.format(epoch))

train(epochs=1000)
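If the loss still blows up, one common safeguard (shown here as an assumption of mine, not part of this answer; it reuses linear_model, loss_object, optimizer, b, and w from above) is to clip the gradients before applying them, the same trick used in the MixtureSameFamily example earlier:
def train_step_clipped(x, y, clip_norm=1.0):
    with tf.GradientTape() as tape:
        predicted = linear_model(x)
        loss_value = loss_object(y, predicted)
    grads = tape.gradient(loss_value, [b, w])
    grads, _ = tf.clip_by_global_norm(grads, clip_norm=clip_norm)  # cap the global gradient norm
    optimizer.apply_gradients(zip(grads, [b, w]))
    return loss_value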

A question about simple Keras and Tensorflow code performance

I wrote simple sine-function predictors using Keras and TensorFlow with LSTMs, but found that the Keras code is much slower: it runs for about 5 minutes, while the TensorFlow code trains the model in just 20 seconds. Moreover, the Keras predictions are less precise than the TensorFlow ones. Could anyone help me find the difference between the two models?
I adapted the code from online examples and intended to train both models with the same hyperparameters, but the performance is not as expected. I searched a lot of material online but found no explanation.
Keras Code:
import numpy as np
import os
import sys
import time
from tqdm._tqdm_notebook import tqdm_notebook
import pickle
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from keras import optimizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
stime = time.time()
BATCH_SIZE = 20
TIME_STEPS = 10
LN = 410
DIFF = 2
OUTPUT_PATH = '/Users/xiachang/Documents/RNN/test_outputs'
SCALER_COL_IDX = 0
params = {
    "batch_size": BATCH_SIZE,  # 20<16<10, 25 was a bust
    "epochs": 500,
    "lr": 0.00010000,
    "time_steps": TIME_STEPS
}
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01
HIDDEN_UNITS = 20
# data = np.array([[i * (DIFF)] for i in range(LN)])
#
# min_max_scaler = MinMaxScaler()
# data = min_max_scaler.fit_transform(data)
def generate_data(seq):
    X = []
    y = []
    for i in range(len(seq) - TIME_STEPS):
        X.append([[e] for e in seq[i: i + TIME_STEPS]])
        y.append([seq[i + TIME_STEPS]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
test_start = (TRAINING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
    0, test_start, TRAINING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
    test_start, test_end, TESTING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
x_val, x_test = np.split(test_X, 2)
y_val, y_test = np.split(test_y, 2)
def print_time(text, stime):
    seconds = (time.time() - stime)
    print(text, seconds//60, "minutes : ", np.round(seconds%60), "seconds")

def create_model():
    lstm_model = Sequential()
    lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(LSTM(HIDDEN_UNITS))
    lstm_model.add(Dense(1, activation=None))
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
    return lstm_model
model = create_model()
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=30, min_delta=0.0001)
mcp = ModelCheckpoint(os.path.join(OUTPUT_PATH, "best_model.h5"), monitor='val_loss', verbose=1,
                      save_best_only=True, save_weights_only=False, mode='min', period=1)
# Not used here. But leaving it here as a reminder for future
r_lr_plat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=30,
                              verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
csv_logger = CSVLogger(os.path.join(OUTPUT_PATH, 'training_log_' + time.ctime().replace(" ","_") + '.log'), append=True)
history = model.fit(train_X, train_y, epochs=params["epochs"], verbose=2, batch_size=BATCH_SIZE,
                    shuffle=False, validation_data=(x_val, y_val), callbacks=[es, mcp, csv_logger])
print("saving model...")
pickle.dump(model, open("test_outputs/lstm_model", "wb"))
# Visualize the training data
from matplotlib import pyplot as plt
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
#plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'train_vis_BS_'+str(BATCH_SIZE)+"_"+time.ctime()+'.png'))
# load the saved best model from above
saved_model = load_model(os.path.join(OUTPUT_PATH, 'best_model.h5')) # , "lstm_best_7-3-19_12AM",
print(saved_model)
y_pred = saved_model.predict(x_test, batch_size=BATCH_SIZE)
y_pred = y_pred.flatten()
y_test_t = y_test
error = mean_squared_error(y_test_t, y_pred)
print("Error is", error, y_pred.shape, y_test_t.shape)
print(y_pred[0:15])
print(y_test_t[0:15])
y_pred_org = y_pred
y_test_t_org = y_test_t
print(y_pred_org[0:15])
print(y_test_t_org[0:15])
# Visualize the prediction
from matplotlib import pyplot as plt
plt.figure()
plt.plot(y_pred_org)
plt.plot(y_test_t_org)
plt.title('Prediction vs Real Value')
plt.ylabel('Y')
plt.xlabel('X')
plt.legend(['Prediction', 'Real'], loc='upper left')
# plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'pred_vs_real_BS'+str(BATCH_SIZE)+"_"+time.ctime()+'.png'))
print_time("program completed ", stime)
Tensorflow code:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
NUM_EPOCH = 1000
HIDDEN_SIZE = 30
NUM_LAYERS = 2
TIMESTEPS = 10
TRAINING_STEPS = 10000
BATCH_SIZE = 20
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01
def generate_data(seq):
    X = []
    y = []
    for i in range(len(seq) - TIMESTEPS):
        X.append([seq[i: i + TIMESTEPS]])
        y.append([seq[i + TIMESTEPS]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

def lstm_model(X, y, is_training):
    cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.LSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)])
    outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    output = outputs[:, -1, :]
    predictions = tf.contrib.layers.fully_connected(output, 1, activation_fn=None)
    if not is_training:
        return predictions, None, None
    loss = tf.losses.mean_squared_error(labels=y, predictions=predictions)
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.train.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
    return predictions, loss, train_op
def train(sess, train_X, train_Y):
    ds = tf.data.Dataset.from_tensor_slices((train_X, train_Y))
    ds = ds.repeat().shuffle(1000).batch(BATCH_SIZE)
    X, y = ds.make_one_shot_iterator().get_next()
    losses = np.array([])
    with tf.variable_scope('model'):
        predictions, loss, train_op = lstm_model(X, y, True)
    sess.run(tf.global_variables_initializer())
    for i in range(TRAINING_STEPS):
        _, l = sess.run([train_op, loss])
        losses = np.append(losses, l)
        if i % NUM_EPOCH == 0:
            print('train step: ' + str(i) + ', loss: ' + str(l))
    plt.figure()
    plt.plot(losses, label='loss')
    plt.legend()
    # plt.show()
    plt.savefig('./test_outputs/loss.png')
def run_eval(sess, test_X, test_y):
    ds = tf.data.Dataset.from_tensor_slices((test_X, test_y))
    ds = ds.batch(1)
    X, y = ds.make_one_shot_iterator().get_next()
    with tf.variable_scope('model', reuse=True):
        prediction, _, _ = lstm_model(X, [0, 0], False)
    predictions = []
    labels = []
    for i in range(int(TESTING_EXAMPLES / 2)):
        p, l = sess.run([prediction, y])
        predictions.append(p)
        labels.append(l)
    predictions = np.array(predictions).squeeze()
    labels = np.array(labels).squeeze()
    rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
    print('Mean Square Error is: %f' % rmse)
    plt.figure()
    print(predictions[:15])
    print(labels[:15])
    plt.plot(predictions, label='predictions')
    plt.plot(labels, label='real_val')
    plt.legend()
    # plt.show()
    plt.savefig('./test_outputs/test.png')
test_start = (TRAINING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
    0, test_start, TRAINING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
    test_start, test_end, TESTING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
x_val, test_X = np.split(test_X, 2)
y_val, test_y = np.split(test_y, 2)
with tf.Session() as sess:
    train(sess, train_X, train_y)
    run_eval(sess, test_X, test_y)
You should maybe try CuDNNLSTM instead of LSTM. It is CUDA-accelerated:
"Fast LSTM implementation with CuDNN."
See here: https://github.com/keras-team/keras/blob/master/keras/layers/cudnn_recurrent.py#L328
Also, your model structures are not the same: the first has 3 LSTM layers, the other has 2.
The TensorFlow data API is highly optimized; it prepares the dataset without wasting any resources.
Note that you can accelerate the training in TensorFlow even more by using parallelization in the dynamic_rnn cell; check this out.
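A rough sketch of the drop-in change (an assumption on my part: it reuses Sequential, Dense, optimizers, and HIDDEN_UNITS from the Keras script above, and requires a GPU; CuDNNLSTM takes no activation argument because it is fixed to tanh):
from keras.layers import CuDNNLSTM

def create_model_cudnn():
    lstm_model = Sequential()
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS))
    lstm_model.add(Dense(1, activation=None))
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
    return lstm_model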

Value Error due to Numpy returning an object

I'm trying to get the code piece at the end to run.
However, I'm getting the following error when I try to fit my model:
"ValueError: setting an array element with a sequence."
I'm trying to use an RNN to predict the next 5 days of prices. So, in the function create_ts I'm trying to create two time series, one with the first X items and another with X+1, X+2, X+3, X+4, and X+5, these five items being the next five days of prices I'd like to predict.
I suspect the problem is here somewhere:
def create_ts(ds, series, day_gap):
    x, y = [], []
    for i in range(len(ds) - series - 1):
        item = ds[i:(i+series), 0]
        x.append(item)
        next_item = ds[i+series:(i+series+day_gap), 0]
        y.append(next_item)
    # print(type(np.array(x)), type(np.array(y)))
    return np.array(x), np.array(y).reshape(-1, 1)
series = 5
predict_days = 5
train_x, train_y = create_ts(stock_train, series, predict_days)
test_x, test_y = create_ts(stock_test, series, predict_days)
#reshape into LSTM format - samples, steps, features
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
#build model
model = Sequential()
model.add(LSTM(4,input_shape = (series, 1)))
model.add(Dense(1))
model.compile(loss='mse', optimizer = 'adam')
#fit model
model.fit(train_x, train_y, epochs = 100, batch_size = 32)
Thanks in advance for any help!
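To illustrate the likely mechanism behind that ValueError (my own sketch with made-up data, not necessarily the poster's exact case): when the trailing slices returned for y are shorter than day_gap, the list becomes ragged, numpy stores it as an object array, and Keras then fails with "setting an array element with a sequence":
import numpy as np

ds = np.arange(12, dtype=float).reshape(-1, 1)
series, day_gap = 5, 5
y = [ds[i + series:(i + series + day_gap), 0] for i in range(len(ds) - series - 1)]
print([len(item) for item in y])  # the trailing slices are shorter than day_gap
# np.array(y) on this ragged list yields a dtype=object array (or raises the same
# "setting an array element with a sequence" error on recent numpy versions)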
Below is the full code piece:
from keras import backend as k
import os
from importlib import reload
def set_keras_backend(backend):
    if k.backend() != backend:
        os.environ['KERAS_BACKEND'] = backend
        reload(k)
        assert k.backend() == backend
set_keras_backend("cntk")
import numpy as np
import pandas as pd
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import math
np.random.seed(7)
#load dataset
fileloc = "C:\\Stock Data\\CL1.csv"
stock_data = pd.read_csv(fileloc)
stock_data.head()
stock_data.dtypes
stock_data['Date'] = pd.to_datetime(stock_data['Date'])
stock_data['Price'] = pd.to_numeric(stock_data['Price'], downcast = 'float')
stock_data.set_index('Date', inplace=True)
stock_close = stock_data['Price']
stock_close = stock_close.values.reshape(len(stock_close), 1)
plt.plot(stock_close)
#normalize data
scaler = MinMaxScaler(feature_range = (0,1))
stock_close = scaler.fit_transform(stock_close)
#split data into a train, test set
train_size = int(len(stock_close)*0.7)
test_size = len(stock_close) - train_size
stock_train, stock_test = stock_close[0:train_size, :], stock_close[train_size:len(stock_close), :]
#convert the data into a time series looking back over a period fo days
def create_ts(ds, series, day_gap):
    x, y = [], []
    for i in range(len(ds) - series - 1):
        item = ds[i:(i+series), 0]
        x.append(item)
        next_item = ds[i+series:(i+series+day_gap), 0]
        y.append(next_item)
    # print(type(np.array(x)), type(np.array(y)))
    return np.array(x), np.array(y).reshape(-1, 1)
series = 5
predict_days = 5
train_x, train_y = create_ts(stock_train, series, predict_days)
test_x, test_y = create_ts(stock_test, series, predict_days)
#reshape into LSTM format - samples, steps, features
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
#build model
model = Sequential()
model.add(LSTM(4,input_shape = (series, 1)))
model.add(Dense(1))
model.compile(loss='mse', optimizer = 'adam')
#fit model
model.fit(train_x, train_y, epochs = 100, batch_size = 32)