How to use this tutorial on time series forecasting (for beginners) - tensorflow

I am following TensorFlow’s tutorial on time series forecasting. I created and saved the model as shown in the tutorial. The tutorial has many examples of training models, but few examples of using a trained model.
How can I use the saved model in another script? How can I predict the temperature for a given time, e.g., “01.01.2017 00:10:00”? How can I get the predicted temperature value in a normal, human-readable format?
Example code from the tutorial:
from datetime import datetime
import time
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import Adam, RMSprop
import os
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
# Fetch the Jena climate archive through the Keras download cache and unpack it.
zip_path = tf.keras.utils.get_file(
    fname='jena_climate_2009_2016.csv.zip',
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    extract=True,
)
# The CSV path is taken to be the archive path with its last extension removed.
csv_path = os.path.splitext(zip_path)[0]
df = pd.read_csv(csv_path)
# Keep every 6th row, beginning with the row at position 5.
df = df.iloc[5::6]
# Take the timestamp column out of the frame and parse it with an explicit format.
date_time = pd.to_datetime(df.pop('Date Time'), format='%d.%m.%Y %H:%M:%S')
# Replace the -9999.0 sentinel in both wind-speed columns with 0.0.
# The assignment goes through df.loc so it is written to the DataFrame itself;
# mutating a Series obtained via df[...] is chained assignment and is not
# propagated to the frame when pandas copy-on-write is active.
bad_wv = df['wv (m/s)'] == -9999.0
df.loc[bad_wv, 'wv (m/s)'] = 0.0
bad_max_wv = df['max. wv (m/s)'] == -9999.0
df.loc[bad_max_wv, 'max. wv (m/s)'] = 0.0
# Notebook-style check of the cleaned column (the value is discarded in a script).
df['wv (m/s)'].min()

# Remove the speed/direction columns from the frame; they are replaced below
# by x/y vector components.
wv = df.pop('wv (m/s)')
max_wv = df.pop('max. wv (m/s)')
# Convert the wind direction from degrees to radians.
wd_rad = df.pop('wd (deg)') * np.pi / 180
# Wind x and y components.
df['Wx'] = wv * np.cos(wd_rad)
df['Wy'] = wv * np.sin(wd_rad)
# Max-wind x and y components.
df['max Wx'] = max_wv * np.cos(wd_rad)
df['max Wy'] = max_wv * np.sin(wd_rad)
# Seconds since the epoch for every row.
timestamp_s = date_time.map(pd.Timestamp.timestamp)
day = 24*60*60
year = (365.2425)*day
# Encode the position within the day and within the year as sin/cos pairs.
df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))

# Real FFT of the temperature column. The derived quantities below are only
# consumed by the (disabled) frequency plot.
fft = tf.signal.rfft(df['T (degC)'])
f_per_dataset = np.arange(0, len(fft))
n_samples_h = len(df['T (degC)'])
# Same year length as `year` above (was 365.2524, a digit transposition).
hours_per_year = 24*365.2425
years_per_dataset = n_samples_h/(hours_per_year)
# f_per_year = f_per_dataset/years_per_dataset
# plt.step(f_per_year, np.abs(fft))
# plt.xscale('log')
# plt.ylim(0, 400000)
# plt.xlim([0.1, max(plt.xlim())])
# plt.xticks([1, 365.2425], labels=['1/Year', '1/day'])
# _ = plt.xlabel('Frequency (log scale)')
# Column name -> positional index.
column_indices = dict(zip(df.columns, range(len(df.columns))))

# Chronological 70% / 20% / 10% split into train / validation / test.
n = len(df)
train_df = df.iloc[:int(n*0.7)]
val_df = df.iloc[int(n*0.7):int(n*0.9)]
test_df = df.iloc[int(n*0.9):]
num_features = df.shape[1]

# Standardize every split (and the full frame) with the statistics of the
# training split only.
train_mean = train_df.mean()
train_std = train_df.std()
train_df, val_df, test_df, df_std = (
    (frame - train_mean) / train_std
    for frame in (train_df, val_df, test_df, df)
)
# df_std = df_std.melt(var_name='Column', value_name='Normalized')
# plt.figure(figsize=(12, 6))
# ax = sns.violinplot(x='Column', y='Normalized', data=df_std)
# _ = ax.set_xticklabels(df.keys(), rotation=90)
# //////////////////////////////////////////////////////////////////////////////////////////////
class WindowGenerator():
    """Describe a sliding window of `input_width` input steps and `label_width`
    label steps, where the window spans `input_width + shift` steps in total.

    The train/validation/test frames default to the module-level frames that
    exist when the class is defined.
    """

    def __init__(self, input_width, label_width, shift,
                 train_df=train_df, val_df=val_df, test_df=test_df,
                 label_columns=None):
        # Raw data splits.
        self.train_df, self.val_df, self.test_df = train_df, val_df, test_df

        # Name -> index maps, for the label columns (only when given) and for
        # all columns of the training frame.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {
                name: i for i, name in enumerate(label_columns)}
        self.column_indices = {
            name: i for i, name in enumerate(train_df.columns)}

        # Window geometry.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift

        # Inputs occupy the first `input_width` positions of the window.
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Labels occupy the last `label_width` positions of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        """Return a four-line summary of the window layout."""
        summary = [
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}',
        ]
        return '\n'.join(summary)
# //////////////////////////////////////////////////////////////////////////////////////////////
def split_window(self, features):
    """Split a batch of full windows into an `(inputs, labels)` pair.

    `features` is indexed as [batch, time, feature]. Inputs and labels are cut
    along the time axis; when `label_columns` is set, the labels keep only
    those feature columns, in the given order.
    """
    inputs = features[:, self.input_slice, :]
    labels = features[:, self.labels_slice, :]
    if self.label_columns is not None:
        picked = []
        for name in self.label_columns:
            picked.append(labels[:, :, self.column_indices[name]])
        labels = tf.stack(picked, axis=-1)

    # Slicing loses the static shape, so restore the known time dimension;
    # this makes the resulting `tf.data.Dataset`s easier to inspect.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])
    return inputs, labels


WindowGenerator.split_window = split_window
def plot(self, model=None, plot_col='T (degC)', max_subplots=3, num=None):
    """Plot inputs, labels and (optionally) model predictions for `plot_col`.

    Draws up to `max_subplots` windows from the cached example batch, one
    subplot per window. `num` is forwarded to `plt.figure`.
    """
    inputs, labels = self.example
    plt.figure(figsize=(12, 8), num=num)
    plot_col_index = self.column_indices[plot_col]
    n_rows = min(max_subplots, len(inputs))
    for row in range(n_rows):
        plt.subplot(n_rows, 1, row + 1)
        plt.ylabel(f'{plot_col} [normed]')
        plt.plot(self.input_indices, inputs[row, :, plot_col_index],
                 label='Inputs', marker='.', zorder=-10)

        # With explicit label columns the labels tensor only holds those
        # columns, so the plotted column may be absent from it.
        label_col_index = (
            self.label_columns_indices.get(plot_col, None)
            if self.label_columns else plot_col_index
        )
        if label_col_index is None:
            continue

        plt.scatter(self.label_indices, labels[row, :, label_col_index],
                    edgecolors='k', label='Labels', c='#2ca02c', s=64)
        if model is not None:
            predictions = model(inputs)
            plt.scatter(self.label_indices, predictions[row, :, label_col_index],
                        marker='X', edgecolors='k', label='Predictions',
                        c='#ff7f0e', s=64)
        if row == 0:
            plt.legend()
    plt.xlabel('Time [h]')


WindowGenerator.plot = plot
def make_dataset(self, data):
    """Turn `data` into a shuffled, batched dataset of `(inputs, labels)` windows.

    `data` is converted to a float32 array, cut into windows of
    `total_window_size` consecutive rows (stride 1, batches of 32), and each
    batch is passed through `split_window`.
    """
    windows = tf.keras.utils.timeseries_dataset_from_array(
        data=np.array(data, dtype=np.float32),
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=True,
        batch_size=32,
    )
    return windows.map(self.split_window)


WindowGenerator.make_dataset = make_dataset
# The four accessors below must be properties: `plot` unpacks `self.example`
# and `compile_and_fit` passes `window.train` / `window.val` directly to
# `model.fit`. The decorators had been garbled into `#property` comments,
# which turned them into plain methods.
@property
def train(self):
    """Windowed dataset built from the training frame."""
    return self.make_dataset(self.train_df)


@property
def val(self):
    """Windowed dataset built from the validation frame."""
    return self.make_dataset(self.val_df)


@property
def test(self):
    """Windowed dataset built from the test frame."""
    return self.make_dataset(self.test_df)


@property
def example(self):
    """Get and cache an example batch of `inputs, labels` for plotting."""
    result = getattr(self, '_example', None)
    if result is None:
        # No example batch was found, so get one from the `.train` dataset
        result = next(iter(self.train))
        # And cache it for next time
        self._example = result
    return result


WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example
# One input step, one label step, one step ahead; temperature is the only label.
single_step_window = WindowGenerator(
    label_columns=['T (degC)'],
    shift=1,
    label_width=1,
    input_width=1,
)

# Passed as `epochs` to `model.fit` in `compile_and_fit`.
MAX_EPOCHS = 20
def compile_and_fit(model, window, patience=2, save=False):
    """Compile `model`, train it on `window.train` and return the fit history.

    Training uses MSE loss, the Adam optimizer and an MAE metric, validates on
    `window.val`, runs for at most `MAX_EPOCHS` epochs and stops early when
    `val_loss` has not improved for `patience` epochs.

    When `save` is truthy the trained model is written to './saved_model'.
    """
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=patience,
                                                      mode='min')
    model.compile(loss=tf.keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=[tf.keras.metrics.MeanAbsoluteError()])
    history = model.fit(window.train, epochs=MAX_EPOCHS,
                        validation_data=window.val,
                        callbacks=[early_stopping])
    if save:
        model.save('./saved_model')
    return history
# Two 64-unit ReLU hidden layers followed by a single linear output unit.
hidden_layers = [tf.keras.layers.Dense(units=64, activation='relu')
                 for _ in range(2)]
dense = tf.keras.Sequential(hidden_layers + [tf.keras.layers.Dense(units=1)])

# Train and save the model, then record validation and test metrics.
history = compile_and_fit(dense, single_step_window, save=True)
val_performance = {'Dense': dense.evaluate(single_step_window.val)}
performance = {'Dense': dense.evaluate(single_step_window.test, verbose=0)}
dense.summary()
Next, I need to load the model and use it:
from datetime import datetime
import time
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import Adam, RMSprop
import os
import matplotlib.pyplot as plt
import seaborn as sns
# Restore the model from the directory the training script saved it to,
# then print its layer summary.
loaded_model = tf.keras.models.load_model(
    './saved_model'
)
loaded_model.summary()
What's next?

There are many ways you may consider.
model.save() and tf.keras.models.load_model()
model.save_weights() and model.load_weights()
Export the parameters as arrays. As discussed here, the target machine then does not need the training API installed: any function on the target machine can use the exported parameters.
Transform and copy the working directory
etc.
Time conversion. Databases such as Oracle, Informix, and SQL-based systems each have their own time units. Depending on how the application was designed, they may present time as unix_time or as binary sequences. When unix_time is exchanged in a communication response, it may have to be converted from a clock value or a response time according to the security method in use, if any. You need to synchronize the clock and convert the response to the correct binary or time format before extracting data from a request or response: first the number as binary, then the exported string.
TAI (17 September 2004) UTC (16 to 17 September 2004) [Unix time][1]
2004-09-17T00:00:30.75 2004-09-16T23:59:58.75 1095379198.75
Sample: Export trained parameters from the model. It is easy to print out at any of the steps, but working coefficients respond to the target environment with significant feedback.
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Callback
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class custom_callback(tf.keras.callbacks.Callback):
    """Print the shapes of the first two weight arrays after every epoch and
    stop training once the logged 'accuracy' reaches 0.95.

    The class previously defined `on_epoch_end` twice, so the second
    definition replaced the first and the early-stop check never ran; both
    behaviours now live in one method.
    """

    def on_epoch_end(self, epoch, logs=None):
        # `logs` may be None; never use a mutable dict as the default value.
        logs = logs or {}

        # Use the model Keras attached to this callback rather than a global.
        weights = self.model.get_weights()
        print(weights[0].shape)  # e.g. (3, 3, 4, 32)
        print(weights[1].shape)  # e.g. (32,)

        accuracy = logs.get('accuracy')
        if accuracy is not None and accuracy >= 0.95:
            self.model.stop_training = True


# The name is rebound from the class to an instance, as in the original snippet.
custom_callback = custom_callback()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Train with the callback attached.
# NOTE(review): `batch_size` is passed together with `dataset`; confirm that
# `dataset` is array-like rather than an already-batched tf.data.Dataset.
history = model.fit(
    dataset,
    batch_size=100,
    epochs=10000,
    callbacks=[custom_callback],
)

Related

The code for running the model accuracy for kmeans clustering takes a long time to execute

I've used the k-means clustering algorithm on the data and then tried to measure accuracy with several classification algorithms, such as decision tree, random forest, and KNN. After training, the part of the code that computes the model accuracy takes a long time to run. I've attached the code below.
# Silence library warnings for the rest of the script.
import warnings
# KMeans is used below; the file only imported it further down, after this use.
from sklearn.cluster import KMeans

warnings.filterwarnings('ignore')

# Select the 'Time' column and the V1..V20 feature columns as a NumPy array.
feature_columns = ['Time'] + [f'V{i}' for i in range(1, 21)]
x = data.loc[:, feature_columns].values
# Check the shape of x.
print(x.shape)
# View the selected features as a DataFrame (notebook-style preview).
x_data = pd.DataFrame(x)
x_data.head()

# Cluster the rows into two groups.
km = KMeans(n_clusters = 2, init = 'k-means++', max_iter = 300, n_init = 10, random_state = 0)
y_means = km.fit_predict(x)

# Put the cluster assignment next to the true 'Class' label.
# NOTE(review): concat aligns on the index; this assumes `data` has a default
# 0..n-1 index — confirm.
a = data['Class']
y_means = pd.DataFrame(y_means)
z = pd.concat([y_means, a], axis = 1)
z = z.rename(columns = {0: 'cluster'})

# Show which classes appear in each cluster.
print("Lets check the Results After Applying the K Means Clustering Analysis \n")
print("First Cluster:", z[z['cluster'] == 0]['Class'].unique())
print("---------------------------------------------------------------")
print("Second Cluster:", z[z['cluster'] == 1]['Class'].unique())
print("---------------------------------------------------------------")
from sklearn.cluster import KMeans

# Second clustering run with default settings. The earlier `hc=KMeans`
# assignment was dead code (immediately overwritten) and has been dropped.
hc = KMeans(n_clusters=2)
y_her = hc.fit_predict(x)

# Put the cluster assignment next to the true 'Class' label.
b = data['Class']
y_herr = pd.DataFrame(y_her)
w = pd.concat([y_herr, b], axis = 1)
w = w.rename(columns = {0: 'cluster'})

# Show which classes appear in each cluster.
print("K-Means Clustering Analysis \n")
print("Zero Cluster:", w[w['cluster'] == 0]['Class'].unique())
print("---------------------------------------------------------------")
print("First Cluster:", w[w['cluster'] == 1]['Class'].unique())
print("---------------------------------------------------------------")
# Target is the 'Class' column; features are every other column.
y = data['Class']
x = data.drop(columns=['Class'])
for label, part in (("Shape of x:", x), ("Shape of y:", y)):
    print(label, part.shape)

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=0, test_size=0.2)
for label, part in (
    ("The Shape of x train:", x_train),
    ("The Shape of x test:", x_test),
    ("The Shape of y train:", y_train),
    ("The Shape of y test:", y_test),
):
    print(label, part.shape)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
import xgboost as xgb
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score,confusion_matrix,roc_auc_score
from mlxtend.plotting import plot_confusion_matrix
def evaluator(y_test, y_pred, title='Confusion Matrix for Logistic Regression'):
    """Print accuracy and a classification report, then plot the confusion matrix.

    :param y_test: true labels
    :param y_pred: predicted labels
    :param title: plot title; the default keeps the previously hard-coded
        text, pass another title when evaluating a different model
    """
    # Accuracy:
    print('Accuracy is: ', accuracy_score(y_test, y_pred))
    print('')

    # Classification Report:
    print('Classification Report: \n', classification_report(y_test, y_pred))

    print('Confusion Matrix: \n\n')
    plt.style.use("ggplot")
    cm = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(conf_mat = cm, figsize=(10,10), show_normed=True)
    plt.title(title, fontsize = 15)
    plt.show()
# Fit each classifier, report its test metrics and collect test/train accuracy.
# NOTE(review): the original author reports this section as slow. KNN
# prediction (including the extra prediction on the training set) is a likely
# contributor, so the estimators that accept `n_jobs` now use all cores.
models = {
    "KNN" : KNeighborsClassifier(n_jobs=-1),
    "DT" : DecisionTreeClassifier(),
    'RFC' : RandomForestClassifier(n_jobs=-1),
    'GBC' : GradientBoostingClassifier(),
    'XGB' : XGBClassifier()
}

# Rows are gathered in a list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
accuracy_rows = []
for test, clf in models.items():
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    train_pred = clf.predict(x_train)
    train_acc = accuracy_score(y_train, train_pred)
    print("\n", test + ' scores')
    print(acc)
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))
    print('*' * 100, "\n")
    accuracy_rows.append({'Model': test, 'Accuracy': acc, 'Train_acc': train_acc})

model_accuracy = pd.DataFrame(accuracy_rows, columns=['Model', 'Accuracy', 'Train_acc'])
I want the detailed output that the code is written to print.
For KNN algorithm:
enter image description here
enter image description here
The same goes for the other algorithms.

Learning a simple pattern with RNN

I am trying to make RNN in tensorflow capture a basic pattern in a simple time series in hours. I am trying to solve a bigger problem involving count time series of customer demand.
The simple time series is as follows:
Every 24 hours (1 day) there will be a small integer number, either 1 or 2, drawn from a uniform random distribution.
In between these 24 hours will be zero values.
Every 168 hours (7 days) there will be a high integer number (5, 6, 7, 8, or 9) drawn from a uniform random distribution.
I tried following the code at https://r2rt.com/recurrent-neural-networks-in-tensorflow-i.html using dynamic_rnn.
Is my test data correct? How can I feed the batches of output from the previous time step as input to the next time step? I have 5 hyperparameters to play with:
batch_size = 8 num_steps = 192 state_size = 5 learning_rate = 0.00001
num_epochs=1
However, after training each time with the same hyperparameters I am getting different results. Each time the training error is very small. The different results seem quite random (local minima probably??). orange is actual, blue is predicted.
Can my test batch start at any point in the sequence? Does the RNN learn the number of zeros in between non-zero values? If the test batch starts with a small non-zero number, then the RNN should know that it should output 23 zero-value steps after this, and then after 167 steps output a high non-zero value. If I start my test sequence at 0, should it then wait 23 more zero-value steps before outputting a small non-zero value, and after 167 steps output a high non-zero value?
or does it learn another pattern? I am not sure if my method of testing is correct?
Is it better to just pass one time step integer value and let the network generate the remaining time steps integer values by passing the current time step output as input to the next time step?
Currently, I just take a random sequence of X generated by the same method for training and check if my output Y is the shifted version of X by 1 time step. Could you please explain?
My code is given below. you can just copy and paste and it should run. Basically, I just generate the data, build the model, train the network and test it.
from data_generator import gen_data
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import numpy as np
import time
import matplotlib.pyplot as plt
# Model / training settings.
num_classes = 11        # one-hot depth of the inputs and width of the output layer
state_size = 5          # units in the RNN cell
num_steps = 192         # time steps per training window
batch_size = 8          # windows per batch
learning_rate = 0.00001
num_epochs = 1

# Demand series produced by the data generator module.
dem = gen_data(len=1576)
def gen_batch(dem, batch_size, num_steps):
    """Yield `(x, y)` batches of sliding windows over `dem`.

    `x` holds `batch_size` windows of `num_steps` consecutive values, each
    window starting one step after the previous one; `y` holds the same
    windows shifted forward by one time step (the next-step targets).

    The previous version zipped the input batches with themselves, so every
    `y` was identical to its `x` and the network was trained to copy its
    input.

    :param dem: 1-D sequence of values
    :param batch_size: number of windows per batch
    :param num_steps: length of each window
    :return: generator of `(x, y)` arrays, each of shape (batch_size, num_steps)
    """
    raw_x = dem[:-1]
    raw_y = dem[1:]
    num_of_win = len(raw_x) - num_steps - 1                 # e.g. 1382 windows
    batch_partition_length = num_of_win // batch_size       # e.g. 172 batches

    for batch_index in range(batch_partition_length):
        first = batch_index * batch_size
        starts = range(first, first + batch_size)
        x = np.array([raw_x[s:s + num_steps] for s in starts])
        y = np.array([raw_y[s:s + num_steps] for s in starts])
        yield (x, y)
def gen_epoch(num_epochs, batch_size, num_steps):
    """Yield one fresh batch generator per epoch, built from the module-level `dem`."""
    yield from (
        gen_batch(dem, batch_size, num_steps)
        for _ in range(num_epochs)
    )
def reset_graph():
    """Reset the TF1 default graph."""
    graph_api = tf.compat.v1
    graph_api.reset_default_graph()
def build_RNN_model(batch_size, num_classes, state_size, num_steps, learning_rate):
    """Build the TF1 graph for a one-layer BasicRNN next-step classifier.

    Integer inputs of shape (batch_size, num_steps) are one-hot encoded to
    `num_classes`, run through a `BasicRNNCell` of `state_size` units, and
    projected to `num_classes` logits per time step. The loss is the mean
    sparse softmax cross-entropy and is minimized with Adagrad.

    :return: dict of graph handles: x, y, final_state, total_loss, train_step,
        init_state, predictions, tru_labels, saver
    """
    reset_graph()
    v1 = tf.compat.v1

    # Inputs, targets and the initial RNN state.
    x = v1.placeholder(dtype=tf.int32, shape=(batch_size, num_steps))
    y = v1.placeholder(dtype=tf.int32, shape=(batch_size, num_steps))
    init_state = tf.zeros([batch_size, state_size])

    # Recurrent layer over the one-hot encoded inputs.
    rnn_inputs = tf.one_hot(x, num_classes)
    cell = v1.nn.rnn_cell.BasicRNNCell(state_size)
    rnn_outputs, final_state = v1.nn.dynamic_rnn(
        cell, rnn_inputs, initial_state=init_state)

    # Output projection, applied to all time steps at once.
    with v1.variable_scope('output'):
        W = v1.get_variable(
            'out_state_w', shape=(state_size, num_classes),
            initializer=v1.initializers.glorot_uniform(10))
        b = v1.get_variable(
            'out_state_b', shape=(num_classes),
            initializer=v1.initializers.constant(0.0))
    flat_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    logits = tf.reshape(v1.matmul(flat_outputs, W) + b,
                        [batch_size, num_steps, num_classes])
    predictions = v1.nn.softmax(logits)

    tru_labels = y
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    total_loss = tf.reduce_mean(losses)
    train_step = v1.train.AdagradOptimizer(learning_rate).minimize(total_loss)

    return {
        'x': x,
        'y': y,
        'final_state': final_state,
        'total_loss': total_loss,
        'train_step': train_step,
        'init_state': init_state,
        'predictions': predictions,
        'tru_labels': tru_labels,
        'saver': v1.train.Saver(),
    }
def train_network(g, num_epochs, batch_size, num_steps, dem, save=' '):
    """Train the graph `g`, optionally checkpoint it, then plot one test batch.

    The final RNN state of each batch is fed as the initial state of the next
    batch within an epoch. After training, every batch from a fresh pass over
    `dem` is evaluated and the first sequence of the last evaluated batch is
    printed and plotted against its labels.

    :param g: dict of graph handles as returned by `build_RNN_model`
    :param save: checkpoint path; a blank string (the default) or a non-string
        skips saving
    :return: summed training loss of the last epoch (0 if no epoch ran)
    """
    tf.compat.v1.set_random_seed(2345)
    with tf.compat.v1.Session() as sess:
        # `initialize_all_variables` is deprecated in favour of this call.
        sess.run(tf.compat.v1.global_variables_initializer())
        training_losses = []
        # Defined up front so they exist even when no epoch or batch runs.
        training_loss = 0
        training_state = None

        for idx, epoch in enumerate(gen_epoch(num_epochs, batch_size, num_steps)):
            training_loss = 0
            steps = 0  # number of batches
            training_state = None
            for X, Y in epoch:
                steps += 1
                feed_dict = {g['x']: X, g['y']: Y}
                if training_state is not None:
                    feed_dict[g['init_state']] = training_state
                training_loss_, training_state, _ = sess.run(
                    [g['total_loss'], g['final_state'], g['train_step']],
                    feed_dict)
                training_loss += training_loss_
            mean_loss = training_loss / steps if steps else float('nan')
            print("Average training loss for Epoch", idx, ":", mean_loss)
            print('steps', steps)
            training_losses.append(mean_loss)

        # The default ' ' used to be written as a checkpoint literally named " ".
        if isinstance(save, str) and save.strip():
            g['saver'].save(sess, save)

        # NOTE(review): indentation was lost in the source; the evaluation loop
        # is assumed to contain only the `sess.run` call, so the output below
        # refers to the last evaluated batch — confirm.
        tru_labels = predictions = None
        for X, Y in gen_batch(dem, batch_size, num_steps):
            feed_dict = {g['x']: X, g['y']: Y}
            if training_state is not None:
                feed_dict[g['init_state']] = training_state
            tru_labels, predictions = sess.run(
                [g['tru_labels'], g['predictions']], feed_dict=feed_dict)

        if predictions is not None:
            pred = np.argmax(predictions, axis=2)
            print(pred.shape)
            pred = pred[0]
            print('predictions', pred)
            tru_labels = tru_labels[0]
            print('tru_labels', tru_labels)
            plt.plot(pred)
            plt.plot(tru_labels)
            plt.show()
    return training_loss
# Build the graph, train it and report the wall-clock time.
g = build_RNN_model(batch_size, num_classes, state_size, num_steps, learning_rate)
t = time.time()
train_network(g, num_epochs, batch_size, num_steps, dem, save='saver')
# Report the configured epoch count (the message used to hard-code "3 epochs").
print("It took", time.time() - t, "seconds to train for", num_epochs, "epochs.")
I have written some Keras code with a single RNN cell and a dense layer to capture the following two patterns, which are similar to the two patterns above. However, the distribution of magnitudes of the high-vehicle and low-vehicle values, which are drawn from the categorical distribution below, is not being reproduced in the test output.
Categorical Random Variable, x = {0,1,2} and p(x) = {0.6,0.3,0.1}
low vehicles = 1 + x , every 4 hours
high vehicles = 6 + x , every 8 hours
I managed to get the results like the following
with this code
from copyreg import pickle
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow.keras as keras
import sys
#### for reproducible results
from numpy.random import seed
seed(1)
import tensorflow
tensorflow.random.set_seed(2)
# Window / batching settings.
n_steps = 12
batch_size = 32
horizon = 24                # number of steps predicted ahead after training

# Network settings.
num_layers = 1
lay1_state_size = 64
lay2_state_size = 0
dense_state_size = 1

# Training settings.
num_epochs = 25
optimizer_type = 'Adam'
loss_function_type = 'sparse_categorical_crossentropy or mse or rmse'
metrics = 'rmse'

# Demand series with spikes at regular intervals.
dem = np.load('const_dem_2_freq_stoch.npy')
dem_len = len(dem)
def gen_batch(dem, batch_size, n_steps):
    """Cut `dem` into overlapping windows of `n_steps + 1` consecutive values.

    Consecutive windows start one step apart. The number of windows is rounded
    down to a whole number of batches.

    :param dem: 1-D sequence of values
    :param batch_size: number of windows per batch
    :param n_steps: input steps per window; each window carries one extra
        trailing value
    :return: `(data_x, batch_partition_length)` where `data_x` has shape
        (batch_partition_length * batch_size, n_steps + 1)
    """
    n = n_steps + 1
    raw_x = dem[:-1]
    num_of_win = len(raw_x) - n - 1
    batch_partition_length = num_of_win // batch_size
    n_windows = batch_partition_length * batch_size

    # One comprehension replaces the nested for/while loops with hand-kept
    # counters; window k starts at position k.
    data_x = np.array([raw_x[start:start + n] for start in range(n_windows)])
    data_x = np.reshape(data_x, (-1, n))
    return data_x, batch_partition_length
data_x, batch_partition_length = gen_batch(dem, batch_size, n_steps)
# Add a trailing feature axis of size 1.
data_x = np.expand_dims(data_x, axis=-1)

# Split points are computed from the series length.
# NOTE(review): `data_x` has fewer rows than `dem_len`, so the test split is
# smaller than 10% of the windows — confirm this is intended.
tr = int(0.7*dem_len)
val = int(0.2*dem_len)

# Inputs are the first n_steps values of each window; the target is the last.
window_inputs = data_x[:, :n_steps]
window_targets = data_x[:, -1]
x_train, y_train = window_inputs[:tr], window_targets[:tr]
x_valid, y_valid = window_inputs[tr:tr+val], window_targets[tr:tr+val]
print('\n\n')
print('tr+val',tr+val)
print('\n\n')
x_test, y_test = window_inputs[tr+val:], window_targets[tr+val:]
# Model: one SimpleRNN layer followed by a dense output layer.
model = keras.models.Sequential()
model.add(keras.layers.SimpleRNN(lay1_state_size, input_shape=[None,1]))
model.add(keras.layers.Dense(dense_state_size))

# Train on MAE and track RMSE as a metric.
model.compile(
    optimizer='Adam',
    loss=keras.losses.mean_absolute_error,
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)
model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=num_epochs,
    validation_data=(x_valid, y_valid),
)

print('\n')
print('Model Evaluation on test set:\n')
model.evaluate(x_test, y_test, batch_size=batch_size)
print('\n')
# Roll the model forward `horizon` steps. At each step the prediction is
# appended to every window as a new time step, so later steps consume earlier
# predictions.
y_tru = np.array([])
for step_ahead in range(horizon):
    offset = step_ahead + 1
    # True values: the target column of the windows `offset` rows further on,
    # zero-padded at the end to keep the length constant.
    shifted_targets = data_x[offset:, n_steps]
    y = np.append(shifted_targets, np.array([[0]*offset]))
    y_tru = np.append(y_tru, y)
    # Prediction from the most recent part of each (growing) window.
    recent = data_x[:, step_ahead:]
    y_pred_one = model.predict(recent)[:, np.newaxis, :]
    data_x = np.concatenate([data_x, y_pred_one], axis=1)

# One row per window, one column per step ahead.
y_tru = np.reshape(y_tru, (batch_partition_length*batch_size, horizon), order='F')
# Everything after the original n_steps + 1 columns is a prediction.
y_pred_horizon = np.squeeze(data_x[:, n_steps+1:])
print('print(y_pred_horizon.shape)', y_pred_horizon.shape)
print(' RNN prediction on all data MSE', np.mean(keras.losses.mean_squared_error(y_tru, y_pred_horizon)))
print(' RNN prediction on all data MAE', np.mean(keras.losses.mean_absolute_error(y_tru, y_pred_horizon)))
print('\n')

# Plot truth against prediction for the first ten windows.
# NOTE(review): indentation was lost in the source; `plt.show()` is assumed to
# run once after all figures are created — confirm.
for i in range(10):
    plt.figure(i)
    plt.plot(y_tru[i])
    plt.plot(np.squeeze(y_pred_horizon[i]))
plt.show()
The data generation code is given below
from copyreg import pickle
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow.keras as keras
dem_len = 1240


def categorical(p, rng=None):
    """Draw category indices from the probability vector(s) in `p`.

    Inverse-CDF sampling: a uniform number is drawn for every row of `p` and
    the first index whose cumulative probability reaches it is returned.

    :param p: array of probabilities along the last axis; leading axes are
        sampled independently
    :param rng: optional `numpy.random.Generator` for reproducible draws; when
        omitted the global `np.random` state is used, as before
    :return: integer index (or array of indices) with shape `p.shape[:-1]`
    """
    sampler = np.random if rng is None else rng
    u = sampler.uniform(size=p.shape[:-1])
    return (p.cumsum(-1) >= u[..., None]).argmax(-1)


# Probabilities of the offsets 0, 1 and 2 added to the base demand.
p = np.array([0.6, 0.3, 0.1])
def dem_hr(hr, lo_veh, hi_veh, len):
    """Build a demand series with one non-zero value every `hr` steps.

    Position 0, hr, 2*hr, ... holds `lo_veh + categorical(p)`; all other
    positions are zero. Only as many periods as are needed to fill `len`
    values are generated (capped at 10000 periods, the previous fixed loop
    count), instead of always growing an array through 10000 `np.append`
    calls. Fewer random numbers are therefore consumed than before.

    :param hr: period in steps; must be >= 1
    :param lo_veh: base value of each non-zero entry
    :param hi_veh: unused; kept for interface compatibility
    :param len: number of values to return, expected non-negative (the name
        shadows the builtin but is part of the public signature)
    :return: float array of at most `len` values
    """
    n_periods = max(0, min(10000, -(-len // hr)))  # ceil(len / hr), capped
    dem_hrs = np.zeros(n_periods * hr)
    dem_hrs[::hr] = [lo_veh + categorical(p) for _ in range(n_periods)]
    return dem_hrs[:len]
def gen_data(len):
    """Combine a 4-step and an 8-step periodic demand series into one series.

    At every position the 8-step series wins when it is at least as large as
    the 4-step series; otherwise the 4-step value is used.
    """
    baseline = np.zeros(len)
    every_4 = dem_hr(4, 1, 3, len)
    every_8 = dem_hr(8, 6, 9, len)
    difference = (baseline + every_8) - every_4
    return np.where(difference >= 0, every_8, every_4)
# Generate the series, store it on disk and show it.
# NOTE(review): the training script loads 'const_dem_2_freq_stoch.npy', which
# is a different file name from the one written here — confirm which is meant.
dem = gen_data(len=dem_len)
np.save(
    'const_dem_2_freq_stoch_cat',
    dem,
)
plt.plot(dem)
plt.show()
I think increasing the number of steps may help to capture the distribution of magnitudes at different periods. Does increasing the number of layers also help to capture the magnitude distribution?

tensorflow GPU training doesn't take place

I am running the following code twice: once on the CPU (without the "disable_eager_execution" part) and once on the GPU.
On the CPU training, one epoch takes 12 Hours but the loss changes from batch to batch and I see that training takes place.
On the GPU version. Nothing happens. Training one epoch takes around 1 Hour but loss and accuracy stay the same.
PLEASE HELP ME UNDERSTAND WHAT DO I DO WRONG...
I am running this code using AWS SageMaker (ml.g4dn.4xlarge).
Code:
import numpy as np
import pandas as pd
import os
import datetime
import tensorflow as tf
import re
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.python.framework.ops import disable_eager_execution
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')), '\n')
# Switch TensorFlow to graph mode for everything that follows.
disable_eager_execution()

# Lookup tables (tab-separated files without a header row).
# Company dictionary: company id -> index.
company_df = pd.read_csv(
    "/home/ec2-user/SageMaker/company_similarity/data/company_dict.csv",
    header=None,
    sep='\t',
)
company_df.columns = ['company_id', 'idx']
# Payee dictionary: payee -> index.
payee_df = pd.read_csv(
    "/home/ec2-user/SageMaker/company_similarity/data/cleaned_up_payee_dict.csv",
    header=None,
    sep='\t',
)
payee_df.columns = ['payee', 'idx']

# Raw (company_id, payee, label) examples, streamed in batches for one pass
# over the file.
BATCH_SIZE = 32
raw_data = tf.data.experimental.make_csv_dataset(
    "/home/ec2-user/SageMaker/company_similarity/data/training_data.csv",
    batch_size=BATCH_SIZE,
    column_names=['company_id', 'payee', 'label'],
    select_columns=['company_id', 'payee', 'label'],
    column_defaults=[tf.string, tf.string, tf.int32],
    label_name='label',
    field_delim='\t',
    na_value="?",
    num_epochs=1,
    ignore_errors=True,
)
class PreprocessingFeatures(object):
    """Map the raw string features of a batch to integer vocabulary indices.

    Instances are callable as `(features, labels) -> ((company, payee), labels)`.
    """

    def __init__(self, company_idx, payee_idx):
        """
        :param company_idx: vocabulary for the company lookup
        :param payee_idx: vocabulary for the payee lookup
        """
        self.payee_idx = payee_idx
        self.company_idx = company_idx
        # Same characters as before; the backslash before '?' is now written
        # as '\\' because '\?' is an invalid escape sequence in a normal
        # string literal. The attribute is not read elsewhere in this class.
        self.symbols = '!"$%&\'\\?()*+,-./:;<=>?[\\]^_`{|}~a-zA-Z0-9 '
        # One out-of-vocabulary bucket and no mask token for both lookups.
        self.payee_lookup = tf.keras.layers.experimental.preprocessing.StringLookup(
            vocabulary=payee_idx,
            mask_token=None,
            num_oov_indices=1
        )
        self.company_lookup = tf.keras.layers.experimental.preprocessing.StringLookup(
            vocabulary=company_idx,
            mask_token=None,
            num_oov_indices=1
        )

    def __call__(self, features, labels):
        """Look up `features['payee']` and `features['company_id']`; labels pass through."""
        payee = self.payee_lookup(features['payee'])
        company = self.company_lookup(features['company_id'])
        return (company, payee), labels
# Vocabularies in file order; company ids are converted to strings.
payee_list = list(payee_df['payee'])
company_list = list(map(str, list(company_df['company_id'])))

# ************ START TRAINING ************ #
# TensorBoard logs go to a time-stamped folder; checkpoints to a fixed one.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = '/home/ec2-user/SageMaker/company_similarity/models/logs/fit/' + run_stamp
checkpoint_dir = '/home/ec2-user/SageMaker/company_similarity/models/embedding_checkpoints'

# Default summary writer for the custom text summaries.
file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
file_writer.set_as_default()
def present_topK(model, listA, item_title='eBay', topK=10):
    '''
    Rank embedding rows by cosine similarity to the row of ``item_title``.

    :param model: model whose ``layers[2]`` supplies the embedding matrix
        (first weight array of that layer)
    :param listA: vocabulary list; ``listA[k]`` is looked up at embedding
        row ``k + 1``
    :param item_title: item to compare against; must be in ``listA``
    :param topK: number of most similar rows to report
    :return: two tab-separated lines, "payee" and "score", each followed by
        the list of the top entries
    '''
    # NOTE(review): with the layer order of build_model in this file,
    # layers[2] appears to be the company embedding while callers pass the
    # payee vocabulary — confirm which embedding is intended.
    assert item_title in listA, "Item not in Vocabulary"
    emb = model.layers[2].get_weights()[0]
    # Vocabulary indices start at 1, not 0.
    query_row = listA.index(item_title) + 1
    score = cosine_similarity(emb[query_row].reshape(1, -1), emb)[0]
    similar_items = np.argsort(score)[::-1][:topK]
    res = {'payee': [], 'score': []}
    for i in similar_items:
        # Row 0 has no vocabulary entry. listA[i - 1] would silently wrap
        # around to the last item, so it is reported as the StringLookup
        # default out-of-vocabulary token instead.
        res['payee'].append(listA[i - 1] if i > 0 else '[UNK]')
        res['score'].append(score[i])
    return "\n".join("{}\t{}".format(k, v) for k, v in res.items())
class GenerateExamplesCallback(tf.keras.callbacks.Callback):
    """Save the model after every epoch and log a similarity sanity check
    after every epoch and on every 1000th batch."""

    def __init__(self):
        # Let the Keras base class set up its own state first.
        super().__init__()
        self.step = 0

    def _log_similarity(self, step):
        """Print the items most similar to the default title and write them
        as a text summary at `step`."""
        sim_table = present_topK(self.model, payee_list)
        print("\nSimilar Items to 'eBay': ", sim_table)
        with file_writer.as_default():
            tf.summary.text('Similarity sanity check', data=tf.convert_to_tensor(sim_table), step=step)

    def on_epoch_end(self, epoch, logs=None):
        self.step += 1
        self.model.save('/home/ec2-user/SageMaker/company_similarity/models/embedding_checkpoints/model_{}'.format(epoch))
        self._log_similarity(epoch)

    def on_batch_end(self, batch, logs=None):
        if batch % 1000 == 0:
            self._log_similarity(batch)
print('TensorBoard logging folder: ', log_dir)
print("checkpoint_dir:", checkpoint_dir)

# Weight checkpoints every 200000 batches, named after the current epoch.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_freq=200000,
    save_weights_only=True,
)
# NOTE(review): this callback is created but not included in `callbacks`
# below — confirm whether early stopping is meant to be active.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2)
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    update_freq=10000,
    histogram_freq=1,
)
callbacks = [checkpoint_callback, tensorboard_callback, GenerateExamplesCallback()]

# Map raw string features to vocabulary indices and repeat indefinitely.
# train_data = raw_data.map(PreprocessingFeatures(company_list, payee_list)).shuffle(buffer_size=10000).repeat()
preprocess = PreprocessingFeatures(company_list, payee_list)
train_data = raw_data.map(preprocess).repeat()
# examples
# next(iter(raw_data.take(1)))
# next(iter(train_data))

# Row count of the training file (from `wc -l <filename>` on a terminal).
fileLen = 5851184
STEPS_PER_EPOCH = fileLen // BATCH_SIZE + 1
# STEPS_PER_EPOCH = 1000
def build_model(company_embedding=128, payee_embedding=128, loss=tf.keras.losses.binary_crossentropy):
    """Build and compile the company/payee embedding model.

    Each of the two scalar id inputs is looked up in its own embedding table.
    The two resulting vectors are combined with a ``Dot`` layer
    (``normalize=True``) and the single value it produces is fed to a
    one-unit sigmoid layer.

    Args:
        company_embedding: Width of the company embedding vectors.
        payee_embedding: Width of the payee embedding vectors.
        loss: Loss handed to ``model.compile``.

    Returns:
        The compiled ``tf.keras.Model`` with inputs
        ``(company_input, payee_input)``.

    Table sizes come from the module-level ``company_list`` and
    ``payee_list``, each with one extra row.
    """
    layers = tf.keras.layers

    def embed(ids, name, vocabulary, width):
        # Look the id up in its table, then flatten away the length-1 axis
        # that comes from the (1,)-shaped input.
        vectors = layers.Embedding(name=name,
                                   input_dim=len(vocabulary) + 1,
                                   output_dim=width)(ids)
        return layers.Flatten()(vectors)

    company_input = layers.Input(name='company_input', shape=(1,))
    payee_input = layers.Input(name='payee_input', shape=(1,))
    company_vec = embed(company_input, 'company_embedding', company_list, company_embedding)
    payee_vec = embed(payee_input, 'payee_embedding', payee_list, payee_embedding)

    # A concatenate + dense-stack head would be an alternative to the dot
    # product below; it is not used here.
    similarity = layers.Dot(name='dot', normalize=True, axes=1)([payee_vec, company_vec])
    similarity = layers.Reshape(target_shape=[1])(similarity)
    prediction = layers.Dense(1, activation='sigmoid')(similarity)

    model = tf.keras.Model(inputs=(company_input, payee_input), outputs=prediction)
    model.compile(loss=loss, optimizer='adam', metrics=['accuracy'])
    return model
# Build the model with its default embedding sizes and loss, then train it.
model = build_model()
EPOCHS = 5
r = model.fit(train_data,
              epochs=EPOCHS,
              steps_per_epoch=STEPS_PER_EPOCH,
              callbacks=callbacks
              )
# Save under the same home directory ("ec2-user") that the per-epoch
# checkpoints are written to; the path previously read "ec2user".
model.save("/home/ec2-user/SageMaker/company_similarity/models/models/embedding_model_final.h5")
print("Training is completed")

Keras Model using Tensorflow Distribution for loss fails with batch size > 1

I'm trying to use a distribution from tensorflow_probability to define a custom loss function in Keras. More specifically, I'm trying to build a Mixture Density Network.
My model works on a toy dataset when batch_size = 1 (it learns to predict the correct mixture distribution for y using x). But it "fails" when batch_size > 1 (it predicts the same distribution for all y, ignoring x). This makes me think my problem has to do with batch_shape vs. sample_shape.
To reproduce:
import random
import keras
from keras import backend as K
from keras.layers import Dense, Activation, LSTM, Input, Concatenate, Reshape, concatenate, Flatten, Lambda
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.models import Sequential, Model
import tensorflow
import tensorflow_probability as tfp
tfd = tfp.distributions
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# generate toy dataset
# random.seed() only seeds Python's own generator.  Every draw below comes
# from NumPy or TensorFlow Probability, so those are seeded explicitly too.
random.seed(12902)
np.random.seed(12902)
n_obs = 20000
x = np.random.uniform(size=(n_obs, 4))
df = pd.DataFrame(x, columns=['x_{0}'.format(i) for i in np.arange(4)])
# 2 latent classes, with noisy assignment based on x_0, x_1, (x_2 and x_3 are noise)
df['latent_class'] = 0
df.loc[df.x_0 + df.x_1 + np.random.normal(scale=.05, size=n_obs) > 1, 'latent_class'] = 1
df.latent_class.value_counts()
# The latent class determines which mixture distribution we draw from
d0 = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(probs=[0.3, 0.7]),
    components_distribution=tfd.Normal(
        loc=[-1., 1], scale=[0.1, 0.5]))
# One draw per class-0 row.
d0_samples = d0.sample(sample_shape=(df.latent_class == 0).sum(), seed=12902).numpy()
d1 = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(probs=[0.5, 0.5]),
    components_distribution=tfd.Normal(
        loc=[-2., 2], scale=[0.2, 0.6]))
# One draw per class-1 row.
d1_samples = d1.sample(sample_shape=(df.latent_class == 1).sum(), seed=12903).numpy()
df.loc[df.latent_class == 0, 'y'] = d0_samples
df.loc[df.latent_class == 1, 'y'] = d1_samples
# Overlay the density of y for the two classes on shared bins.
fig, ax = plt.subplots()
bins = np.linspace(-4, 5, 9*4 + 1)
df.y[df.latent_class == 0].hist(ax=ax, bins=bins, label='Class 0', alpha=.4, density=True)
df.y[df.latent_class == 1].hist(ax=ax, bins=bins, label='Class 1', alpha=.4, density=True)
ax.legend();
# mixture density network
N_COMPONENTS = 2 # number of components in the mixture
input_feature_space = 4
# `shape` alone already implies a (None, input_feature_space) batch shape.
# Passing `batch_shape` as well was redundant, and some Keras versions reject
# the combination.
flat_input = Input(shape=(input_feature_space,),
                   name='inputs')
x = Dense(6, activation='relu',
          kernel_initializer='glorot_uniform',
          bias_initializer='ones')(flat_input)
x = Dense(6, activation='relu',
          kernel_initializer='glorot_uniform',
          bias_initializer='ones')(x)
# 3 params per component, laid out in the order get_mixture_coef slices them:
# all weights, then all scales, then all locs
output = Dense(N_COMPONENTS*3,
               kernel_initializer='glorot_uniform',
               bias_initializer='ones')(x)
model = Model(inputs=[flat_input],
              outputs=[output])
I suspect the problem is in the next 3 functions:
def get_mixture_coef(output, num_components):
    """Split the raw network output into mixture parameters.

    The columns of `output` are read as three consecutive groups of
    `num_components` columns: mixture logits, log-scales and locations.

    Returns:
        ``(out_pi, out_sigma, out_mu)`` -- weights normalised to sum to one
        along axis 1, exponentiated scales, and the locations unchanged.
    """
    k = num_components
    logits = output[:, :k]
    log_sigma = output[:, k:2*k]
    mu = output[:, 2*k:]

    # Softmax by hand: subtract the row maximum before exponentiating, then
    # divide by the row sum.
    unnormalised = K.exp(logits - K.max(logits, axis=1, keepdims=True))
    pi = (1 / K.sum(unnormalised, axis=1, keepdims=True)) * unnormalised

    # exp keeps every scale strictly positive.
    sigma = K.exp(log_sigma)
    return pi, sigma, mu
def get_lossfunc(out_pi, out_sigma, out_mu, y):
# Negative log-probability of y under a mixture of Normals whose weights,
# locations and scales are out_pi, out_mu and out_sigma.
# This is the version from the question that misbehaves for batch_size > 1.
d0 = tfd.MixtureSameFamily(
mixture_distribution=tfd.Categorical(
probs=out_pi),
components_distribution=tfd.Normal(
loc=out_mu, scale=out_sigma,
),
)
# I suspect the problem is here
# NOTE(review): the fit call passes y with a trailing axis of length 1
# (values[:, np.newaxis]).  Presumably log_prob broadcasts that against the
# per-row distributions instead of pairing row i with y[i], which would be
# invisible when the batch holds a single row -- confirm.
return -1 * d0.log_prob(y)
def mdn_loss(num_components):
    """Return a Keras-style ``loss(y_true, y_pred)`` for an MDN.

    The returned function splits `y_pred` with ``get_mixture_coef`` using
    `num_components` and scores `y_true` with ``get_lossfunc``.
    """
    def loss(y_true, y_pred):
        # get_mixture_coef returns (pi, sigma, mu), which is exactly the
        # order of get_lossfunc's leading parameters, so unpack in place.
        return get_lossfunc(*get_mixture_coef(y_pred, num_components), y_true)
    return loss
# NOTE(review): newer Keras releases spell this argument `learning_rate`;
# confirm against the installed version.
opt = Adam(lr=.001)
model.compile(
    optimizer=opt,
    loss=mdn_loss(N_COMPONENTS),
)
# NOTE(review): with patience=5 and only epochs=2 below, this callback cannot
# trigger as configured.
es = EarlyStopping(monitor='val_loss',
                   min_delta=1e-5,
                   patience=5,
                   verbose=1, mode='auto')
validation = .15
# Hold out a random 15% of the rows for validation.
validate_idx = np.random.choice(df.index.values,
                                size=int(validation * df.shape[0]),
                                replace=False)
# Membership tests against a NumPy array rescan it for every row; a set makes
# the split linear while selecting exactly the same rows in the same order.
validate_lookup = set(validate_idx)
train_idx = [i for i in df.index.values if i not in validate_lookup]
x_cols = ['x_0', 'x_1', 'x_2', 'x_3']
# Targets get a trailing axis of length 1 via np.newaxis.
model.fit(x=df.loc[train_idx, x_cols].values,
          y=df.loc[train_idx, 'y'].values[:, np.newaxis],
          validation_data=(
              df.loc[validate_idx, x_cols].values,
              df.loc[validate_idx, 'y'].values[:, np.newaxis]),
          # model works when batch_size = 1
          # model fails when batch_size > 1
          epochs=2, batch_size=1, verbose=1, callbacks=[es])
def sample(output, n_samples, num_components):
    """Draw `n_samples` values from the mixture described by `output`.

    `output` is split into weights, scales and locations by
    ``get_mixture_coef``; the draw is converted with ``.numpy()`` before it
    is returned.
    """
    weights, scales, locs = get_mixture_coef(output, num_components)
    selector = tfd.Categorical(probs=weights)
    components = tfd.Normal(loc=locs, scale=scales)
    mixture = tfd.MixtureSameFamily(mixture_distribution=selector,
                                    components_distribution=components)
    drawn = mixture.sample(sample_shape=n_samples)
    return drawn.numpy()
# Raw network output for every training row.
yhat = model.predict(df.loc[train_idx, x_cols].values)
# NOTE(review): out_pi / out_sigma / out_mu are not used again in the code shown here.
out_pi, out_sigma, out_mu = get_mixture_coef(yhat, 2)
# Draw 1000 samples from the mixture predicted for the first training row only (yhat[:1]).
latent_1_samples = sample(yhat[:1], n_samples=1000, num_components=2)
# Flatten the draws into a single column for plotting.
latent_1_samples = pd.DataFrame({'latent_1_samples': latent_1_samples.ravel()})
fig, ax = plt.subplots()
bins = np.linspace(-4, 5, 9*4 + 1)
# NOTE(review): the label says 'Class 1', but nothing here checks which latent
# class the first training row belongs to -- confirm.
latent_1_samples.latent_1_samples.hist(ax=ax, bins=bins, label='Class 1: yHat', alpha=.4, density=True)
# True per-class densities of y, drawn as outlines for comparison.
df.y[df.latent_class == 0].hist(ax=ax, bins=bins, label='Class 0: True', density=True, histtype='step')
df.y[df.latent_class == 1].hist(ax=ax, bins=bins, label='Class 1: True', density=True, histtype='step')
ax.legend();
Thanks in advance!
Update
I found two ways to solve the problem, guided by this answer. Both solutions point to the fact that Keras is awkwardly broadcasting y to match y_pred:
def get_lossfunc(out_pi, out_sigma, out_mu, y):
    """Negative log-probability of the first column of `y` under the mixture.

    The mixture has weights `out_pi` and Normal components with locations
    `out_mu` and scales `out_sigma`.
    """
    selector = tfd.Categorical(probs=out_pi)
    components = tfd.Normal(loc=out_mu, scale=out_sigma)
    mixture = tfd.MixtureSameFamily(mixture_distribution=selector,
                                    components_distribution=components)
    # Score y[:, 0] rather than y itself.
    # this also works:
    # return -1 * mixture.log_prob(tensorflow.transpose(y))
    return -1 * mixture.log_prob(y[:, 0])
Posting the workaround here in the answer section, for the benefit of the community, even though Dan has already given it in the question.
The problem of predicting the same distribution for all y, ignoring x can be resolved in two ways.
Code for Solution 1 is mentioned below:
def get_lossfunc(out_pi, out_sigma, out_mu, y):
    """Negative log-probability of the transpose of `y` under the mixture.

    Solution 1: `y` is transposed before it is scored.
    """
    weights = tfd.Categorical(probs=out_pi)
    normals = tfd.Normal(loc=out_mu, scale=out_sigma)
    mixture = tfd.MixtureSameFamily(mixture_distribution=weights,
                                    components_distribution=normals)
    y_transposed = tensorflow.transpose(y)
    return -1 * mixture.log_prob(y_transposed)
Code for Solution 2 is mentioned below:
def get_lossfunc(out_pi, out_sigma, out_mu, y):
    """Negative log-probability of the first column of `y` under the mixture.

    Solution 2: only ``y[:, 0]`` is scored.
    """
    weights = tfd.Categorical(probs=out_pi)
    normals = tfd.Normal(loc=out_mu, scale=out_sigma)
    mixture = tfd.MixtureSameFamily(mixture_distribution=weights,
                                    components_distribution=normals)
    first_column = y[:, 0]
    return -1 * mixture.log_prob(first_column)
Hope this helps. Happy Learning!

Value Error due to Numpy returning an object

I'm trying to get the code at the end of this post to run.
However, I'm getting the following error when I try to fit my model:
"ValueError: setting an array element with a sequence."
I'm trying to use an RNN to predict the next 5 days of prices. So, in the function create_ts I'm trying to create two time series: one with the first X items and another with items X+1, X+2, X+3, X+4, and X+5 - these five items being the next five days of prices I'd like to predict.
I suspect the problem is here somewhere:
def create_ts(ds, series, day_gap):
    """Slice a series into (look-back window, following values) pairs.

    Args:
        ds: 2-D array; only column 0 is read.
        series: Number of consecutive rows in each input window.
        day_gap: Number of rows directly after the window used as its target.

    Returns:
        ``(x, y)`` where ``x`` has shape ``(samples, series)`` and ``y`` has
        shape ``(samples, day_gap)``.  Both are empty (zero rows) when `ds`
        is too short to hold one window plus its target.  A model trained on
        ``y`` needs ``day_gap`` output values per sample.
    """
    # Only start a window where a complete target still fits.  Targets cut
    # short by the end of the data have different lengths, and NumPy cannot
    # pack such rows into a numeric array -- that is what produced
    # "setting an array element with a sequence".
    n_samples = max(len(ds) - series - day_gap + 1, 0)
    x = [ds[i:i + series, 0] for i in range(n_samples)]
    y = [ds[i + series:i + series + day_gap, 0] for i in range(n_samples)]
    # The explicit reshape keeps the column count right even with zero rows.
    return np.array(x).reshape(-1, series), np.array(y).reshape(-1, day_gap)
series = 5
predict_days = 5
train_x, train_y = create_ts(stock_train, series, predict_days)
test_x, test_y = create_ts(stock_test, series, predict_days)
#reshape into LSTM format - samples, steps, features
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
#build model
model = Sequential()
model.add(LSTM(4, input_shape=(series, 1)))
# The goal is to forecast predict_days values per sample, so the output layer
# needs that many units; Dense(1) can only ever emit a single value.
model.add(Dense(predict_days))
model.compile(loss='mse', optimizer='adam')
#fit model
model.fit(train_x, train_y, epochs=100, batch_size=32)
Thanks in advance for any help!
Below is the full code piece:
from keras import backend as k
import os
from importlib import reload
def set_keras_backend(backend):
    """Switch Keras to `backend` unless it is already the active one.

    Sets the ``KERAS_BACKEND`` environment variable, reloads the backend
    module imported as ``k`` and asserts that the switch took effect.
    """
    if k.backend() == backend:
        # Already on the requested backend; nothing to reload.
        return
    os.environ['KERAS_BACKEND'] = backend
    reload(k)
    assert k.backend() == backend
set_keras_backend("cntk")
import numpy as np
import pandas as pd
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import math
# Fix NumPy's random seed.
np.random.seed(7)
#load dataset
fileloc = "C:\\Stock Data\\CL1.csv"
stock_data = pd.read_csv(fileloc)
# Bare expressions: they only display something in an interactive session.
stock_data.head()
stock_data.dtypes
# Parse the Date and Price columns, then index the frame by date.
stock_data['Date'] = pd.to_datetime(stock_data['Date'])
stock_data['Price'] = pd.to_numeric(stock_data['Price'], downcast = 'float')
stock_data.set_index('Date', inplace=True)
stock_close = stock_data['Price']
# Column vector (n, 1), the 2-D layout the scaler below is given.
stock_close = stock_close.values.reshape(len(stock_close), 1)
plt.plot(stock_close)
#normalize data
# NOTE(review): the scaler is fitted on the whole series before the train/test
# split below, so the test rows influence the scaling of the training rows --
# confirm this is acceptable.
scaler = MinMaxScaler(feature_range = (0,1))
stock_close = scaler.fit_transform(stock_close)
#split data into a train, test set
# First 70% of the rows for training, the remainder for testing (no shuffling).
train_size = int(len(stock_close)*0.7)
# NOTE(review): test_size is not used in the code shown here.
test_size = len(stock_close) - train_size
stock_train, stock_test = stock_close[0:train_size, :], stock_close[train_size:len(stock_close), :]
#convert the data into a time series looking back over a period of days
def create_ts(ds, series, day_gap):
    """Slice a series into (look-back window, following values) pairs.

    Args:
        ds: 2-D array; only column 0 is read.
        series: Number of consecutive rows in each input window.
        day_gap: Number of rows directly after the window used as its target.

    Returns:
        ``(x, y)`` where ``x`` has shape ``(samples, series)`` and ``y`` has
        shape ``(samples, day_gap)``.  Both are empty (zero rows) when `ds`
        is too short to hold one window plus its target.  A model trained on
        ``y`` needs ``day_gap`` output values per sample.
    """
    # Only start a window where a complete target still fits.  Targets cut
    # short by the end of the data have different lengths, and NumPy cannot
    # pack such rows into a numeric array -- that is what produced
    # "setting an array element with a sequence".
    n_samples = max(len(ds) - series - day_gap + 1, 0)
    x = [ds[i:i + series, 0] for i in range(n_samples)]
    y = [ds[i + series:i + series + day_gap, 0] for i in range(n_samples)]
    # The explicit reshape keeps the column count right even with zero rows.
    return np.array(x).reshape(-1, series), np.array(y).reshape(-1, day_gap)
series = 5
predict_days = 5
train_x, train_y = create_ts(stock_train, series, predict_days)
test_x, test_y = create_ts(stock_test, series, predict_days)
#reshape into LSTM format - samples, steps, features
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
#build model
model = Sequential()
model.add(LSTM(4, input_shape=(series, 1)))
# The goal is to forecast predict_days values per sample, so the output layer
# needs that many units; Dense(1) can only ever emit a single value.
model.add(Dense(predict_days))
model.compile(loss='mse', optimizer='adam')
#fit model
model.fit(train_x, train_y, epochs=100, batch_size=32)