RNN with mean squared error loss does not converge - tensorflow

I am learning RNNs through https://medium.com/#erikhallstrm/hello-world-rnn-83cd7105b767. I changed the loss function to mean squared error and found that it does not converge; the output gets stuck at 0.5. I suspect the mistake is in
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
But I don't know how to fix it. I am not familiar with the data types involved, so this may be a silly question. In case I haven't made myself clear, the full code is below:
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 1
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length
def generateData():
    x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
    y = np.roll(x, echo_step)
    y[0:echo_step] = 0
    x = x.reshape((batch_size, -1))  # The first index changing slowest, subseries as rows
    y = y.reshape((batch_size, -1))
    return (x, y)
tf.reset_default_graph()
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
init_state = tf.placeholder(tf.float32, [batch_size, state_size])
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)
# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)
# Forward pass
current_state = init_state
states_series = []
for current_input in inputs_series:
    current_input = tf.reshape(current_input, [batch_size, 1])
    input_and_state_concatenated = tf.concat([current_input, current_state], axis=1)  # Increasing number of columns
    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)  # Broadcasted addition
    states_series.append(next_state)
    current_state = next_state
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
#Loss function HERE
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
losses = tf.square(midlosses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    loss_list = []
    for epoch_idx in range(num_epochs):
        x, y = generateData()
        _current_state = np.zeros((batch_size, state_size))
        print("New data, epoch", epoch_idx)
        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length
            batchX = x[:, start_idx:end_idx]
            batchY = y[:, start_idx:end_idx]
            _total_loss, _train_step, _current_state, _logits_series, _midlosses = sess.run(
                [total_loss, train_step, current_state, logits_series, midlosses],
                feed_dict={
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                    init_state: _current_state
                })
            loss_list.append(_total_loss)
            if batch_idx % 100 == 0:
                print("Step", batch_idx, "Loss", _total_loss)

You just need to replace
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
with
logits_series = [tf.squeeze(tf.matmul(state, W2) + b2) for state in states_series]  # Broadcasted addition
and the problem is solved.
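For what it's worth, the shape hazard that the extra tf.squeeze guards against is easy to reproduce in isolation: each tf.matmul(state, W2) + b2 has shape (batch_size, 1), while each element of labels_series has shape (batch_size,), and subtracting tensors of those two ranks broadcasts into a (batch_size, batch_size) matrix rather than an elementwise difference. A minimal NumPy sketch of the pitfall (my own illustration, not part of the original post):
import numpy as np

batch_size = 5
logits = np.random.rand(batch_size, 1).astype(np.float32)  # like tf.matmul(state, W2) + b2, shape (5, 1)
labels = np.random.rand(batch_size).astype(np.float32)     # like one element of labels_series, shape (5,)

print((logits - labels).shape)            # (5, 5): every logit paired with every label
print((logits.squeeze() - labels).shape)  # (5,):   the intended elementwise difference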

Related

Keras Model works w/ 3 inputs but not 4

I'm trying to build a VAE for some time series data, but am having a hard time getting the model to work with 4 inputs instead of 3, and I'm not sure what's causing the problem.
Here's the complete code that I have:
# data for each time series
import yfinance as yf
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers
# load in the data
msft = yf.Ticker('MSFT').history(period = '5y')[['Close']]
googl = yf.Ticker('GOOGL').history(period = '5y')[['Close']]
amzn = yf.Ticker('AMZN').history(period = '5y')[['Close']]
vals = np.sin(np.linspace(-100, 100, msft.shape[0]))[:, None]
# scale the data for numeric stability
msft = StandardScaler().fit_transform(msft)
googl = StandardScaler().fit_transform(googl)
amzn = StandardScaler().fit_transform(amzn)
# global variables
latent_dim = 2
batch_size = 32
sequence_length = 30
# build time series samplers for each time series
c1 = keras.utils.timeseries_dataset_from_array(
    msft,
    targets = None,
    sequence_length = sequence_length
)
c2 = keras.utils.timeseries_dataset_from_array(
    googl,
    targets = None,
    sequence_length = sequence_length
)
c3 = keras.utils.timeseries_dataset_from_array(
    amzn,
    targets = None,
    sequence_length = sequence_length
)
c4 = keras.utils.timeseries_dataset_from_array(
    vals,
    targets = None,
    sequence_length = sequence_length
)
# add the encoder for the sine wave
sin_inputs = keras.layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
sx = layers.LSTM(64, return_sequences = True)(sin_inputs)
sx = layers.LSTM(64)(sx)
# build the encoders for each of the separate time series
msft_inputs = layers.Input(shape=(sequence_length, 1))
# stack two lstm layers
mx = layers.LSTM(64, return_sequences = True)(msft_inputs)
mx = layers.LSTM(64)(mx)
# now for google
googl_inputs = layers.Input(shape=(sequence_length, 1))
gx = layers.LSTM(64, return_sequences = True)(googl_inputs)
gx = layers.LSTM(64)(gx)
# and for amazon
amzn_inputs = layers.Input(shape = (sequence_length, 1))
ax = layers.LSTM(64, return_sequences = True)(amzn_inputs)
ax = layers.LSTM(64)(ax)
# now combine them together for a single joint time series!
x = layers.Concatenate()([mx, gx, ax, sx])
# pass into a dense layer
x = layers.Dense(64, activation = 'relu')(x)
# and finally pass them into the final decoder!
z_mean = layers.Dense(latent_dim, name = 'z_mean')(x)
z_logvar = layers.Dense(latent_dim, name = 'z_logvar')(x)
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs, sin_inputs], [z_mean, z_logvar], name = 'encoder')
class Sampler(layers.Layer):
    def call(self, z_mean, z_logvar):
        batch_size = tf.shape(z_mean)[0]
        n_dims = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape = (batch_size, n_dims))
        return z_mean + tf.exp(0.5 * z_logvar) * epsilon
latent_inputs = keras.Input(shape=(latent_dim,))
dec = layers.RepeatVector(sequence_length)(latent_inputs)
dec = layers.LSTM(64, return_sequences=True)(dec)
out = layers.TimeDistributed(layers.Dense(1))(dec)
decoder = keras.Model(latent_inputs, out)
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.sampler = Sampler()
        self.total_loss_tracker = keras.metrics.Mean(name = 'total_loss')
        self.reconstruction_loss_tracker = keras.metrics.Mean(name = 'reconstruction_loss')
        self.kl_loss_tracker = keras.metrics.Mean(name = 'kl_loss')

    @property
    def metrics(self):
        return [self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.kl_loss_tracker]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_logvar = self.encoder(data)
            z = self.sampler(z_mean, z_logvar)
            reconstruction = decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction),
                    axis = (1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_logvar - tf.square(z_mean) - tf.exp(z_logvar))
            total_loss = reconstruction_loss + tf.reduce_mean(kl_loss)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "total_loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam(), run_eagerly=False)
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat(), c4.repeat()), epochs = 10, steps_per_epoch = 10)
When I fit this model I get the following error:
ValueError: Data is expected to be in format `x`, `(x,)`, `(x, y)`, or `(x, y, sample_weight)`, found: (<tf.Tensor: shape=(128, 30, 1),
My issue is that this exact same model works when I only have 3 inputs instead of 4.
If I replace the lines where I specify the inputs, everything seems to work fine:
x = layers.Concatenate()([mx, gx, sx])
encoder = keras.Model([msft_inputs, googl_inputs, amzn_inputs], [z_mean, z_logvar], name = 'encoder')
vae.fit(zip(c1.repeat(), c2.repeat(), c3.repeat()), epochs = 10, steps_per_epoch = 10)
So I'm curious about what it is about my setup that is causing my model to break when I add the fourth input.
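A note from the error message itself (my own reading, not part of the original post): keras.Model.fit treats a tuple yielded by the data iterator as (x, y, sample_weight), so the 3-tuple coming out of Python's zip is silently unpacked into those three slots, while a 4-tuple matches none of the accepted formats and raises the ValueError above. One way to sidestep that, sketched below under the assumption that all four streams are meant to be model inputs, is to combine the datasets with tf.data.Dataset.zip and wrap them in an outer one-element tuple so the whole 4-tuple is treated as x:
import tensorflow as tf

a = tf.data.Dataset.range(4)
four_tuple = tf.data.Dataset.zip((a, a, a, a))      # each element is a plain 4-tuple -> rejected by fit()
as_single_x = tf.data.Dataset.zip(((a, a, a, a),))  # each element is ((t, t, t, t),) -> the 4-tuple becomes "x"
print(four_tuple.element_spec)
print(as_single_x.element_spec)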

How to make lstm/rnn focus more on certain parts of time series while less on other parts using tensorflow?

I have a time series prediction problem where most of the observed values (95%) are 0s, while the remaining values are non-zero. How can I make use of an RNN for this problem?
I want to predict surface flow from environmental data (air temperature, rainfall, humidity, etc.). We know surface flow is 0.0 for most of the year. However, I also don't want to simply ignore the 0s, as they represent the period of the year when surface flow is 0.0. The image below shows a possible observed output and three inputs. The three inputs here are just random, but in reality they will be data like rainfall, humidity, etc., and these input data have some periodic pattern.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
import tensorflow as tf
import sys
print(sys.version)
print('tensorflow version: ', tf.__version__)
#clean computation graph
tf.reset_default_graph()
tf.set_random_seed(777) # reproducibility
np.random.seed(0)
def MinMaxScaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # noise term prevents the zero division
    return numerator / (denominator + 1e-7)

class generate_data(object):

    def __init__(self, data_len, in_series, y_pred, seq_lengths, method='sum'):
        self.data_len = data_len
        self.data = None
        self.in_series = in_series   # number of input series
        self.y_pred = y_pred         # number of final outputs from model
        self.seq_lengths = seq_lengths
        self.method = method

    def _f(self, x):
        y = 0
        result = []
        for _ in x:
            result.append(y)
            y += np.random.normal(scale=1)
        return np.array(result)

    def _runningMean(self, x, N):
        return np.convolve(x, np.ones((N,))/N)[(N-1):]

    def sine(self):
        DATA = np.zeros((self.data_len, self.in_series))
        xx = [None]
        data_0 = np.sin(np.linspace(0, 20, self.data_len*self.in_series))
        xx = data_0.reshape(self.data_len, self.in_series)
        DATA[:, 0:self.in_series] = xx
        y = self._get_y(DATA)
        return xx, y, DATA

    def _get_y(self, xx):
        if self.method == 'sum':
            yy = np.array([np.sum(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'mean':
            yy = np.array([np.mean(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'self_mul':
            yy = np.array([np.prod(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'mean_mirror':
            yy = np.array([np.mean(xx[i, :]) for i in range(np.shape(xx)[0])])
        return yy

    def normalize(self, xx1, yy1):
        yy = [None]*len(yy1)
        YMinMax = {}
        xx = MinMaxScaler(xx1)
        for i in range(self.y_pred):
            YMinMax['ymin_' + str(i)] = np.min(yy1[0])
            YMinMax['ymax_' + str(i)] = np.max(yy1[0])
            yy[i] = MinMaxScaler(yy1[0])
        setattr(self, 'YMinMax', YMinMax)
        return xx, yy

    def create_dataset(self, xx, yy, percent_of_zeros):
        '''creates a dataset consisting of windows for x and y data'''
        dataX = self._build_input_windows(xx, self.seq_lengths)
        if self.y_pred > 1:
            pass
        elif self.y_pred > 1 and self.seq_lengths != any(self.seq_lengths):
            pass
        else:
            dataY = self._build_y_windows(yy[0], self.seq_lengths)
        indices = np.random.choice(np.arange(dataY.size), replace=False,
                                   size=int(dataY.size * percent_of_zeros))
        dataY[indices] = 0
        return dataX, dataY

    def _build_input_windows(self, time_series, seq_length):
        dataX = []
        for i in range(0, len(time_series) - seq_length):
            _x = time_series[i:i + seq_length, :]
            dataX.append(_x)
        return np.array(dataX)

    def _build_y_windows(self, iny, seq_length):
        dataY = []
        for i in range(0, len(iny) - seq_length):
            _y = iny[i + seq_length, ]  # Next close price
            dataY.append(_y)
        return np.array(dataY)

    def TrainTestSplit(self, dataX, dataY, train_frac):
        train_size = int(len(dataY) * train_frac)
        trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
        trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
        trainY = trainY.reshape(len(trainY), 1)
        testY = testY.reshape(len(testY), 1)
        return trainX, trainY, testX, testY, train_size
#training/hyper parameters
tot_epochs = 500
batch_size = 16
learning_rate = 0.01
seq_lengths = 5 #sequence lengths/window size for RNN
rnn_inputs = 3 # no of inputs for RNN
y_pred = 1
data_length = 1005 #this can be overwritten or useless
gen_data = generate_data(data_length, rnn_inputs, y_pred, seq_lengths, 'sum')
xx,yy,data_1 = gen_data.sine()
# xx = abs(xx)
train_frac = 0.8
xx1,yy1 = gen_data.normalize(xx,[yy])
zeros = 0.96
dataX, dataY = gen_data.create_dataset(xx1,yy1, zeros)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit( dataX, dataY, train_frac)
keep_prob = tf.placeholder(tf.float32)
x_placeholders = tf.placeholder(tf.float32, [None, 5, 3])
Y = tf.placeholder(tf.float32, [None, 1])
plt.plot(dataY, '.', label='output')
plt.plot(xx[:,0], '.', label='input1')
plt.plot(xx[:,1], '.', label='input2')
plt.plot(xx[:,2], '.', label='input3')
plt.legend()
# build neural network
with tf.variable_scope('scope0'):  # defining RNN
    # cell = tf.contrib.rnn.BasicLSTMCell(num_units=7, state_is_tuple=True, activation=tf.tanh)
    cell = tf.keras.layers.LSTMCell(units=128)
    outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
    # Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
    Y_pred1 = tf.keras.layers.Dense(1)(outputs1[:, -1])
    Y_pred = Y_pred1
## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y)) # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=41)
    writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)
    init = tf.global_variables_initializer()
    sess.run(init)
    # Training step
    for epoch in range(tot_epochs):
        total_batches = int(train_size / batch_size)  # total batches / no. of steps in an epoch
        # for batch in range(total_batches):
        _, step_loss = sess.run([train, loss], feed_dict={x_placeholders: trainX, Y: trainY, keep_prob: 0.5})
        print('epoch: # {} loss: {}'.format(epoch, step_loss))
    # evaluating on test data
    test_predict = sess.run(Y_pred, feed_dict={x_placeholders: testX, Y: trainY, keep_prob: 0.5})
    # evaluating on training data
    train_predict = sess.run(Y_pred, feed_dict={x_placeholders: trainX, Y: trainY, keep_prob: 0.5})
    rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
    print("RMSE: {}".format(rmse_val))
    # Plot predictions
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    fig.set_figwidth(14)
    fig.set_figheight(5)
    ax2.plot(testY, 'b', label='observed')
    ax2.plot(test_predict, 'k', label='predicted')
    ax2.legend(loc="best")
    ax2.set_xlabel("Time Period")
    ax2.set_title('Testing')
    ax1.plot(trainY, 'b', label='observed')
    ax1.plot(train_predict, 'k', label='predicted')
    ax1.legend(loc="best")
    ax1.set_xlabel("Time Period")
    ax1.set_ylabel("discharge (cms)")
    ax1.set_title('Training')
    plt.show()
The problem is that while training, the model focuses on the majority of values, i.e. the 0s, and thus makes its predictions equal to 0. How can I make the model focus on the non-zero values (positive surface flow) while at the same time also considering the 0s (when there is no surface flow)? I have read about attention mechanisms but have not understood how I can implement them in such scenarios.
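One common alternative to a full attention mechanism is to re-weight the loss so that the rare non-zero targets contribute more than the zeros. A rough sketch against the graph above (my own suggestion, not from the original post; nonzero_weight is a made-up hyperparameter to tune):
nonzero_weight = 10.0  # hypothetical: how much more a non-zero target counts
sample_weights = tf.where(tf.equal(Y, 0.0),
                          tf.ones_like(Y),                       # weight 1 where observed flow is 0
                          tf.fill(tf.shape(Y), nonzero_weight))  # larger weight where flow is non-zero
weighted_loss = tf.reduce_sum(sample_weights * tf.square(Y_pred - Y))
weighted_train = tf.train.AdamOptimizer(learning_rate).minimize(weighted_loss)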

Tensorflow: Can't overfit training data with batch size > 1

I coded a small RNN network with Tensorflow to return the total energy consumption given some parameters. There seems to be a problem in my code: it can't overfit the training data when I use a batch size > 1 (even with only 4 samples!). In the code below, the loss value reaches 0 when I set BatchSize to 1. However, with BatchSize set to 2, the network fails to overfit; the loss value goes toward 12.500000 and gets stuck there forever.
I suspect this has something to do with the LSTM states. I get the same problem if I don't update the state with each iteration. Or maybe it's the cost function? Any help is appreciated. Thanks.
import tensorflow as tf
import numpy as np
import os
from utils import loadData
Epochs = 10000
LearningRate = 0.0001
MaxGradNorm = 5
SeqLen = 1
NChannels = 28
NClasses = 1
NLayers = 2
NUnits = 256
BatchSize = 1
NumSamples = 4
#################################################################
trainingFile = "./training.dat"
X_values, Y_values = loadData(trainingFile, SeqLen, NumSamples)
X = tf.placeholder(tf.float32, [BatchSize, SeqLen, NChannels], name='inputs')
Y = tf.placeholder(tf.float32, [BatchSize, SeqLen, NClasses], name='labels')
keep_prob = tf.placeholder(tf.float32, name='keep')
initializer = tf.contrib.layers.xavier_initializer()
Xin = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))
lstm_layers = []
for i in range(NLayers):
    lstm_layer = tf.nn.rnn_cell.LSTMCell(num_units=NUnits, initializer=initializer, use_peepholes=True, state_is_tuple=True)
    dropout_layer = tf.contrib.rnn.DropoutWrapper(lstm_layer, output_keep_prob=keep_prob)
    # [LSTM ---> DROPOUT] ---> [LSTM ---> DROPOUT] ---> etc...
    lstm_layers.append(dropout_layer)
rnn = tf.nn.rnn_cell.MultiRNNCell(lstm_layers, state_is_tuple=True)
initial_state = rnn.zero_state(BatchSize, tf.float32)
outputs, final_state = tf.nn.static_rnn(rnn, Xin, dtype=tf.float32, initial_state=initial_state)
outputs = tf.transpose(outputs, [1,0,2])
outputs = tf.reshape(outputs, [-1, NUnits])
weight = tf.Variable(tf.truncated_normal([NUnits, NClasses]))
bias = tf.Variable(tf.constant(0.1, shape=[NClasses]))
prediction = tf.matmul(outputs, weight) + bias
prediction = tf.reshape(prediction, [BatchSize, SeqLen, NClasses])
cost = tf.reduce_sum(tf.pow(tf.subtract(prediction, Y), 2)) / (2 * BatchSize)
tvars = tf.trainable_variables()
grad, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), MaxGradNorm)
optimizer = tf.train.AdamOptimizer(learning_rate = LearningRate)
train_step = optimizer.apply_gradients(zip(grad, tvars))
sess = tf.Session()
sess.run(tf.global_variables_initializer())
iteration = 1
for e in range(0, Epochs):
    train_loss = []
    state = sess.run(initial_state)
    for i in xrange(0, len(X_values), BatchSize):
        x = X_values[i:i + BatchSize]
        y = Y_values[i:i + BatchSize]
        y = np.expand_dims(y, 2)
        feed = {X: x, Y: y, keep_prob: 1.0, initial_state: state}
        _, loss, state, pred = sess.run([train_step, cost, final_state, prediction], feed_dict=feed)
        train_loss.append(loss)
        iteration += 1
    print("Epoch: {}/{}".format(e, Epochs), "Iteration: {:d}".format(iteration), "Train average rmse: {:6f}".format(np.mean(train_loss)))
Normalizing the input data solved the problem.
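For completeness, a minimal sketch of what that could look like (my own illustration, not from the original answer), assuming X_values comes back from loadData as a NumPy array of shape (NumSamples, SeqLen, NChannels): z-score each input channel before the training loop.
# Standardize each channel to zero mean and unit variance (small epsilon avoids division by zero).
X_mean = X_values.mean(axis=(0, 1), keepdims=True)
X_std = X_values.std(axis=(0, 1), keepdims=True) + 1e-8
X_values = (X_values - X_mean) / X_std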

Failed to get the value of tensor

When running the MNIST data set, I want to know what my model actually outputs while training on a batch. Here is my code (I haven't added the optimizer and the loss function yet):
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
INPUT_NODE = 784 # the total pixels of the input images
OUTPUT_NODE = 10 # the output varies from 0 to 9
LAYER_NODE = 500
BATCH_SIZE = 100
TRAINING_STEPS = 10
def inference(input_tensor, avg_class, weight1, biase1, weight2, biase2):
    if avg_class == None:
        layer = tf.nn.relu(tf.matmul(input_tensor, weight1) + biase1)
        return tf.matmul(layer, weight2) + biase2
    else:
        layer = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weight1)) +
                           avg_class.average(biase1))
        return tf.matmul(layer, avg_class.average(weight2)) + avg_class.average(biase2)

def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name = 'x-input')
    y = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name = 'y-input')
    weight1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER_NODE], stddev = 0.1))
    biase1 = tf.Variable(tf.constant(0.1, shape = [LAYER_NODE]))
    weight2 = tf.Variable(tf.truncated_normal([LAYER_NODE, OUTPUT_NODE], stddev = 0.1))
    biase2 = tf.Variable(tf.constant(0.1, shape = [OUTPUT_NODE]))
    out = inference(x, None, weight1, biase1, weight2, biase2)
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y: mnist.test.labels}
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(out, feed_dict = {x: xs, y: ys})
            print(out)

def main(arg = None):
    mnist = input_data.read_data_sets("/home/vincent/Tensorflow/MNIST/data/", one_hot = True)
    train(mnist)

if __name__ == '__main__':
    tf.app.run()
I tried to print it out and got:
Tensor("add_1:0", shape=(?, 10), dtype=float32)
If I want to know the value of out, what should I do?
I tried print(out.eval()), and it raised an error.
out is a tensor object. If you want to get its value, replace
sess.run(out, feed_dict= {x:xs, y:ys})
print(out)
with
res_out=sess.run(out, feed_dict= {x:xs, y:ys})
print(res_out)
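As a side note (not part of the original answer), Tensor.eval would also work, but only with a session and the same feed_dict, which is why a bare print(out.eval()) raised an error:
res_out = out.eval(feed_dict={x: xs, y: ys}, session=sess)  # equivalent to sess.run(out, ...)
print(res_out)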

Creating a highly customizable RNN in Tensorflow

I am trying to implement an RNN without using the RNN functions provided by tensorflow. Here is the code I tried, which eventually gave me an error:
import tensorflow as tf
tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=(5,5))
InitialState = tf.zeros((5,1))
h = InitialState
W1 = tf.Variable(tf.random_normal([5, 5], stddev=0.35),
                 name="W1")
W2 = tf.Variable(tf.random_normal([5, 5], stddev=0.35),
                 name="W2")
for k in range(5):
    h = tf.matmul(W1, h) + tf.matmul(W2, x[:, k:(k+1)])
    h = tf.sigmoid(h)
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    a = sess.run([h], feed_dict = {x: tf.ones((5, 5))})
How can I implement an RNN from scratch? Is there an example online?
import tensorflow as tf
import numpy as np
hidden_size = 2 # hidden layer of two neurons
input_size = 5
# Weight of x will be (hidden_layer_size x input_size)
Wx = tf.Variable(tf.random_normal([hidden_size, input_size], stddev=0.35),
                 name="Wx")
# Weight of y will be (input_size x hidden_layer_size)
Wy = tf.Variable(tf.random_normal([input_size, hidden_size], stddev=0.35),
                 name="Wy")
# Weight of h will be (hidden_size, hidden_size)
Wh = tf.Variable(tf.random_normal([hidden_size, hidden_size], stddev=0.35),
                 name="Wh")
h = tf.zeros((hidden_size, input_size))
x = tf.placeholder(dtype = tf.float32,
                   shape = (input_size, input_size))
y = tf.placeholder(dtype = tf.float32,
                   shape = (input_size, input_size))
feed_dict = {
    x: np.ones((5, 5), dtype = np.float32),
    y: np.ones((5, 5), dtype = np.float32)
}
# RNN step
for _ in range(input_size):
    h = tf.tanh(tf.matmul(Wh, h) + tf.matmul(Wx, x))
    o = tf.nn.softmax(h)
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op, feed_dict = feed_dict)
h_new, y_hat = sess.run([h, o], feed_dict = feed_dict)
print(h_new)
print(y_hat)
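One detail worth calling out (my own observation, not part of the original answer): the feed_dict above uses np.ones arrays, whereas the question's snippet fed tf.ones((5,5)). feed_dict values must be plain Python or NumPy values, and passing a tf.Tensor there is what triggered the error in the original code.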