How to share an LSTM cell between 2 separate inputs in TensorFlow? - tensorflow

Assume I have 2 inputs, q and a. How can I make the 2 inputs share 1 LSTM cell? Part of my code is as below:
def lstmnets(self, sequence, seq_len):
    seq_embeds = self.embeds(sequence)
    # lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_size)
    lstm_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_size)
    init_state = lstm_cell.zero_state(self.batch_size, dtype=tf.float32)
    lstm_out, final_state = tf.nn.dynamic_rnn(lstm_cell, seq_embeds, initial_state=init_state, sequence_length=seq_len)
    return lstm_out
def inference(self, q, a, q_len, a_len):
    with tf.variable_scope('lstmnets') as scope:
        query_rep = self.lstmnets(q, q_len)
        scope.reuse_variables()
        title_rep = self.lstmnets(a, a_len)
But with this code, my structure has 2 stacked LSTMs, as in the figure below. How can I use just one LSTM for both? In addition, how can I initialize the LSTM weights and add them to a histogram summary? So far I have found no related tutorials for this. Thanks.

Your code seems to be fine, as it uses scope.reuse_variables() to share the LSTM weights. The best way to check is to print the variables in the graph and verify that the lstm_cell variables are declared only once. So in your inference function, print the variable names:
def inference(self, q, a, q_len, a_len):
    with tf.variable_scope('lstmnets') as scope:
        query_rep = self.lstmnets(q, q_len)
        scope.reuse_variables()
        title_rep = self.lstmnets(a, a_len)
    for v in tf.global_variables():
        print(v.name)
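As for the second part of the question, a minimal sketch under the same TF 1.x API (the initializer choice here is just an illustration): pass an initializer to the cell, and attach a tf.summary.histogram to each LSTM variable after the graph is built.
lstm_cell = tf.nn.rnn_cell.LSTMCell(
    self.hidden_size,
    initializer=tf.random_uniform_initializer(-0.1, 0.1))  # custom weight init

# after inference() has built the graph, summarize every LSTM variable
for v in tf.global_variables():
    if 'lstmnets' in v.name:
        tf.summary.histogram(v.name.replace(':', '_'), v)  # ':' is not allowed in summary names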

Related

Parameter computation of an LSTM

I am trying to compute the total number of parameters of an LSTM model, and I have some confusion.
I have searched for answers, such as this post and this post, but I don't understand what role the hidden units play in the parameter computation (h1=64, h2=128 in my case).
import tensorflow as tf

b, t, d_in, d_out = 32, 256, 161, 257
data = tf.placeholder("float", [b, t, d_in])     # [batch, timestep, dim_in]
labels = tf.placeholder("float", [b, t, d_out])  # [batch, timestep, dim_out]
myinput = data
batch_size, seq_len, dim_in = myinput.shape
rnn_layers = []
h1 = 64
c1 = tf.nn.rnn_cell.LSTMCell(h1)
rnn_layers.append(c1)
h2 = 128
c2 = tf.nn.rnn_cell.LSTMCell(h1)  # note: this passes h1, not h2
rnn_layers.append(c2)
multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
rnnoutput, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
                                     inputs=myinput, dtype=tf.float32)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
all_trainable_vars = tf.reduce_sum([tf.reduce_prod(v.shape) for v in tf.trainable_variables()])
print(sess.run(all_trainable_vars))
I printed the total number of parameters using Tensorflow, and it showed that the total number of parameters is 90880. How can I get this result step by step? Thank you.
In your case, you defined an LSTM cell via the line c1 = tf.nn.rnn_cell.LSTMCell(h1). To answer your question, here I will introduce the mathematical definition of the LSTM, as in the picture below (picture source: wikipedia-lstm):
t: means at time t.
f_t is named the forget gate.
i_t is named the input gate.
o_t is named the output gate.
c_t, h_t are named the cell state and hidden state of the LSTM cell, respectively.
For tf.nn.rnn_cell.LSTMCell(h1), h1=64 is the dimension of h_t, i.e. dim(h_t) = 64.
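From there, the count follows step by step: each LSTM layer has 4 gate blocks (input, forget, output, and the cell candidate), each with a kernel of shape [input_dim + hidden_dim, hidden_dim] plus a bias of shape [hidden_dim]. A quick check of the arithmetic (note that the code above passes h1 to both cells, so both layers have 64 units):
def lstm_params(input_dim, hidden_dim):
    # 4 gates, each with a (input_dim + hidden_dim) x hidden_dim kernel plus bias
    return 4 * ((input_dim + hidden_dim) * hidden_dim + hidden_dim)

layer1 = lstm_params(161, 64)  # d_in = 161, h1 = 64                 -> 57856
layer2 = lstm_params(64, 64)   # input is layer 1's 64-dim output;
                               # the code passes h1, not h2, again   -> 33024
print(layer1 + layer2)         # 90880, matching the TF count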

Tensorflow: reusing a multi-layered LSTM network

I am trying to use the same LSTM architecture for different inputs, and hence I pass the same cells when unrolling the bidirectional LSTM over the different inputs. I am not sure whether this creates two entirely different LSTM networks. It looks like there are two different nodes in my graph. My code and graph look something like this:
def get_multirnn_cell(self):
    cells = []
    for _ in range(config.n_layers):
        cell = tf.nn.rnn_cell.LSTMCell(config.n_hidden, initializer=tf.glorot_uniform_initializer())
        dropout_cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell,
                                                     input_keep_prob=config.keep_prob,
                                                     output_keep_prob=config.keep_prob)
        cells.append(dropout_cell)
    return cells

def add_lstm_op(self):
    with tf.variable_scope('lstm'):
        cells_fw = self.get_multirnn_cell()
        cells_bw = self.get_multirnn_cell()
        cell_fw = tf.nn.rnn_cell.MultiRNNCell(cells_fw)
        cell_bw = tf.nn.rnn_cell.MultiRNNCell(cells_bw)
        (_, _), (state_one_fw, state_one_bw) = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw,
                                                                               inputs=self.question_one,
                                                                               sequence_length=self.seql_one,
                                                                               dtype=tf.float32)
        self.state_one = tf.concat([state_one_fw[-1].h, state_one_bw[-1].h], name='state_one', axis=-1)
        # self.state_one = tf.concat([state_one_fw, state_one_bw], axis=-1)
        # [batch_size, 2*hidden_size]
        (_, _), (state_two_fw, state_two_bw) = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw,
                                                                               inputs=self.question_two,
                                                                               sequence_length=self.seql_two,
                                                                               dtype=tf.float32)
        self.state_two = tf.concat([state_two_fw[-1].h, state_two_bw[-1].h], name='state_two', axis=-1)
If you want to reuse the multirnn_cell, you can pass reuse=tf.AUTO_REUSE to the variable_scope:
with tf.variable_scope('lstm', reuse=tf.AUTO_REUSE):
See the doc.
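A minimal sketch of how that looks in add_lstm_op (the body stays unchanged), plus a quick check that the variables are created only once:
def add_lstm_op(self):
    # AUTO_REUSE creates the LSTM variables on the first call and reuses
    # them on the second, so both bidirectional_dynamic_rnn calls share
    # one set of weights instead of building a second network.
    with tf.variable_scope('lstm', reuse=tf.AUTO_REUSE):
        ...  # same body as above

# sanity check: each LSTM variable should be printed exactly once
for v in tf.trainable_variables():
    if v.name.startswith('lstm'):
        print(v.name)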

How to use multilayered bidirectional LSTM in Tensorflow?

I want to know how to use a multilayered bidirectional LSTM in Tensorflow.
I have already implemented a bidirectional LSTM, but I want to compare that model with a model with added multi-layers.
How should I add some code in this part?
x = tf.unstack(tf.transpose(x, perm=[1, 0, 2]))
# print(x[0].get_shape())

# Define lstm cells with tensorflow
# Forward direction cell
lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Backward direction cell
lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)

# Get lstm cell output
try:
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                 dtype=tf.float32)
except Exception:  # Old TensorFlow version only returns outputs not states
    outputs = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                           dtype=tf.float32)

# Linear activation, using rnn inner loop last output
outputs = tf.stack(outputs, axis=1)
outputs = tf.reshape(outputs, (batch_size*n_steps, n_hidden*2))
outputs = tf.matmul(outputs, weights['out']) + biases['out']
outputs = tf.reshape(outputs, (batch_size, n_steps, n_classes))
You can use two different approaches to apply a multilayer BiLSTM model:
1) Use the output of the previous BiLSTM layer as the input to the next one. To start, you should create arrays of forward and backward cells, each of length num_layers. Then:
for n in range(num_layers):
    cell_fw = cell_forw[n]
    cell_bw = cell_back[n]
    state_fw = cell_fw.zero_state(batch_size, tf.float32)
    state_bw = cell_bw.zero_state(batch_size, tf.float32)
    (output_fw, output_bw), last_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, output,
                                                                         initial_state_fw=state_fw,
                                                                         initial_state_bw=state_bw,
                                                                         scope='BLSTM_' + str(n),
                                                                         dtype=tf.float32)
    output = tf.concat([output_fw, output_bw], axis=2)
2) Another approach worth a look is the stacked BiLSTM.
This is essentially the same as the first answer, but with a small variation in the usage of the scope name and with added dropout wrappers. It also takes care of the error the first answer raises about variable scope.
def bidirectional_lstm(input_data, num_layers, rnn_size, keep_prob):
    output = input_data
    for layer in range(num_layers):
        with tf.variable_scope('encoder_{}'.format(layer), reuse=tf.AUTO_REUSE):
            # By giving a different variable scope to each layer, I've ensured that
            # the weights are not shared among the layers. If you want to share the
            # weights, you can do that by giving variable_scope as "encoder" but do
            # make sure first that reuse is set to tf.AUTO_REUSE
            cell_fw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.truncated_normal_initializer(-0.1, 0.1, seed=2))
            cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=keep_prob)
            cell_bw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.truncated_normal_initializer(-0.1, 0.1, seed=2))
            cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=keep_prob)

            outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw,
                                                              cell_bw,
                                                              output,
                                                              dtype=tf.float32)
            # Concat the forward and backward outputs
            output = tf.concat(outputs, 2)
    return output
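For example, a hypothetical call (the shapes are illustrative only):
# a batch of 50-step sequences with 100 features each
input_data = tf.placeholder(tf.float32, [None, 50, 100])
encoded = bidirectional_lstm(input_data, num_layers=2, rnn_size=128, keep_prob=0.8)
# encoded has shape [batch_size, 50, 2 * rnn_size]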
On top of Taras's answer, here is another example, using just a 2-layer bidirectional RNN with GRU cells:
embedding_weights = tf.Variable(tf.random_uniform([vocabulary_size, state_size], -1.0, 1.0))
embedding_vectors = tf.nn.embedding_lookup(embedding_weights, tokens)

# First BLSTM
cell = tf.nn.rnn_cell.GRUCell(state_size)
cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=1-dropout)
(forward_output, backward_output), _ = \
    tf.nn.bidirectional_dynamic_rnn(cell, cell, inputs=embedding_vectors,
                                    sequence_length=lengths, dtype=tf.float32, scope='BLSTM_1')
outputs = tf.concat([forward_output, backward_output], axis=2)

# Second BLSTM using the output of previous layer as an input.
cell2 = tf.nn.rnn_cell.GRUCell(state_size)
cell2 = tf.nn.rnn_cell.DropoutWrapper(cell2, output_keep_prob=1-dropout)
(forward_output, backward_output), _ = \
    tf.nn.bidirectional_dynamic_rnn(cell2, cell2, inputs=outputs,
                                    sequence_length=lengths, dtype=tf.float32, scope='BLSTM_2')
outputs = tf.concat([forward_output, backward_output], axis=2)
BTW, don't forget to add different scope names. Hope this helps.
As @Taras pointed out, you can use:
(1) tf.nn.bidirectional_dynamic_rnn()
(2) tf.contrib.rnn.stack_bidirectional_dynamic_rnn()
All previous answers only capture (1), so I give some details on (2), in particular since it usually outperforms (1). For an intuition about the different connectivities, see here.
Let's say you want to create a stack of 3 BLSTM layers, each with 64 nodes:
num_layers = 3
num_nodes = 64

# Define LSTM cells
enc_fw_cells = [LSTMCell(num_nodes) for layer in range(num_layers)]
enc_bw_cells = [LSTMCell(num_nodes) for layer in range(num_layers)]

# Connect LSTM cells bidirectionally and stack
(all_states, fw_state, bw_state) = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
    cells_fw=enc_fw_cells, cells_bw=enc_bw_cells, inputs=input_embed, dtype=tf.float32)

# Concatenate results
for k in range(num_layers):
    if k == 0:
        con_c = tf.concat((fw_state[k].c, bw_state[k].c), 1)
        con_h = tf.concat((fw_state[k].h, bw_state[k].h), 1)
    else:
        con_c = tf.concat((con_c, fw_state[k].c, bw_state[k].c), 1)
        con_h = tf.concat((con_h, fw_state[k].h, bw_state[k].h), 1)

output = tf.contrib.rnn.LSTMStateTuple(c=con_c, h=con_h)
In this case, I use the final states of the stacked biRNN rather than the states at all timesteps (saved in all_states), since I was using an encoder-decoder scheme, where the above code was only the encoder.

Tensorflow 1.0 LSTM Cell in dynamic_rnn throws dimension error

I am trying to implement an LSTM model as a model_fn input to an Estimator. My X is only a .txt file with a time series of prices. Before going into my first hidden layer, I try to define the LSTM cell as:
def lstm_cell():
    return tf.contrib.rnn.BasicLSTMCell(
        size, forget_bias=0.0, state_is_tuple=True)

attn_cell = lstm_cell
if is_training and keep_prob < 1:
    def attn_cell():
        return tf.contrib.rnn.DropoutWrapper(
            lstm_cell(), output_keep_prob=keep_prob)

cell = tf.contrib.rnn.MultiRNNCell([attn_cell() for _ in range(num_layers)], state_is_tuple=True)
initial_state = cell.zero_state(batch_size, data_type())

inputs = tf.unstack(X, num=num_steps, axis=0)
outputs = []
outputs, state = tf.nn.dynamic_rnn(cell, inputs,
                                   initial_state=initial_state)
This then is supposed to go into:
first_hidden_layer = tf.contrib.layers.relu(outputs, 1000)
Unfortunately, it throws an error indicating "ValueError: Dimension must be 1 but is 3 for 'transpose' (op: 'Transpose') with input shapes: [1], [3]."
I gather that my problem is the "inputs" tensor. According to its description, the inputs variable is supposed to be a tensor of shape [batch_size, max_time, ...], but I have no idea how to translate this into the above structure since, through the estimator, only input values X and target values y are fed to the system. So my question is how to create a tensor that can serve as the inputs variable to the dynamic_rnn class.
Thanks a lot.
I believe you don't need the line:
inputs = tf.unstack(X, num=num_steps, axis=0)
You can supply X directly to dynamic_rnn, since dynamic_rnn doesn't take a list of tensors; it takes one tensor, where the time axis is dimension 0 (if time_major == True) or dimension 1 (if time_major == False).
Actually, it seems that X has only 2 dimensions, since inputs is a list of 1-dimensional tensors (as indicated by the error message). So you should replace the unstack line with:
inputs = tf.expand_dims(X, axis=2)
This will add a 3rd dimension of size 1, which dynamic_rnn needs.
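Putting it together, a minimal sketch (reusing cell and initial_state from the snippet above):
# X: [batch_size, num_steps] series of prices -> add a feature dimension
inputs = tf.expand_dims(X, axis=2)  # [batch_size, num_steps, 1]
outputs, state = tf.nn.dynamic_rnn(cell, inputs,
                                   initial_state=initial_state)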

How to stack LSTM layers using TensorFlow

What I have is the following, which I believe is a network with one hidden LSTM layer:
# Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10

# Network Parameters
n_input = 13
n_steps = 10
n_hidden = 512
n_classes = 13

# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])

# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}
However, I am trying to build an LSTM network using TensorFlow to predict power consumption. I have been looking around to find a good example, but I could not find any model with 2 hidden LSTM layers. Here's the model that I would like to build:
1 input layer,
1 output layer,
2 hidden LSTM layers (with 512 neurons in each),
time step (sequence length): 10
Could anyone guide me to build this using TensorFlow (from defining weights and building the input shape, to training, predicting, and the use of an optimizer or cost function)? Any help would be much appreciated.
Thank you so much in advance!
Here is how I do it in a translation model with GRU cells; you can just replace the GRU with an LSTM. It is really easy: just use tf.nn.rnn_cell.MultiRNNCell with a list of the multiple cells it should wrap. In the code below I am manually unrolling it, but you can pass it to tf.nn.dynamic_rnn or tf.nn.rnn as well.
from tensorflow.contrib import rnn

y = input_tensor
with tf.variable_scope('encoder') as scope:
    rnn_cell = rnn.MultiRNNCell([rnn.GRUCell(1024) for _ in range(3)])
    state = tf.zeros((BATCH_SIZE, rnn_cell.state_size))
    output = [None] * TIME_STEPS
    for t in reversed(range(TIME_STEPS)):
        y_t = tf.reshape(y[:, t, :], (BATCH_SIZE, -1))
        output[t], state = rnn_cell(y_t, state)
        scope.reuse_variables()
    y = tf.pack(output, 1)  # tf.pack was renamed tf.stack in later TF versions
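Equivalently, a sketch that lets tf.nn.dynamic_rnn do the unrolling instead of the manual loop (note that dynamic_rnn runs forward in time, whereas the loop above runs in reversed order):
# the same stacked cell, unrolled by dynamic_rnn over the time axis
rnn_cell = rnn.MultiRNNCell([rnn.GRUCell(1024) for _ in range(3)])
y, state = tf.nn.dynamic_rnn(rnn_cell, input_tensor,
                             dtype=tf.float32, scope='encoder')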
First, you need some placeholders for your training data (one batch):
x_input = tf.placeholder(tf.float32, [batch_size, truncated_series_length, 1])
y_output = tf.placeholder(tf.float32, [batch_size, truncated_series_length, 1])
An LSTM needs a state, which consists of two components: the hidden state and the cell state; a very good guide is here: https://arxiv.org/pdf/1506.00019.pdf. For every layer in the LSTM you have one cell state and one hidden state.
The problem is that Tensorflow stores this in an LSTMStateTuple, which you cannot feed into a placeholder. So you need to store it in a tensor and then unpack it into a tuple:
state_placeholder = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
l = tf.unpack(state_placeholder, axis=0)  # tf.unpack was renamed tf.unstack in later TF versions
rnn_tuple_state = tuple(
    [tf.nn.rnn_cell.LSTMStateTuple(l[idx][0], l[idx][1])
     for idx in range(num_layers)]
)
Then you can use the built-in Tensorflow API to create the stacked LSTM layer.
cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
# note: newer TF versions require a separate cell object per layer instead of [cell]*num_layers
cell = tf.nn.rnn_cell.MultiRNNCell([cell]*num_layers, state_is_tuple=True)
outputs, state = tf.nn.dynamic_rnn(cell, x_input, initial_state=rnn_tuple_state)
From here you continue with the outputs to calculate logits and then a loss with respect to y_output.
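A minimal sketch of that step (the projection size and loss are illustrative; pick whatever fits your task):
# project each timestep's output onto the target dimension and compare
# against the y_output placeholder with a mean-squared-error loss
logits = tf.layers.dense(outputs, 1)  # [batch_size, truncated_series_length, 1]
loss = tf.reduce_mean(tf.squared_difference(logits, y_output))
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)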
Then you run each batch with sess.run, using truncated backpropagation (a good explanation is here: http://r2rt.com/styles-of-truncated-backpropagation.html):
init_state = np.zeros((num_layers, 2, batch_size, state_size))
...current_state... = sess.run([...state...], feed_dict={x_input:batch_in, state_placeholder:current_state ...})
current_state = np.array(current_state)
You will have to convert the state to a numpy array before feeding it again.
Perhaps it is better to use a library like Tflearn or Keras instead?
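For comparison, a minimal Keras sketch of the two-hidden-layer model from the question (n_steps=10, n_input=13, n_hidden=512, n_classes=13; the loss choice is just an illustration):
from keras.models import Sequential
from keras.layers import LSTM, Dense

model = Sequential([
    LSTM(512, return_sequences=True, input_shape=(10, 13)),  # hidden LSTM layer 1
    LSTM(512),                                               # hidden LSTM layer 2
    Dense(13),                                               # output layer
])
model.compile(optimizer='adam', loss='mean_squared_error')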