I am building an RNN and I use tf.nn.dynamic_rnn to yield the output and state.
The code is as follows (TF version 1.3):
import tensorflow as tf

def lstm_cell():
    return tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(128), output_keep_prob=0.7)

cell = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(3)])
initial_state = cell.zero_state(1, tf.float32)
layer = tf.placeholder(tf.float32, [1, 1, 36])
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=layer, initial_state=initial_state)
Since the input tensor always has batch size 1, initial_state and state also have batch size 1. layer is an input with batch_size = 1 as well, and for each cell there are 36 nodes (the size of the embedded sequence). Each layer has lstm_size 128.
The problem comes when I loop the RNN cell.
rnn_outputs_sequence = outputs
for i in range(1, num_pics, 1):
    outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=outputs, initial_state=state)
    rnn_outputs_sequence = tf.concat((rnn_outputs_sequence, outputs), axis=1)
rnn_outputs_sequence is expected to have shape [1, num_pics, 36]. However, this triggers an error:
Trying to share variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, but specified shape (256, 512) and found shape (164, 512).
I cannot figure out where this shape [164, 512] comes from.
Can anyone help me out with this?
Thanks.
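For reference, BasicLSTMCell builds a single kernel of shape [input_size + num_units, 4 * num_units]. The first call sees 36 input features, giving (36 + 128, 4 * 128) = (164, 512); feeding the 128-dimensional outputs back in asks for (128 + 128, 512) = (256, 512), hence the conflict. The working version below resolves it by projecting each output back to 36 dimensions with a dense layer before reusing it as input: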
import tensorflow as tf

def lstm_cell():
    return tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(128), output_keep_prob=0.7)

cell = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(2)])
initial_state = cell.zero_state(1, tf.float32)
layer = tf.placeholder(tf.float32, [1, 1, 36])
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=layer, initial_state=initial_state)

# Project the 128-dimensional LSTM output back to 36 dimensions so it
# matches the input size the cell expects on the next step.
outputs = tf.reshape(outputs, shape=[1, -1])
outputs = tf.layers.dense(outputs, 36,
                          kernel_initializer=tf.contrib.layers.xavier_initializer(uniform=False))
outputs = tf.reshape(outputs, shape=[1, 1, -1])
rnn_outputs_sequence = outputs
print(outputs)
for i in range(1, 16, 1):
    outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=outputs, initial_state=state)
    outputs = tf.reshape(outputs, shape=[1, -1])
    outputs = tf.layers.dense(outputs, 36,
                              kernel_initializer=tf.contrib.layers.xavier_initializer(uniform=False))
    outputs = tf.reshape(outputs, shape=[1, 1, -1])
    print(outputs)
    rnn_outputs_sequence = tf.concat((rnn_outputs_sequence, outputs), axis=1)
print(rnn_outputs_sequence)
I tried a convolutional neural network with TensorFlow. However, the shape causes an error.
First, here is part of the main function.
while True:
    with mss.mss() as sct:
        Game_Scr = np.array(sct.grab(Game_Scr_pos))[:, :, :3]
        cv2.imshow('Game_Src', Game_Scr)
        cv2.waitKey(0)
        Game_Scr = cv2.resize(Game_Scr, dsize=(960, 540), interpolation=cv2.INTER_AREA)
        print(Game_Scr.shape)
        print(Convolution(Game_Scr))
Second is the function I call.
def Convolution(img):
    kernel = tf.Variable(tf.truncated_normal(shape=[4], stddev=0.1))
    sess = tf.Session()
    with tf.Session() as sess:
        img = img.astype('float32')
        Bias1 = tf.Variable(tf.truncated_normal(shape=[4], stddev=0.1))
        conv2d = tf.nn.conv2d(img, kernel, strides=[1, 1, 1, 1], padding='SAME')  # + Bias1
        conv2d = sess.run(conv2d)
        return conv2d
ValueError: Shape must be rank 4 but is rank 3 for 'Conv2D' (op: 'Conv2D') with input shapes: [540,960,3], [4].
I tried changing the shape many times, but I get the same error.
Try replacing
img = img.astype('float32')
with
img = tf.expand_dims(img.astype('float32'), 0)
The input to tf.nn.conv2d should be 4-dimensional: (batch_size, image_height, image_width, image_channels). You were missing the batch_size dimension; tf.expand_dims just adds that dimension (with a batch_size of 1, since you only have one image).
As per the official documentation here, the input tensor should be of shape [batch, in_height, in_width, in_channels] and the filter / kernel tensor should be of shape [filter_height, filter_width, in_channels, out_channels].
Try by changing your Convolution function to something like this:
def Convolution(img):
    kernel = tf.Variable(tf.truncated_normal(shape=[200, 200, 3, 3], stddev=0.1))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        img = img.astype('float32')
        # Add a batch dimension so the input is rank 4: [1, height, width, channels]
        conv2d = tf.nn.conv2d(np.expand_dims(img, 0), kernel, strides=[1, 1, 1, 1], padding='SAME')  # + Bias1
        conv2d = sess.run(conv2d)
        return conv2d
I am looking for methods to embed variable-length sequences of float values into fixed-size vectors. The input is formatted as follows:
[f1,f2,f3,f4]->[f1,f2,f3,f4]->[f1,f2,f3,f4]-> ... -> [f1,f2,f3,f4]
[f1,f2,f3,f4]->[f1,f2,f3,f4]->[f1,f2,f3,f4]->[f1,f2,f3,f4]-> ... -> [f1,f2,f3,f4]
...
[f1,f2,f3,f4]-> ... -> ->[f1,f2,f3,f4]
Each line is a variable-length sequence, with max length 60. Each unit in a sequence is a tuple of 4 float values. I have already padded all sequences with zeros to the same length.
The following architecture seems to solve my problem if I train it to reproduce the input as output; I need the thought vector in the center as the embedding for the sequences.
In TensorFlow, I have found two candidate methods: tf.contrib.legacy_seq2seq.basic_rnn_seq2seq and tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq.
However, these two methods seem to be meant for NLP problems, where the inputs must be discrete values representing words.
So, is there another function that solves my problem?
All you need is an RNN, not the seq2seq model, since seq2seq comes with an additional decoder, which is unnecessary in your case.
Example code:
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn

input_size = 4
max_length = 60
hidden_size = 64
output_size = 4

x = tf.placeholder(tf.float32, shape=[None, max_length, input_size], name='x')
seqlen = tf.placeholder(tf.int64, shape=[None], name='seqlen')

lstm_cell = rnn.BasicLSTMCell(hidden_size, forget_bias=1.0)
outputs, states = tf.nn.dynamic_rnn(cell=lstm_cell, inputs=x, sequence_length=seqlen, dtype=tf.float32)
# states is an LSTMStateTuple (c, h); take h as the fixed-size encoding
encoded_states = states[-1]

W = tf.get_variable(
    name='W',
    shape=[hidden_size, output_size],
    dtype=tf.float32,
    initializer=tf.random_normal_initializer())
b = tf.get_variable(
    name='b',
    shape=[output_size],
    dtype=tf.float32,
    initializer=tf.random_normal_initializer())
z = tf.matmul(encoded_states, W) + b
results = tf.sigmoid(z)

###########################
## cost computation and training components go here
# e.g.
# targets = tf.placeholder(tf.float32, shape=[None, input_size], name='targets')
# cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=targets, logits=z))
# optimizer = tf.train.AdamOptimizer(learning_rate=0.1).minimize(cost)
###############################

init = tf.global_variables_initializer()

batch_size = 4
data_in = np.zeros((batch_size, max_length, input_size), dtype='float32')
data_in[0, :4, :] = np.random.rand(4, input_size)
data_in[1, :6, :] = np.random.rand(6, input_size)
data_in[2, :20, :] = np.random.rand(20, input_size)
data_in[3, :, :] = np.random.rand(60, input_size)
data_len = np.asarray([4, 6, 20, 60], dtype='int64')

with tf.Session() as sess:
    sess.run(init)
    #########################
    # training process goes here
    #########################
    res = sess.run(results,
                   feed_dict={
                       x: data_in,
                       seqlen: data_len})
    print(res)
To encode a sequence into a fixed-length vector you typically use recurrent neural networks (RNNs) or convolutional neural networks (CNNs).
If you use a recurrent neural network you can use the output at the last time step (the last element in your sequence). This corresponds to the thought vector in your question. Have a look at tf.nn.dynamic_rnn. dynamic_rnn requires you to specify the type of RNN cell you want to use. tf.contrib.rnn.LSTMCell and tf.contrib.rnn.GRUCell are the most common.
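For sequences padded to a common length, here is a minimal sketch of picking out the last valid output (the shapes and names are illustrative, matching the 4-float units from the question):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 60, 4])   # padded batch: [batch, max_len, features]
seq_len = tf.placeholder(tf.int32, [None])      # true length of each sequence
cell = tf.contrib.rnn.GRUCell(64)
outputs, state = tf.nn.dynamic_rnn(cell, x, sequence_length=seq_len, dtype=tf.float32)

# Gather the output at time step seq_len - 1 for every example in the batch;
# the result is a fixed-size [batch, 64] "thought vector".
batch_range = tf.range(tf.shape(outputs)[0])
last_output = tf.gather_nd(outputs, tf.stack([batch_range, seq_len - 1], axis=1))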
If you want to use CNNs you need 1-dimensional convolutions. To build them you need tf.layers.conv1d and tf.layers.max_pooling1d.
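For reference, a minimal 1-D CNN sketch under the same assumed input shape; max-pooling over the time axis at the end yields a fixed-size vector regardless of sequence length:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 60, 4])                    # [batch, max_len, features]
conv1 = tf.layers.conv1d(x, filters=64, kernel_size=5,
                         padding='same', activation=tf.nn.relu)  # [batch, 60, 64]
pool1 = tf.layers.max_pooling1d(conv1, pool_size=2, strides=2)   # [batch, 30, 64]
conv2 = tf.layers.conv1d(pool1, filters=64, kernel_size=5,
                         padding='same', activation=tf.nn.relu)  # [batch, 30, 64]
embedding = tf.reduce_max(conv2, axis=1)                         # global max pool -> [batch, 64]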
I have found a solution to my problem, using the following architecture: the LSTM layers below encode the series x1, x2, ..., xn. The last output is duplicated as many times as there are input steps and fed to the decoding LSTM layers above. The TensorFlow code is as follows:
series_input = tf.placeholder(tf.float32, [None, conf.max_series, conf.series_feature_num])
print("Encode input Shape", series_input.get_shape())

# encoding layer
encode_cell = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.BasicLSTMCell(conf.rnn_hidden_num, reuse=False) for _ in range(conf.rnn_layer_num)]
)
encode_output, _ = tf.nn.dynamic_rnn(encode_cell, series_input, dtype=tf.float32, scope='encode')
print("Encode output Shape", encode_output.get_shape())

# last output
encode_output = tf.transpose(encode_output, [1, 0, 2])
last = tf.gather(encode_output, int(encode_output.get_shape()[0]) - 1)

# duplicate the last output of the encoding layer
decoder_input = tf.stack([last for _ in range(conf.max_series)], axis=1)
print("Decoder input shape", decoder_input.get_shape())

# decoding layer
decode_cell = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.BasicLSTMCell(conf.series_feature_num, reuse=False) for _ in range(conf.rnn_layer_num)]
)
decode_output, _ = tf.nn.dynamic_rnn(decode_cell, decoder_input, dtype=tf.float32, scope='decode')
print("Decode output", decode_output.get_shape())

# loss function
loss = tf.losses.mean_squared_error(labels=series_input, predictions=decode_output)
print("Loss", loss)
I am trying to build an LSTM network using an Estimator. My data looks like
X = [[1,2,3], [2,3,4], ... , [98,99,100]]
y = [2, 3, ... , 99]
I am using an Estimator:
regressor = learn.Estimator(model_fn=lstm_model,
                            params=model_params)
where the lstm_model function is
def lstm_model(features, targets, mode, params):
    def lstm_cells(layers):
        if isinstance(layers[0], dict):
            return [tf.nn.rnn_cell.BasicLSTMCell(layer['steps'], state_is_tuple=True) for layer in layers]
        return [tf.nn.rnn_cell.BasicLSTMCell(steps, state_is_tuple=True) for steps in layers]

    stacked_lstm = tf.nn.rnn_cell.MultiRNNCell(lstm_cells(params['rnn_layers']), state_is_tuple=True)
    output, layers = tf.nn.rnn(stacked_lstm, [features], dtype=tf.float32)
    return learn.models.linear_regression(output, targets)
and params are
model_params = {
    'steps': 1000,
    'learning_rate': 0.03,
    'batch_size': 24,
    'time_steps': 3,
    'rnn_layers': [{'steps': 3}],
    'dense_layers': [10, 10]
}
and then I do the fitting
regressor.fit(X, y)
The issue I am facing is
output, layers = tf.nn.rnn(stacked_lstm, [features], dtype=tf.float32)
requires a sequence, but I am not sure how to split my features into a list of tensors. The shape of features inside the lstm_model function is (?, 3).
I have two questions: how do I do the training in batches? And how do I split 'features' so that
output, layers = tf.nn.rnn(stacked_lstm, [features], dtype=tf.float32)
doesn't throw an error. The error I am getting is
raise TypeError("%s that don't all match." % prefix)
TypeError: Tensors in list passed to 'values' of 'Concat' Op have types [float64, float32] that don't all match.
I am using tensorflow 0.12
I had to set the shape of features to (batch_size, time_step, 1) or (None, time_step, 1), and then unstack the features before they go into the RNN. Unstack along the "time_step" dimension so you get a list of time_steps tensors, each of shape (None, 1) or (batch_size, 1).
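A minimal sketch of that reshaping, assuming the TF 0.12-era API from the question ('features', 'params', and 'stacked_lstm' come from the surrounding lstm_model; the cast to float32 also sidesteps the float64/float32 'Concat' mismatch in the error above):

time_steps = params['time_steps']
# (batch_size, time_steps) -> (batch_size, time_steps, 1), cast to float32
features = tf.cast(tf.reshape(features, [-1, time_steps, 1]), tf.float32)
# Unstack along the time axis: a list of time_steps tensors, each (batch_size, 1)
x_list = tf.unstack(features, axis=1)
output, state = tf.nn.rnn(stacked_lstm, x_list, dtype=tf.float32)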
I am building a 1d convolutional neural net for my own data (spectra) and am having an issue with tf.reshape. First I load in the data with pandas, and convert these to numpy arrays, composed of 708 training example spectra, each of length 2151,
import pandas as pd
import numpy as np
data = pd.read_csv('test.csv',header=None)
yTrue = data.ix[:,0].as_matrix()
data = data - data.mean()
data = data.ix[:,1:].as_matrix()
where I subtract the mean value in each column. So data is of dimensions 708 x 2151 here. I then create a network that starts with,
sess = tf.InteractiveSession()
## define inputs
x_ = tf.placeholder(tf.float32, shape=[None, 2151])
x_ = tf.reshape(x_, [-1,1,2151,1])
y_ = tf.placeholder(tf.float32, shape=[None])
which are inputs for my 1d convolutional neural net (with kernels with a width of 10, and 32 feature maps),
W_conv1 = weight_variable([1, 10, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
I then build the rest of the network and then try to run ADAM on it,
cost_function = tf.reduce_mean(tf.pow(y_out - y_, 2))/(2 * samples_number) #L2 loss
train_step = tf.train.AdamOptimizer(1e-4).minimize(cost_function)
correct_prediction = tf.equal(tf.argmax(y_out,1), tf.argmax(y_,1))
sess.run(tf.initialize_all_variables())
for i in range(20000):
    print(i)
    sess.run(train_step, feed_dict={x_: data, y_: yTrue})
However, I get the following error:
ValueError: Cannot feed value of shape (708, 2151) for Tensor u'Reshape_26:0', which has shape '(?, 1, 2151, 1)'
I have looked at these answers: TensorFlow/TFLearn: ValueError: Cannot feed value of shape (64,) for Tensor u'target/Y:0', which has shape '(?, 10)'; Tensorflow error using my own data. They suggest that I need to do some reshaping before I pass my data to the network, but I am not sure what that reshaping should be, particularly since the following works on the first row of the data:
t = tf.constant(data[0])
tf.reshape(t,[1,1,2151,1])
Does anyone have any ideas here?
Best,
Ben
The issue is that feed_dict can replace any Tensor, and since you've changed x_ to reference the reshape op, that's the thing that it's trying to replace. It should work if you just use different Python variables to reference the placeholder and the reshape op:
x_placeholder_ = tf.placeholder(tf.float32, shape=[None, 2151])
x_ = tf.reshape(x_placeholder_, [-1,1,2151,1])
Then when feeding, use x_placeholder_:
sess.run(train_step, feed_dict={x_placeholder_: data, y_: yTrue})
I want to stack two LSTMs without using the MultiRNNCell wrapper. However, the following code results in "ValueError: Shapes (3,) and (2,) are not compatible" because of inputs=states_fw_1 in the second LSTM. How can I pass the hidden state of the first LSTM as input to the second?
LSTM 1
with tf.name_scope("BiLSTM_1"):
with tf.variable_scope('forward_1'):
cell_fw_1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
with tf.variable_scope('backward_srl'):
cell_bw_srl = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
outputs_1, states_1 = tf.nn.bidirectional_dynamic_rnn(
cell_fw=cell_fw_1,
cell_bw=cell_bw_1,
dtype=tf.float64,
sequence_length=self.input_seq_len,
inputs=self.embedded_input_layer,
scope='BiLSTM_1')
The state is a tuple:
states_fw_1, states_bw_1 = states_1
LSTM 2
with tf.name_scope("BiLSTM_2"):
with tf.variable_scope('forward'):
cell_fw = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
with tf.variable_scope('backward'):
cell_bw = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
outputs, states = tf.nn.bidirectional_dynamic_rnn(
cell_fw=cell_fw,
cell_bw=cell_bw,
dtype=tf.float64,
sequence_length=self.input_seq_len,
inputs=states_fw_1,
scope="BiLSTM_extraction")
I've been learning TF for 2 days (so I'm no pro), but I found this problem interesting to resolve.
Here are my findings:
You want to do something that is not possible to obtain with the 'LSTMCell' implementation. Here is why:
You want to feed "states_fw_1" to the next Bi-LSTM. So the first question should be: what are the dimensions of "states_fw_1"? For any RNN implementation you need an input of shape [batch_size, seq_len, input_size]. For "states_fw_1" it is [batch_size, hidden_size] (I have just checked the size of "states_fw_1" by running the code below). So you can see that your output does not fit the RNN requirements. This is because the model outputs only the last state of the LSTM cell, not all the history (see the documentation). And you are not interested in the last state, because you want to feed state[t-step] to the layer above. 'states_fw_1' is useful when you want to classify the sequence as a whole (not each element in the sequence).
Edit: 'states_fw_1' contains the last "hidden_state" and the last "memory_cell". For classification, only the "hidden_state" will be useful, I think.
So you just need to use the merged output (from the forward and backward passes). The merged 'LSTMCell' output has size [batch_size, seq_len, hidden_size*2] (*2 for the forward and backward passes), so it is right for the next stacked RNN (the output comes from each time step, unlike the state).
Here is the code I was testing:

import tensorflow as tf
import numpy as np

hidden_size = 21
seq_len = tf.placeholder(tf.int32, [None])
inputs = tf.placeholder(tf.float32, [None, None, 32])

with tf.variable_scope('BiLSTM_1'):
    with tf.variable_scope('forward_1'):
        cell_fw_1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
    with tf.variable_scope('backward_srl'):
        cell_bw_1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
    outputs_1, states_1 = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw_1,
        cell_bw=cell_bw_1,
        dtype=tf.float32,
        sequence_length=seq_len,
        inputs=inputs,
        scope='BiLSTM_1')

# Merge the output tensors from the forward and backward passes.
# The merged size is [batch_size, seq_len, 2*hidden_size].
outputs_1 = tf.concat(outputs_1, 2)

with tf.name_scope("BiLSTM_2"):
    with tf.variable_scope('forward'):
        cell_fw = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
    with tf.variable_scope('backward'):
        cell_bw = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        dtype=tf.float32,
        sequence_length=seq_len,
        inputs=outputs_1,
        scope="BiLSTM_2")

# Initialize the weights and biases
init = tf.initialize_all_variables()

batch_size = 5
seq_len_val = 10
train_inputs = np.zeros((batch_size, seq_len_val, 32))
train_seq_len = np.ones(batch_size) * seq_len_val

with tf.Session() as session:
    session.run(init)
    feed = {inputs: train_inputs, seq_len: train_seq_len}
    out, state, state_1 = session.run([outputs, states, states_1], feed)
    print("State size: ", state_1[0].c.shape, " Out Size: ", out[0][0].shape)
    print("Batch_size: ", batch_size, " Sequence Len: ", seq_len_val, " Hidden Size: ", hidden_size)
'outputs_1' returned by LSTM 1 is a tuple containing 'outputs_fw' and 'outputs_bw'.
'outputs_fw' and 'outputs_bw' each have dimensions [batch_size, sequence_length, hidden_size].
You have to concatenate the 'outputs_fw' and 'outputs_bw' hidden states (use tf.concat with axis=2) and pass that as input to LSTM 2, instead of passing 'states_fw_1' as input to LSTM 2.
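A minimal sketch of that concatenation, with names following the snippets above (outputs_1 is the tuple returned by the first bidirectional_dynamic_rnn):

outputs_fw, outputs_bw = outputs_1
lstm2_input = tf.concat([outputs_fw, outputs_bw], axis=2)  # [batch_size, seq_len, 2*hidden_size]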