tf.nn.dynamic_rnn + Dataset iterator - tensorflow

I am building an LSTM net using the Dataset API.
The input tensor (named x in code) has different shapes for the train and the val sets and the iterator is defined without specifying an output shape.
The problem is that when tf.nn.dynamic_rnn graph_op is defined the shape of x is unknown and the following error is raised:
ValueError: as_list() is not defined on an unknown TensorShape.
Using tf.nn.dynamic_rnn without the Dataset API works as expected.
How can this error be fixed?
TF version: 1.4
import tensorflow as tf
import numpy as np
"""
1d: Number of examples per epoch
2d: Time steps size
3d: Batch size e.g. number of independent time series
4d: Number of points that are given as input in the lstm each time step
Batch size is usually smaller in val set because we use most of data for training.
Time steps size is bigger in val set because we want to speed up inference.
"""
# Random stand-in data; train and val differ in their time-step and batch dimensions.
x_train = np.random.rand(100, 8, 12, 2).astype(np.float32)
x_val = np.random.rand(8, 100, 4, 2).astype(np.float32)
# True  -> x comes from a reinitializable Dataset iterator (the path that raises).
# False -> x comes from a placeholder (the path that works).
use_dataset_api = True
with tf.device('/gpu:0'):
    tf.reset_default_graph()
    if not use_dataset_api:
        # The batch size is fed at run time so the same graph serves both sets.
        batch_size_pl = tf.placeholder(shape=[], dtype=tf.int32)
        # Only the last (feature) dimension is fixed; time and batch are left open.
        x_pl = tf.placeholder(shape=[None, None, 2], dtype=tf.float32)
        cell = tf.contrib.rnn.LSTMCell(num_units=11)
        init_state = cell.zero_state(batch_size=batch_size_pl, dtype=tf.float32)
        rnn_outputs, current_state = tf.nn.dynamic_rnn(cell, x_pl, initial_state=init_state,
                                                       time_major=True, dtype=tf.float32)
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        # Use first example of train set
        rnn_outputs_, current_state_ = sess.run([rnn_outputs, current_state],
                                                feed_dict={batch_size_pl: 12, x_pl: x_train[0]})
        # Use first example of val set
        rnn_outputs_, current_state_ = sess.run([rnn_outputs, current_state],
                                                feed_dict={batch_size_pl: 4, x_pl: x_val[0]})
    else:
        batch_size_pl = tf.placeholder(shape=[], dtype=tf.int32)
        train_set = tf.data.Dataset.from_tensor_slices((x_train))
        val_set = tf.data.Dataset.from_tensor_slices((x_val))
        # Only the output types are passed here (the shapes argument is commented out),
        # so the iterator carries no shape information for x.
        iterator = tf.data.Iterator.from_structure(train_set.output_types) # , train_set.output_shapes)
        # One iterator, two initializers: running one of them selects the data set.
        train_init_op = iterator.make_initializer(train_set)
        val_init_op = iterator.make_initializer(val_set)
        x = iterator.get_next()
        cell = tf.contrib.rnn.LSTMCell(num_units=11)
        init_state = cell.zero_state(batch_size=batch_size_pl, dtype=tf.float32)
        # Raises error for tensor x: as_list() is not defined on an unknown TensorShape.
        rnn_outputs, current_state = tf.nn.dynamic_rnn(cell, x, initial_state=init_state,
                                                       time_major=True, dtype=tf.float32)
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        # Use first example of train set
        sess.run(train_init_op)
        rnn_outputs_, current_state_ = sess.run([rnn_outputs, current_state],
                                                feed_dict={batch_size_pl: 12})
        # Use first example of val set
        sess.run(val_init_op)
        rnn_outputs_, current_state_ = sess.run([rnn_outputs, current_state],
                                                feed_dict={batch_size_pl: 4})

The solution is to change the following line:
iterator = tf.data.Iterator.from_structure(train_set.output_types)
with:
iterator = tf.data.Iterator.from_structure(train_set.output_types, [None, None, 2])

Related

Feeding Dataset Iterator to Tensorflow

Can I get a full example somewhere that feeds a tf.data.Dataset iterator to a model? I'm trying to feed this data into a model without the help of tf.Estimators.
def preprocess_image(image):
    """Decode a JPEG string tensor and turn it into a batched image tensor.

    The image is decoded to one channel, resized to 224x224, randomly
    flipped left/right, divided by 255.0 and cast to float32 before being
    passed to ``tf.train.shuffle_batch``.
    """
    image = tf.image.decode_jpeg(image, channels=1)
    image = tf.image.resize_images(image, [224, 224])
    image = tf.image.random_flip_left_right(image)
    image /= 255.0
    image = tf.cast(image, tf.float32)
    # NOTE(review): this function is used as a Dataset.map callback below;
    # tf.train.shuffle_batch is a queue-based input op, so shuffling/batching
    # is presumably meant to happen on the Dataset itself -- confirm.
    image = tf.train.shuffle_batch([image],batch_size=16, num_threads=10, capacity=100000, min_after_dequeue=15)
    return image
def load_and_preprocess_image(path):
    """Read the file at ``path`` and run its contents through ``preprocess_image``."""
    image = tf.read_file(path)
    return preprocess_image(image)
train_data_dx = tf.data.Dataset.from_tensor_slices(xray_data_train['full_path'].values)
train_data_dx = train_data_dx.map(load_and_preprocess_image, num_parallel_calls=8)
train_data_dy = xray_data_train['Finding_strings']
print(train_data_dx.output_shapes)
print(train_data_dx.output_types)
test_data_dx = tf.data.Dataset.from_tensor_slices(xray_data_test['full_path'].values)
test_data_dx = test_data_dx.map(load_and_preprocess_image, num_parallel_calls=8)
test_data_dy = xray_data_test['Finding_strings']
Here's a full example.
Note
Iterator must be initialized at the beginning
We can set the number of epochs with the repeat() method and the batch size with the batch() method. Note that I call repeat() first and then batch().
At each iteration we're using tf.Session() interface to access the next batch.
We use try-except because tf.errors.OutOfRangeError is raised once the repeated data is exhausted.
import tensorflow as tf
from sklearn.datasets import make_blobs

# generate dummy data for illustration
x_train, y_train = make_blobs(n_samples=25,
                              n_features=2,
                              centers=[[1, 1], [-1, -1]],
                              cluster_std=0.5)
n_epochs = 2
batch_size = 10

# Model: a single dense layer trained with softmax cross-entropy.
with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, shape=[None, 2])
    y = tf.placeholder(tf.int32, shape=[None])
with tf.name_scope('logits'):
    logits = tf.layers.dense(x,
                             units=2,
                             name='logits')
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss_tensor = tf.reduce_mean(xentropy)
with tf.name_scope('optimizer'):
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss_tensor)

# create dataset `from_tensor_slices` and create iterator
dataset = tf.data.Dataset.from_tensor_slices({'x': x_train, 'y': y_train})
# Use the batch_size defined above instead of a second hard-coded 10.
dataset = dataset.repeat(n_epochs).batch(batch_size)
iterator = dataset.make_initializable_iterator()
# Build the get_next op once, before the session starts, so the whole graph
# is defined up front and no ops are added while training.
next_batch = iterator.get_next()

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              iterator.initializer])  # <-- must be initialized!
    while True:
        # Only fetching the next batch can signal the end of the data, so
        # keep the try body limited to that call.
        try:
            batch = sess.run(next_batch)  # <-- extract next batch
        except tf.errors.OutOfRangeError:
            break
        loss_val, _ = sess.run([loss_tensor, train_op],
                               feed_dict={x: batch['x'], y: batch['y']})
        print(loss_val)

TensorFlow: How to embed float sequences to fixed size vectors?

I am looking for methods to embed variable-length sequences of float values into fixed-size vectors. The input format is as follows:
[f1,f2,f3,f4]->[f1,f2,f3,f4]->[f1,f2,f3,f4]-> ... -> [f1,f2,f3,f4]
[f1,f2,f3,f4]->[f1,f2,f3,f4]->[f1,f2,f3,f4]->[f1,f2,f3,f4]-> ... -> [f1,f2,f3,f4]
...
[f1,f2,f3,f4]-> ... -> ->[f1,f2,f3,f4]
Each line is a variable-length sequence, with a maximum length of 60. Each unit in a sequence is a tuple of 4 float values. I have already padded all sequences with zeros to the same length.
The following architecture seems to solve my problem if I use the input itself as the target output; I need the thought vector in the center as the embedding for the sequences.
In TensorFlow, I have found two candidate methods: tf.contrib.legacy_seq2seq.basic_rnn_seq2seq and tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq.
However, these two methods seem to be intended for NLP problems, where the input must be discrete values for words.
So, are there other functions that would solve my problem?
All you need is an RNN, not a seq2seq model, since seq2seq comes with an additional decoder, which is unnecessary in your case.
An example code:
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
input_size = 4
max_length = 60
hidden_size=64
output_size = 4
# Padded input batch: (batch, max_length, input_size).
x = tf.placeholder(tf.float32, shape=[None, max_length, input_size], name='x')
# Unpadded length of every sequence in the batch.
seqlen = tf.placeholder(tf.int64, shape=[None], name='seqlen')
lstm_cell = rnn.BasicLSTMCell(hidden_size, forget_bias=1.0)
outputs, states = tf.nn.dynamic_rnn(cell=lstm_cell, inputs=x, sequence_length=seqlen, dtype=tf.float32)
# The last element of the final state is used as the fixed-size encoding.
# NOTE(review): presumably the LSTM state is a (c, h) tuple, which makes this
# the final hidden state h -- confirm against the BasicLSTMCell documentation.
encoded_states = states[-1]
# Linear projection from the encoding to output_size values.
W = tf.get_variable(
    name='W',
    shape=[hidden_size, output_size],
    dtype=tf.float32,
    initializer=tf.random_normal_initializer())
b = tf.get_variable(
    name='b',
    shape=[output_size],
    dtype=tf.float32,
    initializer=tf.random_normal_initializer())
z = tf.matmul(encoded_states, W) + b
results = tf.sigmoid(z)
###########################
## cost computing and training components goes here
# e.g.
# targets = tf.placeholder(tf.float32, shape=[None, input_size], name='targets')
# cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=targets, logits=z))
# optimizer = tf.train.AdamOptimizer(learning_rate=0.1).minimize(cost)
###############################
init = tf.global_variables_initializer()
# Toy batch: four zero-padded sequences of lengths 4, 6, 20 and 60.
batch_size = 4
data_in = np.zeros((batch_size, max_length, input_size), dtype='float32')
data_in[0, :4, :] = np.random.rand(4, input_size)
data_in[1, :6, :] = np.random.rand(6, input_size)
data_in[2, :20, :] = np.random.rand(20, input_size)
data_in[3, :, :] = np.random.rand(60, input_size)
data_len = np.asarray([4, 6, 20, 60], dtype='int64')
with tf.Session() as sess:
    sess.run(init)
    #########################
    # training process goes here
    #########################
    res = sess.run(results,
                   feed_dict={
                       x: data_in,
                       seqlen: data_len})
    print(res)
To encode sequence to a fixed length vector you typically use recurrent neural networks (RNNs) or convolutional neural networks (CNNs).
If you use a recurrent neural network you can use the output at the last time step (the last element in your sequence). This corresponds to the thought vector in your question. Have a look at tf.nn.dynamic_rnn. dynamic_rnn requires you to specify the type of RNN cell you want to use. tf.contrib.rnn.LSTMCell and tf.contrib.rnn.GRUCell are the most common.
If you want to use CNNs you need to use 1 dimensional convolutions. To build CNNs you need tf.layers.conv1d and tf.layers.max_pooling1d
I have found a solution to my problem, using the following architecture (the figure is not reproduced here).
The LSTM layers at the bottom encode the series x1,x2,...,xn. The last output (the green one in the figure) is duplicated once per input time step and fed to the decoding LSTM layers above. The TensorFlow code is as follows:
# Padded input series: (batch, conf.max_series, conf.series_feature_num).
series_input = tf.placeholder(tf.float32, [None, conf.max_series, conf.series_feature_num])
print("Encode input Shape", series_input.get_shape())
# encoding layer
encode_cell = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.BasicLSTMCell(conf.rnn_hidden_num, reuse=False) for _ in range(conf.rnn_layer_num)]
)
encode_output, _ = tf.nn.dynamic_rnn(encode_cell, series_input, dtype=tf.float32, scope='encode')
print("Encode output Shape", encode_output.get_shape())
# last output: swap the first two axes, then take the final index of the new leading axis
encode_output = tf.transpose(encode_output, [1, 0, 2])
last = tf.gather(encode_output, int(encode_output.get_shape()[0]) - 1)
# duplicate the last output of the encoding layer conf.max_series times along axis 1
decoder_input = tf.stack([last for _ in range(conf.max_series)], axis=1)
print("Decoder input shape", decoder_input.get_shape())
# decoding layer (its cells have conf.series_feature_num units, the size of the input's last axis)
decode_cell = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.BasicLSTMCell(conf.series_feature_num, reuse=False) for _ in range(conf.rnn_layer_num)]
)
decode_output, _ = tf.nn.dynamic_rnn(decode_cell, decoder_input, dtype=tf.float32, scope='decode')
print("Decode output", decode_output.get_shape())
# Loss Function: mean squared error between the decoder output and the original input
loss = tf.losses.mean_squared_error(labels=series_input, predictions=decode_output)
print("Loss", loss)

Why when I changed the test batch size in tensorflow, result was different

Here is my train code:
# Input placeholder with an open first dimension; the other two are fixed at 2 and 3.
x = tf.placeholder(tf.float32, [None, 2, 3])
cell = tf.nn.rnn_cell.GRUCell(10)
# Only the final state is kept; the per-step outputs are discarded.
_, state = tf.nn.dynamic_rnn(
    cell = cell,
    inputs = x,
    dtype = tf.float32)
# train
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Two identical all-ones examples.
    x_ = np.ones([2,2,3],np.float32)
    output = sess.run(state, feed_dict= {x: x_})
    print output
    # Save the variables so the test snippet can restore them.
    saver = tf.train.Saver()
    saver.save(sess,'./model')
The result is:
[[ 0.12851571 -0.23994535 0.23123585 -0.00047993 -0.02450397
-0.21048039 -0.18786618 0.04458345 -0.08603278 -0.08259721]
[ 0.12851571 -0.23994535 0.23123585 -0.00047993 -0.02450397
-0.21048039 -0.18786618 0.04458345 -0.08603278 -0.08259721]]
Here is my test code:
# Same graph definition as in the train snippet.
x = tf.placeholder(tf.float32, [None, 2, 3])
cell = tf.nn.rnn_cell.GRUCell(10)
_, state = tf.nn.dynamic_rnn(
    cell = cell,
    inputs = x,
    dtype = tf.float32)
with tf.Session() as sess:
    # A single all-ones example this time (first dimension 1 instead of 2).
    x_ = np.ones([1,2,3],np.float32)
    # Variables come from the saved checkpoint rather than an initializer.
    saver = tf.train.Saver()
    saver.restore(sess,'./model')
    output = sess.run(state, feed_dict= {x: x_})
    print output
Then I get:
[[ 0.12851571 -0.23994535 0.2312358 -0.00047993 -0.02450397
-0.21048039 -0.18786621 0.04458345 -0.08603278 -0.08259721]]
You see, result has changed slightly. When I set the test batch to 2, the result is same as train result. So what's wrong? My tf version is 0.12
An update (not an answer)
The tf.nn.rnn_cell.GRUCell and tf.nn.dynamic_rnn are both deprecated and replaced with tf.keras.layers.GRU.
Using the deprecated functions, it appears you don't even need to save and restore the model or even run it multiple times. All you need is to run it on an odd batch size and use tf.float32 as the dtype and the last result will be slightly off.
import tensorflow as tf
import numpy as np
x = tf.placeholder(tf.float32, [None, 2, 3])
cell = tf.nn.rnn_cell.GRUCell(10)
# Only the final state is inspected; the per-step outputs are discarded.
_, state = tf.nn.dynamic_rnn(
    cell = cell,
    inputs = x,
    dtype = tf.float32)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Three identical all-ones examples (an odd first dimension).
x_ = np.ones([3,2,3],np.float32)
output = sess.run(state, feed_dict= {x: x_})
print(output)
Returns results like this
[[ 0.03649516 -0.08052824 -0.0539998 0.2995336 -0.12542574 -0.04339318
0.3872745 0.08844283 -0.14555818 -0.4216033 ]
[ 0.03649516 -0.08052824 -0.0539998 0.2995336 -0.12542574 -0.04339318
0.3872745 0.08844283 -0.14555818 -0.4216033 ]
[ 0.03649516 -0.08052824 -0.05399981 0.2995336 -0.12542574 -0.04339318
0.38727456 0.08844285 -0.14555818 -0.4216033 ]]
The anomaly only seems to appear in the last row for odd length batches.
An alternative view is, that a single batch is correct, and all even sized batches are off and everything other than the last row of odd sized batches is off.
It does not seem to happen for dtype=float64 or dtype=float16, both of which seem stable.
Furthermore, this issue is only in the hidden state and does not seem to appear in the regular output.

Updating the Initial state of a recurrent neural network in tensorflow

Currently I have the following code:
init_state = tf.Variable(tf.zeros([batch_partition_length, state_size])) # -> [16, 1024].
final_state = tf.Variable(tf.zeros([batch_partition_length, state_size]))
And inside my inference method that is responsible producing the output, I have the following:
def inference(frames):
    """Build the RNN and softmax layers and return the ``softmax_linear`` tensor.

    NOTE(review): ``frames`` is not referenced in the visible body; the elided
    conv layers presumably consume it and produce ``pool3`` -- confirm.
    NOTE(review): the original indentation was lost; the two variable scopes
    are laid out here as siblings -- confirm against the original source.
    """
    # Note that I write the final_state as a global valriable to avoid the shadowing issue, since it is referenced at the dynamic_rnn line.
    global final_state
    # .... Here we have some conv layers and so on...
    # Now the RNN cell
    with tf.variable_scope('local1') as scope:
        # Move everything into depth so we can perform a single matrix multiply.
        shape_d = pool3.get_shape()
        shape = shape_d[1] * shape_d[2] * shape_d[3]
        # tf_shape = tf.stack(shape)
        tf_shape = 1024
        print("shape:", shape, shape_d[1], shape_d[2], shape_d[3])
        # So note that tf_shape = 1024, this means that we have 1024 features are fed into the network. And
        # the batch size = 1024. Therefore, the aim is to divide the batch_size into num_steps so that
        # (sentence left unfinished in the original; see the second reshape below)
        reshape = tf.reshape(pool3, [-1, tf_shape])
        # Now we need to reshape/divide the batch_size into num_steps so that we would be feeding a sequence
        rnn_inputs = tf.reshape(reshape, [batch_partition_length, step_size, tf_shape])
        print('RNN inputs shape: ', rnn_inputs.get_shape()) # -> (16, 64, 1024).
        cell = tf.contrib.rnn.BasicRNNCell(state_size)
        # note that rnn_outputs are the outputs but not multiplied by W.
        # The second return value rebinds the global name final_state.
        rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)
    # linear Wx + b
    with tf.variable_scope('softmax_linear') as scope:
        weight_softmax = \
            tf.Variable(
                tf.truncated_normal([state_size, n_classes], stddev=1 / state_size, dtype=tf.float32, name='weight_softmax'))
        bias_softmax = tf.constant(0.0, tf.float32, [n_classes], name='bias_softmax')
        # Flatten the RNN outputs to rows of state_size, apply the linear layer,
        # then reshape the result to [batch_size, n_classes].
        softmax_linear = tf.reshape(
            tf.matmul(tf.reshape(rnn_outputs, [-1, state_size]), weight_softmax) + bias_softmax,
            [batch_size, n_classes])
        print('Output shape:', softmax_linear.get_shape())
    return softmax_linear
# Here we define the loss, accuracy and the optimzer.
# now run the graph:
with tf.Session() as sess:
_, accuracy_train, loss_train, summary = \
sess.run([optimizer, accuracy, cost_scalar, merged], feed_dict={x: image_batch,
y_valence: valences,
confidence_holder: confidences})
....
Problem: How can I assign init_state the value stored in final_state? That is, how do I update one Variable's value from another?
I have used the following:
tf.assign(init_state, final_state.eval())
under session after running the sess.run command. But, this is throwing an error:
You must feed a value for placeholder tensor 'inputs' with dtype float
where the placeholder named 'inputs' is declared as follows:
x = tf.placeholder(tf.float32, [None, 112, 112, 3], name='inputs')
And the feeding is done after reading the images from the tfRecords through the following command:
example = tf.train.Example()
example.ParseFromString(string_record)
height = int(example.features.feature['height']
.int64_list
.value[0])
width = int(example.features.feature['width']
.int64_list
.value[0])
img_string = (example.features.feature['image_raw']
.bytes_list
.value[0])
img_1d = np.fromstring(img_string, dtype=np.uint8)
reconstructed_img = img_1d.reshape((height, width, -1)) # Where this is added to the image_batch list, which is fed into the placeholder.
And if tried the following:
img_1d = np.fromstring(img_string, dtype=np.float32)
This will produce the following error:
ValueError: cannot reshape array of size 9408 into shape (112,112,newaxis)
Any help is much appreciated!!
So here are the mistakes that I have done so far. After doing some revision I figured out the following:
I shouldn't create final_state as a tf.Variable. Since tf.nn.dynamic_rnn returns tensors as ndarray, I should not instantiate final_state at the beginning. And I should not use the global final_state inside the function definition.
In order to assign the initial state the final_state, I used:
# Copy final_state into init_state; tf.assign only builds the op, so it must be run in the session to take effect.
tf.assign(init_state, final_state)
And things work out.
Note: in tensorflow, an operation returns the data as numpy array in python and as tensorflow::Tensor in C and C++.
Have a look at https://www.tensorflow.org/versions/r0.10/get_started/basic_usage for more information.

Making simple rnn code with scan function in Tensorflow

I recently started to learn Tensorflow and try to make simple rnn code using scan function.
What I'm trying to do is to make the RNN predict a sine function.
It gets input of 1 dim. and outputs also 1 dim in batch as follow.
import tensorflow as tf
from tensorflow.examples.tutorials import mnist
import numpy as np
import matplotlib.pyplot as plt
import os
import time
# FLAGS (options)
tf.flags.DEFINE_string("data_dir", "", "")
#tf.flags.DEFINE_boolean("read_attn", True, "enable attention for reader")
#tf.flags.DEFINE_boolean("write_attn",True, "enable attention for writer")
opt = tf.flags.FLAGS
#Parameters
time_step = 10
num_rnn_h = 16
batch_size = 2
max_epoch=10000
learning_rate=1e-3 # learning rate for optimizer
eps=1e-8 # epsilon for numerical stability
#temporary sinusoid data
# Both rows hold the same pattern; y is x shifted one step ahead.
x_tr = np.zeros([batch_size,time_step])
y_tr = np.zeros([batch_size,time_step])
ptrn = 0.7*np.sin(np.arange(time_step+1)/(2*np.pi))
x_tr[0] = ptrn[0:time_step]
y_tr[0] = ptrn[1:time_step+1]
x_tr[1] = ptrn[0:time_step]
y_tr[1] = ptrn[1:time_step+1]
#Build model
x = tf.placeholder(tf.float32,shape=[batch_size,time_step,1], name= 'input')
y = tf.placeholder(tf.float32,shape=[None,time_step,1], name= 'target')
cell = tf.nn.rnn_cell.BasicRNNCell(num_rnn_h)
#cell = tf.nn.rnn_cell.LSTMCell(num_h, state_is_tuple=True)
# Output projection from the num_rnn_h hidden units to one value.
with tf.variable_scope('output'):
    W_o = tf.get_variable('W_o', shape=[num_rnn_h, 1])
    b_o = tf.get_variable('b_o', shape=[1], initializer=tf.constant_initializer(0.0))
init_state = cell.zero_state(batch_size, tf.float32)
#make graph
#rnn_outputs, final_states = tf.scan(cell, xx1, initializer= tf.zeros([num_rnn_h]))
# x is transposed to time-major so that tf.scan steps over the time axis.
# The lambda returns whatever cell(xi, a) returns, while the initializer is
# the zero state alone.
scan_outputs = tf.scan(lambda a, xi: cell(xi, a), tf.transpose(x, perm=[1,0,2]), initializer= init_state)
rnn_outputs, rnn_states = tf.unpack(tf.transpose(scan_outputs,perm=[1,2,0,3]))
print rnn_outputs, rnn_states
with tf.variable_scope('predictions'):
    weighted_sum = tf.reshape(tf.matmul(tf.reshape(rnn_outputs, [-1, num_rnn_h]), W_o), [batch_size, time_step, 1])
    predictions = tf.add(weighted_sum, b_o, name='predictions')
with tf.variable_scope('loss'):
    # Mean squared error between targets and predictions.
    loss = tf.reduce_mean((y - predictions) ** 2, name='loss')
# NOTE(review): original indentation was lost; this line may have been inside the 'loss' scope -- confirm.
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
But it gives an error at the last line (the optimizer):
ValueError: Shapes (2, 16) and (2, 2, 16) are not compatible
If anyone knows the reason, please tell me how to fix it.
I assume your error is not on the last line (the optimizer) but rather on some operation you are doing earlier. Perhaps in the reduce_mean with this y - prediction? I will not go over your code in details but I will tell you that this error comes when you do an operation between two tensors which require the same shape (usually math operations).