Keras custom layer on ragged tensor to reduce dimensionallity - tensorflow

I'm trying to write a custom layer that will handle variable-length vectors, and reduce them to the same length vector.
The length is known in advance because the reason for the variable lengths is that I have several different data types that I encode using a different number of features.
In a sense, it is similar to Embedding only for numerical values.
I've tried using padding, but the results were bad, so I'm trying this approach instead.
So, for example let's say I have 3 data types, which I encode with 3, 4, 6 length vectors.
arr = [
# example one (data type 1 [len()==3], datat type 3[len()==6]) - force values as floats
[[1.0,2.0,3],[1,2,3,4,5,6]],
# example two (data type 2 [len()==4], datat type 3len()==6]) - force values as floats
[[1.0,2,3,4],[1,2,3,4,5,6]],
]
I tried implementing a custom layer like:
class DimensionReducer(tf.keras.layers.Layer):
def __init__(self, output_dim, expected_lengths):
super(DimensionReducer, self).__init__()
self._supports_ragged_inputs = True
self.output_dim = output_dim
for l in expected_lengths:
setattr(self,f'w_{l}', self.add_weight(shape=(l, self.output_dim),initializer='random_normal',trainable=True))
setattr(self, f'b_{l}',self.add_weight(shape=(self.output_dim,), initializer='random_normal',trainable=True))
def call(self, inputs):
print(inputs.shape)
# batch
if len(inputs.shape) == 3:
print("batch")
result = []
for i,x in enumerate(inputs):
_result = []
for v in x:
l = len(v)
print(l)
print(v)
w = getattr(self, f'w_{l}')
b = getattr(self, f'b_{l}')
out = tf.matmul([v],w) + b
_result.append(out)
result.append(tf.concat(_result, 0))
r = tf.stack(result)
print("batch output:",r.shape)
return r
Which seems to be working when called directly:
dim = DimensionReducer(3, [3,4,6])
dim(tf.ragged.constant(arr))
But when I try to incorporate it into a model, it fails:
import tensorflow as tf
val_ragged = tf.ragged.constant(arr)
inputs_ragged = tf.keras.layers.Input(shape=(None,None), ragged=True)
outputs_ragged = DimensionReducer(3, [3,4,6])(inputs_ragged)
model_ragged = tf.keras.Model(inputs=inputs_ragged, outputs=outputs_ragged)
# this one with RaggedTensor doesn't
print(model_ragged(val_ragged))
With
AttributeError: 'DimensionReducer' object has no attribute 'w_Tensor("dimension_reducer_98/strided_slice:0", shape=(), dtype=int32)'
I'm not sure how am I to implement such a layer, or what I'm doing wrong.

Related

Converting a fully connected neural network with variable number of hidden layers from tensorflow to pytorch

I recently started learning pytorch and I am trying to convert a part of a large script including coding a MLP with variable number of hidden layers from Tensorflow to pytorch.
import tensorflow as tf
### Base neural network
def init_mlp(layer_sizes, std=.01, bias_init=0.):
params = {'w':[], 'b':[]}
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
params['w'].append(tf.Variable(tf.random_normal([n_in, n_out], stddev=std)))
params['b'].append(tf.Variable(tf.mul(bias_init, tf.ones([n_out,]))))
return params
def mlp(X, params):
h = [X]
for w,b in zip(params['w'][:-1], params['b'][:-1]):
h.append( tf.nn.relu( tf.matmul(h[-1], w) + b ) )
#h.append( tf.nn.tanh( tf.matmul(h[-1], w) + b ) )
return tf.matmul(h[-1], params['w'][-1]) + params['b'][-1]
def compute_nll(x, x_recon_linear):
return tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(x_recon_linear, x), reduction_indices=1, keep_dims=True)
def gauss_cross_entropy(mean_post, std_post, mean_prior, std_prior):
d = (mean_post - mean_prior)
d = tf.mul(d,d)
return tf.reduce_sum(-tf.div(d + tf.mul(std_post,std_post),(2.*std_prior*std_prior)) - tf.log(std_prior*2.506628), reduction_indices=1, keep_dims=True)
how could I write down similarly weights and bias variables and attach them in each hidden layer in pytorch?
how could I convert gauss_cross_entropy and compute_nll
functions as well (finding equivalent syntax)?
Are these two codes compatible?
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as func
from torch.distributions import Normal, Categorical, Independent
from copy import
device = "cpu"
if torch.cuda.is_available():
device = "cuda:0"
if torch.cuda.device_count() > 1:
net = nn.DataParallel(net)
net.to(device)
def init_mlp(layer_sizes, std=.01, bias_init=0.):
params = {'w':[], 'b':[]}
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
params['w'].append(torch.tensor(Normal([n_in, n_out], torch.tensor([std])) ,requires_grad=True))
params['b'].append(torch.tensor(torch.mul(bias_init, torch.ones([n_out,])),requires_grad=True))
return params
def mlp(X, params):
h = [X]
for w,b in zip(params['w'][:-1], params['b'][:-1]):
h.append( torch.nn.ReLU( tf.matmul(h[-1], w) + b ) )
return torch.matmul(h[-1], params['w'][-1]) + params['b'][-1]
def compute_nll(x, x_recon_linear):
return torch.sum(func.binary_cross_entropy_with_logits(x_recon_linear, x), reduction_indices=1, keep_dims=True)
def gauss_cross_entropy(mu_post, sigma_post, mu_prior, sigma_prior):
d = (mu_post - mu_prior)
d = torch.mul(d,d)
return torch.sum(-torch.div(d + torch.mul(sigma_post,sigma_post),(2.*sigma_prior*sigma_prior)) - torch.log(sigma_prior*2.506628), reduction_indices=1, keep_dims=True)
What is the substitute function for tf.placeholder in pytorch? For instance here:
class VAE(object):
def __init__(self, hyperParams):
self.X = tf.placeholder("float", [None, hyperParams['input_d']])
self.prior = hyperParams['prior']
self.K = hyperParams['K']
self.encoder_params = self.init_encoder(hyperParams)
self.decoder_params = self.init_decoder(hyperParams)
and also how should I change tf.shape in this line: tf.random_normal(tf.shape(self.sigma[-1]))
How could I write down similar weights and bias variables and attach them in each hidden layer in PyTorch?
An easier way to define those is to create a list containing the params as (weight, bias) tuples:
def init_mlp(layer_sizes, std=.01, bias_init=0.):
params = []
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
params.append([
nn.init.normal_(torch.empty(n_in, n_out)).requires_grad_(True),
torch.empty(n_out).fill_(bias_init).requires_grad_(True)])
return params
Above I define my parameters as 'empty' (created with uninitialized data) tensors with torch.empty. I have used in-place functions such as nn.init.normal_ (there are many others available) and torch.Tensor.fill_ to fill the tensor with an arbitrary value (maybe it is .mul_(bias_init) you are looking for, based on your TensorFlow sample?).
For the inference code, you don't actually need to store the intermediate layer results:
def mlp(x, params):
for i, (W, b) in enumerate(params):
x = x#W + b
if i < len(params) - 1:
x = torch.relu(x)
return x
How could I convert gauss_cross_entropy and compute_nll functions as well (finding equivalent syntax)?
You can use PyTorch functions and mathematical operators to define your logic. For compute_loss you were using the built-in, which actually does not require summation after it, by default the losses of the batch elements are averaged.
def compute_loss(y_pred, y_true):
return F.binary_cross_entropy_with_logits(y_pred, y_true)
What is the substitute function for tf.placeholder in Pytorch?
You don't have placeholders in PyTorch, you compute your outputs explicitly using PyTorch operators, then you should be able to backpropagate through those operators and get the gradients for each parameter.
How should I change tf.shape in this line: tf.random_normal(tf.shape(self.sigma[-1]))
Function tf.shape returns the shape of the tensor, in PyTorch you call torch.Tensor.shape or by calling torch.Tensor.size: i.e. self.sigma[-1].shape or self.sigma[-1].size().

Create tf.keras.add Layer when Subclassing tf.keras.Model

Objective: build a neural net with skip connections, where the number of blocks is a parameter, and we subclass tf.keras.Model.
Problem: When subclassing tf.keras.Model we define the network layers in __init__() and specify the forward pass in call(). When a tf.keras.layers.add layer is defined in __init__() two arguments must be specified (the two tensors to be added). However, those tensors exist only within the scope of call(). Where and how does one provide the arguments to tf.keras.layers.add()?
Code is below. See lines ending with # PROBLEM. I tried creating instance variables to use as the arguments to add() (e.g. internalTensor, which is the internal "flow" tensor typically written as x) but no success.
class Network_very_simple(tf.keras.Model):
def __init__(self, num_blocks):
super(Network_very_simple, self).__init__()
self.units_per_layer = 100
self.num_blocks = num_blocks
self.internalTensor = None
self.block_output_tensors = [None for _ in range(self.num_blocks)]
self.block_shortcut_tensors = [None for _ in range(self.num_blocks)]
# Individual layers
self.flatten_layer = tf.keras.layers.Flatten()
self.final_dense_layer = tf.keras.layers.Dense(1, activation='linear')
# Per-block layers
self.block_dense_layers = []
self.block_activations_0 = []
self.block_activations_1 = []
self.block_add_layers = []
for block_num in range(num_blocks):
self.block_dense_layers.append(tf.keras.layers.Dense(self.units_per_layer, activation=None))
self.block_activations_0.append(tf.keras.layers.Activation('selu'))
self.block_activations_1.append(tf.keras.layers.Activation('selu'))
self.block_add_layers.append(tf.keras.layers.add([self.block_shortcut_tensors[block_num], self.internalTensor])) # PROBLEM
def call(self, inputs):
input_tensor = self.flatten_layer(inputs)
for block_num in range(self.num_blocks):
if block_num == 0:
block_input_tensor = input_tensor
else:
block_input_tensor = self.block_output_tensors[block_num - 1]
self.internalTensor = self.block_dense_layers[block_num](block_input_tensor)
self.internalTensor = self.block_activations_0[block_num](self.internalTensor)
if block_num > 0: # Skip connection
self.block_shortcut_tensors[block_num] = self.block_output_tensors[block_num - 1]
self.internalTensor = self.block_add_layers[block_num]() # PROBLEM
self.internalTensor = self.block_activations_1(self.internalTensor)
self.block_output_tensors[block_num] = tf.keras.identity(self.internalTensor)
self.internalTensor = self.final_dense_layer(self.block_output_tensors[-1])
return(self.internalTensor)
Two solutions:
Use tf.keras.layers.Add. Note the capital A. You can define this layer in __init__ like any other layer, e.g. add_layer = tf.keras.layers.Add(), then use it on a list of two inputs in call, e.g. added = add_layer([x1, x2]).
There is really no need to use a layer to do addition. Simply do added = x1 + x2 in the call. Only Sequential models need "everything" to be a layer.

How to show the class distribution in Dataset object in Tensorflow

I am working on a multi-class classification task using my own images.
filenames = [] # a list of filenames
labels = [] # a list of labels corresponding to the filenames
full_ds = tf.data.Dataset.from_tensor_slices((filenames, labels))
This full dataset will be shuffled and split into train, valid and test dataset
full_ds_size = len(filenames)
full_ds = full_ds.shuffle(buffer_size=full_ds_size*2, seed=128) # seed is used for reproducibility
train_ds_size = int(0.64 * full_ds_size)
valid_ds_size = int(0.16 * full_ds_size)
train_ds = full_ds.take(train_ds_size)
remaining = full_ds.skip(train_ds_size)
valid_ds = remaining.take(valid_ds_size)
test_ds = remaining.skip(valid_ds_size)
Now I am struggling to understand how each class is distributed in train_ds, valid_ds and test_ds. An ugly solution is to iterate all the element in the dataset and count the occurrence of each class. Is there any better way to solve it?
My ugly solution:
def get_class_distribution(dataset):
class_distribution = {}
for element in dataset.as_numpy_iterator():
label = element[1]
if label in class_distribution.keys():
class_distribution[label] += 1
else:
class_distribution[label] = 0
# sort dict by key
class_distribution = collections.OrderedDict(sorted(class_distribution.items()))
return class_distribution
train_ds_class_dist = get_class_distribution(train_ds)
valid_ds_class_dist = get_class_distribution(valid_ds)
test_ds_class_dist = get_class_distribution(test_ds)
print(train_ds_class_dist)
print(valid_ds_class_dist)
print(test_ds_class_dist)
The answer below assumes:
there are five classes.
labels are integers from 0 to 4.
It can be modified to suit your needs.
Define a counter function:
def count_class(counts, batch, num_classes=5):
labels = batch['label']
for i in range(num_classes):
cc = tf.cast(labels == i, tf.int32)
counts[i] += tf.reduce_sum(cc)
return counts
Use the reduce operation:
initial_state = dict((i, 0) for i in range(5))
counts = train_ds.reduce(initial_state=initial_state,
reduce_func=count_class)
print([(k, v.numpy()) for k, v in counts.items()])
A solution inspired by user650654 's answer, only using TensorFlow primitives (with tf.unique_with_counts instead of for loop):
In theory, this should have better performance and scale better to large datasets, batches or class count.
num_classes = 5
#tf.function
def count_class(counts, batch):
y, _, c = tf.unique_with_counts(batch[1])
return tf.tensor_scatter_nd_add(counts, tf.expand_dims(y, axis=1), c)
counts = train_ds.reduce(
initial_state=tf.zeros(num_classes, tf.int32),
reduce_func=count_class)
print(counts.numpy())
Similar and simpler version with numpy that actually had better performances for my simple use-case:
count = np.zeros(num_classes, dtype=np.int32)
for _, labels in train_ds:
y, _, c = tf.unique_with_counts(labels)
count[y.numpy()] += c.numpy()
print(count)

Expected to see 3 array(s), but instead got the following list of 1 arrays:

I am trying to train a triple loss model using a fit_generator. it requires three input and no output. so i have a function that generates hard triplets. the output from the triplets generator has a shape of (3,5,279) which is 3 inputs(anchor,positive and negative) for 5 batches and a total of 279 features. When i run the fit_generator it throws this error that "the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 3 array(s), but instead got the following list of 1 arrays" meanwhile i have passed a list of three arrays. the code is below. it works when i use the fit, however, i want to always call the generator function to generate my triplets as my batches. thanks in advance..this has taken me three days
def load_data():
path = "arrhythmia_data.txt"
f = open( path, "r")
data = []
#remove line breaker, comma separate and store in array
for line in f:
line = line.replace('\n','').replace('?','0')
line = line.split(",")
data.append(line)
f.close()
data = np.array(data).astype(np.float64)
#print(data.shape)
#create the class labels for input data
Y_train = data[:,-1:]
train = data[:,:-1]
normaliser = preprocessing.MinMaxScaler()
train = normaliser.fit_transform(train)
val = train[320:,:]
train = train[:320,:]
#create one hot encoding of the class labels of the data and separate them into train and test data
lb = LabelBinarizer()
encode = lb.fit_transform(Y_train)
nb_classes = int(len(encode[0]))
#one_hot_labels = keras.utils.to_categorical(labels, num_classes=10) this could also be used for one hot encoding
Y_val_e = encode[320:,:]
Y_train_e = encode[:320,:]
print(Y_train_e[0])
print(np.argmax(Y_train_e[0]))
val_in = []
train_in = []
#grouping and sorting the input data based on label id or name
for n in range(nb_classes):
images_class_n = np.asarray([row for idx,row in enumerate(train) if np.argmax(Y_train_e[idx])==n])
train_in.append(images_class_n)
images_class_n = np.asarray([row for idx,row in enumerate(val) if np.argmax(Y_val_e[idx])==n])
val_in.append(images_class_n)
#print(train_in[0].shape)
return train_in,val_in,Y_train_e,Y_val_e,nb_classes
train_in,val,Y_train,Y_val,nb_classes = load_data()
input_shape = (train_in[0].shape[1],)
def build_network(input_shape , embeddingsize):
'''
Define the neural network to learn image similarity
Input :
input_shape : shape of input images
embeddingsize : vectorsize used to encode our picture
'''
#in_ = Input(train.shape)
net = Sequential()
net.add(Dense(128, activation='relu', input_shape=input_shape))
net.add(Dense(128, activation='relu'))
net.add(Dense(256, activation='relu'))
net.add(Dense(4096, activation='sigmoid'))
net.add(Dense(embeddingsize, activation= None))
#Force the encoding to live on the d-dimentional hypershpere
net.add(Lambda(lambda x: K.l2_normalize(x,axis=-1)))
return net
class TripletLossLayer(Layer):
def __init__(self, alpha, **kwargs):
self.alpha = alpha
super(TripletLossLayer, self).__init__(**kwargs)
def triplet_loss(self, inputs):
anchor, positive, negative = inputs
p_dist = K.sum(K.square(anchor-positive), axis=-1)
n_dist = K.sum(K.square(anchor-negative), axis=-1)
return K.sum(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0)
def call(self, inputs):
loss = self.triplet_loss(inputs)
self.add_loss(loss)
return loss
def build_model(input_shape, network, margin=0.2):
'''
Define the Keras Model for training
Input :
input_shape : shape of input images
network : Neural network to train outputing embeddings
margin : minimal distance between Anchor-Positive and Anchor-Negative for the lossfunction (alpha)
'''
# Define the tensors for the three input images
anchor_input = Input(input_shape, name="anchor_input")
positive_input = Input(input_shape, name="positive_input")
negative_input = Input(input_shape, name="negative_input")
# Generate the encodings (feature vectors) for the three images
encoded_a = network(anchor_input)
encoded_p = network(positive_input)
encoded_n = network(negative_input)
#TripletLoss Layer
loss_layer = TripletLossLayer(alpha=margin,name='triplet_loss_layer')([encoded_a,encoded_p,encoded_n])
# Connect the inputs with the outputs
network_train = Model(inputs=[anchor_input,positive_input,negative_input],outputs=loss_layer)
# return the model
return network_train
def get_batch_random(batch_size,s="train"):
# initialize result
triplets=[np.zeros((batch_size,m)) for i in range(3)]
for i in range(batch_size):
#Pick one random class for anchor
anchor_class = np.random.randint(0, nb_classes)
nb_sample_available_for_class_AP = X[anchor_class].shape[0]
#Pick two different random pics for this class => A and P. You can use same anchor as P if there is one one element for anchor
if nb_sample_available_for_class_AP<=1:
continue
[idx_A,idx_P] = np.random.choice(nb_sample_available_for_class_AP,size=2 ,replace=False)
#Pick another class for N, different from anchor_class
negative_class = (anchor_class + np.random.randint(1,nb_classes)) % nb_classes
nb_sample_available_for_class_N = X[negative_class].shape[0]
#Pick a random pic for this negative class => N
idx_N = np.random.randint(0, nb_sample_available_for_class_N)
triplets[0][i,:] = X[anchor_class][idx_A,:]
triplets[1][i,:] = X[anchor_class][idx_P,:]
triplets[2][i,:] = X[negative_class][idx_N,:]
return np.array(triplets)
def get_batch_hard(draw_batch_size,hard_batchs_size,norm_batchs_size,network,s="train"):
if s == 'train':
X = train_in
else:
X = val
#m, features = X[0].shape
#while True:
#Step 1 : pick a random batch to study
studybatch = get_batch_random(draw_batch_size,X)
#Step 2 : compute the loss with current network : d(A,P)-d(A,N). The alpha parameter here is omited here since we want only to order them
studybatchloss = np.zeros((draw_batch_size))
#Compute embeddings for anchors, positive and negatives
A = network.predict(studybatch[0])
P = network.predict(studybatch[1])
N = network.predict(studybatch[2])
#Compute d(A,P)-d(A,N)
studybatchloss = np.sum(np.square(A-P),axis=1) - np.sum(np.square(A-N),axis=1)
#Sort by distance (high distance first) and take the
selection = np.argsort(studybatchloss)[::-1][:hard_batchs_size]
#Draw other random samples from the batch
selection2 = np.random.choice(np.delete(np.arange(draw_batch_size),selection),norm_batchs_size,replace=False)
selection = np.append(selection,selection2)
triplets = [studybatch[0][selection,:], studybatch[1][selection,:],studybatch[2][selection,:]]
triplets = triplets.reshape(triplets.shape[0],triplets.shape[1],triplets.shape[2])
yield triplets
network = build_network(input_shape,embeddingsize=10)
hard = get_batch_hard(5,4,1,network,s="train")
network_train = build_model(input_shape,network)
optimizer = Adam(lr = 0.00006)
network_train.compile(loss=None,optimizer=optimizer)
#this works
#history = network_train.fit(hard,epochs=100,steps_per_epoch=1, verbose=2)
history = network_train.fit_generator(hard,epochs=10,steps_per_epoch=16, verbose=2)
# error:: the list of Numpy arrays that you are passing to your model is not the size the model
expected. Expected to see 3 array(s), but instead got the following list of 1 arrays:
I think that's beacause in your generator you are yielding the 3 inputs array in one list, you need to yield the 3 arrays independently:
triplet_1 = studybatch[0][selection,:]
triplet_2 = studybatch[1][selection,:]
triplet_3 = studybatch[2][selection,:]
yield [triplet_1, triplet_2, triplet_3]

Store RNN states using graph collections

I frequently use tf.add_to_collection to have Tensorflow automatically serialize intermediary results into a checkpoint. I find this the most convenient way to later fetch pointers to interesting tensors when a model was restored from a checkpoint. However, I realized that RNN state tuples cannot easily be added to a graph collection. Consider the following dummy example in TF 1.3:
import tensorflow as tf
import numpy as np
in_ = tf.placeholder(tf.float32, shape=[None, 5, 1])
batch_size = tf.shape(in_)[0]
cell1 = tf.nn.rnn_cell.BasicLSTMCell(num_units=128)
cell2 = tf.nn.rnn_cell.BasicLSTMCell(num_units=256)
cell = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2])
outputs, last_state = tf.nn.dynamic_rnn(cell=cell,
inputs=in_,
initial_state=cell.zero_state(batch_size, dtype=tf.float32))
tf.add_to_collection('states', last_state)
loss = tf.reduce_mean(in_ - outputs)
loss_s = tf.summary.scalar('loss', loss)
writer = tf.summary.FileWriter('.', tf.get_default_graph())
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
l, s = sess.run([loss, loss_s], feed_dict={in_: np.ones([1, 5, 1])})
writer.add_summary(s)
This will produce the following warning:
WARNING:tensorflow:Error encountered when serializing states.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'tuple' object has no attribute 'name'
It seems that the serialization cannot handle tuples, and of course the last_state variable is a tuple. May be one could loop through the tuple and add each element individually to the collection, but that seems too complicated. What's a better way of handling this? In the end, I would like to access last_state again when the model is restored, ideally without needing access to the original code that created the model.
Actually, looping through every element of the state is not too complicated, and straight-forward to implement:
def add_to_collection_rnn_state(name, rnn_state):
for layer in rnn_state:
tf.add_to_collection(name, layer.c)
tf.add_to_collection(name, layer.h)
And then to load it:
def get_collection_rnn_state(name):
layers = []
coll = tf.get_collection(name)
for i in range(0, len(coll), 2):
state = tf.nn.rnn_cell.LSTMStateTuple(coll[i], coll[i+1])
layers.append(state)
return tuple(layers)
Note that this assumes that one collection only stores on state, i.e. use a different collection for every state you want to store, e.g. like this:
add_to_collection_rnn_state('states', last_state)
add_to_collection_rnn_state('init_state', init_state)
Edit
As pointed out correctly in the comments, the proposed solution only works for LSTMCells (that are represented as tuples as well). A more general solution that can handle GRU cells or potentially custom cells and mixes thereof, could look like this:
import tensorflow as tf
import numpy as np
def add_to_collection_rnn_state(name, rnn_state):
# store the name of each cell type in a different collection
coll_of_names = name + '__names__'
for layer in rnn_state:
n = layer.__class__.__name__
tf.add_to_collection(coll_of_names, n)
try:
for l in layer:
tf.add_to_collection(name, l)
except TypeError:
# layer is not iterable so just add it directly
tf.add_to_collection(name, layer)
def get_collection_rnn_state(name):
layers = []
coll = tf.get_collection(name)
coll_of_names = tf.get_collection(name + '__names__')
idx = 0
for n in coll_of_names:
if 'LSTMStateTuple' in n:
state = tf.nn.rnn_cell.LSTMStateTuple(coll[idx], coll[idx+1])
idx += 2
else: # add more cell types here
state = coll[idx]
idx += 1
layers.append(state)
return tuple(layers)
in_ = tf.placeholder(tf.float32, shape=[None, 5, 1])
batch_size = tf.shape(in_)[0]
cell1 = tf.nn.rnn_cell.BasicLSTMCell(num_units=128)
cell2 = tf.nn.rnn_cell.GRUCell(num_units=256)
cell3 = tf.nn.rnn_cell.BasicRNNCell(num_units=256)
cell = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2, cell3])
outputs, last_state = tf.nn.dynamic_rnn(cell=cell,
inputs=in_,
initial_state=cell.zero_state(batch_size, dtype=tf.float32))
add_to_collection_rnn_state('last_state', last_state)
last_state_r = get_collection_rnn_state('last_state')
Comparing last_state and last_state_r reveals that both are identical (which they should be). Note that I am using a different collection to store the names because tensorflow can only serialize a collection when all elements in the collection are of the same type. E.g. mixing strings with Tensors in the same collection does not work.