Using tf.keras.Model as base class for defining RNN Cell

Using tf.keras.Model as base class for defining RNN Cell - tensorflow

I'm working within TensorFlow's EagerExecution to develop a variation of Variational Autoencoder (VAE) in a sequential data setting. Since both recurrent network structure and its input-output flow are not standard, I have to build my own custom RNNCell, which later can be passed to tf.nn.raw_rnn API.
To build the class of the desired RNNCell, I use tf.keras.Model as the base class. But when I pass this RNNCell to tf.nn.raw_rnn, I get NaN output. What's wrong?
Here is my implementation (please tell me if anything is still unclear):
import tensorflow as tf
tfe = tf.contrib.eager
tf.enable_eager_execution()
import numpy as np
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
The dataset is called 'inputs'; all of its entries are bounded, its dtype is float32, and its shape is (time_steps, batch_size, input_depth) = (20, 1000, 4). Note that this shape format differs from the one used by the more familiar tf.nn.dynamic_rnn API, where the shape is (batch_size, time_steps, input_depth).
#defining sampling and reparameterizing function
def sampling(args):
    """Draw a latent sample using the reparameterization trick.

    Args:
        args: A pair ``(mean, logvar)`` of tensors with the same shape.

    Returns:
        ``mean + exp(0.5 * logvar) * epsilon`` where ``epsilon`` is standard
        normal noise with the same shape as ``mean``.
    """
    mean, logvar = args
    # Take the noise shape from `mean` itself instead of from module-level
    # `batch_size` / `latent_dim` globals, so the function works for any
    # batch size and latent dimension without hidden global state.
    # By default, random_normal has mean = 0 and std = 1.0.
    epsilon = tf.random_normal(shape=tf.shape(mean))
    return mean + tf.exp(0.5 * logvar) * epsilon
#defining class of the model (PreSSM = without transition module yet)
class PreSSM(tf.keras.Model):
    """Custom RNN cell built from an inference net and a generative net.

    The inference net maps a cell input to ``[mean, logvar, s]`` where ``s``
    is a sample produced by ``sampling``; the generative net maps ``s`` to a
    2-dimensional output. The cell exposes ``state_size``, ``output_size``
    and ``zero_state`` and is callable as ``cell(inputs, state)``.

    Args:
        latent_dim: Size of the latent state ``s``.
        intermediate_dim: Width of the hidden Dense layers.
    """

    def __init__(self, latent_dim = 4, intermediate_dim = 4):
        super(PreSSM, self).__init__()
        self.latent_dim = latent_dim
        self.input_dim = self.latent_dim + 4 #toy problem

        # Inference network: input -> (mean, logvar, sampled state s).
        inputs = Input(shape=(self.input_dim,), name='inference_input')
        layer_1 = Dense(intermediate_dim, activation='relu')(inputs)
        layer_2 = Dense(intermediate_dim, activation='relu')(layer_1)
        mean = Dense(latent_dim, name='mean')(layer_2)
        logvar = Dense(latent_dim, name='logvar')(layer_2)
        s = Lambda(sampling, output_shape=(latent_dim,), name='s')([mean, logvar])
        self.inference_net = Model(inputs, [mean, logvar, s], name='inference_net')

        # Generative network: sampled state s -> 2-dimensional output.
        latent_inputs = Input(shape=(latent_dim,), name='s_sampling')
        layer_3 = Dense(intermediate_dim, activation='relu')(latent_inputs)
        layer_4 = Dense(intermediate_dim, activation='relu')(layer_3)
        outputs = Dense(2)(layer_4)
        self.generative_net = Model(latent_inputs, outputs, name='generative_net')

    @property
    def state_size(self):
        # Use the instance's own latent size rather than a module-level
        # `latent_dim` global, which may be undefined or out of sync.
        return self.latent_dim

    @property
    def output_size(self):
        return 2 #(x,y) coordinate

    @property
    def zero_state(self):
        # NOTE(review): relies on a module-level `init_state` defined
        # elsewhere; it is not created in this class.
        return init_state #global variable we have defined

    def __call__(self, inputs, state):
        """Run one step; returns ``(output, next_state)``.

        ``state`` is accepted for the cell interface but not read here: the
        next state is the sample (last output) of the inference net applied
        to ``inputs``.
        """
        next_state = self.inference_net(inputs)[-1]
        output = self.generative_net(next_state)
        return output, next_state
# Instantiate the cell: a PreSSM model instance with default arguments.
model = PreSSM()
#define a class with instant super_loop_fn(inputs) that has method called loop_fn
class SuperLoop:
    """Holds the input sequence and provides ``loop_fn`` for tf.nn.raw_rnn.

    Relies on module-level ``max_time``, ``seq_length`` and ``init_state``
    being defined before use.
    """

    def __init__(self, inputs, output_dim = 2):
        self.output_dim = output_dim
        # Unstack the data along its first axis into a TensorArray
        # (this is the data that loop_fn reads step by step).
        self.inputs_ta = tf.TensorArray(
            dtype=tf.float32, size=max_time, clear_after_read=False
        ).unstack(inputs)

    def loop_fn(self, time, cell_output, cell_state, loop_state):
        """Return ``(finished, next_input, next_state, emit, loop_state)``."""
        if cell_output is None:
            # First call (time == 0): no cell output exists yet.
            next_cell_state = init_state
            emit_output = tf.zeros([self.output_dim])
        else:
            next_cell_state = cell_state
            emit_output = cell_output

        elements_finished = (time >= seq_length)
        if tf.reduce_all(elements_finished):
            next_input = tf.zeros(shape=(self.output_dim), dtype=tf.float32)
        else:
            # Feed the current observation concatenated with the state.
            next_input = tf.concat([self.inputs_ta.read(time), next_cell_state], -1)

        return (elements_finished, next_input, next_cell_state, emit_output, None)
#defining a model
def SSM_model(inputs, RNN_cell = model, output_dim = 2):
    """Run ``RNN_cell`` over ``inputs`` with tf.nn.raw_rnn and stack the outputs.

    Args:
        inputs: Data passed to ``SuperLoop``.
        RNN_cell: Cell handed to ``tf.nn.raw_rnn``; defaults to the
            module-level ``model`` as bound when this function is defined.
        output_dim: Output dimension passed to ``SuperLoop``.

    Returns:
        The stacked contents of the output TensorArray.
    """
    loop = SuperLoop(inputs, output_dim)
    # Only the emitted outputs are used; final state and loop state are dropped.
    outputs_ta, _, _ = tf.nn.raw_rnn(RNN_cell, loop.loop_fn)
    return outputs_ta.stack()
# Model check: run the cell over the full dataset (result is not stored).
SSM_model(inputs = inputs, RNN_cell = model)
Here, the outputs are nan...
Hence I can't proceed to the training step. What's wrong? Am I missing something when defining the RNNCell using tf.keras.Model as the base class above?

Related

Keras/Deepchem: epochs in data generator for prediction in graph convolutions affects prediction size

I am using graph convolutions in Deepchem/Keras for predicting molecular properties. Following the Deepchem tutorials I created a data generator. While there is no error in my code below, I fail to understand why the size of pred changes with epoch and batch_size.
First we create some dummy data.
!pip install --pre deepchem
!pip install --pre rdkit
import deepchem as dc
import numpy as np
import tensorflow as tf
from deepchem.feat.mol_graphs import ConvMol
# 240 identical molecule strings used as dummy inputs.
mol = ['C-C-O']*240
ftr = dc.feat.ConvMolFeaturizer(per_atom_fragmentation=False)
X=ftr.featurize(mol)
# Labels, weights and ids are all the integers 0..239.
y = np.arange(0,240,1)
w = np.arange(0,240,1)
ids = np.arange(0,240,1)
# NOTE(review): `w` is built above but not passed to NumpyDataset here.
ds = dc.data.NumpyDataset(X=X, y=y, ids=ids)
Edit: We use the following function as generator:
def data_generator(dataset, epochs=1, batch_size = 100, pad_batches = True):
    """Yield ``(inputs, labels, weights)`` batches for a graph-conv model.

    Args:
        dataset: Object providing ``iterbatches(batch_size, epochs, ...)``.
        epochs: Number of passes over ``dataset``; every pass yields its
            batches again, so the total number of yielded samples scales
            with ``epochs``.
        batch_size: Number of samples per batch.
        pad_batches: Forwarded to ``iterbatches``. NOTE(review): the output
            shown with this example (240 samples -> 300 rows per epoch at
            batch_size=100) is consistent with the last batch being padded
            up to ``batch_size`` - confirm against the DeepChem docs.

    Yields:
        Tuples ``(inputs, [y_b], [w_b])`` where ``inputs`` holds the atom
        features, degree slice, membership array and the degree adjacency
        lists from index 1 onward.
    """
    print(dataset)
    batches = dataset.iterbatches(batch_size, epochs,
                                  deterministic=False, pad_batches=pad_batches)
    for X_b, y_b, w_b, ids_b in batches:
        multiConvMol = ConvMol.agglomerate_mols(X_b)
        inputs = [multiConvMol.get_atom_features(), multiConvMol.deg_slice, np.array(multiConvMol.membership)]
        # Fetch the adjacency lists once instead of on every loop iteration;
        # the entry at index 0 is skipped, as in the original loop.
        deg_adjacency_lists = multiConvMol.get_deg_adjacency_lists()
        inputs.extend(deg_adjacency_lists[1:])
        labels = [y_b]
        weights = [w_b]
        yield (inputs, labels, weights)
(end edit)
Then we define the model and fit it to the dataset generated above:
# Module-level settings; `batch_size` is read by TestModel's GraphGather layer.
batch_size = 100
n_tasks = 1
class TestModel(tf.keras.Model):
    """Small graph-convolution test model.

    Args:
        model: Integer selecting the architecture variant. Only variant ``1``
            (GraphConv -> GraphGather -> Dense(1)) is defined.
    """

    def __init__(self, model = 1):
        super(TestModel, self).__init__()
        self.model = model
        #____________Test Model 1___________
        if self.model == 1:
            self.gc1 = GraphConv(128, activation_fn=tf.nn.tanh)
            # GraphGather reads the module-level `batch_size`.
            self.readout = GraphGather(batch_size=batch_size,
                                       activation_fn=tf.nn.tanh)
            self.dense2 = layers.Dense(1)

    def call(self, inputs):
        """Apply the selected variant to ``inputs`` and return its output.

        Raises:
            ValueError: If ``self.model`` is not a defined variant.
        """
        # Fail with a clear message instead of falling through with no
        # defined result when an unknown variant was requested.
        if self.model != 1:
            raise ValueError(
                "TestModel has no forward pass defined for model=%r" % (self.model,))
        #____________Test Model 1___________
        gc1_output = self.gc1(inputs)
        # The readout receives the conv output plus every input after the first.
        readout_output = self.readout([gc1_output]+ inputs[1:])
        dense2_output = self.dense2(readout_output)
        return dense2_output
# Fit: wrap TestModel variant 1 in a DeepChem KerasModel with L2 loss and
# train on one epoch of generated batches.
print("_________\nFitting:")
testmodel = dc.models.KerasModel(TestModel(1), loss=dc.models.losses.L2Loss())
testmodel.fit_generator(data_generator(ds, epochs=1, batch_size = 100))
Finally we try to predict the dataset labels setting epochs = 2:
# Predict on batches from the same generator.
# NOTE(review): the generator is asked for 2 epochs with padded batches, so
# it yields more rows than the dataset holds; compare the shapes printed below.
print("_________\nPredicting:")
pred = testmodel.predict_on_generator(data_generator(ds, epochs = 2, batch_size = 100, pad_batches = True))
print(ds.y.shape, pred.shape)
Giving:
_________
Predicting:
<NumpyDataset X.shape: (240,), y.shape: (240,), w.shape: (240,), ids: [0 1 2 ... 237 238 239], task_names: [0]>
(240,) (600, 1)
However if I change epochs to 1, the size of pred changes (300, 1) i.e. half of what we had before. Similarly, changing the batch_size affects the prediction size too.
Can anyone explain what I'm doing wrong?

Tensor shapes for FFJORD bijector

I want to fit FFJORD bijector for transformation of two-dimensional dataset. The code is below (it is simplified version of my original code, but has the same problem).
import tensorflow as tf
import tensorflow_probability as tfp
tfb = tfp.bijectors
tfd = tfp.distributions
class ODE(tf.keras.layers.Layer):
    """State-derivative network: Dense(4, tanh) followed by Dense(2)."""

    def __init__(self):
        super().__init__()
        self.dense_layer1 = tf.keras.layers.Dense(4, activation = 'tanh')
        self.dense_layer2 = tf.keras.layers.Dense(2)

    def call(self, t, inputs):
        """Return the derivative for ``inputs``; the time ``t`` is not used."""
        hidden = self.dense_layer1(inputs)
        return self.dense_layer2(hidden)
# Wrap the derivative network in an FFJORD bijector.
ode = ODE()
ffjord = tfb.FFJORD(state_time_derivative_fn = ode)
# Base distribution: diagonal normal with zero loc and unit scale in 2-D.
base_distr = tfd.MultivariateNormalDiag(loc = tf.zeros(2), scale_diag = tf.ones(2))
td = tfd.TransformedDistribution(distribution = base_distr, bijector = ffjord)
# Symbolic Keras input; the reported ValueError is raised by the log_prob
# call on this input.
x = tf.keras.Input(shape = (2,), dtype = tf.float32)
log_prob = td.log_prob(x)
model = tf.keras.Model(x, log_prob)
def NLL(y, log_prob):
    """Negative log-likelihood loss: ignores ``y`` and returns ``-log_prob``."""
    negated = -log_prob
    return negated
# Train with Adam on the NLL loss; the targets are zeros of matching length.
# NOTE(review): `X_train` and `np` are not defined/imported in this snippet.
model.compile(optimizer = tf.optimizers.Adam(1.0e-2), loss = NLL)
history = model.fit(x = X_train, y = np.zeros(X_train.shape[0]), epochs = 100, verbose = 0, batch_size = 128)
I get error in line log_prob = td.log_prob(x): ValueError: Cannot convert a partially known TensorShape to a Tensor: (None, 2)
If I try to get a sample from transformed distribution td.sample(), it produces another error, but td.sample(1) works as well as some other calls, for example
x = tf.constant([[2.0, 3.0]])
ode(-1.0, x)
ffjord.inverse(x)
ffjord.forward(x)
td.log_prob(td.sample(5))
I guess that there is some problem with shapes, but can't understand where it is.

Can't get output shape of a keras layer inside a custom layer

I am using a Keras custom layer built from multiple Keras layers. I am trying to get the output_shape of the inner layers from a callback (on_train_batch_end) and get the following error:
"AttributeError: The layer has never been called and thus has no defined input shape."
I don't understand how this can happen if the call function in the custom layer is called because I have already trained the model for a single batch.
a code example:
from tensorflow.keras.layers import ReLU, MaxPooling2D, Input, Dense, Conv2D, Flatten
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback
import tensorflow as tf
import numpy as np
class MyLayer(Layer):
    """Custom layer chaining Conv2D -> MaxPooling2D -> ReLU.

    The inner layers are placeholders (``None``) until ``build`` creates them.
    """

    def __init__(self):
        super().__init__()
        self.conv = self.m_max = self.relu = None

    def build(self, input_shape):
        """Create the inner layers; ``input_shape`` is forwarded to Conv2D."""
        self.conv = Conv2D(input_shape=input_shape, filters=128, kernel_size=(2,2))
        self.m_max = MaxPooling2D()
        self.relu = ReLU()

    def call(self, inputs, **kwargs):
        """Apply convolution, max pooling and ReLU in that order."""
        return self.relu(self.m_max(self.conv(inputs)))
class ModelCallback(Callback):
    """Prints the output shape of the custom layer's inner conv after each batch."""

    def on_batch_end(self, batch, logs=None):
        # Index 1 is the layer right after the Input layer in this model.
        custom_layer = self.model.layers[1]
        print(custom_layer.conv.output_shape)
# Functional model: Input -> MyLayer -> Flatten -> Dense(1).
inp = Input((32,32,3))
x = MyLayer()(inp)
x = Flatten()(x)
out = Dense(1)(x)
model = Model(inputs=inp, outputs=out)
# NOTE(review): categorical_crossentropy is paired with a single-unit output
# and 0/1 integer labels below - confirm this loss is intended.
model.compile(optimizer='adam', loss='categorical_crossentropy' )
# Random images and random 0/1 labels as dummy training data.
x_train = np.random.rand(5000,32,32,3)
y_train = np.random.randint(2, size=(5000,1))
# NOTE(review): `callbacks` receives a single callback instance rather than a list.
model.fit(x_train, y_train,epochs=5, callbacks=ModelCallback())

It is not really an answer but I will share my workaround for other people.
The idea is just to calculate the sizes on dummy data and save them.
def _calculate_shape(self, input_tensor_shape: tf.TensorShape):
    """Record (input, output) shapes of the inner layers using dummy data.

    Runs random data with ``self.batch_size`` as the leading dimension through
    conv, max-pool and ReLU and stores ``conv_shapes``, ``max_shapes`` and
    ``relu_shapes``, each as ``(input_shape, output_shape)`` with the batch
    dimension removed. The inner layers are marked non-trainable during the
    pass and set back to trainable afterwards.
    """
    def _without_batch(tensor):
        # [1:] removes the batch size from the shape.
        return tf.shape(tensor).numpy().tolist()[1:]

    inner_layers = (self.conv, self.m_max, self.relu)
    for layer in inner_layers:
        layer.trainable = False

    input_shape = list(input_tensor_shape)
    input_shape[0] = self.batch_size
    x = self.conv(np.random.rand(*input_shape))
    self.conv_shapes = (input_shape[1:], _without_batch(x))
    x = self.m_max(x)
    self.max_shapes = (self.conv_shapes[1], _without_batch(x))
    x = self.relu(x)
    self.relu_shapes = (self.max_shapes[1], _without_batch(x))

    for layer in inner_layers:
        layer.trainable = True
Then you can use the variables when trying to get the shape of inner layers.
** This removes the batch size from the shape

How do I load a checkpoint using tensorflow in eager execution mode?

I am using tensorflow 1.7.0 in eager execution mode. I have the model working, but none of the examples that I have found for saving the model work.
This is the code that I am using:
# Directory and file prefix under which checkpoints are written.
checkpoint_directory ='./JokeWords/'
checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
# Group the model and optimizer in one checkpoint object.
checkpoint = tfe.Checkpoint(model=model,optimizer=optimizer) # save as "x"
# Restore from the most recent checkpoint found in the directory, then evaluate.
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))
evaluate(model,jokes,2,32)
....
# Save the current state under the prefix.
checkpoint.save(file_prefix=checkpoint_prefix)
I have trained the model and use evaluate to check the results when loading from a restart. Each time I get a random result from evaluate, meaning that the model is not loading from the data, but instead only having random weights.
How do I save the model? It can take days to train one of these.
Edit. Here is the model:
# Network that steps a list of forward LSTM cells and then a list of backward
# LSTM cells over the input sequence, accumulating Dense projections of the
# cell outputs into `atten`.
# NOTE(review): indentation was lost in this listing, so loop nesting below
# cannot be confirmed from the text.
class EagerRNN(tfe.Network):
# Builds `num_layers` forward cells, `num_layers` backward cells and three
# Dense layers (`linear`, `backlinear`, `attension`).
def __init__(self,embedding, hidden_dim, num_layers, keep_ratio):
super(EagerRNN, self).__init__()
self.keep_ratio = keep_ratio
# Forward cells, registered as attributes through _add_cells.
self.cells = self._add_cells([
tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)
for _ in range(num_layers)
])
# Backward cells, registered through the same helper.
# NOTE(review): _add_cells names attributes "cell-<i>" starting at 0 on every
# call, so this call reuses the attribute names set for the forward cells.
self.backcells = self._add_cells([
tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)
for _ in range(num_layers)
])
self.linear = layers.Dense(embedding. vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
self.backlinear = layers.Dense(embedding. vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
self.attension = layers.Dense(hidden_dim, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
# Runs the cells over the sequence and returns (input_seq, state, atten).
def call(self, input_seq,seq_lengths, training):
# The number of time steps processed is the largest first element found in
# `seq_lengths`.
lengths=[i[0] for i in seq_lengths]
nRotations=max(lengths)
batchSize=input_seq.shape[0]
# Split the input along axis 1 into one tensor per time step.
input_seq2 = tf.unstack(input_seq, num=int(input_seq.shape[1]), axis=1)
atten = None
# Initial state comes from the first forward cell and is threaded through
# every cell call below, forward and backward alike.
state = self.cells[0].zero_state(batchSize, tf.float32)
# Forward pass over time steps 0 .. nRotations-1.
for i in range(0,nRotations):
for j in range(0,len(self.cells)):
c=self.cells[j]
# Every cell receives the raw step input, not the previous cell's output.
inp=input_seq2[i]
output, state = c(inp, state)
#input_seq2[i]=(output)
# NOTE(review): `is None` is the conventional identity test here.
if atten==None:
atten =self.linear(output)
else:
atten=atten+self.linear(output)
# Backward pass over time steps nRotations-1 .. 0.
for i in range(nRotations-1,-1,-1):
for j in range(0,len(self.backcells)):
c=self.backcells[j]
inp=input_seq2[i]
output, state = c(inp, state)
#input_seq2[i]=(output)
atten=atten+self.backlinear(output)
#input_seq = tf.stack(input_seq2[0:nRotations], axis=1)
# Project the accumulated sum through the final Dense layer.
atten=self.attension(atten)
if training:
input_seq = tf.nn.dropout(input_seq, self.keep_ratio)
# Returning a list instead of a single tensor so that the line:
# y = self.rnn(y, ...)[0]
# in PTBModel.call works for both this RNN and CudnnLSTM (which returns a
# tuple (output, output_states).
return input_seq,state,atten
# Sets each cell as an attribute named "cell-<index>" and returns the list.
def _add_cells(self, cells):
# "Magic" required for keras.Model classes to track all the variables in
# a list of Layer objects.
# TODO(ashankar): Figure out API so user code doesn't have to do this.
for i, c in enumerate(cells):
setattr(self, "cell-%d" % i, c)
return cells
# Top-level network combining an embedding, an encoder RNN (`rnn`) and a
# decoder-side network (`unrnn`).
# NOTE(review): indentation was lost in this listing; `cudnn_rnn` and
# `EagerUnRNN` are not defined in the visible source.
class EagerLSTM_Model(tfe.Network):
"""LSTM for word language modeling.
Model described in:
(Zaremba, et. al.) Recurrent Neural Network Regularization
http://arxiv.org/abs/1409.2329
See also:
https://github.com/tensorflow/models/tree/master/tutorials/rnn/ptb
"""
# Stores the configuration and builds either a CudnnLSTM or an EagerRNN.
def __init__(self,
embedding,
hidden_dim,
num_layers,
dropout_ratio,
use_cudnn_rnn=True):
super(EagerLSTM_Model, self).__init__()
# Keep probability used by the tf.nn.dropout calls.
self.keep_ratio = 1 - dropout_ratio
self.use_cudnn_rnn = use_cudnn_rnn
self.embedding = embedding
if self.use_cudnn_rnn:
self.rnn = cudnn_rnn.CudnnLSTM(
num_layers, hidden_dim, dropout=dropout_ratio)
else:
self.rnn = EagerRNN(embedding,hidden_dim, num_layers, self.keep_ratio)
# NOTE(review): with indentation lost it is unclear whether `unrnn` is
# created only in the non-cuDNN branch.
self.unrnn = EagerUnRNN(embedding,hidden_dim, num_layers, self.keep_ratio)
# Embeds the input, applies dropout when training, runs the RNN and returns
# (state, atten).
def callRNN(self, input_seq,seq_lengths, training):
y = self.embedding.callbatchword(input_seq)
if training:
y = tf.nn.dropout(y, self.keep_ratio)
# Invokes the `call` method directly and unpacks three return values,
# matching what EagerRNN.call returns.
y,state,atten = self.rnn.call(y,seq_lengths, training=training)
return state,atten
# Runs `unrnn` on the state and attention and returns its first result.
def callUnRNN (self,state,atten,seq_lengths, training ):
x,state = self.unrnn(state,atten,seq_lengths,training=training)
#b=tf.reshape(y, self._output_shape)
#c=self.linear(b)
return x

tfe.Network is not (easily) Checkpointable and it will soon be deprecated. Prefer to subclass tf.keras.Model instead. So if you change class EagerRNN(tfe.Network) to class EagerRNN(tf.keras.Model) and class EagerLSTM_Model(tfe.Network) to class EagerLSTM_Model(tf.keras.Model), checkpoint.save(file_prefix=checkpoint_prefix) should actually save all your variables and checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory)) should restore them.

why am I getting a 100% error rate (RNN for spam)

I am learning TensorFlow by modifying some examples I've found. To start off, I have taken an RNN example and tried it against the "Spam" data set from UCI.
My code and the sample data set can be found in full here:
https://trinket.io/python/c7d6b95452
When I run the code I get a 100% error rate. I figure even if this data set was not well suited for this particular model that I'd get at least something better than that, so I don't think it's my choice of a sample data set.
Below is my Python code. If anyone can suggest how to modify this to get the model to work properly I would appreciate it! I'd also appreciate any general tensor flow advice too.
# Example for my blog post at:
# https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/
import functools
import os
import sets
import random
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
def lazy_property(function):
    """Decorator turning a method into a property that is computed once.

    On first access the wrapped method runs and its result is stored on the
    instance under ``'_' + function.__name__``; later accesses return the
    stored value without calling the method again.

    Args:
        function: Zero-argument method (besides ``self``) to wrap.

    Returns:
        A read-only ``property`` object.
    """
    attribute = '_' + function.__name__

    # The decorators were reduced to comments in the listing; without
    # `@property` attribute access would return a bound method instead of
    # the cached value.
    @property
    @functools.wraps(function)
    def wrapper(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)

    return wrapper
class SequenceClassification:
    """Sequence classifier graph: stacked GRU, softmax on the last output.

    ``prediction``, ``cost``, ``optimize`` and ``error`` are lazy properties,
    so each part of the graph is built once on first access. The constructor
    touches ``prediction``, ``error`` and ``optimize`` to build them eagerly.

    Args:
        data: Input tensor passed to ``tf.nn.dynamic_rnn``.
        target: Target tensor; its second dimension sets the output size.
        dropout: Output keep probability for the dropout wrapper.
        num_hidden: Number of GRU units per layer.
        num_layers: Number of stacked layers.
    """

    def __init__(self, data, target, dropout, num_hidden=200, num_layers=3):
        self.data = data
        self.target = target
        self.dropout = dropout
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        # Accessing the lazy properties builds the graph nodes; this only
        # works when the methods below are decorated with @lazy_property.
        self.prediction
        self.error
        self.optimize

    @lazy_property
    def prediction(self):
        """Softmax class probabilities computed from the last RNN output."""
        # Recurrent network.
        network = rnn_cell.GRUCell(self._num_hidden)
        network = rnn_cell.DropoutWrapper(
            network, output_keep_prob=self.dropout)
        network = rnn_cell.MultiRNNCell([network] * self._num_layers)
        output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
        # Select last output.
        output = tf.transpose(output, [1, 0, 2])
        last = tf.gather(output, int(output.get_shape()[0]) - 1)
        # Softmax layer.
        weight, bias = self._weight_and_bias(
            self._num_hidden, int(self.target.get_shape()[1]))
        prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
        return prediction

    @lazy_property
    def cost(self):
        """Summed cross-entropy between ``target`` and ``prediction``."""
        cross_entropy = -tf.reduce_sum(self.target *tf.log(self.prediction))
        return cross_entropy

    @lazy_property
    def optimize(self):
        """RMSProp training op minimizing ``cost``."""
        learning_rate = 0.003
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        """Fraction of rows whose argmax prediction differs from the target."""
        mistakes = tf.not_equal(
            tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
        return tf.reduce_mean(tf.cast(mistakes, tf.float32))

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        """Create a weight variable (truncated normal) and a bias variable (0.1)."""
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)
def main():
    """Load spam.csv, build train/test models and print the error per epoch.

    Reads ``./spam.csv``, shuffles the rows, splits them 80/20 into train and
    test sets, builds one ``SequenceClassification`` graph for each, then runs
    ``sample_size`` epochs of 100 optimizer steps, printing the test error
    after every epoch.
    """
    sample_size=10
    num_classes=2 #spam or ham
    ##
    # import spam data
    ##
    data_dir="."
    data_file="spam.csv"
    with open(os.path.join(data_dir, data_file), "r") as file_handle:
        spam_data = [line.rstrip().split(",")
                     for line in file_handle if len(line) >=1]
    random.shuffle(spam_data)
    split_at = int(len(spam_data)*.8)
    spam_data_train = spam_data[0:split_at]
    spam_data_test = spam_data[split_at:int(len(spam_data))]

    def _split_columns(rows):
        # All columns but the last are features; the last column is the label.
        return [np.array([line[:-1] for line in rows]),
                np.array([line[len(line)-1] for line in rows])]

    def next_train_batch(batch_size):
        return _split_columns(random.sample(spam_data_train, batch_size))

    def train_batch():
        return _split_columns(spam_data_train)

    def next_test_batch(batch_size):
        return _split_columns(random.sample(spam_data_test, batch_size))

    def test_batch():
        return _split_columns(spam_data_test)

    t=train_batch()
    train_input=t[0]
    train_target=t[1]
    test=test_batch()
    # Read the held-out split from `test`; the original indexed `t`, so the
    # "test" arrays were just the training data again.
    test_input=test[0]
    test_target=test[1]

    # NOTE(review): the CSV fields are strings while the placeholders are
    # float32, and the target placeholders are sized by sample count rather
    # than `num_classes` - confirm the intended shapes and dtypes.
    training_data = tf.placeholder(tf.float32, [None, sample_size, len(train_input[0])], "training_data")
    training_target = tf.placeholder(tf.float32, [None, sample_size], "training_target")
    testing_data = tf.placeholder(tf.float32, [None, len(test_input), len(test_input[0])], "testing_data")
    testing_target = tf.placeholder(tf.float32, [None, len(test_target)], "testing_target")
    dropout = tf.placeholder(tf.float32)
    training_model = SequenceClassification(training_data, training_target, dropout)
    tf.get_variable_scope().reuse_variables()
    testing_model = SequenceClassification(testing_data, testing_target, dropout)
    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)
    for epoch in range(sample_size):
        for _ in range(100):
            # Sample over every training index; the original upper bound of
            # len(train_input)-1 could never pick the last row.
            sample=random.sample(range(len(train_input)),sample_size)
            batch_train = [train_input[i] for i in sample]
            batch_target = [train_target[i] for i in sample]
            sess.run(training_model.optimize, {
                training_data: [batch_train], training_target: [batch_target] , dropout: 0.5})
        error = sess.run(testing_model.error, {
            testing_data: [test_input], testing_target: [test_target], dropout: 1.0})
        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
if __name__ == '__main__':
main()

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Using tf.keras.Model as base class for defining RNN Cell - tensorflow

Related

Keras/Deepchem: epochs in data generator for prediction in graph convolutions affects prediction size

Tensor shapes for FFJORD bijector

Can't get output shape of a keras layer inside a custom layer

How do I load a checkpoint using tensorflow in eager execution mode?

why am I getting a 100% error rate (RNN for spam)

Categories

Resources