TensorFlow subclassing issue

I have a ResNet model, defined by the following class:
class ModelResNet(tf.keras.models.Model):
    def __init__(self):
        super(ModelResNet, self).__init__()
        self.resBlock1 = ResBlock(num_filters=32)
        self.resBlock2 = ResBlock(num_filters=32)
        self.dense1 = tf.keras.layers.Dense(units=128, activation='relu')
        self.dense2 = tf.keras.layers.Dense(units=10, activation='softmax')

    def call(self, input_tensor, training=False):
        x = self.resBlock1(input_tensor, training=training)
        x = self.resBlock2(x, training=training)
        x = tf.keras.layers.Flatten()(x)

        # Option 1
        x = self.dense1(x)
        x = self.dense2(x)

        # Option 2
        # x = tf.keras.layers.Dense(units=128, activation='relu')(x)
        # x = tf.keras.layers.Dense(units=10, activation='softmax')(x)

        return x
When I call fit() on the model, everything works great!
But if I replace Option 1 with Option 2 in the call() method, I get the following error:
ValueError: tf.function-decorated function tried to create variables on non-first call.
Not sure I understand where the issue comes from.
Thanks!

When calling:
x = tf.keras.layers.Dense(units=128, activation='relu')(x)
inside call(), you create a brand-new Dense layer (and its weight variables) on every forward pass. Those weights are thrown away as soon as call() returns, so they could never be trained.
On top of that, fit() wraps call() in a tf.function, and a tf.function may only create variables on its first trace; creating new layers on later calls is therefore prohibited, which is exactly the ValueError you see. Keep the layers as attributes created in __init__ (Option 1) so they are built once and reused.
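If the intent of Option 2 was to defer layer creation until the input shape is known, a safer pattern is to create the layers once in build() instead of call(). A minimal sketch under that assumption (the ResBlocks are left out for brevity, and the class name is just illustrative):

class ModelResNetLazy(tf.keras.models.Model):
    def build(self, input_shape):
        # Layers are created exactly once, when the input shape first becomes
        # known, so no variables are created on later (traced) calls.
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(units=128, activation='relu')
        self.dense2 = tf.keras.layers.Dense(units=10, activation='softmax')
        super().build(input_shape)

    def call(self, input_tensor, training=False):
        x = self.flatten(input_tensor)
        x = self.dense1(x)
        return self.dense2(x)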

Related

How to apply Monte Carlo Dropout, in tensorflow, for an LSTM if batch normalization is part of the model?

I have a model composed of 3 LSTM layers followed by a batch norm layer and finally dense layer. Here is the code:
def build_uncomplied_model(hparams):
    inputs = tf.keras.Input(shape=(None, hparams["n_features"]))
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_1"], return_sequences=True, recurrent_dropout=hparams['dropout'])(inputs)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_2"], return_sequences=True)(x)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_3"], return_sequences=True)(x)
    x = layers.BatchNormalization()(x)
    outputs = layers.TimeDistributed(layers.Dense(hparams["n_features"]))(x)
    model = tf.keras.Model(inputs, outputs, name=RNN_type + "_model")
    return model
Now I am aware that to apply MCDropout, we can apply the following code:
y_predict = np.stack([my_model(X_test, training=True) for x in range(100)])
y_proba = y_predict.mean(axis=0)
However, setting training=True also makes the batch norm layer use statistics computed on the test batches instead of its learned moving averages, which is not what I want.
Additionally, building a custom Dropout layer while setting training to True isn't a solution in my case because I am using LSTM.
class MCDropout(tf.keras.layers.Dropout):
    def call(self, inputs):
        return super().call(inputs, training=True)
Any help is much appreciated!!
A possible solution is to create a custom LSTM layer that overrides the call method to force the training flag to True:
class MCLSTM(keras.layers.LSTM):
    def __init__(self, units, **kwargs):
        super(MCLSTM, self).__init__(units, **kwargs)

    def call(self, inputs, mask=None, training=None, initial_state=None):
        return super(MCLSTM, self).call(
            inputs,
            mask=mask,
            training=True,
            initial_state=initial_state,
        )
Then you can use it in your code:
def build_uncomplied_model(hparams):
    inputs = tf.keras.Input(shape=(None, hparams["n_features"]))
    x = MCLSTM(hparams["cell_size_1"], return_sequences=True, recurrent_dropout=hparams['dropout'])(inputs)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_2"], return_sequences=True)(x)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_3"], return_sequences=True)(x)
    x = layers.BatchNormalization()(x)
    outputs = layers.TimeDistributed(layers.Dense(hparams["n_features"]))(x)
    model = tf.keras.Model(inputs, outputs, name=RNN_type + "_model")
    return model
or add it to your return_RNN factory (a more elegant way), as sketched below.
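For example, a minimal sketch of what such a factory could look like (return_RNN and the mc_dropout flag are your own helpers and an assumption on my part, so adapt the names):

def return_RNN(rnn_type, mc_dropout=False):
    # Hypothetical factory: returns the layer class to instantiate.
    # With mc_dropout=True the LSTM variant always runs with training=True.
    if rnn_type == "LSTM":
        return MCLSTM if mc_dropout else tf.keras.layers.LSTM
    if rnn_type == "GRU":
        return tf.keras.layers.GRU
    raise ValueError("Unknown rnn_type: {}".format(rnn_type))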
===== EDIT =====
Another solution could be to add the training flag when creating the model. Something like this:
def build_uncomplied_model(hparams):
    inputs = tf.keras.Input(shape=(None, hparams["n_features"]))
    # This is the Monte Carlo LSTM
    x = LSTM(hparams["cell_size_1"], return_sequences=True, recurrent_dropout=hparams['dropout'])(inputs, training=True)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_2"], return_sequences=True)(x)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_3"], return_sequences=True)(x)
    x = layers.BatchNormalization()(x)
    outputs = layers.TimeDistributed(layers.Dense(hparams["n_features"]))(x)
    model = tf.keras.Model(inputs, outputs, name=RNN_type + "_model")
    return model

tf.placeholder inside a class in TF 2.0

I'm trying to port code I wrote in TF 1.x to TF 2.0, and I'm having difficulties replacing the tf.placeholder inside a class. My code is the following:
class User:
    x = tf.placeholder(tf.float32, shape=[None, 784])
    y_true = tf.placeholder(tf.float32, [None, 10])
    W1 = tf.Variable(tf.random.truncated_normal([7840, 1], stddev=0.1))
    lambda_W = tf.Variable(tf.zeros([7840, 1]))
    W = tf.reshape(W1, [784, 10])
    ylogits = W * x
    y = tf.nn.softmax(ylogits)

    def __init__(self):
        pass
Is there a way to replace tf.placeholder inside the class to make code running in TF 2.0?
Firstly, I think you intended to have each of those objects created per instance of the class, not one for the whole class as it is now. I also think your product between W and x was meant to be a matrix product, not an element-wise product, which would not work with the given shapes:
class User:
    def __init__(self):
        self.x = tf.placeholder(tf.float32, shape=[None, 784])
        self.y_true = tf.placeholder(tf.float32, [None, 10])
        self.W1 = tf.Variable(tf.random.truncated_normal([7840, 1], stddev=0.1))
        self.lambda_W = tf.Variable(tf.zeros([7840, 1]))
        self.W = tf.reshape(self.W1, [784, 10])
        self.ylogits = self.x @ self.W
        self.y = tf.nn.softmax(self.ylogits)
To use it in TensorFlow 2.x, you would remove the placeholders and simply perform the operations each time with every new input, for example with a new function call:
class User:
    def __init__(self):
        self.W1 = tf.Variable(tf.random.truncated_normal([7840, 1], stddev=0.1))
        self.lambda_W = tf.Variable(tf.zeros([7840, 1]))
        self.W = tf.reshape(self.W1, [784, 10])

    def call(self, x):
        ylogits = x @ self.W
        return tf.nn.softmax(ylogits)
You could use this as:
user1 = User()
x = ... # Get some data
y = user1.call(x)
Or if you like to be more "idiomatic", you could use __call__:
class User:
    def __init__(self):
        self.W1 = tf.Variable(tf.random.truncated_normal([7840, 1], stddev=0.1))
        self.lambda_W = tf.Variable(tf.zeros([7840, 1]))
        self.W = tf.reshape(self.W1, [784, 10])

    def __call__(self, x):
        ylogits = x @ self.W
        return tf.nn.softmax(ylogits)
And then you would do:
user1 = User()
x = ... # Get some data
y = user1(x)
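With that in place, you simply pass a concrete batch where the placeholder used to be fed. A small sketch with dummy data, just to show the shapes:

import numpy as np

user1 = User()
x = tf.constant(np.random.rand(5, 784), dtype=tf.float32)  # dummy batch of 5 flattened images
y = user1(x)
print(y.shape)  # (5, 10)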

Freeze sublayers in tensorflow 2

I have a model which is composed of custom layers. Each custom layer contains many tf.keras.layers. The problem is that if I want to freeze those layers after defining my model, the loop:
for i, layer in enumerate(model.layers):
    print(i, layer.name)
only prints the "outer" custom layers and not the ones nested inside them. Is there any way to access the inner layers so I can freeze them?
An example of a custom layer from the official TF docs:
class MLPBlock(layers.Layer):
    def __init__(self):
        super(MLPBlock, self).__init__()
        self.linear_1 = Linear(32)
        self.linear_2 = Linear(32)
        self.linear_3 = Linear(1)

    def call(self, inputs):
        x = self.linear_1(inputs)
        x = tf.nn.relu(x)
        x = self.linear_2(x)
        x = tf.nn.relu(x)
        return self.linear_3(x)
You can use Keras callbacks. If you want to freeze your first layer after a certain number of epochs, add this callback:
class FreezeCallback(tf.keras.callbacks.Callback):
    def __init__(self, n_epochs=10):
        super().__init__()
        self.n_epochs = n_epochs

    def on_epoch_end(self, epoch, logs=None):
        if epoch == self.n_epochs:
            l = self.model.get_layer('first')
            l.trainable = False
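You would then pass the callback to fit(); for example (the epoch counts here are only placeholders):

model.fit(data, labels, epochs=20, batch_size=32,
          callbacks=[FreezeCallback(n_epochs=10)])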
What you are doing in your update function is replacing the first Dense() layer with another Dense() layer, this time with trainable=False.
While this works, I would write the update function as follows:
def updt(self):
    self.dense1.trainable = False
OK, I came up with a solution.
An "update" function must be implemented inside the custom layer, which sets the inner layers to non-trainable.
Here is a sample code:
import tensorflow as tf
import numpy as np
layers = tf.keras.layers
seq_model = tf.keras.models.Sequential
class MDBlock(layers.Layer):
    def __init__(self):
        super(MDBlock, self).__init__()
        self.dense1 = layers.Dense(784, name="first")
        self.dense2 = layers.Dense(32, name="second")
        self.dense3 = layers.Dense(32, name="third")
        self.dense4 = layers.Dense(1, activation='sigmoid', name="outp")

    def call(self, inputs):
        x = self.dense1(inputs)
        x = tf.nn.relu(x)
        x = self.dense2(x)
        x = tf.nn.relu(x)
        x = self.dense3(x)
        x = tf.nn.relu(x)
        x = self.dense4(x)
        return x

    def updt(self):
        self.dense1.trainable = False

    def __str__(self):
        return "\nd1:{0}\nd2:{1}\nd3:{2}\nd4:{3}".format(self.dense1.trainable, self.dense2.trainable,
                                                         self.dense3.trainable, self.dense4.trainable)
# define layer block
layer = MDBlock()
model = seq_model()
model.add(layers.Input(shape=(784,)))
model.add(layer)
# Use updt function to make layers non-trainable
for i, layer in enumerate(model.layers):
    layer.updt()

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])
# Generate dummy data
data = np.random.random((1000, 784))
labels = np.random.randint(2, size=(1000, 1))
# Train the model, iterating on the data in batches of 32 samples
model.fit(data, labels, epochs=10, batch_size=32)
# print block's layers state
for i, layer in enumerate(model.layers):
    print(i, layer)
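If you would rather not add an update method at all, another option is to walk the model's nested modules; a minimal sketch (submodules is inherited from tf.Module, which every Keras layer and model subclasses):

# Freeze every Dense layer nested anywhere inside the model,
# then recompile so training picks up the change.
for module in model.submodules:
    if isinstance(module, tf.keras.layers.Dense):
        module.trainable = False

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])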

Tensorflow 2.0 Keras Model subclassing

I'm trying to implement a simple UNet-like model using the model subclassing method. Here's my code:
import tensorflow as tf
from tensorflow import keras as K
class Enc_block(K.layers.Layer):
    def __init__(self, in_dim):
        super(Enc_block, self).__init__()
        self.conv_layer = K.layers.SeparableConv2D(in_dim, 3, padding='same', activation='relu')
        self.batchnorm_layer = K.layers.BatchNormalization()
        self.pool_layer = K.layers.SeparableConv2D(in_dim, 3, padding='same', strides=2, activation='relu')

    def call(self, x):
        x = self.conv_layer(x)
        x = self.batchnorm_layer(x)
        x = self.conv_layer(x)
        x = self.batchnorm_layer(x)
        return self.pool_layer(x), x


class Dec_block(K.layers.Layer):
    def __init__(self, in_dim):
        super(Dec_block, self).__init__()
        self.conv_layer = K.layers.SeparableConv2D(in_dim, 3, padding='same', activation='relu')
        self.batchnorm_layer = K.layers.BatchNormalization()

    def call(self, x):
        x = self.conv_layer(x)
        x = self.batchnorm_layer(x)
        x = self.conv_layer(x)
        x = self.batchnorm_layer(x)
        return x
class Bottleneck(K.layers.Layer):
    def __init__(self, in_dim):
        super(Bottleneck, self).__init__()
        self.conv_1layer = K.layers.SeparableConv2D(in_dim, 1, padding='same', activation='relu')
        self.conv_3layer = K.layers.SeparableConv2D(in_dim, 3, padding='same', activation='relu')
        self.batchnorm_layer = K.layers.BatchNormalization()

    def call(self, x):
        x = self.conv_1layer(x)
        x = self.batchnorm_layer(x)
        x = self.conv_3layer(x)
        x = self.batchnorm_layer(x)
        return x


class Output_block(K.layers.Layer):
    def __init__(self, in_dim):
        super(Output_block, self).__init__()
        self.logits = K.layers.SeparableConv2D(in_dim, 3, padding='same', activation=None)
        self.out = K.layers.Softmax()

    def call(self, x):
        x_logits = self.logits(x)
        x = self.out(x_logits)
        return x_logits, x
class UNetModel(K.Model):
    def __init__(self, in_dim):
        super(UNetModel, self).__init__()
        self.encoder_block = Enc_block(in_dim)
        self.bottleneck = Bottleneck(in_dim)
        self.decoder_block = Dec_block(in_dim)
        self.output_block = Output_block(in_dim)

    def call(self, inputs, training=None):
        x, x_skip1 = self.encoder_block(32)(inputs)
        x, x_skip2 = self.encoder_block(64)(x)
        x, x_skip3 = self.encoder_block(128)(x)
        x, x_skip4 = self.encoder_block(256)(x)
        x = self.bottleneck(x)
        x = K.layers.UpSampling2D(size=(2,2))(x)
        x = K.layers.concatenate([x, x_skip4], axis=-1)
        x = self.decoder_block(256)(x)
        x = K.layers.UpSampling2D(size=(2,2))(x)  # 56x56
        x = K.layers.concatenate([x, x_skip3], axis=-1)
        x = self.decoder_block(128)(x)
        x = K.layers.UpSampling2D(size=(2,2))(x)  # 112x112
        x = K.layers.concatenate([x, x_skip2], axis=-1)
        x = self.decoder_block(64)(x)
        x = K.layers.UpSampling2D(size=(2,2))(x)  # 224x224
        x = K.layers.concatenate([x, x_skip1], axis=-1)
        x = self.decoder_block(32)(x)
        x_logits, x = self.output_block(2)(x)
        return x_logits, x
I am getting the following error:
ValueError: Input 0 of layer separable_conv2d is incompatible with the layer: expected ndim=4, found ndim=0. Full shape received: []
I'm not sure if this is the correct way to implement a network in tf.keras. The idea was to implement encoder and decoder blocks by subclassing Keras layers and then subclassing Model.
Take a look at this line from UNetModel class:
x, x_skip1 = self.encoder_block(32)(inputs)
where self.encoder_block() is defined by
self.encoder_block = Enc_block(in_dim)
encoder_block is an instance of a class. By writing self.encoder_block(32) you invoke the __call__() method of the Enc_block class, which expects a batch of image inputs of rank 4. Instead you are passing the integer 32, which has rank 0, and you get a ValueError that says exactly that: expected ndim=4, found ndim=0. What you probably intended to do is:
x, x_skip1 = self.encoder_block(inputs)
You repeat the same mistake in the subsequent lines as well. There are additional errors where you define the same in_dim for every custom layer:
self.encoder_block = Enc_block(in_dim)
self.bottleneck = Bottleneck(in_dim)
self.decoder_block = Dec_block(in_dim)
self.output_block = Output_block(in_dim)
The input shape of the Bottleneck layer should match the output shape of the Enc_block layer, and so on. I suggest you first understand a simple example before trying to implement more complicated ones. Take a look at this example. It has two custom layers:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
class CustomLayer1(layers.Layer):
    def __init__(self, outshape=4):
        super(CustomLayer1, self).__init__()
        self.outshape = outshape

    def build(self, input_shape):
        self.kernel = self.add_weight(name='kernel',
                                      shape=(int(input_shape[1]), self.outshape),
                                      trainable=True)
        super(CustomLayer1, self).build(input_shape)

    def call(self, inputs):
        return tf.matmul(inputs, self.kernel)


class CustomLayer2(layers.Layer):
    def __init__(self):
        super(CustomLayer2, self).__init__()

    def call(self, inputs):
        return inputs / tf.reshape(tf.reduce_sum(inputs, 1), (-1, 1))
Now I will use both of these layers in the new CombinedLayers class:
class CombinedLayers(layers.Layer):
    def __init__(self, units=3):
        super(CombinedLayers, self).__init__()
        # `units` defines a number of units in the layer. It is the
        # output shape of the `CustomLayer`
        self.layer1 = CustomLayer1(units)
        # The input shape is inferred dynamically in the `build()`
        # method of the `CustomLayer1` class
        self.layer2 = CustomLayer1(units)
        # Some layers such as this one do not need to know the shape
        self.layer3 = CustomLayer2()

    def call(self, inputs):
        x = self.layer1(inputs)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
Note that the input shape of CustomLayer1 is inferred dynamically in the build() method. Now let's test it with some input:
x_train = [np.random.normal(size=(3, )) for _ in range(5)]
x_train_tensor = tf.convert_to_tensor(x_train)
combined = CombinedLayers(3)
result = combined(x_train_tensor)
result.numpy()
# array([[ 0.50822063, -0.0800476 , 0.57182697],
# [ -0.76052217, 0.50127872, 1.25924345],
# [-19.5887986 , 9.23529798, 11.35350062],
# [ -0.33696137, 0.22741248, 1.10954888],
# [ 0.53079047, -0.08941536, 0.55862488]])
This is how you should approach it. Create layers one by one, and each time you add a new layer, test everything with some input to verify that you are doing things correctly.
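Applied to the U-Net above, that roughly means one block instance per resolution in __init__ and calling those instances directly in call(). A rough sketch only, reusing the Enc_block, Dec_block, Bottleneck and Output_block classes from the question, with just two encoder levels and illustrative filter counts:

class UNetModelFixed(K.Model):
    def __init__(self):
        super(UNetModelFixed, self).__init__()
        self.enc1 = Enc_block(32)
        self.enc2 = Enc_block(64)
        self.bottleneck = Bottleneck(128)
        self.up = K.layers.UpSampling2D(size=(2, 2))
        self.dec2 = Dec_block(64)
        self.dec1 = Dec_block(32)
        self.out_block = Output_block(2)

    def call(self, inputs, training=None):
        x, skip1 = self.enc1(inputs)   # call the instance, do not pass an int
        x, skip2 = self.enc2(x)
        x = self.bottleneck(x)
        x = K.layers.concatenate([self.up(x), skip2], axis=-1)
        x = self.dec2(x)
        x = K.layers.concatenate([self.up(x), skip1], axis=-1)
        x = self.dec1(x)
        return self.out_block(x)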

How do I load a checkpoint using tensorflow in eager execution mode?

I am using tensorflow 1.7.0 in eager execution mode. I have the model working, but none of the examples that I have found for saving the model work.
This is the code that I am using:
checkpoint_directory ='./JokeWords/'
checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
checkpoint = tfe.Checkpoint(model=model,optimizer=optimizer) # save as "x"
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))
evaluate(model,jokes,2,32)
....
checkpoint.save(file_prefix=checkpoint_prefix)
I have trained the model and use evaluate to check the results after a restart. Each time I get a random result from evaluate, meaning that the checkpoint is not actually being loaded and the model still has random weights.
How do I save the model? It can take days to train one of these.
Edit. Here is the model:
class EagerRNN(tfe.Network):
    def __init__(self, embedding, hidden_dim, num_layers, keep_ratio):
        super(EagerRNN, self).__init__()
        self.keep_ratio = keep_ratio
        self.cells = self._add_cells([
            tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)
            for _ in range(num_layers)
        ])
        self.backcells = self._add_cells([
            tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)
            for _ in range(num_layers)
        ])
        self.linear = layers.Dense(embedding.vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
        self.backlinear = layers.Dense(embedding.vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
        self.attension = layers.Dense(hidden_dim, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))

    def call(self, input_seq, seq_lengths, training):
        lengths = [i[0] for i in seq_lengths]
        nRotations = max(lengths)
        batchSize = input_seq.shape[0]
        input_seq2 = tf.unstack(input_seq, num=int(input_seq.shape[1]), axis=1)
        atten = None
        state = self.cells[0].zero_state(batchSize, tf.float32)
        for i in range(0, nRotations):
            for j in range(0, len(self.cells)):
                c = self.cells[j]
                inp = input_seq2[i]
                output, state = c(inp, state)
                # input_seq2[i] = (output)
                if atten is None:
                    atten = self.linear(output)
                else:
                    atten = atten + self.linear(output)
        for i in range(nRotations - 1, -1, -1):
            for j in range(0, len(self.backcells)):
                c = self.backcells[j]
                inp = input_seq2[i]
                output, state = c(inp, state)
                # input_seq2[i] = (output)
                atten = atten + self.backlinear(output)
        # input_seq = tf.stack(input_seq2[0:nRotations], axis=1)
        atten = self.attension(atten)
        if training:
            input_seq = tf.nn.dropout(input_seq, self.keep_ratio)
        # Returning a list instead of a single tensor so that the line:
        #   y = self.rnn(y, ...)[0]
        # in PTBModel.call works for both this RNN and CudnnLSTM (which returns a
        # tuple (output, output_states)).
        return input_seq, state, atten

    def _add_cells(self, cells):
        # "Magic" required for keras.Model classes to track all the variables in
        # a list of Layer objects.
        # TODO(ashankar): Figure out API so user code doesn't have to do this.
        for i, c in enumerate(cells):
            setattr(self, "cell-%d" % i, c)
        return cells
class EagerLSTM_Model(tfe.Network):
    """LSTM for word language modeling.
    Model described in:
    (Zaremba, et. al.) Recurrent Neural Network Regularization
    http://arxiv.org/abs/1409.2329
    See also:
    https://github.com/tensorflow/models/tree/master/tutorials/rnn/ptb
    """

    def __init__(self,
                 embedding,
                 hidden_dim,
                 num_layers,
                 dropout_ratio,
                 use_cudnn_rnn=True):
        super(EagerLSTM_Model, self).__init__()
        self.keep_ratio = 1 - dropout_ratio
        self.use_cudnn_rnn = use_cudnn_rnn
        self.embedding = embedding
        if self.use_cudnn_rnn:
            self.rnn = cudnn_rnn.CudnnLSTM(
                num_layers, hidden_dim, dropout=dropout_ratio)
        else:
            self.rnn = EagerRNN(embedding, hidden_dim, num_layers, self.keep_ratio)
        self.unrnn = EagerUnRNN(embedding, hidden_dim, num_layers, self.keep_ratio)

    def callRNN(self, input_seq, seq_lengths, training):
        y = self.embedding.callbatchword(input_seq)
        if training:
            y = tf.nn.dropout(y, self.keep_ratio)
        y, state, atten = self.rnn.call(y, seq_lengths, training=training)
        return state, atten

    def callUnRNN(self, state, atten, seq_lengths, training):
        x, state = self.unrnn(state, atten, seq_lengths, training=training)
        # b = tf.reshape(y, self._output_shape)
        # c = self.linear(b)
        return x
tfe.Network is not (easily) checkpointable and it will soon be deprecated; prefer to subclass tf.keras.Model instead. If you change class EagerRNN(tfe.Network) to class EagerRNN(tf.keras.Model) and class EagerLSTM_Model(tfe.Network) to class EagerLSTM_Model(tf.keras.Model), then checkpoint.save(file_prefix=checkpoint_prefix) should actually save all your variables and checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory)) should restore them.
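For reference, once the model subclasses tf.keras.Model, a minimal save/restore sketch with the object-based checkpoint API (tf.train.Checkpoint is the non-contrib name of tfe.Checkpoint, and CheckpointManager requires a reasonably recent TF version):

checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
manager = tf.train.CheckpointManager(checkpoint, './JokeWords/', max_to_keep=3)

# Restore the latest checkpoint, if there is one, and sanity-check the match.
status = checkpoint.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
    status.assert_existing_objects_matched()

# ... train ...
manager.save()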