How to access recursive layers of custom layer in tensorflow keras - tensorflow

From tensorflow keras example. I can create a custom layer which contains Linear layer recursively
class MLPBlock(layers.Layer):
def __init__(self):
super(MLPBlock, self).__init__()
self.linear_1 = Linear(32)
self.linear_2 = Linear(32)
self.linear_3 = Linear(1)
def call(self, inputs):
x = self.linear_1(inputs)
x = tf.nn.relu(x)
x = self.linear_2(x)
x = tf.nn.relu(x)
return self.linear_3(x)
how do i access all the component layers of a custom layer, I want to access weight and biases of all the component layers.
ex:
MLPBlock(Parent Layer):
linear_1
linear_2
linear_3
I have looked into tensorflow keras api version r 1.14 https://www.tensorflow.org/guide/keras
but could not find any way to do this.

I assume that you are following this tutorial. Based on that, here is how you can access the weights:
class MLPBlock(tf.keras.Model):
def __init__(self):
super(MLPBlock, self).__init__()
self.linear_1 = tf.keras.layers.Dense(32)
self.linear_2 = tf.keras.layers.Dense(32)
self.linear_3 = tf.keras.layers.Dense(1)
def call(self, inputs):
x = self.linear_1(inputs)
x = tf.nn.relu(x)
x = self.linear_2(x)
x = tf.nn.relu(x)
return self.linear_3(x)
mlp_block = MLPBlock()
y = mlp_block(tf.ones(shape=(3, 64)))
for layer in mlp_block.layers:
weights, biases = layer.get_weights()
Please note that I slightly modified the example so that you can access the layer's weights and biases. Namely, what I did is instead of subclassing the class with tf.keras.layers.Layer, I subclassed with tf.keras.Model so that the stack of layers can be treated as a model, and then you can access the layers of that model. Then, instead of using the custom Linear layer, I used the tf.keras.layers.Dense for simplicity, however, using the custom layer should not make a difference.

Related

How to create a keras layer with a custom gradient *and learnable parameters* in TF2.0?

this is a similar question to: How to create a keras layer with a custom gradient in TF2.0?
Only, I would like to introduce a learnable parameter into the custom layer that I am training.
Here's a toy example of my current approach here:
# Method for calculation custom gradient
#tf.custom_gradient
def scaler(x, s):
def grad(upstream):
dy_dx = s
dy_ds = x
return dy_dx, dy_ds
return x * s, grad
# Keras Layer with trainable parameter
class TestLayer(tf.keras.layers.Layer):
def build(self, input_shape):
self.scale = self.add_weight("scale",
shape=[1,],
initializer=tf.keras.initializers.Constant(value=2.0),
trainable=True)
def call(self, inputs):
return scaler(inputs, self.scale)
# Creates Keras Model that uses the layer
def Model():
x_in = tf.keras.layers.Input(shape=(1,))
x_out = TestLayer()(x_in)
return tf.keras.Model(inputs=x_in, outputs=x_out, name="fp8_test")
# Create toy dataset, want to learn `scale` such to satisfy 5 = 2 * scale (i.e, `scale` should learn ~2.5)
def Dataset():
inps = tf.ones(shape=(10**5,)) * 2 # inputs
expected = tf.ones(shape=(10**5,)) * 5 # targets
data_in = tf.data.Dataset.from_tensors(inps)
data_exp = tf.data.Dataset.from_tensors(expected)
dataset = tf.data.Dataset.zip((data_in, data_exp))
return dataset
model = Model()
model.summary()
dataset = Dataset()
# Use `MSE` loss and `SGD` optimizer
model.compile(
loss=tf.keras.losses.MSE,
optimizer=tf.keras.optimizers.SGD(),
)
model.fit(dataset, epochs=100)
This is failing with the following shape related error in the optimizer:
ValueError: Shapes must be equal rank, but are 1 and 2 for '{{node SGD/SGD/update/ResourceApplyGradientDescent}} = ResourceApplyGradientDescent[T=DT_FLOAT, use_locking=true](fp8_test/test_layer_1/ReadVariableOp/resource, SGD/Identity, SGD/IdentityN)' with input shapes: [], [], [100000,1].
I've been staring at the docs for a while, I'm a bit stumped as to why this isn't working, I would really appreciate any input on how to fix this toy example.
Thanks in advance.

Freeze sublayers in tensorflow 2

I have a model which is composed of custom layers. Each custom layer contains many tf.keras.layers. The problem is that if I want to freeze those layers after defining my model, the loop:
for i, layer in enumerate(model.layers):
print(i, layer.name)
only prints the "outer" custom layers and not those who exist inside. Is there any way to access the inner layers so I can freeze them?
an example of a custom layer from the official tf docs:
class MLPBlock(layers.Layer):
def __init__(self):
super(MLPBlock, self).__init__()
self.linear_1 = Linear(32)
self.linear_2 = Linear(32)
self.linear_3 = Linear(1)
def call(self, inputs):
x = self.linear_1(inputs)
x = tf.nn.relu(x)
x = self.linear_2(x)
x = tf.nn.relu(x)
return self.linear_3(x)
You can use keras callbacks. If you want to freeze your first layer after some certain amount of epochs, add this callback
class FreezeCallback(tf.keras.callbacks.Callback):
def __init__(self, n_epochs=10):
super().__init__()
self.n_epochs = n_epochs
def on_epoch_end(self, epoch, logs=None):
if epoch == self.n_epochs:
l = self.model.get_layer('first')
l.trainable = False
What you are doing in your update function is to replace the first Dense() layer with another Dense() layer, this time setting trainable = false.
While this works, I would update the 'update' function as following:
def updt(self):
self.dense1.trainable = False
Ok i came up with a solution.
An "update" function must be implemented inside the custom layer, which updates the inner layers so that they become non trainable.
Here is a sample code:
import tensorflow as tf
import numpy as np
layers = tf.keras.layers
seq_model = tf.keras.models.Sequential
class MDBlock(layers.Layer):
def __init__(self):
super(MDBlock, self).__init__()
self.dense1 = layers.Dense(784, name="first")
self.dense2 = layers.Dense(32, name="second")
self.dense3 = layers.Dense(32, name="third")
self.dense4 = layers.Dense(1, activation='sigmoid', name="outp")
def call(self, inputs):
x = self.dense1(inputs)
x = tf.nn.relu(x)
x = self.dense2(x)
x = tf.nn.relu(x)
x = self.dense3(x)
x = tf.nn.relu(x)
x = self.dense4(x)
return x
def updt(self):
self.dense1.trainable = False
def __str__(self):
return "\nd1:{0}\nd2:{1}\nd3:{2}\nd4:{3}".format(self.dense1.trainable, self.dense2.trainable,
self.dense3.trainable, self.dense4.trainable)
# define layer block
layer = MDBlock()
model = seq_model()
model.add(layers.Input(shape=(784,)))
model.add(layer)
# Use updt function to make layers non-trainable
for i, layer in enumerate(model.layers):
layer.updt()
model.compile(optimizer='rmsprop',
loss='binary_crossentropy',
metrics=['accuracy'])
# Generate dummy data
data = np.random.random((1000, 784))
labels = np.random.randint(2, size=(1000, 1))
# Train the model, iterating on the data in batches of 32 samples
model.fit(data, labels, epochs=10, batch_size=32)
# print block's layers state
for i, layer in enumerate(model.layers):
print(i, layer)

How to implement gradient reversal layer in TF 2.0?

This layer is static, it is a pseudo function. In the forward propagation it doesn't do anything (identity function). In the back propagation however, it multiplies the gradient by -1. There are lots of implementations on github but they don't work with TF 2.0.
Here's one for reference.
import tensorflow as tf
from tensorflow.python.framework import ops
class FlipGradientBuilder(object):
def __init__(self):
self.num_calls = 0
def __call__(self, x, l=1.0):
grad_name = "FlipGradient%d" % self.num_calls
#ops.RegisterGradient(grad_name)
def _flip_gradients(op, grad):
return [tf.negative(grad) * l]
g = tf.get_default_graph()
with g.gradient_override_map({"Identity": grad_name}):
y = tf.identity(x)
self.num_calls += 1
return y
flip_gradient = FlipGradientBuilder()
Dummy op that reverses the gradients
This can be done using the decorator tf.custom_gradient, as described in this example:
#tf.custom_gradient
def grad_reverse(x):
y = tf.identity(x)
def custom_grad(dy):
return -dy
return y, custom_grad
Then, you can just use it as if it is a normal TensorFlow op, for example:
z = encoder(x)
r = grad_reverse(z)
y = decoder(r)
Keras API?
A great convenience of TF 2.0 is it's native support for Keras API. You can define a custom GradReverse op and enjoy the convenience of Keras:
class GradReverse(tf.keras.layers.Layer):
def __init__(self):
super().__init__()
def call(self, x):
return grad_reverse(x)
Then, you can use this layer as any other layers of Keras, for example:
model = Sequential()
conv = tf.keras.layers.Conv2D(...)(inp)
cust = CustomLayer()(conv)
flat = tf.keras.layers.Flatten()(cust)
fc = tf.keras.layers.Dense(num_classes)(flat)
model = tf.keras.models.Model(inputs=[inp], outputs=[fc])
model.compile(loss=..., optimizer=...)
model.fit(...)

Implementation of BERT in keras with TF_HUB

I was trying to implement the Google Bert model in tensorflow-keras using tensorflow hub. For this I designed a custom keras layer "Bertlayer" . Now the problem is when I am compiling the keras model it keeps showing that
AttributeError: 'Bertlayer' object has no attribute '_keras_style'
I don't know where I am wrong and what _keras_style attribute is.Please help to find the error in the code.
This is the github link to the full code: https://github.com/PradyumnaGupta/BERT/blob/master/Untitled21.ipynb
class BertLayer(tf.layers.Layer):
def __init__(self, n_fine_tune_layers=10, **kwargs):
self.n_fine_tune_layers = n_fine_tune_layers
self.trainable = True
self.output_size = 768
super(BertLayer, self).__init__(**kwargs)
def build(self, input_shape):
self.bert = hub.Module(
bert_path,
trainable=self.trainable,
name="{}_module".format(self.name)
)
trainable_vars = self.bert.variables
# Remove unused layers
trainable_vars = [var for var in trainable_vars if not "/cls/" in var.name]
# Select how many layers to fine tune
trainable_vars = trainable_vars[-self.n_fine_tune_layers :]
# Add to trainable weights
for var in trainable_vars:
self._trainable_weights.append(var)
for var in self.bert.variables:
if var not in self._trainable_weights:
self._non_trainable_weights.append(var)
super(BertLayer, self).build(input_shape)
def call(self, inputs):
inputs = [K.cast(x, dtype="int32") for x in inputs]
input_ids, input_mask, segment_ids = inputs
bert_inputs = dict(
input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
)
result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
"pooled_output"
]
return result
def compute_output_shape(self, input_shape):
return (input_shape[0], self.output_size)
So, tensorflow version 1.* is a bit misleading. It actually has 2 bases classes called Layer. One - the one that you are using. It is intended to implement shortcut wrappers over regular TF operations. The other from tensorflow.keras.layers import Layer is for Keras-like models and sequencies.
Judging by your error, you are using keras/models to train further.
You probably should start form derivering your layer from keras.layers.Layer instead of tf.layers.Layer.

How do I load a checkpoint using tensorflow in eager execution mode?

I am using tensorflow 1.7.0 in eager execution mode. I have the model working, but none of the examples that I have found for saving the model work.
This is the code that I am using:
checkpoint_directory ='./JokeWords/'
checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
checkpoint = tfe.Checkpoint(model=model,optimizer=optimizer) # save as "x"
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))
evaluate(model,jokes,2,32)
....
checkpoint.save(file_prefix=checkpoint_prefix)
I have trained the model and use evaluate to check the results when loading from a restart. Each time I get a random result from evaluate, meaning that the model is not loading from the data, but instead only having random weights.
How do I save the model? It can take days to train one of these.
Edit. Here is the model:
class EagerRNN(tfe.Network):
def __init__(self,embedding, hidden_dim, num_layers, keep_ratio):
super(EagerRNN, self).__init__()
self.keep_ratio = keep_ratio
self.cells = self._add_cells([
tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)
for _ in range(num_layers)
])
self.backcells = self._add_cells([
tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)
for _ in range(num_layers)
])
self.linear = layers.Dense(embedding. vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
self.backlinear = layers.Dense(embedding. vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
self.attension = layers.Dense(hidden_dim, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))
def call(self, input_seq,seq_lengths, training):
lengths=[i[0] for i in seq_lengths]
nRotations=max(lengths)
batchSize=input_seq.shape[0]
input_seq2 = tf.unstack(input_seq, num=int(input_seq.shape[1]), axis=1)
atten = None
state = self.cells[0].zero_state(batchSize, tf.float32)
for i in range(0,nRotations):
for j in range(0,len(self.cells)):
c=self.cells[j]
inp=input_seq2[i]
output, state = c(inp, state)
#input_seq2[i]=(output)
if atten==None:
atten =self.linear(output)
else:
atten=atten+self.linear(output)
for i in range(nRotations-1,-1,-1):
for j in range(0,len(self.backcells)):
c=self.backcells[j]
inp=input_seq2[i]
output, state = c(inp, state)
#input_seq2[i]=(output)
atten=atten+self.backlinear(output)
#input_seq = tf.stack(input_seq2[0:nRotations], axis=1)
atten=self.attension(atten)
if training:
input_seq = tf.nn.dropout(input_seq, self.keep_ratio)
# Returning a list instead of a single tensor so that the line:
# y = self.rnn(y, ...)[0]
# in PTBModel.call works for both this RNN and CudnnLSTM (which returns a
# tuple (output, output_states).
return input_seq,state,atten
def _add_cells(self, cells):
# "Magic" required for keras.Model classes to track all the variables in
# a list of Layer objects.
# TODO(ashankar): Figure out API so user code doesn't have to do this.
for i, c in enumerate(cells):
setattr(self, "cell-%d" % i, c)
return cells
class EagerLSTM_Model(tfe.Network):
"""LSTM for word language modeling.
Model described in:
(Zaremba, et. al.) Recurrent Neural Network Regularization
http://arxiv.org/abs/1409.2329
See also:
https://github.com/tensorflow/models/tree/master/tutorials/rnn/ptb
"""
def __init__(self,
embedding,
hidden_dim,
num_layers,
dropout_ratio,
use_cudnn_rnn=True):
super(EagerLSTM_Model, self).__init__()
self.keep_ratio = 1 - dropout_ratio
self.use_cudnn_rnn = use_cudnn_rnn
self.embedding = embedding
if self.use_cudnn_rnn:
self.rnn = cudnn_rnn.CudnnLSTM(
num_layers, hidden_dim, dropout=dropout_ratio)
else:
self.rnn = EagerRNN(embedding,hidden_dim, num_layers, self.keep_ratio)
self.unrnn = EagerUnRNN(embedding,hidden_dim, num_layers, self.keep_ratio)
def callRNN(self, input_seq,seq_lengths, training):
y = self.embedding.callbatchword(input_seq)
if training:
y = tf.nn.dropout(y, self.keep_ratio)
y,state,atten = self.rnn.call(y,seq_lengths, training=training)
return state,atten
def callUnRNN (self,state,atten,seq_lengths, training ):
x,state = self.unrnn(state,atten,seq_lengths,training=training)
#b=tf.reshape(y, self._output_shape)
#c=self.linear(b)
return x
tfe.Network is not (easily) Checkpointable and it will soon be deprecated. Prefer to subclass tf.Keras.Model instead. So if you change class EagerRNN(tfe.Network) to class EagerRNN(tf.keras.Model) and class EagerLSTM_Model(tfe.Network) to class EagerLSTM_Model(tf.keras.Model), checkpoint.save(file_prefix=checkpoint_prefix) should actually save all your variables and checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory)) should restore them.