How to use keras layers in custom keras layer - tensorflow

I am trying to write my own keras layer. In this layer, I want to use some other keras layers. Is there any way to do something like this:
class MyDenseLayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        self.fc = tf.keras.layers.Dense(self.num_outputs)

    def call(self, input):
        return self.fc(input)

layer = MyDenseLayer(10)
When I do something like
input = tf.keras.layers.Input(shape = (16,))
output = MyDenseLayer(10)(input)
model = tf.keras.Model(inputs = [input], outputs = [output])
model.summary()
it outputs a summary in which the dense layer's weights do not appear as trainable parameters.
How do I make the weights of the dense layer trainable?

If you look at the documentation for how to add custom layers, they recommend that you use the .add_weight(...) method. This method internally places all weights in self._trainable_weights. So to do what you want, you must first define the keras layers you want to use, build them, copy the weights, and then build your own layer. If I update your code it should be something like:
class mylayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs):
        self.num_outputs = num_outputs
        super(mylayer, self).__init__()

    def build(self, input_shape):
        # Build the wrapped Dense layer and adopt its weights as
        # this layer's own trainable weights.
        self.fc = tf.keras.layers.Dense(self.num_outputs)
        self.fc.build(input_shape)
        self._trainable_weights = self.fc.trainable_weights
        super(mylayer, self).build(input_shape)

    def call(self, input):
        return self.fc(input)

layer = mylayer(10)
input = tf.keras.layers.Input(shape=(16,))
output = layer(input)
model = tf.keras.Model(inputs=[input], outputs=[output])
model.summary()
You should then get what you want
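As a quick sanity check (a sketch, assuming the model built just above): the wrapped Dense layer contributes a kernel and a bias, so two trainable weights should now be tracked.

# Expect the Dense kernel (16, 10) and bias (10,)
print([w.shape for w in model.trainable_weights])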

It's much more comfortable and concise to put existing layers in a subclass of tf.keras.Model. If you define built-in layers such as Dense or Conv2D inside a custom Layer instead, their parameters are not trainable by default.
class MyDenseLayer(tf.keras.Model):
    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs
        self.fc = tf.keras.layers.Dense(num_outputs)

    def call(self, input):
        return self.fc(input)

    def compute_output_shape(self, input_shape):
        shape = tf.TensorShape(input_shape).as_list()
        shape[-1] = self.num_outputs
        return tf.TensorShape(shape)

layer = MyDenseLayer(10)
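For example, wiring the subclassed model into a functional graph (a minimal sketch based on the snippet above) should report the Dense parameters as trainable:

inputs = tf.keras.layers.Input(shape=(16,))
outputs = layer(inputs)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.summary()  # expect 16 * 10 + 10 = 170 trainable parameters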
Check this tutorial: https://www.tensorflow.org/guide/keras#model_subclassing

In the TF2 custom layer guide, they "recommend creating such sublayers in the __init__ method (since the sublayers will typically have a build method, they will be built when the outer layer gets built)." So just moving the creation of self.fc into __init__ gives what you want:
class MyDenseLayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs
        self.fc = tf.keras.layers.Dense(self.num_outputs)

    def build(self, input_shape):
        self.built = True

    def call(self, input):
        return self.fc(input)

input = tf.keras.layers.Input(shape=(16,))
output = MyDenseLayer(10)(input)
model = tf.keras.Model(inputs=[input], outputs=[output])
model.summary()
Output:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 16)] 0
_________________________________________________________________
my_dense_layer_2 (MyDenseLay (None, 10) 170
=================================================================
Total params: 170
Trainable params: 170
Non-trainable params: 0

This works for me and is clean, concise, and readable.
import tensorflow as tf

class MyDense(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        self.dense = tf.keras.layers.Dense(2, tf.keras.activations.relu)

    def call(self, inputs, training=None):
        return self.dense(inputs)

inputs = tf.keras.Input(shape=(10,))
outputs = MyDense(trainable=True)(inputs)
model = tf.keras.Model(inputs=inputs, outputs=outputs, name='test')
model.compile(loss=tf.keras.losses.MeanSquaredError())
model.summary()
Output:
Model: "test"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 10)] 0
_________________________________________________________________
my_dense (MyDense) (None, 2) 22
=================================================================
Total params: 22
Trainable params: 22
Non-trainable params: 0
_________________________________________________________________
Note that trainable=True is needed; I have posted a question about it here. (The likely cause: my original code had super(MyDense, self).__init__(kwargs), which passes the kwargs dict positionally as Layer's trainable argument, so an empty dict left the layer non-trainable; with **kwargs as written above this should not be necessary.)

Related

Counting the number of parameters in a GRU

I have a GRU model as follows.
class CharGenModel(tf.keras.Model):
    def __init__(self, vocab_size, num_timesteps, embedding_dim, **kwargs):
        super(CharGenModel, self).__init__(**kwargs)
        self.embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.rnn_layer = tf.keras.layers.GRU(
            num_timesteps,
            recurrent_initializer="glorot_uniform",
            recurrent_activation="sigmoid",
            stateful=True,
            return_sequences=True
        )
        self.dense_layer = tf.keras.layers.Dense(vocab_size)

    def call(self, x):
        print(x.shape)
        x = self.embedding_layer(x)
        print(x.shape)
        x = self.rnn_layer(x)
        print(x.shape)
        x = self.dense_layer(x)
        print(x.shape)
        return x

vocab_size = 92
embedding_dim = 256
seq_length = 100
batch_size = 64

model = CharGenModel(vocab_size, seq_length, embedding_dim)
model.build(input_shape=(batch_size, seq_length))
model.summary()
model.summary() produced the following shape printouts and parameter counts.
(64, 100)
(64, 100, 256)
(64, 100, 100)
(64, 100, 92)
Model: "char_gen_model_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_4 (Embedding) multiple 23552
gru_4 (GRU) multiple 107400
dense_4 (Dense) multiple 9292
=================================================================
Total params: 140,244
Trainable params: 140,244
Non-trainable params: 0
I'm confused about two things.
According to the embedding layer definition:
tf.keras.layers.Embedding(
    input_dim,
    output_dim,
    embeddings_initializer='uniform',
    embeddings_regularizer=None,
    activity_regularizer=None,
    embeddings_constraint=None,
    mask_zero=False,
    input_length=None,
    **kwargs
)
the input to the embedding layer in my application has shape 64x100.
(1) But why are the embedding layer's trainable parameters 92x256 = 23552? Why not 100x256?
(2) The parameter count for a GRU is
num_params = number of FFNNs × [number of hidden units × (number of hidden units + number of inputs) + number of biases]
The number of FFNNs (feedforward networks) in a GRU is 3, the number of hidden units is 100, the number of inputs is 256, and the number of biases is 100, so
num_params = 3 × [100 × (100 + 256) + 100] = 107100
But the model summary reports 107400. Where is my calculation going wrong?
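For what it's worth, both numbers can be reconciled. The embedding layer's input_dim is the vocabulary size (92 here), not the shape of the input tensor, so its parameter count is 92 × 256 = 23552. For the GRU, tf.keras defaults to reset_after=True, which keeps two bias vectors per gate (one applied with the input kernel, one with the recurrent kernel), adding an extra 3 × 100 biases over the classic formula. A quick check in Python, assuming those defaults:

units, input_dim = 100, 256
vocab_size, embedding_dim = 92, 256

# Embedding: one weight row per vocabulary entry
embedding_params = vocab_size * embedding_dim                # 23552

# GRU with reset_after=True: 3 gates, each with a kernel,
# a recurrent kernel, and *two* bias vectors
gru_params = 3 * (units * (units + input_dim) + 2 * units)   # 107400

print(embedding_params, gru_params)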

keras define a trainable variable for add or matmul

I have some problems using tf.keras to build a model. I want to define a trainable weight tensor with shape (64, 128), similar to what tf.get_variable provides, but I can't get it to work.
I have tried several methods in the past, but I'm looking for a simpler one.
inputs = tf.keras.Input((128,))
weights = tf.Variable(tf.random.normal((64, 128)))
output = tf.keras.layers.Lambda(lambda x: tf.matmul(x, tf.transpose(weights)))(inputs)
model = tf.keras.Model(inputs, output)
model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_10 (InputLayer) (None, 128) 0
_________________________________________________________________
lambda_2 (Lambda) (None, 64) 0
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
The defined weights are not trainable.
In addition, I know Dense gives a trainable kernel matrix and bias, but if I only want to add a bias, I can't use Dense.
So far I have had to use add_weight in a custom layer, for example:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import Input

class Bias(keras.layers.Layer):
    def build(self, input_shape):
        self.bias = self.add_weight(shape=(64, 128), initializer='zeros',
                                    dtype=tf.float32, name='x')
        self.built = True

    def call(self, inputs):
        return inputs + self.bias

inputs = Input(shape=(64, 128))
outputs = Bias()(inputs)
model = Model(inputs=inputs, outputs=outputs)
model.summary()
Layer (type) Output Shape Param #
=================================================================
input_11 (InputLayer) (None, 64, 128) 0
_________________________________________________________________
bias_5 (Bias) (None, 64, 128) 8192
=================================================================
Total params: 8,192
Trainable params: 8,192
Non-trainable params: 0
Is there an easier way to define a trainable variable?
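One simpler option, assuming TF2-style tf.keras: a tf.Variable assigned as an attribute of a Layer (or Model) is tracked automatically, so no add_weight call is required. A minimal sketch (the layer name is illustrative):

import tensorflow as tf

class Linear(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()
        # Variables assigned as Layer attributes are tracked automatically
        # and appear in trainable_weights.
        self.w = tf.Variable(tf.random.normal((64, 128)), trainable=True)
        self.b = tf.Variable(tf.zeros((64,)), trainable=True)

    def call(self, x):
        return tf.matmul(x, tf.transpose(self.w)) + self.b

inputs = tf.keras.Input((128,))
outputs = Linear()(inputs)
model = tf.keras.Model(inputs, outputs)
model.summary()  # expect 64*128 + 64 = 8256 trainable parameters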

subclass of tf.keras.Model cannot get summary() result

I want to build a subclass of tf.keras.Model and see the model structure with the summary function, but it does not work. The following is my code:
import tensorflow as tf

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.summary()
The error:
ValueError: This model has not yet been built. Build the model first
by calling build() or calling fit() with some data, or specify an
input_shape argument in the first layer(s) for automatic build.
You need to call each layer once so the shapes can be inferred, and then call the build() method of tf.keras.Model with the model's input shape as the argument:
import tensorflow as tf
import numpy as np

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')
        # Call the model once on dummy data so every layer infers its shape
        x = np.random.normal(size=(1, 32, 32, 3))
        x = tf.convert_to_tensor(x)
        _ = self.call(x)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.build((32, 32, 3))
model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) multiple 896
_________________________________________________________________
flatten (Flatten) multiple 0
_________________________________________________________________
dense (Dense) multiple 3686528
_________________________________________________________________
dense_1 (Dense) multiple 1290
=================================================================
Total params: 3,688,714
Trainable params: 3,688,714
Non-trainable params: 0
_________________________________________________________________
A better solution is listed here. You need to provide a model() method that builds the model explicitly:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Input

class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(1)

    def call(self, inputs, **kwargs):
        return self.dense(inputs)

    def model(self):
        # Wrap the subclassed model in a functional Model so that
        # summary() can resolve concrete output shapes
        x = Input(shape=(1,))
        return Model(inputs=[x], outputs=self.call(x))

MyModel().model().summary()
Editing @Vlad's answer to avoid this error: ValueError: Input 0 of layer conv2d_10 is incompatible with the layer: expected min_ndim=4, found ndim=3. Full shape received: (32, 32, 3)
Change this line from:
model.build((32, 32, 3))
To:
model.build((None, 32, 32, 3))
Final Code:
import tensorflow as tf
import numpy as np

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')
        x = np.random.normal(size=(1, 32, 32, 3))
        x = tf.convert_to_tensor(x)
        _ = self.call(x)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.build((None, 32, 32, 3))
model.summary()

integrating sampled softmax in keras failed

Based on How can I use TensorFlow's sampled softmax loss function in a Keras model?, I created this code:
import tensorflow
import tensorflow as tf
from tensorflow.keras import backend as K

class SampledSoftmax(tensorflow.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(SampledSoftmax, self).__init__(**kwargs)

    def call(self, inputs):
        def f1(inputs):
            return tf.nn.sampled_softmax_loss(
                inputs[0]._keras_history[0].weights[0],
                inputs[0]._keras_history[0].bias,
                tf.reshape(tf.argmax(inputs[1], 1), [-1, 1]),
                inputs[0],
                8192,
                817496)

        def f2(inputs):
            logits = tf.matmul(inputs[0], tf.transpose(inputs[0]._keras_history[0].weights[0]))
            logits = tf.nn.bias_add(logits, inputs[0]._keras_history[0].bias)
            return tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=inputs[1],
                logits=logits)

        return tf.cond(K.learning_phase(), true_fn=f1(inputs), false_fn=f2(inputs))
and when used with the following model:
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.models import Model

# model
input_layer = Input(shape=(None,), dtype='int32')
target_input = Input(shape=(None, vocab_size), dtype='int8')
embedding_layer = Embedding(vocab_size,
                            EMBEDDING_DIM,
                            trainable=True,
                            mask_zero=True)(input_layer)
common = LSTM(LSTM_UNITS, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)(embedding_layer)
common = Dense(PROJ_UNITS, activation='linear')(common)
out = Dense(vocab_size, name='output_layer')(common)
out = SampledSoftmax()([out, target_input])
model = Model(inputs=[input_layer, target_input], outputs=out)
it failed with this error:
ValueError: Shape must be rank 2 but is rank 3 for 'sampled_softmax/sampled_softmax_loss/MatMul' (op: 'MatMul') with input shapes: [?,?,817496], [?,817496].
I made some progress based on a Google search:
import tensorflow
import tensorflow as tf
from tensorflow.keras.layers import InputSpec

class MyLayer(tensorflow.keras.layers.Dense):
    def __init__(self, num_sampled, num_classes, mode, **kwargs):
        self.num_sampled = num_sampled
        self.num_classes = num_classes
        self.mode = mode
        super(MyLayer, self).__init__(num_classes, **kwargs)
        self.input_spec = [InputSpec(ndim=2)]

    def build(self, input_shape):
        # self.input_spec = [InputSpec(shape=input_shape)]
        super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs_and_labels):
        inputs, labels = inputs_and_labels
        if self.mode == "train":
            loss = tf.nn.sampled_softmax_loss(
                weights=self.kernel,
                biases=self.bias,
                labels=tf.reshape(tf.argmax(labels, 1), [-1, 1]),
                inputs=inputs,
                num_sampled=self.num_sampled,
                num_classes=self.num_classes,
                num_true=1)
        elif self.mode == "eval":
            logits = tf.matmul(inputs, tf.transpose(self.kernel))
            logits = tf.nn.bias_add(logits, self.bias)
            loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=labels,
                logits=logits)
        return loss

    def compute_output_shape(self, input_shape):
        dense_shape, classes_shape = input_shape
        return (dense_shape[0],)
The error now:
ValueError: Layer my_layer expects 1 inputs, but it received 2 input tensors. Inputs received: [<tf.Tensor 'dense/BiasAdd:0' shape=(?, ?, 512) dtype=float32>, <tf.Tensor 'input_2:0' shape=(?, ?, 817496) dtype=int8>]
I tried to use self.input_spec, but so far it has not worked.
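One possible way past that last error, as an untested sketch rather than a definitive fix: subclass tf.keras.layers.Layer instead of Dense (the base Layer does not install an input_spec, so a two-tensor list is accepted) and let the layer own the projection weights itself. The class and argument names below are illustrative, and the inputs passed to tf.nn.sampled_softmax_loss still have to be rank 2, so sequence outputs would need reshaping first:

import tensorflow as tf

class SampledSoftmaxLoss(tf.keras.layers.Layer):
    def __init__(self, num_sampled, num_classes, **kwargs):
        super().__init__(**kwargs)
        self.num_sampled = num_sampled
        self.num_classes = num_classes

    def build(self, input_shape):
        dense_shape, _ = input_shape
        # sampled_softmax_loss expects weights of shape [num_classes, dim]
        self.kernel = self.add_weight(name='kernel',
                                      shape=(self.num_classes, dense_shape[-1]),
                                      initializer='glorot_uniform',
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=(self.num_classes,),
                                    initializer='zeros',
                                    trainable=True)

    def call(self, inputs_and_labels, training=None):
        inputs, labels = inputs_and_labels  # inputs must be rank 2
        if training:
            return tf.nn.sampled_softmax_loss(
                weights=self.kernel,
                biases=self.bias,
                labels=tf.reshape(tf.argmax(labels, axis=-1), [-1, 1]),
                inputs=inputs,
                num_sampled=self.num_sampled,
                num_classes=self.num_classes)
        logits = tf.matmul(inputs, tf.transpose(self.kernel)) + self.bias
        return tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)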

How to add a trainable weight to an LSTM in Keras/Tensorflow

I would like to add a trainable weight to my LSTM and when I use the following wrapper provided by Keras, the tensor is initialized but isn't added to the LSTM layer. When I use the same code on Dense layers or convnets it works properly. Is there any other way to add a variable to a recurrent model?
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

class MyLayer(Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[0][1], self.output_dim),
                                      initializer='uniform',
                                      trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        assert isinstance(x, list)
        a, b = x
        return [K.dot(a, self.kernel) + b, K.mean(b, axis=-1)]

    def compute_output_shape(self, input_shape):
        assert isinstance(input_shape, list)
        shape_a, shape_b = input_shape
        return [(shape_a[0], self.output_dim), shape_b[:-1]]
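One way to approach this, as a hedged sketch rather than a confirmed fix: wrap the LSTM inside a custom layer and create the extra weight in the wrapper's build(); both the LSTM sublayer and the add_weight kernel then belong to the outer layer's trainable weights. Class and argument names below are illustrative:

import tensorflow as tf

class LSTMWithExtraKernel(tf.keras.layers.Layer):
    def __init__(self, units, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.lstm = tf.keras.layers.LSTM(units)  # sublayer is tracked automatically
        self.output_dim = output_dim

    def build(self, input_shape):
        # Extra trainable weight applied to the LSTM's final hidden state
        self.kernel = self.add_weight(name='kernel',
                                      shape=(self.lstm.units, self.output_dim),
                                      initializer='uniform',
                                      trainable=True)

    def call(self, x):
        h = self.lstm(x)                  # (batch, units)
        return tf.matmul(h, self.kernel)  # (batch, output_dim)

inputs = tf.keras.Input(shape=(20, 8))    # (timesteps, features); values are arbitrary
outputs = LSTMWithExtraKernel(32, 10)(inputs)
tf.keras.Model(inputs, outputs).summary()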