Keras unable to calculate number of parameters in a Keras Custom Layer - tensorflow

I am building a Keras Custom layer with some Tensorflow support. Before that I wanted to test whether a Convolution2D layer works properly if I write a Keras layer with Tensorflow's conv2d in the call function.
class Convolutional2D(Layer):
    """Keras layer whose forward pass is a single ``tf.layers.conv2d`` call.

    ``build`` only records the input spec; no ``add_weight`` call is made
    anywhere in this class.
    """

    def __init__(self, filters=None, kernel_size=None, padding='same',
                 activation='linear', strides=(1, 1), name='Conv2D', **kwargs):
        """Store the convolution settings, then initialise the base layer."""
        self.filters = filters
        self.kernel_size = kernel_size
        self.padding = padding
        self.activation = activation
        # The posted listing had these two assignments fused into
        # ``self.strides = strides = name``, which would have stored the
        # layer name in ``self.strides``.
        self.strides = strides
        self.name = name
        self.input_spec = [InputSpec(ndim=4)]
        super(Convolutional2D, self).__init__(**kwargs)

    def call(self, input):
        """Apply ``tf.layers.conv2d`` to ``input`` with the stored settings."""
        # NOTE(review): the posted listing breaks off after
        # ``padding=self.padding,``. The call is closed at that point and
        # ``out`` is returned; any further arguments in the original post
        # are unknown.
        out = tf.layers.conv2d(inputs=input,
                               filters=self.filters,
                               kernel_size=self.kernel_size,
                               strides=self.strides,
                               padding=self.padding)
        return out

    def compute_output_shape(self, input_shape):
        """Return ``(batch, dim1 / stride0, dim2 / stride1, filters)``."""
        batch_size = input_shape[0]
        # NOTE(review): ``/`` is true division on Python 3, so these two
        # entries come out as floats there -- confirm the target interpreter.
        width = input_shape[1]/self.strides[0]
        height = input_shape[2]/self.strides[1]
        channels = self.filters
        return(batch_size, width, height, channels)

    def get_config(self):
        """Return the base layer config merged with this layer's settings."""
        # NOTE(review): the posted dict literal breaks off after the
        # ``'strides'`` entry; it is closed there.
        config = {'filters': self.filters,
                  'kernel_size': self.kernel_size,
                  'padding': self.padding,
                  'activation': self.activation,
                  'strides': self.strides}
        base_config = super(Convolutional2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def build(self, input_shape):
        """Pin the input spec to the concrete ``input_shape``."""
        self.input_spec = [InputSpec(shape=input_shape)]
This compiles properly, but when I use model.summary() it does not calculate the number of parameters for this layer.
What do I have to do so that when I check the total number of parameters of the model the number includes the trainable number of parameters of this layer?

I have found the answer to this problem.
def build(self, input_shape):
    """Create the kernel (and optional bias) weights for a convolution layer.

    The channel axis is 1 for ``'channels_first'`` data and -1 otherwise.

    Raises:
        ValueError: if the channel dimension of ``input_shape`` is ``None``.
    """
    if self.data_format == 'channels_first':
        channel_axis = 1
    else:
        # Restored branch: without it the assignment below would always
        # overwrite the 'channels_first' value.
        channel_axis = -1
    if input_shape[channel_axis] is None:
        raise ValueError('The channel dimension of the inputs '
                         'should be defined. Found `None`.')
    input_dim = input_shape[channel_axis]
    kernel_shape = self.kernel_size + (input_dim, self.filters)

    # NOTE(review): the arguments after ``shape=`` were missing from the
    # posted listing; they follow the stock Keras convolution ``build`` --
    # confirm against the installed Keras version.
    self.kernel = self.add_weight(shape=kernel_shape,
                                  initializer=self.kernel_initializer,
                                  name='kernel',
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
    if self.use_bias:
        self.bias = self.add_weight(shape=(self.filters,),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        # Restored branch: without it the bias would be discarded right
        # after being created.
        self.bias = None
    # Set input spec.
    self.input_spec = InputSpec(ndim=self.rank + 2,
                                axes={channel_axis: input_dim})
    self.built = True
The add_weight calls are what define the layer's parameters, and I had not made them in my code. That does not hamper the performance of the model: it works fine, except that one cannot get the parameter count for the layer.


Is there a TensorFlow Keras layer that is a wrapper for a stack of Dense layers?

For example, this is trivial, but is there a layer for this? It is not really a convolution ... there is one "Dense layer" (weights) per data point.
In [266]: X = np.random.randn(10, 3); W = np.random.randn(10, 3, 4); (X[:, :, None] * W).sum(axis=1).shape
Out[266]: (10, 4)
Create your own layer:
Warning: works only with fixed batch size, you need to define batch_shape or batch_input_shape in your models!!!!
class SampleDense(Layer):
    """Layer holding a separate dense kernel for every input sample.

    The kernel shape is the full input shape (batch dimension included)
    plus a trailing ``units`` axis.
    """

    def __init__(self, units, **kwargs):
        """Record the number of output units and initialise the base layer."""
        self.units = units
        super(SampleDense, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one kernel of shape ``input_shape + (units,)``."""
        weight_shape = input_shape + (self.units,)
        # NOTE(review): the posted call breaks off after ``name='kernel',``.
        # ``shape=weight_shape`` is the only use of the value computed above;
        # the initializer and ``trainable`` flag follow the usual custom-layer
        # template and should be confirmed.
        self.kernel = self.add_weight(name='kernel',
                                      shape=weight_shape,
                                      initializer='uniform',
                                      trainable=True)
        self.built = True

    def call(self, inputs):
        """Multiply inputs by the kernel and sum over the second-to-last axis."""
        # Add a trailing axis so the inputs broadcast against the kernel.
        inputs = K.expand_dims(inputs, axis=-1)
        outputs = inputs * self.kernel
        outputs = K.sum(outputs, axis=-2)
        return outputs

    def compute_output_shape(self, input_shape):
        """Replace the last input dimension with ``units``."""
        return input_shape[:-1] + (self.units,)

Define custom LSTM with multiple inputs

Following the tutorial on writing a custom layer, I am trying to implement a custom LSTM layer with multiple input tensors. I am providing two vectors, input_1 and input_2, as a list [input_1, input_2], as suggested in the tutorial. The single-input code works, but when I change the code for multiple inputs, it throws this error:
self.kernel = self.add_weight(shape=(input_shape[0][-1], self.units),
TypeError: 'NoneType' object is not subscriptable.
What change do I have to make to get rid of the error? Here is the modified code.
class MinimalRNNCell(keras.layers.Layer):
    """Minimal RNN cell, modified by the asker to read the first of several inputs."""

    def __init__(self, units, **kwargs):
        """Record ``units`` as both the output size and the state size."""
        self.units = units
        self.state_size = units
        super(MinimalRNNCell, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the input and recurrent kernels.

        ``input_shape`` is indexed as a sequence of shapes; the kernel is
        sized from the last dimension of the first one.
        """
        # NOTE(review): the arguments after ``shape=`` were missing from the
        # posted listing; they follow the MinimalRNNCell example in the Keras
        # RNN documentation.
        self.kernel = self.add_weight(shape=(input_shape[0][-1], self.units),
                                      initializer='uniform',
                                      name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            initializer='uniform',
            name='recurrent_kernel')
        self.built = True

    def call(self, inputs, states):
        """Return the new output and state for one step."""
        prev_output = states[0]
        # The leading ``K.dot(inputs`` / ``K.dot(prev_output`` tokens were
        # missing from the posted lines and are restored here.
        h = K.dot(inputs[0], self.kernel)
        output = h + K.dot(prev_output, self.recurrent_kernel)
        return output, [output]
# Let's use this cell in a RNN layer:
cell = MinimalRNNCell(32)
# Two symbolic inputs, each declared with shape (None, 5).
input_1 = keras.Input((None, 5))
input_2 = keras.Input((None, 5))
layer = RNN(cell)
# Both inputs are handed to the layer as a Python list.
y = layer([input_1, input_2])
Error is because of the line, y = layer([input_1, input_2]).
Replacing that line with y = layer((input_1, input_2)) (passing as Tuple of Inputs rather than List of Inputs), will resolve the error.
Complete working code using tf.keras is shown below:
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.layers import RNN
import tensorflow as tf
class MinimalRNNCell(tf.keras.layers.Layer):
    """Minimal RNN cell that reads the first element of a multi-input step."""

    def __init__(self, units, **kwargs):
        """Record ``units`` as both the output size and the state size."""
        self.units = units
        self.state_size = units
        #self.state_size = [tf.TensorShape([units])]
        super(MinimalRNNCell, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the input and recurrent kernels.

        ``input_shape`` is indexed as a sequence of shapes; the kernel is
        sized from the last dimension of the first one.
        """
        # NOTE(review): the arguments after ``shape=`` were missing from the
        # posted listing; they follow the MinimalRNNCell example in the Keras
        # RNN documentation.
        self.kernel = self.add_weight(shape=(input_shape[0][-1], self.units),
                                      initializer='uniform',
                                      name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            initializer='uniform',
            name='recurrent_kernel')
        self.built = True

    def call(self, inputs, states):
        """Return the new output and state for one step."""
        prev_output = states[0]
        # The leading ``K.dot(inputs`` / ``K.dot(prev_output`` tokens were
        # missing from the posted lines and are restored here.
        h = K.dot(inputs[0], self.kernel)
        output = h + K.dot(prev_output, self.recurrent_kernel)
        return output, [output]
# Let's use this cell in a RNN layer:
cell = MinimalRNNCell(32)
# Two symbolic inputs, each declared with shape (None, 5).
input_1 = tf.keras.Input((None, 5))
input_2 = tf.keras.Input((None, 5))
layer = RNN(cell)
# Both inputs are handed to the layer as a tuple rather than a list.
y = layer((input_1, input_2))
Output of the above code is:
<class 'tuple'>
Hope this helps. Happy Learning!

Propagating through a custom layer in tensorflow just once

Given a custom layer in TensorFlow, is it possible to let the model use it during just one epoch? The layer may be ignored for all other epochs, or simply be an identity.
For example: given data, I would like the layer to simply double it. The other layers should work normally. How would one do that?
def do_stuff(data):
    """Return ``data`` multiplied by two."""
    return 2*data
# Wraps do_stuff in a tf.py_func op named 'I run once'.
# NOTE(review): only the function and `name` are visible in this call; the
# arguments that would carry `data` into do_stuff appear to have been lost
# from the posted listing -- confirm against the original post.
def run_once(data):
return tf.py_func(do_stuff,
name='I run once')
class CustomLayer(Layer):
    """Layer that maps ``run_once`` over its input with ``tf.map_fn``."""

    def __init__(self, output_dim, **kwargs):
        """Record ``output_dim`` and initialise the base layer."""
        self.output_dim = output_dim
        # NOTE(review): this is assigned before the base initialiser runs;
        # confirm the base class does not reset ``trainable`` afterwards.
        self.trainable = False
        super(CustomLayer, self).__init__(**kwargs)

    def call(self, x):
        """Apply ``run_once`` to each element along the first axis of ``x``."""
        res = tf.map_fn(run_once, x)
        return res
# Model assembly as posted in the question.
inputs = Input(shape=(224, 224, 1))
# NOTE(review): `x` is passed to preprocess_input here, but no earlier
# assignment to `x` appears in this snippet. The Lambda layer is also never
# applied to `inputs`, and CustomLayer is not used anywhere below.
x = Lambda(preprocess_input(x), input_shape=(224, 224, 1), output_shape=(224, 224, 3))
outputs = Dense(1)(x)
model = Model(input=inputs, output=outputs)
output = model(x)
Interesting question. To execute a TF operation just in the first epoch, one could use tf.cond and tf.control_dependencies to check/update the value of a boolean tensor. For example, your custom layer could be implemented as follows:
class CustomLayer(Layer):
    """Layer that applies ``run_once`` only while an internal flag is true.

    After the output has been computed, the flag is assigned ``False``, so
    later evaluations take the identity branch.
    """

    def __init__(self, **kwargs):
        super(CustomLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the boolean flag that selects the ``run_once`` branch."""
        # The flag is bookkeeping state, not a learnable parameter, so it is
        # kept out of the trainable variables.
        self.first_epoch = tf.Variable(True, trainable=False)

    def call(self, x):
        """Return ``run_once(x)`` while the flag is set, otherwise ``x``."""
        res = tf.cond(self.first_epoch,
                      true_fn=lambda: run_once(x),
                      false_fn=lambda: x)
        # Order the ops: compute the result, then clear the flag, then hand
        # back an identity of the result that depends on the assignment.
        with tf.control_dependencies([res]):
            assign_op = self.first_epoch.assign(False)
            with tf.control_dependencies([assign_op]):
                res = tf.identity(res)
        return res
To validate that this layer works as expected, define run_once as:
def run_once(data):
    """Return an identity of ``data`` that depends on a 'First epoch' print op."""
    print_op = tf.print('First epoch')
    # Tie the identity to the print op so the message is emitted whenever
    # the returned tensor is evaluated.
    with tf.control_dependencies([print_op]):
        out = tf.identity(data)
    return out

unable to apply condition on output of custom layer using keras layers module

I want to apply a condition on the output of a dense layer. For this, I tried to customize the Dense layer of Keras but when I run my code I get the error
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ["<tf.Variable 'scope0/rnn/while/lstm_cell/kernel:0' shape=(3, 512) dtype=float32>", "<tf.Variable 'scope0/rnn/while/lstm_cell/recurrent_kernel:0' shape=(128, 512) dtype=float32>", "<tf.Variable 'scope0/rnn/while/lstm_cell/bias:0' shape=(512,) dtype=float32>", "<tf.Variable 'scope0/my_dense/kernel:0' shape=(128, 1) dtype=float32>", "<tf.Variable 'scope0/my_dense/bias:0' shape=(1,) dtype=float32>"] and loss Tensor("Sum:0", shape=(), dtype=float32).
I am putting the condition inside the call function, where the output is checked against a condition, i.e. if the output is <= 0.001 then the result should be 1.0, otherwise 0.0. The Dense layer I am using is just a copy of the Keras Dense layer with some modifications in the call method, which implement the above condition.
class MyDense(Layer):
    """Copy of the stock Dense layer with an optional thresholding step.

    When ``apply_cond`` is true, ``call`` returns 1.0 wherever the layer
    output is ``<= 0.00001`` and 0.0 elsewhere, instead of the output itself.
    """

    # NOTE(review): the posted signature kept only ``self`` and
    # ``apply_cond``. The remaining parameters are restored from the names
    # the body reads, in the order of the stock Dense layer; the position of
    # ``apply_cond`` is a guess -- confirm against the original post.
    def __init__(self,
                 units,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 apply_cond = False,
                 **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(MyDense, self).__init__(
            activity_regularizer=regularizers.get(activity_regularizer), **kwargs)
        self.units = int(units)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.apply_cond = apply_cond
        self.supports_masking = True
        self.input_spec = InputSpec(min_ndim=2)

    def build(self, input_shape):
        """Create the kernel and optional bias from the last input dimension.

        Raises:
            ValueError: if the last dimension of ``input_shape`` is ``None``.
        """
        input_shape = tensor_shape.TensorShape(input_shape)
        if tensor_shape.dimension_value(input_shape[-1]) is None:
            raise ValueError('The last dimension of the inputs to `Dense` '
                             'should be defined. Found `None`.')
        last_dim = tensor_shape.dimension_value(input_shape[-1])
        self.input_spec = InputSpec(min_ndim=2,
                                    axes={-1: last_dim})
        # NOTE(review): only ``shape=`` survived in the posted ``add_weight``
        # calls; the other arguments follow the stock Dense layer.
        self.kernel = self.add_weight(
            'kernel',
            shape=[last_dim, self.units],
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
            dtype=self.dtype,
            trainable=True)
        if self.use_bias:
            self.bias = self.add_weight(
                'bias',
                shape=[self.units,],
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                dtype=self.dtype,
                trainable=True)
        else:
            # Restored branch: without it the bias would be discarded right
            # after being created.
            self.bias = None
        self.built = True

    def call(self, inputs):
        """Compute the dense output, then optionally threshold it."""
        # print('in start of call apply_cond is: ', self.apply_cond)
        inputs = ops.convert_to_tensor(inputs)
        rank = common_shapes.rank(inputs)
        if rank > 2:
            # Broadcasting is required for the inputs.
            outputs = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]])
            # Reshape the output back to the original ndim of the input.
            if not context.executing_eagerly():
                shape = inputs.get_shape().as_list()
                output_shape = shape[:-1] + [self.units]
                # NOTE(review): restored from the stock Dense layer; the
                # posted listing computed ``output_shape`` but never used it.
                outputs.set_shape(output_shape)
        else:
            # Restored branch: without it the matmul would overwrite the
            # tensordot result for rank > 2 inputs.
            outputs = gen_math_ops.mat_mul(inputs, self.kernel)
        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            outputs = self.activation(outputs)  # pylint: disable=not-callable
        if self.apply_cond:
            # The asker's modification: a hard 0/1 threshold on the output.
            cond = tf.less_equal(outputs, tf.constant(0.00001), name='mycondition')
            return tf.where(cond, tf.ones_like(outputs), tf.zeros_like(outputs), name='mywhere')
        return outputs

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` with its last dimension replaced by ``units``.

        Raises:
            ValueError: if the innermost dimension of ``input_shape`` is ``None``.
        """
        input_shape = tensor_shape.TensorShape(input_shape)
        input_shape = input_shape.with_rank_at_least(2)
        if tensor_shape.dimension_value(input_shape[-1]) is None:
            raise ValueError(
                'The innermost dimension of input_shape must be defined, but saw: %s'
                % input_shape)
        return input_shape[:-1].concatenate(self.units)
How can I make the above code work?

Attention layer output shape issue

I have been using BiLSTMs to classify each word in sentences and my input is n_sentences, max_sequence_length, classes. Recently, I have been trying to use this attention layer:
class Attention(Layer):
    """Attention pooling over the step axis of a 3-D input.

    ``call`` scores every step, normalises the scores across axis 1 and
    returns the score-weighted sum of the input over that axis, so the step
    axis is removed from the output.
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        """Store ``step_dim`` and the regularizer/constraint settings."""
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the scoring vector ``W`` and, optionally, the bias ``b``."""
        assert len(input_shape) == 3
        # NOTE(review): only the shape argument survived in the posted
        # ``add_weight`` calls; the remaining arguments follow the widely
        # circulated version of this layer -- confirm against the source the
        # asker used.
        self.W = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]
        if self.bias:
            self.b = self.add_weight((input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            # Restored branch: without it the bias would be discarded right
            # after being created.
            self.b = None
        self.built = True

    def compute_mask(self, input, input_mask=None):
        """Return ``None``: no mask is passed on to later layers."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        features_dim = self.features_dim
        step_dim = self.step_dim
        # The leading ``K.dot(K.reshape(x`` was missing from the posted
        # expression and is restored here: flatten x to rows of features,
        # project each row onto W, then fold back to one score per step.
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                              K.reshape(self.W, (features_dim, 1))), (-1, step_dim))
        if self.bias:
            eij += self.b
        eij = K.tanh(eij)
        a = K.exp(eij)
        # Zero the weights of masked steps before normalising.
        if mask is not None:
            a *= K.cast(mask, K.floatx())
        # Epsilon guards against division by zero when every weight is zero.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Return ``(batch, features_dim)``."""
        return input_shape[0], self.features_dim
My output needs to be (samples, steps, features) or I get this
ValueError: Error when checking target: expected dense_2 to have 2 dimensions, but got array with shape (656, 109, 2)
So I switched
return input_shape[0], self.features_dim
to
return input_shape[0], self.step_dim, self.features_dim
Doing so I get another error:
InvalidArgumentError: Incompatible shapes: [32,109] vs. [32]
[[{{node metrics/acc/Equal}}]]
What do I need to modify to actually use the attention layer on my sentences ?
Are you using SeqSelfAttention?
I faced the same issue and instead of SeqSelfAttention I used SeqWeightedAttention - and it solved my problem.