subclass of tf.keras.Model cannot get summary() result - tensorflow

I want to build a subclass of tf.keras.Model and see the model structure with the summary() function, but it does not work. The following is my code:
import tensorflow as tf

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.summary()
The error:
ValueError: This model has not yet been built. Build the model first
by calling build() or calling fit() with some data, or specify an
input_shape argument in the first layer(s) for automatic build.

You need to call each layer once so the shapes can be inferred, and then call the build() method of tf.keras.Model with the model's input shape as the argument:
import tensorflow as tf
import numpy as np

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')
        x = np.random.normal(size=(1, 32, 32, 3))
        x = tf.convert_to_tensor(x)
        _ = self.call(x)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.build((32, 32, 3))
model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) multiple 896
_________________________________________________________________
flatten (Flatten) multiple 0
_________________________________________________________________
dense (Dense) multiple 3686528
_________________________________________________________________
dense_1 (Dense) multiple 1290
=================================================================
Total params: 3,688,714
Trainable params: 3,688,714
Non-trainable params: 0
_________________________________________________________________

A better solution is listed here. You need to provide an extra model() method that builds the network explicitly, so Keras can infer all the shapes:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Input

class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(1)

    def call(self, inputs, **kwargs):
        return self.dense(inputs)

    def model(self):
        # A concrete Input lets summary() report real per-layer output shapes
        # instead of "multiple".
        x = Input(shape=(1,))
        return Model(inputs=[x], outputs=self.call(x))

MyModel().model().summary()
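Applied to the conv model from the question, the same pattern looks roughly like this (a sketch assuming a 32x32x3 input):

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Input

class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

    def model(self):
        # Wrap call() in a functional Model with a concrete input shape.
        x = Input(shape=(32, 32, 3))
        return Model(inputs=[x], outputs=self.call(x))

MyModel().model().summary()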

Editing #Vlad's answer to avoid the error ValueError: Input 0 of layer conv2d_10 is incompatible with the layer: expected min_ndim=4, found ndim=3. Full shape received: (32, 32, 3), change this line from:
model.build((32, 32, 3))
to:
model.build((None, 32, 32, 3))
The leading None is the batch dimension, which Conv2D expects as part of a 4-D input.
Final Code:
import tensorflow as tf
import numpy as np

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')
        # Call the model once on dummy data so every layer can infer its shape.
        x = np.random.normal(size=(1, 32, 32, 3))
        x = tf.convert_to_tensor(x)
        _ = self.call(x)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.build((None, 32, 32, 3))
model.summary()
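Equivalently, running one forward pass on a dummy batch builds every layer, so summary() works without an explicit build() call (a minimal sketch, not from the original answers):

model = MyModel()
_ = model(tf.zeros((1, 32, 32, 3)))  # a single dummy forward pass builds all layers
model.summary()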

Related

Counting the number of parameters in a GRU

I have a GRU model as follows:
class CharGenModel(tf.keras.Model):
    def __init__(self, vocab_size, num_timesteps, embedding_dim, **kwargs):
        super(CharGenModel, self).__init__(**kwargs)
        self.embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.rnn_layer = tf.keras.layers.GRU(
            num_timesteps,
            recurrent_initializer="glorot_uniform",
            recurrent_activation="sigmoid",
            stateful=True,
            return_sequences=True
        )
        self.dense_layer = tf.keras.layers.Dense(vocab_size)

    def call(self, x):
        print(x.shape)
        x = self.embedding_layer(x)
        print(x.shape)
        x = self.rnn_layer(x)
        print(x.shape)
        x = self.dense_layer(x)
        print(x.shape)
        return x

vocab_size = 92
embedding_dim = 256
seq_length = 100
batch_size = 64

model = CharGenModel(vocab_size, seq_length, embedding_dim)
model.build(input_shape=(batch_size, seq_length))
model.summary()
model.summary() produced the number of trainable parameters as follows.
(64, 100)
(64, 100, 256)
(64, 100, 100)
(64, 100, 92)
Model: "char_gen_model_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_4 (Embedding) multiple 23552
gru_4 (GRU) multiple 107400
dense_4 (Dense) multiple 9292
=================================================================
Total params: 140,244
Trainable params: 140,244
Non-trainable params: 0
I'm confused about two things. According to the embedding layer definition:
tf.keras.layers.Embedding(
    input_dim,
    output_dim,
    embeddings_initializer='uniform',
    embeddings_regularizer=None,
    activity_regularizer=None,
    embeddings_constraint=None,
    mask_zero=False,
    input_length=None,
    **kwargs
)
The input to the embedding layer in my application has shape 64x100.
(1) But why are the embedding layer's trainable parameters 92x256=23552? Why not 100x256?
(2) The number of parameters in a GRU is
num_params = number of FFNNs x [number of hidden units x (number of hidden units + number of inputs) + number of biases]
The number of FFNNs (feedforward networks) in a GRU is 3, the number of hidden units is 100, the number of inputs is 256, and the number of biases is 100, so
num_params = 3 x [100 x (100 + 256) + 100] = 107100
But the model summary reports 107400. Where am I going wrong in the calculation?
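For what it's worth, both reported numbers can be reproduced under two assumptions: the Embedding's input_dim is the vocabulary size (92), not the 64x100 input shape, and TF2's GRU defaults to reset_after=True, which keeps two bias vectors per gate instead of one. A quick check:

vocab_size, embedding_dim, units, gru_input_dim = 92, 256, 100, 256

# Embedding: one trainable row per vocabulary entry.
embedding_params = vocab_size * embedding_dim                   # 92 x 256 = 23552

# GRU with reset_after=True: 3 gates, each with a kernel, a recurrent kernel,
# and *two* bias vectors (input bias and recurrent bias).
gru_params = 3 * (units * (units + gru_input_dim) + 2 * units)  # 107400, not 107100

# Dense: (units + 1 bias) per output class.
dense_params = (units + 1) * vocab_size                         # 101 x 92 = 9292

print(embedding_params, gru_params, dense_params)  # 23552 107400 9292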

Shape mismatch with vgg16 keras: expected ndim=4, found ndim=2, shape received [None, None]

In trying to learn keras and deep learning, I want to create an image matting algorithm that uses an architecture similar to a modified autoencoder, where it takes two image inputs (a source image and a user-generated trimap) and produces one image output (the alpha values of the image foreground). The encoder part (of both inputs) is simple feature extraction using pre-trained VGG16. I want to train the decoder using the low-res alphamatting.com dataset.
Running the attached code produces an error:
ValueError: Input 0 of layer block1_conv1 is incompatible with the layer: expected ndim=4, found ndim=2. Full shape received: [None, None]
I'm having trouble understanding this error. I verified that my twin_gen closure is producing image batches of shape (22, 256, 256, 3) for both inputs, so I would guess that I have somehow created the model wrong, but I don't see where the error is. Can anyone shed some light on why I am seeing this error?
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2DTranspose, Concatenate, BatchNormalization, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def DeConvBlock(input, num_output):
    x = Conv2DTranspose(num_output, kernel_size=3, strides=2, activation='relu', padding='same')(input)
    x = BatchNormalization()(x)
    x = Conv2DTranspose(num_output, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2DTranspose(num_output, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    return x

img_input = Input((256, 256, 3))
img_vgg16 = VGG16(include_top=False, weights='imagenet')
img_vgg16._name = 'img_vgg16'
img_vgg16.trainable = False

tm_input = Input((256, 256, 3))
tm_vgg16 = VGG16(include_top=False, weights='imagenet')
tm_vgg16._name = 'tm_vgg16'
tm_vgg16.trainable = False

img_vgg16 = img_vgg16(img_input)
tm_vgg16 = tm_vgg16(tm_input)

x = Concatenate()([img_vgg16, tm_vgg16])
x = DeConvBlock(x, 512)
x = DeConvBlock(x, 256)
x = DeConvBlock(x, 128)
x = DeConvBlock(x, 64)
x = DeConvBlock(x, 32)
x = Conv2DTranspose(1, kernel_size=3, strides=1, activation='sigmoid', padding='same')(x)

m = Model(inputs=[img_input, tm_input], outputs=x)
m.summary()
m.compile(optimizer='adam', loss='mean_squared_error')

gen = ImageDataGenerator(width_shift_range=0.1, rotation_range=30, height_shift_range=0.1, horizontal_flip=True, validation_split=0.2, preprocessing_function=preprocess_input)
SEED = 49

def twin_gen(generator, subset):
    gen_img = generator.flow_from_directory('./data', classes=['input_training_lowres'], seed=SEED, shuffle=False, subset=subset, color_mode='rgb')
    gen_map = generator.flow_from_directory('./data/trimap_training_lowres', classes=['Trimap1'], seed=SEED, shuffle=False, subset=subset, color_mode='rgb')
    gen_truth = generator.flow_from_directory('./data', classes=['gt_training_lowres'], seed=SEED, shuffle=False, subset=subset, color_mode='rgb')
    while True:
        img = gen_img.__next__()
        tm = gen_map.__next__()
        gt = gen_truth.__next__()
        yield [[img, tm], gt]

train_gen = twin_gen(gen, 'training')
val_gen = twin_gen(gen, 'validation')

checkpoint_filepath = 'checkpoint'
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='auto',
    save_freq='epoch',
    save_best_only=True)

r = m.fit(train_gen, validation_data=val_gen, epochs=10, callbacks=[checkpoint])
First, you didn't specify the input shape of VGG16 and you set include_top=False, so the default input shape will be (None, None, 3) for the channels_last case.
PS: You can check the source code of keras.applications.VGG16 and keras.applications.imagenet_utils.obtain_input_shape for details.
You can see the None output shapes by calling model.summary():
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 256, 256, 3) 0
__________________________________________________________________________________________________
input_3 (InputLayer) [(None, 256, 256, 3) 0
__________________________________________________________________________________________________
img_vgg16 (Functional) (None, None, None, 5 14714688 input_1[0][0]
__________________________________________________________________________________________________
tm_vgg16 (Functional) (None, None, None, 5 14714688 input_3[0][0]
__________________________________________________________________________________________________
concatenate (Concatenate) (None, 8, 8, 1024) 0 img_vgg16[0][0]
tm_vgg16[0][0]
__________________________________________________________________________________________________
To fix this you can simply set input_shape=(256, 256, 3) in VGG16, and calling model.summary() will now give you:
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 256, 256, 3) 0
__________________________________________________________________________________________________
input_3 (InputLayer) [(None, 256, 256, 3) 0
__________________________________________________________________________________________________
img_vgg16 (Functional) (None, 8, 8, 512) 14714688 input_1[0][0]
__________________________________________________________________________________________________
tm_vgg16 (Functional) (None, 8, 8, 512) 14714688 input_3[0][0]
__________________________________________________________________________________________________
concatenate (Concatenate) (None, 8, 8, 1024) 0 img_vgg16[0][0]
tm_vgg16[0][0]
__________________________________________________________________________________________________
The main cause of the error is that calling __next__() returns a tuple of two arrays (data, labels) with shapes ((batch_size, 256, 256, 3), (batch_size, 1)), but we really just want the first one.
Also, the data generator should yield a tuple, not a list, otherwise no gradients will be provided for any variable, because fit() expects (inputs, targets) from the data generator.
You have another problem: your model's output shape is (batch_size, 256, 256, 1), but your gen_truth elements have shape (batch_size, 256, 256, 3) because you load the gen_truth images with color_mode='rgb'. To get the same shape as the model's output, you should load gen_truth with color_mode='grayscale' if you have grayscale images, or load with color_mode='rgba' and take the last channel if you want to use the alpha value (I'm just guessing from the description in your question, but you should get the idea).
Example code that runs without any problem:
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2DTranspose, Concatenate, BatchNormalization, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def DeConvBlock(input, num_output):
    x = Conv2DTranspose(num_output, kernel_size=3, strides=2, activation='relu', padding='same')(input)
    x = BatchNormalization()(x)
    x = Conv2DTranspose(num_output, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2DTranspose(num_output, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    return x

img_input = Input((256, 256, 3))
img_vgg16 = VGG16(include_top=False, input_shape=(256, 256, 3), weights='imagenet')
img_vgg16._name = 'img_vgg16'
img_vgg16.trainable = False

tm_input = Input((256, 256, 3))
tm_vgg16 = VGG16(include_top=False, input_shape=(256, 256, 3), weights='imagenet')
tm_vgg16._name = 'tm_vgg16'
tm_vgg16.trainable = False

img_vgg16 = img_vgg16(img_input)
tm_vgg16 = tm_vgg16(tm_input)

x = Concatenate()([img_vgg16, tm_vgg16])
x = DeConvBlock(x, 512)
x = DeConvBlock(x, 256)
x = DeConvBlock(x, 128)
x = DeConvBlock(x, 64)
x = DeConvBlock(x, 32)
x = Conv2DTranspose(1, kernel_size=3, strides=1, activation='sigmoid', padding='same')(x)

m = Model(inputs=[img_input, tm_input], outputs=x)
m.summary()
m.compile(optimizer='adam', loss='mse')

gen = ImageDataGenerator(width_shift_range=0.1, rotation_range=30, height_shift_range=0.1, horizontal_flip=True, validation_split=0.2, preprocessing_function=preprocess_input)
SEED = 49

def twin_gen(generator, subset):
    gen_img = generator.flow_from_directory('./data', classes=['input_training_lowres'], seed=SEED, shuffle=False, subset=subset, color_mode='rgb')
    gen_map = generator.flow_from_directory('./data/trimap_training_lowres', classes=['Trimap1'], seed=SEED, shuffle=False, subset=subset, color_mode='rgb')
    gen_truth = generator.flow_from_directory('./data', classes=['gt_training_lowres'], seed=SEED, shuffle=False, subset=subset, color_mode='grayscale')
    while True:
        # __next__() returns (data, labels); keep only the data batch.
        img = gen_img.__next__()[0]
        tm = gen_map.__next__()[0]
        gt = gen_truth.__next__()[0]
        yield ([img, tm], gt)

train_gen = twin_gen(gen, 'training')

r = m.fit(train_gen, steps_per_epoch=5, epochs=3)

ValueError: Structure of Python function inputs does not match input_signature

System information
- OS Platform and Distribution: CentOS Linux release 7.7.1908
- TensorFlow version: 2.3.0
I am trying to convert the official TensorFlow image captioning model to a TFLite model. I try to convert the model's encoder and decoder (both tf.keras.Model subclasses) as follows:
import tensorflow as tf

embedding_dim = 256
units = 512
top_k = 5000
vocab_size = top_k + 1
features_shape = 2048
attention_features_shape = 64

class BahdanauAttention(tf.keras.Model):
    def __init__(self, utils):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(utils)
        self.W2 = tf.keras.layers.Dense(utils)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, hidden):
        # features (CNN_encoder output) shape == (batch_size, 64, embedding_dim)
        # hidden shape == (batch_size, hidden_size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden_size)
        hidden_with_time_axis_shape = tf.expand_dims(hidden, 1)
        # score shape == (batch_size, 64, hidden_size)
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis_shape))
        # attention_weights shape == (batch_size, 64, 1)
        # you get 1 at the last axis because you are applying score to self.V
        attention_weights = tf.nn.softmax(self.V(score), axis=1)
        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights

class CNN_Encoder(tf.keras.Model):
    # Since the features were already extracted and dumped with pickle,
    # this encoder just passes them through a fully connected layer.
    def __init__(self, embedding):
        super(CNN_Encoder, self).__init__()
        # shape after fc == (batch_size, 64, embedding_dim)
        self.fc = tf.keras.layers.Dense(embedding_dim)

    # @tf.function(input_signature=[tf.TensorSpec(shape=(1, 64, features_shape), dtype=tf.float32)])
    # @tf.function
    def call(self, x):
        x = self.fc(x)
        x = tf.nn.relu(x)
        return x

class RNN_Decoder(tf.keras.Model):
    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_Decoder, self).__init__()
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform',
                                       unroll=True)
        self.fc1 = tf.keras.layers.Dense(self.units)
        self.fc2 = tf.keras.layers.Dense(vocab_size)
        self.attention = BahdanauAttention(self.units)

    # @tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name='x'),
    #                               tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32, name='feature'),
    #                               tf.TensorSpec(shape=[1, 512], dtype=tf.float32, name='hidden')])
    # @tf.function
    def call(self, x, features, hidden):
        # attention is defined as a separate model
        context_vector, attention_weights = self.attention(features, hidden)
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)
        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        # pass the concatenated vector to the GRU
        output, state = self.gru(x)
        # shape == (batch_size, max_length, hidden_size)
        x = self.fc1(output)
        # x shape == (batch_size, max_length, hidden_size)
        x = tf.reshape(x, (-1, x.shape[2]))
        # output shape == (batch_size * max_length, vocab)
        x = self.fc2(x)
        return x, state, attention_weights

    def reset_states(self, batch_size):
        return tf.zeros((batch_size, self.units))

encoder = CNN_Encoder(embedding_dim)
decoder = RNN_Decoder(embedding_dim, units, vocab_size)

encoder._set_inputs(tf.TensorSpec(shape=(1, 64, features_shape), dtype=tf.float32))
decoder._set_inputs([tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name='x'),
                     tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32, name='feature'),
                     tf.TensorSpec(shape=[1, 512], dtype=tf.float32, name='hidden')])

encoder_converter = tf.lite.TFLiteConverter.from_keras_model(encoder)
decoder_converter = tf.lite.TFLiteConverter.from_keras_model(decoder)
encoder_model = encoder_converter.convert()
decoder_model = decoder_converter.convert()

open("encoder_model.tflite", "wb").write(encoder_model)
open("decoder_model.tflite", "wb").write(decoder_model)
The error message is:
ValueError: Structure of Python function inputs does not match input_signature:
inputs: (
[<tf.Tensor 'x:0' shape=(1, 1) dtype=int32>, <tf.Tensor 'feature:0' shape=(1, 64, 256) dtype=float32>, <tf.Tensor 'hidden:0' shape=(1, 512) dtype=float32>])
input_signature: (
TensorSpec(shape=(1, 1), dtype=tf.int32, name='x'),
TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name='feature'),
TensorSpec(shape=(1, 512), dtype=tf.float32, name='hidden'))
I think the function inputs are the same as the input signature. How can I fix the problem?
I also had the same issue and found a simple solution: the call method of the tf.keras.Model class accepts only a single input, so you need to pack all your inputs into a list/tuple.
https://github.com/tensorflow/tensorflow/issues/32488#issuecomment-560248754
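A minimal sketch of the packing idea (my own illustration with a hypothetical toy decoder, not the exact code from the linked issue): the input_signature then contains a single nested structure, matching the single inputs argument of call:

import tensorflow as tf

class PackedDecoder(tf.keras.Model):
    # Toy model; only the packed-inputs pattern matters here.
    def __init__(self):
        super().__init__()
        self.fc = tf.keras.layers.Dense(8)

    @tf.function(input_signature=[(tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name='x'),
                                   tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32, name='feature'),
                                   tf.TensorSpec(shape=[1, 512], dtype=tf.float32, name='hidden'))])
    def call(self, inputs):
        x, features, hidden = inputs  # all tensors arrive packed in one tuple
        return self.fc(hidden)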

integrating sampled softmax in keras failed

Based on How can I use TensorFlow's sampled softmax loss function in a Keras model?, I created this code:
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras import backend as K

class SampledSoftmax(tensorflow.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(SampledSoftmax, self).__init__(**kwargs)

    def call(self, inputs):
        def f1(inputs):
            return tf.nn.sampled_softmax_loss(
                inputs[0]._keras_history[0].weights[0],
                inputs[0]._keras_history[0].bias,
                tf.reshape(tf.argmax(inputs[1], 1), [-1, 1]),
                inputs[0],
                8192,
                817496)

        def f2(inputs):
            logits = tf.matmul(inputs[0], tf.transpose(inputs[0]._keras_history[0].weights[0]))
            logits = tf.nn.bias_add(logits, inputs[0]._keras_history[0].bias)
            return tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=inputs[1],
                logits=logits)

        return tf.cond(K.learning_phase(), true_fn=f1(inputs), false_fn=f2(inputs))
and when used with the following model:
# model
input_layer = Input(shape=(None,), dtype='int32')
target_input = Input(shape=(None, vocab_size), dtype='int8')

embedding_layer = Embedding(vocab_size,
                            EMBEDDING_DIM,
                            trainable=True,
                            mask_zero=True)(input_layer)
common = LSTM(LSTM_UNITS, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)(embedding_layer)
common = Dense(PROJ_UNITS, activation='linear')(common)
out = Dense(vocab_size, name='output_layer')(common)
out = SampledSoftmax()([out, target_input])

model = Model(inputs=[input_layer, target_input], outputs=out)
it failed with this error:
ValueError: Shape must be rank 2 but is rank 3 for 'sampled_softmax/sampled_softmax_loss/MatMul' (op: 'MatMul') with input shapes: [?,?,817496], [?,817496].
I made some progress based on a Google search:
class MyLayer(tensorflow.keras.layers.Dense):
    def __init__(self, num_sampled, num_classes, mode, **kwargs):
        self.num_sampled = num_sampled
        self.num_classes = num_classes
        self.mode = mode
        super(MyLayer, self).__init__(num_classes, **kwargs)
        self.input_spec = [InputSpec(ndim=2)]

    def build(self, input_shape):
        # self.input_spec = [InputSpec(shape=input_shape)]
        super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs_and_labels):
        inputs, labels = inputs_and_labels
        if self.mode == "train":
            loss = tf.nn.sampled_softmax_loss(
                weights=self.kernel,
                biases=self.bias,
                labels=tf.reshape(tf.argmax(labels, 1), [-1, 1]),
                inputs=inputs,
                num_sampled=self.num_sampled,
                num_classes=self.num_classes,
                num_true=1)
        elif self.mode == "eval":
            logits = tf.matmul(inputs, tf.transpose(self.kernel))
            logits = tf.nn.bias_add(logits, self.bias)
            loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=labels,
                logits=logits)
        return loss

    def compute_output_shape(self, input_shape):
        dense_shape, classes_shape = input_shape
        return (dense_shape[0],)
and the error now:
ValueError: Layer my_layer expects 1 inputs, but it received 2 input tensors. Inputs received: [<tf.Tensor 'dense/BiasAdd:0' shape=(?, ?, 512) dtype=float32>, <tf.Tensor 'input_2:0' shape=(?, ?, 817496) dtype=int8>]
I tried to use self.input_spec, but it has not worked so far.

How to use keras layers in custom keras layer

I am trying to write my own keras layer. In this layer, I want to use some other keras layers. Is there any way to do something like this:
class MyDenseLayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        self.fc = tf.keras.layers.Dense(self.num_outputs)

    def call(self, input):
        return self.fc(input)

layer = MyDenseLayer(10)
When I do something like
input = tf.keras.layers.Input(shape=(16,))
output = MyDenseLayer(10)(input)
model = tf.keras.Model(inputs=[input], outputs=[output])
model.summary()
it outputs a summary in which the inner dense layer's weights are not listed as trainable. How do I make the weights of the dense layer trainable?
If you look at the documentation for how to add custom layers, they recommend that you use the .add_weight(...) method. This method internally places all weights in self._trainable_weights. So to do what you want, you must first define the keras layers you want to use, build them, copy the weights, and then build your own layer. If I update your code, it should be something like this:
class mylayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs):
        self.num_outputs = num_outputs
        super(mylayer, self).__init__()

    def build(self, input_shape):
        self.fc = tf.keras.layers.Dense(self.num_outputs)
        self.fc.build(input_shape)
        # Expose the inner layer's weights as this layer's trainable weights.
        self._trainable_weights = self.fc.trainable_weights
        super(mylayer, self).build(input_shape)

    def call(self, input):
        return self.fc(input)

layer = mylayer(10)
input = tf.keras.layers.Input(shape=(16,))
output = layer(input)
model = tf.keras.Model(inputs=[input], outputs=[output])
model.summary()
You should then get what you want
It's much more comfortable and concise to put existing layers inside a tf.keras.Model subclass. If you define non-custom layers such as Dense or Conv2D inside a custom layer, their parameters are not trainable by default.
class MyDenseLayer(tf.keras.Model):
    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs
        self.fc = tf.keras.layers.Dense(num_outputs)

    def call(self, input):
        return self.fc(input)

    def compute_output_shape(self, input_shape):
        shape = tf.TensorShape(input_shape).as_list()
        shape[-1] = self.num_outputs
        return tf.TensorShape(shape)

layer = MyDenseLayer(10)
Check this tutorial: https://www.tensorflow.org/guide/keras#model_subclassing
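For completeness, a quick usage sketch of this Model-based layer under the question's 16-dimensional input (my own illustration, not from the tutorial):

inputs = tf.keras.layers.Input(shape=(16,))
outputs = MyDenseLayer(10)(inputs)  # a Model subclass is callable like a layer
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
model.summary()  # Dense contributes 16*10 + 10 = 170 trainable parameters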
In the TF2 custom layer guide, they "recommend creating such sublayers in the __init__ method (since the sublayers will typically have a build method, they will be built when the outer layer gets built)." So simply moving the creation of self.fc into __init__ gives you what you want.
class MyDenseLayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs
        self.fc = tf.keras.layers.Dense(self.num_outputs)

    def build(self, input_shape):
        self.built = True

    def call(self, input):
        return self.fc(input)

input = tf.keras.layers.Input(shape=(16,))
output = MyDenseLayer(10)(input)
model = tf.keras.Model(inputs=[input], outputs=[output])
model.summary()
Output:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 16)] 0
_________________________________________________________________
my_dense_layer_2 (MyDenseLay (None, 10) 170
=================================================================
Total params: 170
Trainable params: 170
Non-trainable params: 0
This works for me and is clean, concise, and readable.
import tensorflow as tf

class MyDense(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(MyDense, self).__init__(**kwargs)  # forward kwargs (e.g. trainable) to the base Layer
        self.dense = tf.keras.layers.Dense(2, tf.keras.activations.relu)

    def call(self, inputs, training=None):
        return self.dense(inputs)

inputs = tf.keras.Input(shape=(10,))
outputs = MyDense(trainable=True)(inputs)
model = tf.keras.Model(inputs=inputs, outputs=outputs, name='test')
model.compile(loss=tf.keras.losses.MeanSquaredError())
model.summary()
Output:
Model: "test"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 10)] 0
_________________________________________________________________
my_dense (MyDense) (None, 2) 22
=================================================================
Total params: 22
Trainable params: 22
Non-trainable params: 0
_________________________________________________________________
Note that trainable=True is needed. I have posted a question about it here.