Tensorflow 2.0 Keras Model subclassing - tensorflow

I'm trying to implement a simple UNet-like model using the model subclassing method. Here's my code:
import tensorflow as tf
from tensorflow import keras as K
class Enc_block(K.layers.Layer):
def __init__(self, in_dim):
super(Enc_block, self).__init__()
self.conv_layer = K.layers.SeparableConv2D(in_dim,3, padding='same', activation='relu')
self.batchnorm_layer = K.layers.BatchNormalization()
self.pool_layer = K.layers.SeparableConv2D(in_dim,3, padding='same',strides=2, activation='relu')
def call(self, x):
x = self.conv_layer(x)
x = self.batchnorm_layer(x)
x = self.conv_layer(x)
x = self.batchnorm_layer(x)
return self.pool_layer(x), x
class Dec_block(K.layers.Layer):
def __init__(self, in_dim):
super(Dec_block, self).__init__()
self.conv_layer = K.layers.SeparableConv2D(in_dim,3, padding='same', activation='relu')
self.batchnorm_layer = K.layers.BatchNormalization()
def call(self, x):
x = self.conv_layer(x)
x = self.batchnorm_layer(x)
x = self.conv_layer(x)
x = self.batchnorm_layer(x)
return x
class Bottleneck(K.layers.Layer):
def __init__(self, in_dim):
super(Bottleneck, self).__init__()
self.conv_1layer = K.layers.SeparableConv2D(in_dim,1, padding='same', activation='relu')
self.conv_3layer = K.layers.SeparableConv2D(in_dim,3, padding='same', activation='relu')
self.batchnorm_layer = K.layers.BatchNormalization()
def call(self, x):
x = self.conv_1layer(x)
x = self.batchnorm_layer(x)
x = self.conv_3layer(x)
x = self.batchnorm_layer(x)
return x
class Output_block(K.layers.Layer):
def __init__(self, in_dim):
super(Output_block, self).__init__()
self.logits = K.layers.SeparableConv2D(in_dim,3, padding='same', activation=None)
self.out = K.layers.Softmax()
def call(self, x):
x_logits = self.logits(x)
x = self.out(x_logits)
return x_logits, x
class UNetModel(K.Model):
def __init__(self,in_dim):
super(UNetModel, self).__init__()
self.encoder_block = Enc_block(in_dim)
self.bottleneck = Bottleneck(in_dim)
self.decoder_block = Dec_block(in_dim)
self.output_block = Output_block(in_dim)
def call(self, inputs, training=None):
x, x_skip1 = self.encoder_block(32)(inputs)
x, x_skip2 = self.encoder_block(64)(x)
x, x_skip3 = self.encoder_block(128)(x)
x, x_skip4 = self.encoder_block(256)(x)
x = self.bottleneck(x)
x = K.layers.UpSampling2D(size=(2,2))(x)
x = K.layers.concatenate([x,x_skip4],axis=-1)
x = self.decoder_block(256)(x)
x = K.layers.UpSampling2D(size=(2,2))(x) #56x56
x = K.layers.concatenate([x,x_skip3],axis=-1)
x = self.decoder_block(128)(x)
x = K.layers.UpSampling2D(size=(2,2))(x) #112x112
x = K.layers.concatenate([x,x_skip2],axis=-1)
x = self.decoder_block(64)(x)
x = K.layers.UpSampling2D(size=(2,2))(x) #224x224
x = K.layers.concatenate([x,x_skip1],axis=-1)
x = self.decoder_block(32)(x)
x_logits, x = self.output_block(2)(x)
return x_logits, x
I am getting the following error:
ValueError: Input 0 of layer separable_conv2d is incompatible with the layer: expected ndim=4, found ndim=0. Full shape received: []
I'm not sure if this is the correct way to implement a network in tf.keras
The idea was to implement encoder and decoder blocks by subclassing keras layers and subclassing the Model later.

Take a look at this line from UNetModel class:
x, x_skip1 = self.encoder_block(32)(inputs)
where self.encoder_block() is defined by
self.encoder_block = Enc_block(in_dim)
encoder_block is an instance of class. By doing self.encoder_block(32) you are invoking a __call__() method of the End_block class which expect to receive an iterable of image inputs of rank=4. Instead you're passing an integer number 32 of rank=0 and you get ValueError which says exactly what I've just explained: expected ndim=4, found ndim=0. What probably you intended to do is:
x, x_skip1 = self.encoder_block(inputs)
You repeat the same mistake in the subsequent lines as well. There are additional errors where you define the same in_dim for every custom layer:
self.encoder_block = Enc_block(in_dim)
self.bottleneck = Bottleneck(in_dim)
self.decoder_block = Dec_block(in_dim)
self.output_block = Output_block(in_dim)
The input shape for Bottleneck layer should be the same shape as output of the Enc_Block layer and so one. I suggest you first to understand simple example before you're trying to implement more complicated ones. Take a look at this example. It has two custom layers:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
class CustomLayer1(layers.Layer):
def __init__(self, outshape=4):
super(CustomLayer1, self).__init__()
self.outshape = outshape
def build(self, input_shape):
self.kernel = self.add_weight(name='kernel',
shape=(int(input_shape[1]), self.outshape),
trainable=True)
super(CustomLayer1, self).build(input_shape)
def call(self, inputs):
return tf.matmul(inputs, self.kernel)
class CustomLayer2(layers.Layer):
def __init__(self):
super(CustomLayer2, self).__init__()
def call(self, inputs):
return inputs / tf.reshape(tf.reduce_sum(inputs, 1), (-1, 1))
Now I will use both of these layers in the new CombinedLayers class:
class CombinedLayers(layers.Layer):
def __init__(self, units=3):
super(CombinedLayers, self).__init__()
# `units` defines a number of units in the layer. It is the
# output shape of the `CustomLayer`
self.layer1 = CustomLayer1(units)
# The input shape is inferred dynamically in the `build()`
# method of the `CustomLayer1` class
self.layer2 = CustomLayer1(units)
# Some layers such as this one do not need to know the shape
self.layer3 = CustomLayer2()
def call(self, inputs):
x = self.layer1(inputs)
x = self.layer2(x)
x = self.layer3(x)
return x
Note that the input shape of CustomLayer1 is inferred dynamically in the build() method. Now let's test it with some input:
x_train = [np.random.normal(size=(3, )) for _ in range(5)]
x_train_tensor = tf.convert_to_tensor(x_train)
combined = CombinedLayers(3)
result = combined(x_train_tensor)
result.numpy()
# array([[ 0.50822063, -0.0800476 , 0.57182697],
# [ -0.76052217, 0.50127872, 1.25924345],
# [-19.5887986 , 9.23529798, 11.35350062],
# [ -0.33696137, 0.22741248, 1.10954888],
# [ 0.53079047, -0.08941536, 0.55862488]])
This is how you should approach it. Create layers one by one. Each time you add a new layer test everything with some input to verify that you are doing things correctly.

Related

In keras tensorflow, need batch_input_shape but already specified

I get this error when I try :
model = LSTMPolicy(sequence_length, n_x)
model.predict(X_train[:batch_size])
ValueError: Exception encountered when calling layer 'lstm_policy_26'
(type LSTMPolicy). in user code:
....
ValueError: If a RNN is stateful, it needs to know its batch size.
Specify the batch size of your input tensors:
If using a Sequential model, specify the batch size by passing a batch_input_shape argument to your first layer.
If using the functional API, specify the batch size by passing a batch_shape argument to your Input layer.
Call arguments received by layer 'lstm_policy_26' (type LSTMPolicy):
• inputs=tf.Tensor(shape=(None, 20, 79), dtype=float32)
But I already added it and still get the same error.
class LSTMPolicy(Model):
def __init__(self, sequence_length, n_x):
super(LSTMPolicy, self).__init__()
self.sequence_length = sequence_length
self.n_x = n_x
self.build_model()
def build_model(self):
self.lstm1 = LSTM(units, return_sequences=True, return_state=True, stateful=True, name='lstm1', batch_input_shape=(batch_size, self.sequence_length, self.n_x))
self.dropout1 = Dropout(0.3)
self.lstm2 = LSTM(units, return_sequences=True, return_state=True, stateful=True, name='lstm2')
self.dropout2 = Dropout(0.3)
self.lstm3 = LSTM(units, return_state=True, stateful=True, name='lstm3')
self.dropout3 = Dropout(0.3)
self.dense2 = Dense(self.n_x, activation="softmax")
def call(self, inputs):
x, final_memory_state1, final_carry_state1 = self.lstm1(inputs)
x = self.dropout1(x)
x, final_memory_state2, final_carry_state2 = self.lstm2(x)
x = self.dropout2(x)
x, final_memory_state3, final_carry_state3 = self.lstm3(x)
x = self.dense1(x)
x = self.dropout3(x)
output = self.dense2(x)
return output, (final_memory_state1,
final_carry_state1,
final_memory_state2,
final_carry_state2,
final_memory_state3,
final_carry_state3)
def sampleAction(self, observation, init_state):
self.lstm1.states[0] = init_state[0]
self.lstm1.states[1] = init_state[1]
self.lstm2.states[0] = init_state[2]
self.lstm2.states[1] = init_state[3]
self.lstm3.states[0] = init_state[4]
self.lstm3.states[1] = init_state[5]
return self.call(observation)

tf.placeholder inside a class in TF 2.0

I'm trying to change a code I have written in TF 1.0 to TF 2.0 and I'm having difficulties with replacing the tf.placeholder inside a class function. My code is the following
class User:
x = tf.placeholder(tf.float32,shape=[None,784])
y_true = tf.placeholder(tf.float32, [None, 10])
W1 = tf.Variable(tf.random.truncated_normal([7840,1], stddev=0.1))
lambda_W = tf.Variable(tf.zeros([7840,1]))
W = tf.reshape(W1,[784, 10])
ylogits = W*x
y = tf.nn.softmax(ylogits)
def __init__(self):
pass
Is there a way to replace tf.placeholder inside the class to make code running in TF 2.0?
Firstly, I think you intended to have each of those objects created per instance of the class, not one for the whole class as it is now. I also think your product between W and x was meant to be a matrix product, not an element-wise product, which would not work with the given shapes:
class User:
def __init__(self):
self.x = tf.placeholder(tf.float32,shape=[None,784])
self.y_true = tf.placeholder(tf.float32, [None, 10])
self.W1 = tf.Variable(tf.random.truncated_normal([7840,1], stddev=0.1))
self.lambda_W = tf.Variable(tf.zeros([7840,1]))
self.W = tf.reshape(W1,[784, 10])
self.ylogits = self.x # self.W
self.y = tf.nn.softmax(ylogits)
To use it in TensorFlow 2.x, you would remove the placeholders and simply perform the operations each time with every new input, for example with a new function call:
class User:
def __init__(self):
self.W1 = tf.Variable(tf.random.truncated_normal([7840,1], stddev=0.1))
self.lambda_W = tf.Variable(tf.zeros([7840,1]))
self.W = tf.reshape(W1,[784, 10])
def call(self, x):
ylogits = self.x # self.W
return tf.nn.softmax(ylogits)
You could use this as:
user1 = User()
x = ... # Get some data
y = user1.call(x)
Or if you like to be more "idiomatic", you could use __call__:
class User:
def __init__(self):
self.W1 = tf.Variable(tf.random.truncated_normal([7840,1], stddev=0.1))
self.lambda_W = tf.Variable(tf.zeros([7840,1]))
self.W = tf.reshape(W1,[784, 10])
def __call__(self, x):
ylogits = x # W
return tf.nn.softmax(ylogits)
And then you would do:
user1 = User()
x = ... # Get some data
y = user1(x)

Gradients are None for Custom Convolution Layer

I have implemented the Basic MNIST model with Custom convolution layer as shown below. The problem is that the Gradients are always 'None' for the Custom Layer and so the learning does not happens during back propagation, as the Grad has None values.
I have debugged the outputs of the layers during forward pass and they are OK.
Here is the sample code, for simplicity I have passed image of 'Ones' and have just returned the matrix from the custom layer.
I have tried my best but could make it work any help is very much appreciated in advance
following code is executable and raises the
warning
:tensorflow:Gradients do not exist for variables ['cnn/custom_conv2d/kernel:0', 'cnn/custom_conv2d/bias:0', 'cnn/custom_conv2d_1/kernel:0', 'cnn/custom_conv2d_1/bias:0', 'cnn/custom_conv2d_2/kernel:0', 'cnn/custom_conv2d_2/bias:0'] when minimizing the loss.
import numpy as np
import tensorflow as tf
from grpc.beta import interfaces
class CustomConv2D(tf.keras.layers.Conv2D):
def __init__(self, filters,
kernel_size,
strides=(1, 1),
padding='valid',
data_format=None,
dilation_rate=(1, 1),
activation=None,
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='glorot_uniform',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
__name__ = 'CustomConv2D',
**kwargs
):
super(CustomConv2D, self).__init__(
filters=filters,
kernel_size=kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
activation=activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
bias_initializer=bias_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
activity_regularizer=activity_regularizer,
kernel_constraint=kernel_constraint,
bias_constraint=bias_constraint,
**kwargs )
def call(self, input):
(unrolled_mat, filters, shape) = self.prepare(input)
#unrolled_mat=unrolled inputs
#filters=unrolled kernels of the lAYER
#convolution through unrolling
conv_result = tf.tensordot(unrolled_mat, filters, axes=1)
result=tf.convert_to_tensor(tf.reshape(conv_result, shape))
return result
def prepare(self, matrix):
batches,rows,cols,channels=matrix.shape
kernel_size = self.kernel_size[0]
unrolled_matrices=None
# start = timer()
for batch in range(batches):
unrolled_maps=None
for chanel in range(channels):
unrolled_map = self.unroll(batch, cols, kernel_size, matrix, rows,chanel)
if unrolled_maps is None:
unrolled_maps = unrolled_map
else:
unrolled_maps=np.append(unrolled_maps,unrolled_map,axis=1)
unrolled_maps = np.reshape(unrolled_maps,(-1,unrolled_maps.shape[0],unrolled_maps.shape[1]))
if unrolled_matrices is None:
unrolled_matrices = unrolled_maps
else:
unrolled_matrices = np.concatenate((unrolled_matrices, unrolled_maps))
kernels=self.get_weights()
kernels=np.reshape(kernels[0],(unrolled_matrices[0].shape[1],-1))
shp=(batches,rows-(kernel_size-1),cols-(kernel_size-1),self.filters)
matrix=unrolled_matrices
return (matrix, kernels, shp)
def unroll(self, batch, cols, kernel_size, matrix, rows, chanel):
# a=np.zeros((shape))
unrolled_feature_map = None
for x in range(0, rows - (kernel_size - 1)):
for y in range(0, (cols - (kernel_size - 1))):
temp_row = None # flattened kernal at single position
for k in range(kernel_size):
for l in range(kernel_size):
if temp_row is None:
temp_row = matrix[batch, x + k, y + l, chanel]
# print(matrix[batch, x + k, y + l])
else:
temp_row = np.append(temp_row, matrix[batch, x + k, y + l, chanel])
# print(matrix[batch, x + k, y + l])
if unrolled_feature_map is None:
unrolled_feature_map = np.reshape(temp_row,
(-1, kernel_size * kernel_size)) # first row of unrolled matrix added
else:
unrolled_feature_map = np.concatenate((unrolled_feature_map, np.reshape(temp_row,
(-1, kernel_size * kernel_size)))) # concatinate subsequent row to un_mat
unrolled_feature_map = np.reshape(unrolled_feature_map,( unrolled_feature_map.shape[0], unrolled_feature_map.shape[1]))
# print(unrolled_feature_map.shape)
matrix=unrolled_feature_map
return matrix
class CNN(tf.keras.Model):
def __init__(self):
super(CNN, self).__init__()
self.learning_rate = 0.001
self.momentum = 0.9
self.optimizer = tf.keras.optimizers.Adam(self.learning_rate, self.momentum)
self.conv1 = CustomConv2D(filters = 6, kernel_size= 3, activation = 'relu') ## valid means no padding
self.pool1 = tf.keras.layers.MaxPool2D(pool_size=2) # default stride??-
self.conv2 = CustomConv2D(filters = 16, kernel_size = 3, activation = 'relu')
self.pool2 = tf.keras.layers.MaxPool2D(pool_size = 2)
self.conv3 = CustomConv2D(filters=120, kernel_size=3, activation='relu')
self.flatten = tf.keras.layers.Flatten()
self.fc1 = tf.keras.layers.Dense(units=82,kernel_initializer='glorot_uniform')
self.fc2 = tf.keras.layers.Dense(units=10, activation = 'softmax',kernel_initializer='glorot_uniform')
def call(self, x):
x = self.conv1(x) # shap(32,26,26,6) all (6s 3s 6s 3s)
x = self.pool1(x) # shap(32,13,13,6) all (6s)
x = self.conv2(x) # shap(32,11,11,16) all(324s)
x = self.pool2(x) # shap(32,5,5,16)
x = self.conv3(x) # shap(32,3,3,120)all(46656)
x = self.flatten(x) # shap(32,1080)
x = self.fc1(x) # shap(32,82)
x = self.fc2(x) # shap(32,10)
return x
def feedForward(self, image, label):
accuracy_object = tf.metrics.Accuracy()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
with tf.GradientTape() as tape:
feedForwardCompuation = self(image, training=True)
self.loss_value = loss_object(label, feedForwardCompuation)
grads = tape.gradient(self.loss_value, self.variables)
self.optimizer.apply_gradients(zip(grads, self.variables))
accuracy = accuracy_object(tf.argmax(feedForwardCompuation, axis=1, output_type=tf.int32), label)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train=x_train.astype('float32')
y_train = y_train.astype('float32')
image=x_train[0].reshape((1,28,28,1))
label=y_train[0]
cnn=CNN()
cnn.feedForward(image,label)
UPDATE: I am not using the builtin TF conv fucntion rather I am implementing my own custom convolution operation via Matrix unrolling method(unrolled map*unrolled filters). But the Tap.gradient returns "None" for the custom layers however when I use the builtin conv2d function of TF then it works fine!
I have Added the actual code of the operation
Snapshot of grads while debugging
Problem is that the Convolution Operation is not happening in the Class, CustomConv2D. Neither the call Method, nor the customConv Method is performing Convolution Operation, but it is just returning the Input, as it is.
Replacing the line, return self.customConv(matrix) in the call method of CustomConv2D Class with return super(tf.keras.layers.Conv2D, self).call(matrix) will perform the actual Convolutional Operation.
One more change is to invoke the call method of CNN class by including the line, _ = cnn(X_reshaped) before the line, cnn.feedForward(image,label)
By doing the above 2 changes, Gradients will be added.

Freeze sublayers in tensorflow 2

I have a model which is composed of custom layers. Each custom layer contains many tf.keras.layers. The problem is that if I want to freeze those layers after defining my model, the loop:
for i, layer in enumerate(model.layers):
print(i, layer.name)
only prints the "outer" custom layers and not those who exist inside. Is there any way to access the inner layers so I can freeze them?
an example of a custom layer from the official tf docs:
class MLPBlock(layers.Layer):
def __init__(self):
super(MLPBlock, self).__init__()
self.linear_1 = Linear(32)
self.linear_2 = Linear(32)
self.linear_3 = Linear(1)
def call(self, inputs):
x = self.linear_1(inputs)
x = tf.nn.relu(x)
x = self.linear_2(x)
x = tf.nn.relu(x)
return self.linear_3(x)
You can use keras callbacks. If you want to freeze your first layer after some certain amount of epochs, add this callback
class FreezeCallback(tf.keras.callbacks.Callback):
def __init__(self, n_epochs=10):
super().__init__()
self.n_epochs = n_epochs
def on_epoch_end(self, epoch, logs=None):
if epoch == self.n_epochs:
l = self.model.get_layer('first')
l.trainable = False
What you are doing in your update function is to replace the first Dense() layer with another Dense() layer, this time setting trainable = false.
While this works, I would update the 'update' function as following:
def updt(self):
self.dense1.trainable = False
Ok i came up with a solution.
An "update" function must be implemented inside the custom layer, which updates the inner layers so that they become non trainable.
Here is a sample code:
import tensorflow as tf
import numpy as np
layers = tf.keras.layers
seq_model = tf.keras.models.Sequential
class MDBlock(layers.Layer):
def __init__(self):
super(MDBlock, self).__init__()
self.dense1 = layers.Dense(784, name="first")
self.dense2 = layers.Dense(32, name="second")
self.dense3 = layers.Dense(32, name="third")
self.dense4 = layers.Dense(1, activation='sigmoid', name="outp")
def call(self, inputs):
x = self.dense1(inputs)
x = tf.nn.relu(x)
x = self.dense2(x)
x = tf.nn.relu(x)
x = self.dense3(x)
x = tf.nn.relu(x)
x = self.dense4(x)
return x
def updt(self):
self.dense1.trainable = False
def __str__(self):
return "\nd1:{0}\nd2:{1}\nd3:{2}\nd4:{3}".format(self.dense1.trainable, self.dense2.trainable,
self.dense3.trainable, self.dense4.trainable)
# define layer block
layer = MDBlock()
model = seq_model()
model.add(layers.Input(shape=(784,)))
model.add(layer)
# Use updt function to make layers non-trainable
for i, layer in enumerate(model.layers):
layer.updt()
model.compile(optimizer='rmsprop',
loss='binary_crossentropy',
metrics=['accuracy'])
# Generate dummy data
data = np.random.random((1000, 784))
labels = np.random.randint(2, size=(1000, 1))
# Train the model, iterating on the data in batches of 32 samples
model.fit(data, labels, epochs=10, batch_size=32)
# print block's layers state
for i, layer in enumerate(model.layers):
print(i, layer)

Propagating through a custom layer in tensorflow just once

Given a custom layer in tensorflow, is it possible to let the model use it just during one epoch? The layer may just be ignored for all other epochs or simple be an identity.
For example: Given data I would like the layer to simply double the given data. The other layers should may work normally. How would one do that?
def do_stuff(data):
return 2*data
def run_once(data):
return tf.py_func(do_stuff,
[data],
'float32',
stateful=False,
name='I run once')
class CustomLayer(Layer):
def __init__(self, output_dim, **kwargs):
self.output_dim = output_dim
self.trainable = False
super(CustomLayer, self).__init__(**kwargs)
def call(self, x):
res = tf.map_fn(run_once, x)
res.set_shape([x.shape[0],
self.output_dim[1],
self.output_dim[0],
x.shape[-1]])
return res
inputs = Input(shape=(224, 224, 1))
x = Lambda(preprocess_input(x), input_shape=(224, 224, 1), output_shape=(224, 224, 3))
outputs = Dense(1)(x)
model = Model(input=inputs, output=outputs)
output = model(x)
Interesting question. To execute a TF operation just in the first epoch, one could use tf.cond and tf.control_dependencies to check/update the value of a boolean tensor. For example, your custom layer could be implemented as follows:
class CustomLayer(Layer):
def __init__(self, **kwargs):
super(CustomLayer, self).__init__(**kwargs)
def build(self, input_shape):
self.first_epoch = tf.Variable(True)
def call(self, x):
res = tf.cond(self.first_epoch,
true_fn=lambda: run_once(x),
false_fn=lambda: x)
with tf.control_dependencies([res]):
assign_op = self.first_epoch.assign(False)
with tf.control_dependencies([assign_op]):
res = tf.identity(res)
return res
To validate that this layer works as expected, define run_once as:
def run_once(data):
print_op = tf.print('First epoch')
with tf.control_dependencies([print_op]):
out = tf.identity(data)
return out