Custom layer in tensorflow to output the running maximum of its inputs

I am trying to create a custom layer in tensorflow that outputs the running maximum of its inputs. The layer has a memory variable and a comparison function. I wrote the following:
class ComputeMax(tf.keras.layers.Layer):
    def __init__(self):
        super(ComputeMax, self).__init__()

    def build(self, input_shape):
        self.maxval = tf.Variable(initial_value=tf.zeros((input_shape)),
                                  trainable=False)

    def call(self, inputs):
        self.maxval.assign(tf.maximum(inputs, self.maxval))
        return self.maxval
my_sum = ComputeMax()
x = tf.ones((1,2))
y = my_sum(x)
print(y.numpy()) # [1, 1]
y = my_sum(x)
print(y.numpy()) # [1, 1]
It works as above. When I try it in a test model:
model = Sequential()
model.add(tf.keras.Input(shape=(2)))
model.add(Dense(1, activation='relu'))
model.add(ComputeMax())
model.compile(optimizer='adam', loss='mse')
I get the error on compile:
ValueError: Cannot convert a partially known TensorShape to a Tensor: (None, 1)
What am I missing?

Actually, the layer needs to know the number of input neurons coming from the previous layer, which is the last value in input_shape. You are using input_shape as-is, which is the full batch shape (None, features), so the variable is created with a partially unknown batch shape, hence the error.
This implementation might help.
class ComputeMax(tf.keras.layers.Layer):
    def __init__(self):
        super(ComputeMax, self).__init__()

    def build(self, input_shape):
        self.maxval = tf.Variable(initial_value=tf.zeros((input_shape[-1],)),
                                  trainable=False)

    def call(self, inputs):
        self.maxval.assign(tf.maximum(inputs, self.maxval))
        return self.maxval
But it probably won't give you the answer you expect for a 1-D NumPy array, since the assignment still has to broadcast the batch dimension against the variable.
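As a further sketch (my own addition, not part of the original answer), one way to avoid that shape mismatch entirely is to reduce over the batch axis before updating, so the variable always keeps the fixed shape (features,), assuming a per-feature running maximum is what's wanted:
import tensorflow as tf

class ComputeMax(tf.keras.layers.Layer):
    """Tracks the running per-feature maximum over every batch seen."""

    def build(self, input_shape):
        # One slot per feature; assumes inputs are non-negative
        # (e.g. after a relu), since the running max starts at zero.
        self.maxval = tf.Variable(initial_value=tf.zeros((input_shape[-1],)),
                                  trainable=False)

    def call(self, inputs):
        # Collapse the batch axis first, then fold into the running maximum.
        batch_max = tf.reduce_max(inputs, axis=0)
        self.maxval.assign(tf.maximum(batch_max, self.maxval))
        return self.maxval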

Related

How to write a custom call function for a Tensorflow LSTM class?

I have defined a custom LSTM Layer as follows:
class LSTMModel(tf.keras.Model):
    def __init__(self, CNN_model, num_classes):
        super().__init__()
        self.cnn_model = CNN_model
        self.lstm = tf.keras.layers.LSTM(units=64, return_state=True, dropout=0.3)
        self.dense = tf.keras.layers.Dense(num_classes, activation="softmax")

    def call(self, input):
        pass
However, I am unclear about what needs to occur in the call function here. I also wrote a generic CNN class, like below:
class generic_vns_function(tf.keras.Model):
    # Where would we use layer_units here?
    def __init__(self, input_shape, layers, layer_units):
        super().__init__()
        self.convolutions = []
        # Dynamically create Convolutional layers and MaxPools
        for layer in range(len(layers)):
            self.convolutions.append(tf.keras.layers.Conv2D(layer, 3, padding="same",
                                                            input_shape=input_shape, activation="relu"))
            # Add MaxPooling layer
            self.convolutions.append(tf.keras.layers.MaxPooling2D((2, 2)))
        # Flatten
        self.flatten = tf.keras.layers.Flatten()
        # Dense layer
        self.dense1 = tf.keras.layers.Dense(1024, activation="relu")

    def call(self, input):
        x = input
        for layer in self.convolutions:
            x = layer(x)
        x = self.flatten(x)
        x = self.dense1(x)
        return x
but here the required structure makes a lot more sense to me. I am just initializing all of the layers. What do I need to do to initialize my LSTM layers?
You could write it like this:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras import Model

class LSTMModel(Model):
    def __init__(self, num_classes, num_units=64, drop_prob=0.3):
        super().__init__()
        self.num_classes = num_classes
        self.num_units = num_units
        self.drop_prob = drop_prob
        self.lstm = LSTM(
            units=self.num_units,
            return_state=True,
            dropout=self.drop_prob)
        self.dense = Dense(
            num_classes,
            activation="softmax")

    def call(self, x, training=True):
        x, *state = self.lstm(x, training=training)
        x = self.dense(x)
        return x
And then you would use it like:
model = LSTMModel(num_classes=2)
time_series = tf.random.normal((32, 64, 128))
x_pred = model(time_series)
# loss and gradients calculations ...
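Filling in that last comment with a minimal sketch (the loss and optimizer choices here are my assumptions, not part of the original answer):
labels = tf.random.uniform((32,), maxval=2, dtype=tf.int32)  # dummy class ids
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()  # model outputs softmax probabilities

with tf.GradientTape() as tape:
    preds = model(time_series, training=True)
    loss = loss_fn(labels, preds)
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))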
It is a common tensorflow idiom to instantiate layers when initializing a custom layer/model, and then execute their call() methods by passing data through them in your custom call implementation.
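As a side note on the "Where would we use layer_units here?" comment in the CNN snippet: presumably (this is my assumption about the intent) layer_units is meant to supply the filter count for each block, since the bare loop index is not a valid number of filters (it would be 0 on the first iteration). Something like:
# assumed intent: layer_units[i] holds the filter count of the i-th conv block
for i in range(len(layers)):
    self.convolutions.append(tf.keras.layers.Conv2D(
        layer_units[i], 3, padding="same", activation="relu"))
    self.convolutions.append(tf.keras.layers.MaxPooling2D((2, 2)))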

How to connect a single neuron in one layer with the single neuron in another layer

I am new to Tensorflow. I want to connect a layer to another layer in which only the corresponding neurons are connected, each pair with its own weight, as shown below. This means the neurons of the previous layer are not all connected to each neuron in the next layer.
Now I get 4 neurons, each computing w_i * x_i. Further, I need to add all these outputs together to get a single value. Then I want to pass this single value to a dense layer of size 4 to complete the autoencoder operation.
I have created my custom layer for the w_i * x_i operation, and it works correctly, but when I apply a normal dense layer after the addition I get the following error:
ValueError: Input 0 of layer dense_15 is incompatible with the layer: : expected min_ndim=2, found ndim=1. Full shape received: [100]
Following is my code for the custom layer and model:
class Layer_w_x(tf.keras.layers.Layer):
    def __init__(self):
        super(Layer_w_x, self).__init__()

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1],),
                                 initializer="random_normal", trainable=True)

    def call(self, inputs):
        return tf.multiply(inputs, self.w)

class MyModel(Model):
    def __init__(self, **kwargs):
        super(MyModel, self).__init__(**kwargs)
        self.layer_1 = Layer_w_x()
        self.dense = Dense(4, activation='sigmoid')

    def call(self, inputs):
        # CALCULATION FOR FIRST NEURON
        h1 = self.layer_1(inputs)
        h4 = tf.reduce_sum(h1, 1)
        encoded = self.dense(h4)
        return encoded

model = MyModel()
output = model(my_train_data1)
my_train_data1 has shape (100, 4).
You can make your own layer by subclassing from tf.keras.layers.Layer.
Creating custom layers is described in https://www.tensorflow.org/guide/keras/custom_layers_and_models
and https://www.tensorflow.org/tutorials/customization/custom_layers.
I created the layer for you.
import tensorflow as tf

class DirectLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(DirectLayer, self).__init__()

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=input_shape,
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(input_shape[-1],), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        # output has the same shape as the input
        return tf.multiply(inputs, self.w) + self.b

layer1 = DirectLayer()
layer2 = DirectLayer()

x = tf.ones([16, 2])
y = layer1(x)
y = layer2(y)
tf.print(y.shape)
It outputs TensorShape([16, 2]), which means that it maintains the input's dimensions, which, I believe, is what you want. Notice I used tf.multiply (elementwise multiplication) as opposed to what a Dense layer does, namely tf.matmul (matrix multiplication).
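One more note on the original error (my reading of it, since the answer above doesn't address it directly): tf.reduce_sum(h1, 1) collapses the (100, 4) input to a 1-D tensor of shape (100,), and Dense requires at least 2 dimensions. Keeping the reduced axis should satisfy it:
# Sketch: keep the reduced axis so Dense still sees a 2-D (batch, features) input
h4 = tf.reduce_sum(h1, axis=1, keepdims=True)  # shape (100, 1) instead of (100,)
encoded = self.dense(h4)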

Unable to understand the behavior of method `build` in tensorflow keras layers (tf.keras.layers.Layer)

Layers in tensorflow keras have a method build that is used to defer the weight creation to a time when you have seen what the input is going to be (see the docs on a layer's build method).
I have a few questions I have not been able to find the answers to:
Here it is said that
If you assign a Layer instance as attribute of another Layer, the outer layer will start tracking the weights of the inner layer.
What does it mean to track the weights of a layer?
The same link also mentions that
We recommend creating such sublayers in the init method (since the sublayers will typically have a build method, they will be built when the outer layer gets built).
Does it mean that while running the build method of the outer layer (self), there will be an iteration through all the attributes of self, and whichever are found to be instances of (subclassed from) tf.keras.layers.Layer will have their build methods run automatically?
I can run this code:
class Net(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net, self).__init__()
        self.l1 = tf.keras.layers.Dense(5)

    def call(self, x):
        return self.l1(x)

net = Net()
print(net.variables)
But not this:
class Net(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net, self).__init__()
        self.l1 = tf.keras.layers.Dense(5)

    def build(self, input_shape):
        super().build()

    def call(self, x):
        return self.l1(x)

net = Net()
print(net.variables)
why?
I would say the build mentioned here means that when you construct a self-defined tf.keras.Model, for example
net = Net()
then you get all the tf.keras.layers.Layer objects created in __init__ and stored in net, which is a callable object. In this way it becomes a complete object for TF to train later; this is what is meant by tracking. The next time you call net(inputs) you can get your outputs.
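A small sketch of what that tracking buys you (my illustration, using the asker's first Net class): the inner Dense layer's weights only appear in the outer model's variables once the layer has been built by a first call:
net = Net()
print(net.variables)  # [] -- Dense(5) has not been built yet, so no weights exist
_ = net(tf.ones((1, 3)))  # the first call builds the inner layer
print([v.shape for v in net.variables])  # kernel (3, 5) and bias (5,) are now tracked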
Here is an example of a self-defined Tensorflow decoder with attention:
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # query hidden state shape == (batch_size, hidden size)
        # query_with_time_axis shape == (batch_size, 1, hidden size)
        # values shape == (batch_size, max_len, hidden size)
        # we are doing this to broadcast addition along the time axis to calculate the score
        query_with_time_axis = tf.expand_dims(query, 1)
        # score shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        # the shape of the tensor before applying self.V is (batch_size, max_length, units)
        score = self.V(tf.nn.tanh(
            self.W1(query_with_time_axis) + self.W2(values)))
        # attention_weights shape == (batch_size, max_length, 1)
        attention_weights = tf.nn.softmax(score, axis=1)
        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights

class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(vocab_size)
        # used for attention
        self.attention = BahdanauAttention(self.dec_units)

    def call(self, x, hidden, enc_output):
        # enc_output shape == (batch_size, max_length, hidden_size)
        context_vector, attention_weights = self.attention(hidden, enc_output)
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)
        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        # passing the concatenated vector to the GRU
        output, state = self.gru(x)
        # output shape == (batch_size * 1, hidden_size)
        output = tf.reshape(output, (-1, output.shape[2]))
        # output shape == (batch_size, vocab)
        x = self.fc(output)
        return x, state, attention_weights
I have tried putting a tf.keras.layers.Layer object in call and got a really poor outcome; I guess that was because if you put it in call, it will be created anew each time a forward-backward propagation happens.
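To illustrate that last point (a minimal sketch of my own, not from the tutorial): a layer constructed inside call gets fresh random weights on every forward pass, so nothing it does can be learned:
import tensorflow as tf

class BadNet(tf.keras.Model):
    def call(self, x):
        # Anti-pattern: a new Dense layer (with new random weights) is
        # created on every call, and its weights are never tracked.
        return tf.keras.layers.Dense(5)(x)

class GoodNet(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.d = tf.keras.layers.Dense(5)  # created once, tracked, reused

    def call(self, x):
        return self.d(x)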

Freeze sublayers in tensorflow 2

I have a model which is composed of custom layers. Each custom layer contains many tf.keras.layers. The problem is that if I want to freeze those layers after defining my model, the loop:
for i, layer in enumerate(model.layers):
    print(i, layer.name)
only prints the "outer" custom layers and not those that exist inside. Is there any way to access the inner layers so I can freeze them?
an example of a custom layer from the official tf docs:
class MLPBlock(layers.Layer):
    def __init__(self):
        super(MLPBlock, self).__init__()
        self.linear_1 = Linear(32)
        self.linear_2 = Linear(32)
        self.linear_3 = Linear(1)

    def call(self, inputs):
        x = self.linear_1(inputs)
        x = tf.nn.relu(x)
        x = self.linear_2(x)
        x = tf.nn.relu(x)
        return self.linear_3(x)
You can use keras callbacks. If you want to freeze your first layer after a certain number of epochs, add this callback:
class FreezeCallback(tf.keras.callbacks.Callback):
    def __init__(self, n_epochs=10):
        super().__init__()
        self.n_epochs = n_epochs

    def on_epoch_end(self, epoch, logs=None):
        if epoch == self.n_epochs:
            l = self.model.get_layer('first')
            l.trainable = False
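Usage would look something like this (a sketch; it assumes the model actually exposes a top-level layer named 'first', and that flipping trainable mid-training takes effect in your setup):
model.fit(data, labels, epochs=20, batch_size=32,
          callbacks=[FreezeCallback(n_epochs=10)])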
What you are doing in your update function is replacing the first Dense() layer with another Dense() layer, this time setting trainable = False.
While this works, I would update the 'update' function as follows:
def updt(self):
    self.dense1.trainable = False
OK, I came up with a solution.
An "update" function must be implemented inside the custom layer, which updates the inner layers so that they become non-trainable.
Here is sample code:
import tensorflow as tf
import numpy as np

layers = tf.keras.layers
seq_model = tf.keras.models.Sequential

class MDBlock(layers.Layer):
    def __init__(self):
        super(MDBlock, self).__init__()
        self.dense1 = layers.Dense(784, name="first")
        self.dense2 = layers.Dense(32, name="second")
        self.dense3 = layers.Dense(32, name="third")
        self.dense4 = layers.Dense(1, activation='sigmoid', name="outp")

    def call(self, inputs):
        x = self.dense1(inputs)
        x = tf.nn.relu(x)
        x = self.dense2(x)
        x = tf.nn.relu(x)
        x = self.dense3(x)
        x = tf.nn.relu(x)
        x = self.dense4(x)
        return x

    def updt(self):
        self.dense1.trainable = False

    def __str__(self):
        return "\nd1:{0}\nd2:{1}\nd3:{2}\nd4:{3}".format(self.dense1.trainable, self.dense2.trainable,
                                                         self.dense3.trainable, self.dense4.trainable)

# define layer block
layer = MDBlock()

model = seq_model()
model.add(layers.Input(shape=(784,)))
model.add(layer)

# Use updt function to make layers non-trainable
for i, layer in enumerate(model.layers):
    layer.updt()

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Generate dummy data
data = np.random.random((1000, 784))
labels = np.random.randint(2, size=(1000, 1))

# Train the model, iterating on the data in batches of 32 samples
model.fit(data, labels, epochs=10, batch_size=32)

# print block's layers state
for i, layer in enumerate(model.layers):
    print(i, layer)
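An alternative that avoids the custom updt method (a sketch of mine, assuming the goal is just to reach nested layers): every Keras layer is a tf.Module, and model.submodules walks the whole tree recursively, inner layers included:
import tensorflow as tf

def freeze_inner_dense_layers(model):
    # model.submodules recursively yields every tracked tf.Module,
    # including layers nested inside custom layers.
    for module in model.submodules:
        if isinstance(module, tf.keras.layers.Dense):
            module.trainable = False

freeze_inner_dense_layers(model)
# Recompile afterwards so the trainable change takes effect in training.
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])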

How to change Sequential model to Custom Class model

I'm learning tensorflow 2.0, coming from its older versions.
I found that the tensorflow examples have changed from class-based models to Sequential-based ones.
But I want to use a class-based model, because it is easier for me to read.
I want to try to translate: https://www.tensorflow.org/beta/tutorials/keras/basic_text_classification_with_tfhub
from functools import reduce

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras

embedding = 'https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1'
hub_layer = hub.KerasLayer(embedding,
                           input_shape=[],
                           dtype=tf.string,
                           trainable=True)
# hub_layer(train_example_batch[:3])

# model = tf.keras.Sequential()
# model.add(hub_layer)
# model.add(tf.keras.layers.Dense(16, activation='relu'))
# model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

class MyModel(keras.Model):
    def __init__(self, embedding):
        super(MyModel, self).__init__()
        self.embedding = embedding
        self.d1 = keras.layers.Dense(16, activation='relu')
        self.d2 = keras.layers.Dense(1, activation='sigmoid')

    def call(self, x):
        print(x.shape)
        return reduce(lambda x, f: f(x), [x, self.embedding, self.d1, self.d2])

model = MyModel(hub_layer)
I got the error message below.
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: input must be a vector, got shape: [512,1]
[[{{node my_model_48/keras_layer_7/StatefulPartitionedCall/StatefulPartitionedCall/StatefulPartitionedCall/tokenize/StringSplit}}]]
(1) Invalid argument: input must be a vector, got shape: [512,1]
[[{{node my_model_48/keras_layer_7/StatefulPartitionedCall/StatefulPartitionedCall/StatefulPartitionedCall/tokenize/StringSplit}}]]
[[my_model_48/keras_layer_7/StatefulPartitionedCall/StatefulPartitionedCall/StatefulPartitionedCall/SparseFillEmptyRows/SparseFillEmptyRows/_24]]
0 successful operations.
0 derived errors ignored. [Op:__inference_keras_scratch_graph_303077]
Function call stack:
keras_scratch_graph -> keras_scratch_graph
Why did I get this error? And also, please answer whether WE NEED TO THROW AWAY class-based models.
Here is the right code.
# model = tf.keras.Sequential()
# model.add(hub_layer)
# model.add(tf.keras.layers.Dense(16, activation='relu'))
# model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

class MyModel(keras.Model):
    def __init__(self, embedding):
        super(MyModel, self).__init__()
        self.embedding = embedding
        self.d1 = keras.layers.Dense(16, activation='relu')
        self.d2 = keras.layers.Dense(1, activation='sigmoid')

    def call(self, x):
        # tf.squeeze is needed because x's shape is [None, 1]
        # (the extra 1 was inserted without permission ...)
        return reduce(lambda x, f: f(x), [x, tf.squeeze, self.embedding, self.d1, self.d2])

model = MyModel(hub_layer)
# model.summary()
model.layers
It is because the officious framework has added an extra dimension on its own...
I don't like this behavior, but I suppose some tensorflow users are eager for it...
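In other words (my reading of the error, stated as an assumption): the hub text-embedding layer expects a 1-D batch of strings with shape (batch,), while Keras delivers (batch, 1), hence the tf.squeeze:
import tensorflow as tf

x = tf.constant([["a sentence"], ["another one"]])  # (2, 1) -- as Keras feeds it
print(tf.squeeze(x, axis=1).shape)  # (2,) -- what the hub layer expects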