How to add a trainable weight to an LSTM in Keras/Tensorflow - tensorflow

I would like to add a trainable weight to my LSTM and when I use the following wrapper provided by Keras, the tensor is initialized but isn't added to the LSTM layer. When I use the same code on Dense layers or convnets it works properly. Is there any other way to add a variable to a recurrent model?
def __init__(self, output_dim, **kwargs):
    """Store the output width and forward the remaining options to the base layer.

    Args:
        output_dim: Second dimension of the kernel created in ``build``.
        **kwargs: Passed through unchanged to the parent constructor.
    """
    self.output_dim = output_dim
    super(MyLayer, self).__init__(**kwargs)
def build(self, input_shape):
    """Create the layer's trainable ``kernel`` weight.

    Args:
        input_shape: List of input shapes. The kernel has shape
            ``(input_shape[0][1], self.output_dim)``.
    """
    assert isinstance(input_shape, list)
    # Create a trainable weight variable for this layer.
    self.kernel = self.add_weight(
        name='kernel',
        shape=(input_shape[0][1], self.output_dim),
        initializer='uniform',
        trainable=True,
    )
    super(MyLayer, self).build(input_shape)  # Be sure to call this at the end
def call(self, x):
    """Apply the layer to the input pair ``[a, b]``.

    Args:
        x: List of two tensors ``[a, b]``.

    Returns:
        A two-element list: ``K.dot(a, kernel) + b`` and the mean of ``b``
        over its last axis.
    """
    assert isinstance(x, list)
    a, b = x
    return [K.dot(a, self.kernel) + b, K.mean(b, axis=-1)]
def compute_output_shape(self, input_shape):
    """Return the shapes of the two outputs for a pair of input shapes.

    Args:
        input_shape: List ``[shape_a, shape_b]``.

    Returns:
        ``[(shape_a[0], self.output_dim), shape_b[:-1]]``.
    """
    assert isinstance(input_shape, list)
    shape_a, shape_b = input_shape
    return [(shape_a[0], self.output_dim), shape_b[:-1]]

Related

how to add external numpy array to the ypred results during custom training loop

I have an external NumPy array that I want to add to the ypred data inside train_step after each batch prediction. What would be the best procedure? I did something like this, but it does not work.
# Array that is meant to be added to every batch of predictions.
external_data = np.loadtxt('file.txt')


@tf.function
def train_step(x, y):
    """Run one training step on a batch and return its loss.

    Args:
        x: Batch of model inputs.
        y: Batch of targets.

    Returns:
        The loss value computed by ``loss_fn`` for this batch.
    """
    with tf.GradientTape() as tape:
        ypred = model(x, training=True)
        # NOTE(review): new_ypred is computed but never used below -- the loss
        # and the metric are both fed ypred, so external_data has no effect.
        new_ypred = ypred + external_data
        loss_value = loss_fn(y, ypred)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    train_acc_metric.update_state(y, ypred)
    return loss_value

How to modify weights of Keras layers?

I am trying to freeze some of the weights of a layer by setting them to a specific value in Keras. How can I achieve this without moving weights to CPU ?
I checked similar questions such as modify layer weights in keras and modify layer parameters in keras
The answers suggest using get_weights() and set_weights(); however, those functions move the weights between CPU and GPU.
I created a custom lambda layer and modified model.trainable_weights inside of that layer, however weights are not updated.
I used tf advanced tutorial, and just added a custom lambda layer that multiplies weights with zero.
Colab notebook with same code
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Lambda
from tensorflow.keras import Model
# Load MNIST and scale the pixel values into [0, 1].
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
x_train = x_train / 255.0
x_test = x_test / 255.0

# Add a channels dimension
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]
def antirectifier(x):
    """Lambda-layer function: attempt to zero the model's weights, then return ``x``.

    NOTE(review): the loop rebinds entries of the list obtained from
    ``model.trainable_weights`` instead of assigning to the variables
    themselves, which is presumably why the weights appear unchanged --
    confirm (``Variable.assign`` would update a variable in place).

    Args:
        x: Input tensor; returned unmodified.
    """
    for i, w in enumerate(model.trainable_weights):
        model.trainable_weights[i] = tf.multiply(w, 0)
    return x
class MyModel(Model):
    """Small convolutional classifier with a Lambda layer before the output.

    Pipeline: Conv2D(32, 3, relu) -> Flatten -> Dense(128, relu)
    -> Lambda(antirectifier) -> Dense(10, softmax).
    """

    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10, activation='softmax')
        self.mask = Lambda(antirectifier)

    def call(self, x):
        """Run the forward pass and return the softmax output of ``d2``."""
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        x = self.mask(x)
        return self.d2(x)
# Create an instance of the model
model = MyModel()

# Optimizer and loss used by the training step.
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

# Running metrics for the training set.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

# Running metrics for the test set.
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='test_accuracy')
@tf.function
def train_step(images, labels):
    """Run one training step and update the training metrics.

    Args:
        images: Batch of input images.
        labels: Batch of integer class labels.
    """
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)
@tf.function
def test_step(images, labels):
    """Evaluate one batch and update the test metrics.

    Args:
        images: Batch of input images.
        labels: Batch of integer class labels.
    """
    predictions = model(images)
    t_loss = loss_object(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)
EPOCHS = 5

# NOTE(review): train_ds and test_ds are not defined anywhere in this snippet;
# presumably they are tf.data datasets built from the arrays loaded above.
for epoch in range(EPOCHS):
    for images, labels in train_ds:
        train_step(images, labels)

    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                          train_loss.result(),
                          train_accuracy.result()*100,
                          test_loss.result(),
                          test_accuracy.result()*100))

    # Reset the metrics for the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()
Since weights are zero, accuracy should be low. However weights are not changed.

why my self-defined layer failed in keras?

error message:
TypeError: Failed to convert object of type to Tensor. Contents: (None, 3). Consider casting elements to a supported type.
Can anyone help me with this error? I think I defined this layer in much the same way as the Dense layer, so why doesn't it work?
my layer code:
from keras.layers.core import Layer
from keras.engine import InputSpec
from keras import backend as K
try:
from keras import initializations
except ImportError:
from keras import initializers as initializations
import numpy as np
class HardAttention(Layer):
    """Layer that multiplies its input by a trainable ``attention`` weight."""

    def __init__(self, **kwargs):
        super(HardAttention, self).__init__(**kwargs)
        self.input_spec = InputSpec(min_ndim=2)

    def build(self, input_shape):
        """Create the ``attention`` weight and pin the last input dimension."""
        input_dim = input_shape[-1]
        # NOTE(review): input_shape is used as-is for the weight shape; if it
        # still carries the batch dimension (None), that is presumably the
        # source of the reported "Contents: (None, 3)" error -- confirm.
        self.attention = self.add_weight(shape=input_shape,
                                         initializer='uniform',
                                         name='attention',
                                         dtype=np.float32,
                                         trainable=True)
        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True
        super(HardAttention, self).build(input_shape)

    def call(self, inputs):
        """Return ``inputs`` multiplied by the ``attention`` weight."""
        # NOTE(review): verify that the Keras backend actually exposes
        # ``multiply``; a plain ``inputs * self.attention`` may be needed.
        return K.multiply(inputs, self.attention)

    def compute_output_shape(self, input_shape):
        """The output has the same shape as the input."""
        return input_shape
model code:
time_step, n_stock = np.shape(x_train)

# Stack the layers in order: input -> attention -> dense.
model = Sequential()
for layer in (InputLayer(input_shape=(3,)), HardAttention(), Dense(5)):
    model.add(layer)

model.compile(optimizer='adam', loss='mse')
model.summary()
You want to use the layer named Input, rather than importing InputLayer from the engine module.
The following snippet works in Colab (tf 1.4).
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from keras import backend as K
import numpy as np
class HardAttention(Layer):
    """Layer that scales each input feature by a trainable ``attention`` weight."""

    def __init__(self, **kwargs):
        super(HardAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the ``attention`` weight.

        The weight shape is ``input_shape[1:]``: the leading batch dimension
        is unknown (None) at build time and cannot be part of a variable's
        shape; the weight broadcasts over the batch when multiplied.
        """
        self.attention = self.add_weight(shape=input_shape[1:],
                                         initializer='uniform',
                                         name='attention',
                                         dtype=np.float32,
                                         trainable=True)
        # The base implementation marks the layer as built.
        super(HardAttention, self).build(input_shape)

    def call(self, inputs):
        """Return ``inputs`` multiplied element-wise by ``attention``."""
        return inputs * self.attention

    def compute_output_shape(self, input_shape):
        """The output has the same shape as the input."""
        return input_shape
# Stack the layers in order: input -> attention -> dense.
model = Sequential()
for layer in (Input(shape=(3,)), HardAttention(), Dense(5)):
    model.add(layer)

model.compile(optimizer='adam', loss='mse')
model.summary()

integrating sampled softmax in keras failed

Based on How can I use TensorFlow's sampled softmax loss function in a Keras model?, I created this code:
class SampledSoftmax(tensorflow.keras.layers.Layer):
    """Loss layer: sampled softmax while training, full softmax otherwise.

    ``call`` expects ``inputs = [dense_output, one_hot_labels]`` and reads the
    weights and bias of the layer that produced ``dense_output`` through its
    ``_keras_history``.
    """

    def __init__(self, **kwargs):
        super(SampledSoftmax, self).__init__(**kwargs)

    def call(self, inputs):
        """Return the loss tensor selected by the Keras learning phase."""

        def f1(inputs):
            # Training branch: sampled softmax loss.
            return tf.nn.sampled_softmax_loss(
                inputs[0]._keras_history[0].weights[0],
                inputs[0]._keras_history[0].bias,
                tf.reshape(tf.argmax(inputs[1], 1), [-1, 1]),
                inputs[0],
                8192,
                817496)

        def f2(inputs):
            # Inference branch: full softmax cross-entropy.
            logits = tf.matmul(inputs[0], tf.transpose(inputs[0]._keras_history[0].weights[0]))
            logits = tf.nn.bias_add(logits, inputs[0]._keras_history[0].bias)
            return tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=inputs[1],
                logits=logits)

        # tf.cond takes callables for both branches; passing f1(inputs) and
        # f2(inputs) would build both branches eagerly and hand tf.cond
        # tensors instead of functions.
        return tf.cond(K.learning_phase(),
                       true_fn=lambda: f1(inputs),
                       false_fn=lambda: f2(inputs))
and when used with the following model:
# Model: token ids -> embedding -> LSTM -> projection -> vocabulary logits -> loss
input_layer = Input(shape=(None,), dtype='int32')
target_input = Input(shape=(None, vocab_size), dtype='int8')

embedding = Embedding(
    vocab_size,
    EMBEDDING_DIM,
    trainable=True,
    mask_zero=True,
)
embedding_layer = embedding(input_layer)

recurrent = LSTM(
    LSTM_UNITS,
    return_sequences=True,
    dropout=0.2,
    recurrent_dropout=0.2,
)
common = recurrent(embedding_layer)

projection = Dense(PROJ_UNITS, activation='linear')
common = projection(common)

output_dense = Dense(vocab_size, name='output_layer')
out = output_dense(common)

loss_layer = SampledSoftmax()
out = loss_layer([out, target_input])

model = Model(inputs=[input_layer, target_input], outputs=out)
it failed with this error:
ValueError: Shape must be rank 2 but is rank 3 for 'sampled_softmax/sampled_softmax_loss/MatMul' (op: 'MatMul') with input shapes: [?,?,817496], [?,817496].
I made some progress based on google search:
class MyLayer(tensorflow.keras.layers.Dense):
    """Dense subclass whose ``call`` returns a loss rather than activations.

    Args:
        num_sampled: Number of classes sampled by the sampled softmax loss.
        num_classes: Total number of classes; also used as the Dense units.
        mode: ``"train"`` for sampled softmax, ``"eval"`` for full softmax.
        **kwargs: Forwarded to ``Dense.__init__``.
    """

    def __init__(self, num_sampled, num_classes, mode, **kwargs):
        self.num_sampled = num_sampled
        self.num_classes = num_classes
        self.mode = mode
        super(MyLayer, self).__init__(num_classes, **kwargs)
        # NOTE(review): a single-entry input_spec presumably causes the
        # "expects 1 inputs, but it received 2" error when the layer is
        # called with [inputs, labels] -- confirm.
        self.input_spec = [InputSpec(ndim=2)]

    def build(self, input_shape):
        #self.input_spec = [InputSpec(shape=input_shape)]
        super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs_and_labels):
        """Return the loss for the pair ``(inputs, labels)``.

        Raises:
            ValueError: If ``self.mode`` is neither ``"train"`` nor ``"eval"``.
        """
        inputs, labels = inputs_and_labels
        if self.mode == "train":
            loss = tf.nn.sampled_softmax_loss(
                weights=self.kernel,
                biases=self.bias,
                labels=tf.reshape(tf.argmax(labels, 1), [-1, 1]),
                inputs=inputs,
                num_sampled=self.num_sampled,
                num_classes=self.num_classes,
                num_true=1)
        elif self.mode == "eval":
            logits = tf.matmul(inputs, tf.transpose(self.kernel))
            logits = tf.nn.bias_add(logits, self.bias)
            loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=labels,
                logits=logits)
        else:
            # Without this branch an unknown mode would fall through to the
            # return statement with ``loss`` never assigned.
            raise ValueError(
                "mode must be 'train' or 'eval', got {!r}".format(self.mode))
        return loss

    def compute_output_shape(self, input_shape):
        """Return ``(batch,)`` taken from the first of the two input shapes."""
        dense_shape, classes_shape = input_shape
        return (dense_shape[0], )
The error now is:
ValueError: Layer my_layer expects 1 inputs, but it received 2 input tensors. Inputs received: [<tf.Tensor 'dense/BiasAdd:0' shape=(?, ?, 512) dtype=float32>, <tf.Tensor 'input_2:0' shape=(?, ?, 817496) dtype=int8>]
I tried using self.input_spec, but so far it has not worked.

How to use keras layers in custom keras layer

I am trying to write my own keras layer. In this layer, I want to use some other keras layers. Is there any way to do something like this:
class MyDenseLayer(tf.keras.layers.Layer):
    """Custom layer that delegates to an inner ``Dense`` layer.

    Args:
        num_outputs: Number of units of the inner Dense layer.
    """

    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        # NOTE(review): the sublayer is only created here, in build();
        # presumably that is why its weights are not reported as trainable
        # in the question below -- confirm.
        self.fc = tf.keras.layers.Dense(self.num_outputs)

    def call(self, input):
        """Apply the inner Dense layer to ``input``."""
        return self.fc(input)


layer = MyDenseLayer(10)
When I do something like
# Named inputs/outputs so the builtin ``input`` is not shadowed.
inputs = tf.keras.layers.Input(shape=(16,))
outputs = MyDenseLayer(10)(inputs)
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
model.summary()
it outputs
How do I make the weights of the inner Dense layer trainable?
If you look at the documentation for how to add custom layers, they recommend that you use the .add_weight(...) method. This method internally places all weights in self._trainable_weights. So to do what you want, you must first define the Keras layers you want to use, build them, copy the weights, and then build your own layer. If I update your code, it should be something like
class mylayer(tf.keras.layers.Layer):
    """Custom layer wrapping a ``Dense`` sublayer and exposing its weights.

    Args:
        num_outputs: Number of units of the inner Dense layer.
        num_outputs2: Unused. It now defaults to None so the layer can be
            created with a single argument, as in ``mylayer(10)``.
    """

    def __init__(self, num_outputs, num_outputs2=None):
        super(mylayer, self).__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        """Build the inner Dense layer and adopt its trainable weights."""
        self.fc = tf.keras.layers.Dense(self.num_outputs)
        self.fc.build(input_shape)
        # Point this layer's trainable weights at the sublayer's weights.
        self._trainable_weights = self.fc.trainable_weights
        super(mylayer, self).build(input_shape)

    def call(self, input):
        """Apply the inner Dense layer to ``input``."""
        return self.fc(input)
layer = mylayer(10)
# Named inputs/outputs so the builtin ``input`` is not shadowed.
inputs = tf.keras.layers.Input(shape=(16,))
outputs = layer(inputs)
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
model.summary()
You should then get what you want
It's much more convenient and concise to put existing layers in a tf.keras.models.Model subclass. If you define built-in (non-custom) layers such as Dense or Conv2D inside a custom layer, the parameters of those layers are not trainable by default.
class MyDenseLayer(tf.keras.Model):
    """Model subclass wrapping a single ``Dense`` layer.

    Args:
        num_outputs: Number of units of the inner Dense layer.
    """

    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs
        self.fc = tf.keras.layers.Dense(num_outputs)

    def call(self, input):
        """Apply the inner Dense layer to ``input``."""
        return self.fc(input)

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` with its last dimension set to ``num_outputs``."""
        shape = tf.TensorShape(input_shape).as_list()
        shape[-1] = self.num_outputs
        return tf.TensorShape(shape)


layer = MyDenseLayer(10)
Check this tutorial: https://www.tensorflow.org/guide/keras#model_subclassing
In the TF2 custom layer guide, they "recommend creating such sublayers in the __init__ method (since the sublayers will typically have a build method, they will be built when the outer layer gets built)." So simply moving the creation of self.fc into __init__ will give you what you want.
class MyDenseLayer(tf.keras.layers.Layer):
    """Custom layer whose ``Dense`` sublayer is created in ``__init__``.

    Args:
        num_outputs: Number of units of the inner Dense layer.
    """

    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs
        self.fc = tf.keras.layers.Dense(self.num_outputs)

    def build(self, input_shape):
        # This layer creates no weights of its own; just mark it as built.
        self.built = True

    def call(self, input):
        """Apply the inner Dense layer to ``input``."""
        return self.fc(input)
# Named inputs/outputs so the builtin ``input`` is not shadowed.
inputs = tf.keras.layers.Input(shape=(16,))
outputs = MyDenseLayer(10)(inputs)
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
model.summary()
Output:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 16)] 0
_________________________________________________________________
my_dense_layer_2 (MyDenseLay (None, 10) 170
=================================================================
Total params: 170
Trainable params: 170
Non-trainable params: 0
This works for me and is clean, concise, and readable.
import tensorflow as tf
class MyDense(tf.keras.layers.Layer):
    """Custom layer wrapping a two-unit ReLU ``Dense`` sublayer.

    Args:
        **kwargs: Standard layer options, forwarded to the base constructor.
    """

    def __init__(self, **kwargs):
        # kwargs must be unpacked. Passing the dict itself hands it to the
        # base constructor as its first positional parameter, so an empty
        # dict is treated as a falsy value for that parameter.
        super(MyDense, self).__init__(**kwargs)
        self.dense = tf.keras.layers.Dense(2, tf.keras.activations.relu)

    def call(self, inputs, training=None):
        """Apply the inner Dense layer; ``training`` is accepted but unused."""
        return self.dense(inputs)
# The shape is given as a tuple (batch dimension excluded), as documented.
inputs = tf.keras.Input(shape=(10,))
outputs = MyDense(trainable=True)(inputs)
model = tf.keras.Model(inputs=inputs, outputs=outputs, name='test')
model.compile(loss=tf.keras.losses.MeanSquaredError())
model.summary()
Output:
Model: "test"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 10)] 0
_________________________________________________________________
my_dense (MyDense) (None, 2) 22
=================================================================
Total params: 22
Trainable params: 22
Non-trainable params: 0
_________________________________________________________________
Note that trainable=True is needed. I have posted a question about it here.