How to save variable in custom layer in keras - tensorflow

I wI want to create a (4,1) dimension matrix to multiply with inputs with (4,) dimension in my custom layer. So I do as the following:
class MyLayer(Layer):
def __init__(self, output_dim = 4, **kwargs):
self.output_dim = output_dim
super(MyLayer, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
k1 = K.variable(1, dtype=tf.float32, name='k1')
k2 = K.variable(1, dtype=tf.float32, name='k2')
k3 = K.variable(1, dtype=tf.float32, name='k3')
k4 = K.variable(1, dtype=tf.float32, name='k4')
weights = tf.stack([k1,k2,k3,k4], name='weight_stack')
weights = tf.expand_dims(weights,1)
weights = tf.expand_dims(weights, 1)
weights = tf.expand_dims(weights, 1)
weights = tf.expand_dims(weights, 1)
self.kernel=tf.nn.softmax(weights,name='weights_softmax')
super(MyLayer, self).build(input_shape) # Be sure to call this somewhere!
def call(self, inputs):
self.inputs = inputs
P = tf.multiply(self.inputs, self.kernel)
P = tf.add_n([P[0],P[1],P[2],P[3]])
self.shape = P.shape
return P
def compute_output_shape(self, input_shape ):
return (input_shape, 1, 1, 1, 1)
I call it as followed:
x = [x1, x2, x3, x4]
x = MyLayer(name = "weight_matrix")(x)
It works when I train the Network with this self designed layers. However, when I save it with callbacks as followed:
keras.callbacks.ModelCheckpoint(self.checkpoint_path,
verbose=0, save_weights_only=True)
I found that I can't reload the "weights"(means k1 to k4) when I try to reload the model.
And I take a look into the file which save the model, I can't find the weights for the "weight_matrix".
If I am not doing it right, how can I fix it or how to implement it in keras? Thx.

Nowhere you're calling self.add_weight.
Your layer simply doesn't have weights.
In build:
def build(self, input_shape):
def customWeights(shape, dtype=None):
#k1 = K.variable(1, dtype=tf.float32, name='k1')
#k2 = K.variable(1, dtype=tf.float32, name='k2')
#k3 = K.variable(1, dtype=tf.float32, name='k3')
#k4 = K.variable(1, dtype=tf.float32, name='k4')
#weights = tf.stack([k1,k2,k3,k4], name='weight_stack')
weights = K.ones((4,))
#you can also use the shape passed to this function to build a flexible layer
#weights = tf.expand_dims(weights,1)
#weights = tf.expand_dims(weights, 1)
#weights = tf.expand_dims(weights, 1)
#weights = tf.expand_dims(weights, 1)
weights = K.reshape(weights,(-1,1,1,1,1))
#this should be applied later
#return tf.nn.softmax(weights,name='weights_softmax')
return weights
self.kernel= self.add_weight(name='kernel',
shape=notBeingUsedButItMayBeGoodForFlexibleLayers,
initializer=customWeights,
trainable=True)
super(MyLayer, self).build(input_shape) # Be sure to call this somewhere!
In call you call the softmax on your weights:
def call(self, inputs):
#don't call this! You're replacing an important property of the layer
#self.inputs = inputs
kernel = K.softmax(self.kernel)
P = tf.multiply(inputs, kernel)
P = tf.add_n([P[0],P[1],P[2],P[3]])
#not sure if this is a good idea either
#self.shape = P.shape
return P

Related

Unhashable Type TypeError: Tensors are unhashable. Instead, use tensor.ref() as the key

I am trying to implement a custom variational autoencoder. Following is the code to reproduce.
epsilon_std = 1.0
vx = tf.keras.layers.Input(batch_shape=(None, max_length_output), name='vae_enc_in')
vx_emb = tf.keras.layers.Embedding(
vocab_tar_size,
embedding_dim,
input_length=max_length_output,
name='vae_enc_emb'
)(vx)
vxbi = tf.keras.layers.Bidirectional(
tf.keras.layers.LSTM(units, return_sequences=False, recurrent_dropout=0.2, name='vae_enc_lstm'), merge_mode='concat'
)(vx_emb)
vx_drop = tf.keras.layers.Dropout(0.2, name='vae_enc_drop')(vxbi)
vx_dense = tf.keras.layers.Dense(units, activation='linear', name='vae_enc_dense')(vx_drop)
vx_elu = tf.keras.layers.ELU(name='vae_enc_elu')(vx_dense)
vx_drop1 = tf.keras.layers.Dropout(0.2, name='vae_enc_drop2')(vx_elu)
z_mean = tf.keras.layers.Dense(20, name='vae_enc_dense2')(vx_drop1)
z_log_var = tf.keras.layers.Dense(20, name='vae_enc_dense3')(vx_drop1)
def sampling(args):
z_mean, z_log_var = args
epsilon = tf.random.normal(shape=(BATCH_SIZE, 20), mean=0.,
stddev=epsilon_std)
return z_mean + tf.math.exp(z_log_var / 2) * epsilon
z = tf.keras.layers.Lambda(sampling, output_shape=(20,), name='vae_lambda')([z_mean, z_log_var])
repeated_context = tf.keras.layers.RepeatVector(max_length_output, name='vae_repeat')
decoder_h = tf.keras.layers.LSTM(units, return_sequences=True, recurrent_dropout=0.2, name='vae_dec_lstm')
decoder_mean = tf.keras.layers.TimeDistributed(
tf.keras.layers.Dense(vocab_tar_size, activation='linear', name='vae_dec_lstm'),
name='vae_dec_time_dist'
)
h_decoded = decoder_h(repeated_context(z))
x_decoded_mean = decoder_mean(h_decoded)
def zero_loss(y_true, y_pred):
print("ZERO LOSS")
return tf.zeros_like(y_pred)
And then creating a custom vae layer
class VAELayer(tf.keras.layers.Layer):
def __init__(self, batch_size, max_len, **kwargs):
self.is_placeholder = True
super(VAELayer, self).__init__(**kwargs)
self.target_weights = tf.constant(np.ones((batch_size, max_len)), tf.float32)
def vae_loss(self, x, x_decoded_mean):
#xent_loss = K.sum(metrics.categorical_crossentropy(x, x_decoded_mean), axis=-1)
labels = tf.cast(x, tf.int32)
xent_loss = tf.math.reduce_sum(
tfa.seq2seq.sequence_loss(
x_decoded_mean,
labels,
weights=self.target_weights,
average_across_timesteps=False,
average_across_batch=False
),
axis=-1
)
#softmax_loss_function=softmax_loss_f), axis=-1)#, for sampled softmax
kl_loss = - 0.5 * tf.math.reduce_sum(1 + z_log_var - tf.math.square(z_mean) - tf.math.exp(z_log_var), axis=-1)
return tf.math.reduce_mean(xent_loss + kl_loss)
def call(self, inputs):
x = inputs[0]
x_decoded_mean = inputs[1]
print(x.shape, x_decoded_mean.shape)
loss = self.vae_loss(x, x_decoded_mean)
print("Adding loss")
self.add_loss(loss, inputs=inputs)
print("Returning ones like")
return tf.ones_like(x)
I compiled it successfully and also produced a test output by calling the model. But when i try to train, it, It produces the error
TypeError: Tensors are unhashable. (KerasTensor(type_spec=TensorSpec(shape=(), dtype=tf.float32, name=None), name='tf.math.reduce_sum_25/Sum:0', description="created by layer 'tf.math.reduce_sum_25'"))Instead, use tensor.ref() as the key.
Following is the code for compiling and fitting the model
loss_layer = VAELayer(BATCH_SIZE, max_length_output)([vx, x_decoded_mean])
vae = tf.keras.Model(vx, [loss_layer], name='VariationalAutoEncoderLayer')
opt = tf.keras.optimizers.Adam(lr=0.01) #SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
vae.compile(optimizer=opt, loss=[zero_loss])
def vae_sentence_generator():
for ip, tg in train_dataset:
yield tg.numpy()
vae.fit(vae_sentence_generator(steps_per_epoch=steps_per_epoch, epochs=10))

Is there a tensorflow keras that is a wrapper for a stack of Dense layers?

For example, this is trivial but is there a layer for this? Is not really a convolution ... there is one "Dense layer" (weights) per data point.
In [266]: X = np.random.randn(10, 3); W = np.random.randn(10, 3, 4); (X[:, :, None] * W).sum(axis=1).shape
Out[266]: (10, 4)
Create your own layer:
Warning: works only with fixed batch size, you need to define batch_shape or batch_input_shape in your models!!!!
class SampleDense(Layer):
def __init__(self, units, **kwargs):
self.units = units
super(SampleDense, self).__init__(**kwargs)
def build(self, input_shape):
weight_shape = input_shape + (self.units,)
self.kernel = self.add_weight(name='kernel',
shape=weight_shape,
initializer='uniform',
trainable=True)
self.built = True
def call(self, inputs):
inputs = K.expand_dims(inputs, axis=-1)
outputs = inputs * self.kernel
outputs = K.sum(outputs, axis=-2)
return outputs
def compute_output_shape(self, input_shape):
return input_shape[:-1] + (self.units,)

Tensorflow 2.0 How to handle tf.Variable as Weight in __call__ function?

I made 'Decoder' layer to make Product Item Matrix in Tensorflow.
I've tried to initialize W, bias and make a calculation to get loss using some value.
class Decoder(tf.keras.layers.Layer):
def __init__(self, units=len(counter), input_dim=300):
super(Decoder, self).__init__()
self.trainer = Trainer(vec_dict)
w_init = tf.random_normal_initializer()
self.w = tf.Variable(initial_value=w_init(shape=(units, input_dim), dtype='float32'), trainable=True)
b_init = tf.zeros_initializer()
self.b = tf.Variable(initial_value=b_init(shape=(units,), dtype='float32'), trainable=True)
def call(self, x_play, y_pos, y_neg):
x = self.trainer(x_play)
user_vector = x[..., tf.newaxis, :]
wp = self.w.numpy()[y_pos]
bp = self.b.numpy()[y_pos]
pos_mul = tf.multiply(user_vector, wp)
pos = tf.reduce_sum(pos_mul, axis=-1) + bp
wn = self.w.numpy()[y_neg]
bn = self.b.numpy()[y_neg]
neg_mul = tf.multiply(user_vector, wn)
neg = tf.reduce_sum(neg_mul, axis=-1) + bn
tot = tf.sigmoid(tf.concat([pos, neg], axis=1))
return tot
As you can see this, If I used '.numpy()', it can make me possible to get matrix which has the shape what I want to.
For instance, self.w has (10000, 300) shapes, and y_pos has (256, 100)
and self.w.numpy()[y_pos] has (256, 100, 300)
The problem is that if I ran this code for training, I got these kind of errors:
NotImplementedError: in converted code:
<ipython-input-835-8d5f0b593979>:4 train_step *
predictions = model(x, y_pos, y_neg)
/usr/local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:847 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
<ipython-input-838-b9b896ea4fff>:64 call
wp = self.w.numpy()[y_pos]
/usr/local/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:579 numpy
"numpy() is only available when eager execution is enabled.")
NotImplementedError: numpy() is only available when eager execution is enabled.
Somebody else who let me know what's going on.

Providing output from one model to another model

I want to provide the output from one model (f) into another model (c). The following code works
features_ = sess.run(f.features, feed_dict={x:x_, y:y_, dropout:1.0, training:False})
sess.run(c.optimize, feed_dict={x:x_, y:y_, features:features_, dropout:1.0, training:False})
c only needs features_ and y_. It does not need x_. However, if I try to remove x_ as an input, i.e.,
feed_dict={y:y_, features:features_}
I get the following error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [?,28,28,1]
[[Node: Placeholder = Placeholderdtype=DT_FLOAT, shape=[?,28,28,1], _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
Is there a reason for this? features_ is a numpy ndarray, so it doesn't seem to be a tensor type or anything like that.
Here is the code for f:
class ConvModelSmall(object):
def __init__(self, x, y, settings, num_chan, num_features, lr, reg, dropout, training, scope):
""" init the model with hyper-parameters etc """
self.x = x
self.y = y
self.dropout = dropout
self.training = training
initializer = tf.contrib.layers.xavier_initializer(uniform=False)
self.weights = get_parameters(scope=scope, initializer=initializer, dims)
self.biases = get_parameters(scope=scope, initializer=initializer, dims)
self.features = self.feature_model()
self.acc = settings.acc(self.features, self.y)
self.loss = settings.loss(self.features, self.y) + reg * reg_loss_fn(self.weights)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
self.optimize = tf.train.AdagradOptimizer(lr).minimize(self.loss)
def feature_model(self):
conv1 = conv2d('conv1', self.x, self.weights['wc1'], self.biases['bc1'], 2, self.training, self.dropout)
conv2 = conv2d('conv2', conv1, self.weights['wc2'], self.biases['bc2'], 2, self.training, self.dropout)
conv3 = conv2d('conv3', conv2, self.weights['wc3'], self.biases['bc3'], 2, self.training, self.dropout)
dense1_reshape = tf.reshape(conv3, [-1, self.weights['wd1'].get_shape().as_list()[0]])
dense1 = fc_batch_relu(dense1_reshape, self.weights['wd1'], self.biases['bd1'], self.training, self.dropout)
dense2 = fc_batch_relu(dense1, self.weights['wd2'], self.biases['bd2'], self.training, self.dropout)
out = tf.matmul(dense2, self.weights['wout']) + self.biases['bout']
return out
Here is the code for c:
class LinearClassifier(object):
def __init__(self, features, y, training, num_features, num_classes, lr, reg, scope=""):
self.features = features
self.y = y
self.num_features = num_features
self.num_classes = num_classes
initializer = tf.contrib.layers.xavier_initializer(uniform=False)
self.W = get_scope_variable(scope=scope, var="W", shape=[num_features, num_classes], initializer=initializer)
self.b = get_scope_variable(scope=scope, var="b", shape=[num_classes], initializer=initializer)
scores = tf.matmul(tf.layers.batch_normalization(self.features, training=training), self.W) + self.b
self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=scores)) + reg * tf.nn.l2_loss(self.W)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
self.optimize = tf.train.GradientDescentOptimizer(lr).minimize(self.loss)
The devil is probably in these lines:
update_ops = tf.get_collection( tf.GraphKeys.UPDATE_OPS )
with tf.control_dependencies(update_ops):
self.optimize = tf.train.GradientDescentOptimizer( lr ).minimize( self.loss )
By the time you define c, f is already defined, so when you say update_ops = tf.get_collection( tf.GraphKeys.UPDATE_OPS ) it will collect all the update ops in the current graph. That will include the ops related to f, x within it.
Then with tf.control_dependencies(update_ops): means "you should do the following only after all update_ops are executed, including having given a value to x. But there is no value for x and the error happens.
To get around this, you can either separate the two networks into two different tf.Graphs, or, probably easier, when you get the update_ops you should filter them by scope in the tf.get_collection() method. For that to work, you should add tf.name_scopes to your network classes ConvModelSmall and LinearClassifier.

Keras unable to calculate number of parameters in a Keras Custom Layer

I am building a Keras Custom layer with some Tensorflow support. Before that I wanted to test whether a Convolution2D layer works properly if I write a Keras layer with Tensorflow's conv2d in the call function.
class Convolutional2D(Layer):
def __init__(self, filters=None, kernel_size=None, padding='same', activation='linear', strides=(1,1), name ='Conv2D', **kwargs):
self.filters = filters
self.kernel_size = kernel_size
self.padding = padding
self.activation = activation
self.strides = strides
self.name = name
self.input_spec = [InputSpec(ndim=4)]
super(Convolutional2D, self).__init__(**kwargs)
def call(self, input):
out = tf.layers.conv2d(inputs=input, filters=self.filters, kernel_size=self.kernel_size, strides=self.strides, padding=self.padding,
data_format='channels_last')
return(out)
def compute_output_shape(self, input_shape):
batch_size = input_shape[0]
width = input_shape[1]/self.strides[0]
height = input_shape[2]/self.strides[1]
channels = self.filters
return(batch_size, width, height, channels)
def get_config(self):
config = {'filters': self.filters, 'kernel_size': self.kernel_size, 'padding': self.padding, 'activation':self.activation, 'strides':self.strides,
'name':self.name}
base_config = super(Convolutional2D, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def build(self, input_shape):
self.input_spec = [InputSpec(shape=input_shape)]
This compiles properly but when the I use model.summary() it does not calculate the number of parameters for this layer.
What do I have to do so that when I check the total number of parameters of the model the number includes the trainable number of parameters of this layer?
I have found the answer to this problem.
def build(self, input_shape):
if self.data_format == 'channels_first':
channel_axis = 1
else:
channel_axis = -1
if input_shape[channel_axis] is None:
raise ValueError('The channel dimension of the inputs '
'should be defined. Found `None`.')
input_dim = input_shape[channel_axis]
kernel_shape = self.kernel_size + (input_dim, self.filters)
self.kernel = self.add_weight(shape=kernel_shape,
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
if self.use_bias:
self.bias = self.add_weight(shape=(self.filters,),
initializer=self.bias_initializer,
name='bias',
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
# Set input spec.
self.input_spec = InputSpec(ndim=self.rank + 2,
axes={channel_axis: input_dim})
self.built = True
The add weights defines the number of parameters which I have not done in my code. But that does not hamper the performance of the model. It works fine except for the fact one cannot get the number of parameters specification.