Integrating sampled softmax in Keras failed - TensorFlow

Based on How can I use TensorFlow's sampled softmax loss function in a Keras model?, I created this code:
class SampledSoftmax(tensorflow.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(SampledSoftmax, self).__init__(**kwargs)

    def call(self, inputs):
        def f1():
            # Training branch: sampled softmax against the weights of the
            # preceding Dense layer.
            return tf.nn.sampled_softmax_loss(
                inputs[0]._keras_history[0].weights[0],
                inputs[0]._keras_history[0].bias,
                tf.reshape(tf.argmax(inputs[1], 1), [-1, 1]),
                inputs[0],
                8192,     # num_sampled
                817496)   # num_classes

        def f2():
            # Inference branch: full softmax cross-entropy.
            logits = tf.matmul(inputs[0], tf.transpose(inputs[0]._keras_history[0].weights[0]))
            logits = tf.nn.bias_add(logits, inputs[0]._keras_history[0].bias)
            return tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=inputs[1],
                logits=logits)

        # tf.cond expects callables, not already-evaluated tensors.
        return tf.cond(K.learning_phase(), true_fn=f1, false_fn=f2)
and when used with the following model:
# model
input_layer = Input(shape=(None,), dtype='int32')
target_input = Input(shape=(None, vocab_size), dtype='int8')
embedding_layer = Embedding(vocab_size,
                            EMBEDDING_DIM,
                            trainable=True,
                            mask_zero=True)(input_layer)
common = LSTM(LSTM_UNITS, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)(embedding_layer)
common = Dense(PROJ_UNITS, activation='linear')(common)
out = Dense(vocab_size, name='output_layer')(common)
out = SampledSoftmax()([out, target_input])
model = Model(inputs=[input_layer, target_input], outputs=out)
it failed with this error:
ValueError: Shape must be rank 2 but is rank 3 for 'sampled_softmax/sampled_softmax_loss/MatMul' (op: 'MatMul') with input shapes: [?,?,817496], [?,817496].
I made some progress based on a Google search:
class MyLayer(tensorflow.keras.layers.Dense):
    def __init__(self, num_sampled, num_classes, mode, **kwargs):
        self.num_sampled = num_sampled
        self.num_classes = num_classes
        self.mode = mode
        super(MyLayer, self).__init__(num_classes, **kwargs)
        self.input_spec = [InputSpec(ndim=2)]

    def build(self, input_shape):
        # self.input_spec = [InputSpec(shape=input_shape)]
        super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs_and_labels):
        inputs, labels = inputs_and_labels
        if self.mode == "train":
            loss = tf.nn.sampled_softmax_loss(
                weights=self.kernel,
                biases=self.bias,
                labels=tf.reshape(tf.argmax(labels, 1), [-1, 1]),
                inputs=inputs,
                num_sampled=self.num_sampled,
                num_classes=self.num_classes,
                num_true=1)
        elif self.mode == "eval":
            logits = tf.matmul(inputs, tf.transpose(self.kernel))
            logits = tf.nn.bias_add(logits, self.bias)
            loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=labels,
                logits=logits)
        return loss

    def compute_output_shape(self, input_shape):
        dense_shape, classes_shape = input_shape
        return (dense_shape[0],)
The error now is:
ValueError: Layer my_layer expects 1 inputs, but it received 2 input tensors. Inputs received: [<tf.Tensor 'dense/BiasAdd:0' shape=(?, ?, 512) dtype=float32>, <tf.Tensor 'input_2:0' shape=(?, ?, 817496) dtype=int8>]
I tried to use self.input_spec, but it has not worked so far.
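For reference, here is a minimal sketch of the direction I am experimenting with (assumptions: the same imports as above, TF 1.x graph mode with K.learning_phase(), and that it is acceptable for the layer to own the output projection weights itself). It subclasses Layer rather than Dense, so Keras does not impose Dense's single-input spec, creates the weights in build(), and flattens the time axis before calling tf.nn.sampled_softmax_loss, which expects rank-2 inputs:
class SampledSoftmaxLoss(tensorflow.keras.layers.Layer):
    def __init__(self, num_sampled, num_classes, **kwargs):
        super(SampledSoftmaxLoss, self).__init__(**kwargs)
        self.num_sampled = num_sampled
        self.num_classes = num_classes

    def build(self, input_shape):
        proj_shape, _ = input_shape  # [projection, one-hot labels]
        self.out_kernel = self.add_weight(
            name='out_kernel', shape=(self.num_classes, int(proj_shape[-1])),
            initializer='glorot_uniform', trainable=True)
        self.out_bias = self.add_weight(
            name='out_bias', shape=(self.num_classes,),
            initializer='zeros', trainable=True)
        super(SampledSoftmaxLoss, self).build(input_shape)

    def call(self, inputs_and_labels):
        inputs, labels = inputs_and_labels
        # sampled_softmax_loss needs rank-2 inputs, so fold time into batch.
        flat_inputs = tf.reshape(inputs, [-1, int(inputs.shape[-1])])
        flat_labels = tf.reshape(tf.argmax(labels, axis=-1), [-1, 1])

        def train_loss():
            return tf.nn.sampled_softmax_loss(
                weights=self.out_kernel, biases=self.out_bias,
                labels=flat_labels, inputs=flat_inputs,
                num_sampled=self.num_sampled, num_classes=self.num_classes)

        def eval_loss():
            logits = tf.nn.bias_add(
                tf.matmul(flat_inputs, self.out_kernel, transpose_b=True),
                self.out_bias)
            # Cast the int8 one-hot targets to float for the full softmax.
            return tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf.cast(tf.reshape(labels, [-1, self.num_classes]), tf.float32),
                logits=logits)

        return tf.cond(K.learning_phase(), true_fn=train_loss, false_fn=eval_loss)
This removes the need to reach into _keras_history, and the layer is applied directly to the projection output, e.g. out = SampledSoftmaxLoss(8192, vocab_size)([common, target_input]).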

Related

Custom pooling layer, WARNING:tensorflow:Gradients do not exist for variables

Below is my attempt to customize the global average pooling code. My goal is to replace the line return np.mean(inputs, axis=(1, 2)) with my own custom pooling method. However, although the code runs, I'm having problems with gradients, and I can't reproduce the result of the built-in global average pooling method. I am getting the warning below. Can you help me, please?
WARNING:tensorflow:Gradients do not exist for variables ['conv2d_2/kernel:0', 'conv2d_2/bias:0', 'conv2d_3/kernel:0', 'conv2d_3/bias:0'] when minimizing the loss. If you're using model.compile(), did you forget to provide a loss argument?
import numpy as np
import tensorflow as tf
import math
import os
import random
import itertools
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import array_ops
import numba as nb
import matplotlib.pyplot as plt
import skimage
from keras import backend as K
random.seed(91)
class LGPooling2D(tf.keras.layers.Layer):
    def __init__(self, pool_size=(3, 3), strides=(2, 2), padding='SAME', data_format='channels_last', **kwargs):
        super(LGPooling2D, self).__init__(**kwargs)
        self.pool_size = pool_size
        self.strides = strides
        self.padding = padding
        self.data_format = 'NHWC' if data_format == 'channels_last' else 'NCHW'
        self.output_dim = 64

    def build(self, input_shape):
        super(LGPooling2D, self).build(input_shape)

    def _pooling_function(self, x, name=None):
        # b = K.shape(x)[0]
        input_shape = tf.keras.backend.int_shape(x)
        b, r, c, channel = input_shape[0], input_shape[1], input_shape[2], input_shape[3]

        def _mid_pool(inputs, is_train):
            return np.mean(inputs, axis=(1, 2))  # we change this part.

        def custom_grad(op, grad):
            if self.data_format == 'NHWC':
                ksizes = [1, self.pool_size[0], self.pool_size[1], 1]
                strides = [1, self.strides[0], self.strides[1], 1]
            else:
                ksizes = [1, 1, self.pool_size[0], self.pool_size[1]]
                strides = [1, 1, self.strides[0], self.strides[1]]
            return gen_nn_ops.max_pool_grad_v2(
                op.inputs[0],
                op.outputs[0],
                grad,
                ksizes,
                strides,
                self.padding,
                data_format=self.data_format
            ), tf.constant(0.0)

        def py_func(func, inp, Tout, stateful=True, name=None, grad=None, rnd_name=None):
            # Need to register the gradient under a unique name to avoid duplicates:
            tf.compat.v1.RegisterGradient(rnd_name)(grad)
            g = tf.compat.v1.get_default_graph()
            with g.gradient_override_map({"PyFunc": rnd_name}):
                return tf.compat.v1.py_func(func, inp, Tout, stateful=stateful, name=name)

        def _mid_range_pool(x, name=None):
            rnd_name = 'LGPooling2D' + str(np.random.randint(0, 1E+8))
            with tf.compat.v1.name_scope(name, "mod", [x]) as name:
                z = py_func(_mid_pool,
                            [x, tf.keras.backend.learning_phase()],
                            [tf.float32],
                            name=name,
                            grad=custom_grad, rnd_name=rnd_name)[0]
                z.set_shape((b, channel))
                return z

        return _mid_range_pool(x, name)

    def compute_output_shape(self, input_shape):
        r, c = input_shape[1], input_shape[2]
        sr, sc = self.strides
        num_r = math.ceil(r / sr) if self.padding == 'SAME' else r // sr
        num_c = math.ceil(c / sc) if self.padding == 'SAME' else c // sc
        return (input_shape[0], input_shape[3])

    def call(self, inputs):
        # K.in_train_phase(self._tf_pooling_function(inputs), self._pooling_function_test(inputs))
        input_shape = tf.shape(inputs)
        output = self._pooling_function(inputs)
        # output = tf.reshape(output, self.compute_output_shape(input_shape))
        return output

    def get_config(self):
        config = {
            'pool_size': self.pool_size,
            'strides': self.strides
        }
        base_config = super(LGPooling2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
def get_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape))
    # model.add(tf.keras.layers.SpatialDropout2D(0.15))
    model.add(tf.keras.layers.MaxPooling2D())
    model.add(tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'))
    # model.add(tf.keras.layers.SpatialDropout2D(0.1))
    # model.add(tf.keras.layers.MaxPooling2D())
    model.add(LGPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # model.add(tf.keras.layers.GlobalAveragePooling2D())
    # model.add(tf.keras.layers.Flatten())
    # model.add(tf.keras.layers.Dense(100, activation='relu'))
    # model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
    optim = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6)
    model.compile(loss="categorical_crossentropy", optimizer=optim, metrics=['accuracy'])
    return model
batch_size = 32
num_classes = 100
epochs = 50
# input image dimensions
img_rows, img_cols = 32, 32
# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3)
input_shape = (img_rows, img_cols, 3)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255.0
x_test /= 255.0
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)
reduceLROnPlat = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.4, patience=6, verbose=1, mode='min', min_delta=0.0001, cooldown=5, min_lr=0.00001)
callbacks_list = [reduceLROnPlat]
print('custom pooling')
model = get_model()
# model = get_model(IMG_SHAPE = (32, 32, 3))
tf.keras.utils.plot_model(model, to_file='LbpModel.png', show_shapes=True)
modelcustom = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_split=0.10,
                        callbacks=callbacks_list)
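For comparison, a pooling layer written purely with TensorFlow ops trains without the warning, since autodiff can see every op; tf.compat.v1.py_func runs NumPy code outside the graph, so TensorFlow cannot differentiate through it and the layers before it get no gradient. Below is a minimal sketch reproducing only the np.mean behaviour (an illustration, not the intended custom method):
import tensorflow as tf

class MeanPool2D(tf.keras.layers.Layer):
    """Global mean over the spatial axes, expressed with differentiable TF ops."""
    def call(self, inputs):
        # Same reduction as np.mean(inputs, axis=(1, 2)), but gradients flow
        # back to the convolution layers because no py_func is involved.
        return tf.reduce_mean(inputs, axis=(1, 2))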

ValueError: Structure of Python function inputs does not match input_signature

System information
- OS Platform and Distribution: CentOS Linux release 7.7.1908
- TensorFlow version: 2.3.0
I am trying to convert the official TensorFlow image captioning model to a TFLite model. I am converting the tf.keras.Model encoder and decoder models as follows:
import tensorflow as tf

embedding_dim = 256
units = 512
top_k = 5000
vocab_size = top_k + 1
features_shape = 2048
attention_features_shape = 64

class BahdanauAttention(tf.keras.Model):
    def __init__(self, utils):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(utils)
        self.W2 = tf.keras.layers.Dense(utils)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, hidden):
        # features (CNN_encoder output) shape == (batch_size, 64, embedding_dim)
        # hidden shape == (batch_size, hidden_size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden_size)
        hidden_with_time_axis_shape = tf.expand_dims(hidden, 1)
        # score shape == (batch_size, 64, hidden_size)
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis_shape))
        # attention_weights shape == (batch_size, 64, 1)
        # you get 1 at the last axis because you are applying score to self.V
        attention_weights = tf.nn.softmax(self.V(score), axis=1)
        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights

class CNN_Encoder(tf.keras.Model):
    # Since you have already extracted the features and dumped them with pickle,
    # this encoder passes those features through a fully connected layer.
    def __init__(self, embedding):
        super(CNN_Encoder, self).__init__()
        # shape after fc == (batch_size, 64, embedding_dim)
        self.fc = tf.keras.layers.Dense(embedding_dim)

    @tf.function(input_signature=[tf.TensorSpec(shape=(1, 64, features_shape), dtype=tf.float32)])
    def call(self, x):
        x = self.fc(x)
        x = tf.nn.relu(x)
        return x

class RNN_Decoder(tf.keras.Model):
    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_Decoder, self).__init__()
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform',
                                       unroll=True)
        self.fc1 = tf.keras.layers.Dense(self.units)
        self.fc2 = tf.keras.layers.Dense(vocab_size)
        self.attention = BahdanauAttention(self.units)

    @tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name='x'),
                                  tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32, name='feature'),
                                  tf.TensorSpec(shape=[1, 512], dtype=tf.float32, name='hidden')])
    def call(self, x, features, hidden):
        # Attention is defined as a separate model.
        context_vector, attention_weights = self.attention(features, hidden)
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)
        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        # Pass the concatenated vector to the GRU.
        output, state = self.gru(x)
        # shape == (batch_size, max_length, hidden_size)
        x = self.fc1(output)
        # x shape == (batch_size, max_length, hidden_size)
        x = tf.reshape(x, (-1, x.shape[2]))
        # output shape == (batch_size * max_length, vocab)
        x = self.fc2(x)
        return x, state, attention_weights

    def reset_states(self, batch_size):
        return tf.zeros((batch_size, self.units))

encoder = CNN_Encoder(embedding_dim)
decoder = RNN_Decoder(embedding_dim, units, vocab_size)
encoder._set_inputs(tf.TensorSpec(shape=(1, 64, features_shape), dtype=tf.float32))
decoder._set_inputs([tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name='x'),
                     tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32, name='feature'),
                     tf.TensorSpec(shape=[1, 512], dtype=tf.float32, name='hidden')])
encoder_converter = tf.lite.TFLiteConverter.from_keras_model(encoder)
decoder_converter = tf.lite.TFLiteConverter.from_keras_model(decoder)
encoder_model = encoder_converter.convert()
decoder_model = decoder_converter.convert()
open("encoder_model.tflite", "wb").write(encoder_model)
open("decoder_model.tflite", "wb").write(decoder_model)
The error message is:
ValueError: Structure of Python function inputs does not match input_signature:
inputs: (
[<tf.Tensor 'x:0' shape=(1, 1) dtype=int32>, <tf.Tensor 'feature:0' shape=(1, 64, 256) dtype=float32>, <tf.Tensor 'hidden:0' shape=(1, 512) dtype=float32>])
input_signature: (
TensorSpec(shape=(1, 1), dtype=tf.int32, name='x'),
TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name='feature'),
TensorSpec(shape=(1, 512), dtype=tf.float32, name='hidden'))
I think the function inputs are the same as the input signature. How can I fix the problem?
I also had the same issue and found a simple solution: the call method of the tf.keras.Model class accepts only a single input, so you need to pack all your inputs into a list/tuple.
https://github.com/tensorflow/tensorflow/issues/32488#issuecomment-560248754
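A minimal sketch of that pattern (a toy stand-in for RNN_Decoder; the layer and shapes are illustrative): call() takes one packed argument, and the input_signature is a single nested list of TensorSpecs that mirrors it:
import tensorflow as tf

class PackedDecoder(tf.keras.Model):
    """Toy stand-in for RNN_Decoder, only to show the packed-input pattern."""
    def __init__(self):
        super().__init__()
        self.fc = tf.keras.layers.Dense(4)

    # One positional argument; the signature mirrors it as one nested list.
    @tf.function(input_signature=[[tf.TensorSpec(shape=[1, 1], dtype=tf.int32, name='x'),
                                   tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32, name='feature'),
                                   tf.TensorSpec(shape=[1, 512], dtype=tf.float32, name='hidden')]])
    def call(self, inputs):
        x, features, hidden = inputs  # unpack inside the body
        pooled = tf.reduce_mean(features, axis=1)  # (1, 256)
        packed = tf.concat([tf.cast(x, tf.float32), pooled, hidden], axis=-1)
        return self.fc(packed)

model = PackedDecoder()
# Call once with the inputs packed into a list, matching the signature.
model([tf.zeros([1, 1], tf.int32), tf.zeros([1, 64, 256]), tf.zeros([1, 512])])
# Conversion then proceeds as in the question:
# tf.lite.TFLiteConverter.from_keras_model(model).convert()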

TensorFlow Keras(v2.2) model fit with multiple outputs and losses failed

I want to use TensorFlow Keras (v2.2) model.fit on MNIST with multiple outputs and losses, but it fails.
My custom model returns a list [logits, embedding]. logits is a 2D tensor [batch, 10] and embedding is a 2D tensor [batch, 64].
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.reshape = tf.keras.layers.Reshape((28, 28, 1))
        self.conv2D1 = tf.keras.layers.Conv2D(filters=8, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')
        self.maxPool1 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="same")
        self.conv2D2 = tf.keras.layers.Conv2D(filters=8, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')
        self.maxPool2 = tf.keras.layers.MaxPooling2D(pool_size=2)
        self.flatten = tf.keras.layers.Flatten(data_format="channels_last")
        self.dropout = tf.keras.layers.Dropout(tf.compat.v1.placeholder_with_default(0.25, shape=[], name="dropout"))
        self.dense1 = tf.keras.layers.Dense(64, activation=None)
        self.dense2 = tf.keras.layers.Dense(10, activation=None)

    def call(self, inputs, training):
        x = self.reshape(inputs)
        x = self.conv2D1(x)
        x = self.maxPool1(x)
        if training:
            x = self.dropout(x)
        x = self.conv2D2(x)
        x = self.maxPool2(x)
        if training:
            x = self.dropout(x)
        x = self.flatten(x)
        x = self.dense1(x)
        embedding = tf.math.l2_normalize(x, axis=1)
        logits = self.dense2(embedding)
        return [logits, embedding]
loss_0 is a normal cross-entropy loss:
def loss_0(y_true, y_pred):
    loss_0 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred[0]))
    return loss_0
loss_1 is a triplet semi-hard loss:
def loss_1(y_true, y_pred):
    loss_1 = tfa.losses.triplet_semihard_loss(y_true=y_true, y_pred=y_pred[1], distance_metric="L2")
    return loss_1
When I use model.fit, I can only get the logits tensor in each loss; I can't get the embedding tensor. y_pred[0] and y_pred[1] do not work. Any suggestions?
model = MyModel()
model.compile(optimizer=tf.keras.optimizers.Adam(lr=1e-3), loss=[loss_0, loss_1], loss_weights=[0.1, 0.1])
history = model.fit(train_dataset, epochs=5)
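For what it's worth, tf.keras pairs the losses in compile(loss=[...]) with the model outputs positionally and calls each loss with only its own output, which is why indexing y_pred inside a loss fails. A sketch of the per-output pattern (an assumption about the intended fix; it assumes import tensorflow_addons as tfa as above, and the dataset must then yield one label per output):
def loss_0(y_true, y_pred):  # receives only the logits output
    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.cast(tf.squeeze(y_true), tf.int32), logits=y_pred))

def loss_1(y_true, y_pred):  # receives only the embedding output
    return tfa.losses.triplet_semihard_loss(
        y_true=tf.squeeze(y_true), y_pred=y_pred, distance_metric="L2")

model.compile(optimizer=tf.keras.optimizers.Adam(lr=1e-3),
              loss=[loss_0, loss_1], loss_weights=[0.1, 0.1])
# Supply the same labels for both outputs, e.g.:
# train_dataset = train_dataset.map(lambda x, y: (x, (y, y)))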

Subclass of tf.keras.Model cannot get summary() result

I want to build a subclass of tf.keras.Model and see the model structure with the summary function, but it does not work. The following is my code:
import tensorflow as tf

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.summary()
The error:
ValueError: This model has not yet been built. Build the model first
by calling build() or calling fit() with some data, or specify an
input_shape argument in the first layer(s) for automatic build.
You need to call each layer once to infer shapes, and then call the build() method of tf.keras.Model with the model's input shape as the argument:
import tensorflow as tf
import numpy as np

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')
        x = np.random.normal(size=(1, 32, 32, 3))
        x = tf.convert_to_tensor(x)
        _ = self.call(x)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.build((32, 32, 3))
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d (Conv2D)              multiple                  896
_________________________________________________________________
flatten (Flatten)            multiple                  0
_________________________________________________________________
dense (Dense)                multiple                  3686528
_________________________________________________________________
dense_1 (Dense)              multiple                  1290
=================================================================
Total params: 3,688,714
Trainable params: 3,688,714
Non-trainable params: 0
_________________________________________________________________
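(Note that the Output Shape column reads "multiple" because a subclassed model has no static graph of layer connections; Keras only records that each layer has been called, not the exact shapes flowing through it.)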
A better solution is listed here: provide a model() method that builds the model explicitly.
import tensorflow as tf
from tensorflow.keras import Input, Model

class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(1)

    def call(self, inputs, **kwargs):
        return self.dense(inputs)

    def model(self):
        x = Input(shape=(1,))
        return Model(inputs=[x], outputs=self.call(x))

MyModel().model().summary()
Editing @Vlad's answer to avoid this error: ValueError: Input 0 of layer conv2d_10 is incompatible with the layer: expected min_ndim=4, found ndim=3. Full shape received: (32, 32, 3)
Change this line from:
model.build((32, 32, 3))
to:
model.build((None, 32, 32, 3))
Final code:
import numpy as np
import tensorflow as tf

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')
        x = np.random.normal(size=(1, 32, 32, 3))
        x = tf.convert_to_tensor(x)
        _ = self.call(x)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.build((None, 32, 32, 3))
model.summary()

How to add a trainable weight to an LSTM in Keras/Tensorflow

I would like to add a trainable weight to my LSTM. When I use the following custom-layer pattern provided by the Keras documentation, the tensor is initialized but isn't added to the LSTM layer. When I use the same code on Dense layers or convnets, it works properly. Is there any other way to add a variable to a recurrent model?
from keras import backend as K
from keras.layers import Layer

class MyLayer(Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[0][1], self.output_dim),
                                      initializer='uniform',
                                      trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        assert isinstance(x, list)
        a, b = x
        return [K.dot(a, self.kernel) + b, K.mean(b, axis=-1)]

    def compute_output_shape(self, input_shape):
        assert isinstance(input_shape, list)
        shape_a, shape_b = input_shape
        return [(shape_a[0], self.output_dim), shape_b[:-1]]
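One direction that may work (a sketch under the assumption that the goal is for the variable to live inside the recurrent layer itself): put the extra weight in a custom cell derived from LSTMCell and wrap it with tf.keras.layers.RNN, so the weight is created in the cell's build() and participates in every timestep. The per-feature scaling here is purely illustrative:
import tensorflow as tf

class LSTMCellWithWeight(tf.keras.layers.LSTMCell):
    def build(self, input_shape):
        super(LSTMCellWithWeight, self).build(input_shape)
        # Extra trainable parameter, one scale per input feature (illustrative).
        self.my_kernel = self.add_weight(name='my_kernel',
                                         shape=(input_shape[-1],),
                                         initializer='uniform',
                                         trainable=True)

    def call(self, inputs, states, training=None):
        # Apply the extra weight before the standard LSTM computation.
        return super(LSTMCellWithWeight, self).call(
            inputs * self.my_kernel, states, training=training)

layer = tf.keras.layers.RNN(LSTMCellWithWeight(64), return_sequences=True)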