tf.reshape(self.normalized_price(prce), (-1, 1)), ValueError: Shape must be rank 1 but is rank 2 - tensorflow

I am getting the following error when I am calling the subclass of the model. My guess is that I am not passing the two parameters correctly or the reshape is not outputting the correct value.
ValueError: Shape must be rank 1 but is rank 2 for '{{node base_stock_model/concat}} = ConcatV2[N=3, T=DT_FLOAT, Tidx=DT_INT32](base_stock_model/sequential_2/embedding_2/embedding_lookup/Identity_1, base_stock_model/sequential_3/embedding_3/embedding_lookup/Identity_1, base_stock_model/Reshape, base_stock_model/concat/axis)' with input shapes: [32], [32], [1,1], [].
Here is the main class model
class StockModel(tfrs.models.Model):
    """Multi-task recommender that blends a rating loss and a retrieval loss.

    A user tower and a stock tower produce embeddings. A small dense network
    predicts a rating from the two embeddings concatenated together, and the
    two task losses are combined with the weights supplied at construction.
    """

    def __init__(self, rating_weight: float, retrieval_weight: float) -> None:
        """Create the towers, the rating head and both tasks.

        Args:
            rating_weight: Factor applied to the rating loss.
            retrieval_weight: Factor applied to the retrieval loss.
        """
        super().__init__()

        # Towers that turn raw features into embeddings.
        self.user_model = UserModel()
        self.stock_model = base_stockModel()

        # Dense head that maps concatenated embeddings to a single rating.
        rating_layers = [
            tf.keras.layers.Dense(256, activation="relu"),
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dense(1),
        ]
        self.rating_model = tf.keras.Sequential(rating_layers)

        # Rating task: mean squared error, reported as RMSE.
        self.rating_task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()],
        )

        # Retrieval task: candidates are the stock-tower outputs computed over
        # `stocks`, one element per batch.
        candidate_embeddings = stocks.batch(1).map(self.stock_model)
        self.retrieval_task: tf.keras.layers.Layer = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(candidates=candidate_embeddings)
        )

        # Weights used when the two losses are summed.
        self.rating_weight = rating_weight
        self.retrieval_weight = retrieval_weight

    def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
        """Run both towers and the rating head.

        Args:
            features: Mapping that is read at the keys "username", "name"
                and "price".

        Returns:
            A 3-tuple: user embeddings, stock embeddings, rating predictions.
        """
        user_vec = self.user_model(features['username'])
        # The stock tower parses the price from a string, so convert it first.
        price_text = tf.as_string(features["price"])
        stock_vec = self.stock_model([features["name"], price_text])
        joined = tf.concat([user_vec, stock_vec], axis=1)
        return (user_vec, stock_vec, self.rating_model(joined))

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Return the weighted sum of the rating and retrieval losses.

        The "Rating" entry is popped from `features` (the mapping is mutated)
        and used as the label for the rating task.
        """
        labels = features.pop("Rating")
        print("features", features)
        user_vec, stock_vec, predicted = self(features)

        # One loss per task.
        loss_rating = self.rating_task(labels=labels, predictions=predicted)
        loss_retrieval = self.retrieval_task(user_vec, stock_vec)

        # Blend them with the configured weights.
        weighted_rating = self.rating_weight * loss_rating
        weighted_retrieval = self.retrieval_weight * loss_retrieval
        return weighted_rating + weighted_retrieval
The above main class model calls the base_stockModel class, which is causing errors.
class base_stockModel(tf.keras.Model):
    """Stock tower: combines a title embedding with two price features."""

    def __init__(self):
        """Create the title embedding, price-bucket embedding and normalizer."""
        super().__init__()
        embedding_dimension = 32

        # Title -> integer id -> embedding vector.
        title_lookup = tf.keras.layers.StringLookup(
            vocabulary=unique_stock_titles, mask_token=None)
        title_table = tf.keras.layers.Embedding(
            len(unique_stock_titles) + 1, embedding_dimension)
        self.stock_embedding = tf.keras.Sequential([title_lookup, title_table])

        # Price -> bucket index -> embedding vector.
        price_bucketizer = tf.keras.layers.Discretization(prices_bucket.tolist())
        price_table = tf.keras.layers.Embedding(
            len(prices_bucket) + 2, embedding_dimension)
        self.price_embedding = tf.keras.Sequential([price_bucketizer, price_table])

        # Normalizer adapted on `prices`.
        self.normalized_price = tf.keras.layers.Normalization(axis=None)
        self.normalized_price.adapt(prices)

    def call(self, input, *args, **kwargs):
        """Concatenate title embedding, price embedding and normalized price.

        Args:
            input: Indexable value; element 0 is used as the title and
                element 1 as the price encoded as a string. `get_shape()` is
                called on it for the debug print.

        Returns:
            The three feature tensors concatenated along axis 1.
        """
        print(input.get_shape(), kwargs)
        title = input[0]
        price = tf.strings.to_number(input[1], out_type=tf.dtypes.float32)

        title_vector = self.stock_embedding(title)
        bucket_vector = self.price_embedding(price)
        # Reshape the normalized price into a single column.
        scaled_price = tf.reshape(self.normalized_price(price), (-1, 1))
        return tf.concat([title_vector, bucket_vector, scaled_price], axis=1)
This code is a variant of tensorflow recommender official page https://www.tensorflow.org/recommenders/examples/multitask/
https://www.tensorflow.org/recommenders/examples/context_features
Any help is much appreciated.

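First, check the rank of every input tensor. In the error above, tf.concat receives tensors of shapes [32], [32] and [1,1], which do not all have the same rank. If a tensor does not have the rank the model expects, use tf.reshape() or adjust the input so that it matches what the model requires. Note that tf.shape() gives you the shape at run time, i.e. while the model is running.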
Here is documentation on it.
https://www.tensorflow.org/api_docs/python/tf/reshape
https://www.tensorflow.org/api_docs/python/tf/shape

Related

PyTorch calculate gradient of output with respect to "output"

I was playing around with the backward method of PyTorch tensor to find the gradient of a multidimensional output of the model with respect to intermediate activation layers.
When I try to calculate the gradients of the output with respect to the last activation layer (the output), I get the gradients as 1. Shouldn't the gradients of outputs with respect to itself be 0? Am I wrong here or is there any mistake in my implementation? Below is my code and output for reference.
Code:
import torch.nn as nn
import torch
torch.manual_seed(7986)
class myModel(nn.Module):
    """Two conv+sigmoid stages that record the gradient reaching the output."""

    def __init__(self):
        super().__init__()
        # 1x1 convolution: 2 -> 3 channels.
        self.layer1 = nn.Sequential(nn.Conv2d(2, 3, 1), nn.Sigmoid())
        # 2x2 convolution: 3 -> 2 channels.
        self.layer2 = nn.Sequential(nn.Conv2d(3, 2, 2), nn.Sigmoid())
        # Filled in by the backward hook registered in forward().
        self.gradients = None

    def activations_hook(self, grad):
        """Store the gradient handed to the tensor hook."""
        self.gradients = grad

    def forward(self, x):
        """Run both stages and hook the final activation's gradient."""
        final_activation = self.layer2(self.layer1(x))
        final_activation.register_hook(self.activations_hook)
        return final_activation

    def get_activations_gradient(self):
        """Return the gradient captured during the last backward pass."""
        return self.gradients
# Build the model and run one forward pass on a random (1, 2, 2, 2) input.
myNet = myModel()
inputs = torch.randn(1, 2, 2, 2)
outputs = myNet(inputs)
print(f"inputs =\n{inputs}\n")
print(f"outputs =\n{outputs}\n")
# Seed the backward pass with ones; the hook registered in forward() stores
# the gradient that arrives at the output tensor.
outputs.backward(torch.ones_like(outputs))
gradients = myNet.get_activations_gradient()
print(f"gradients =\n{gradients}\n")
Output:
inputs =
tensor([[[[-1.1344, -0.6808],
[-1.8061, -0.8529]],
[[ 0.3898, -1.9382],
[-2.0533, -0.6483]]]])
outputs =
tensor([[[[0.3999]],
[[0.5760]]]], grad_fn=<SigmoidBackward0>)
gradients =
tensor([[[[1.]],
[[1.]]]])
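Since it is a variable, the derivative of the output with respect to the output itself is equal to 1 (dy/dy = 1), not 0. The hook registered on out2 receives the gradient that arrives at that tensor, which here is exactly the torch.ones_like(outputs) tensor passed to backward().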

Converting from PyTorch to Tensorflow for Self-Attention Pooling Layer

I have found a PyTorch implementation of the said layer from the paper "Self-Attention Encoding and Pooling for Speaker Recognition", available here. However, due to CUDA compatibility issues, I can't use that code. Also, thus far, all my code has been implemented in Tensorflow. So, I want to do a one-to-one translation/conversion from PyTorch to Tensorflow.
First of all, this is the code in PyTorch:
class SelfAttentionPooling(nn.Module):
    """Pool a sequence of hidden states into one vector with learned attention.

    A single linear unit scores every time step; the scores are normalized
    with a softmax over the time axis and used as weights for a sum over time.
    """

    def __init__(self, input_dim):
        """
        input:
            input_dim : size H of the hidden dimension of the inputs
        """
        super().__init__()
        self.W = nn.Linear(input_dim, 1)

    def forward(self, batch_rep):
        """
        input:
            batch_rep : size (N, T, H), N: batch size, T: sequence length, H: Hidden dimension

        attention_weight:
            att_w : size (N, T, 1)

        return:
            utter_rep: size (N, H)
        """
        # One score per time step: (N, T, 1) -> (N, T).
        scores = self.W(batch_rep).squeeze(-1)
        # Normalize over the time axis explicitly. Calling softmax without
        # `dim` is deprecated and picks the axis from the tensor's rank.
        att_w = nn.functional.softmax(scores, dim=-1).unsqueeze(-1)
        utter_rep = torch.sum(batch_rep * att_w, dim=1)
        return utter_rep
And this is my translation of the snippet code to Tensorflow:
# Asker's attempted Keras translation of the PyTorch layer.
# NOTE(review): the trailing "?" on the class line is not valid Python; it
# looks like a paste artifact - confirm against the original post.
class Self_Attention_Pooling(keras.layers.Layer): ?
    def __init__(self, input_dim):
        super(Self_Attention_Pooling, self).__init__()
        # NOTE(review): Dense(input_dim) creates input_dim units, whereas the
        # PyTorch version uses nn.Linear(input_dim, 1), i.e. a single unit.
        self.W = Dense(input_dim)
    # NOTE(review): the method is named `forward`, following the PyTorch
    # convention, rather than `call`.
    def forward(self, batch_rep):
        softmax = Softmax()
        att_w = self.W(batch_rep)
        att_w = softmax(att_w)
        # Not so sure about these two lines though.
        #x = np.expand(batch_rep)
        #att_w = softmax(self.W(x))
        # Weighted sum over axis 1, computed with NumPy rather than a
        # TensorFlow op.
        utter_rep = np.sum(batch_rep * att_w, axis=1)
        return utter_rep
Is my implementation/translation/conversion from PyTorch to Tensorflow correct? If not, please edit and help me.
Thank you very much.
2 remarks regarding your implementation:
For custom layers in TF, you should implement the call method instead of the forward method cf Implementing custom layers.
For the operations you should replace the numpy functions by tensorflow functions to enable GPU support.
Here is the code I am using in TF for the SelfAttentionPooling:
import tensorflow as tf
class SelfAttentionPooling(tf.keras.layers.Layer):
    """Pool a sequence of embeddings into one vector using learned attention."""

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        # A single bias-free unit produces one attention score per time step.
        self.dense = tf.keras.layers.Dense(units=1, use_bias=False)

    def call(self, x: tf.Tensor) -> tf.Tensor:
        """Apply the self attention pooling on input tensor.

        Args:
            x: input tensor (?, seq_len, emb_dim)

        Returns:
            (?, emb_dim)
        """
        # (?, seq_len, 1) -> (?, seq_len). Squeeze only the trailing axis: a
        # bare tf.squeeze would also drop a batch or sequence axis of size 1.
        scores = tf.squeeze(self.dense(x), axis=-1)
        # (?, seq_len), normalized over the sequence axis.
        attention_weights = tf.nn.softmax(scores, axis=-1)
        # (?, emb_dim)
        pooled = tf.reduce_sum(tf.expand_dims(attention_weights, axis=-1) * x, axis=1)
        return pooled
You can quickly check it gives the expected output:
# Quick shape check for the pooling layer.
self_attn_pooling = SelfAttentionPooling()
# Input of shape (batch, seq_len, emb_dim).
input_shape = 4, 9, 128
x = tf.random.normal(input_shape)
pooled = self_attn_pooling(x)
# The seq_len axis is summed away, leaving (batch, emb_dim).
assert pooled.shape == (4, 128)

Unable to understand the behavior of method `build` in tensorflow keras layers (tf.keras.layers.Layer)

Layers in tensorflow keras have a method build that is used to defer the weights creation to a time when you have seen what the input is going to be. a layer's build method
I have a few questions i have not been able to find the answer of:
here it is said that
If you assign a Layer instance as attribute of another Layer, the outer layer will start tracking the weights of the inner layer.
What does it mean to track the weights of a layer?
The same link also mentions that
We recommend creating such sublayers in the init method (since the sublayers will typically have a build method, they will be built when the outer layer gets built).
Does it mean that while running the build method of the child class (self), there will be an iteration through all the attributes of self, and whichever are found to be instances of (subclasses of) tf.keras.layers.Layer will have their build methods run automatically?
I can run this code:
class Net(tf.keras.Model):
    """A simple linear model."""
    def __init__(self):
        super(Net, self).__init__()
        # Sub-layer stored as an attribute of the model.
        self.l1 = tf.keras.layers.Dense(5)
    def call(self, x):
        # Forward pass: delegate to the single Dense layer.
        return self.l1(x)
# Instantiate the model and print its variables without ever calling it.
net = Net()
print(net.variables)
But not this:
class Net(tf.keras.Model):
    """A simple linear model."""
    def __init__(self):
        super(Net, self).__init__()
        # Sub-layer stored as an attribute of the model.
        self.l1 = tf.keras.layers.Dense(5)
    def build(self,input_shape):
        # NOTE(review): `input_shape` is received here but not forwarded;
        # the parent build() is invoked with no arguments.
        super().build()
    def call(self, x):
        # Forward pass: delegate to the single Dense layer.
        return self.l1(x)
# Instantiate the model and print its variables without ever calling it.
net = Net()
print(net.variables)
why?
I would say the build mentioned means, when you build a self-defined tf.keras.Model for example
net = Net()
then all the tf.keras.layers.Layer objects created in __init__ are stored in net, which is a callable object. It thereby becomes a complete object that TF can train later; this is what is meant by "tracking". The next time you call net(inputs), you get your outputs.
Here is a example of Tensorflow self-defined decoder with attention
class BahdanauAttention(tf.keras.layers.Layer):
    """Additive attention built from three dense layers."""

    def __init__(self, units):
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        """Compute a context vector and the attention weights.

        Args:
            query: hidden state, shape (batch_size, hidden size).
            values: shape (batch_size, max_len, hidden size).

        Returns:
            (context_vector, attention_weights) with shapes
            (batch_size, hidden_size) and (batch_size, max_length, 1).
        """
        # Insert a time axis, (batch_size, 1, hidden size), so the sum below
        # broadcasts along the time axis of `values`.
        expanded_query = tf.expand_dims(query, 1)

        # (batch_size, max_length, units) before V is applied.
        projected_query = self.W1(expanded_query)
        projected_values = self.W2(values)
        hidden = tf.nn.tanh(projected_query + projected_values)

        # V has one unit, so the score is (batch_size, max_length, 1).
        score = self.V(hidden)

        # Normalize over the time axis.
        attention_weights = tf.nn.softmax(score, axis=1)

        # Weighted sum over time -> (batch_size, hidden_size).
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, attention_weights
class Decoder(tf.keras.Model):
    """GRU decoder step that attends over the encoder output."""

    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super().__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(
            self.dec_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.fc = tf.keras.layers.Dense(vocab_size)
        # Attention sub-layer, created once here so its weights are reused.
        self.attention = BahdanauAttention(self.dec_units)

    def call(self, x, hidden, enc_output):
        """Run one decoding step.

        Args:
            x: input passed through the embedding layer.
            hidden: query handed to the attention layer.
            enc_output: shape (batch_size, max_length, hidden_size).

        Returns:
            (logits, state, attention_weights); logits are
            (batch_size, vocab).
        """
        context_vector, attention_weights = self.attention(hidden, enc_output)

        # (batch_size, 1, embedding_dim) after the embedding lookup.
        embedded = self.embedding(x)

        # Prepend the context on the feature axis:
        # (batch_size, 1, embedding_dim + hidden_size).
        context_step = tf.expand_dims(context_vector, 1)
        gru_input = tf.concat([context_step, embedded], axis=-1)

        sequence_out, state = self.gru(gru_input)

        # Collapse the time axis: (batch_size * 1, hidden_size).
        flat_out = tf.reshape(sequence_out, (-1, sequence_out.shape[2]))

        # Project to vocabulary size: (batch_size, vocab).
        logits = self.fc(flat_out)
        return logits, state, attention_weights
I have tried putting a tf.keras.layers.Layer object in call and got really poor results; my guess is that if you put it in call, it will be called multiple times, once each time a forward-backward propagation happens.

Custom Attention Layer using in Keras

I want to create a custom attention layer that, for the input at each time step, returns the weighted mean of the inputs at all time steps.
For Example, I want that input tensor with shape [32,100,2048] goes to layer and I get the tensor with the shape [32,100,2048]. I wrote the Layer as follow:
import tensorflow as tf
from keras.layers import Layer, Dense
#or
from tensorflow.keras.layers import Layer, Dense
# Asker's custom attention layer: writes one context vector per time step
# into a TensorArray inside a tf.while_loop.
class Attention(Layer):
    def __init__(self, units_att):
        self.units_att = units_att
        self.W = Dense(units_att)
        self.V = Dense(1)
        # NOTE(review): the base-class __init__ runs only after the sub-layers
        # have been assigned as attributes.
        super().__init__()
    # NOTE(review): this overrides `__call__` directly instead of defining
    # `call`.
    def __call__(self, values):
        # Loop counter and the (dynamic) number of time steps.
        t = tf.constant(0, dtype= tf.int32)
        time_steps = tf.shape(values)[1]
        # One slot per time step for the outputs and the attention weights.
        initial_outputs = tf.TensorArray(dtype=tf.float32, size=time_steps)
        initial_att = tf.TensorArray(dtype=tf.float32, size=time_steps)
        def should_continue(t, *args):
            # Loop condition: stop once every time step has been written.
            return t < time_steps
        def iteration(t, values, outputs, atts):
            # NOTE(review): apart from the write index, nothing in this body
            # depends on `t`; every iteration computes from the full `values`.
            score = self.V(tf.nn.tanh(self.W(values)))
            # attention_weights shape == (batch_size, time_step, 1)
            attention_weights = tf.nn.softmax(score, axis=1)
            # context_vector shape after sum == (batch_size, hidden_size)
            context_vector = attention_weights * values
            context_vector = tf.reduce_sum(context_vector, axis=1)
            outputs = outputs.write(t, context_vector)
            atts = atts.write(t, attention_weights)
            return t + 1, values, outputs, atts
        t, values, outputs, atts = tf.while_loop(should_continue, iteration,
                                                 [t, values, initial_outputs, initial_att])
        # Stack along a new leading (time) axis, then swap the first two axes.
        outputs = outputs.stack()
        outputs = tf.transpose(outputs, [1,0,2])
        atts = atts.stack()
        atts = tf.squeeze(atts, -1)
        atts = tf.transpose(atts, [1,0,2])
        # Returns the final counter and the input along with the results.
        return t, values, outputs, atts
For input= tf.constant(2, shape= [32, 100, 2048], dtype= tf.float32) I get the
output with shape = [32,100,2048] in tf2 and [32,None, 2048] in tf1.
For Input input= Input(shape= (None, 2048)) I get the output with shape = [None, None, 2048] in tf1 and I get error
TypeError: 'Tensor' object cannot be interpreted as an integer
in tf2.
Finally, in both cases, I can't use this layer in my model because my model input is Input(shape= (None, 2048)) and I get the error
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'
in tf1 and in tf2 I get the same error as said in above, I create my model with Keras functional method.
From the code you have shared, looks like you want to implement Bahdanau's attention layer in your code. You want to attend to all the 'values' (prev layer output - all its hidden states) and your 'query' would be the last hidden state of the decoder. Your code should actually be very simple and should look like:
class Bahdanau(tf.keras.layers.Layer):
    """Additive attention over `values`, driven by a single `query`."""

    def __init__(self, n):
        super().__init__()
        self.w = tf.keras.layers.Dense(n)
        self.u = tf.keras.layers.Dense(n)
        self.v = tf.keras.layers.Dense(1)

    def call(self, query, values):
        """Return the attention weights followed by the context tensor."""
        # Add an axis at position 1 so the query broadcasts against values.
        query_step = tf.expand_dims(query, 1)
        projected_query = self.w(query_step)
        projected_values = self.u(values)
        energies = self.v(tf.nn.tanh(projected_query + projected_values))
        # Normalize the scores over axis 1.
        weights = tf.nn.softmax(energies, axis=1)
        # Weighted sum of the values over axis 1.
        context = tf.reduce_sum(weights * values, axis=1)
        return weights, context
# Use 10 units in each of the dense layers `w` and `u`.
attentionlayer = Bahdanau(10)
# Call with the decoder state at step t-1 as the query and all encoder
# hidden states as the values.
a, c = attentionlayer(stminus1, hj)
We are not specifying the tensor shape anywhere in the code. This code will return a context tensor of the same size as 'stminus1', which is the 'query'. It does this after attending to all the 'values' (all hidden states of the encoder) using Bahdanau's attention mechanism.
So assuming your batch size is 32, timesteps=100 and embedding dimension=2048, the shape of stminus1 should be (32,2048) and the shape of the hj should be (32,100,2048). The shape of the output context would be (32,2048). We also returned the 100 attention weights just in case you want to route them to a nice display.
This is the simplest version of 'Attention'. If you have any other intent, please let me know and I will reformat my answer. For more specific details, please refer https://towardsdatascience.com/create-your-own-custom-attention-layer-understand-all-flavours-2201b5e8be9e

Converting keras functional model to keras class in tensorflow 2

I am trying to convert a Keras functional model into class derived from tensorflow.keras.models.Model and I'm facing 2 issues.
1. I need to multiply 2 layers using tensorflow.keras.layers.multiply, but it returns a ValueError: A merge layer should be called on a list of inputs.
2. If I remove this layer, thus working with a classical CNN, it returns a tensorflow.python.eager.core._SymbolicException:Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'patch:0' shape=(None, 64, 64, 3) dtype=float32>].
I would appreciate some guidance to convert my code. I'm using Python 3.7, TensorFlow 2.0rc2 and Keras 2.3.0. The class I have defined is the following:
class TestCNN(Model):
    """
    conv1 > conv2 > fc1 > fc2 > alpha * fc2 > Sigmoid > output
    """
    def __init__(self, input_dimension, n_category,**kwargs):
        """
        Constructor
        :param input_dimension: tuple of int, theoretically (patch_size x patch_size x channels)
        :param n_category: int, the number of categories to classify,
        :param weight_decay: float, weight decay parameter for all the kernel regularizers
            (NOTE(review): not a named parameter of this signature)
        :return: the Keras model
        """
        super(TestCNN, self).__init__(name='testcnn', **kwargs)
        self.input_dimension = input_dimension
        self.n_category = n_category
        # NOTE(review): both Conv2D layers are created without a kernel size
        # argument - confirm this is intended.
        self.conv1 = Conv2D(36, activation='relu', name='conv1/relu')
        self.conv1_maxpooling = MaxPooling2D((2, 2), name='conv1/maxpooling')
        self.conv2 = Conv2D(48, activation='relu', name='conv2/relu')
        self.conv2_maxpooling = MaxPooling2D((2, 2), name='conv2/maxpooling')
        self.flatten1 = Flatten(name='flatten1')
        self.fc1 = Dense(512, activation='relu', name='fc1/relu')
        self.fc2 = Dense(512, activation='relu', name='fc2/relu')
        self.alpha = TestLayer(layer_dim=128, name='alpha')
        self.output1 = TestSigmoid(output_dimension=n_category, name='output_layer')
    #tensorflow.function
    def call(self, x):
        # Convolution / pooling stack followed by the two dense layers.
        x = self.conv1(x)
        x = self.conv1_maxpooling(x)
        x = self.conv2(x)
        x = self.conv2_maxpooling(x)
        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.fc2(x)
        # NOTE(review): `alpha_output` and `fc2_output` are not assigned
        # anywhere in this method, and `self.alpha` is never applied.
        alpha_times_fc2 = multiply([alpha_output, fc2_output], name='alpha_times_fc2')
        return self.output1(alpha_times_fc2)
    def build(self, **kwargs):
        # Creates a symbolic Input, runs call() on it, then invokes the
        # base-class __init__ a second time with those inputs and outputs.
        inputs = Input(shape=self.input_dimension, dtype='float32', name='patch')
        outputs = self.call(inputs)
        super(TestCNN, self).__init__(name="TestCNN", inputs=inputs, outputs=outputs, **kwargs)
Then, in my main loop, I'm creating the instance as following:
# Instantiate the model for the configured input size and category count.
testcnn = TestCNN(input_dimension=input_dimension, n_category=training_set.category_count)
# Adam optimizer with hyper-parameters read from the `parameter` mapping.
optimizer = tensorflow.keras.optimizers.Adam(
    lr=parameter['training']['adam']['learning_rate'],
    beta_1=parameter['training']['adam']['beta1'],
    beta_2=parameter['training']['adam']['beta2'])
# NOTE(review): the list holds the TruePositives class itself, not an
# instance - confirm this is intended.
metrics_list = [tensorflow.keras.metrics.TruePositives]
loss_function = tensorflow.keras.losses.categorical_crossentropy
loss_metrics = tensorflow.keras.metrics.Mean()
# Build with no arguments, then print the layer summary.
testcnn.build()
testcnn.summary()
This code is raising the tensorflow.python.eager.core._SymbolicException. If I comment out some lines and return directly the results of the fc2 layer, I've got the ValueError.
I have commented out the build() function in my model and call it in my main script as follows:
# Build with an explicit input shape, compile, then print the summary.
testcnn.build(input_dimension)
# NOTE(review): `adam_optimizer` is the name used here - verify it matches
# the variable that actually holds the Adam instance.
testcnn.compile(optimizer=adam_optimizer, loss=loss_function, metrics=metrics_list)
testcnn.summary()
Input dimension is a list formatted as following:
input_dimension = (batch_size, image_size, image_size, channels)