LSTM encoder-decoder model training errors: ValueError - TensorFlow

Resources
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
print(tf.version.VERSION)
print(keras.__version__)
#2.5.0
#2.5.0
LSTM Encoder Decoder Model with Attention
n_features = 129
type_max = 3
n_padded_in = 10
n_padded_out = 10
# batch_size, latent_dim and l_rate are defined elsewhere in the notebook (values not shown)

input_item = layers.Input(batch_input_shape=[None, n_padded_in],
                          name="item_input",
                          dtype=tf.int64)
input_type = layers.Input(batch_input_shape=[None, n_padded_in],
                          name="type_input",
                          dtype=tf.int64)

encoding_padding_mask = tf.math.logical_not(tf.math.equal(input_item, 0))

embedding_item = layers.Embedding(input_dim=n_features,
                                  output_dim=batch_size,
                                  name="item_embedding")(input_item)
embedding_type = layers.Embedding(input_dim=type_max + 1,
                                  output_dim=batch_size,
                                  name="rec_embedding")(input_type)

concat_inputs = layers.Concatenate(name="concat_inputs")(
    [embedding_item, embedding_type])
concat_inputs = tf.keras.layers.BatchNormalization(
    name="batchnorm_inputs")(concat_inputs)

encoder_lstm = layers.LSTM(units=latent_dim,
                           return_state=True,
                           name="lstm_encoder")
encoder_output, hidden, cell = encoder_lstm(concat_inputs)
states = [hidden, cell]
decoder_output = hidden

decoder_lstm = layers.LSTM(units=latent_dim,
                           return_state=True,
                           name="lstm_decoder")
output_dense = layers.Dense(n_features, name="output")
att = layers.Attention(use_scale=False,
                       causal=True,
                       name="attention")

inputs = np.zeros((batch_size, 1, n_features))
all_outputs = []
for _ in range(n_padded_out):
    context_vector = att([decoder_output, encoder_output])
    context_vector = tf.expand_dims(context_vector, 1)
    inputs = tf.cast(inputs, tf.float32)
    inputs = tf.concat([context_vector, inputs], axis=-1)
    decoder_output, state_h, state_c = decoder_lstm(inputs, initial_state=states)
    output = output_dense(decoder_output)
    output = tf.expand_dims(output, 1)
    all_outputs.append(output)
    inputs = output
    states = [state_h, state_c]

all_outputs = layers.Lambda(lambda x: tf.concat(x, axis=1))(all_outputs)

type_encoder_model = keras.Model([input_item, input_type],
                                 all_outputs,
                                 name="type_encoder_model")
type_encoder_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                           optimizer=keras.optimizers.Adam(learning_rate=l_rate),
                           metrics=["sparse_categorical_accuracy"])
type_encoder_model.summary()
Data Preparation
# second input as sequence
type_seq_padded = keras.preprocessing.sequence.pad_sequences(
    data["product_type"].to_list(),
    maxlen=n_padded_in,
    padding="pre",
    value=0.0
)
# first input sequence
input_seq_padded = keras.preprocessing.sequence.pad_sequences(
    data["input_seq"].to_list(),
    maxlen=n_padded_in,
    padding="pre",
    value=0.0
)
# output sequence
output_seq_padded = keras.preprocessing.sequence.pad_sequences(
    data["output_seq"].to_list(),
    maxlen=n_padded_out,
    padding="pre",
    value=0.0
)
Data Samples
type_seq_padded
array([[0, 0, 0, ..., 1, 1, 1],
       [0, 0, 0, ..., 2, 3, 3],
       [0, 0, 0, ..., 3, 3, 3],
       ...,
       [0, 0, 0, ..., 1, 3, 3],
       [0, 0, 0, ..., 3, 3, 3],
       [0, 0, 0, ..., 3, 3, 3]], dtype=int32)
input_seq_padded
array([[  0,   0,   0, ..., 101,  58, 123],
       [  0,   0,   0, ...,  79,  95,  87],
       [  0,   0,   0, ...,  98, 109, 123],
       ...,
       [  0,   0,   0, ..., 123, 109,  98],
       [  0,   0,   0, ..., 109,  98, 123],
       [  0,   0,   0, ...,  95, 123,  95]], dtype=int32)
output_seq_padded
array([[  0,   0,   0, ...,  58, 123,  43],
       [  0,   0,   0, ...,  95,  87, 123],
       [  0,   0,   0, ..., 109, 123,  10],
       ...,
       [  0,   0,   0, ..., 109,  98, 123],
       [  0,   0,   0, ...,  98, 123,  43],
       [  0,   0,   0, ..., 123,  95,  95]], dtype=int32)
My LSTM encoder-decoder model takes two input sequences (items and item types) and produces one output sequence (items). The final dense layer computes, for each of the 129 items, the probability of it being the next item purchased. The model is trained with the code below:
hist = type_encoder_model.fit([input_seq_padded[:64000],
                               type_seq_padded[:64000]],
                              output_seq_padded[:64000],
                              epochs=1,
                              batch_size=128,
                              verbose=1)
And when I attempt to use the model for prediction with the code below:
y_pred = base_model_X.predict([input_seq_padded_test,
                               type_seq_padded_test])
Test Samples
type_seq_padded_test
array([[0, 0, 0, ..., 2, 3, 3],
       [0, 0, 0, ..., 3, 2, 2],
       [0, 0, 0, ..., 3, 3, 3],
       ...,
       [0, 0, 0, ..., 3, 2, 1],
       [0, 0, 0, ..., 3, 2, 3],
       [0, 0, 0, ..., 2, 3, 3]], dtype=int32)
input_seq_padded_test
array([[ 0,  0,  0, ..., 31, 10, 13],
       [ 0,  0,  0, ...,  9,  6,  6],
       [ 0,  0,  0, ..., 13, 13,  9],
       ...,
       [ 0,  0,  0, ..., 10, 51, 18],
       [ 0,  0,  0, ..., 12, 44, 12],
       [ 0,  0,  0, ...,  6,  9, 11]], dtype=int32)
I get an error like the one below:
ValueError: in user code:

    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1569 predict_function  *
        return step_function(self, iterator)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1559 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1285 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2833 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3608 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1552 run_step  **
        outputs = model.predict_step(data)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1525 predict_step
        return self(x, training=False)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:1030 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/functional.py:421 call
        inputs, training=training, mask=mask)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/functional.py:556 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:1030 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py:1363 _call_wrapper
        return self._call_wrapper(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py:1395 _call_wrapper
        result = self.function(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py:1768 concat
        return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_array_ops.py:1228 concat_v2
        "ConcatV2", values=values, axis=axis, name=name)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py:750 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py:601 _create_op_internal
        compute_device)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:3565 _create_op_internal
        op_def=op_def)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:2042 __init__
        control_input_ops, op_def)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:1883 _create_c_op
        raise ValueError(str(e))

    ValueError: Dimension 0 in both shapes must be equal, but are 32 and 128. Shapes are [32,1] and [128,1]. for '{{node base_model_X/tf.concat_40/concat}} = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32](base_model_X/tf.expand_dims_80/ExpandDims, base_model_X/148834, base_model_X/tf.concat_40/concat/axis)' with input shapes: [32,1,256], [128,1,137], [] and with computed input tensors: input[2] = <-1>.
Now I am looking for a solution to the error above. I don't know what others think, but I am really starting to hate encoder-decoder LSTMs. Thanks in advance for your ideas or recommendations for a different model configuration.
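One observation on the traceback: the 32-vs-128 clash is consistent with the decoder's start tensor being created with np.zeros((batch_size, 1, n_features)), which bakes a fixed batch of 128 into the traced graph, while predict() runs with batches of 32. A minimal sketch of one possible repair, assuming that diagnosis is correct, is to derive the start tensor from the runtime batch dimension instead:
# Sketch (assumption: the 32-vs-128 clash comes from the fixed-size NumPy
# start tensor above). Build the decoder's first input from the dynamic
# batch dimension so the graph works for any batch size at predict() time.
batch = tf.shape(encoder_output)[0]                  # runtime batch size
inputs = tf.zeros(tf.stack([batch, 1, n_features]))  # replaces np.zeros((batch_size, 1, n_features))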

Related

How to train a model whose labels have shape [5, 30]?

How do I train on a dataset in which each label has shape [5, 30]? For example:
[
 [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 54, 55, 21, 56, 57,  3,
   22, 19, 58,  6, 59,  4, 60,  1, 61, 62, 23, 63, 23, 64],
 [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  1, 65,  7, 66,  2, 67, 68,  3, 69, 70],
 [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0, 11, 12,  5, 13, 14,  9, 10,  5, 15, 16, 17,  2,  8],
 [ 0,  0,  0,  0,  0,  2, 71,  1, 72, 73, 74,  7, 75, 76, 77,  3,
   20, 78, 18, 79,  1, 21, 80, 81,  3, 82, 83, 84,  6, 85],
 [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,
   86, 87,  3, 88, 89,  1, 90, 91, 22, 92, 93,  4,  6, 94]
]
One option would be to reshape the labels to [150], but that would make the tokenized sentences lose their meaning. Please suggest how to arrange the Keras layers, and which layers to use, so that I can build the model. I want to be able to generate sentences later.
My current code for the model is this:
model = tf.keras.Sequential([
    feature_layer,
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dropout(.1),
    layers.Dense(5),
    layers.Dense(30, activation='softmax'),
])
opt = Adam(learning_rate=0.01)
model.compile(optimizer=opt, loss='mean_absolute_percentage_error', metrics=['accuracy'])
The actual data:
state      district             month     rainfall  max_temp  min_temp  max_rh  min_rh  wind_speed  advice
Orissa     Kendrapada           february  0.0       34.6      19.4      88.2    29.6    12.0        chances of foot rot disease in paddy crop; apply urea at 3 weeks after transplanting at active tillering stage for paddy;......
Jharkhand  Saraikela Kharsawan  february  0         35.2      16.6      29.4    11.2    3.6         provide straw mulch and go for intercultural operations to avoid moisture losses from soil; chance of leaf blight disease in potato crop; .......
I need to be able to generate the advice texts.
If you do consider that the output needs to be in this shape (and not flattened), the easiest (and, in my opinion, also correct) solution is a multi-output network, where each output has a layers.Dense(30, activation='softmax').
You would have something like:
def create_model():
    base_model = ....  # stacked Dense units + other; you can even create multi-input multi-output if you really want that
    first_output = Dense(30, activation='softmax', name='output_1')(base_model)
    second_output = Dense(30, activation='softmax', name='output_2')(base_model)
    ...
    fifth_output = Dense(30, activation='softmax', name='output_5')(base_model)
    model = Model(inputs=input_layer,
                  outputs=[first_output, second_output, third_output, fourth_output, fifth_output])
    return model
optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer,
              loss={'output_1': 'sparse_categorical_crossentropy',
                    'output_2': 'sparse_categorical_crossentropy',
                    'output_3': 'sparse_categorical_crossentropy',
                    'output_4': 'sparse_categorical_crossentropy',
                    'output_5': 'sparse_categorical_crossentropy'},
              metrics={'output_1': tf.keras.metrics.Accuracy(),
                       'output_2': tf.keras.metrics.Accuracy(),
                       'output_3': tf.keras.metrics.Accuracy(),
                       'output_4': tf.keras.metrics.Accuracy(),
                       'output_5': tf.keras.metrics.Accuracy()})
model.fit(X, y,
          epochs=100, batch_size=10, validation_data=(val_X, val_y))
Here, note that y (for both train and validation) is a NumPy array of length 5 (the number of outputs), and each element has length 30.
Again, make sure that you actually need such a configuration; I posted this answer as a demonstration of multi-output labels in TensorFlow and Keras and for the benefit of others, but I am not 100% sure you actually need this exact configuration (perhaps you can opt for something simpler).
Note the use of sparse_categorical_crossentropy, since your labels are not one-hot encoded (also, MAPE is for regression, not classification).
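As a side note (my own addition, not part of the approach above): Keras also accepts the labels for a named multi-output model as a dict keyed by the output layer names, which avoids relying on output order. A minimal sketch, assuming each head receives one integer class id per sample:
# Sketch (assumption): y has shape (n_samples, 5), one integer label per head.
y_by_head = {f"output_{i + 1}": y[:, i] for i in range(5)}
model.fit(X, y_by_head, epochs=100, batch_size=10)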

Add a index selected tensor to another tensor with overlapping indices in pytorch

This is a follow-up question to this question. I want to do the exact same thing in PyTorch. Is it possible? If yes, how?
import torch
image = torch.tensor([[246, 50, 101], [116, 1, 113], [187, 110, 64]])
iy = torch.tensor([[1, 0, 2], [1, 0, 2], [2, 2, 2]])
ix = torch.tensor([[0, 2, 1], [1, 2, 0], [0, 1, 2]])
warped_image = torch.zeros(size=image.shape)
I need something like torch.add.at(warped_image, (iy, ix), image) that gives the output as
[[  0.   0.  51.]
 [246. 116.   0.]
 [300. 211.  64.]]
Note that the indices at (0, 1) and (1, 1) point to the same location, (0, 2). So I want warped_image[0, 2] = image[0, 1] + image[1, 1] = 51.
What you are looking for is torch.Tensor.index_put_ with the accumulate argument set to True:
>>> warped_image = torch.zeros_like(image)
>>> warped_image.index_put_((iy, ix), image, accumulate=True)
tensor([[  0,   0,  51],
        [246, 116,   0],
        [300, 211,  64]])
Or, using the out-of-place version torch.index_put:
>>> torch.index_put(torch.zeros_like(image), (iy, ix), image, accumulate=True)
tensor([[  0,   0,  51],
        [246, 116,   0],
        [300, 211,  64]])
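For comparison, the NumPy counterpart presumably used in the linked question looks like this (an assumption on my part; np.add.at is NumPy's unbuffered indexed add, which also accumulates on duplicate indices):
# Sketch: same accumulate-on-duplicate-indices behaviour in NumPy.
import numpy as np

image = np.array([[246, 50, 101], [116, 1, 113], [187, 110, 64]])
iy = np.array([[1, 0, 2], [1, 0, 2], [2, 2, 2]])
ix = np.array([[0, 2, 1], [1, 2, 0], [0, 1, 2]])

warped_image = np.zeros_like(image)
np.add.at(warped_image, (iy, ix), image)  # duplicate (iy, ix) pairs are summed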

BiLSTM and attention to find the topic representation of text

# Preprocessing of data
df = pd.read_csv("small_quac.csv")
df = df.drop(['Unnamed: 0'], axis=1)
# read_data and get_max_seq_len are helper functions defined elsewhere
shared_topic, section_title, for_tokenize = read_data(df)

# Define x_train and y_train
x_train = np.asarray(shared_topic)
y_train = np.asarray(section_title)

# Find max_seq_len
max_seq_len_x = get_max_seq_len(x_train, remove_stopwords=False)
max_seq_len_y = get_max_seq_len(y_train, remove_stopwords=False)
max_seq_len = max(max_seq_len_x, max_seq_len_y)

tokenizer = Tokenizer(filters='\n')
tokenizer.fit_on_texts(for_tokenize)
vocab_size = len(tokenizer.word_index) + 1

X = tokenizer.texts_to_sequences(x_train)
y = tokenizer.texts_to_sequences(y_train)
# print(X[0])

word2idx = tokenizer.word_index
idx2word = tokenizer.index_word
fdist = tokenizer.word_counts

X = pad_sequences(X, maxlen=max_seq_len_x, padding='post')
y = pad_sequences(y, maxlen=max_seq_len_y, padding='post')

# From here the modelling starts
rnn_cell_size = 128
max_seq_len_y = 14
max_seq_len_x = 139

class Attention(tf.keras.Model):
    def __init__(self, units):
        super(Attention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, hidden):
        hidden_with_time_axis = tf.expand_dims(hidden, 1)
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
        attention_weights = tf.nn.softmax(self.V(score), axis=1)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights

sequence_input = tf.keras.layers.Input(shape=(max_seq_len_x,))
# embedding_matrix is built from the GloVe vectors (not shown)
embedded_sequences = tf.keras.layers.Embedding(vocab_size, 300,
                                               weights=[embedding_matrix],
                                               trainable=False, mask_zero=True,
                                               name='Encoder-Word-Embedding')(sequence_input)
lstm = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(rnn_cell_size,
                         dropout=0.3,
                         return_sequences=True,
                         return_state=True,
                         recurrent_activation='relu',
                         recurrent_initializer='glorot_uniform'),
    name="bi_lstm_0")(embedded_sequences)
lstm, forward_h, forward_c, backward_h, backward_c = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(rnn_cell_size,
                         dropout=0.2,
                         return_sequences=True,
                         return_state=True,
                         recurrent_activation='relu',
                         recurrent_initializer='glorot_uniform'))(lstm)

state_h = tf.keras.layers.Concatenate()([forward_h, backward_h])
state_c = tf.keras.layers.Concatenate()([forward_c, backward_c])

context_vector, attention_weights = Attention(32)(lstm, state_h)
output = keras.layers.Dense(max_seq_len_y, activation='softmax')(context_vector)

model = keras.Model(inputs=sequence_input, outputs=output)

# summarize layers
print(model.summary())
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

history = model.fit(x=X, y=y, epochs=30)
Also, I am using 300-dimensional GloVe embeddings.
Here my X is a matrix of shape (59, 139), where 59 is the number of samples and 139 is the length of the longest sentence in my text rows. These 139 values are word2idx indices from my vocabulary.
y is a matrix of shape (59, 14), where 59 is as above and 14 is the length of my longest title, likewise filled with word2idx indices.
For example, I want this:
Input:
array([293,  40, 294, 129,  75, 130, 129, 131, 295, 296, 132, 297, 298,
         2, 299,  34,  12,  76, 300,  27, 301,  15,   1, 302, 133,   4,
        77, 303,   3, 134, 304,  78,  34, 305,  11, 306, 307,   4,   1,
       132, 135,  22,  10, 308,  11, 136,   4,   1, 309,  50,   4, 310,
        11,  78, 311, 312,   3,  77,   1, 313, 130,  10, 137,  11,  12,
       109,   7, 314, 315,   7,   1,  76, 316,   4, 317, 318,  34, 138,
       319, 139, 320,   3,  77, 321,  79, 322,   4,   1, 323, 324,   4,
         1, 325,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0])
Output:
array([1040, 1041, 2, 1042, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Please help me out; I have spent many days trying to find the right approach, but I have been unable to find it.
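One direction that may help (a sketch on my part, not a tested fix from the thread): to emit a 14-token title, the final layer usually needs a softmax over the vocabulary at each of the 14 steps, rather than a single softmax of width max_seq_len_y. Assuming the context_vector, rnn_cell_size, vocab_size and max_seq_len_y from the code above:
# Sketch (assumption): decode the attention context into max_seq_len_y steps,
# scoring every vocabulary word at each step instead of the title length.
decoder_in = tf.keras.layers.RepeatVector(max_seq_len_y)(context_vector)
decoder_seq = tf.keras.layers.LSTM(rnn_cell_size, return_sequences=True)(decoder_in)
output = tf.keras.layers.TimeDistributed(
    tf.keras.layers.Dense(vocab_size, activation='softmax'))(decoder_seq)
# With integer targets of shape (59, 14), compile with
# loss='sparse_categorical_crossentropy' rather than 'categorical_crossentropy'.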

How to skip 'for' loop when dealing with numpy arrays

Here is my code:
import numpy as np
>>> x
array([[   1,   57],
       [   2,   21],
       [   4,   34],
       ...,
       [3348,   29],
       [3350,   23],
       [3353,   11]])
>>> x.shape
(1310, 2)
>>> pic  # greyscale image
array([[223, 222, 225, ..., 217, 219, 214],
       [224, 222, 219, ..., 220, 219, 216],
       [223, 224, 220, ..., 219, 215, 213],
       ...,
       [228, 226, 231, ..., 224, 228, 229],
       [229, 227, 227, ..., 216, 225, 227],
       [226, 228, 225, ..., 218, 225, 230]], dtype=uint8)
pic = np.stack((pic, pic, pic), axis=2)
>>> pic.shape
(2208, 2752, 3)
>>> labels.shape
(2208, 2752)
color = [0, 0, 255]
for i in x:
    B = np.full((i[1], 3), color).astype('int')
    pic[labels == i[0]] = B
This colors blue (RGB 0, 0, 255) every pixel of the greyscale image pic whose label satisfies labels == i[0] for some row i of x. It is very slow because of the for loop (for i in x).
Is there an efficient "NumPy way" that avoids the for loop and would therefore be much faster? Thanks for your kind help!
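One vectorized approach that should be equivalent (a sketch, assuming x[:, 0] holds the label ids to match and the color is constant for all of them):
# Sketch: since every matched pixel gets the same color, the per-label loop
# collapses into a single boolean mask over the whole label image.
mask = np.isin(labels, x[:, 0])  # True wherever a pixel's label appears in x[:, 0]
pic[mask] = color                # broadcast [0, 0, 255] over all matched pixels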

What is the output order of conv2d in TensorFlow?

I wanted to get the values of an output tensor in TensorFlow.
The kernel shape of the first layer was K[row, col, in_channel, out_channel], and the input image shape was P[batch, row, col, channel].
I read off the first four kernel values, K[0, 0, 0, 0], K[0, 1, 0, 0], K[1, 0, 0, 0], K[1, 1, 0, 0], and the corresponding input values, P[0, 0, 0, 0], P[0, 0, 1, 0], P[0, 1, 0, 0], P[0, 1, 1, 0].
The Python code is F = tf.nn.conv2d(P, K, strides=[1, 1, 1, 1], padding='SAME').
The console showed that the output value F[0, 0, 0, 0] is not K[0, 0, 0, 0] * P[0, 0, 0, 0] + K[0, 1, 0, 0] * P[0, 0, 1, 0] + K[1, 0, 0, 0] * P[0, 1, 0, 0] + K[1, 1, 0, 0] * P[0, 1, 1, 0].
What is the order of these output feature maps? I had 40 convolution kernels, and the first output was not computed by the first kernel.
There's something wrong in your input values.
Remember that conv2d wants an input tensor of shape [batch, in_height, in_width, in_channels] and a filter/kernel tensor of shape [filter_height, filter_width, in_channels, out_channels].
In fact, after reshaping your data, the results are the expected ones (please note that conv2d computes the cross-correlation, not the convolution).
import tensorflow as tf

K = tf.get_variable("K", shape=(4, 4), initializer=tf.constant_initializer([
    [0, 0, 0, 0],
    [0, 1, 0, 0],
    [1, 0, 0, 0],
    [1, 1, 0, 0]
]))
K = tf.reshape(K, (4, 4, 1, 1))

P = tf.get_variable("P", shape=(4, 4), initializer=tf.constant_initializer([
    [0, 0, 0, 0],
    [0, 0, 1, 0],
    [0, 1, 0, 0],
    [0, 1, 1, 0]
]))
P = tf.reshape(P, (1, 4, 4, 1))

F = tf.nn.conv2d(P, K, strides=[1, 1, 1, 1], padding='VALID')

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    print(sess.run(F))
In this example, I'm computing the correlation between the input P (a batch with 1 element of depth 1) and the filter K (a 4x4 filter, with input depth 1 and output depth 1).
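As a quick sanity check (worked by hand from the matrices above, so treat it as an assumption about this exact snippet): with VALID padding, a 4x4 input and a 4x4 filter produce a single output value, and the only nonzero elementwise product is K[3, 1] * P[3, 1] = 1 * 1, so the print should show a tensor of shape (1, 1, 1, 1) containing 1.0.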