How to concatenate with a Flatten layer - TensorFlow

I would like to flatten an input before concatenation like below.
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import (
    CategoryEncoding,
    Concatenate,
    Dense,
    Discretization,
    Embedding,
    Flatten,
    Input,
)
from tensorflow.keras.layers.experimental.preprocessing import HashedCrossing
dnn_hidden_units = [32, 8]
NBUCKETS = 16
latbuckets = np.linspace(start=38.0, stop=42.0, num=NBUCKETS).tolist()
lonbuckets = np.linspace(start=-76.0, stop=-72.0, num=NBUCKETS).tolist()
# Bucketization with Discretization layer
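# ("inputs" is assumed to be a dict of keras.Input tensors keyed by feature name, defined earlier; not shown in the question)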
plon = Discretization(lonbuckets, name="plon_bkt")(inputs["pickup_longitude"])
plat = Discretization(latbuckets, name="plat_bkt")(inputs["pickup_latitude"])
dlon = Discretization(lonbuckets, name="dlon_bkt")(inputs["dropoff_longitude"])
dlat = Discretization(latbuckets, name="dlat_bkt")(inputs["dropoff_latitude"])
# Feature Cross with HashedCrossing layer
p_fc = HashedCrossing(num_bins=NBUCKETS * NBUCKETS, name="p_fc")((plon, plat))
d_fc = HashedCrossing(num_bins=NBUCKETS * NBUCKETS, name="d_fc")((dlon, dlat))
pd_fc = HashedCrossing(num_bins=NBUCKETS**4, name="pd_fc")((p_fc, d_fc))
# Embedding with Embedding layer
pd_embed = Embedding(input_dim=NBUCKETS**4, output_dim=10, name="pd_embed")(
pd_fc
)
unk = Concatenate(axis=1)([pd_embed])
# Concatenate and define inputs for deep network
deep = Concatenate(name="deep_input", axis=0)(
[
inputs["pickup_longitude"],
inputs["pickup_latitude"],
inputs["dropoff_longitude"],
inputs["dropoff_latitude"],
Flatten(name="flatten_embedding")(pd_embed),
]
)
I am getting the following error at the concatenate layer.
ValueError: A Concatenate layer requires inputs with matching shapes
except for the concatenation axis. Received: input_shape=[(None,),
(None,), (None,), (None,), (None, 10)]
I understand that (None,10) should be (None*10) or just (None) but I am not sure how to get there.

The Concatenate layer takes a list of tensors as input, all of the same shape except for the concatenation axis, and returns a single tensor that is the concatenation of all inputs.
The error you mention says that you are trying to concatenate tensors of two different ranks: (None,) is a rank-1 tensor (only a batch dimension of unknown size), while (None, 10) is a rank-2 tensor. All inputs must have the same rank, and every dimension except the concatenation axis must match.
For example, to concatenate two tensors a and b, both must have the same shape:
import tensorflow as tf
a = tf.random.uniform([2, 3])
b = tf.random.uniform([2, 3])
tf.keras.layers.Concatenate(axis=0)([a, b])
Output: <tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[0.5595623 , 0.07109773, 0.646863 ],
[0.1997714 , 0.6131079 , 0.03418195],
[0.40428162, 0.94192684, 0.10390592],
[0.72463846, 0.3348019 , 0.95906615]], dtype=float32)>
If a and b have different shapes, it produces an error:
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(2, 3), (3, 2)]
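For the snippet in the question, a possible fix (a sketch, assuming the four coordinate inputs are scalar tensors of shape (None,)) is to give each of them an explicit feature dimension with Reshape and concatenate on axis=1 instead of axis=0:
import tensorflow as tf
from tensorflow.keras.layers import Concatenate, Flatten, Reshape

# Reshape each (None,) input to (None, 1) so every tensor is rank 2,
# then concatenate along the feature axis.
deep = Concatenate(name="deep_input", axis=1)(
    [
        Reshape((1,))(inputs["pickup_longitude"]),
        Reshape((1,))(inputs["pickup_latitude"]),
        Reshape((1,))(inputs["dropoff_longitude"]),
        Reshape((1,))(inputs["dropoff_latitude"]),
        Flatten(name="flatten_embedding")(pd_embed),  # shape (None, 10)
    ]
)
# Resulting shape: (None, 4 + 10) = (None, 14)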
Thank You.

Related

multiply two layers of tensors with different sizes

I want to multiply two layers as follows:
from tensorflow.keras.layers import *
import tensorflow as tf
scale_, mean_ = 2., 4.
a = Input(shape=(128,128), name='Input_vec')
m_num = Input(shape=(4,), name='Input_num')
output = Lambda(lambda x: tf.multiply(x[0], x[1]))((a, m_num[1]))
but I always get the following error:
ValueError: Dimensions must be equal, but are 128 and 4 for '{{node lambda_5/Mul}} = Mul[T=DT_FLOAT](Placeholder, Placeholder_1)' with input shapes: [?,128,128], [4].
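Since m_num has shape (None, 4) and a has shape (None, 128, 128), tf.multiply cannot broadcast them, and m_num[1] indexes the batch dimension rather than selecting a feature. A possible sketch, assuming the intent is to scale each sample of a by one scalar taken from m_num, is to slice per sample and reshape so broadcasting works:
from tensorflow.keras.layers import Input, Lambda
import tensorflow as tf

a = Input(shape=(128, 128), name='Input_vec')
m_num = Input(shape=(4,), name='Input_num')
# x[:, 1:2] has shape (None, 1); reshaping to (None, 1, 1) lets it
# broadcast against (None, 128, 128).
scalar = Lambda(lambda x: tf.reshape(x[:, 1:2], (-1, 1, 1)))(m_num)
output = Lambda(lambda x: tf.multiply(x[0], x[1]))([a, scalar])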

ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples

This is a regression problem where I want to generate 5 float values from each image of size 224 x 224. So I use a fully connected network with 5 nodes in the last layer. But doing so in Keras gives me the following error:
import keras, os
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications.inception_v3 import InceptionV3
## data_list = list of four 224x224 numpy arrays
inception = InceptionV3(weights='imagenet', include_top=False)
x = inception.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(5, activation='relu')(x)
y = [np.random.random(5),np.random.random(5),np.random.random(5),np.random.random(5)]
model = Model(inputs=inception.input, outputs=predictions)
opt = Adam(lr=0.001)
model.compile(optimizer=opt, loss="mae")
model.fit(data_list, y, verbose=0, epochs=100)
Error:
ValueError: Data cardinality is ambiguous:
     x sizes: 224, 224, 224, 224
     y sizes: 5, 5, 5, 5
Make sure all arrays contain the same number of samples.
What could be going wrong?
Convert data_list and y to NumPy arrays or tensors.
In your code, the list is treated as four separate inputs while your model has only one input - see https://keras.io/api/models/model_training_apis/
Add these lines:
import tensorflow as tf
data_list = tf.stack(data_list)
y = tf.stack(y)
Alternatively, try this:
model.fit(np.array(data_list), np.array(y), verbose=0, epochs=100)
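Why this works: when x is a Python list, fit treats each element as a separate model input, so the four 224 x 224 arrays look like four inputs with 224 samples each. Stacking them into a single array gives one input with 4 samples, matching the 4 rows of y. A minimal sketch with hypothetical random data standing in for the question's images:
import numpy as np
import tensorflow as tf

# Hypothetical stand-ins: four RGB images and four 5-value targets.
data_list = [np.random.random((224, 224, 3)) for _ in range(4)]
y = [np.random.random(5) for _ in range(4)]

x = tf.stack(data_list)  # shape (4, 224, 224, 3): one input, 4 samples
y = tf.stack(y)          # shape (4, 5): 4 samples, cardinalities now match
print(x.shape, y.shape)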

Getting TypeError: sparse matrix length is ambiguous; use getnnz() or shape[0] while doing multi class classification

from sklearn.naive_bayes import CategoricalNB
from sklearn.datasets import make_multilabel_classification
X, y = make_multilabel_classification(sparse=True, n_labels=15,
                                      return_indicator='sparse',
                                      allow_unlabeled=False)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=0)
I tried using X.todense() but the error is still raised.
X_train = X_train.todense()
X_test = X_test.todense()
Training on the dataset:
from skmultilearn.adapt import MLkNN
from sklearn.metrics import accuracy_score
classifier = MLkNN(k=20)
classifier.fit(X_train, y_train)
Predicting the output on the test set:
y_pred = classifier.predict(X_test)
accuracy_score(y_test,y_pred)
np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1)
You are trying to get the length of a sparse matrix, which is ambiguous:
len(y_pred)
Your matrix y_pred has shape (25, 5), as y_pred.shape shows.
So instead of len(y_pred) you could use y_pred.shape[0], which returns 25.
But then you will hit the next problem when you use y_pred.reshape(y_pred.shape[0], 1) (previously y_pred.reshape(len(y_pred), 1)):
ValueError: cannot reshape array of size 125 into shape (25, 1)
This error makes sense: you are trying to squeeze a matrix with 125 values into a shape that holds only 25. You need to rethink that step for multilabel output.
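One way around it (a sketch, assuming y_pred and y_test are SciPy sparse matrices of shape (25, 5)) is to densify both matrices and concatenate them side by side instead of reshaping:
import numpy as np

# Convert the sparse label matrices to dense arrays before comparing.
y_pred_dense = y_pred.toarray()  # shape (25, 5)
y_test_dense = y_test.toarray()  # shape (25, 5)
# Predictions and ground truth side by side: shape (25, 10).
comparison = np.concatenate((y_pred_dense, y_test_dense), axis=1)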

what's the meaning of 'input_length'?

The data have 4 timesteps, but the embedding's input_length=3, so what is the meaning of input_length?
from tensorflow import keras
import numpy as np
data = np.array([[0,0,0,0]])
emb = keras.layers.Embedding(input_dim=2, output_dim=3, input_length=3)
emb(data)
As per the official documentation for the Embedding layer:
input_length: Length of input sequences, when it is constant. This
argument is required if you are going to connect Flatten then Dense
layers upstream (without it, the shape of the dense outputs cannot be
computed).
from tensorflow import keras
import numpy as np
model = keras.models.Sequential()
model.add(keras.layers.Embedding(input_dim=2, output_dim=3, input_length=4))
# the model will take as input an integer matrix of size (batch, input_length).
input_array = np.array([[0,0,0,0]])
model.compile('rmsprop', 'mse')
output_array = model.predict(input_array)
print(output_array)
The above works fine, but if you change input_length to 3, you will get the error below:
ValueError: Error when checking input: expected embedding_input to
have shape (3,) but got array with shape (4,)
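The doc's point about Flatten and Dense can be seen in a short sketch: with input_length fixed, Flatten produces a vector of known width, so the following Dense layer's weight shape is computable at build time:
from tensorflow import keras

model = keras.models.Sequential()
model.add(keras.layers.Embedding(input_dim=2, output_dim=3, input_length=4))
model.add(keras.layers.Flatten())  # known output width: 4 * 3 = 12
model.add(keras.layers.Dense(1))   # weight shape (12, 1) is now computable
model.compile('rmsprop', 'mse')
model.summary()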

Layer dot_1 was called with an input that isn't a symbolic tensor. All inputs to the layer should be tensors

I am using a model on the MovieLens dataset. I wanted to combine two Sequential models in a Keras dot product. However, I got the following error:
Layer dot_1 was called with an input that isn't a symbolic tensor. Received
type: <class 'keras.engine.sequential.Sequential'>. Full input:
[<keras.engine.sequential.Sequential object at 0x00000282DAFCC710>,
<keras.engine.sequential.Sequential object at 0x00000282DB172C18>]. All
inputs to the layer should be tensors.
The code below shows how the model is built. The error comes from the line:
merged = dot([P, Q], axes = 1, normalize = True)
max_userid, max_movieid and K_FACTORS are already defined. Can somebody help me with this error?
import numpy as np
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, Reshape, Concatenate, dot
from keras import Input
from keras.optimizers import Adagrad
# Define model
# P is the embedding layer that creates a user-by-latent-factors matrix.
# If the input is a user_id, P returns the latent factor vector for that user.
P = Sequential()
P.add(Embedding(max_userid, K_FACTORS, input_length=1))
P.add(Reshape((K_FACTORS,)))
# Q is the embedding layer that creates a Movie by latent factors matrix.
# If the input is a movie_id, Q returns the latent factor vector for that movie.
Q = Sequential()
Q.add(Embedding(max_movieid, K_FACTORS, input_length=1))
Q.add(Reshape((K_FACTORS,)))
mergedModel = Sequential()
merged = dot([P, Q], axes = 1, normalize = True)
mergedModel.add(merged)
ada_grad = Adagrad(lr=0.1, epsilon=1e-08, decay=0.0)
dot expects symbolic tensors as inputs, not Sequential model objects. The Keras functional API provides a more flexible way of defining such models:
from keras.layers import Input
input_1 = Input(shape=(1,))
input_2 = Input(shape=(1,))
P = Reshape((K_FACTORS,))(Embedding(max_userid, K_FACTORS, input_length=1)(input_1))
Q = Reshape((K_FACTORS,))(Embedding(max_movieid, K_FACTORS, input_length=1)(input_2))
P_dot_Q = dot([P, Q], axes = 1, normalize = True)
model = Model(inputs=[input_1,input_2], outputs=P_dot_Q)
#print(model.summary())
#model.compile(loss = 'MSE', optimizer='adam',metrics = ['accuracy'])
#model.fit([np.array([1]), np.array([1])],[1])
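To sanity-check the functional model end to end, a minimal usage sketch (assuming K_FACTORS, max_userid and max_movieid were defined before building the model above, as the question states):
import numpy as np

model.compile(loss='mse', optimizer='adam')
# Train on a single (user_id, movie_id) pair with one target rating.
model.fit([np.array([1]), np.array([2])], np.array([1.0]), epochs=1, verbose=0)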