Data cardinality is ambiguous: x sizes: 60000 y sizes: 10000 - tensorflow

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
seed=0
np.random.seed(seed)
tf.random.set_seed(3)
(x_train,y_class_train),(x_test,y_class_test)=tf.keras.datasets.mnist.load_data()
print('train_image_count: {}'.format(x_train.shape[0]))
print('test_image_count: {}'.format(x_test.shape[0]))
x_train=x_train.reshape(x_train.shape[0],28,28,1).astype(float) / 255
print(x_train.shape)
x_test=x_test.reshape(x_test.shape[0],28,28,1).astype(float) / 255
print(x_test.shape)
y_train=pd.get_dummies(y_class_test)
y_test=pd.get_dummies(y_class_test)
print(y_train.shape)
print(y_test.shape)
x=tf.keras.layers.Input([28,28,1])
h=tf.keras.layers.Conv2D(6, kernel_size=5, padding='same', activation='swish')(x)
h=tf.keras.layers.MaxPool2D()(h)
h=tf.keras.layers.Conv2D(16,kernel_size=5,activation='swish')(h)
h=tf.keras.layers.MaxPool2D()(h)
h=tf.keras.layers.Dropout(0.25)(h)
h=tf.keras.layers.Flatten()(h)
h=tf.keras.layers.Dense(120, activation='swish')(h)
h=tf.keras.layers.Dense(84, activation='swish')(h)
h=tf.keras.layers.Dropout(0.25)(h)
y=tf.keras.layers.Dense(10,activation='softmax')(h)
model=tf.keras.models.Model(x,y)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics='accuracy')
from keras.callbacks import EarlyStopping
Early_Stopping_callbacks=EarlyStopping(monitor='accuracy', patience=10)
model.fit(x_train,y_train, epochs=100, batch_size=200, callbacks=[Early_Stopping_callbacks])
model.summary()
Error:
ValueError: Data cardinality is ambiguous:
    x sizes: 60000
    y sizes: 10000
Make sure all arrays contain the same number of samples.
I'm studying TensorFlow with MNIST, but I don't know why I get this error. Please tell me why it occurs. Thanks!
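The mismatch is on the line y_train=pd.get_dummies(y_class_test): x_train holds the 60,000 training images, but y_train is one-hot encoded from y_class_test, which has only 10,000 labels. Encode the training labels instead:
y_train = pd.get_dummies(y_class_train)  # 60,000 rows, matching x_train
y_test = pd.get_dummies(y_class_test)    # 10,000 rows, matching x_test
With that change the sample counts agree and model.fit runs.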

Related

I'm building a deep neural network and I keep getting "TypeError: __init__() takes from 1 to 3 positional arguments but 4 were given"

I'm trying to develop a deep neural network where I want to predict a single parameter based on multiple inputs. However, I'm getting the error, as stated in the title, and I'm not sure why. I haven't even called an __init__() method in my code, so I'm confused as to why it's giving me this error.
This is the code that I've written so far and yields the following error. I would appreciate any help, thanks!
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
d = pd.read_csv(r"AirfoilSelfNoise.csv")
x = d.iloc[:, 0:5] #frequency [Hz], angle of attack [deg], chord length [m], free-stream velocity [m/s], suction side displacement thickness [m], input
y = d.iloc[:, 5] #scaled sound pressure level [dB], output
df = pd.DataFrame(d, columns=['f', 'alpha', 'c', 'U_infinity', 'delta', 'SSPL'])
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)
mod = keras.Sequential(
    keras.layers.Dense(30, input_shape=(5,), activation='relu'),
    keras.layers.Dense(25, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
)
mod.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
mod.fit(xtrain, ytrain, epochs=50)
TypeError: __init__() takes from 1 to 3 positional arguments but 4 were given
You forgot to put the layers in a list (square brackets) inside the Sequential call. As written, Sequential receives each layer as a separate positional argument, which is why __init__ complains about getting 4 of them; its first parameter must be a single list of layers. In your case:
mod = keras.Sequential([
    keras.layers.Dense(30, input_shape=(5,), activation='relu'),
    keras.layers.Dense(25, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])
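Beyond the error itself, one design note: the question describes predicting a single continuous parameter (a sound pressure level in dB), but a sigmoid output with binary_crossentropy assumes a target in [0, 1]. A minimal sketch of a regression-style head, keeping the same hidden layers (an assumption on my part, not part of the original answer):
mod = keras.Sequential([
    keras.layers.Dense(30, input_shape=(5,), activation='relu'),
    keras.layers.Dense(25, activation='relu'),
    keras.layers.Dense(1)  # linear output for an unbounded regression target
])
mod.compile(optimizer='adam', loss='mse', metrics=['mae'])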

What's the reason for shapes not being compatible and how can I fix this?

Context: I haven't really coded before, but now I'm hoping to build a facial recognition system from fer2013. I ran into a problem when building my CNN: the shapes of the labels (None, 7) and of the model output (23, 23, 7) are not compatible. I don't know where I went wrong. The code is below.
import pandas as pd
df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/fer2013.csv')
from csv import reader
import numpy as np
from numpy import genfromtxt
import cv2
from tensorflow import keras
import matplotlib.pyplot as plt
pixels = df['pixels'].tolist() #1
faces = []
for pixel_sequence in pixels: #this lists down each row
    face = [int(pixel) for pixel in pixel_sequence.split()] #this lists down each integer separately
    face = np.asarray(face).reshape(48,48)
    face = cv2.resize(face.astype('uint8'), (48,48))
    faces.append(face.astype('float32')) #### <-- this line
    face = face / 255.0
faces = np.asarray(faces)
faces = np.expand_dims(faces, -1)
emotions = np.array(pd.get_dummies(df['emotion']))
import sklearn.model_selection
from sklearn.model_selection import train_test_split
from keras.callbacks import ReduceLROnPlateau
X_train, X_test, y_train, y_test = train_test_split(faces,emotions, test_size=0.2, random_state=10)
model = keras.Sequential([
    keras.layers.Conv2D(128, 3, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.MaxPool2D(2,2),
    keras.layers.Dense(7, activation='softmax')
])
model.compile(optimizer='adam',
              loss=keras.losses.CategoricalCrossentropy(),
              metrics='accuracy')
model.fit(X_train, y_train, epochs=5, batch_size=300)
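The shapes clash because Dense(7) here is applied directly to the 23×23 feature map left by the conv/pool layers, so the model outputs (23, 23, 7) per sample while each label is a plain 7-vector. A minimal sketch of a fix, assuming the 48×48 single-channel faces built above, is to flatten the feature map before the classifier:
model = keras.Sequential([
    keras.layers.Conv2D(128, 3, activation='relu', input_shape=(48, 48, 1)),
    keras.layers.Dropout(0.5),
    keras.layers.MaxPool2D(2,2),
    keras.layers.Flatten(),  # collapse (23, 23, 128) into a vector so Dense yields (None, 7)
    keras.layers.Dense(7, activation='softmax')
])
Separately, note that face = face / 255.0 runs after the append, so the unscaled copy is what lands in faces; scale before appending if normalized inputs are intended.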

ValueError: Data cardinality is ambiguous:

I'm using code from https://github.com/TheoMoumiadis/HVAC-calc-with-NN
but I get this error:
ValueError: Data cardinality is ambiguous:
    x sizes: 667
    y sizes: 668
Make sure all arrays contain the same number of samples.
Could you help me? Should I reshape the data, and if so, how?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras import models
from keras import layers
from keras.utils import np_utils
dataset = pd.read_csv('C:/.../ENB2012_data.csv')
print(dataset)
X_train = dataset.iloc[0:667,1:9].values.astype('float32')
Y1_train = dataset.loc[0:667,'Y1'].values.astype('float32')
Y2_train = dataset.loc[0:667,'Y2'].values.astype('float32')
X_test = dataset.iloc[668:767,1:9].values.astype('float32')
Y1_test = dataset.loc[668:767,'Y1'].values.astype('float32')
Y2_test = dataset.loc[668:767,'Y2'].values.astype('float32')
mean = X_train.mean(axis=0)
X_train -= mean
std = X_train.std(axis=0)
X_train /= std
X_test -= mean
X_test /= std
def build_model():
    model = models.Sequential()
    model.add(layers.Dense(64, input_dim=X_train.shape[1], activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model
model = build_model()
model.fit(X_train, Y1_train, epochs=300, batch_size=10, verbose=0)
test_mse_score, test_mae_score = model.evaluate(X_test, Y1_test)
Thanks to @Frightera and @Antoine. For the benefit of the community, the solution is provided here. The root cause is that pandas .iloc slicing is end-exclusive while .loc slicing is end-inclusive, so dataset.iloc[0:667] selects 667 rows but dataset.loc[0:667,'Y1'] selects 668, and the x/y sample counts disagree. Please refer to the working code shown below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras import models
from keras import layers
from keras.utils import np_utils
dataset = pd.read_csv('C:/.../ENB2012_data.csv')
#print(dataset)
X_train = dataset.iloc[0:668,1:9].values.astype('float32')   # iloc is end-exclusive: rows 0-667 (668 rows)
Y1_train = dataset.loc[0:667,'Y1'].values.astype('float32')  # loc is end-inclusive: rows 0-667 (668 rows)
Y2_train = dataset.loc[0:667,'Y2'].values.astype('float32')
X_test = dataset.iloc[668:768,1:9].values.astype('float32')  # rows 668-767, aligned with the test labels below
Y1_test = dataset.loc[668:767,'Y1'].values.astype('float32')
Y2_test = dataset.loc[668:767,'Y2'].values.astype('float32')
mean = X_train.mean(axis=0)
X_train -= mean
std = X_train.std(axis=0)
X_train /= std
X_test -= mean
X_test /= std
def build_model():
    model = models.Sequential()
    model.add(layers.Dense(64, input_dim=X_train.shape[1], activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model
model = build_model()
model.fit(X_train, Y1_train, epochs=300, batch_size=10, verbose=0)
test_mse_score, test_mae_score = model.evaluate(X_test, Y1_test)
Output:
4/4 [==============================] - 0s 3ms/step - loss: 283.6571 - mae: 13.5637
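A cheap sanity check catches this class of error before training starts (a small addition using the variable names from the script above):
print(X_train.shape, Y1_train.shape)  # the first dimensions (sample counts) must match
print(X_test.shape, Y1_test.shape)
assert X_train.shape[0] == Y1_train.shape[0]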

ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples

This is a regression problem where I want to generate 5 float values from each image of size 224 × 224, so I use a fully connected head with 5 nodes in the last layer. But doing so in Keras gives me the following error:
import keras, os
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications.inception_v3 import InceptionV3
## data_list = list of four 224x224 numpy arrays
inception = InceptionV3(weights='imagenet', include_top=False)
x = inception.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(5, activation='relu')(x)
y = [np.random.random(5),np.random.random(5),np.random.random(5),np.random.random(5)]
model = Model(inputs=inception.input, outputs=predictions)
opt = Adam(lr=0.001)
model.compile(optimizer=opt, loss="mae")
model.fit(data_list, y, verbose=0, epochs=100)
Error:
ValueError: Data cardinality is ambiguous:
     x sizes: 224, 224, 224, 224
     y sizes: 5, 5, 5, 5
Make sure all arrays contain the same number of samples.
What could be going wrong?
Convert data_list and y to numpy arrays or tensors. In your code, the list is treated as four separate model inputs, while your model has only one input - https://keras.io/api/models/model_training_apis/
Add these lines:
import tensorflow as tf
data_list = tf.stack(data_list)
y = tf.stack(y)
Or try this:
model.fit(np.array(data_list), np.array(y), verbose=0, epochs=100)
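Either way, the model then receives a single x array whose first dimension is the sample count. A quick sanity check under the stated setup (four images, five targets each; the 3-channel shape is an assumption, since the question only says 224×224):
x = np.stack(data_list)  # e.g. (4, 224, 224, 3) if each image has 3 channels
y = np.stack(y)          # (4, 5): 4 samples, matching x
print(x.shape, y.shape)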

What resources should I be learning to calculate my kernel size?

I am trying to build a softmax classifier using convolutional neural networks, but I keep getting the following error from Keras:
Negative dimension size caused by subtracting 4 from 1 for 'max_pooling1d_1/MaxPool' (op: 'MaxPool') with input shapes: [?,1,1,64].
I am using a reshaped dataset with the following sizes:
train_x: (624, 3, 9)
train_y: (624, 2) after one-hot encoding
test_x: (150, 3, 9)
test_y: (150, 2) after one-hot encoding
The 3D numpy array was reshaped from a (624, 27) matrix into (624, 3, 9), and so on.
I honestly think the problem comes from calculating the sizes of the kernels and the pool_size.
What resources should I be reading to get my input into a format that my network will accept?
Many thanks!
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.utils import to_categorical
from keras import layers
import numpy as np
import matplotlib.pyplot as plt
f=open('data/data_shuffled.csv')
data=f.read()
f.close()
lines=data.split('\n')
header=lines[0].split(',')
lines=lines[1:625]
train_x=np.zeros(((len(lines)),len(header)))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(',')[0:]]
    train_x[i,:] = values
f=open('data/labels_shuffled.csv')
data=f.read()
f.close()
lines=data.split('\n')
header=lines[0].split(',')
lines=lines[1:625]
train_y=np.zeros(((len(lines)),len(header)))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(',')[0:]]
    train_y[i,:] = values
f=open('data/data_shuffled.csv')
data=f.read()
f.close()
lines=data.split('\n')
header=lines[0].split(',')
lines=lines[626:776]
test_x=np.zeros(((len(lines)),len(header)))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(',')[0:]]
    test_x[i,:] = values
f=open('data/labels_shuffled.csv')
data=f.read()
f.close()
lines=data.split('\n')
header=lines[0].split(',')
lines=lines[626:776]
test_y=np.zeros(((len(lines)),len(header)))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(',')[0:]]
    test_y[i,:] = values
#reshaping data to have samples.
train_x=train_x.reshape(624,3,9)
test_x=test_x.reshape(150,3,9)
#one hot encoding
train_y=to_categorical(train_y)
test_y=to_categorical(test_y)
verbose, epochs, batch_size = 0, 10000, 32
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu',input_shape=(n_timesteps,n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
history=model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
model.evaluate(test_x, test_y, batch_size=batch_size, verbose=1)
I just need to get a model prediction, either 1 or 0. Please help!
Each of your inputs is only (3, 9), and after the Conv1D layers the time dimension per filter has already shrunk to 1, just as the error states, so a MaxPooling1D over that axis will not work here.
One solution is probably to drop the max-pooling layer, since the time dimension is already a single value (i.e., you can treat it as if it had already been max-pooled and move forward).
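As a concrete illustration, here is a minimal sketch (one possible fix, not the only one) reusing the variables from the script above: give the convolutions padding='same' so they do not shrink the 3-step time axis, and omit the pooling layer entirely:
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, padding='same', activation='relu', input_shape=(n_timesteps,n_features)))
model.add(Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'))
model.add(Dropout(0.5))
model.add(Flatten())  # (3, 64) -> 192 features
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
After training, np.argmax(model.predict(test_x), axis=1) gives the 1-or-0 predictions the question asks for.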