How do I convert the data type of a TensorFlow Dataset [EMNIST/balanced] (from uint8 to float32) - tensorflow

I am using the TensorFlow dataset "emnist/balanced". The data type of the feature values is uint8 by default. However, the TensorFlow model accepts only floating-point values.
How can I convert the data type of the features and labels to float32?
The code is here:
#########################################################3
import tensorflow as tf
import tensorflow_datasets as tfds
datasets, info = tfds.load(name="emnist/balanced", with_info=True, as_supervised=True)
emnist_train, emnist_test = datasets['train'], datasets['test']
.
.
.
.
.
.
history = model.fit(emnist_train, epochs = 10)
#validation
test_loss, test_acc = model.evaluate(emnist_test, verbose=2)
print(test_acc)
Error --
2
3
----> 4 history = model.fit(emnist_train, epochs = 10)
5
6 #validation
TypeError: Value passed to parameter 'features' has DataType uint8 not in list of allowed values: float16, bfloat16, float32, float64
TypeError: Value passed to parameter 'features' has DataType uint8 not in list of allowed values: float16, bfloat16, float32, float64

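Please refer to the working code below, which trains an ANN on the Fashion MNIST dataset. Dividing the uint8 images by 255.0 converts them to floating point; for a tf.data pipeline such as the one returned by tfds.load, the same conversion can be applied with emnist_train.map(lambda image, label: (tf.cast(image, tf.float32) / 255.0, label)).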
try:
# %tensorflow_version only exists in Colab.
%tensorflow_version 2.x
except Exception:
pass
from __future__ import absolute_import, division, print_function, unicode_literals
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
print("T/F Version:",tf.__version__)
#### Import the Fashion MNIST dataset
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
##Scale these values to a range of 0 to 1 before feeding them to the neural network model
train_images = train_images / 255.0
test_images = test_images / 255.0
###Build the model
##the neural network requires configuring the layers of the model
##Set up the layers
model = keras.Sequential([
keras.layers.Flatten(input_shape=(28, 28)),
keras.layers.Dense(128, activation='relu'),
keras.layers.Dense(10)
])
###Compile the model
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
###Train the model
##Feed the model
model.fit(train_images, train_labels, epochs=10)
###Evaluate accuracy
##compare how the model performs on the test dataset
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print('\nTest accuracy:', test_acc)
output:
T/F Version: 2.1.0
Train accuracy:91.06
Test accuracy: 0.8871

Related

How to find class labels from a keras model

I am predicting classes, but there is something I don't get. In the simplified example below, I train a model to predict MNIST handwritten digits. My test set has an accuracy of 95%, when I use
model.evaluate(test_image, test_label)
However, when I use
model.predict(test_image)
and then extract the predicted labels using np.argmax(), this accuracy drops. When I run all the code again and again, this accuracy changes a lot.
I suspect now that the classes in the model are not ordered 0, 1 ... 9. Is there a way to see the class labels of a model? Or did I make another mistake?
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets.mnist import load_data
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
import numpy as np
# Load data
(train_image, train_label), (test_image, test_label) = load_data()
# Train
model = Sequential([
Flatten(input_shape=(28,28)),
Dense(100, activation="relu"),
Dense(100, activation="relu"),
Dense(10, activation="sigmoid")
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics='accuracy')
history = model.fit(train_image, train_label,
batch_size=32, epochs=50,
validation_data=(test_image, test_label),
verbose = 0)
eval = model.evaluate(test_image, test_label)
print('Accuracy (auto):', eval[1]) # This is always high
# Predict and evaluate manually
predictions = model.predict(test_image)
# Pick the highest-scoring class per sample in one vectorized call
# (axis=1 runs over the 10 outputs of the final Dense layer).
pred = np.argmax(predictions, axis=1)
true = test_label
print('Accuracy (manually):', np.mean(pred == true)) # This varies a lot

ValueError: Data cardinality is ambiguous:

I'm using code from: https://github.com/TheoMoumiadis/HVAC-calc-with-NN
but I get this error:
ValueError: Data cardinality is ambiguous:
x sizes: 667
y sizes: 668
Make sure all arrays contain the same number of samples.
Could you help me? Should I reshape the data, and if so, how?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras import models
from keras import layers
from keras.utils import np_utils
dataset = pd.read_csv('C:/.../ENB2012_data.csv')
print(dataset)
X_train = dataset.iloc[0:667,1:9].values.astype('float32')
Y1_train = dataset.loc[0:667,'Y1'].values.astype('float32')
Y2_train = dataset.loc[0:667,'Y2'].values.astype('float32')
X_test = dataset.iloc[668:767,1:9].values.astype('float32')
Y1_test = dataset.loc[668:767,'Y1'].values.astype('float32')
Y2_test = dataset.loc[668:767,'Y2'].values.astype('float32')
mean = X_train.mean(axis=0)
X_train -= mean
std = X_train.std(axis=0)
X_train /= std
X_test -= mean
X_test /= std
def build_model():
    """Build and compile the regression network.

    The network is a Sequential stack of two 64-unit ReLU Dense layers
    followed by a single-unit Dense output. The input width is taken from
    the module-level ``X_train`` (its second dimension). The model is
    compiled with the 'rmsprop' optimizer, 'mse' loss and the 'mae' metric.

    Returns:
        The compiled Sequential model.
    """
    n_features = X_train.shape[1]
    network = models.Sequential([
        layers.Dense(64, input_dim=n_features, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ])
    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network
model = build_model()
model.fit(X_train, Y1_train, epochs=300, batch_size=10, verbose=0)
test_mse_score, test_mae_score = model.evaluate(X_test, Y1_test)
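Thank you @Frightera and @Antoine. For the benefit of the community, I am providing the solution here. The cause of the error is that .iloc slices by position and excludes the end point, whereas .loc slices by label and includes it, so in the question X_train had 667 rows while Y1_train had 668.
Please refer to the working code shown below.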
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras import models
from keras import layers
from keras.utils import np_utils
dataset = pd.read_csv('C:/.../ENB2012_data.csv')
#print(dataset)
# .iloc slices by position and excludes the end point; .loc slices by label and
# includes it. Assuming the default integer index produced by read_csv,
# iloc[0:668] and loc[0:667] both select rows 0..667 (668 samples).
X_train = dataset.iloc[0:668,1:9].values.astype('float32')
Y1_train = dataset.loc[0:667,'Y1'].values.astype('float32')
Y2_train = dataset.loc[0:667,'Y2'].values.astype('float32')
# Test rows 668..767: iloc[668:768] matches loc[668:767] row for row.
# (iloc[667:767] would pair each target with the previous row's features
# and would reuse training row 667 in the test set.)
X_test = dataset.iloc[668:768,1:9].values.astype('float32')
Y1_test = dataset.loc[668:767,'Y1'].values.astype('float32')
Y2_test = dataset.loc[668:767,'Y2'].values.astype('float32')
mean = X_train.mean(axis=0)
X_train -= mean
std = X_train.std(axis=0)
X_train /= std
X_test -= mean
X_test /= std
def build_model():
    """Build and compile the regression network.

    The network is a Sequential stack of two 64-unit ReLU Dense layers
    followed by a single-unit Dense output. The input width is taken from
    the module-level ``X_train`` (its second dimension). The model is
    compiled with the 'rmsprop' optimizer, 'mse' loss and the 'mae' metric.

    Returns:
        The compiled Sequential model.
    """
    n_features = X_train.shape[1]
    network = models.Sequential([
        layers.Dense(64, input_dim=n_features, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ])
    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network
model = build_model()
model.fit(X_train, Y1_train, epochs=300, batch_size=10, verbose=0)
test_mse_score, test_mae_score = model.evaluate(X_test, Y1_test)
Output:
4/4 [==============================] - 0s 3ms/step - loss: 283.6571 - mae: 13.5637

CNN Uint8 datatype Issue

I am trying to make a CNN for MNIST using Keras, but I have some problems with the code.
I mostly get this error:
TypeError: Value passed to parameter 'input' has DataType uint8 not in list of allowed values: float16, bfloat16, float32, float64
Here is my code:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Conv2D, Dropout, MaxPooling2D
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.utils import to_categorical
(Train_Data, Train_Labels), (Test_Data, Test_Labels) = tf.keras.datasets.mnist.load_data()
Train_Data = Train_Data.reshape(60000,28,28,1)
Test_Data = Test_Data.reshape(10000,28,28,1)
def save(model):
    """Save ``model`` under the fixed name "CNN" via its own ``save`` method."""
    target = "CNN"
    model.save(target)
def load(name):
    """Load a saved Keras model from ``name`` and return it.

    Args:
        name: Path or name passed to ``tf.keras.models.load_model``.

    Returns:
        The loaded model. (Previously the model was bound to a local
        variable and dropped, so the function always returned None.)
    """
    return tf.keras.models.load_model(name)
model = keras.Sequential()
model.add(Conv2D(784, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=(5,5)))
model.add(Dropout(.2))
model.add(keras.layers.Flatten())
model.add(Dense(25, activation='relu'))
model.add(Dense(10, activation='softmax'))
# The keyword must be spelled `optimizer`; with `optimzer` no optimizer is passed.
# NOTE(review): 'mse' against integer class labels looks unintended for a
# 10-way softmax output — a sparse categorical cross-entropy loss is the usual
# match; confirm before relying on the reported accuracy.
model.compile(optimizer='adam', loss="mse", metrics=['accuracy'])
model.fit(Train_Data, Train_Labels)
I don't know what to do; any help would be appreciated.
The original images of the MNIST data are of type uint8 (values in the range [0, 255]); however, before training a CNN you need to normalize them. Commonly they are normalized to a range centred on zero, for example [-0.5, 0.5]. The division also converts the arrays to floating point, which removes the uint8 TypeError. You can do so by adding the lines:
# Scale pixel values from [0, 255] to [-0.5, 0.5]; true division also promotes
# the uint8 arrays to floating point.
Train_Data = Train_Data / 255 - 0.5
# Normalize the test set from Test_Data itself (not from Train_Data).
Test_Data = Test_Data / 255 - 0.5

Error converting keras model to tfjs: duplicate weight name Variable

Following the tutorial at https://www.tensorflow.org/tutorials/images/hub_with_keras resulted in a file model.h5. Converting to tensorflow-js with the command
tensorflowjs_converter --input_format keras ./model.h5 /tmp/jsmodel/
failed with
Exception: Error dumping weights, duplicate weight name Variable
Why is this and how can it be fixed?
MCVE
from __future__ import absolute_import, division, print_function
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import layers
import numpy as np
data_root = tf.keras.utils.get_file(
'flower_photos','https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
untar=True)
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1/255)
IMAGE_SHAPE = (224, 224)
image_data = image_generator.flow_from_directory(str(data_root), target_size=IMAGE_SHAPE)
feature_extractor_url = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/2" ##param {type:"string"}
feature_extractor_layer = hub.KerasLayer(feature_extractor_url,
input_shape=(224,224,3))
for image_batch, label_batch in image_data:
print("Image batch shape: ", image_batch.shape)
print("Labe batch shape: ", label_batch.shape)
break
feature_extractor_layer.trainable = False
model = tf.keras.Sequential([
feature_extractor_layer,
layers.Dense(image_data.num_classes, activation='softmax')
])
model.compile(
optimizer=tf.keras.optimizers.Adam(),
loss='categorical_crossentropy',
metrics=['acc'])
steps_per_epoch = np.ceil(image_data.samples/image_data.batch_size)
history = model.fit(image_data, epochs=2,
steps_per_epoch=steps_per_epoch) # removed callback
model.save("/tmp/so_model.h5")
This fails with a
RuntimeError: Unable to create link (name already exists)
but the model is created. Calling the above tensorflowjs_converter --input_format keras /tmp/model.h5 /tmp/jsmodel fails with the above
Exception: Error dumping weights, duplicate weight name Variable
UPDATE: see also Retrain image detection with MobileNet

MLP totally different results for Keras and scikit-learn

Running a single hidden layer MLP on MNIST, I get extremely different results for Keras and sklearn.
import numpy as np
np.random.seed(5)
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras import regularizers
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn.neural_network import MLPClassifier
(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10
batch_data = x_train[:2000]
batch_labels = y_train[:2000]
# flat 2d images
batch_data_flat = batch_data.reshape(2000, 784)
# one-hot encoding
batch_labels_one_hot = np_utils.to_categorical(batch_labels, num_classes)
num_hidden_nodes = 100
alpha = 0.0001
batch_size = 128
beta_1 = 0.9
beta_2 = 0.999
epsilon = 1e-08
learning_rate_init = 0.001
epochs = 200
# keras
keras_model = Sequential()
keras_model.add(Dense(num_hidden_nodes, activation='relu',
kernel_regularizer=regularizers.l2(alpha),
kernel_initializer='glorot_uniform',
bias_initializer='glorot_uniform'))
keras_model.add(Dense(num_classes, activation='softmax',
kernel_regularizer=regularizers.l2(alpha),
kernel_initializer='glorot_uniform',
bias_initializer='glorot_uniform'))
keras_optim = Adam(lr=learning_rate_init, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon)
keras_model.compile(optimizer=keras_optim, loss='categorical_crossentropy', metrics=['accuracy'])
keras_model.fit(batch_data_flat, batch_labels_one_hot, batch_size=batch_size, epochs=epochs, verbose=0)
# sklearn
sklearn_model = MLPClassifier(hidden_layer_sizes=(num_hidden_nodes,), activation='relu', solver='adam',
alpha=alpha, batch_size=batch_size, learning_rate_init=learning_rate_init,
max_iter=epochs, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon)
sklearn_model.fit(batch_data_flat, batch_labels_one_hot)
# evaluate both on their training data
score_keras = keras_model.evaluate(batch_data_flat, batch_labels_one_hot)
score_sklearn = sklearn_model.score(batch_data_flat, batch_labels_one_hot)
print("Acc: keras %f, sklearn %f" % (score_keras[1], score_sklearn))
Outputs: Acc: keras 0.182500, sklearn 1.000000
The only difference I see is that, for the Glorot initialization of the final layer, scikit-learn uses sqrt(2 / (fan_in + fan_out)) whereas Keras uses sqrt(6 / (fan_in + fan_out)). I don't think that should cause such a large difference, though. Am I missing something here?
scikit-learn 0.19.1, Keras 2.2.0 (Backend Tensorflow 1.9.0)
You should probably initialize the biases with 'zeros' and not with 'glorot_uniform'.