model.predict() having a strange output - tensorflow

This is all the files that I used, the only one that isn't there are the images
Import the file data, my data is 20 samples of dogs and 20 samples of cats
import matplotlib.pyplot as plt
import os
import cv2
import random
DIR = 'assets'
CATEGORIES = ['Cat', 'Dog']
img_size = 50
training_data = []
def create_training_data():
for category in CATEGORIES:
path = os.path.join(DIR, category)
class_num = CATEGORIES.index(category)
for img in os.listdir(path):
img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
new_array = cv2.resize(img_array, (img_size, img_size))
training_data.append([new_array, class_num])
create_training_data()
print(len(training_data))
# Shuffle the data
random.shuffle(training_data)
x_train = []
y_train = []
for featurs, label in training_data:
x_train.append(featurs)
y_train.append(label)
x_train = np.asarray(x_train).reshape(-1, img_size, img_size, 1)
y_train = np.array(y_train)
import pickle
pickle_out = open('x_train.pickle', 'wb')
pickle.dump(x_train, pickle_out)
pickle_out.close()
pickle_out = open('y_train.pickle', 'wb')
pickle.dump(y_train, pickle_out)
pickle_out.close()
Train the data
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import pickle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
from tensorflow.keras.callbacks import TensorBoard
x_train = pickle.load(open('x_train.pickle', 'rb'))
y_train = pickle.load(open('y_train.pickle', 'rb'))
x_train = x_train / 255.0
print(x_train.shape)
model = keras.Sequential(
[
keras.Input(shape=(50, 50, 1)),
layers.Conv2D(32, 3, activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(10)
]
)
# inputs = keras.Input(shape=(50, 50, 1))
# x = layers.Conv2D(32, 3)(inputs)
# x = layers.BatchNormalization()(x)
# x = keras.activations.relu(x)
# x = layers.MaxPooling2D()(x)
# x = layers.Flatten()(x)
# outputs = layers.Dense(10, activation='softmax')(x)
# model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(
loss=keras.losses.SparseCategoricalCrossentropy(),
optimizer=keras.optimizers.Adam(),
metrics=['accuracy']
)
model.fit(x_train, y_train, batch_size=2, epochs=100, validation_split=0.1)
model.save('trained_model')
Test the data
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import cv2
import tensorflow as tf
CATEGORIES = ['Cat', 'Dog']
def format(file_path):
size = 50
img_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
new_array = cv2.resize(img_array, (size, size))
return new_array.reshape(-1, size, size, 1)
model = tf.keras.models.load_model('trained_model')
prediction = model.predict([format('dog.jpg')])
print(prediction)
The above runs but the output looks like this.
[[ -36.40766 -1036.2589 -1382.8297 -1486.9949 -1403.7932
-56.355995 -1364.2837 -1351.6316 -1385.2439 -1392.8472 ]]
Why is it giving me so many numbers instead to a simple 1 or 0?
I'm expecting an output of something like [[0.]] or [[1.]]
Update:
I have changed the code according to the suggestions but it is predicting the exact same thing every time
Edit to training file
inputs = keras.Input(shape=(50, 50, 1))
x = layers.Conv2D(16, 3)(inputs)
x = layers.BatchNormalization()(x)
x = keras.activations.relu(x)
x = layers.Conv2D(32, 3)(x)
x = layers.BatchNormalization()(x)
x = keras.activations.relu(x)
x = layers.Conv2D(64, 3)(x)
x = layers.BatchNormalization()(x)
x = keras.activations.relu(x)
x = layers.Flatten()(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs=inputs, outputs=outputs)
print(model.summary())
model.compile(
loss='binary_crossentropy',
optimizer=keras.optimizers.Adam(3e-4),
metrics=['accuracy']
)
model.fit(x_train, y_train, batch_size=2, epochs=100, validation_split=0.1)
model.save('saved_model')
Edits for testing file
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import cv2
import tensorflow as tf
CATEGORIES = ['Bird', 'Cat', 'Dog']
def format(file_path):
size = 50
img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
new_img = cv2.resize(img, (size, size))
return new_img.reshape(-1, 50, 50, 1)
model = tf.keras.models.load_model('saved_model')
prediction = model.predict([format('cat.jpg')])
prediction2 = model.predict([format('dog.jpg')])
prediction3 = model.predict([format('bird.jpg')])
print(CATEGORIES[int(prediction[0][0])])
print(CATEGORIES[int(prediction2[0][0])])
print(CATEGORIES[int(prediction3[0][0])])
the output is now showing even though the images are completely different.
Cat
Cat
Cat

There are two problems that I see here. First, when defining the model
model = keras.Sequential(
[
keras.Input(shape=(50, 50, 1)),
layers.Conv2D(32, 3, activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(10)
]
)
Since you are working with a binary classification problem, the last layer should be specified to have the sigmoid activation function like so layers.Dense(10, activation='sigmoid'). This will have the effect of restricting the range of your output from 0 to 1.
This, however, will still give you numbers in between that range. This is because when you actually make the predictions in
prediction = model.predict([format('dog.jpg')])
print(prediction)
You are not applying the threshold of 0.5 to the predictions (below 0.5 is classified as 0 and above as a 1). This can be easily adjusted prediction = (model.predict([format('dog.jpg')]) > 0.5).astype("int32"). The .astype("int32") function is necessary as otherwise your predictions would be in boolean.

For a binary classification, your last layer should have only one outpout(instead of 10 in your case), and should use the sigmoïd activation function. Then you should add one more step to your model. That is a proposition.
model = keras.Sequential(
[
keras.Input(shape=(50, 50, 1)),
layers.Conv2D(32, 3, activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(10, activation='relu'),
layers.Dense(1, activation='sigmoid')
]
)

Related

Implementing TensorFlow Triplet Loss

I would like to implement the built in TensorFlow addons version of triplet loss with a tutorial here for a siamese network, however I can't seem to get it quite right. No matter how I wrangle the code another error pops up, currently
TypeError: Could not build a TypeSpec for <KerasTensor: shape=(3, None, 256) dtype=float32 (created by layer 'tf.math.l2_normalize_4')> with type KerasTensor.
Note, this is just a token implementation kept simple in order to understand how to implement Triplet Loss. I don't expect the model to actually learn anything.
Code:
!pip install -U tensorflow-addons
import io
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.datasets import fashion_mnist
# Dummy data to pass to the model
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
train_data = [x_train[:20000],x_train[20000:40000],x_train[40000:]]
train_labels = [y_train[:20000],y_train[20000:40000],y_train[40000:]]
train_data = tf.convert_to_tensor(train_data)
train_labels = tf.convert_to_tensor(train_labels)
#train_data = np.asarray(train_data)
#train_labels = np.asarray(train_labels)
def create_model(input_shape):
inp = tf.keras.layers.Input(shape=input_shape)
x = tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1))(inp)
x = tf.keras.layers.MaxPooling2D(pool_size=2)(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu')(x)
x = tf.keras.layers.MaxPooling2D(pool_size=2)(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(256, activation=None)(x) # No activation on final dense layer
#x = tf.keras.layers.Lambda(lambda y: tf.math.l2_normalize(x, axis=1))(x)
model = tf.keras.Model(inp,x)
return model
def get_siamese_model(input_shape):
"""
Model architecture
"""
# Define the tensors for the triplet of input images
anchor_input = tf.keras.layers.Input(input_shape, name="anchor_input")
positive_input = tf.keras.layers.Input(input_shape, name="positive_input")
negative_input = tf.keras.layers.Input(input_shape, name="negative_input")
# Convolutional Neural Network (same from earlier)
embedding_model = create_model(input_shape)
# Generate the embedding outputs
encoded_anchor = embedding_model(anchor_input)
encoded_positive = embedding_model(positive_input)
encoded_negative = embedding_model(negative_input)
inputs = [anchor_input, positive_input, negative_input]
outputs = [encoded_anchor, encoded_positive, encoded_negative]
#x = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(outputs, axis=1))(outputs)
# Connect the inputs with the outputs
siamese_triplet = tf.keras.Model(inputs=inputs,outputs=outputs)
# return the model
return embedding_model, siamese_triplet
emb_mod, model = get_siamese_model([28,28,1])
# Compile the model
model.compile(
optimizer=tf.keras.optimizers.Adam(0.001),
loss=tfa.losses.TripletSemiHardLoss())
# Train the network
#train_dataset = tf.convert_to_tensor(train_dataset)
history = model.fit(
train_data,
epochs=5)
I am not sure what exactly you are trying to do, but you also have to incorporate your labels into your training dataset when using the tfa.losses.TripletSemiHardLoss(). Here is a working example:
import io
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.datasets import fashion_mnist
# Dummy data to pass to the model
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
train_data = tf.data.Dataset.zip((tf.data.Dataset.from_tensor_slices(x_train[:20000]),
tf.data.Dataset.from_tensor_slices(x_train[20000:40000]),
tf.data.Dataset.from_tensor_slices(x_train[40000:])))
train_labels = tf.data.Dataset.zip((tf.data.Dataset.from_tensor_slices(y_train[:20000]),
tf.data.Dataset.from_tensor_slices(y_train[20000:40000]),
tf.data.Dataset.from_tensor_slices(y_train[40000:])))
dataset = tf.data.Dataset.zip((train_data, train_labels)).batch(32)
def create_model(input_shape):
inp = tf.keras.layers.Input(shape=input_shape)
x = tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1))(inp)
x = tf.keras.layers.MaxPooling2D(pool_size=2)(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu')(x)
x = tf.keras.layers.MaxPooling2D(pool_size=2)(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(256, activation=None)(x) # No activation on final dense layer
#x = tf.keras.layers.Lambda(lambda y: tf.math.l2_normalize(x, axis=1))(x)
model = tf.keras.Model(inp,x)
return model
def get_siamese_model(input_shape):
"""
Model architecture
"""
# Define the tensors for the triplet of input images
anchor_input = tf.keras.layers.Input(input_shape, name="anchor_input")
positive_input = tf.keras.layers.Input(input_shape, name="positive_input")
negative_input = tf.keras.layers.Input(input_shape, name="negative_input")
# Convolutional Neural Network (same from earlier)
embedding_model = create_model(input_shape)
# Generate the embedding outputs
encoded_anchor = embedding_model(anchor_input)
encoded_positive = embedding_model(positive_input)
encoded_negative = embedding_model(negative_input)
inputs = [anchor_input, positive_input, negative_input]
outputs = [encoded_anchor, encoded_positive, encoded_negative]
#x = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(outputs, axis=1))(outputs)
# Connect the inputs with the outputs
siamese_triplet = tf.keras.Model(inputs=inputs,outputs=outputs)
# return the model
return embedding_model, siamese_triplet
emb_mod, model = get_siamese_model([28,28,1])
# Compile the model
model.compile(
optimizer=tf.keras.optimizers.Adam(0.001),
loss=tfa.losses.TripletSemiHardLoss())
# Train the network
history = model.fit(
dataset,
epochs=1)
625/625 [==============================] - 76s 120ms/step - loss: 0.1354 - model_79_loss: 0.0572 - model_79_1_loss: 0.0453 - model_79_2_loss: 0.0330

Tensorflow Custom Dataset - Add metadata as additional input to an image input processed by a CNN

I've got a working CNN model that classifies images from a custom dataset that is loaded with a csv file. The dataset is split up into training, validation and test dataset after being shuffled. Now I want to expand the image input by four extra input classes containing info / metadata about the images.
I've already learnt that I should split up my cnn model into two branches, one for the images and one for the extra input. My question is, how must I modify my data input so that the model can correctly process both images and additional input?
I'm very new to creating neural networks in tensorflow. My entire code is basically from this website. However, none of the topics could solve the problem for my code.
This is my code: (additional metadata are called usages, completions, heights, constructions)
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from keras.callbacks import History
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
import io
# READ IMAGES, METADATA AND LABELS
df = pd.read_csv('dataset.csv')
df = df.sample(frac=1)
file_paths = df['file_name'].values
labels = df['label'].values
usages = df['usage'].values
completions = df['completion'].values
heights = df['height'].values
constructions = df['construction'].values
# SPLITTING THE DATASET INTO 80 % TRAINING DATA, 10 % VALIDATION DATA, 10 % TEST DATA
dataset_size = len(df.index)
train_size = int(0.8 * dataset_size)
val_size = int(0.1 * dataset_size)
test_size = int(0.1 * dataset_size)
img_height = 350
img_width = 350
batch_size = 16
autotune = tf.data.experimental.AUTOTUNE
# FUNCTION TO READ AND NORMALIZE THE IMAGES
def read_image(image_file, label, usg, com, hei, con):
image = tf.io.read_file(image_file)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.resize(image, (img_width, img_height))
return tf.cast(image, tf.float32) / 255.0, label, \
tf.cast(usg, tf.float32), tf.cast(com, tf.float32), \
tf.cast(hei, tf.float32), tf.cast(con, tf.float32)
# FUNCTION FOR DATA AUGMENTATION
def augment(image, labeL, usg, com, hei, con):
if tf.random.uniform((), minval=0, maxval=1) < 0.1:
image = tf.tile(tf.image.rgb_to_grayscale(image), [1, 1, 3])
image = tf.image.random_brightness(image, max_delta=0.25)
image = tf.image.random_contrast(image, lower=0.75, upper=1.25)
image = tf.image.random_saturation(image, lower=0.75, upper=1.25)
image = tf.image.random_flip_left_right(image)
return image, label, usg, com, hei, con
# SETUP FOR TRAINING, VALIDATION & TEST DATASET
ds_train = ds_train.map(read_image, num_parallel_calls=autotune)
ds_train = ds_train.cache()
ds_train = ds_train.map(augment, num_parallel_calls=autotune)
ds_train = ds_train.batch(batch_size)
ds_train = ds_train.prefetch(autotune)
ds_val = ds_val.map(read_image, num_parallel_calls=autotune)
ds_val = ds_val.batch(batch_size)
ds_val = ds_val.prefetch(autotune)
ds_test = ds_test.map(read_image, num_parallel_calls=autotune)
ds_test = ds_test.batch(batch_size)
ds_test = ds_test.prefetch(autotune)
## HOW TO SPLIT UP THE DATASET FOR THE MODEL FROM HERE? ##
# DEFINING FUNCTIONAL MODEL
input_img = keras.Input(shape=(img_width, img_height, 3))
input_dat = keras.Input(shape=(4,)) # how is this shape supposed to be?
x = layers.Conv2D(16, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(input_img)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
out1 = layers.Flatten()(x)
out2 = layers.Dense(128, activation='relu')(input_dat)
merge = layers.concatenate([out1, out2])
x = layers.Dense(256, activation='relu')(merge)
x = layers.Dropout(0.35)(x)
output = layers.Dense(8, activation='sigmoid')(x)
model = keras.Model(inputs=[input_img, input_dat], outputs=output)
history = History()
no_overfit = keras.callbacks.EarlyStopping(monitor='val_loss', # stop training when overfitting occurs
min_delta=0.015, patience=1,
verbose=2, mode='auto')
# TRAINING STEP
model.compile(
optimizer=keras.optimizers.Adam(3e-5),
loss=[keras.losses.SparseCategoricalCrossentropy()],
metrics=["accuracy"])
model.fit(ds_train, epochs=30, callbacks=[no_overfit, history],
verbose=1, validation_data=ds_val)
So far I've only added the extra inputs to the dataset tensor and changed the model structure. How exactly do I split my dataset into input_img and input_dat so that each model branch will receive their proper input?
Also I have a custom test step in order to plot a confusion matrix. How is this supposed to be modified? Here the working code, for just the image input:
y_true = []
y_pred = []
for x, y in ds_test:
y_true.append(y)
predicts = model.predict(x) # compute model predictions for test step
y_pred.append(np.argmax(predicts, axis=-1))
true = tf.concat([item for item in y_true], axis=0)
pred = tf.concat([item for item in y_pred], axis=0)
cm = confusion_matrix(true, pred) # confusion matrix from seaborn
testacc = np.trace(cm) / float(np.sum(cm)) # calculating test accuracy
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
fig, ax = plt.subplots(figsize=(10, 10))
color = sns.light_palette("seagreen", as_cmap=False)
sns.heatmap(cm, annot=True, square=True, cmap=color, fmt=".3f",
linewidths=0.6, linecolor='k', cbar_kws={"shrink": 0.8})
plt.yticks(rotation=0)
plt.xlabel('\nPredicted Labels', fontsize=18)
plt.ylabel('True Labels\n', fontsize=18)
plt.title('Multiclass Model - Confusion Matrix (Test Step)\n', fontsize=24)
plt.text(10, 1.1, 'Accuracy = {:0.4f}'.format(testacc), fontsize=20)
ax.axhline(y=8, color='k', linewidth=1.5) # depending on amount of classes
ax.axvline(x=8, color='k', linewidth=1.5)
plt.show()
print('\naccuracy: {:0.4f}'.format(testacc))
Any help is greatly appreciated!!

Input 0 of layer sequential_10 is incompatible with the layer: : expected min_ndim=4, found ndim=2

Before reshaping xtraindata and xtest data, I got error:
"Input 0 of layer sequential_10 is incompatible with the layer: : expected min_ndim=4, found ndim=2.". After reshaping xtraindata and xtestdata as (1400,24,24,1) and (600,24,24,1) in order. Then I got error like this:
"Incompatible shapes: [32,1] vs. [32,6,6,1]
[[node mean_squared_error/SquaredDifference (defined at C:\Users\User\Documents\car_person.py:188) ]] [Op:__inference_test_function_7945]
Function call stack:
test_function"
I cannot make evaluate function working on created model. What should I do in order to make test data compatible with model?
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import cv2
import pandas as pd
import tensorflow as tf
import itertools as it
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
except RuntimeError as e:
print(e)
#gpu_options=K.tf.GPUOptions(per_process_gpu_memory_fraction=0.35)
path = "C:/Users/User/Desktop/tunel_data"
training_data=[]
def create_training_data(training_data, path):
categories = ["tunel_data_other", "tunel_data_car"]
for category in categories:
path=os.path.join(path, category)
for img in os.listdir(path):
print(img)
if category=="tunel_data_other":
class_num= 0
#image=Image.open(img)
#new_image = image.resize((50, 50))
#new_image.save('car'+img.index())
#try:
image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)/255
new_array = cv2.resize(image_array, (24, 24))
print(new_array.shape)
training_data.append([new_array, class_num])
#except:
#pass
elif category=="tunel_data_car":
class_num = 1
#image=Image.open(img)
#new_image = image.resize((50, 50))
#new_image.save('person'+img.index())
#try:
image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)/255
new_array = cv2.resize(image_array, (24, 24))
print(new_array.shape)
training_data.append([new_array, class_num])
#except:
#pass
path = "C:/Users/User/Desktop/tunel_data"
return training_data
create_training_data(training_data, path)
x=[]
y=[]
for i in range(len(training_data)):
x.append(training_data[i][0])
y.append(training_data[i][1])
#print(x)
#print(y)
x = np.array(x).reshape(2000, 576)
"""
principle_features = PCA(n_components=250)
feature = principle_features.fit_transform(x)
"""
feature = x
label = y
feature_df = pd.DataFrame(feature)
#df = DataFrame (People_List,columns=['First_Name','Last_Name','Age'])
label_df = pd.DataFrame(label)
data = pd.concat([feature_df, label_df], axis=1).to_csv('complete.csv')
data = pd.read_csv("complete.csv")
data = data.sample(frac=1).reset_index(drop=True)
print(data)
x_test, x_train, y_test, y_train = train_test_split(x, y, test_size=0.7, random_state=65)
xtraindata=pd.DataFrame(data=x_train[:,:])
xtestdata=pd.DataFrame(data=x_test[:,:])
print(xtraindata)
ytraindata=pd.DataFrame(data=y_train[:])
ytestdata=pd.DataFrame(data=y_test[:])
print(ytraindata)
xtraindata = np.asarray(xtraindata)
ytraindata = np.asarray(ytraindata)
xtestdata = np.asarray(xtestdata)
ytestdata = np.asarray(ytestdata)
x=np.asarray(x)
y=np.asarray(y)
xtraindata = xtraindata.reshape(1400,24,24,1)
xtestdata = xtestdata.reshape(600,24,24,1)
activation = ["tanh", "relu", "sigmoid", "softmax"]
input_size1 = range(10)
input_size2 = range(10)
k_scores = []
in_size = []
possible = list(it.permutations(activation, 4))
for c in possible:
for i in input_size1:
for a in input_size2:
model = tf.keras.Sequential([tf.keras.layers.Conv2D(256, kernel_size=(3,3), padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
tf.keras.layers.Conv2D(512, kernel_size=(3,3), padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
tf.keras.layers.Dense(250, activation=c[0]),
tf.keras.layers.Dense(i, activation=c[1]),
tf.keras.layers.Dense(a, activation=c[2]),
tf.keras.layers.Dense(1, activation=c[3])])
model.compile(optimizer='sgd', loss='mse')
val_loss = model.evaluate(xtestdata, ytestdata, verbose=1)
k_scores.append(val_loss)
in_size.append([i,a])
print(k_scores)
print("Best activation functions for each layer:", possible[(k_scores.index((min(k_scores)))) % len(possible)],
"/n Best input sizes:", "840", in_size[k_scores.index((min(k_scores)))][0], in_size[k_scores.index((min(k_scores)))][1], "1")
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(250, activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][0]))
model.add(tf.keras.layers.Dense(in_size[k_scores.index((min(k_scores)))][0], activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][1]))
model.add(tf.keras.layers.Dense(in_size[k_scores.index((min(k_scores)))][1], activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][2]))
model.add(tf.keras.layers.Dense(1, activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][3]))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", "mse"])
model.fit(x, y, batch_size=16, epochs=5)
predictions = model.predict([x_test])
print(predictions)
print(predictions.shape)
output layer size is different. you want size (32, 1) but model's output is (32, 6, 6, 1)
insert Flatten() between MaxPooling2D and Dense() maybe this work's well.
and here is the tip. .evaluate method is only for trained model. you should use .fit first.

Input shape of initial_state of tf.keras.layers.LSTM

Here I want to construct a very basic and simple character-wise RNN.
suppose that my dataset is embedded like this:
import numpy as np
batch_1 = np.array([[1, 2, ...., 20], [21, .....,40], [41,....,60], [61,...., 80]])
batch_2 = np.array([[...], [...], [...], [...]])
import tensorflow as tf
batch_size = 4
steps_number = 20
hidden_units = 100
keep_prob = 0.5
dim = tf.zeros([batch_size, hidden_units])
input_data = tf.keras.layers.Input(shape=(1, steps_number), batch_size=batch_size)
hidden_1, state_h, state_c = tf.keras.layers.LSTM(units=hidden_units, stateful=True, dropout=keep_prob, return_state=True)(input_data, initial_state=[dim, dim], training=True)
hideen_2 = tf.keras.layers.LSTM(units=hidden_units, stateful=True, dropout=keep_prob, return_state=False)(hidden_1, initial_state=[state_h, state_c], training=True)
hidden3 = tf.keras.layers.Dense(10, activation='relu')(hidden_1)
output = tf.keras.layers.Dense(1, activation='sigmoid')(hidden3)
model = tf.keras.models.Model(input_data, output)
Here I got this error in the hidden_2 layer:
ValueError: Shape (100, 4) must have rank at least 3
The problem is that the output of hidden_1 layer size should be [batch_size, steps_number, hidden_units]
Here is the working solution however, I dont I understand why I have to specify the Input shape in term of colum array:
shape=(steps_number,1) instead of (1,steps_number)
import tensorflow as tf
batch_size = 4
steps_number = 20
hidden_units = 100
keep_prob = 0.5
dim = tf.zeros([batch_size, hidden_units])
input_data = tf.keras.layers.Input(shape=(steps_number,1), batch_size=batch_size)
hidden_1, state_h, state_c = tf.keras.layers.LSTM(units=hidden_units, stateful=True, dropout=keep_prob, return_state=True, return_sequences=True)(input_data, initial_state=[dim, dim], training=True)
print(hidden_1.get_shape().as_list)
hideen_2 = tf.keras.layers.LSTM(units=hidden_units, stateful=True, dropout=keep_prob, return_state=False)(hidden_1, initial_state=[state_h, state_c], training=True)
hidden3 = tf.keras.layers.Dense(10, activation='relu')(hidden_1)
output = tf.keras.layers.Dense(1, activation='sigmoid')(hidden3)
model = tf.keras.models.Model(input_data, output)

ValueError: in case of LSTM with `stateful=True`

I tried to use LSTM network with stateful=True as follows:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.callbacks import LambdaCallback
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
raw = np.sin(2*np.pi*np.arange(1024)/float(1024/2))
data = pd.DataFrame(raw)
window_size = 3
data_s = data.copy()
for i in range(window_size):
data = pd.concat([data, data_s.shift(-(i+1))], axis = 1)
data.dropna(axis=0, inplace=True)
print (data)
ds = data.values
n_rows = ds.shape[0]
ts = int(n_rows * 0.8)
train_data = ds[:ts,:]
test_data = ds[ts:,:]
train_X = train_data[:,:-1]
train_y = train_data[:,-1]
test_X = test_data[:,:-1]
test_y = test_data[:,-1]
print (train_X.shape)
print (train_y.shape)
print (test_X.shape)
print (test_y.shape)
(816, 3)
(816,)
(205, 3)
(205,)
batch_size = 3
n_feats = 1
train_X = train_X.reshape(train_X.shape[0], batch_size, n_feats)
test_X = test_X.reshape(test_X.shape[0], batch_size, n_feats)
print(train_X.shape, train_y.shape)
regressor = Sequential()
regressor.add(LSTM(units = 64, batch_input_shape=(train_X.shape[0], batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=True))
regressor.add(Dense(units = 1))
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
resetCallback = LambdaCallback(on_epoch_begin=lambda epoch,logs: regressor.reset_states())
regressor.fit(train_X, train_y, batch_size=7, epochs = 1, callbacks=[resetCallback])
previous_inputs = test_X
regressor.reset_states()
previous_predictions = regressor.predict(previous_inputs).reshape(-1)
test_y = test_y.reshape(-1)
plt.plot(test_y, color = 'blue')
plt.plot(previous_predictions, color = 'red')
plt.show()
However, I got:
ValueError: Error when checking target: expected dense_1 to have 3 dimensions, but got array with shape (816, 1)
PS this code has been adapted from https://github.com/danmoller/TestRepo/blob/master/testing%20the%20blog%20code%20-%20train%20and%20pred.ipynb
Two minor bugs:
Here you have
regressor.add(LSTM(units = 64, batch_input_shape=(train_X.shape[0], batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=True))
This LSTM will return a 3D vector, but your y is 2D which throws a valuerror. You can fix this with return_sequences=False. I'm not sure why you initially had train_X.shape[0] inside of your batch_input, the number of samples in your entire set shouldn't affect the size of each batch.
regressor.add(LSTM(units = 64, batch_input_shape=(1, batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=False))
After this you have
regressor.fit(train_X, train_y, batch_size=7, epochs = 1, callbacks=[resetCallback])
In a stateful network you can only put in a number of inputs that divides the batch size. Since 7 doesn't divide 816 we change this to 1:
regressor.fit(train_X, train_y, batch_size=1, epochs = 1, callbacks=[resetCallback])
The same goes in your predict. You must specify batch_size=1:
previous_predictions = regressor.predict(previous_inputs, batch_size=1).reshape(-1)