How to reshape real-time input data in my prediction code - tensorflow

This is a real-time sign language detection ,and i reshaped my X_train,y_train,X_test and y_test to add CNN into my architecture .It was LSTM only but am getting errors on the prediction part on how to reshape the real-time input.
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense
from tensorflow.keras.callbacks import TensorBoard
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
def mediapipe_detection(image,model):
image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
image.flags.writeable = False
results = model.process(image)
image.flags.writeable = True
image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR)
return image,results
def extract_keypoints(results):
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in
results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
lh = np.array([[res.x, res.y, res.z] for res in
results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else
np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in
results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else
np.zeros(21*3)
face = np.array([[res.x, res.y, res.z] for res in
results.face_landmarks.landmark]).flatten() if results.face_landmarks else
np.zeros(468*3)
return np.concatenate([pose,face,lh,rh])
colors = [(245,117,16),(117,245,16),(16,117,245)]
def prob_viz(res,actions,input_frame,colors):
output_frame = input_frame.copy()
for num,prob in enumerate(res):
cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40),colors[num], -1)
cv2.putText(output_frame,actions[num],(0,85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1,
(255,255,255),2,cv2.LINE_AA)
return output_frame
DATA_PATH = os.path.join('MP_Data')
#Actions
actions = np.array(['hello','thanks','iloveyou'])
#30 videos worth of data
no_sequences = 30
#30 frames
sequence_length = 30
for action in actions:
for sequence in range(no_sequences):
try:
os.makedirs(os.path.join(DATA_PATH,action,str(sequence)))
except:
pass
label_map = {label:num for num, label in enumerate(actions)}
sequences, labels = [], []
for action in actions:
for sequence in range(no_sequences):
window = []
for frame_num in range(sequence_length):
res = np.load(os.path.join(DATA_PATH,action, str(sequence),"
{}.npy".format(frame_num)))
window.append(res)
sequences.append(window)
labels.append(label_map[action])
#------------------------------------------------------------------------------------
#this above codes are to show what my code looks like.But my question starts from here below
#------------------------------------------------------------------------------------
x_train, x_test, y_train, y_test=train_test_split(x, y, test_size=0.05)
x_train.shape ---->(85, 30, 1662)
# reshaping the input
x_train = x_train.reshape(-1, 300, 1662,1) ; x_test = x_test.reshape(-1, 30, 1662,1)
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir = log_dir)
# define the model
model = Sequential()
model.add(TimeDistributed(Conv1D(3, 3, 1,activation='relu', input_shape=[30,1662,1])) ) #
(3, 128, 216, 1)
# model.add(TimeDistributed(Conv1D(3,3,1,activation='relu')))
model.add(TimeDistributed(MaxPooling1D(pool_size=(3,))))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(320, return_sequences=True, activation='relu'))
model.add(LSTM(640, return_sequences=True, activation='relu'))
model.add(LSTM(320, return_sequences=False, activation='relu'))
model.add(Dense(320, activation='relu'))
model.add(Dense(180, activation='relu'))
model.add(Dense(np.array(actions).shape[0], activation='softmax'))
res = [.2,0.7,.01]
actions[np.argmax(res)]
model.compile(optimizer = 'Adam',loss='categorical_crossentropy',metrics=
['categorical_accuracy'])
actions[np.argmax(res[1])]
model.load_weights('action.h5')
#############################################################################################
#Prediction
########################################################################################
#New Detection Variables
sequence = []
sentence = []
threshold = .4
cap = cv2.VideoCapture(0)
#Mediapipe Model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
while cap.isOpened():
#Read Feed
ret, frame = cap.read()
#Make detections
image,results = mediapipe_detection(frame,holistic)
#Prediciton Logic
keypoints = extract_keypoints(results)
sequence.insert(0,keypoints)
sequence = sequence[:30]
if len(sequence) == 30:
res = model.predict(np.expand_dims(sequence,axis=0))[0]
#Visualization
if res[np.argmax(res)] > threshold:
if len(sentence) > 0:
if actions[np.argmax(res)] != sentence[-1]:
sentence.append(actions[np.argmax(res)])
else:
sentence.append(actions[np.argmax(res)])
if len(sentence)>5:
sentence = sentence[-5:]
#Viz probability
image = prob_viz(res,actions,image,colors)
cv2.rectangle(image,(0,0),(640,40),(245,117,16),-1)
cv2.putText(image, ' '.join(sentence),(3,30),
cv2.FONT_HERSHEY_SIMPLEX, 1,(255,255,255),2,cv2.LINE_AA)
#Show to Screen
cv2.imshow('OpenCV feed', image)
#Breaking the Feed
if cv2.waitKey(10) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
But am getting an error on the prediction part

I adjust a bit that is only matching the dataset and expecting values you can modify the target value from my sample. It is possible working with the streams when input is concatenated you can do it with transform functions or queue then transform functions.
[ Sample ]:
import tensorflow as tf
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp
from tensorflow.keras.callbacks import TensorBoard
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DATA_PATH = os.path.join('MP_Data')
#Actions
actions = np.array(['hello','thanks','iloveyou'])
#30 videos worth of data
no_sequences = 2
# no_sequences = 30
#30 frames
# sequence_length = 30
sequence_length = 2
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Definication / Class
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def load_target_wave( wave_file='F:\\temp\\Python\\Speech\\Piano\\Berklee44v4\\piano_G2.wav' ) :
test_file = tf.io.read_file( wave_file )
test_audio, sample_rates = tf.audio.decode_wav(contents=test_file)
return test_audio
def mediapipe_detection(image,model):
image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
image.flags.writeable = False
results = model.process(image)
image.flags.writeable = True
image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR)
return image,results
def extract_keypoints(results):
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in
results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
lh = np.array([[res.x, res.y, res.z] for res in
results.left_hand_landmarks.landmark]).flatten()
if results.left_hand_landmarks :
pass
else :
np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in
results.right_hand_landmarks.landmark]).flatten()
if results.right_hand_landmarks :
pass
else :
np.zeros(21*3)
face = np.array([[res.x, res.y, res.z] for res in
results.face_landmarks.landmark]).flatten()
if results.face_landmarks :
pass
else :
np.zeros(468*3)
return np.concatenate([pose,face,lh,rh])
colors = [(245,117,16),(117,245,16),(16,117,245)]
def prob_viz(res,actions,input_frame,colors):
output_frame = input_frame.copy()
for num,prob in enumerate(res):
cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40),colors[num], -1)
cv2.putText(output_frame,actions[num],(0,85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1,
(255,255,255),2,cv2.LINE_AA)
return output_frame
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Generate DATA
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
wav_G2 = load_target_wave( )
for action in actions:
for sequence in range(no_sequences):
try:
os.makedirs(os.path.join(DATA_PATH,action,str(sequence)))
except:
pass
label_map = {label:num for num, label in enumerate(actions)}
sequences, labels = [], []
for action in actions:
for sequence in range(no_sequences):
window = []
for frame_num in range(sequence_length):
# res = np.load(os.path.join(DATA_PATH,action, str(sequence), "{}.npy".format(frame_num)))
res = wav_G2
window.append(res)
sequences.append(window)
labels.append(label_map[action])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
sequences = tf.cast( sequences, dtype=tf.int32 )
sequences = tf.constant( sequences, shape=( 12, 1, 2, 287232, 1 ) )
labels = tf.cast( labels, dtype=tf.int32 )
labels = tf.constant( labels, shape=( 12, 1, 1, ) )
dataset = tf.data.Dataset.from_tensor_slices(( sequences, labels ))
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir = log_dir)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 2, 287232, 1 )),
tf.keras.layers.Reshape(( 256, 2244, 1 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (4, 4), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Reshape((16128, 1120)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(192, activation='relu'),
tf.keras.layers.Dense(1),
])
model.summary()
res = [.2,0.7,.01]
actions[np.argmax(res)]
model.compile(optimizer = 'Adam',loss='categorical_crossentropy',metrics=
['categorical_accuracy'])
actions[np.argmax(res[1])]
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit(dataset, epochs=1 ,validation_data=(dataset))
# model.load_weights('action.h5')

Related

Why does my Resnet56 implementation have less accuracy than in the original paper?

I was trying to implement Resnet56 in Tensorflow to classify the CIFAR10 images, but somehow I got a lower accuracy than the original creators.
I did everything exactly as described in the paper: same architecture, same data augmentation, same learning rate scheduling, same batch size...
But somehow my implementation produced an accuracy of only 91.84%, while in the original paper they reached 93.03% for the 56 layer Resnet.
Here is the link to the Resnet paper: https://arxiv.org/pdf/1512.03385.pdf
I found what my problem was (see answers if interested) and here you can find my (now correct) implementation, that can now reach the exact same accuracy:
import argparse
import datetime
import os
import re
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2") # Report only TF errors and warnings by default
parser = argparse.ArgumentParser()
parser.add_argument("--resnet_n", default=9, type=int, help="n from Resnet paper.")
parser.add_argument("--seed", default=42, type=int, help="Random seed.")
parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
class ResNet(keras.Model):
class ResidualBlock(tf.Module):
def __init__(self, filters: int, down_sample: bool):
super().__init__()
self.filters = filters
self.down_sample = down_sample
def __call__(self, x):
out = x
out = keras.layers.Conv2D(filters=self.filters,
kernel_size=(3, 3),
strides=(1, 1) if not self.down_sample else (2, 2),
padding="same",
use_bias=False,
kernel_initializer=tf.keras.initializers.HeNormal)(out)
out = keras.layers.BatchNormalization()(out)
out = keras.layers.ReLU()(out)
out = keras.layers.Conv2D(filters=self.filters,
kernel_size=(3, 3),
strides=(1, 1),
padding="same",
use_bias=False,
kernel_initializer=tf.keras.initializers.HeNormal)(out)
out = keras.layers.BatchNormalization()(out)
if self.down_sample:
residual = keras.layers.Conv2D(filters=self.filters, kernel_size=(1, 1), strides=(2, 2),
padding="same",
use_bias=False,
kernel_initializer=tf.keras.initializers.HeNormal)(x)
residual = tf.keras.layers.BatchNormalization()(residual)
else:
residual = x
out = out + residual
out = keras.layers.ReLU()(out)
return out
def __init__(self, args):
inputs = keras.layers.Input(shape=(32, 32, 3), dtype=tf.float32)
outputs = keras.layers.Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding="same", use_bias=False,
kernel_initializer=tf.keras.initializers.HeNormal)(
inputs)
outputs = keras.layers.BatchNormalization()(outputs)
outputs = keras.layers.ReLU()(outputs)
for _ in range(0, args.resnet_n):
outputs = self.ResidualBlock(16, False)(outputs)
outputs = self.ResidualBlock(32, True)(outputs)
for _ in range(1, args.resnet_n):
outputs = self.ResidualBlock(32, False)(outputs)
outputs = self.ResidualBlock(64, True)(outputs)
for _ in range(1, args.resnet_n):
outputs = self.ResidualBlock(64, False)(outputs)
outputs = keras.layers.GlobalAveragePooling2D()(outputs)
outputs = keras.layers.Dense(10, activation=tf.nn.softmax)(outputs)
super().__init__(inputs, outputs)
def main(args, tb_callback):
ds_train,ds_test = tfds.load("cifar10",split=["train","test"],as_supervised=True)
img_augmentation = keras.Sequential(
[
keras.layers.RandomFlip("horizontal"),
keras.layers.RandomTranslation(height_factor=0.125, width_factor=0.125, fill_mode="constant",
fill_value=0.5)
]
)
ds_train = ds_train.map(lambda img, label: (tf.cast(img, tf.float32) / 255.0, label))
ds_test = ds_test.map(lambda img, label: (tf.cast(img, tf.float32) / 255.0, label))
total_count, per_pixel_sum = ds_train.reduce((np.float32(0), tf.zeros((32, 32, 3))),
lambda prev, curr: (prev[0] + 1.0, prev[1] + curr[0]))
per_pixel_mean = per_pixel_sum / total_count
ds_train = ds_train.map(lambda img, label: (img_augmentation(img, training=True), tf.one_hot(label, 10)))
ds_test = ds_test.map(lambda img, label: (img, tf.one_hot(label, 10)))
ds_train = ds_train.map(lambda img, label: (img - per_pixel_mean, label))
ds_test = ds_test.map(lambda img, label: (img - per_pixel_mean, label))
ds_train = ds_train.shuffle(5000).batch(128, drop_remainder=True).prefetch(tf.data.AUTOTUNE)
ds_test = ds_test.shuffle(5000).batch(128, drop_remainder=True).prefetch(tf.data.AUTOTUNE)
model = ResNet(args)
learning_rate = keras.optimizers.schedules.PiecewiseConstantDecay(
[32000, 48000], [0.1, 0.01, 0.001]
)
weight_decay = keras.optimizers.schedules.PiecewiseConstantDecay(
[32000, 48000], [1e-4, 1e-5, 1e-6]
)
model.compile(
optimizer=tfa.optimizers.SGDW(weight_decay=weight_decay, learning_rate=learning_rate, momentum=0.9,
nesterov=False),
loss=tf.losses.CategoricalCrossentropy(),
metrics=[tf.metrics.CategoricalAccuracy("accuracy")],
)
model.fit(x=ds_train, epochs=200, validation_data=ds_test, callbacks=[tb_callback], use_multiprocessing=True,
workers=args.threads)
model.save(args.logdir + '/model')
print('OK')
if __name__ == "__main__":
args = parser.parse_args([] if "__file__" not in globals() else None)
# Fix random seeds and threads
np.random.seed(args.seed)
tf.random.set_seed(args.seed)
tf.config.threading.set_inter_op_parallelism_threads(args.threads)
tf.config.threading.set_intra_op_parallelism_threads(args.threads)
# Create logdir name
args.logdir = os.path.join("{}/{}".format("logs", os.path.basename(globals().get("__file__", "notebook"))),
"{}-{}".format(
datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value) for key, value in
sorted(vars(args).items())))
))
tb_callback = tf.keras.callbacks.TensorBoard(args.logdir, histogram_freq=1, update_freq=100, profile_batch=0)
main(args, tb_callback)
I found what my problems were:
I didn't apply data augmentation correctly, changed img_augmentation(img) to img_augmentation(img, training=True)
Changed kernel initializer to HeNormal, what they used in the paper
Added per pixel mean substraction as a normalization
Disabling nesterov helped somehow (IDK why)

model.predict() having a strange output

This is all the files that I used, the only one that isn't there are the images
Import the file data, my data is 20 samples of dogs and 20 samples of cats
import matplotlib.pyplot as plt
import os
import cv2
import random
DIR = 'assets'
CATEGORIES = ['Cat', 'Dog']
img_size = 50
training_data = []
def create_training_data():
for category in CATEGORIES:
path = os.path.join(DIR, category)
class_num = CATEGORIES.index(category)
for img in os.listdir(path):
img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
new_array = cv2.resize(img_array, (img_size, img_size))
training_data.append([new_array, class_num])
create_training_data()
print(len(training_data))
# Shuffle the data
random.shuffle(training_data)
x_train = []
y_train = []
for featurs, label in training_data:
x_train.append(featurs)
y_train.append(label)
x_train = np.asarray(x_train).reshape(-1, img_size, img_size, 1)
y_train = np.array(y_train)
import pickle
pickle_out = open('x_train.pickle', 'wb')
pickle.dump(x_train, pickle_out)
pickle_out.close()
pickle_out = open('y_train.pickle', 'wb')
pickle.dump(y_train, pickle_out)
pickle_out.close()
Train the data
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import pickle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
from tensorflow.keras.callbacks import TensorBoard
x_train = pickle.load(open('x_train.pickle', 'rb'))
y_train = pickle.load(open('y_train.pickle', 'rb'))
x_train = x_train / 255.0
print(x_train.shape)
model = keras.Sequential(
[
keras.Input(shape=(50, 50, 1)),
layers.Conv2D(32, 3, activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(10)
]
)
# inputs = keras.Input(shape=(50, 50, 1))
# x = layers.Conv2D(32, 3)(inputs)
# x = layers.BatchNormalization()(x)
# x = keras.activations.relu(x)
# x = layers.MaxPooling2D()(x)
# x = layers.Flatten()(x)
# outputs = layers.Dense(10, activation='softmax')(x)
# model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(
loss=keras.losses.SparseCategoricalCrossentropy(),
optimizer=keras.optimizers.Adam(),
metrics=['accuracy']
)
model.fit(x_train, y_train, batch_size=2, epochs=100, validation_split=0.1)
model.save('trained_model')
Test the data
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import cv2
import tensorflow as tf
CATEGORIES = ['Cat', 'Dog']
def format(file_path):
size = 50
img_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
new_array = cv2.resize(img_array, (size, size))
return new_array.reshape(-1, size, size, 1)
model = tf.keras.models.load_model('trained_model')
prediction = model.predict([format('dog.jpg')])
print(prediction)
The above runs but the output looks like this.
[[ -36.40766 -1036.2589 -1382.8297 -1486.9949 -1403.7932
-56.355995 -1364.2837 -1351.6316 -1385.2439 -1392.8472 ]]
Why is it giving me so many numbers instead to a simple 1 or 0?
I'm expecting an output of something like [[0.]] or [[1.]]
Update:
I have changed the code according to the suggestions but it is predicting the exact same thing every time
Edit to training file
inputs = keras.Input(shape=(50, 50, 1))
x = layers.Conv2D(16, 3)(inputs)
x = layers.BatchNormalization()(x)
x = keras.activations.relu(x)
x = layers.Conv2D(32, 3)(x)
x = layers.BatchNormalization()(x)
x = keras.activations.relu(x)
x = layers.Conv2D(64, 3)(x)
x = layers.BatchNormalization()(x)
x = keras.activations.relu(x)
x = layers.Flatten()(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs=inputs, outputs=outputs)
print(model.summary())
model.compile(
loss='binary_crossentropy',
optimizer=keras.optimizers.Adam(3e-4),
metrics=['accuracy']
)
model.fit(x_train, y_train, batch_size=2, epochs=100, validation_split=0.1)
model.save('saved_model')
Edits for testing file
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import cv2
import tensorflow as tf
CATEGORIES = ['Bird', 'Cat', 'Dog']
def format(file_path):
size = 50
img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
new_img = cv2.resize(img, (size, size))
return new_img.reshape(-1, 50, 50, 1)
model = tf.keras.models.load_model('saved_model')
prediction = model.predict([format('cat.jpg')])
prediction2 = model.predict([format('dog.jpg')])
prediction3 = model.predict([format('bird.jpg')])
print(CATEGORIES[int(prediction[0][0])])
print(CATEGORIES[int(prediction2[0][0])])
print(CATEGORIES[int(prediction3[0][0])])
the output is now showing even though the images are completely different.
Cat
Cat
Cat
There are two problems that I see here. First, when defining the model
model = keras.Sequential(
[
keras.Input(shape=(50, 50, 1)),
layers.Conv2D(32, 3, activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(10)
]
)
Since you are working with a binary classification problem, the last layer should be specified to have the sigmoid activation function like so layers.Dense(10, activation='sigmoid'). This will have the effect of restricting the range of your output from 0 to 1.
This, however, will still give you numbers in between that range. This is because when you actually make the predictions in
prediction = model.predict([format('dog.jpg')])
print(prediction)
You are not applying the threshold of 0.5 to the predictions (below 0.5 is classified as 0 and above as a 1). This can be easily adjusted prediction = (model.predict([format('dog.jpg')]) > 0.5).astype("int32"). The .astype("int32") function is necessary as otherwise your predictions would be in boolean.
For a binary classification, your last layer should have only one outpout(instead of 10 in your case), and should use the sigmoïd activation function. Then you should add one more step to your model. That is a proposition.
model = keras.Sequential(
[
keras.Input(shape=(50, 50, 1)),
layers.Conv2D(32, 3, activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(10, activation='relu'),
layers.Dense(1, activation='sigmoid')
]
)

Input 0 of layer sequential_10 is incompatible with the layer: : expected min_ndim=4, found ndim=2

Before reshaping xtraindata and xtest data, I got error:
"Input 0 of layer sequential_10 is incompatible with the layer: : expected min_ndim=4, found ndim=2.". After reshaping xtraindata and xtestdata as (1400,24,24,1) and (600,24,24,1) in order. Then I got error like this:
"Incompatible shapes: [32,1] vs. [32,6,6,1]
[[node mean_squared_error/SquaredDifference (defined at C:\Users\User\Documents\car_person.py:188) ]] [Op:__inference_test_function_7945]
Function call stack:
test_function"
I cannot make evaluate function working on created model. What should I do in order to make test data compatible with model?
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import cv2
import pandas as pd
import tensorflow as tf
import itertools as it
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
except RuntimeError as e:
print(e)
#gpu_options=K.tf.GPUOptions(per_process_gpu_memory_fraction=0.35)
path = "C:/Users/User/Desktop/tunel_data"
training_data=[]
def create_training_data(training_data, path):
categories = ["tunel_data_other", "tunel_data_car"]
for category in categories:
path=os.path.join(path, category)
for img in os.listdir(path):
print(img)
if category=="tunel_data_other":
class_num= 0
#image=Image.open(img)
#new_image = image.resize((50, 50))
#new_image.save('car'+img.index())
#try:
image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)/255
new_array = cv2.resize(image_array, (24, 24))
print(new_array.shape)
training_data.append([new_array, class_num])
#except:
#pass
elif category=="tunel_data_car":
class_num = 1
#image=Image.open(img)
#new_image = image.resize((50, 50))
#new_image.save('person'+img.index())
#try:
image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)/255
new_array = cv2.resize(image_array, (24, 24))
print(new_array.shape)
training_data.append([new_array, class_num])
#except:
#pass
path = "C:/Users/User/Desktop/tunel_data"
return training_data
create_training_data(training_data, path)
x=[]
y=[]
for i in range(len(training_data)):
x.append(training_data[i][0])
y.append(training_data[i][1])
#print(x)
#print(y)
x = np.array(x).reshape(2000, 576)
"""
principle_features = PCA(n_components=250)
feature = principle_features.fit_transform(x)
"""
feature = x
label = y
feature_df = pd.DataFrame(feature)
#df = DataFrame (People_List,columns=['First_Name','Last_Name','Age'])
label_df = pd.DataFrame(label)
data = pd.concat([feature_df, label_df], axis=1).to_csv('complete.csv')
data = pd.read_csv("complete.csv")
data = data.sample(frac=1).reset_index(drop=True)
print(data)
x_test, x_train, y_test, y_train = train_test_split(x, y, test_size=0.7, random_state=65)
xtraindata=pd.DataFrame(data=x_train[:,:])
xtestdata=pd.DataFrame(data=x_test[:,:])
print(xtraindata)
ytraindata=pd.DataFrame(data=y_train[:])
ytestdata=pd.DataFrame(data=y_test[:])
print(ytraindata)
xtraindata = np.asarray(xtraindata)
ytraindata = np.asarray(ytraindata)
xtestdata = np.asarray(xtestdata)
ytestdata = np.asarray(ytestdata)
x=np.asarray(x)
y=np.asarray(y)
xtraindata = xtraindata.reshape(1400,24,24,1)
xtestdata = xtestdata.reshape(600,24,24,1)
activation = ["tanh", "relu", "sigmoid", "softmax"]
input_size1 = range(10)
input_size2 = range(10)
k_scores = []
in_size = []
possible = list(it.permutations(activation, 4))
for c in possible:
for i in input_size1:
for a in input_size2:
model = tf.keras.Sequential([tf.keras.layers.Conv2D(256, kernel_size=(3,3), padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
tf.keras.layers.Conv2D(512, kernel_size=(3,3), padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
tf.keras.layers.Dense(250, activation=c[0]),
tf.keras.layers.Dense(i, activation=c[1]),
tf.keras.layers.Dense(a, activation=c[2]),
tf.keras.layers.Dense(1, activation=c[3])])
model.compile(optimizer='sgd', loss='mse')
val_loss = model.evaluate(xtestdata, ytestdata, verbose=1)
k_scores.append(val_loss)
in_size.append([i,a])
print(k_scores)
print("Best activation functions for each layer:", possible[(k_scores.index((min(k_scores)))) % len(possible)],
"/n Best input sizes:", "840", in_size[k_scores.index((min(k_scores)))][0], in_size[k_scores.index((min(k_scores)))][1], "1")
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(250, activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][0]))
model.add(tf.keras.layers.Dense(in_size[k_scores.index((min(k_scores)))][0], activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][1]))
model.add(tf.keras.layers.Dense(in_size[k_scores.index((min(k_scores)))][1], activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][2]))
model.add(tf.keras.layers.Dense(1, activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][3]))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", "mse"])
model.fit(x, y, batch_size=16, epochs=5)
predictions = model.predict([x_test])
print(predictions)
print(predictions.shape)
output layer size is different. you want size (32, 1) but model's output is (32, 6, 6, 1)
insert Flatten() between MaxPooling2D and Dense() maybe this work's well.
and here is the tip. .evaluate method is only for trained model. you should use .fit first.

I followed the tensorflow image segmentation tutorial, but the predicted mask is blank

I'd like to try image segmentation with my grayscale tif images (the shape of original images are (512,512) and the value of each pixel is between 0-2 or NaN which is in float32 type and the mask images have 0, 1, or NaN also in float32 type). I followed Google Colab and tensorflow tutorial to create the following code:
from glob import glob
from PIL import Image
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import layers
from tensorflow.python.keras import losses
from tensorflow.python.keras import models
from tensorflow.python.keras import backend as K
#get the path of my data
img = sorted(glob('train_sub_5/*.tif'))
mask = sorted(glob('train_mask_sub_5/*.tif'))
#split into train and test data
img, img_val, mask, mask_val = train_test_split(img, mask, test_size=0.2, random_state=42)
#load image as array and append to a list
train_image = []
for m in img:
img= Image.open(m)
img_arr = np.array(img)
stacked_img = np.stack((img_arr,)*1, axis=-1)
train_image.append(stacked_img)
train_mask = []
for n in mask:
mask= Image.open(n)
mask_arr= np.array(mask)
stacked_mask = np.stack((mask_arr,)*1, axis=-1)
train_mask.append(stacked_mask)
test_img = []
for o in img_val:
img= Image.open(o)
img_arr = np.array(img)
stacked_img = np.stack((img_arr,)*1, axis=-1)
test_img.append(stacked_img)
test_mask = []
for p in mask_val:
mask= Image.open(p)
mask_arr = np.array(mask)
stacked_mask = np.stack((mask_arr,)*1, axis=-1)
test_mask.append(stacked_mask)
#create TensorSliceDataset
for i, j in zip(train_image, train_mask):
train= tf.data.Dataset.from_tensor_slices(([i], [j]))
for k, l in zip(test_img, test_mask):
test= tf.data.Dataset.from_tensor_slices(([k], [l]))
#for visualization
def display(display_list):
plt.figure(figsize=(15, 15))
title = ['Input Image', 'True Mask', 'Predicted Mask']
for i in range(len(display_list)):
plt.subplot(1, len(display_list), i+1)
plt.title(title[i])
plt.imshow(display_list[i])
plt.axis('off')
plt.show()
for img, mask in train.take(1):
sample_image = img.numpy()[:,:,0]
sample_mask = mask.numpy()[:,:,0]
display([sample_image, sample_mask])
The output of the visualization looks normal like below:
out put of the visualization
#build the model
train_length = len(train_image)
img_shape = (512,512,1)
batch_size = 8
buffer_size = 5
epochs = 5
train_dataset = train.cache().shuffle(train_length).batch(batch_size).repeat()
train_dataset = train_dataset.prefetch(buffer_size)
test_dataset = test.batch(batch_size).repeat()
def conv_block(input_tensor, num_filters):
encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)
encoder = layers.BatchNormalization()(encoder)
encoder = layers.Activation('relu')(encoder)
encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)
encoder = layers.BatchNormalization()(encoder)
encoder = layers.Activation('relu')(encoder)
return encoder
def encoder_block(input_tensor, num_filters):
encoder = conv_block(input_tensor, num_filters)
encoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)
return encoder_pool, encoder
def decoder_block(input_tensor, concat_tensor, num_filters):
decoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
decoder = layers.concatenate([concat_tensor, decoder], axis=-1)
decoder = layers.BatchNormalization()(decoder)
decoder = layers.Activation('relu')(decoder)
decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
decoder = layers.BatchNormalization()(decoder)
decoder = layers.Activation('relu')(decoder)
decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
decoder = layers.BatchNormalization()(decoder)
decoder = layers.Activation('relu')(decoder)
return decoder
inputs = layers.Input(shape=img_shape)
# 256
encoder0_pool, encoder0 = encoder_block(inputs, 32)
# 128
encoder1_pool, encoder1 = encoder_block(encoder0_pool, 64)
# 64
encoder2_pool, encoder2 = encoder_block(encoder1_pool, 128)
# 32
encoder3_pool, encoder3 = encoder_block(encoder2_pool, 256)
# 16
encoder4_pool, encoder4 = encoder_block(encoder3_pool, 512)
# 8
center = conv_block(encoder4_pool, 1024)
# center
decoder4 = decoder_block(center, encoder4, 512)
# 16
decoder3 = decoder_block(decoder4, encoder3, 256)
# 32
decoder2 = decoder_block(decoder3, encoder2, 128)
# 64
decoder1 = decoder_block(decoder2, encoder1, 64)
# 128
decoder0 = decoder_block(decoder1, encoder0, 32)
# 256
outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)
model = models.Model(inputs=[inputs], outputs=[outputs])
def dice_coeff(y_true, y_pred):
smooth = 1.
# Flatten
y_true_f = tf.reshape(y_true, [-1])
y_pred_f = tf.reshape(y_pred, [-1])
intersection = tf.reduce_sum(y_true_f * y_pred_f)
score = (2. * intersection + smooth) / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)
return score
def dice_loss(y_true, y_pred):
loss = 1 - dice_coeff(y_true, y_pred)
return loss
def bce_dice_loss(y_true, y_pred):
loss = losses.binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
return loss
model.compile(optimizer='adam', loss=bce_dice_loss, metrics=[dice_loss])
model.summary()
#save model
save_model_path = 'tmp/weights.hdf5'
cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path, monitor='val_dice_loss', mode='max', save_best_only=True)
#start training
history = model.fit(train_dataset,
steps_per_epoch=int(np.ceil(train_length / float(batch_size))),
epochs=epochs,
validation_data=test_dataset,
validation_steps=int(np.ceil(len(test_img) / float(batch_size))),
callbacks=[cp])
#training process visualization
dice = history.history['dice_loss']
val_dice = history.history['val_dice_loss']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)
plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, dice, label='Training Dice Loss')
plt.plot(epochs_range, val_dice, label='Validation Dice Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Dice Loss')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
The output of the training process visualization looks like below:
The output of the training process visualization
The model seems functioning.
#make prediction
def show_predictions(dataset=None, num=1):
for image, mask in dataset.take(num):
pred_mask = model.predict(image)
display([image[0,:,:,0], mask[0,:,:,0], create_mask(pred_mask)])
def create_mask(pred_mask):
pred_mask = tf.argmax(pred_mask, axis=-1)
pred_mask = pred_mask[..., tf.newaxis]
return pred_mask[0,:,:,0]
show_predictions(test_dataset, 3)
The output of the prediction is below:
The output of predictions
I tried to inspect the variables test and test_dataset using:
for img, mask in test:
print(img,mask)
But I only got one image array and one mask array. Does it mean that there's only one image array and one mask array in the dataset? What's wrong with my code creating train and test TensorSliceDataset?
The Second question is why I got the predicted mask blank? Is it because some of my patches have nan? As you can see in output, the white part of the input image and the true mask, the sea is represented by NaN. If this is the problem, how do I set the value for NaN if I hope the model can ignore sea?
Thank you for your help.
def display(display_list):
fig = plt.figure(figsize=(15, 15))
title = ['Input Image', 'True Mask', 'Predicted Mask']
for i in range(len(display_list)):
plt.subplot(1, len(display_list), i + 1)
plt.title(title[i])
plt.imshow(tf.keras.preprocessing.image.array_to_img
(display_list[i]))
plt.axis('off')
plt.show()
def show_predictions(dataset=None, num=1):
for image, mask in dataset.take(num):
pred_mask = model.predict(image)
pred_mask *= 255.0
print(pred_mask.min())
print(pred_mask.max())
print(np.unique(pred_mask, return_counts=True))
display([image[0], mask[0], pred_mask[0]])
show_predictions(test_dataset, 3)

ValueError: in case of LSTM with `stateful=True`

I tried to use LSTM network with stateful=True as follows:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.callbacks import LambdaCallback
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
raw = np.sin(2*np.pi*np.arange(1024)/float(1024/2))
data = pd.DataFrame(raw)
window_size = 3
data_s = data.copy()
for i in range(window_size):
data = pd.concat([data, data_s.shift(-(i+1))], axis = 1)
data.dropna(axis=0, inplace=True)
print (data)
ds = data.values
n_rows = ds.shape[0]
ts = int(n_rows * 0.8)
train_data = ds[:ts,:]
test_data = ds[ts:,:]
train_X = train_data[:,:-1]
train_y = train_data[:,-1]
test_X = test_data[:,:-1]
test_y = test_data[:,-1]
print (train_X.shape)
print (train_y.shape)
print (test_X.shape)
print (test_y.shape)
(816, 3)
(816,)
(205, 3)
(205,)
batch_size = 3
n_feats = 1
train_X = train_X.reshape(train_X.shape[0], batch_size, n_feats)
test_X = test_X.reshape(test_X.shape[0], batch_size, n_feats)
print(train_X.shape, train_y.shape)
regressor = Sequential()
regressor.add(LSTM(units = 64, batch_input_shape=(train_X.shape[0], batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=True))
regressor.add(Dense(units = 1))
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
resetCallback = LambdaCallback(on_epoch_begin=lambda epoch,logs: regressor.reset_states())
regressor.fit(train_X, train_y, batch_size=7, epochs = 1, callbacks=[resetCallback])
previous_inputs = test_X
regressor.reset_states()
previous_predictions = regressor.predict(previous_inputs).reshape(-1)
test_y = test_y.reshape(-1)
plt.plot(test_y, color = 'blue')
plt.plot(previous_predictions, color = 'red')
plt.show()
However, I got:
ValueError: Error when checking target: expected dense_1 to have 3 dimensions, but got array with shape (816, 1)
PS this code has been adapted from https://github.com/danmoller/TestRepo/blob/master/testing%20the%20blog%20code%20-%20train%20and%20pred.ipynb
Two minor bugs:
Here you have
regressor.add(LSTM(units = 64, batch_input_shape=(train_X.shape[0], batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=True))
This LSTM will return a 3D vector, but your y is 2D which throws a valuerror. You can fix this with return_sequences=False. I'm not sure why you initially had train_X.shape[0] inside of your batch_input, the number of samples in your entire set shouldn't affect the size of each batch.
regressor.add(LSTM(units = 64, batch_input_shape=(1, batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=False))
After this you have
regressor.fit(train_X, train_y, batch_size=7, epochs = 1, callbacks=[resetCallback])
In a stateful network you can only put in a number of inputs that divides the batch size. Since 7 doesn't divide 816 we change this to 1:
regressor.fit(train_X, train_y, batch_size=1, epochs = 1, callbacks=[resetCallback])
The same goes in your predict. You must specify batch_size=1:
previous_predictions = regressor.predict(previous_inputs, batch_size=1).reshape(-1)