Detect digit on a live video camera using OpenCV and TensorFlow - tensorflow

I tried the code provided below to detect digit in the video camera and put a contour around it then classify it using the H5 model but it's giving me bad results, just the camera is open and I can see neither detection nor classification. I'm not sure what I need to change or work on.
I use python2.7
OpenCV 4.2.0 and TensorFlow 1.5.0
The code I'm working with:
from statistics import mode
import cv2, time
from keras.models import load_model
from keras.datasets import mnist
import tensorflow as tf
import numpy as np
import vision_definitions
from PIL import Image
import numpy as np
import sys, os
from utils.inference import detect_digits
from utils.inference import draw_text
from utils.inference import draw_bounding_box
from utils.inference import apply_offsets
from utils.inference import load_detection_model
from utils.preprocessor import preprocess_input
# parameters for loading data and images
detection_model_path = '../trained_models/detection_models/model.sav'
class_model_path = '../trained_models/class_models/Num.h5'
# hyper-parameters for bounding boxes shape
frame_window = 10
class_offsets = (20, 40)
# loading models
digit_detection = load_detection_model(detection_model_path)
class_classifier = load_model(class_model_path)
# getting input model shapes for inference
class_target_size = class_classifier.input_shape[1:3]
class_window = []
class_window1 = []
# starting video streaming
cameraIndex = 0
resolution = vision_definitions.kVGA
colorSpace = vision_definitions.kRGBColorSpace
resolution = 2
colorSpace = 3
cv2.namedWindow('window_frame')
video_capture = cv2.VideoCapture(0)
if video_capture.isOpened():
frame = video_capture.read()
else:
rval = False
while True:
rval, frame = video_capture.read()
gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
digits = detect_digits(digit_detection, gray_image)
frame = cv2.resize(frame, (640, 480))
key = cv2.waitKey(1)
b,g,r = cv2.split(frame) # get b,g,r
rgb_img = cv2.merge([r,g,b]) # switch it to rgb
for digit_coordinates in digits:
x1, x2, y1, y2 = apply_offsets(digit_coordinates, class_offsets)
gray_digit = gray_image[y1:y2, x1:x2]
try:
gray_digit = cv2.resize(gray_digit, (class_target_size))
except:
continue
gray_digit = preprocess_input(gray_digit, True)
gray_digit = np.expand_dims(gray_digit, 0)
gray_digit = np.expand_dims(gray_digit, -1)
class_prediction = class_classifier.predict(gray_digit)
class_probability = np.max(class_prediction)
class_label_arg = np.argmax(class_prediction)
color = color.astype(int)
color = color.tolist()
draw_bounding_box(digit_coordinates, rgb_image, color)
draw_text(digit_coordinates, rgb_image, class_mode,
color, 0, -45, 1, 1)
frame = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
cv2.imshow('window_frame', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break

I spend sometimes since there is no CV2.imshow() on Windows except C++ but there it is ...
[ Sample ]:
import cv2
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
import tensorflow as tf
import os
from os.path import exists
import time
def f1( picture ):
return np.asarray( picture )
fig = plt.figure()
image = plt.imread( "C:\\Users\\Jirayu Kaewprateep\\Pictures\\Cats\\samples\\03.png" )
im = plt.imshow( image )
global video_capture_0
video_capture_0 = cv2.VideoCapture(0)
video_capture_1 = cv2.VideoCapture(1)
def animate(i ):
ret0, frame0 = video_capture_0.read()
if (ret0):
picture = np.concatenate( ( np.reshape(frame0[:,:,2:3], ( 480, 640, 1 )),
np.reshape(frame0[:,:,1:2], ( 480, 640, 1 )),
np.reshape(frame0[:,:,0:1], ( 480, 640, 1 ))),
axis=2 )
im.set_array( f1( picture ) )
return im,
while True:
# Capture frame-by-frame
ret0, frame0 = video_capture_0.read()
ani = animation.FuncAnimation(fig, animate, interval=50, blit=True)
plt.show()
# When everything is done, release the capture
video_capture_0.release()
cv2.destroyAllWindows()
[ Model ]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 29, 39, 3 )),
# tf.keras.layers.Reshape(( 29, 39 * 3 )),
# tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True, return_state=False)),
tf.keras.layers.RandomFlip("horizontal_and_vertical"),
tf.keras.layers.RandomRotation(0.2),
tf.keras.layers.RandomZoom(.5, .2),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.Dense(64),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.Dense(64),
])
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64))
model.add(tf.keras.layers.Dense(2))
model.summary()
[ Output ]:

Related

How to reshape real-time input data in my prediction code

This is a real-time sign language detection ,and i reshaped my X_train,y_train,X_test and y_test to add CNN into my architecture .It was LSTM only but am getting errors on the prediction part on how to reshape the real-time input.
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense
from tensorflow.keras.callbacks import TensorBoard
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
def mediapipe_detection(image,model):
image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
image.flags.writeable = False
results = model.process(image)
image.flags.writeable = True
image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR)
return image,results
def extract_keypoints(results):
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in
results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
lh = np.array([[res.x, res.y, res.z] for res in
results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else
np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in
results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else
np.zeros(21*3)
face = np.array([[res.x, res.y, res.z] for res in
results.face_landmarks.landmark]).flatten() if results.face_landmarks else
np.zeros(468*3)
return np.concatenate([pose,face,lh,rh])
colors = [(245,117,16),(117,245,16),(16,117,245)]
def prob_viz(res,actions,input_frame,colors):
output_frame = input_frame.copy()
for num,prob in enumerate(res):
cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40),colors[num], -1)
cv2.putText(output_frame,actions[num],(0,85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1,
(255,255,255),2,cv2.LINE_AA)
return output_frame
DATA_PATH = os.path.join('MP_Data')
#Actions
actions = np.array(['hello','thanks','iloveyou'])
#30 videos worth of data
no_sequences = 30
#30 frames
sequence_length = 30
for action in actions:
for sequence in range(no_sequences):
try:
os.makedirs(os.path.join(DATA_PATH,action,str(sequence)))
except:
pass
label_map = {label:num for num, label in enumerate(actions)}
sequences, labels = [], []
for action in actions:
for sequence in range(no_sequences):
window = []
for frame_num in range(sequence_length):
res = np.load(os.path.join(DATA_PATH,action, str(sequence),"
{}.npy".format(frame_num)))
window.append(res)
sequences.append(window)
labels.append(label_map[action])
#------------------------------------------------------------------------------------
#this above codes are to show what my code looks like.But my question starts from here below
#------------------------------------------------------------------------------------
x_train, x_test, y_train, y_test=train_test_split(x, y, test_size=0.05)
x_train.shape ---->(85, 30, 1662)
# reshaping the input
x_train = x_train.reshape(-1, 300, 1662,1) ; x_test = x_test.reshape(-1, 30, 1662,1)
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir = log_dir)
# define the model
model = Sequential()
model.add(TimeDistributed(Conv1D(3, 3, 1,activation='relu', input_shape=[30,1662,1])) ) #
(3, 128, 216, 1)
# model.add(TimeDistributed(Conv1D(3,3,1,activation='relu')))
model.add(TimeDistributed(MaxPooling1D(pool_size=(3,))))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(320, return_sequences=True, activation='relu'))
model.add(LSTM(640, return_sequences=True, activation='relu'))
model.add(LSTM(320, return_sequences=False, activation='relu'))
model.add(Dense(320, activation='relu'))
model.add(Dense(180, activation='relu'))
model.add(Dense(np.array(actions).shape[0], activation='softmax'))
res = [.2,0.7,.01]
actions[np.argmax(res)]
model.compile(optimizer = 'Adam',loss='categorical_crossentropy',metrics=
['categorical_accuracy'])
actions[np.argmax(res[1])]
model.load_weights('action.h5')
#############################################################################################
#Prediction
########################################################################################
#New Detection Variables
sequence = []
sentence = []
threshold = .4
cap = cv2.VideoCapture(0)
#Mediapipe Model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
while cap.isOpened():
#Read Feed
ret, frame = cap.read()
#Make detections
image,results = mediapipe_detection(frame,holistic)
#Prediciton Logic
keypoints = extract_keypoints(results)
sequence.insert(0,keypoints)
sequence = sequence[:30]
if len(sequence) == 30:
res = model.predict(np.expand_dims(sequence,axis=0))[0]
#Visualization
if res[np.argmax(res)] > threshold:
if len(sentence) > 0:
if actions[np.argmax(res)] != sentence[-1]:
sentence.append(actions[np.argmax(res)])
else:
sentence.append(actions[np.argmax(res)])
if len(sentence)>5:
sentence = sentence[-5:]
#Viz probability
image = prob_viz(res,actions,image,colors)
cv2.rectangle(image,(0,0),(640,40),(245,117,16),-1)
cv2.putText(image, ' '.join(sentence),(3,30),
cv2.FONT_HERSHEY_SIMPLEX, 1,(255,255,255),2,cv2.LINE_AA)
#Show to Screen
cv2.imshow('OpenCV feed', image)
#Breaking the Feed
if cv2.waitKey(10) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
But am getting an error on the prediction part
I adjust a bit that is only matching the dataset and expecting values you can modify the target value from my sample. It is possible working with the streams when input is concatenated you can do it with transform functions or queue then transform functions.
[ Sample ]:
import tensorflow as tf
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp
from tensorflow.keras.callbacks import TensorBoard
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DATA_PATH = os.path.join('MP_Data')
#Actions
actions = np.array(['hello','thanks','iloveyou'])
#30 videos worth of data
no_sequences = 2
# no_sequences = 30
#30 frames
# sequence_length = 30
sequence_length = 2
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Definication / Class
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def load_target_wave( wave_file='F:\\temp\\Python\\Speech\\Piano\\Berklee44v4\\piano_G2.wav' ) :
test_file = tf.io.read_file( wave_file )
test_audio, sample_rates = tf.audio.decode_wav(contents=test_file)
return test_audio
def mediapipe_detection(image,model):
image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
image.flags.writeable = False
results = model.process(image)
image.flags.writeable = True
image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR)
return image,results
def extract_keypoints(results):
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in
results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
lh = np.array([[res.x, res.y, res.z] for res in
results.left_hand_landmarks.landmark]).flatten()
if results.left_hand_landmarks :
pass
else :
np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in
results.right_hand_landmarks.landmark]).flatten()
if results.right_hand_landmarks :
pass
else :
np.zeros(21*3)
face = np.array([[res.x, res.y, res.z] for res in
results.face_landmarks.landmark]).flatten()
if results.face_landmarks :
pass
else :
np.zeros(468*3)
return np.concatenate([pose,face,lh,rh])
colors = [(245,117,16),(117,245,16),(16,117,245)]
def prob_viz(res,actions,input_frame,colors):
output_frame = input_frame.copy()
for num,prob in enumerate(res):
cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40),colors[num], -1)
cv2.putText(output_frame,actions[num],(0,85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1,
(255,255,255),2,cv2.LINE_AA)
return output_frame
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Generate DATA
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
wav_G2 = load_target_wave( )
for action in actions:
for sequence in range(no_sequences):
try:
os.makedirs(os.path.join(DATA_PATH,action,str(sequence)))
except:
pass
label_map = {label:num for num, label in enumerate(actions)}
sequences, labels = [], []
for action in actions:
for sequence in range(no_sequences):
window = []
for frame_num in range(sequence_length):
# res = np.load(os.path.join(DATA_PATH,action, str(sequence), "{}.npy".format(frame_num)))
res = wav_G2
window.append(res)
sequences.append(window)
labels.append(label_map[action])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
sequences = tf.cast( sequences, dtype=tf.int32 )
sequences = tf.constant( sequences, shape=( 12, 1, 2, 287232, 1 ) )
labels = tf.cast( labels, dtype=tf.int32 )
labels = tf.constant( labels, shape=( 12, 1, 1, ) )
dataset = tf.data.Dataset.from_tensor_slices(( sequences, labels ))
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir = log_dir)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 2, 287232, 1 )),
tf.keras.layers.Reshape(( 256, 2244, 1 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (4, 4), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Reshape((16128, 1120)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(192, activation='relu'),
tf.keras.layers.Dense(1),
])
model.summary()
res = [.2,0.7,.01]
actions[np.argmax(res)]
model.compile(optimizer = 'Adam',loss='categorical_crossentropy',metrics=
['categorical_accuracy'])
actions[np.argmax(res[1])]
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit(dataset, epochs=1 ,validation_data=(dataset))
# model.load_weights('action.h5')

How to feed grayscale images into a pretrained neural network models?

Noob here . I have been working with grayscale images for mitosis classification Here's a sample image I'm working with . I have employed VGGnet for achieving this. I've some doubts about how my grayscale image should be fed into the neural network. I've read the documentation of VGGnet about being trained on colored images on Imagenet.
I read the images using cv2.imread() and by squeezing into an array. I found its shape to be (227,227,3). Shouldn't it be (227,227,1) when I'm handling with grayscale images ? Model accuracy was also found to be only 50% . I'm wondering if it's something wrong with the dataset itself or VGGnet isn't suitable for this purpose. Or should I use some other method to read these images to get grayscale images?
I have tried the solutions listed in similar questions . Am I reading the images in the right way?
I'm sharing my code here.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Display Image data
from PIL import Image
import cv2
from google.colab import drive
drive.mount('/content/drive')
mitotic_image = Image.open('/content/drive/MyDrive/Medical/actualmito1/trainactmito01.jpg')
nonmitotic_image=Image.open('/content/drive/MyDrive/Medical/actualnonmito1/trainactnonmito01.jpg')
# subplotting image data
fig = plt.figure(figsize=(15,9))
ax1 = fig.add_subplot(1, 2, 1)
img_plot = plt.imshow(nonmitotic_image, cmap = plt.cm.bone)
ax1.set_title("Non-Mitotic Image")
ax2 = fig.add_subplot(1, 2, 2)
img_plot = plt.imshow(mitotic_image, cmap = plt.cm.bone)
ax2.set_title("Mitotic Image")
plt.show()
import os
yes = os.listdir("/content/drive/MyDrive/Medical/actualmito1")
no = os.listdir("/content/drive/MyDrive/Medical/actualnonmito1")
data = np.concatenate([yes, no])
target_yes = np.full(len(yes), 1)
target_no = np.full(len(no), 0)
# Image Target
data_target = np.concatenate([target_yes, target_no])
# Generate Image Data
img = cv2.imread("/content/drive/MyDrive/Medical/actualmito1/trainactmito01.jpg")
mitosis = cv2.resize(img,(32,32))
plt.imshow(mitosis)
X_data = []
yes = os.listdir("/content/drive/MyDrive/Medical/actualmito1")
for file in yes:
img = cv2.imread("/content/drive/MyDrive/Medical/actualmito1/" + file)
# resizing image data to 32x32
img = cv2.resize(img, (224,224))
X_data.append(img) # This will store list of all image data in an array
no = os.listdir("/content/drive/MyDrive/Medical/actualnonmito1")
for file in no:
img = cv2.imread("/content/drive/MyDrive/Medical/actualnonmito1/" + file)
# resizing image data to 32x32
img = cv2.resize(img, (224,224))
X_data.append(img) # This will store list of all image data in an array
X = np.squeeze(X_data)
X.shape
# Image Pixel Normalization
X = X.astype('float32')
X /= 255
X.shape
# Train & Test Data
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, data_target, test_size = 0.1,
random_state = 3)
x_train2, x_val, y_train2, y_val = train_test_split(x_train, y_train, test_size = 0.15,
random_state = 3)
# VGG16 - Transfer Learning
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Conv2D, GlobalAveragePooling2D, Flatten, ZeroPadding2D,
Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import VGG16
def build_model():
#use Imagenet = pre-trained models weights called knowledge transfer
# image_shape = 32x32x3
vgg16_model = VGG16(weights = 'imagenet', include_top = False, input_shape=(224,224,3))
# Input Layer
model = Sequential()
# paadding = 'same' = ZeroPadding
model.add(Conv2D(filters=3, kernel_size=(3,3), padding='same', input_shape = (224,224,3)))
# add transfer learning model
model.add(vgg16_model)
# Average Pooling Layer
model.add(GlobalAveragePooling2D())
model.add(BatchNormalization())
model.add(Dropout(0.5))
# Fully Connected Layer
model.add(Dense(units = 512, activation='relu'))
model.add(Dropout(0.5))
# Output Layer
model.add(Dense(units = 1, activation='sigmoid'))
model.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['Accuracy'])
return model
model = build_model()
model.summary()
from tensorflow.keras import callbacks
filepath = "/content/drive/MyDrive/BestModelMRI3.hdf5"
checkpoint = callbacks.ModelCheckpoint(filepath, monitor = 'val_loss', save_best_only = True,
mode = 'min',verbose = 1)
import datetime
import keras
import os
logdir = os.path.join("/content/drive/MyDrive/MRI_logs",
datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir)
history = model.fit(x_train2, y_train2, epochs = 200, batch_size = 32, shuffle = True,
validation_data = (x_val, y_val), callbacks = [checkpoint, tensorboard_callback],verbose= 1)
model.load_weights("/content/drive/MyDrive/BestModelMRI3.hdf5")
model.evaluate(x_test, y_test)
predictions = model.predict(x_test)
yhat = np.round(predictions)
from sklearn.metrics import confusion_matrix, classification_report
confusion_matrix(y_test, yhat)
sns.heatmap(confusion_matrix(y_test, yhat), annot = True, cmap = 'RdPu')
print(classification_report(y_test, yhat))
VGG model was trained on RGB images. CV2 reads in images as BGR. so add code
img=cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
to convert BGR to RGB. Also VGG requires the pixels to be in the range -1 to +1 so scale with
X=X/127.5 -1

What format do these lists have to be in to be accepted by Keras Tuners Search function?

This Code reads in a set of testing and training guitar jpg images for the neural net to learn and test from.
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random
DATADIR = "C:/Users/TheKid/Data/DataMiningProject/DataSet"
CATEGORIES = ["Fender_Jazzmaster", "Gibson_ES"]
CATEGORIES2 = ["Test"]
for category in CATEGORIES:
path = os.path.join(DATADIR,category)
for img in os.listdir(path):
img_array = cv2.imread(os.path.join(path,img),cv2.IMREAD_GRAYSCALE)
IMG_SIZE = 70
new_array = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))
training_data = []
def create_training_data():
for category in CATEGORIES:
path = os.path.join(DATADIR,category)
class_num = CATEGORIES.index(category)
for img in os.listdir(path):
img_array = cv2.imread(os.path.join(path,img),cv2.IMREAD_GRAYSCALE)
new_array = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))
training_data.append([new_array,class_num])
create_training_data()
print(len(training_data))
random.shuffle(training_data)
X = []
y = []
for features, label in training_data:
X.append(features)
y.append(label)
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
for category in CATEGORIES2:
path2 = os.path.join(DATADIR,category)
for img in os.listdir(path2):
img_array2 = cv2.imread(os.path.join(path2,img),cv2.IMREAD_GRAYSCALE)
IMG_SIZE = 70
new_array2 = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))
testing_data = []
def create_testing_data():
for category in CATEGORIES2:
path2 = os.path.join(DATADIR,category)
class_num2 = CATEGORIES2.index(category)
for img in os.listdir(path2):
img_array2 = cv2.imread(os.path.join(path2,img),cv2.IMREAD_GRAYSCALE)
new_array2 = cv2.resize(img_array2,(IMG_SIZE,IMG_SIZE))
testing_data.append([new_array2,class_num2])
create_testing_data()
print(len(testing_data))
random.shuffle(testing_data)
X2 = []
y2 = []
for features, label in testing_data:
X2.append(features)
y2.append(label)
X2 = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
import pickle
pickle_out = open("X.pickle" , "wb")
pickle.dump(X, pickle_out)
pickle_out.close()
pickle_out = open("y.pickle" , "wb")
pickle.dump(y, pickle_out)
pickle_out.close()
pickle_in = open("X.pickle", "rb")
X = pickle.load(pickle_in)
pickle_out = open("X2.pickle" , "wb")
pickle.dump(X2, pickle_out)
pickle_out.close()
pickle_out = open("y2.pickle" , "wb")
pickle.dump(y2, pickle_out)
pickle_out.close()
pickle_in = open("X2.pickle", "rb")
X = pickle.load(pickle_in)
This next bit of code takes in the pickle file saved in previous code and is supposed to use Keras tuners search function to run different variants of the neural net with different amounts of conv layer ,layer sizes etc so I can choose the most efficient version. But when run this error gets thrown:
ValueError: Data cardinality is ambiguous:
x sizes: 1312
y sizes: 12
Please provide data which shares the same first dimension.
The Shapes of all the variables are:
(x_train = (1312, 70, 70, 1)
y_train =(1312,)
x_test = (1312, 70, 70, 1)
y_test =(12,)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import TensorBoard
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
from tensorflow import keras
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
pickle_in = open("X.pickle","rb")
x_train = pickle.load(pickle_in)
pickle_in = open("y.pickle","rb")
y_train = pickle.load(pickle_in)
pickle_in = open("X2.pickle","rb")
x_test = pickle.load(pickle_in)
pickle_in = open("y2.pickle","rb")
y_test = pickle.load(pickle_in)
x_train=np.array(x_train/255.0)
y_train=np.array(y_train)
x_test=np.array(x_test/255.0)
y_test=np.array(y_test)
LOG_DIR = f"{int(time.time())}"
def build_model(hp):
model = keras.models.Sequential()
model.add(Conv2D(hp.Int("input_units",32, 256, 32 ), (3, 3), input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
for i in range(hp.Int("n_layers", 1, 4)):
model.add(Conv2D(hp.Int(f"conv-{i}_units",32, 256, 32 ), (3, 3)))
model.add(Activation('relu'))
model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(10))
model.add(Activation("softmax"))
model.compile(optimizer="adam",
loss="binary_crossentropy",
metrics=["accuracy"])
return model
tuner = RandomSearch(
build_model,
objective = "val_accuracy",
max_trials = 1,
executions_per_trial = 1,
directory = LOG_DIR)
tuner.search(x=x_train,
y=y_train,
epochs=1,
batch_size=64,
validation_data=(x_test,y_test))
with open(f"tuner_{int(time.time())}.pkl", "wb") as f:
pickle.dump(tuner, f)
tuner = pickle.load(open(""))
print(tuner.get_best_hyperparameters()[0].values)
How would I go about resolving this error? It's seems like a matrix formatting issue to me but I have little experience in dealing with a problem like this.
As the error message and shape of the data (x_test and y_test) clearly suggests, you have 1312 rows in x_test and 12 rows in y_test. You are feeding this data to validation_data=(x_test,y_test).
Kindly pass the same dimension or same rows of data for x_test and y_test in validation_data and this should fix your error.

different results in inference between python and c++ opencv Mat::

i'm doing a re identification network, implementing a triplet-loss function, at that point everything is fine. the networks works fine in python, I implemented the network on keras with tensorflow as backend, I passed the .hd5 to a .pb file to make inference in tensorflow c++, the probmes is that with the same images the result is difference between python and c++ and I don't know why anyone to help me?
here is the the model in python:
import keras
import keras.applications
import keras.layers as layer
import tensorflow as tf
from keras import backend as K
from keras.backend.tensorflow_backend import set_session
from keras.models import Model as md
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)
class Model:
def init(self, shape):
self.shape = shape
self.params = {
'optimizer': 'sgd',
'first_neuron': 12,
'first_max_pooling': 2,
'second_neuron': 12,
'second_max_pooling': 2,
'third_neuron': 20,
'third_max_pooling': 3,
'dense_neuron': 64,
'final_neuron': 128,
}
self.feature_model = self.create_features_model()
self.triplet_model = self.create_model()
def create_features_model(self):
# Define the vision modules
img_input = layer.Input(shape=(self.shape))
x = layer.Conv2D(self.params['first_neuron'], (3, 3), activation='relu')(img_input)
x = layer.MaxPooling2D((self.params['first_max_pooling'], self.params['first_max_pooling']))(x)
x = layer.Conv2D(self.params['second_neuron'], (3, 3), activation='relu')(x)
x = layer.MaxPooling2D((self.params['second_max_pooling'], self.params['second_max_pooling']))(x)
x = layer.Conv2D(self.params['third_neuron'], (3, 3), activation='relu')(x)
x = layer.MaxPooling2D((self.params['third_max_pooling'], self.params['third_max_pooling']))(x)
x = layer.Flatten()(x)
x = layer.Dense(self.params['dense_neuron'], activation='relu')(x)
x = layer.Dense(self.params['final_neuron'], activation='relu')(x)
out = layer.Lambda(lambda x: K.l2_normalize(x, axis=1), name='t_emb_1_lnorm')(x)
features_model = md(img_input, out)
features_model.summary()
return features_model
def create_model(self):
base_model = self.feature_model
# triplet framework, shared weights
input_shape = (self.shape)
input_target = layer.Input(shape=input_shape, name='input_target')
input_positive = layer.Input(shape=input_shape, name='input_pos')
input_negative = layer.Input(shape=input_shape, name='input_neg')
net_target = base_model(input_target)
net_positive = base_model(input_positive)
net_negative = base_model(input_negative)
# The Lamda layer produces output using given function. Here its Euclidean distance.
positive_distance = layer.Lambda(self.euclidean_distance, name='pos_dist')([net_target, net_positive])
negative_distance = layer.Lambda(self.euclidean_distance, name='neg_dist')([net_target, net_negative])
diference = layer.Lambda(self.euclidean_distance, name='dif')([net_positive, net_negative])
# This lambda layer simply stacks outputs so both distances are available to the objective
distances = layer.Lambda(lambda vects: K.stack(vects, axis=1), name='distance')(
[positive_distance, negative_distance, diference])
model = md([input_target, input_positive, input_negative], distances, name='result')
# Setting up optimizer designed for variable learning rate
model.compile(optimizer=keras.optimizers.Adam(lr=0.001, decay=0.00002),
loss=self.triplet_loss, metrics=[self.accuracy])
return model
def triplet_loss(self, _, y_pred):
margin = K.constant(0.5)
return K.mean(K.maximum(K.constant(0), K.square(y_pred[:, 0, 0]) - 0.5 * (
K.square(y_pred[:, 1, 0]) + K.square(y_pred[:, 2, 0])) + margin))
def accuracy(self, _, y_pred):
return K.mean(y_pred[:, 0, 0] < y_pred[:, 1, 0])
def lnorm(self, x):
return K.l2_normalize(x, axis=-1)
def euclidean_distance(self, vects):
x, y = vects
return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
this is how I made inference on python:
from model import Model as model
from keras.utils import HDF5Matrix
import numpy as np
import cv2
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)
def load_datasets(in_h5_path, partition='train'):
if partition == 'train':
target = HDF5Matrix(datapath=in_h5_path, dataset="targets")
positive = HDF5Matrix(datapath=in_h5_path, dataset="positives")
negative = HDF5Matrix(datapath=in_h5_path, dataset="negatives")
return target, positive, negative
else:
print("Invalid 'partition' parameter: Valid values: ['train', 'test']")
tar = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/tra1.png")
nega = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/dec1.png")
tar = cv2.resize(tar, (32, 32), interpolation=cv2.INTER_CUBIC)
nega = cv2.resize(nega, (32, 32), interpolation=cv2.INTER_CUBIC)
t1 = np.array(tar).reshape((1, 32, 32, 3))
t2 = np.array(nega).reshape((1, 32, 32, 3))
target, positive, negative = load_datasets('/home/amejia/PycharmProjects/lossDatasetGenerator/test/test32.h5')
net = model((32, 32, 3))
net.triplet_model.load_weights("/home/amejia/PycharmProjects/triplet_loss/simple-grande.hdf5")
enter = [t1, t2, t1]
a = net.triplet_model.predict(x=enter, batch_size=1)
print(a)
the inference in c++ :
in c++ this si how I made inference:
tensorflow::Tensor target(tensorflow::DT_FLOAT,
tensorflow::TensorShape(
{1, image_size, image_size, 3}));
tensorflow::Tensor positive(tensorflow::DT_FLOAT,
tensorflow::TensorShape(
{1, image_size, image_size, 3}));
img_to_float2(tracks, detections, target, positive, frame);
std::vector<std::pair<std::string, tensorflow::Tensor>> Input = {{"input_target:0", target},
{"input_pos:0", positive},
{"input_neg:0", target}};
std::vector<tensorflow::Tensor> Outputs;
tensorflow::Status Status = session->Run(Input, {"distance/stack:0"}, {}, &Outputs);
auto data = Outputs[0].flat<float>();
std::cout << Outputs[0].DebugString() << std::endl;
and this is the function to put create the in tensor:
void LossModel::img_to_float2(Track &tracks, Detection &detections, tensorflow::Tensor &tracksTensor,
tensorflow::Tensor &detectionsTensor, cv::Mat &frame) {
auto *tar = tracksTensor.flat<float>().data();
auto *dec = detectionsTensor.flat<float>().data();
cv::Mat detectionImg = frame(detections.getBox()).clone();
resize(detectionImg, detectionImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
cv::INTER_CUBIC);
cv::Mat resizedImage(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, dec);
detectionImg.convertTo(resizedImage, CV_32FC3);
cv::Mat trackImg = tracks.get_img().clone();
resize(trackImg, trackImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
cv::INTER_CUBIC);
cv::Mat resizedImage2(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, tar);
trackImg.convertTo(resizedImage2, CV_32FC3);

CNN using TFlearn - The accuracy is very poor

# -*- coding: utf-8 -*-
"""
Created on Sat Apr 15 23:46:38 2017
#author: Aprameyo
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy.misc import imresize
import os
import urllib
import tensorflow as tf
import cv2 as cv2
import numpy as np
import tflearn
from tflearn.data_utils import image_preloader
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression
Tensor = np.zeros((112,92,280))
Tensor_V = np.zeros((112,92,120))
K = 0
for x in range(1, 41):
directory = "s%d"%x
os.chdir(directory)
for y in range(1,8):
image = "%d.pgm"%y
Tensor[:,:,K] = (cv2.imread(image,-1))
K = K+1
os.chdir("../")
#Tensor_Fin = tf.convert_to_tensor(Tensor, dtype=tf.float32)
T =0
Targets = np.zeros((280,40))
for x in range(0,40):
for y in range(0,7):
Targets[T,x] = 1
T = T+1
K1 = 0
for x in range(1, 41):
directory = "s%d"%x
os.chdir(directory)
for y in range(8,11):
image = "%d.pgm"%y
Tensor_V[:,:,K1] = (cv2.imread(image,-1))
K1 = K1+1
os.chdir("../")
#Tensor_Fin = tf.convert_to_tensor(Tensor, dtype=tf.float32)
T1 =0
Targets_V = np.zeros((120,40))
for x in range(0,40):
for y in range(0,3):
Targets_V[T1,x] = 1
T1 = T1+1
Tensor_Train = Tensor
Targets_Train = Targets
Tensor_Vali = Tensor_V
Targets_Vali = Targets_V
Tensor_Train = Tensor_Train.reshape([-1, 112, 92, 1])
Tensor_Vali = Tensor_Vali.reshape([-1, 112, 92, 1])
network = input_data(shape=[None, 112, 92, 1], name='input')
network = conv_2d(network, 32, 3, activation='relu', regularizer="L2")
network = max_pool_2d(network, 2)
network = local_response_normalization(network)
network = conv_2d(network, 64, 3, activation='relu', regularizer="L2")
network = max_pool_2d(network, 2)
network = local_response_normalization(network)
network = fully_connected(network, 128, activation='sigmoid')
network = dropout(network, 0.8)
network = fully_connected(network, 256, activation='sigmoid')
network = dropout(network, 0.8)
network = fully_connected(network, 40, activation='softmax')
network = regression(network, optimizer='adam', learning_rate=0.01,
loss='categorical_crossentropy', name='target')
# Training
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit({'input': Tensor_Train}, {'target': Targets_Train},n_epoch=20,
validation_set=({'input': Tensor_Vali}, {'target':Targets_Vali}),
snapshot_step=100, show_metric=True, run_id='convnet_mnist')
I implemented this code by keeping it in a directory called att_faces which contains 10 images of a person and there are 40 such persons, the training set includes 7 of them and the test includes 3 of them. I can't figure out why the accuracy is so abominable.