I am trying to find a proper way to convert an RGB mask from the CamVid dataset into a categorical mask.
I have the list of RGB values and the corresponding labels. What is the proper way to generate a categorical label in Keras?
A CamVid dataset looks like this:
.
├── default
│ └── 000048-NG.jpg
├── defaultannot
│ └── 000048-NG.png
├── default.txt
└── label_colors.txt
2 directories, 4 files
I read your question and split it into two requirements:
From the recording camera, you need to convert the output into a dataset with categories, and
generate the categorical labels.
How you do this depends on your model and your assumptions about the input and target; you can use supervised, self-supervised, or unsupervised learning.
[ Sample ]: Quick sample of supervised learning that returns a label.
import cv2
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import tensorflow as tf
import os
from os.path import exists
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
fig = plt.figure()
image = plt.imread( "F:\\datasets\\downloads\\cats_name\\train\\Symbols\\01.jpg" )
im = plt.imshow( image )
list_actual_label = [ 'Shoes', 'Duck' ]
global video_capture_0
video_capture_0 = cv2.VideoCapture(0)
checkpoint_path = "F:\\models\\checkpoint\\" + os.path.basename(__file__).split('.')[0] + "\\TF_DataSets_01.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
loggings = "F:\\models\\checkpoint\\" + os.path.basename(__file__).split('.')[0] + "\\loggings.log"
if not exists(checkpoint_dir) :
os.mkdir(checkpoint_dir)
print("Create directory: " + checkpoint_dir)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def f1( picture ):
return tf.constant( picture ).numpy()
def animate( i ):
ret0, frame0 = video_capture_0.read()
if (ret0):
frame0 = tf.image.resize(frame0, [29, 39]).numpy()
temp = img_array = tf.keras.preprocessing.image.img_to_array(frame0[:,:,2:3])
temp2 = img_array = tf.keras.preprocessing.image.img_to_array(frame0[:,:,1:2])
temp3 = img_array = tf.keras.preprocessing.image.img_to_array(frame0[:,:,0:1])
temp = tf.keras.layers.Concatenate(axis=2)([temp, temp2])
temp = tf.keras.layers.Concatenate(axis=2)([temp, temp3])
# 480, 640
temp = tf.keras.preprocessing.image.array_to_img(
temp,
data_format=None,
scale=True
)
temp = f1( temp )
im.set_array( temp )
result = predict_action( temp )
print( list_actual_label[result] )
return im,
def predict_action ( image ) :
predictions = model.predict(tf.constant(image, shape=(1, 29, 39, 3) , dtype=tf.float32))
result = tf.math.argmax(predictions[0])
return result
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 29, 39, 3 )),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Reshape((234, 32)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
])
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64))
model.add(tf.keras.layers.Dense(2))
model.summary()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
list_picture = []
list_label = []
path_1 = "F:\\datasets\\downloads\\Duck_Shoe\\Duck\\"
path_2 = "F:\\datasets\\downloads\\Duck_Shoe\\Shoes\\"
for file in os.listdir( path_1 ):
image = plt.imread( path_1 + file )
image = tf.image.resize(image, [29, 39]).numpy()
for i in range ( 40 ) :
if i % 6 == 0 :
layer = tf.keras.layers.RandomZoom(.5, .2)
image = layer( image ).numpy()
list_picture.append( image )
elif i % 5 == 0 :
image = tf.image.random_hue(image, 0.2).numpy()
image = tf.image.random_flip_up_down(image, 1).numpy()
list_picture.append( image )
elif i % 4 == 0 :
image = tf.image.random_saturation(image, 5, 10, 1).numpy()
image = tf.image.random_flip_left_right(image, 1).numpy()
list_picture.append( image )
elif i % 3 == 0 :
image = tf.image.random_flip_up_down(image, 1).numpy()
image = tf.image.random_saturation(image, 5, 10, 1).numpy()
list_picture.append( image )
elif i % 2 == 0 :
image = tf.image.random_flip_left_right(image, 1).numpy()
image = tf.image.random_hue(image, 0.2).numpy()
list_picture.append( image )
else :
list_picture.append( image )
list_label.append( 1 )
for file in os.listdir( path_2 ):
image = plt.imread( path_2 + file )
image = tf.image.resize(image, [29, 39]).numpy()
for i in range ( 40 ) :
if i % 6 == 0 :
layer = tf.keras.layers.RandomZoom(.5, .2)
image = layer( image ).numpy()
list_picture.append( image )
elif i % 5 == 0 :
image = tf.image.random_hue(image, 0.2).numpy()
image = tf.image.random_flip_up_down(image, 1).numpy()
list_picture.append( image )
elif i % 4 == 0 :
image = tf.image.random_saturation(image, 5, 10, 1).numpy()
image = tf.image.random_flip_left_right(image, 1).numpy()
list_picture.append( image )
elif i % 3 == 0 :
image = tf.image.random_flip_up_down(image, 1).numpy()
image = tf.image.random_saturation(image, 5, 10, 1).numpy()
list_picture.append( image )
elif i % 2 == 0 :
image = tf.image.random_flip_left_right(image, 1).numpy()
image = tf.image.random_hue(image, 0.2).numpy()
list_picture.append( image )
else :
list_picture.append( image )
list_label.append( 0 )
dataset = tf.data.Dataset.from_tensor_slices((
    tf.constant(list_picture, shape=(len(list_picture), 1, 29, 39, 3), dtype=tf.float32),
    tf.constant(list_label, shape=(len(list_label), 1, 1, 1), dtype=tf.int64)))
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam( learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name='Nadam' )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.MeanSquaredLogarithmicError(reduction=tf.keras.losses.Reduction.AUTO, name='mean_squared_logarithmic_error')
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model.compile(optimizer=optimizer, loss=lossfn, metrics=['accuracy'])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: FileWriter
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
if exists(checkpoint_path) :
model.load_weights(checkpoint_path)
print("model load: " + checkpoint_path)
input("Press Any Key!")
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit(dataset, epochs=2, validation_data=dataset)
model.save_weights(checkpoint_path)
while True:
ani = animation.FuncAnimation(fig, animate, interval=50, blit=True)
plt.show()
# When everything is done, release the capture
video_capture_0.release()
cv2.destroyAllWindows()
input('...')
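For the original question (mapping an RGB CamVid mask to a categorical mask), here is a minimal sketch; the palette below is a hypothetical example of what label_colors.txt provides, not the real file contents:
import numpy as np
import tensorflow as tf
# Hypothetical mapping parsed from label_colors.txt: RGB triple -> class index.
palette = {(128, 128, 128): 0, (128, 0, 0): 1, (0, 128, 192): 2}
def rgb_mask_to_categorical(rgb_mask, palette):
    # rgb_mask: (H, W, 3) uint8 array read from the annotation PNG.
    label_mask = np.zeros(rgb_mask.shape[:2], dtype=np.int32)
    for color, idx in palette.items():
        matches = np.all(rgb_mask == np.array(color, dtype=np.uint8), axis=-1)
        label_mask[matches] = idx
    # One-hot encode per pixel: (H, W, num_classes).
    return tf.keras.utils.to_categorical(label_mask, num_classes=len(palette))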
I am writing a character-recognition CNN, using the EMNIST dataset.
Kaggle Notebook link: https://www.kaggle.com/code/notshrirang/ocr-with-cnn
GitHub Notebook link: https://github.com/NotShrirang/MyOCR
My model does pretty well on the testing dataset, but when I use an image captured with my phone, it never predicts correctly.
What should I do? Please help.
Here is my code snippet for model architecture:
model = tf.keras.Sequential([
tf.keras.layers.Conv2D(kernel_size=(8, 8),filters=128, input_shape=(28, 28, 1), activation="relu"),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.AveragePooling2D(pool_size=(2, 2)),
tf.keras.layers.Conv2D(kernel_size=(4, 4), filters=64, activation="relu"),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.AveragePooling2D(pool_size=(2, 2)),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(27, activation='softmax')
])
model.compile(
optimizer=tf.keras.optimizers.SGD(),
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
metrics=['accuracy']
)
model.fit(X_train, y_train, epochs=8, validation_split = 0.2)
Output while training the model: [screenshot omitted]
Loss vs. val loss plot: [plot omitted]
Evaluation:
evaluation = new_model.evaluate(X_test, y_test)
evaluation
I resized the images to 28x28.
Also, all images in training, testing, and real data are converted to grayscale.
The dataset has all images with their matrices transposed; I have straightened them.
I tried normalizing the data, but it decreased the val_accuracy, so I stopped normalizing.
I tried shuffling the data; it increased the val_accuracy, so I kept it.
I tried increasing and decreasing the layers and epochs of the model; it made no difference except to the training time.
I added batch normalization; it only increased the time required to train the model.
There are many techniques to improve results, not only the model itself:
Increase the size of the dataset, which also creates significant inputs.
Use feature-extraction functions and data preprocessing: MFCCs, Fourier transforms, and input transformations (blur, rotation, flips, padding, zoom, or random noise).
Create multiple sets of data inputs and train with K-fold validation.
Use random data selection, save and load the model, or adjust performance-callback parameters.
Compare between models, or concatenate models.
Matching the training preprocessing at inference time matters most here; see the sketch after this list.
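A hedged sketch for the phone-photo case (the inversion step is an assumption: EMNIST stores bright strokes on a dark background, while a phone photo is usually dark ink on light paper; since you trained on unnormalized pixel values, no rescaling is applied):
import tensorflow as tf
def preprocess_phone_photo(path):
    # Load as grayscale and resize to the 28x28 input the model was trained on.
    raw = tf.io.read_file(path)
    img = tf.io.decode_image(raw, channels=1, expand_animations=False)
    img = tf.image.resize(img, [28, 28])
    img = 255.0 - img  # invert so strokes are bright, matching EMNIST
    return tf.expand_dims(img, 0)  # add a batch axis: (1, 28, 28, 1)
# prediction = model.predict(preprocess_phone_photo('digit.jpg'))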
Sample: my image-recognition template, useful when performing these functions and conversions for solutions.
import os
from os.path import exists
import tensorflow as tf
import tensorflow_io as tfio
import pandas as pd
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
variables = pd.read_excel('F:\\temp\\Python\\excel\\Book 13 (2) (3).xlsx', index_col=None, header=[0])
list_label = [ ]
list_Image = [ ]
list_file_actual = [ ]
list_label_actual = [ 'Candidt Kibt', 'Candidt Kibt', 'Candidt Kibt', 'Candidt Kibt', 'Candidt Kibt', 'Pikaploy', 'Pikaploy', 'Pikaploy', 'Pikaploy', 'Pikaploy' ]
for Index, Image, Label in variables.values:
print( Label )
# list_label.append( Label )
image = tf.io.read_file( Image )
image = tf.io.decode_image(image)
list_file_actual.append(image)
image = tf.image.resize(image, [32,32], method='nearest')
list_Image.append(image)
if Label == 0:
list_label.append(0)
else:
list_label.append(9)
# if Label == 0:
# list_label_actual.append('Candidt Kibt')
# else:
# list_label_actual.append('Pikaploy')
list_label = tf.cast( list_label, dtype=tf.int32 )
list_label = tf.constant( list_label, shape=( 54, 1, 1 ) )
list_Image = tf.cast( list_Image, dtype=tf.int32 )
list_Image = tf.constant( list_Image, shape=( 54, 1, 32, 32, 3 ) )
# print( list_label_actual )
# print( list_label )
checkpoint_path = "F:\\models\\checkpoint\\" + os.path.basename(__file__).split('.')[0] + "\\TF_DataSets_01.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
loggings = "F:\\models\\checkpoint\\" + os.path.basename(__file__).split('.')[0] + "\\loggings.log"
if not exists(checkpoint_dir) :
os.mkdir(checkpoint_dir)
print("Create directory: " + checkpoint_dir)
log_dir = checkpoint_dir
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
dataset = tf.data.Dataset.from_tensor_slices(( list_Image, list_label ))
list_Image = tf.constant( list_Image, shape=( 54, 32, 32, 3) ).numpy()
print( "===========================================" )
print( "type of variables: " )
print( type(variables) )
print( variables )
print( "variables.values: " )
print( variables.values )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 32, 32, 3 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(512, activation='relu'),
tf.keras.layers.Dropout(0.3),
tf.keras.layers.Reshape((512, 225)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(192, activation='relu'),
tf.keras.layers.Dense(10),
])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: FileWriter
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
if exists(checkpoint_path) :
model.load_weights(checkpoint_path)
print("model load: " + checkpoint_path)
input("Press Any Key!")
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Callback
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class custom_callback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs={}):
if( logs['accuracy'] >= 0.97 ):
self.model.stop_training = True
custom_callback = custom_callback()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam(
learning_rate=0.000001, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
name='Nadam'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=False,
reduction=tf.keras.losses.Reduction.AUTO,
name='sparse_categorical_crossentropy'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model.compile(optimizer=optimizer, loss=lossfn, metrics=['accuracy'] )
model.save_weights(checkpoint_path)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit( dataset, batch_size=120, epochs=10000, callbacks=[custom_callback] )
plt.figure(figsize=(6, 6))
plt.title("Actors recognitions")
for i in range(30):
img = tf.keras.preprocessing.image.array_to_img(
list_Image[i],
data_format=None,
scale=True
)
img_array = tf.keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)
predictions = model.predict(img_array)
score = tf.nn.softmax(predictions[0])
plt.subplot(6, 6, i + 1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(list_file_actual[i])
plt.xlabel(str(round(score[tf.math.argmax(score).numpy()].numpy(), 2)) + ":" + str(list_label_actual[tf.math.argmax(score)]))
plt.show()
input('...')
Output: they are actually Thai actors; the broadcasts were collected from the Internet.
I want to show (plot with plt) some samples of data augmentation using the Keras ImageDataGenerator:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
rotation_range=20,
width_shift_range=0.1,
height_shift_range=0.1,
shear_range=0.1,
zoom_range=0.1,
horizontal_flip=True,
fill_mode = "nearset"
)
from tensorflow.keras.preprocessing import image
fnames = sorted([os.path.join(train_cats_dir, fname) for fname in os.listdir(train_cats_dir)])
img_path = fnames[3]
img = image.load_img(img_path, target_size=(150, 150))
x = image.img_to_array(img) # (150, 150, 3)
x = x.reshape((1,) + x.shape) # (1, 150, 150, 3)
i = 0
for batch in datagen.flow(x, batch_size=1):
plt.figure(i)
imgplot = plt.imshow(image.array_to_img(batch[0]))
i += 1
if i%4==0: break
plt.show()
I wrote my code like this, and it raises "RuntimeError: boundary mode not supported" at
for batch in datagen.flow(x, batch_size=1):
...
I don't know what the problem is.
I solved it:
from tensorflow.keras.preprocessing import image
from numpy import expand_dims
img = image.load_img('../dogs_vs_cats/cats_and_dogs_small/train/cats/cat.11.jpg')
data = image.img_to_array(img)
samples = expand_dims(data, 0)
it = datagen.flow(samples, batch_size=1)
for i in range(9):
plt.subplot(330 + 1 + i)
batch = it.next()
image = batch[0].astype('uint8')
plt.imshow(image)
plt.show()
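For reference, the original traceback is consistent with the fill_mode typo in the first snippet: ImageDataGenerator only accepts "constant", "nearest", "reflect", or "wrap", so "nearset" fails once flow() starts applying transforms. A corrected generator, as a sketch:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode="nearest",  # "nearset" raises "boundary mode not supported" in the ndimage backend
)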
This is a real-time sign-language detector. I reshaped my X_train, y_train, X_test, and y_test to add a CNN into my architecture (it was LSTM-only before), but I am getting errors on the prediction part about how to reshape the real-time input.
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.callbacks import TensorBoard
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
def mediapipe_detection(image,model):
image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
image.flags.writeable = False
results = model.process(image)
image.flags.writeable = True
image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR)
return image,results
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in
                     results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    lh = np.array([[res.x, res.y, res.z] for res in
                   results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in
                   results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    face = np.array([[res.x, res.y, res.z] for res in
                     results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    return np.concatenate([pose, face, lh, rh])
colors = [(245,117,16),(117,245,16),(16,117,245)]
def prob_viz(res,actions,input_frame,colors):
output_frame = input_frame.copy()
for num,prob in enumerate(res):
cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40),colors[num], -1)
cv2.putText(output_frame,actions[num],(0,85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1,
(255,255,255),2,cv2.LINE_AA)
return output_frame
DATA_PATH = os.path.join('MP_Data')
#Actions
actions = np.array(['hello','thanks','iloveyou'])
#30 videos worth of data
no_sequences = 30
#30 frames
sequence_length = 30
for action in actions:
for sequence in range(no_sequences):
try:
os.makedirs(os.path.join(DATA_PATH,action,str(sequence)))
except:
pass
label_map = {label:num for num, label in enumerate(actions)}
sequences, labels = [], []
for action in actions:
for sequence in range(no_sequences):
window = []
for frame_num in range(sequence_length):
res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
window.append(res)
sequences.append(window)
labels.append(label_map[action])
#------------------------------------------------------------------------------------
# the code above shows what my code looks like, but my question starts from here below
#------------------------------------------------------------------------------------
x_train, x_test, y_train, y_test=train_test_split(x, y, test_size=0.05)
# x_train.shape -> (85, 30, 1662)
# reshaping the input
x_train = x_train.reshape(-1, 300, 1662, 1) ; x_test = x_test.reshape(-1, 30, 1662, 1)  # note: 300 is likely a typo for 30, given x_train.shape above
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir = log_dir)
# define the model
model = Sequential()
model.add(TimeDistributed(Conv1D(3, 3, 1, activation='relu', input_shape=[30, 1662, 1])))  # (3, 128, 216, 1)
# model.add(TimeDistributed(Conv1D(3, 3, 1, activation='relu')))
model.add(TimeDistributed(MaxPooling1D(pool_size=(3,))))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(320, return_sequences=True, activation='relu'))
model.add(LSTM(640, return_sequences=True, activation='relu'))
model.add(LSTM(320, return_sequences=False, activation='relu'))
model.add(Dense(320, activation='relu'))
model.add(Dense(180, activation='relu'))
model.add(Dense(np.array(actions).shape[0], activation='softmax'))
res = [.2,0.7,.01]
actions[np.argmax(res)]
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
actions[np.argmax(res[1])]
model.load_weights('action.h5')
#############################################################################################
#Prediction
########################################################################################
#New Detection Variables
sequence = []
sentence = []
threshold = .4
cap = cv2.VideoCapture(0)
#Mediapipe Model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
while cap.isOpened():
#Read Feed
ret, frame = cap.read()
#Make detections
image,results = mediapipe_detection(frame,holistic)
#Prediciton Logic
keypoints = extract_keypoints(results)
sequence.insert(0,keypoints)
sequence = sequence[:30]
if len(sequence) == 30:
res = model.predict(np.expand_dims(sequence,axis=0))[0]
#Visualization
if res[np.argmax(res)] > threshold:
if len(sentence) > 0:
if actions[np.argmax(res)] != sentence[-1]:
sentence.append(actions[np.argmax(res)])
else:
sentence.append(actions[np.argmax(res)])
if len(sentence)>5:
sentence = sentence[-5:]
#Viz probability
image = prob_viz(res,actions,image,colors)
cv2.rectangle(image,(0,0),(640,40),(245,117,16),-1)
cv2.putText(image, ' '.join(sentence),(3,30),
cv2.FONT_HERSHEY_SIMPLEX, 1,(255,255,255),2,cv2.LINE_AA)
#Show to Screen
cv2.imshow('OpenCV feed', image)
#Breaking the Feed
if cv2.waitKey(10) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
But I am getting an error on the prediction part.
I adjusted it a bit so that it matches the dataset and the expected values; you can modify the target value in my sample. It is also possible to work with streams when the input is concatenated; you can do it with transform functions, or with a queue followed by transform functions.
[ Sample ]:
import tensorflow as tf
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp
from tensorflow.keras.callbacks import TensorBoard
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DATA_PATH = os.path.join('MP_Data')
#Actions
actions = np.array(['hello','thanks','iloveyou'])
#30 videos worth of data
no_sequences = 2
# no_sequences = 30
#30 frames
# sequence_length = 30
sequence_length = 2
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Definitions / Classes
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def load_target_wave( wave_file='F:\\temp\\Python\\Speech\\Piano\\Berklee44v4\\piano_G2.wav' ) :
test_file = tf.io.read_file( wave_file )
test_audio, sample_rates = tf.audio.decode_wav(contents=test_file)
return test_audio
def mediapipe_detection(image,model):
image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
image.flags.writeable = False
results = model.process(image)
image.flags.writeable = True
image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR)
return image,results
def extract_keypoints(results):
    # fixed: compute each landmark array only when present; fall back to zeros otherwise
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in
                     results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    if results.left_hand_landmarks:
        lh = np.array([[res.x, res.y, res.z] for res in
                       results.left_hand_landmarks.landmark]).flatten()
    else:
        lh = np.zeros(21*3)
    if results.right_hand_landmarks:
        rh = np.array([[res.x, res.y, res.z] for res in
                       results.right_hand_landmarks.landmark]).flatten()
    else:
        rh = np.zeros(21*3)
    if results.face_landmarks:
        face = np.array([[res.x, res.y, res.z] for res in
                         results.face_landmarks.landmark]).flatten()
    else:
        face = np.zeros(468*3)
    return np.concatenate([pose, face, lh, rh])
colors = [(245,117,16),(117,245,16),(16,117,245)]
def prob_viz(res,actions,input_frame,colors):
output_frame = input_frame.copy()
for num,prob in enumerate(res):
cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40),colors[num], -1)
cv2.putText(output_frame,actions[num],(0,85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1,
(255,255,255),2,cv2.LINE_AA)
return output_frame
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Generate DATA
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
wav_G2 = load_target_wave( )
for action in actions:
for sequence in range(no_sequences):
try:
os.makedirs(os.path.join(DATA_PATH,action,str(sequence)))
except:
pass
label_map = {label:num for num, label in enumerate(actions)}
sequences, labels = [], []
for action in actions:
for sequence in range(no_sequences):
window = []
for frame_num in range(sequence_length):
# res = np.load(os.path.join(DATA_PATH,action, str(sequence), "{}.npy".format(frame_num)))
res = wav_G2
window.append(res)
sequences.append(window)
labels.append(label_map[action])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
sequences = tf.cast( sequences, dtype=tf.int32 )
sequences = tf.constant( sequences, shape=( 12, 1, 2, 287232, 1 ) )
labels = tf.cast( labels, dtype=tf.int32 )
labels = tf.constant( labels, shape=( 12, 1, 1, ) )
dataset = tf.data.Dataset.from_tensor_slices(( sequences, labels ))
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir = log_dir)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 2, 287232, 1 )),
tf.keras.layers.Reshape(( 256, 2244, 1 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (4, 4), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Reshape((16128, 1120)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(192, activation='relu'),
tf.keras.layers.Dense(1),
])
model.summary()
res = [.2,0.7,.01]
actions[np.argmax(res)]
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
actions[np.argmax(res[1])]
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit(dataset, epochs=1 ,validation_data=(dataset))
# model.load_weights('action.h5')
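For the real-time reshape the question asks about, a hedged sketch, assuming the model was trained on windows of shape (30, 1662, 1) as in the question's x_test reshape: expand the collected keypoint window with a batch axis and a trailing channel axis before predicting.
import numpy as np
# sequence holds 30 keypoint vectors of length 1662 collected from the live feed.
window = np.array(sequence)                    # (30, 1662)
window = np.expand_dims(window, axis=(0, -1))  # (1, 30, 1662, 1) to match the CNN+LSTM input
res = model.predict(window)[0]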
So I'm doing this project on empty-shelf detection in a store, sending alerts through SNS, and I am not able to find any source on how to complete it. I'm coding on Google Colab.
I trained my images on YOLO and TensorFlow, and I have a working live feed showing me the object detection. But now I want to detect an empty shelf when the items are removed, then send an alert to a given number or account.
Can anyone help me on how to achieve this? Or is there any way to compare two images, such as the planogram and the captured feed, then send an alert about the missing item?
Thanks.
I can answer for the image-categorization task, but for SNS you need to request the allowed interfaces or follow the specified methods.
You can find code on the Internet, but the specification is important: in two-sided communication, both sides expect the same definitions.
Some message servers may delay or crash on forwarded communication when you do not send correctly formed messages.
We will help with the image-categorization task; a hedged SNS sketch follows below.
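For the SNS alert itself, a minimal hedged sketch using boto3; the topic ARN and item name here are assumptions, not from the original post:
import boto3
# Hypothetical topic ARN; replace with one provisioned in your AWS account.
TOPIC_ARN = "arn:aws:sns:us-east-1:123456789012:shelf-alerts"
sns = boto3.client("sns")
def alert_missing_item(item_name):
    # Publish a plain-text alert to every subscriber of the topic.
    sns.publish(
        TopicArn=TOPIC_ARN,
        Subject="Empty shelf detected",
        Message="Item missing from shelf: {}".format(item_name),
    )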
[ Sample ]: For the list of communication targets, you are expected to create folders or queues for the target server; you may reuse this code sample.
import tensorflow as tf
import tensorflow_io as tfio
import pandas as pd
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
list_label_actual = [ 'Candidt Kibt', 'Pikaploy' ]
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Dataset
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
variables = pd.read_excel('F:\\temp\\Python\\excel\\Book 7.xlsx', index_col=None, header=[0])
list_label = [ ]
list_Image = [ ]
list_file_actual = [ ]
for Index, Image, Label in variables.values:
print( Label )
list_label.append( Label )
image = tf.io.read_file( Image )
image = tfio.experimental.image.decode_tiff(image, index=0)
list_file_actual.append(image)
image = tf.image.resize(image, [32,32], method='nearest')
list_Image.append(image)
list_label = tf.cast( list_label, dtype=tf.int32 )
list_label = tf.constant( list_label, shape=( 33, 1, 1 ) )
list_Image = tf.cast( list_Image, dtype=tf.int32 )
list_Image = tf.constant( list_Image, shape=( 33, 1, 32, 32, 4 ) )
dataset = tf.data.Dataset.from_tensor_slices(( list_Image, list_label ))
list_Image = tf.constant( list_Image, shape=( 33, 32, 32, 4) ).numpy()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 32, 32, 4 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Dense(256, activation='relu'),
tf.keras.layers.Reshape((256, 32 * 32)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(196, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(196)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(192, activation='relu'),
tf.keras.layers.Dense(2),
])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Callback
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class custom_callback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs={}):
if( logs['accuracy'] >= 0.97 ):
self.model.stop_training = True
custom_callback = custom_callback()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam(
learning_rate=0.000001, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
name='Nadam'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=False,
reduction=tf.keras.losses.Reduction.AUTO,
name='sparse_categorical_crossentropy'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model.compile(optimizer=optimizer, loss=lossfn, metrics=['accuracy'] )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit( dataset, batch_size=100, epochs=50, callbacks=[custom_callback] )
plt.figure(figsize=(6,6))
plt.title("Actors recognitions")
for i in range(len(list_Image)):
img = tf.keras.preprocessing.image.array_to_img(
list_Image[i],
data_format=None,
scale=True
)
img_array = tf.keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)
predictions = model.predict(img_array)
score = tf.nn.softmax(predictions[0])
plt.subplot(6, 6, i + 1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(list_file_actual[i])
plt.xlabel(str(round(score[tf.math.argmax(score).numpy()].numpy(), 2)) + ":" + str(list_label_actual[tf.math.argmax(score)]))
plt.show()
input('...')
[ Output ]:
I just implemented an LSTM, but I'm not sure if I interpreted the structure correctly.
In this context, is testPredict = model.predict(Xtest) the last value of the sequence, and is therefore (after reversing the MinMaxScaler) the variable last_value = testPredict[-1] the prediction for the future?
from IPython.core.debugger import set_trace
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import yfinance as yf
import sklearn
from sklearn.preprocessing import MinMaxScaler
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import LSTM, Dense, Dropout, Flatten
from sklearn.metrics import mean_squared_error
from keras.layers import ConvLSTM2D
from keras.layers import Bidirectional
from keras.models import model_from_json
df = yf.download(tickers="BTC-USD", period="20wk", interval="60m")
df = df[["Close"]]
df["returns"] = df.Close.pct_change()
df["log_returns"] = np.log(1 + df["returns"])
df.dropna(inplace=True)
X = df[["Close", "log_returns"]].values
scaler = MinMaxScaler(feature_range=(0, 1)).fit(X)
X_scaled = scaler.transform(X)
y = [x[0] for x in X_scaled]
split = int(len(X_scaled) * 0.8)
X_train = X_scaled[:split]
X_test = X_scaled[split : len(X_scaled)]
y_train = y[:split]
y_test = y[split : len(y)]
assert len(X_train) == len(y_train)
assert len(X_test) == len(y_test)
n = 24 #analyze the last 24 prices
Xtrain = []
ytrain = []
Xtest = []
ytest = []
for i in range(n, len(X_train)):
Xtrain.append(X_train[i - n : i, : X_train.shape[1]])
ytrain.append(y_train[i])
for i in range(n, len(X_test)):
Xtest.append(X_test[i - n : i, : X_test.shape[1]])
ytest.append(y_test[i])
val = np.array(ytrain[0])
val = np.c_[val, np.zeros(val.shape)]
scaler.inverse_transform(val)
Xtrain, ytrain = (np.array(Xtrain), np.array(ytrain))
Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], Xtrain.shape[1], Xtrain.shape[2]))
Xtest, ytest = (np.array(Xtest), np.array(ytest))
Xtest = np.reshape(Xtest, (Xtest.shape[0], Xtest.shape[1], Xtest.shape[2]))
model = Sequential()
model.add(LSTM(8, return_sequences=True, input_shape=(Xtrain.shape[1], Xtrain.shape[2])))
#model.add(Bidirectional(LSTM(8, return_sequences=True, input_shape=(Xtrain.shape[1], Xtrain.shape[2]))))
model.add(LSTM(4))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(loss="mean_squared_error", optimizer="adam")
model.fit(Xtrain, ytrain, epochs=100, validation_data=(Xtest, ytest), batch_size=16, verbose=1)
trainPredict = model.predict(Xtrain)
testPredict = model.predict(Xtest)
trainPredict = np.c_[trainPredict, np.zeros(trainPredict.shape)]
testPredict = np.c_[testPredict, np.zeros(testPredict.shape)]
trainPredict = scaler.inverse_transform(trainPredict)
trainPredict = [x[0] for x in trainPredict]
testPredict = scaler.inverse_transform(testPredict)
testPredict = [x[0] for x in testPredict]
trainScore = mean_squared_error([x[0][0] for x in Xtrain], trainPredict, squared=False)
#print("Train Score: %.2f RMSE" % (trainScore))
testScore = mean_squared_error([x[0][0] for x in Xtest], testPredict, squared=False)
#print("Test Score: %.2f RMSE" % (testScore))
########################################################################################################################
last_value = (testPredict[-1])
I will show you a picture of the result for understanding. It does not change the truth that the labels and values match the criteria, but the order is reversed to verify the result. Sufficient data is required for a working model.
Selecting random pictures from the dataset, about 6-7 per actor in reversed order, verifies that the model works when predicting on new input; that is all.
A symmetric shape could tell you which action to respond with, but it is not a prediction of the future. In this case, to obtain a prediction result he selects the best match by scores or softmax; without softmax he can also use np.argmax on the sequence output (you need to inspect the model's output shape).
To predict on real-time data you need to feed in the current variables along with the history, as when creating a report; the model learns the change of the values within those scopes. A one-step-ahead sketch follows.
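To make the forecasting step concrete, here is a minimal hedged sketch of a true one-step-ahead forecast, reusing the question's own X_scaled, n, scaler, and model; the zero-padding mirrors how the question inverts the scaler:
import numpy as np
# The most recent n rows were never paired with a target during training,
# so predicting on them yields a genuine next-step forecast.
last_window = X_scaled[-n:].reshape(1, n, X_scaled.shape[1])
next_scaled = model.predict(last_window)                  # scaled Close forecast, shape (1, 1)
padded = np.c_[next_scaled, np.zeros(next_scaled.shape)]  # pad the log_returns column
next_close = scaler.inverse_transform(padded)[0][0]       # undo the MinMaxScaler
By contrast, testPredict[-1] is the estimate for the last window that already has a known target, i.e. a fitted value rather than a forecast of unseen data.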
[ Sample ]: In this prediction-target example you can see visually that the network learns correctly in both directions, and that you can use the network on new input; reversed order is only a verification, not something to rely on when you have no data.
import os
from os.path import exists
import tensorflow as tf
import tensorflow_io as tfio
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
PATH = os.path.join('F:\\datasets\\downloads\\Actors\\train\\Pikaploy', '*.tif')
PATH_2 = os.path.join('F:\\datasets\\downloads\\Actors\\train\\Candidt Kibt', '*.tif')
files = tf.data.Dataset.list_files(PATH)
files_2 = tf.data.Dataset.list_files(PATH_2)
list_file = []
list_file_actual = []
list_label = []
list_label_actual = [ 'Pikaploy', 'Pikaploy', 'Pikaploy', 'Pikaploy', 'Pikaploy', 'Candidt Kibt', 'Candidt Kibt', 'Candidt Kibt', 'Candidt Kibt', 'Candidt Kibt' ]
for file in files.take(5):
image = tf.io.read_file( file )
image = tfio.experimental.image.decode_tiff(image, index=0)
list_file_actual.append(image)
image = tf.image.resize(image, [32,32], method='nearest')
list_file.append(image)
list_label.append(1)
for file in files_2.take(5):
image = tf.io.read_file( file )
image = tfio.experimental.image.decode_tiff(image, index=0)
list_file_actual.append(image)
image = tf.image.resize(image, [32,32], method='nearest')
list_file.append(image)
list_label.append(9)
checkpoint_path = "F:\\models\\checkpoint\\" + os.path.basename(__file__).split('.')[0] + "\\TF_DataSets_01.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
loggings = "F:\\models\\checkpoint\\" + os.path.basename(__file__).split('.')[0] + "\\loggings.log"
if not exists(checkpoint_dir) :
os.mkdir(checkpoint_dir)
print("Create directory: " + checkpoint_dir)
log_dir = checkpoint_dir
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
dataset = tf.data.Dataset.from_tensor_slices((tf.constant(tf.cast(list_file, dtype=tf.int64), shape=(10, 1, 32, 32, 4), dtype=tf.int64),
tf.constant(list_label, shape=(10, 1, 1), dtype=tf.int64)))
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 32, 32, 4 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Reshape((128, 225)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(192, activation='relu'),
tf.keras.layers.Dense(10),
])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam(
learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
name='Nadam'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=False,
reduction=tf.keras.losses.Reduction.AUTO,
name='sparse_categorical_crossentropy'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model.compile(optimizer=optimizer, loss=lossfn, metrics=['accuracy'])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: FileWriter
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
if exists(checkpoint_path) :
model.load_weights(checkpoint_path)
print("model load: " + checkpoint_path)
input("Press Any Key!")
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit( dataset, batch_size=100, epochs=50 )
plt.figure(figsize=(5,2))
plt.title("Actors recognitions")
for i in range(len(list_file)):
img = tf.keras.preprocessing.image.array_to_img(
list_file[i],
data_format=None,
scale=True
)
img_array = tf.keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)
predictions = model.predict(img_array)
predictions = predictions[-1:]
score = tf.nn.softmax(predictions[0])
plt.subplot(5, 2, i + 1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(list_file_actual[i])
plt.xlabel(str(round(score[tf.math.argmax(score).numpy()].numpy(), 2)) + ":" + str(list_label_actual[tf.math.argmax(score)]))
plt.show()
input('...')