I am following Nicholas Renotte's tutorial on real-time hand-sign detection with TensorFlow and OpenCV and have finished the code.
import cv2
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

# ANNOTATION_PATH and detect_fn are defined earlier in the tutorial notebook
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH + '/label_map.pbtxt')

cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

while True:
    ret, frame = cap.read()
    image_np = np.array(frame)

    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)

    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    label_id_offset = 1
    image_np_with_detections = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,
        detections['detection_boxes'],
        detections['detection_classes'] + label_id_offset,
        detections['detection_scores'],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=5,
        min_score_thresh=.5,
        agnostic_mode=False)

    cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
So this code runs fine: it recognizes the hand sign, draws a box around it, and labels it. But I want to print the name of the recognized hand sign in the terminal itself (so I can use it with pyttsx3 to speak out the detected sign).
I tried just printing detections['detection_classes'], but that only gives an array of numeric ids. Can anyone explain how I can print the name of the detected object together with its score?
Thanks in advance, first post on Stack Overflow so please go easy on me.
detections['detection_classes'] returns the category id of each detected bounding box.
A category index is a dictionary that maps integer ids to dicts
containing categories, e.g. {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...}.
So if you print category_index, you will get something like this:
{1: {'id': 1, 'name': 'Aa'}, 2: {'id': 2, 'name': 'Bb'}, ...}
assuming you are dealing with hand signs for the letters of the alphabet.
With this knowledge, it is easy to print the label for each detected hand sign, together with its score.
# flatten the category_index to a single {id: name} dictionary
category_dict = {value.get('id'): value.get('name') for _, value in category_index.items()}

detected_signs = []
for sign_index, score in zip(detections['detection_classes'], detections['detection_scores']):
    if score >= 0.5:  # only report reasonably confident detections
        # apply the same label_id_offset used for visualization above
        sign_label = category_dict.get(sign_index + label_id_offset)
        detected_signs.append((sign_label, float(score)))
print(detected_signs)
# Feed detected_signs to a downstream system like pyttsx3 to speak out the signs
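If you then want the detected sign spoken aloud, a minimal sketch using pyttsx3 (assuming pyttsx3 is installed; speaking only the top-scoring sign here) could look like this:

import pyttsx3

engine = pyttsx3.init()

# speak the first (highest-scoring) detected sign, if any
if detected_signs:
    sign_label, score = detected_signs[0]
    engine.say(sign_label)
    engine.runAndWait()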
I resorted to using the cloud training workflow. Given the model it produced, I expected to be able to drop it directly into the code I have that works with other TFLite models, but the cloud-produced model doesn't work: I get "index out of range" when calling interpreter.get_tensor.
Here is my code, basically a modified example, where I can ingest a video and produce a video with results.
import argparse
import cv2
import numpy as np
import os
import sys
import importlib.util
# Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
                    required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
                    default='model.tflite')
                    # default='/tmp/detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
                    default='dict.txt')
                    # default='/tmp/coco_labels.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
                    default=0.5)
parser.add_argument('--video', help='Name of the video file',
                    default='test.mp4')
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
                    action='store_true')
args = parser.parse_args()
MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
VIDEO_NAME = args.video
min_conf_threshold = float(args.threshold)
use_TPU = args.edgetpu
# Import TensorFlow libraries
# If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow
# If using Coral Edge TPU, import the load_delegate library
pkg = importlib.util.find_spec('tensorflow')
if pkg is None:
    from tflite_runtime.interpreter import Interpreter
    if use_TPU:
        from tflite_runtime.interpreter import load_delegate
else:
    from tensorflow.lite.python.interpreter import Interpreter
    if use_TPU:
        from tensorflow.lite.python.interpreter import load_delegate
# If using Edge TPU, assign filename for Edge TPU model
if use_TPU:
# If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
if (GRAPH_NAME == 'detect.tflite'):
GRAPH_NAME = 'edgetpu.tflite'
# Get path to current working directory
CWD_PATH = os.getcwd()
# Path to video file
VIDEO_PATH = os.path.join(CWD_PATH,VIDEO_NAME)
# Path to .tflite file, which contains the model that is used for object detection
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
# Load the label map
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]
# Have to do a weird fix for label map if using the COCO "starter model" from
# https://www.tensorflow.org/lite/models/object_detection/overview
# First label is '???', which has to be removed.
if labels[0] == '???':
    del(labels[0])
# Load the Tensorflow Lite model.
# If using Edge TPU, use special load_delegate argument
if use_TPU:
    interpreter = Interpreter(model_path=PATH_TO_CKPT,
                              experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    print(PATH_TO_CKPT)
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
# Open video file
video = cv2.VideoCapture(VIDEO_PATH)
imW = video.get(cv2.CAP_PROP_FRAME_WIDTH)
imH = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
# Write the output at the input's native resolution instead of hard-coding 1920x1080
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(
    'M', 'J', 'P', 'G'), 10, (int(imW), int(imH)))
while video.isOpened():
    # Acquire frame and resize to expected shape [1xHxWx3]
    ret, frame = video.read()
    if not ret:
        break  # end of video
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects
    print(boxes)
    print(classes)
    print(scores)
    #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions,
            # need to force them to be within image using max() and min()
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 4)

            # Draw label
            object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
            label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                          (xmin + labelSize[0], label_ymin + baseLine - 10),
                          (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX,
                        0.7, (0, 0, 0), 2)  # Draw label text

    # All the results have been drawn on the frame, so it's time to display it.
    cv2.imshow('Object detector', frame)
    #output_rgb = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    out.write(frame)

    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break

# Clean up
video.release()
out.release()
cv2.destroyAllWindows()
Here is what the print statements should look like when using the canned tflite model:
[32. 76. 56. 76. 0. 61. 74. 0. 0. 0.]
[0.609375 0.48828125 0.44921875 0.44921875 0.4140625 0.40234375
0.37890625 0.3125 0.3125 0.3125 ]
[[-0.01923192 0.17330796 0.747546 0.8384144 ]
[ 0.01866053 0.5023282 0.39603746 0.6143299 ]
[ 0.01673795 0.47382414 0.34407628 0.5580931 ]
[ 0.11588445 0.78543806 0.8778869 1.0039229 ]
[ 0.8106107 0.70675755 1.0080075 0.89248717]
[ 0.84941524 0.06391776 1.0006479 0.28792098]
[ 0.05543692 0.53557926 0.40413857 0.62823087]
[ 0.07051808 -0.00938512 0.8822515 0.28100258]
[ 0.68205094 0.33990026 0.9940187 0.6020821 ]
[ 0.08010477 0.01998334 0.6011186 0.26135433]]
Here is the error when presented with the cloud-created model:
File "tflite_vid.py", line 124, in <module>
classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
IndexError: list index out of range
So I would kindly ask that someone explain either how to develop a TFLite model with TF2 in Python, or how to get the cloud to generate a usable TFLite model. Please, oh please, do not point me in a direction that entails wandering through Internet examples unless they are the actual gospel on how to do this.
The IndexError comes from output_details[1]: your model may expose only one output tensor, but the code tries to access a second one.
For more about running TFLite inference from Python, please refer to https://www.tensorflow.org/lite/guide/inference#load_and_run_a_model_in_python for guidance.
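To see what the cloud-produced model actually exposes, a quick diagnostic (a minimal sketch, assuming only that model.tflite is the file you downloaded) is to print every input and output tensor before wiring up any post-processing:

import tensorflow as tf

# hypothetical path to the downloaded model
interpreter = tf.lite.Interpreter(model_path='model.tflite')
interpreter.allocate_tensors()

# list every tensor the model exposes: name, shape and dtype
for detail in interpreter.get_input_details():
    print('input :', detail['name'], detail['shape'], detail['dtype'])
for detail in interpreter.get_output_details():
    print('output:', detail['name'], detail['shape'], detail['dtype'])

If only one output is listed, the model was exported with a different signature than the four-tensor (boxes, classes, scores, count) layout your script expects, and the output indices need to be remapped accordingly.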
I have two NetCDF files, both containing unstructured grids. The first grid has 3 vertices per face and the second has 4 vertices per face.
For the grid containing 3 vertices per face I can use matplotlib.tri for visualization (like triplot_demo.py):
import matplotlib.pyplot as plt
import matplotlib.tri as tri
import numpy as np
xy = np.asarray([
[-0.101, 0.872], [-0.080, 0.883], [-0.069, 0.888], [-0.054, 0.890],
[-0.045, 0.897], [-0.057, 0.895], [-0.073, 0.900], [-0.087, 0.898],
[-0.090, 0.904], [-0.069, 0.907], [-0.069, 0.921], [-0.080, 0.919],
[-0.073, 0.928], [-0.052, 0.930], [-0.048, 0.942], [-0.062, 0.949],
[-0.054, 0.958], [-0.069, 0.954], [-0.087, 0.952], [-0.087, 0.959],
[-0.080, 0.966], [-0.085, 0.973], [-0.087, 0.965], [-0.097, 0.965],
[-0.097, 0.975], [-0.092, 0.984], [-0.101, 0.980], [-0.108, 0.980],
[-0.104, 0.987], [-0.102, 0.993], [-0.115, 1.001], [-0.099, 0.996],
[-0.101, 1.007], [-0.090, 1.010], [-0.087, 1.021], [-0.069, 1.021],
[-0.052, 1.022], [-0.052, 1.017], [-0.069, 1.010], [-0.064, 1.005],
[-0.048, 1.005], [-0.031, 1.005], [-0.031, 0.996], [-0.040, 0.987],
[-0.045, 0.980], [-0.052, 0.975], [-0.040, 0.973], [-0.026, 0.968],
[-0.020, 0.954], [-0.006, 0.947], [ 0.003, 0.935], [ 0.006, 0.926],
[ 0.005, 0.921], [ 0.022, 0.923], [ 0.033, 0.912], [ 0.029, 0.905],
[ 0.017, 0.900], [ 0.012, 0.895], [ 0.027, 0.893], [ 0.019, 0.886],
[ 0.001, 0.883], [-0.012, 0.884], [-0.029, 0.883], [-0.038, 0.879],
[-0.057, 0.881], [-0.062, 0.876], [-0.078, 0.876], [-0.087, 0.872],
[-0.030, 0.907], [-0.007, 0.905], [-0.057, 0.916], [-0.025, 0.933],
[-0.077, 0.990], [-0.059, 0.993]])
x = np.degrees(xy[:, 0])
y = np.degrees(xy[:, 1])
triangles = np.asarray([
[65, 44, 20],
[65, 60, 44]])
triang = tri.Triangulation(x, y, triangles)
plt.figure()
plt.gca().set_aspect('equal')
plt.triplot(triang, 'go-', lw=1.0)
plt.title('triplot of user-specified triangulation')
plt.xlabel('Longitude (degrees)')
plt.ylabel('Latitude (degrees)')
plt.show()
(In the resulting figure, the index of each point is annotated next to it.)
But how do I visualize the unstructured grid containing 4 vertices per face (quadrilaterals)? Following the previous example, my faces look like:
quatrang = np.asarray([
[65, 60, 44, 20]])
Obviously trying tri.Triangulation doesn't work:
quatr = tri.Triangulation(x, y, quatrang)
ValueError: triangles must be a (?,3) array
I cannot find anything in the matplotlib libraries regarding 4 vertices per face. Any help is greatly appreciated.
EDIT: Changed the question based upon a minimal, complete and verifiable example
As commented already, since there is no Quatrangulation or similar, there is no standard matplotlib way to produce a plot like triplot with four points per shape.
Of course you could triangulate your mesh again to obtain 2 triangles per quadrilateral (see the sketch at the end of this answer). Or, you can plot a PolyCollection of the shapes, given their coordinates in space. The following shows the latter, defining a quatplot function which takes the coordinates and the indices of the vertices as input and draws a PolyCollection of them on the axes.
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.collections
xy = np.asarray([
[-0.101, 0.872], [-0.080, 0.883], [-0.069, 0.888], [-0.054, 0.890],
[-0.090, 0.904], [-0.069, 0.907], [-0.069, 0.921], [-0.080, 0.919],
[-0.080, 0.966], [-0.085, 0.973], [-0.087, 0.965], [-0.097, 0.965],
[-0.104, 0.987], [-0.102, 0.993], [-0.115, 1.001], [-0.099, 0.996],
[-0.052, 1.022], [-0.052, 1.017], [-0.069, 1.010], [-0.064, 1.005],
[-0.045, 0.980], [-0.052, 0.975], [-0.040, 0.973], [-0.026, 0.968],
[ 0.017, 0.900], [ 0.012, 0.895], [ 0.027, 0.893], [ 0.019, 0.886],
[ 0.001, 0.883], [-0.012, 0.884], [-0.029, 0.883], [-0.038, 0.879],
[-0.030, 0.907], [-0.007, 0.905], [-0.057, 0.916], [-0.025, 0.933],
[-0.077, 0.990], [-0.059, 0.993]])
x = np.degrees(xy[:, 0])
y = np.degrees(xy[:, 1])
quatrang = np.asarray([
[19,13,10,22], [35,7,3,28]])
def quatplot(x, y, quatrangles, ax=None, **kwargs):
    if not ax:
        ax = plt.gca()
    xy = np.c_[x, y]
    verts = xy[quatrangles]
    pc = matplotlib.collections.PolyCollection(verts, **kwargs)
    ax.add_collection(pc)
    ax.autoscale()

plt.figure()
plt.gca().set_aspect('equal')

quatplot(x, y, quatrang, ax=None, color="crimson", facecolor="None")
plt.plot(x, y, marker="o", ls="", color="crimson")
plt.title('quatplot of user-specified quatrangulation')
plt.xlabel('Longitude (degrees)')
plt.ylabel('Latitude (degrees)')

for i, (xi, yi) in enumerate(np.degrees(xy)):
    plt.text(xi, yi, i, size=8)

plt.show()
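For completeness, the first option mentioned above, re-triangulating the mesh, could look like the following minimal sketch. It assumes each quadrilateral's vertices are listed in order around the face, so a quad [a, b, c, d] splits cleanly into the triangles [a, b, c] and [a, c, d]:

import matplotlib.pyplot as plt
import matplotlib.tri as tri
import numpy as np

def quads_to_tris(quads):
    # split each quad [a, b, c, d] into triangles [a, b, c] and [a, c, d];
    # assumes the four vertex indices are ordered around the face
    quads = np.asarray(quads)
    return np.concatenate([quads[:, [0, 1, 2]], quads[:, [0, 2, 3]]])

# reusing x, y and quatrang from the answer above
triang = tri.Triangulation(x, y, quads_to_tris(quatrang))

plt.figure()
plt.gca().set_aspect('equal')
plt.triplot(triang, 'go-', lw=1.0)
plt.show()

Note that this also draws the diagonal each quad was split along, which is why the PolyCollection approach above gives a cleaner picture of the original mesh.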