TensorFlow 2 print object name if object is detected - tensorflow

I'm trying to write code, based on a TensorFlow tutorial, that detects objects with a camera. I want the name of a detected object to be printed once, as soon as it is detected.
# Live object-detection loop: grab frames from the default camera, run the
# detector, draw the results, and print each detected label's name once.
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Label ids already reported, so each object name is printed only the first
# time it is detected.
announced = set()

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:  # frame grab failed (camera unplugged / stream ended)
        break
    image_np = np.array(frame)

    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)

    # Strip the batch dimension and keep only the first num_detections entries.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    label_id_offset = 1
    image_np_with_detections = image_np.copy()

    # Print each confidently-detected label once, as soon as it appears.
    # Gating on the score avoids reacting to low-confidence candidates.
    confident = detections['detection_scores'] >= .8
    for class_id in detections['detection_classes'][confident] + label_id_offset:
        label_id = int(class_id)
        if label_id not in announced:
            announced.add(label_id)
            print(category_index[label_id]['name'])

    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,
        detections['detection_boxes'],
        detections['detection_classes']+label_id_offset,
        detections['detection_scores'],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=5,
        min_score_thresh=.8,
        agnostic_mode=False)

    cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))

    if cv2.waitKey(10) & 0xFF == ord('q'):
        cap.release()
        cv2.destroyAllWindows()
        break
Here are the names of my labels that have been learned
# Class labels learned by the model; ids must match the ids used in training.
labels = [{'name':'phone', 'id':1}, {'name':'headphones', 'id':2}, {'name':'glasses', 'id':3}, {'name':'mug', 'id':4}]

# Write the labels in the pbtxt label-map format expected by the
# TF Object Detection API (one `item { name id }` entry per label).
with open(files['LABELMAP'], 'w') as f:
    for label in labels:
        f.write('item { \n')
        f.write('\tname:\'{}\'\n'.format(label['name']))
        f.write('\tid:{}\n'.format(label['id']))
        f.write('}\n')

Related

TensorFlow 2 print if object is detected

I'm trying to print "test" while detecting an object with label id 1, but the word "test" is printed non-stop.
# 'detection_classes' contains ALL candidate detections, including very
# low-confidence ones, so `1 in detections['detection_classes']` is true on
# almost every frame -- that is why "test" printed non-stop.  Gate the check
# on the detection score instead.
confident_classes = detections['detection_classes'][detections['detection_scores'] >= .8]
if 1 in confident_classes:
    print("test")
# Live detection loop that prints the NAME of each label detected with high
# confidence, once per label.  Checking the raw classes array fires on almost
# every frame because it also holds low-score candidates; gating on
# detection_scores stops the non-stop printing.
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

already_printed = set()  # label ids reported so far

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:  # frame grab failed
        break
    image_np = np.array(frame)

    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)

    # Drop the batch dimension; keep only the first num_detections entries.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    label_id_offset = 1
    image_np_with_detections = image_np.copy()

    # Print the display name of every label above the score threshold, but
    # only the first time it is detected.
    for class_id in detections['detection_classes'][detections['detection_scores'] >= .8]:
        label_id = int(class_id) + label_id_offset
        if label_id not in already_printed:
            already_printed.add(label_id)
            print(category_index[label_id]['name'])

    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,
        detections['detection_boxes'],
        detections['detection_classes']+label_id_offset,
        detections['detection_scores'],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=5,
        min_score_thresh=.8,
        agnostic_mode=False)

    cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))

    if cv2.waitKey(10) & 0xFF == ord('q'):
        cap.release()
        cv2.destroyAllWindows()
        break
I am actually interested in printing the name of the label id that gets detected on the screen.

Faster RCNN Bounding Box Coordinate

I trained a model using Faster RCNN, this model is used to follow the strips.
here is the output of my model
The python code I use to get this output is as follows:
# Stand-alone Faster R-CNN inference on a single image using a frozen
# TF1-style graph: load the graph, run one session pass, draw the result.
import cv2
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Input image and model/label paths.
IMAGE = "test6.JPG"
MODEL_NAME = 'D:/object_detection/inference_graph'
PATH_TO_CKPT = "D:/object_detection/inference_graph/frozen_inference_graph.pb"
PATH_TO_LABELS = "D:/object_detection/training/labelmap.pbtxt"
PATH_TO_IMAGE = "D:/object_detection/images/" + IMAGE
NUM_CLASSES = 2

# Build the id -> {id, name} category index used by the visualizer.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Load the frozen graph into a fresh tf.Graph and open a session on it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.compat.v1.GraphDef()
    with tf.compat.v2.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
    sess = tf.compat.v1.Session(graph=detection_graph)

# Handles to the graph's input and output tensors.
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')

# Read the image (BGR), convert to RGB and add the batch dimension.
image = cv2.imread(PATH_TO_IMAGE)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_expanded = np.expand_dims(image_rgb, axis=0)

# Single inference pass.
(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_expanded})

# Draw boxes and labels on the original BGR image and display it.
vis_util.visualize_boxes_and_labels_on_image_array(
    image,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=8,
    min_score_thresh=0.60)
cv2.imshow('Object detector', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
My aim is to obtain the pixel coordinates of the boxes in the photo.
To do this, I tried:
# NOTE(review): visualize_boxes_and_labels_on_image_array() appears to draw
# onto `image` and return the annotated image array, not the box coordinates
# -- check utils/visualization_utils.py.  If so, printing its return value
# shows pixel data rather than coordinates.
visulaize = vis_util.visualize_boxes_and_labels_on_image_array(
image,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=1,
min_score_thresh=0.90)
print(visulaize)
and i tried:
# Run the detection graph once; `perception` keeps the raw batched outputs
# returned by sess.run, and the individual arrays are unpacked from it.
fetches = [detection_boxes, detection_scores, detection_classes, num_detections]
perception = sess.run(fetches, feed_dict={image_tensor: image_expanded})
boxes, scores, classes, num = perception
print(perception)
then i tried:
# Iterate over the first axis of `boxes` and print non-empty rows.
# NOTE(review): `boxes` comes back from sess.run with a leading batch
# dimension (likely shape (1, max_detections, 4)), so boxes.shape[0] is 1
# and this loop only visits the whole batch row -- iterate over boxes[0]
# to walk the individual boxes.  Confirm with print(boxes.shape).
n = boxes.shape[0]
for i in range(n):
    if not np.any(boxes[i]):  # skip all-zero (padding) rows
        continue
    print(boxes[i])
print(np.squeeze(boxes))
Lastly, I tried the following
# NOTE(review): `boxes` presumably has shape (1, max_detections, 4), so
# unpacking it into four names raises ValueError; unpack one row instead,
# e.g. y_min, x_min, y_max, x_max = np.squeeze(boxes)[0].  Likewise
# `detection_boxes` is a graph Tensor handle, not a value.
x, y, h, w = boxes
print(x, y, h, w)
print(detection_boxes)
print(boxes)
x, y, w, h = detection_boxes
print(x, y, w, h)
print(np.squeeze(boxes))  # fixed typo: was np.squeenze (AttributeError)
print(boxes.shape)
but none of them gave satisfactory results.
I need your help to obtain the coordinates of the boxes.
You need to apply non-maximum suppression (NMS) and denormalize the boxes.
def apply_non_max_suppression(boxes, scores, iou_thresh=.45, top_k=200):
    """Apply non maximum suppression.

    # Arguments
        boxes: Numpy array, box coordinates of shape (num_boxes, 4)
            where the columns correspond to y_min, x_min, y_max, x_max
            (TF Object Detection API order).
        scores: Numpy array, of scores given for each box in 'boxes'
        iou_thresh: float, intersection over union threshold
            for removing boxes.
        top_k: int, number of maximum objects per class

    # Returns
        selected_indices: Numpy array, selected indices of kept boxes.
        num_selected_boxes: int, number of selected boxes.
    """
    selected_indices = np.zeros(shape=len(scores))
    if boxes is None or len(boxes) == 0:
        # Fixed: previously returned a bare array here, unlike the normal
        # (indices, count) tuple -- callers that unpack would crash.
        return selected_indices.astype(int), 0
    # Column order is y_min, x_min, y_max, x_max.
    x_min = boxes[:, 1]
    y_min = boxes[:, 0]
    x_max = boxes[:, 3]
    y_max = boxes[:, 2]
    areas = (x_max - x_min) * (y_max - y_min)
    # Candidates sorted by ascending score, capped at the top_k best.
    remaining_sorted_box_indices = np.argsort(scores)
    remaining_sorted_box_indices = remaining_sorted_box_indices[-top_k:]
    num_selected_boxes = 0
    while len(remaining_sorted_box_indices) > 0:
        # Greedily keep the highest-scoring remaining box...
        best_score_args = remaining_sorted_box_indices[-1]
        selected_indices[num_selected_boxes] = best_score_args
        num_selected_boxes = num_selected_boxes + 1
        if len(remaining_sorted_box_indices) == 1:
            break
        remaining_sorted_box_indices = remaining_sorted_box_indices[:-1]
        best_x_min = x_min[best_score_args]
        best_y_min = y_min[best_score_args]
        best_x_max = x_max[best_score_args]
        best_y_max = y_max[best_score_args]
        remaining_x_min = x_min[remaining_sorted_box_indices]
        remaining_y_min = y_min[remaining_sorted_box_indices]
        remaining_x_max = x_max[remaining_sorted_box_indices]
        remaining_y_max = y_max[remaining_sorted_box_indices]
        # ...and compute its IoU against every remaining candidate.
        inner_x_min = np.maximum(remaining_x_min, best_x_min)
        inner_y_min = np.maximum(remaining_y_min, best_y_min)
        inner_x_max = np.minimum(remaining_x_max, best_x_max)
        inner_y_max = np.minimum(remaining_y_max, best_y_max)
        inner_box_widths = inner_x_max - inner_x_min
        inner_box_heights = inner_y_max - inner_y_min
        inner_box_widths = np.maximum(inner_box_widths, 0.0)
        inner_box_heights = np.maximum(inner_box_heights, 0.0)
        intersections = inner_box_widths * inner_box_heights
        remaining_box_areas = areas[remaining_sorted_box_indices]
        best_area = areas[best_score_args]
        unions = remaining_box_areas + best_area - intersections
        intersec_over_union = intersections / unions
        # Drop candidates that overlap the kept box too much.
        intersec_over_union_mask = intersec_over_union <= iou_thresh
        remaining_sorted_box_indices = remaining_sorted_box_indices[
            intersec_over_union_mask]
    return selected_indices.astype(int), num_selected_boxes
def denormalize_box(box, image_shape):
    """Scales corner box coordinates from normalized values to image dimensions.

    # Arguments
        box: Numpy array or sequence containing corner box coordinates in
            normalized [y_min, x_min, y_max, x_max] order.
        image_shape: List of integers with (height, width).

    # Returns
        Box corner coordinates in image (pixel) dimensions, as
        [y_min, x_min, y_max, x_max] ints.
    """
    y_min, x_min, y_max, x_max = box[:4]
    height, width = image_shape
    # x coordinates scale with width, y coordinates with height.
    x_min = int(x_min * width)
    y_min = int(y_min * height)
    x_max = int(x_max * width)
    y_max = int(y_max * height)
    return [y_min, x_min, y_max, x_max]
# Run the detector once, then post-process: confidence filtering, NMS, and
# denormalization of the kept boxes to pixel coordinates.
(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_expanded})

conf_threshold = 0.5   # minimum score for a detection to be kept
nms_threshold = 0.45   # IoU above which overlapping boxes are suppressed
image_shape = image.shape[:2]  # (height, width) for denormalize_box

# Filtering the boxes based on conf_threshold.  np.where returns a 1-tuple
# of index arrays, so each filtered_* list has one element: the filtered array.
filtered_scores = [scores[0][i] for i in np.where(scores[0] > conf_threshold)]
filtered_boxes = [boxes[0][i] for i in np.where(scores[0] > conf_threshold)]
filtered_classes = [classes[0][i] for i in np.where(scores[0] > conf_threshold)]

if len(filtered_scores[0]) != 0:
    # NMS thresholding
    indices, count = apply_non_max_suppression(filtered_boxes[0], filtered_scores[0], nms_threshold, 200)
    selected_indices = indices[:count]
    # Getting the final boxes, then converting them to pixel coordinates
    # in [y_min, x_min, y_max, x_max] order.
    final_boxes = filtered_boxes[0][selected_indices]
    final_scores = filtered_scores[0][selected_indices]
    final_classes = filtered_classes[0][selected_indices]
    final_boxes = [denormalize_box(box, image_shape) for box in final_boxes]

plt.show() doesn't show image

I can't find why this plt.show() is not working. I'm using the code given in Tensorflow:
https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/auto_examples/plot_object_detection_saved_model.html
I found solutions saying that I should use plt.show(block=false) and add plt.pause(5) but it didn't work. Please help.
After running the code I get:
Running inference for image... Done, but it doesn't show any image.
# All outputs are batches tensors.
# Convert to numpy arrays, and take index [0] to remove the batch dimension.
# We're only interested in the first num_detections.
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
for key, value in detections.items()}
detections['num_detections'] = num_detections
# detection_classes should be ints.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
image_np_with_detections = image_np.copy()
# Draw boxes and labels onto the copied frame in place.
viz_utils.visualize_boxes_and_labels_on_image_array(
image_np_with_detections,
detections['detection_boxes'],
detections['detection_classes'],
detections['detection_scores'],
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=200,
min_score_thresh=.30,
agnostic_mode=False)
plt.figure()
plt.imshow(image_np_with_detections)
print('Done')
# NOTE(review): if no window appears, matplotlib is probably running a
# non-interactive backend (e.g. Agg) -- check matplotlib.get_backend() and
# select a GUI backend (TkAgg/Qt5Agg) before importing pyplot.  TODO confirm.
plt.show()

How to count the number detected object (in bounding box) with tensorflow object detection API

i use tutorial from edje electronics with Faster R-CNN and it's works
but i want to improve it. i want to count the object
The question is: how can I remove the accuracy percentage and replace it with the number of counted bounding boxes?
i don't know which one i must add and remove it to counting the bounding box
here is the code
# Count-objects demo: run a frozen Faster R-CNN graph over a video and show
# the number of confident detections on each frame.
import os
import cv2
import numpy as np
import tensorflow as tf
import sys

sys.path.append("..")
from utils import label_map_util
from utils import visualization_utils as vis_util

MODEL_NAME = 'inference_graph'
VIDEO_NAME = 'animal.mov'

# Grab path to current working directory
CWD_PATH = os.getcwd()

# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')

# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,'training','labelmap.pbtxt')
PATH_TO_VIDEO = os.path.join(CWD_PATH,VIDEO_NAME)
NUM_CLASSES = 6

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Load the frozen graph and open a session on it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)

# Define input and output tensors (i.e. data) for the object detection classifier
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')

video = cv2.VideoCapture(PATH_TO_VIDEO)
while(video.isOpened()):
    ret, frame = video.read()
    if not ret:  # end of video: read() returns False and frame is None
        break
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_expanded = np.expand_dims(frame_rgb, axis=0)
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: frame_expanded})
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.60)
    # Number of boxes above the same display threshold -- i.e. the count of
    # bounding boxes actually drawn -- overlaid on the frame.
    object_count = int(np.sum(np.squeeze(scores) >= 0.60))
    cv2.putText(frame, 'Objects: {}'.format(object_count), (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow('Object detector', frame)
    if cv2.waitKey(1) == ord('q'):
        break

video.release()
cv2.destroyAllWindows()
You have to modify the visualize_boxes_and_labels_on_image_array() function in utils/visualization_utils.py to remove the conf score display and show length of boxes array

How to use GPU to detect object using pre-trained models in Tensorflow?

I'm using Tensorflow to detect object based on this tutorial. The reason why it's running so slow is this line output_dict =sess.run(tensor_dict,
feed_dict={image_tensor: np.expand_dims(image, 0)}). And below is the whole function code:
def run_inference_for_single_image(image, graph):
    """Run the detection graph on one image and return its outputs.

    # Arguments
        image: numpy array of image pixels, shape (height, width, 3)
            -- assumed from the image.shape[0]/[1] usage below; confirm.
        graph: tf.Graph holding the frozen detection model.

    # Returns
        output_dict with 'num_detections' (int), 'detection_boxes',
        'detection_scores', 'detection_classes' and, when the model emits
        them, 'detection_masks' -- all with the batch dimension removed.
    """
    with graph.as_default():
        # NOTE(review): for a frozen graph, op placement was fixed when the
        # graph was built, so this tf.device context does not by itself move
        # the model to the GPU -- verify with log_device_placement=True.
        with tf.device('/gpu:0'):
            print('GPU is using')
            with tf.Session() as sess:
                time0 = datetime.datetime.now()
                # Get handles to input and output tensors
                ops = tf.get_default_graph().get_operations()
                all_tensor_names = {output.name for op in ops for output in op.outputs}
                tensor_dict = {}
                for key in [
                        'num_detections', 'detection_boxes', 'detection_scores',
                        'detection_classes', 'detection_masks'
                ]:
                    tensor_name = key + ':0'
                    if tensor_name in all_tensor_names:
                        tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                            tensor_name)
                time1 = datetime.datetime.now()
                if 'detection_masks' in tensor_dict:
                    # The following processing is only for single image
                    detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                    detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                    # Reframe is required to translate mask from box coordinates to
                    # image coordinates and fit the image size.
                    real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                    detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                    detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes, image.shape[0], image.shape[1])
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                    # Follow the convention by adding back the batch dimension
                    tensor_dict['detection_masks'] = tf.expand_dims(
                        detection_masks_reframed, 0)
                image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
                time2 = datetime.datetime.now()
                # Run inference
                output_dict = sess.run(tensor_dict,
                                       feed_dict={image_tensor: np.expand_dims(image, 0)})
                time3 = datetime.datetime.now()
                # all outputs are float32 numpy arrays, so convert types as appropriate
                output_dict['num_detections'] = int(output_dict['num_detections'][0])
                output_dict['detection_classes'] = output_dict[
                    'detection_classes'][0].astype(np.uint8)
                output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
                output_dict['detection_scores'] = output_dict['detection_scores'][0]
                if 'detection_masks' in output_dict:
                    output_dict['detection_masks'] = output_dict['detection_masks'][0]
                time4 = datetime.datetime.now()
                print(time1-time0, time2-time1, time3-time2, time4-time3)
                return output_dict
I don't know how to make the GPU handle tf.Session().run(). Can anyone show me how to run session.run on the GPU?