I'm trying to print "test" while detecting an object with leable id 1. The word test is printed non-stop :(
if 1 in detections['detection_classes']:
print("test")
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
while cap.isOpened():
ret, frame = cap.read()
image_np = np.array(frame)
input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
detections = detect_fn(input_tensor)
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
for key, value in detections.items()}
detections['num_detections'] = num_detections
# detection_classes should be ints.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
label_id_offset = 1
image_np_with_detections = image_np.copy()
viz_utils.visualize_boxes_and_labels_on_image_array(
image_np_with_detections,
detections['detection_boxes'],
detections['detection_classes']+label_id_offset,
detections['detection_scores'],
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=5,
min_score_thresh=.8,
agnostic_mode=False)
cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))
if cv2.waitKey(10) & 0xFF == ord('q'):
cap.release()
cv2.destroyAllWindows()
break
I am interested in printing the name id actually gets detected on the screen.
Related
I'm trying to write code using Tensorflow from tutorial for detecting objects with a camera, I want the detected object to print once as soon as it is detected.
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
while cap.isOpened():
ret, frame = cap.read()
image_np = np.array(frame)
input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
detections = detect_fn(input_tensor)
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
for key, value in detections.items()}
detections['num_detections'] = num_detections
# detection_classes should be ints.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
label_id_offset = 1
image_np_with_detections = image_np.copy()
viz_utils.visualize_boxes_and_labels_on_image_array(
image_np_with_detections,
detections['detection_boxes'],
detections['detection_classes']+label_id_offset,
detections['detection_scores'],
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=5,
min_score_thresh=.8,
agnostic_mode=False)
cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))
if cv2.waitKey(10) & 0xFF == ord('q'):
cap.release()
cv2.destroyAllWindows()
break
Here are the names of my labels that have been learned
labels = [{'name':'phone', 'id':1}, {'name':'headphones', 'id':2}, {'name':'glasses', 'id':3}, {'name':'mug', 'id':4}]
with open(files['LABELMAP'], 'w') as f:
for label in labels:
f.write('item { \n')
f.write('\tname:\'{}\'\n'.format(label['name']))
f.write('\tid:{}\n'.format(label['id']))
f.write('}\n')
I trained a model using Faster RCNN, this model is used to follow the strips.
here is the output of my model
The python code I use to get this output is as follows:
import cv2
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
IMAGE = "test6.JPG"
MODEL_NAME = 'D:/object_detection/inference_graph'
PATH_TO_CKPT = "D:/object_detection/inference_graph/frozen_inference_graph.pb"
PATH_TO_LABELS = "D:/object_detection/training/labelmap.pbtxt"
PATH_TO_IMAGE = "D:/object_detection/images/" + IMAGE
NUM_CLASSES = 2
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.compat.v1.GraphDef()
with tf.compat.v2.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
sess = tf.compat.v1.Session(graph=detection_graph)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
image = cv2.imread(PATH_TO_IMAGE)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_expanded = np.expand_dims(image_rgb, axis=0)
(boxes, scores, classes, num) = sess.run(
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: image_expanded})
vis_util.visualize_boxes_and_labels_on_image_array(
image,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8,
min_score_thresh=0.60)
cv2.imshow('Object detector', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
my aim is to reach the coordinates of the boxes in the photo
for this i tried:
visulaize = vis_util.visualize_boxes_and_labels_on_image_array(
image,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=1,
min_score_thresh=0.90)
print(visulaize)
and i tried:
perception = (boxes, scores, classes, num) = sess.run(
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: image_expanded})
print(perception)
then i tried:
n=boxes.shape[0]
for i in range(n):
if not np.any(boxes[i]):
continue
print(boxes[i])
print(np.squeeze(boxes))
Lastly, I tried the following
x,y,h,w=boxes
print(x,y,h,w)
print(detection_boxes)
print(boxes)
x,y,w,h=detection_boxes
print(x,y,w,h)
print(np.squeenze(boxes))
print(boxes.shape)
but none of them gave satisfactory results
I need your help to reach the coordinates of the boxes
You need to apply nms and denormalize the boxes.
def apply_non_max_suppression(boxes, scores, iou_thresh=.45, top_k=200):
"""Apply non maximum suppression.
# Arguments
boxes: Numpy array, box coordinates of shape (num_boxes, 4)
where each columns corresponds to x_min, y_min, x_max, y_max
scores: Numpy array, of scores given for each box in 'boxes'
iou_thresh : float, intersection over union threshold
for removing boxes.
top_k: int, number of maximum objects per class
# Returns
selected_indices: Numpy array, selected indices of kept boxes.
num_selected_boxes: int, number of selected boxes.
"""
selected_indices = np.zeros(shape=len(scores))
if boxes is None or len(boxes) == 0:
return selected_indices
# x_min = boxes[:, 0]
# y_min = boxes[:, 1]
# x_max = boxes[:, 2]
# y_max = boxes[:, 3]
x_min = boxes[:, 1]
y_min = boxes[:, 0]
x_max = boxes[:, 3]
y_max = boxes[:, 2]
areas = (x_max - x_min) * (y_max - y_min)
remaining_sorted_box_indices = np.argsort(scores)
remaining_sorted_box_indices = remaining_sorted_box_indices[-top_k:]
num_selected_boxes = 0
while len(remaining_sorted_box_indices) > 0:
best_score_args = remaining_sorted_box_indices[-1]
selected_indices[num_selected_boxes] = best_score_args
num_selected_boxes = num_selected_boxes + 1
if len(remaining_sorted_box_indices) == 1:
break
remaining_sorted_box_indices = remaining_sorted_box_indices[:-1]
best_x_min = x_min[best_score_args]
best_y_min = y_min[best_score_args]
best_x_max = x_max[best_score_args]
best_y_max = y_max[best_score_args]
remaining_x_min = x_min[remaining_sorted_box_indices]
remaining_y_min = y_min[remaining_sorted_box_indices]
remaining_x_max = x_max[remaining_sorted_box_indices]
remaining_y_max = y_max[remaining_sorted_box_indices]
inner_x_min = np.maximum(remaining_x_min, best_x_min)
inner_y_min = np.maximum(remaining_y_min, best_y_min)
inner_x_max = np.minimum(remaining_x_max, best_x_max)
inner_y_max = np.minimum(remaining_y_max, best_y_max)
inner_box_widths = inner_x_max - inner_x_min
inner_box_heights = inner_y_max - inner_y_min
inner_box_widths = np.maximum(inner_box_widths, 0.0)
inner_box_heights = np.maximum(inner_box_heights, 0.0)
intersections = inner_box_widths * inner_box_heights
remaining_box_areas = areas[remaining_sorted_box_indices]
best_area = areas[best_score_args]
unions = remaining_box_areas + best_area - intersections
intersec_over_union = intersections / unions
intersec_over_union_mask = intersec_over_union <= iou_thresh
remaining_sorted_box_indices = remaining_sorted_box_indices[
intersec_over_union_mask]
return selected_indices.astype(int), num_selected_boxes
def denormalize_box(box, image_shape):
"""Scales corner box coordinates from normalized values to image dimensions.
# Arguments
box: Numpy array containing corner box coordinates.
image_shape: List of integers with (height, width).
# Returns
returns: box corner coordinates in image dimensions
"""
# x_min, y_min, x_max, y_max = box[:4]
y_min, x_min, y_max, x_max = box[:4]
height, width = image_shape
x_min = int(x_min * width)
y_min = int(y_min * height)
x_max = int(x_max * width)
y_max = int(y_max * height)
# return [x_min, y_min, x_max, y_max]
return [y_min, x_min, y_max, x_max]
(boxes, scores, classes, num) = sess.run(
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: image_expanded})
conf_threshold = 0.5
nms_threshold = 0.45
image_shape = image.shape[:2]
# Filtering the boxes based on conf_threshold
filtered_scores = [scores[0][i] for i in np.where(scores[0] > conf_threshold)]
filtered_boxes = [boxes[0][i] for i in np.where(scores[0] > conf_threshold)]
filtered_classes = [classes[0][i] for i in np.where(scores[0] > conf_threshold)]
if len(filtered_scores[0]) != 0:
# NMS thresholding
indices, count = apply_non_max_suppression(filtered_boxes[0], filtered_scores[0], nms_threshold, 200)
selected_indices = indices[:count]
## Getting the final boxes
final_boxes = filtered_boxes[0][selected_indices]
final_scores = filtered_scores[0][selected_indices]
final_classes = filtered_classes[0][selected_indices]
final_boxes = [denormalize_box(box, image_shape) for box in final_boxes]
I've tried modifying the code with the pre_process function and the color_mode but each time a different error pops up.
def pre_process(img):
ret, mask = cv2. threshold(img, 100, 255, cv2.THRESH_BINARY_INV)
mask_inv = cv2.bitwise_not(mask)
return mask_inv
train = ImageDataGenerator(rescale=1/255, vertical_flip=True,preprocessing_function=pre_process)
test = ImageDataGenerator(rescale=1/255, vertical_flip=True,preprocessing_function=pre_process)
train_dataset = train.flow_from_directory(path,
target_size=(512,512),
batch_size = 32,
class_mode = 'binary',
color_mode = 'grayscale',
shuffle = True
)
test_dataset = test.flow_from_directory(path,
target_size=(512,512),
batch_size =32,
class_mode = 'binary',
color_mode = 'grayscale',
shuffle = True
)
ValueError: could not broadcast input array from shape (512,512) into shape
(512,512,1)..... It's what i get from the above code.
However, if i used the pre_process method below, there is no error, but my loss is always NAN while training.
def pre_process(img):
ret, mask = cv2. threshold(img, 100, 255, cv2.THRESH_BINARY_INV)
mask_inv = cv2.bitwise_not(mask)
img_expanded = tf.expand_dims(mask_inv, 2)
return img_expanded
I can't find why this plt.show() is not working. I'm using the code given in Tensorflow:
https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/auto_examples/plot_object_detection_saved_model.html
I found solutions saying that I should use plt.show(block=false) and add plt.pause(5) but it didn't work. Please help.
After running the code I get:
Running inference for image... Done, but it doesn't show any image.
# All outputs are batches tensors.
# Convert to numpy arrays, and take index [0] to remove the batch dimension.
# We're only interested in the first num_detections.
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
for key, value in detections.items()}
detections['num_detections'] = num_detections
# detection_classes should be ints.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
image_np_with_detections = image_np.copy()
viz_utils.visualize_boxes_and_labels_on_image_array(
image_np_with_detections,
detections['detection_boxes'],
detections['detection_classes'],
detections['detection_scores'],
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=200,
min_score_thresh=.30,
agnostic_mode=False)
plt.figure()
plt.imshow(image_np_with_detections)
print('Done')
plt.show()
I'm using Tensorflow to detect object based on this tutorial. The reason why it's running so slow is this line output_dict =sess.run(tensor_dict,
feed_dict={image_tensor: np.expand_dims(image, 0)}). And below is the whole function code:
def run_inference_for_single_image(image, graph):
with graph.as_default():
with tf.device('/gpu:0'):
print('GPU is using')
with tf.Session() as sess:
time0 = datetime.datetime.now()
# Get handles to input and output tensors
ops = tf.get_default_graph().get_operations()
all_tensor_names = {output.name for op in ops for output in op.outputs}
tensor_dict = {}
for key in [
'num_detections', 'detection_boxes', 'detection_scores',
'detection_classes', 'detection_masks'
]:
tensor_name = key + ':0'
if tensor_name in all_tensor_names:
tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
tensor_name)
time1 = datetime.datetime.now()
if 'detection_masks' in tensor_dict:
# The following processing is only for single image
detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
# Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image.shape[0], image.shape[1])
detection_masks_reframed = tf.cast(
tf.greater(detection_masks_reframed, 0.5), tf.uint8)
# Follow the convention by adding back the batch dimension
tensor_dict['detection_masks'] = tf.expand_dims(
detection_masks_reframed, 0)
image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
time2 = datetime.datetime.now()
# Run inference
output_dict = sess.run(tensor_dict,
feed_dict={image_tensor: np.expand_dims(image, 0)})
time3 = datetime.datetime.now()
# all outputs are float32 numpy arrays, so convert types as appropriate
output_dict['num_detections'] = int(output_dict['num_detections'][0])
output_dict['detection_classes'] = output_dict[
'detection_classes'][0].astype(np.uint8)
output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
output_dict['detection_scores'] = output_dict['detection_scores'][0]
if 'detection_masks' in output_dict:
output_dict['detection_masks'] = output_dict['detection_masks'][0]
time4 = datetime.datetime.now()
print(time1-time0, time2-time1, time3-time2, time4-time3)
return output_dict
I don't know how to use GPU with tf.session.run(). Anyone can teach me how to use GPU with session.run???