Saving an image in a real-time object detector - TensorFlow

I am currently running a real-time object detector using SSD MobileNetV2 in TensorFlow 1.x and would like to know if there is any way I can save an image when one of the classes is detected in the video stream.
PATH_TO_FROZEN_GRAPH = 'path-to-inference-graph.pb'
PATH_TO_LABEL_MAP = 'path-to-label-map.pbtxt'
NUM_CLASSES = 4
cap = cv2.VideoCapture(0)
Basically, I have built the detector to detect 4 classes and would like to save the image whenever one of the classes is detected (it will likely come out as a burst of images, which is still fine).
label_map = label_map_util.load_labelmap(PATH_TO_LABEL_MAP)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        while True:
            ret, image_np = cap.read()
            image_np_expanded = np.expand_dims(image_np, axis=0)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            scores = detection_graph.get_tensor_by_name('detection_scores:0')
            classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=3,
            )
            cv2.imshow('Detection', cv2.resize(image_np, (1200, 800)))
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break
How do I achieve this? Are there other ways to go about it?

After sess.run, you get the results in (boxes, scores, classes, num_detections).
You just have to iterate over them, check the class and the confidence score, and finally save the image:
# classes holds numeric IDs, not names: map them via category_index and check the score
for class_id, score in zip(np.squeeze(classes).astype(np.int32), np.squeeze(scores)):
    if score > 0.5 and category_index[class_id]['name'] == 'req_class_name':
        cv2.imwrite('/path/to/destination/image.png', image_np)
        break
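Since detections on consecutive frames will produce the burst of images the question mentions, one option is to write each hit to a timestamped file so frames don't overwrite each other. A minimal sketch, assuming your class's display name is 'req_class_name' and that SAVE_DIR and save_if_detected are illustrative names, not part of the original code:
import time

TARGET_CLASS = 'req_class_name'    # assumed: the display name in your label map
SAVE_DIR = '/path/to/destination'  # assumed output directory
MIN_SCORE = 0.5

def save_if_detected(image_np, classes, scores, category_index):
    """Save the frame if the target class is detected above MIN_SCORE."""
    for class_id, score in zip(np.squeeze(classes).astype(np.int32),
                               np.squeeze(scores)):
        if score >= MIN_SCORE and category_index[class_id]['name'] == TARGET_CLASS:
            # Millisecond timestamp keeps burst frames from overwriting each other.
            path = '%s/detection_%d.png' % (SAVE_DIR, int(time.time() * 1000))
            cv2.imwrite(path, image_np)
            return path
    return None
You would call this right after sess.run inside the while loop, passing the raw classes and scores arrays.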

Related

How to count the number of detected objects (in bounding boxes) with the TensorFlow Object Detection API

I use the tutorial from Edje Electronics with Faster R-CNN and it works,
but I want to improve it: I want to count the objects.
The question is: how can I remove the accuracy percentage and replace it with the number of counted bounding boxes?
I don't know what I must add or remove to count the bounding boxes.
Here is the code:
import os
import cv2
import numpy as np
import tensorflow as tf
import sys
sys.path.append("..")
from utils import label_map_util
from utils import visualization_utils as vis_util
MODEL_NAME = 'inference_graph'
VIDEO_NAME = 'animal.mov'
# Grab path to current working directory
CWD_PATH = os.getcwd()
# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,'training','labelmap.pbtxt')
PATH_TO_VIDEO = os.path.join(CWD_PATH,VIDEO_NAME)
NUM_CLASSES = 6
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)
# Define input and output tensors (i.e. data) for the object detection classifier
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
video = cv2.VideoCapture(PATH_TO_VIDEO)
while(video.isOpened()):
    ret, frame = video.read()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_expanded = np.expand_dims(frame_rgb, axis=0)
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: frame_expanded})
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.60)
    cv2.imshow('Object detector', frame)
    if cv2.waitKey(1) == ord('q'):
        break
video.release()
cv2.destroyAllWindows()
You have to modify the visualize_boxes_and_labels_on_image_array() function in utils/visualization_utils.py to remove the confidence-score display and show the length of the boxes array instead.
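If you'd rather not patch the library, an alternative sketch (assuming scores is the raw array from the sess.run call above, and using the same 0.60 threshold as the visualization) is to count the boxes above the threshold yourself and overlay the count with OpenCV just before cv2.imshow:
# Count detections at or above the visualization threshold and draw the total.
count = int(np.sum(np.squeeze(scores) >= 0.60))
cv2.putText(frame, 'Objects: %d' % count, (10, 35),
            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
This leaves the library's per-box labels untouched, so you could also pass skip_scores-style options only if your version of visualization_utils supports them.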

How do I print the (frame number, bounding box information, confidence) of object detections to a text file in the TensorFlow object detector?

I used the TensorFlow Object Detection API for detecting multiple objects in my videos. However, I have been struggling to figure out how to write the resulting detections to a text/CSV/XML file (basically the bounding box information, the frame number within the image sequence, and the confidence of the bbox).
I've seen several answers on Stack Overflow and GitHub, but most of them were either vague or just not the exact answer I'm looking for.
Shown below is the last part of the detection code. I know that detection_boxes and detection_scores are what I need, but I cannot figure out how to write them to a text file, and how to write only the final bbox detections that appear on the images rather than ALL detection bounding boxes.
for image_path in TEST_IMAGE_PATHS:
    image = Image.open(image_path)
    # the array based representation of the image will be used later in order
    # to prepare the result image with boxes and labels on it.
    image_np = load_image_into_numpy_array(image)
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Actual detection.
    output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
You can try the following code
image = Image.open(image_path)
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = load_image_into_numpy_array(image)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
(boxes, scores, classes, num_detections) = sess.run(
    [boxes, scores, classes, num_detections],
    feed_dict={image_tensor: image_np_expanded})
width = 1024
height = 600
threshold = 0.5
temp = []  # list to store scores greater than 0.5
# iterate through all scores and pick those having a score greater than 0.5
for index, value in enumerate(classes[0]):
    if scores[0, index] > threshold:
        temp.append(scores[0, index])
        # Similarly, classes[0, index] will give you the class of the bounding box detection
# Actual detection.
output_dict = run_inference_for_single_image(image_np, detection_graph)
# For printing the bounding box coordinates
for i, j in zip(output_dict['detection_boxes'], output_dict['detection_scores']):
    if j > threshold:
        print(i[1]*width, i[0]*height, i[3]*width, i[2]*height)
The above code snippet will provide you with the bounding box coordinates and the detection scores. You can use a minimum threshold to filter out unnecessary detections. I hope this helps you out. Also, I could not quite understand what you meant by frame number; could you please elaborate on what you actually mean by that?
Please let me know if you face any issues
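To go from printing to an actual file, a sketch using Python's csv module might look like the following. It reuses the width/height scaling from the snippet above; frame_number here is just the index of the image in TEST_IMAGE_PATHS (an assumption, since for a video you would increment a counter per frame instead):
import csv

width, height = 1024, 600
threshold = 0.5

with open('detections.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['frame', 'xmin', 'ymin', 'xmax', 'ymax', 'score', 'class'])
    for frame_number, image_path in enumerate(TEST_IMAGE_PATHS):
        image_np = load_image_into_numpy_array(Image.open(image_path))
        output_dict = run_inference_for_single_image(image_np, detection_graph)
        for box, score, cls in zip(output_dict['detection_boxes'],
                                   output_dict['detection_scores'],
                                   output_dict['detection_classes']):
            if score > threshold:
                # Boxes are normalized [ymin, xmin, ymax, xmax]; scale to pixels.
                ymin, xmin, ymax, xmax = box
                writer.writerow([frame_number,
                                 xmin * width, ymin * height,
                                 xmax * width, ymax * height,
                                 score, cls])
Because the loop only writes rows whose score clears the threshold, the file contains only the detections that would actually be drawn on the image, not all 100 padded outputs.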

TensorFlow Object Detection API notifications

Tensorflow 1.12.0
Python 3.5.0
Windows 10
Hello all, I've created my own object detection model based on TensorFlow's object detection tutorial. I want to send an SMS notification (via a service like Twilio) when an object is detected, but I don't want to be notified on every frame containing the same object class; instead, I'd like a delay between text messages, e.g. at least 5 seconds between calls. I've looked at threading and timers, but I fear that I'll restart the thread or timer on every call, and I was wondering if there is a more efficient way, via the Object Detection API, to accomplish this task. I know that I can print the actual class detected via
print([category_index.get(value) for index, value in enumerate(classes[0]) if scores[0, index] > 0.5])
in the code
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        while True:
            ret, image_np = cap.read()
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # The score is shown on the result image, together with the class label.
            scores = detection_graph.get_tensor_by_name('detection_scores:0')
            classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break
but again, I don't want an overflow of text messages to my phone, and I don't want my script to pause either. If anyone has suggestions, I'd appreciate it. Thank you all.
Instead of a timer, try using a counter that counts the frames in which the object was detected, and reset the counter every 10 frames or so.
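One way to read that suggestion is to count frames since the last SMS, which keeps everything inside the existing loop with no threads or blocking. A minimal sketch to splice into the while loop above; send_sms is a hypothetical Twilio wrapper, and the 150-frame window (~5 s at 30 fps) is an assumption about your camera's frame rate:
FRAMES_BETWEEN_SMS = 150               # ~5 s at 30 fps (assumed frame rate)
frames_since_sms = FRAMES_BETWEEN_SMS  # allow the very first notification

while True:
    ret, image_np = cap.read()
    # ... run sess.run(...) exactly as above ...
    detected = any(score > 0.5 for score in np.squeeze(scores))
    frames_since_sms += 1
    if detected and frames_since_sms >= FRAMES_BETWEEN_SMS:
        send_sms('Object detected')  # hypothetical Twilio wrapper
        frames_since_sms = 0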

argument must be a string or a number

I get this error message about 10 seconds after I run the script:
int() argument must be a string, a bytes-like object or a number, not 'NoneType'
Could you please explain this error message and help me solve the issue?
I'm using TensorFlow version 1.8 and cv2 (OpenCV) version 3.4.0.
Thanks in advance!
My code
video = cv2.VideoCapture('rtsp://10.10.10.10/h264.sdp?res=half&x0=0&y0=0&x1=1920&y1=1080&qp=20&ratelimit=10000&doublescan=0&ssn=16026')
ret = video.set(3,1280)
ret = video.set(4,720)
while(True):
    try:
        # Acquire frame and expand frame dimensions to have shape: [1, None, None, 3]
        # i.e. a single-column array, where each item in the column has the pixel RGB value
        #frame = cameraDevice.get_frame()
        ret, frame = video.read()
        frame_expanded = np.expand_dims(frame, axis=0)
        # Perform the actual detection by running the model with the image as input
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: frame_expanded})
        # Draw the results of the detection (aka 'visualize the results')
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=4,
            min_score_thresh=0.85)
        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)
    except Exception as exc:
        print(exc)
    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break
# Clean up
video.release()
cv2.destroyAllWindows()
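No answer was recorded for this one, but a likely cause (an assumption based on the traceback type, not on the original thread): video.read() returns (False, None) whenever the RTSP stream drops a frame, and the None frame then fails somewhere downstream, e.g. inside the visualization call. A guard sketch for the top of the try block:
ret, frame = video.read()
if not ret or frame is None:
    # The stream dropped a frame; skip this iteration instead of
    # feeding None into np.expand_dims / sess.run.
    continue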

sess.run() is too slow

The sess.run() call in the TensorFlow object detection module takes about 2.5 seconds to detect bounding boxes in a 600x600 image. How can I speed up this code?
def run(image, detection_graph):
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # Define input and output Tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # The score is shown on the result image, together with the class label.
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            # the array based representation of the image will be used later in order
            # to prepare the result image with boxes and labels on it.
            image_np = image
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            print("2")
            start_time = datetime.datetime.now()
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            end_time = datetime.datetime.now()
            diff = (end_time - start_time).total_seconds() * 1000
            print(diff)
            print("3")
            return boxes[0], scores[0]
            #print scores
            #print classes
Your sess.run execution time is normal for the first run; after that it will probably run 100 times faster (not kidding).
The key is re-using the session. In your example, I'd add another image evaluation, measure that time, and check whether performance improves, like:
# all your prev code here
print(diff)
print("3")
image_np = image2  # get another image from somewhere
image_np_expanded = np.expand_dims(image_np, axis=0)
start_time = datetime.datetime.now()
(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_np_expanded})
end_time = datetime.datetime.now()
diff = (end_time - start_time).total_seconds() * 1000
print("Detection #2")
print(diff)
So you don't need a GPU or smaller images (yet); just "warm up" the session and use it for all the predictions.
I currently have a really modest setup in a test environment, with the latest version of Ubuntu running on VirtualBox, a single core, and no GPU (MobileNetV2 + the COCO dataset), and the times I get are pretty decent once the session is "warm":
--- 3.7862255573272705 seconds ---
--- 0.21631121635437012 seconds ---
--- 0.1784508228302002 seconds ---
Note the slow first execution time; the last run was on an image of size 1050x600.
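Applied to the run() function above, "re-using the session" means creating the session and resolving the tensors once, then calling only sess.run per image. A sketch of that refactor (the Detector class name is illustrative, not from the original post):
class Detector(object):
    def __init__(self, detection_graph):
        # Build the session and look up tensors once; both are reused across calls.
        self.sess = tf.Session(graph=detection_graph)
        self.image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        self.output_tensors = [
            detection_graph.get_tensor_by_name(name + ':0')
            for name in ('detection_boxes', 'detection_scores',
                         'detection_classes', 'num_detections')]

    def run(self, image_np):
        # Only the per-image work happens here, so calls after the first are fast.
        boxes, scores, classes, num = self.sess.run(
            self.output_tensors,
            feed_dict={self.image_tensor: np.expand_dims(image_np, axis=0)})
        return boxes[0], scores[0]
The first Detector(...).run(image) call still pays the warm-up cost; every subsequent run(image) should be in the fast regime shown in the timings above.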