How to count the number of detected objects (bounding boxes) with the TensorFlow Object Detection API - tensorflow

I used the tutorial from Edje Electronics with Faster R-CNN, and it works,
but I want to improve it: I want to count the detected objects.
The question is: how can I remove the percentage of accuracy (the confidence score) and replace it with the number of counted bounding boxes?
I don't know what I must add or remove in order to count the bounding boxes.
Here is the code:
import os
import cv2
import numpy as np
import tensorflow as tf
import sys
sys.path.append("..")
from utils import label_map_util
from utils import visualization_utils as vis_util
MODEL_NAME = 'inference_graph'
VIDEO_NAME = 'animal.mov'
# Grab path to current working directory
CWD_PATH = os.getcwd()
# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,'training','labelmap.pbtxt')
PATH_TO_VIDEO = os.path.join(CWD_PATH,VIDEO_NAME)
NUM_CLASSES = 6
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Load the frozen TensorFlow model into memory: read the serialized GraphDef
# from PATH_TO_CKPT, parse it and import it into a dedicated graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
    # The session is bound to detection_graph and is used for every frame.
    sess = tf.Session(graph=detection_graph)
# Define input and output tensors (i.e. data) for the object detection classifier
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Open the video file and run the detector on each frame until the video ends
# or the user presses 'q'.
video = cv2.VideoCapture(PATH_TO_VIDEO)
while video.isOpened():
    ret, frame = video.read()
    # read() reports ret == False once no further frame can be decoded; stop
    # here instead of handing an empty frame to cvtColor.
    if not ret:
        break
    # Convert the BGR frame to RGB and add a leading batch axis before feeding
    # it to image_tensor.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_expanded = np.expand_dims(frame_rgb, axis=0)
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: frame_expanded})
    # Draw boxes and labels onto the frame that is displayed below; only
    # detections scoring at least min_score_thresh are drawn.
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.60)
    cv2.imshow('Object detector', frame)
    if cv2.waitKey(1) == ord('q'):
        break
video.release()
cv2.destroyAllWindows()

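You have to modify the visualize_boxes_and_labels_on_image_array() function in utils/visualization_utils.py so that it no longer puts the confidence score in the display string and instead shows the length of the boxes array (counting only the boxes whose score is at least min_score_thresh).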

Related

Loading and testing a Tensorflow 2 trained model

I was already able to train a custom TF2 model using this tutorial:
https://neptune.ai/blog/how-to-train-your-own-object-detector-using-tensorflow-object-detection-api
Now I'm stuck testing this model. The script I use for this is also from a tutorial; I changed the paths etc., but it still doesn't work. I have tried for many hours now and have become demotivated.
I can resolve many errors, but not the current one; maybe someone can help me. I'm quite new to object detection.
import numpy as np
import os
import six as urllib # import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
import cv2
cap = cv2.VideoCapture(1)
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# ## Object detection imports
# Here are the imports from the object detection module.
# In[3]:
from object_detection.utils import label_map_util # from utils import label_map_util
from object_detection.utils import visualization_utils as vis_util # from utils import visualization_utils as vis_util
# # Model preparation
# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
#
# By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
# In[4]:
# What model to download.
MODEL_NAME = 'D:/VSCode/Machine_Learning_Tests/Tensorflow/workspace/exported_models/first_model/saved_model' # MODEL_NAME = 'inference_graph'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/saved_model.pb' # PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'D:/VSCode/Machine_Learning_Tests/Tensorflow/workspace/data/label_map.pbtxt' # PATH_TO_LABELS = 'training/labelmap.pbtxt'
NUM_CLASSES = 1
# ## Load a (frozen) Tensorflow model into memory.
# In[6]:
# Parse the serialized protobuf at PATH_TO_CKPT as a GraphDef and import it
# into detection_graph.
# NOTE(review): PATH_TO_CKPT points at a SavedModel's saved_model.pb, which is
# not a frozen GraphDef; that looks like the cause of the DecodeError raised by
# ParseFromString. A SavedModel directory is normally loaded with
# tf.saved_model.load() instead -- confirm against how the model was exported.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.compat.v1.GraphDef()  # TF1 spelling: tf.GraphDef()
    # TF1 spelling: tf.gfile.GFile(PATH_TO_CKPT, 'rb')
    with tf.compat.v2.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine
# In[7]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# ## Helper code
# In[8]:
def load_image_into_numpy_array(image):
    """Convert an image object into a (height, width, 3) uint8 NumPy array.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``getdata()`` yielding one 3-channel pixel per position, such as
            an RGB ``PIL.Image``.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with shape ``(height, width, 3)``.
    """
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
# # Detection
# In[9]:
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'images/test/'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(3, 8) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12,8)
# In[10]:
# Run the detector on Test.jpg in a loop and show the annotated image until
# the user presses 'q'.
with detection_graph.as_default():
    # TensorFlow 2 only exposes Session under tf.compat.v1, matching the
    # tf.compat.v1.GraphDef() call used when the graph is loaded.
    with tf.compat.v1.Session(graph=detection_graph) as sess:
        # Resolve the graph tensors once; they do not change between frames.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score is the confidence for the corresponding detection; it is
        # shown on the result image together with the class label.
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
        while True:
            image_np = cv2.imread('Test.jpg')
            # cv2.imread returns None instead of raising when the file
            # cannot be read.
            if image_np is None:
                raise IOError("could not read image 'Test.jpg'")
            cv2.imshow('image', image_np)
            cv2.waitKey(1)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break
Thats the code i use to try testing the model
And this is the current error:
Traceback (most recent call last):
File "d:\VSCode\Machine_Learning_Tests\Tensorflow\test\object_detection_tutorial_wwwPythonProgrammingNet__mitBild.py", line 65, in <module>
od_graph_def.ParseFromString(serialized_graph)
google.protobuf.message.DecodeError: Error parsing message with type 'tensorflow.GraphDef'

Saving image in a real time object detector

I am currently running a real-time object detector using SSD MobileNetV2 in TensorFlow 1.x and would like to know if there is a way to save an image when one of the classes is detected in the video stream.
PATH_TO_FROZEN_GRAPH = 'path-to-inference-graph.pb'
PATH_TO_LABEL_MAP = 'path-to-label-map.pbtxt'
NUM_CLASSES = 4
cap = cv2.VideoCapture(0)
Basically, I have built the detector to detect 4 classes and would like to save the image (it is likely to come out as a burst of images, which is still fine) when one of the classes is detected.
label_map = label_map_util.load_labelmap(PATH_TO_LABEL_MAP)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Run the detector on webcam frames and show the annotated stream until the
# capture stops delivering frames or the user presses 'q'.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Resolve the graph tensors once; they do not change between frames.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
        while True:
            ret, image_np = cap.read()
            # ret is False when no frame could be grabbed; image_np is then
            # unusable, so stop instead of feeding it to the model.
            if not ret:
                break
            # Add a leading batch axis before feeding the frame to image_tensor.
            image_np_expanded = np.expand_dims(image_np, axis=0)
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor],
                feed_dict={image_tensor: image_np_expanded})
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=3,
            )
            cv2.imshow('Detection', cv2.resize(image_np, (1200, 800)))
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break
cap.release()
How do I achieve this? Are there any other variations for it?
After sess.run, you get the results in (boxes, scores, classes, num_detections).
You just have to iterate over them, check the class and the score, and finally save the image:
# `classes` holds numeric class ids, not names, so translate the wanted name
# into ids through category_index before testing membership.
REQ_CLASS_NAME = 'req_class_name'
MIN_SCORE = 0.5  # minimum confidence for a detection to trigger a save
req_class_ids = [class_id for class_id, category in category_index.items()
                 if category['name'] == REQ_CLASS_NAME]
# Index 0 selects the single image of the batch that was fed to sess.run.
confident = scores[0] >= MIN_SCORE
if np.isin(classes[0].astype(np.int32)[confident], req_class_ids).any():
    cv2.imwrite('/path/to/destination/image.png', image_np)

Real-time counter using Tensorflow object detection API

I'm currently working on real-time object detection using the TensorFlow API. I've gotten that figured out, but now I would like to add an object counter, so that I'll have real-time object detection plus a counter.
The source code for object detection was taken from the TensorFlow ipynb tutorial, and I added OpenCV for real-time detection. I've merged the real-time detection source code with the counter source code, which was originally written for vehicle counting in this guy's repo.
So, my current output: no error and no output. But my webcam light flickers, which shows it's being used, so the OpenCV part is working. Could anyone take a look at the code and help me figure out what's wrong? It'd be a really great help. Thank you in advance.
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import csv
import time
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
import cv2
cap = cv2.VideoCapture(0)
# initialize .csv
# Create (or truncate) the measurement file and write its header row.
# newline='' lets the csv module control line endings itself, which avoids
# blank rows between records on Windows.
with open('traffic_measurement.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    csv_line = 'Person Movement Direction'
    writer.writerow(csv_line.split(','))
# Variables to count persons
total_passed_person = 0
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# ## Object detection imports
# Here are the imports from the object detection module.
from utils import label_map_util
from utils import visualization_utils as vis_util
# # Model preparation
# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
NUM_CLASSES = 90
# ## Download Model
# Download the model archive, then unpack only the frozen inference graph
# into the current working directory.
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
# The context manager closes the archive once extraction is finished.
with tarfile.open(MODEL_FILE) as tar_file:
    for member in tar_file.getmembers():
        file_name = os.path.basename(member.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(member, os.getcwd())
# ## Load a (frozen) Tensorflow model into memory.
# Read the serialized GraphDef from PATH_TO_CKPT, parse it and import it into
# a dedicated graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we
# know that this corresponds to `airplane`. Here we use internal utility functions, but anything that
# returns a dictionary mapping integers to appropriate string labels would be fine.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# ## Helper code
def load_image_into_numpy_array(image):
    """Return the pixels of ``image`` as a (height, width, 3) uint8 array.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``getdata()`` yielding one 3-channel pixel per position, such as
            an RGB ``PIL.Image``.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with shape ``(height, width, 3)``.
    """
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
# Detection
def object_detection_function():
    """Detect and count persons on frames read from the module-level ``cap``.

    Frames are read until the capture is exhausted or the user presses 'q'.
    Each frame is passed through the detection graph, annotated with the
    running count, the ROI line and the last known movement direction, and
    shown in an OpenCV window. Every CSV line reported by the visualization
    helper is appended to ``traffic_measurement.csv``.

    This function only defines the loop; nothing runs until it is called.

    NOTE(review): the call below relies on a customized
    ``visualize_boxes_and_labels_on_image_array`` that takes the frame index
    as its first argument and returns ``(counter, csv_line)`` -- confirm that
    the local ``utils/visualization_utils.py`` provides that signature.
    """
    total_passed_person = 0
    # Shown on the overlay until the first CSV line supplies a direction.
    direction = 'n.a.'
    font = cv2.FONT_HERSHEY_SIMPLEX
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # Resolve the graph tensors once. They get their own names so the
            # arrays returned by sess.run never overwrite them.
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score is the confidence for the corresponding detection; it
            # is shown on the result image together with the class label.
            scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
            classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')

            # for all the frames that are extracted from input video
            while cap.isOpened():
                (ret, frame) = cap.read()
                if not ret:
                    print('end of the video file...')
                    break
                input_frame = frame
                # Expand dimensions since the model expects images to have
                # shape: [1, None, None, 3]. This must happen per frame.
                image_np_expanded = np.expand_dims(input_frame, axis=0)

                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes_tensor, scores_tensor, classes_tensor,
                     num_detections_tensor],
                    feed_dict={image_tensor: image_np_expanded})

                # Visualization of the results of a detection.
                # cap.get(1) reads capture property 1 (the frame position).
                (counter, csv_line) = \
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        cap.get(1),
                        input_frame,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8)
                total_passed_person = total_passed_person + counter

                # insert information text to video frame; the last putText
                # argument is the line type, so it takes cv2.LINE_AA rather
                # than a font constant
                cv2.putText(
                    input_frame,
                    'Detected Persons: ' + str(total_passed_person),
                    (10, 35),
                    font,
                    0.8,
                    (0, 0xFF, 0xFF),
                    2,
                    cv2.LINE_AA,
                )

                # when a person was counted on this frame, draw the ROI line
                # in green, otherwise in red
                if counter == 1:
                    cv2.line(input_frame, (0, 200), (640, 200), (0, 0xFF, 0), 5)
                else:
                    cv2.line(input_frame, (0, 200), (640, 200), (0, 0, 0xFF), 5)

                # insert information text to video frame
                cv2.rectangle(input_frame, (10, 275), (230, 337), (180, 132, 109), -1)
                cv2.putText(
                    input_frame,
                    'ROI Line',
                    (545, 190),
                    font,
                    0.6,
                    (0, 0, 0xFF),
                    2,
                    cv2.LINE_AA,
                )
                cv2.putText(
                    input_frame,
                    '-Movement Direction: ' + direction,
                    (14, 302),
                    font,
                    0.4,
                    (0xFF, 0xFF, 0xFF),
                    1,
                    cv2.LINE_AA,
                )

                if csv_line != 'not_available':
                    fields = csv_line.split(',')
                    # presumably the first CSV field is the movement
                    # direction -- verify against the visualization helper
                    direction = fields[0]
                    with open('traffic_measurement.csv', 'a', newline='') as f:
                        writer = csv.writer(f)
                        writer.writerow(fields)

                cv2.imshow('object detection', cv2.resize(input_frame, (800, 600)))
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    break

Tensorflow object detection UnicodeEncodeError

I am trying to use the pretrained faster_rcnn_inception_resnet_v2_atrous_oid. The code is modified from the official Quick Start notebook. When I use other models like faster_rcnn_nas_coco_2017_11_08, everything works. However, when I change to faster_rcnn_inception_resnet_v2_atrous_oid, I got the following error:
runfile('D:/python/tf/models-master/research/object_detection/Learn_faster.py', wdir='D:/python/tf/models-master/research/object_detection')
Reloaded modules: utils, utils.label_map_util, utils.visualization_utils
downloaded
Traceback (most recent call last):
File "e:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2898, in run_code
self.showtraceback()
File "e:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 1826, in showtraceback
self._showtraceback(etype, value, stb)
File "e:\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 554, in _showtraceback
dh.parent_header, ident=topic)
File "e:\Anaconda3\lib\site-packages\jupyter_client\session.py", line 712, in send
to_send = self.serialize(msg, ident)
File "e:\Anaconda3\lib\site-packages\jupyter_client\session.py", line 607, in serialize
content = self.pack(content)
File "e:\Anaconda3\lib\site-packages\jupyter_client\session.py", line 103, in <lambda>
ensure_ascii=False, allow_nan=False,
File "e:\Anaconda3\lib\site-packages\zmq\utils\jsonapi.py", line 43, in dumps
s = s.encode('utf8')
UnicodeEncodeError: 'utf-8' codec can't encode character '\udcd5' in position 2098: surrogates not allowed
The code is:
import numpy as np
import os
import six.moves.urllib as urllib
import tarfile
import tensorflow as tf
from matplotlib import pyplot as plt
from PIL import Image
# Refuse to run on any TensorFlow version other than exactly 1.4.0.
if tf.__version__ != '1.4.0':
    raise ImportError('Please upgrade your tensorflow installation to v1.4.0!')
from utils import label_map_util
from utils import visualization_utils as vis_util
# What model to download.
MODEL_NAME = 'faster_rcnn_inception_resnet_v2_atrous_oid_2017_11_08'#'faster_rcnn_nas_coco_2017_11_08'#'faster_rcnn_resnet101_coco_2017_11_08' #'faster_rcnn_nas_coco_2017_11_08' 'rfcn_resnet101_coco_2017_11_08'# , , 'ssd_inception_v2_coco_2017_11_08'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'oid_bbox_trainable_label_map')#'mscoco_label_map.pbtxt')
NUM_CLASSES = 545
# Download the model archive, then unpack only the frozen inference graph
# into the current working directory.
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
print("downloaded")
# The context manager closes the archive once extraction is finished.
with tarfile.open(MODEL_FILE) as tar_file:
    for member in tar_file.getmembers():
        file_name = os.path.basename(member.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(member, os.getcwd())
# Read the serialized GraphDef from PATH_TO_CKPT, parse it and import it into
# a dedicated graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def load_image_into_numpy_array(image):
    """Return the pixels of ``image`` as a (height, width, 3) uint8 array.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``getdata()`` yielding one 3-channel pixel per position, such as
            an RGB ``PIL.Image``.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with shape ``(height, width, 3)``.
    """
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 7) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
# Run the detector on every test image and plot the annotated result.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Define input and output tensors for detection_graph.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score is the confidence for the corresponding detection; it is
        # shown on the result image together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        for image_path in TEST_IMAGE_PATHS:
            image = Image.open(image_path)
            # The array based representation of the image will be used later
            # in order to prepare the result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            plt.figure(figsize=IMAGE_SIZE)
            plt.imshow(image_np)
PATH_TO_LABELS = os.path.join('data', 'oid_bbox_trainable_label_map')
should be
PATH_TO_LABELS = os.path.join('data', 'oid_bbox_trainable_label_map.pbtxt')

More efficient way of loading images for detection

I am using tensorflow object detection api to do some semi real time object detection tasks.
The images will be taken by camera at a speed of 2 images/sec. Each image will be cropped into 4 small images so in total I need to process 8 images/sec.
My detection model has been exported into a frozen graph (.pb file) and loaded in GPU memory. Then I load images to numpy arrays to feed them into my model.
The detection itself only takes about 0.1 sec/image, however, loading each image takes about 0.45 sec.
The script I am using was adapted from the code samples provided by the object detection API (link); it reads each image, converts it into a numpy array and then feeds it into the detection model. The most time-consuming part of this process is load_image_into_numpy_array, which takes almost 0.45 seconds.
The script is in below:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import timeit
import scipy.misc
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from utils import label_map_util
from utils import visualization_utils as vis_util
# Path to frozen detection graph. This is the actual model that is used for the
# object detection.
PATH_TO_CKPT = 'animal_detection.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'animal_label_map.pbtxt')
NUM_CLASSES = 1
# Read the serialized GraphDef from PATH_TO_CKPT, parse it and import it into
# a dedicated graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map,
max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def load_image_into_numpy_array(image):
    """Return the pixels of ``image`` as a (height, width, 3) uint8 array.

    The pixel data is materialized through ``image.getdata()`` before being
    reshaped, so every pixel passes through a Python-level sequence.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``getdata()`` yielding one 3-channel pixel per position, such as
            an RGB ``PIL.Image``.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with shape ``(height, width, 3)``.
    """
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the
# images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test'
TEST_IMAGE_PATHS = [
os.path.join(PATH_TO_TEST_IMAGES_DIR,'image{}.png'.format(i)) for i in range(1, 10) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
# Run the detector on every test image, printing first the time spent loading
# the image and then the time spent on detection.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph, config=config) as sess:
        for image_path in TEST_IMAGE_PATHS:
            # --- timed section 1: image loading and preparation ---
            start = timeit.default_timer()
            image = Image.open(image_path)
            # The array based representation of the image will be used later
            # in order to prepare the result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            end = timeit.default_timer()
            print(end - start)
            # --- timed section 2: tensor lookup and detection ---
            start = timeit.default_timer()
            # Each box represents a part of the image where a particular object was detected.
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score is the confidence for the corresponding detection; it
            # is shown on the result image together with the class label.
            scores = detection_graph.get_tensor_by_name('detection_scores:0')
            classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            # Actual detection. The names above are rebound to the result
            # arrays here and looked up again on the next iteration.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            stop = timeit.default_timer()
            print(stop - start)
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=2)
I am looking for a more efficient way to load the images produced by the camera. My first thought is to avoid numpy arrays and use TensorFlow-native ways to load images, but I have no idea where to start since I am very new to TensorFlow.
If I could find a TensorFlow way to load images, maybe I could put 4 images into 1 batch and feed them into my model, so that I might get some improvement in speed.
An immature idea is to save the 4 small images cropped from 1 raw image into a TFRecord file and load that file as one batch to feed the model, but I have no idea how to achieve that.
Any help will be appreciated.
I found one solution that reduces image loading from 0.4 seconds to 0.01 seconds. I will post the answer here in case someone else has the same problem.
Instead of using PIL.Image and numpy, we can use imread from OpenCV.
I also managed to batch the images so that we can achieve a better speedup.
The script is as follows:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tensorflow as tf
import timeit
import cv2
from collections import defaultdict
from utils import label_map_util
from utils import visualization_utils as vis_util
MODEL_PATH = sys.argv[1]
IMAGE_PATH = sys.argv[2]
BATCH_SIZE = int(sys.argv[3])
# Path to frozen detection graph. This is the actual model that is used for the
# object detection.
PATH_TO_CKPT = os.path.join(MODEL_PATH, 'frozen_inference_graph.pb')
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'animal_label_map.pbtxt')
NUM_CLASSES = 1
# Read the serialized GraphDef from PATH_TO_CKPT, parse it and import it into
# a dedicated graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map,
max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
PATH_TO_TEST_IMAGES_DIR = IMAGE_PATH
TEST_IMAGE_PATHS = [
os.path.join(PATH_TO_TEST_IMAGES_DIR,'image{}.png'.format(i)) for i in range(1, 129) ]
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
# Run the detector on the test images in batches of BATCH_SIZE and print the
# time spent loading and detecting each batch.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph, config=config) as sess:
        # Resolve the graph tensors once; they do not change between batches.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score is the confidence for the corresponding detection.
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
        for i in range(0, len(TEST_IMAGE_PATHS), BATCH_SIZE):
            images = []
            start = timeit.default_timer()
            # Slicing keeps the last batch in range when the number of images
            # is not a multiple of BATCH_SIZE.
            for image_path in TEST_IMAGE_PATHS[i:i + BATCH_SIZE]:
                image = cv2.imread(image_path)
                # cv2.imread returns None instead of raising when the file
                # cannot be read.
                if image is None:
                    raise IOError('could not read image: ' + image_path)
                # cv2.imread decodes to BGR, whereas PIL-based loading
                # normally yields RGB; convert so the model sees the same
                # channel order as before.
                images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            # Stack into one [batch, height, width, 3] array; all images of a
            # batch must share the same size.
            image_np_expanded = np.stack(images, axis=0)
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor],
                feed_dict={image_tensor: image_np_expanded})
            stop = timeit.default_timer()
            print(stop - start)