i already was able to train a custom TF2 model using this tutorial:
https://neptune.ai/blog/how-to-train-your-own-object-detector-using-tensorflow-object-detection-api
Now im getting stuck with testing this model. The script i use for this is also from a turoial and i changed the paths etc but it still doesnt work... I tried and tried and tried for many hours now but at the time i just got demotivated...
I can resolve many errors but the current one not, maybe anyone can help me. Im quite new to object detection..
import numpy as np
import os
import six as urllib # import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
import cv2
cap = cv2.VideoCapture(1)
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# ## Object detection imports
# Here are the imports from the object detection module.
# In[3]:
from object_detection.utils import label_map_util # from utils import label_map_util
from object_detection.utils import visualization_utils as vis_util # from utils import visualization_utils as vis_util
# # Model preparation
# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
#
# By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
# In[4]:
# What model to download.
MODEL_NAME = 'D:/VSCode/Machine_Learning_Tests/Tensorflow/workspace/exported_models/first_model/saved_model' # MODEL_NAME = 'inference_graph'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/saved_model.pb' # PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'D:/VSCode/Machine_Learning_Tests/Tensorflow/workspace/data/label_map.pbtxt' # PATH_TO_LABELS = 'training/labelmap.pbtxt'
NUM_CLASSES = 1
# ## Load a (frozen) Tensorflow model into memory.
# In[6]:
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.compat.v1.GraphDef() # od_graph_def = tf.GraphDef()
with tf.compat.v2.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: # with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine
# In[7]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# ## Helper code
# In[8]:
def load_image_into_numpy_array(image):
(im_width, im_height) = image.size
return np.array(image.getdata()).reshape(
(im_height, im_width, 3)).astype(np.uint8)
# # Detection
# In[9]:
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'images/test/'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(3, 8) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12,8)
# In[10]:
with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess:
while True:
image_np = np.array(cv2.imread('Test.jpg'))
cv2.imshow('image',image_np)
cv2.waitKey(1)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8)
cv2.imshow('object detection', cv2.resize(image_np, (800,600)))
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
Thats the code i use to try testing the model
And this is the current error:
Traceback (most recent call last):
File "d:\VSCode\Machine_Learning_Tests\Tensorflow\test\object_detection_tutorial_wwwPythonProgrammingNet__mitBild.py", line 65, in <module>
od_graph_def.ParseFromString(serialized_graph)
google.protobuf.message.DecodeError: Error parsing message with type 'tensorflow.GraphDef'
Related
After struggling with compatibility issues between Tensorflow 2.00 and the object detection API, I downgraded to Tensorflow 1.15 to be able to train my own model. after completing the training I modified the jupyter notebook included in the Tensorflow object detection API repo to test on my own images but I keep getting this error:
Traceback (most recent call last):
File "object_detection_tutorial_converted.py", line 254, in <module>
show_inference(detection_model, image_path)
File "object_detection_tutorial_converted.py", line 235, in show_inference
output_dict = run_inference_for_single_image(model, image_np)
File "object_detection_tutorial_converted.py", line 203, in run_inference_for_single_image
num_detections = int(output_dict.pop('num_detections'))
TypeError: int() argument must be a string, a bytes-like object or a number, not 'Tensor'
Here's my modified jupyter notebook
import os
import pathlib
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from IPython.display import display
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
# patch tf1 into `utils.ops`
utils_ops.tf = tf.compat.v1
# Patch the location of gfile
tf.gfile = tf.io.gfile
def load_model(model_name):
model_dir = pathlib.Path(model_name)/"saved_model"
model = model = tf.compat.v2.saved_model.load(str(model_dir), None)
model = model.signatures['serving_default']
return model
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'training/label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = pathlib.Path('test_images')
TEST_IMAGE_PATHS = sorted(list(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg")))
TEST_IMAGE_PATHS
model_name = 'devices_graph'
detection_model = load_model(model_name)
print(detection_model.inputs)
detection_model.output_dtypes
detection_model.output_shapes
def run_inference_for_single_image(model, image):
image = np.asarray(image)
# The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
input_tensor = tf.convert_to_tensor(image)
# The model expects a batch of images, so add an axis with `tf.newaxis`.
input_tensor = input_tensor[tf.newaxis,...]
# Run inference
output_dict = model(input_tensor)
# All outputs are batches tensors.
# Convert to numpy arrays, and take index [0] to remove the batch dimension.
# We're only interested in the first num_detections.
num_detections = int(output_dict.pop('num_detections'))
output_dict = {key:value[0, :num_detections].numpy()
for key,value in output_dict.items()}
output_dict['num_detections'] = num_detections
# detection_classes should be ints.
output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
# Handle models with masks:
if 'detection_masks' in output_dict:
# Reframe the the bbox mask to the image size.
detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
output_dict['detection_masks'], output_dict['detection_boxes'],
image.shape[0], image.shape[1])
detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
tf.uint8)
output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
return output_dict
# Run it on each test image and show the results:
def show_inference(model, image_path):
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = np.array(Image.open(image_path))
# Actual detection.
output_dict = run_inference_for_single_image(model, image_np)
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
output_dict['detection_boxes'],
output_dict['detection_classes'],
output_dict['detection_scores'],
category_index,
instance_masks=output_dict.get('detection_masks_reframed', None),
use_normalized_coordinates=True,
line_thickness=8)
display(Image.fromarray(image_np))
for image_path in TEST_IMAGE_PATHS:
show_inference(detection_model, image_path)
First you need to create an inference of the model using the script in the below link and later load the "frozen_inference_graph.pb" file/model, We need to give full path not just the folder path.
https://github.com/tensorflow/models/blob/master/research/object_detection/export_inference_graph.py
example path
MODEL_PATH = '/home/sumanh/tf_models/Archive/model/ssd_inception_v2_coco_2018_01_28/190719/frozen_inference_graph.pb'
That's strange this worked for tensorflow 2.0.0 for me. Can you send console log
I use Python 3.6 with Anaconda and use the Spyder editor on my system which is a standard desktop with Windows 10. I set up TensorFlow Object Detection API as instructed in
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md.
Since the formal installation instructions are in a Linux nature, I also got help from
https://medium.com/#rohitrpatil/how-to-use-tensorflow-object-detection-api-on-windows-102ec8097699.
At the end, I wanted to test the system I already set up by running an already supported test file "object_detection_tutorial.pynb" on Jupyter notebook. It immediately gave the error:
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-10-34f5cdda911a> in <module>
15 # This is needed since the notebook is stored in the object_detection folder.
16 sys.path.append("..")
---> 17 from object_detection.utils import ops as utils_ops
18
19 if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
ModuleNotFoundError: No module named 'object_detection'
I couldn't find a solution for the error even though many times discussed on Github and here. I decided to go with Spyder, and test the code right in there. It gave error for the line
%matplotlib inline
in the code. After some research, I found that this is a Jupyter-ish command thus I commented it out. Instead I added
matplotlib.use('TkAgg')
plt.show()
Final structure of the official test code I've been testing on Spyder is
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import matplotlib
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops
if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')
# This is needed to display the images.
# %matplotlib inline
from utils import label_map_util
from utils import visualization_utils as vis_util
# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
file_name = os.path.basename(file.name)
if 'frozen_inference_graph.pb' in file_name:
tar_file.extract(file, os.getcwd())
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
def load_image_into_numpy_array(image):
(im_width, im_height) = image.size
return np.array(image.getdata()).reshape(
(im_height, im_width, 3)).astype(np.uint8)
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
def run_inference_for_single_image(image, graph):
with graph.as_default():
with tf.Session() as sess:
# Get handles to input and output tensors
ops = tf.get_default_graph().get_operations()
all_tensor_names = {output.name for op in ops for output in op.outputs}
tensor_dict = {}
for key in [
'num_detections', 'detection_boxes', 'detection_scores',
'detection_classes', 'detection_masks'
]:
tensor_name = key + ':0'
if tensor_name in all_tensor_names:
tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
tensor_name)
if 'detection_masks' in tensor_dict:
# The following processing is only for single image
detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
# Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image.shape[1], image.shape[2])
detection_masks_reframed = tf.cast(
tf.greater(detection_masks_reframed, 0.5), tf.uint8)
# Follow the convention by adding back the batch dimension
tensor_dict['detection_masks'] = tf.expand_dims(
detection_masks_reframed, 0)
image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
# Run inference
output_dict = sess.run(tensor_dict,
feed_dict={image_tensor: image})
# all outputs are float32 numpy arrays, so convert types as appropriate
output_dict['num_detections'] = int(output_dict['num_detections'][0])
output_dict['detection_classes'] = output_dict[
'detection_classes'][0].astype(np.int64)
output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
output_dict['detection_scores'] = output_dict['detection_scores'][0]
if 'detection_masks' in output_dict:
output_dict['detection_masks'] = output_dict['detection_masks'][0]
return output_dict
for image_path in TEST_IMAGE_PATHS:
image = Image.open(image_path)
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = load_image_into_numpy_array(image)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
output_dict['detection_boxes'],
output_dict['detection_classes'],
output_dict['detection_scores'],
category_index,
instance_masks=output_dict.get('detection_masks'),
use_normalized_coordinates=True,
line_thickness=8)
plt.figure(figsize=IMAGE_SIZE)
plt.imshow(image_np)
matplotlib.use('TkAgg')
plt.show()
You can see the last two lines that are added by me.
When I run this code, it gives no error, however a figure window opens and never shows a figure in it. When I hover mouse cursor on it, it shows up busy all the time.
I've tried many suggestions but I couldn't figure things out. I already created a system environment variable
PYTHON_PATH
and added values of
C:\Users\user\models;
C:\Users\user\models\research;
C:\Users\user\models\research\slim;
C:\Users\user\models\research\object_detection;
C:\Users\user\models\research\object_detection\utils;
C:\Neon-ProgramData\Anaconda3;
C:\Neon-ProgramData\Anaconda3\Scripts;
C:\Neon-ProgramData\Anaconda3\Library\bin;
I also correctly compiled proto files with protoc.exe and confirmed that .py files are sitting there.
In Anaconda, I've created an environment for TensorFlow works and TF also works normally.
I'm completely lost in the problem. I think I did the installation correctly and tried to use all suggestions the internet gave to me. I want to test and use this API and need help about where I got stuck.
I am trying to read a video file (using opencv), loop over all frames using tensorflow's object-detection API to do the predictions and bounding boxes, and writing the predicted frames (with boxes) to a new video file. I used the object_detection_tutorial.ipynb with some modifications to capture the video frames and process it in faster-rcnn-inception-resnet-v2 loaded from a frozen graph (after trained).
I am using a tesla P100 gpu in a cloud machine with windows 10 and 56GB ram. Also using tensorflow-gpu.
When I run the code, it takes 0,5 second per frame. Is it a normal speed for a tesla P100 or I am doing something wrong in the code to make it slower?
This code is just a test, as later I will have to use it in a real time video prediction task. If 0,5 second per frame is an expected speed using tensorflow API, I think I will cannot use it in my task :(
So, after running it, i get the following running times
processing frame number 1.0
time to capture video frame 0.0
time to predict 0.49225664138793945
time to generate boxes in a frame 0.14833950996398926
time to write a frame in video file 0.04687023162841797
total time in the loop 0.6874663829803467
As you guys can see, the code using the CPU (opencv) goes fast. But when I use the GPU, it takes almost 0,5 seconds just in prediction task (used in sess.run).
Any advices? Thank you in advance. Bellow follows my code
from distutils.version import StrictVersion
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import time
from collections import defaultdict
from io import StringIO
#from matplotlib import pyplot as plt
from PIL import Image
import cv2
from imutils import paths
import re
#This is needed since the code is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops
if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')
from utils import label_map_util
from utils import visualization_utils as vis_util
#Detection using tensorflow inside write_video function
def write_video():
filename = 'output/teste_v2.avi'
codec = cv2.VideoWriter_fourcc('W', 'M', 'V', '2')
cap = cv2.VideoCapture('pneu_trim2.mp4')
framerate = round(cap.get(5),2)
w = int(cap.get(3))
h = int(cap.get(4))
resolution = (w, h)
VideoFileOutput = cv2.VideoWriter(filename, codec, framerate, resolution)
################################
# # Model preparation
# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_FROZEN_GRAPH` to point to a new .pb file.
#
# What model to download.
MODEL_NAME = 'training/pneu_incep_step_24887'
print("loading model from " + MODEL_NAME)
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'object-detection.pbtxt')
NUM_CLASSES = 5
# ## Load a (frozen) Tensorflow model into memory.
time_graph = time.time()
print('loading graphs')
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
print("tempo build graph = " + str(time.time() - time_graph))
# ## Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
################################
with tf.Session(graph=detection_graph) as sess:
with detection_graph.as_default():
while (cap.isOpened()):
time_loop = time.time()
print('processing frame number: ' + str(cap.get(1)))
time_captureframe = time.time()
ret, image_np = cap.read()
print("time to capture video frame = " + str(time.time() - time_captureframe))
if (ret != True):
break
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
#image_np = load_image_into_numpy_array(image)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
time_prediction = time.time()
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
print("time to predict = " + str(time.time() - time_prediction))
# Visualization of the results of a detection.
time_visualizeboxes = time.time()
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8)
print("time to generate boxes in a frame = " + str(time.time() - time_visualizeboxes))
time_writeframe = time.time()
VideoFileOutput.write(image_np)
print("time to write a frame in video file = " + str(time.time() - time_writeframe))
print("total time in the loop = " + str(time.time() - time_loop))
cap.release()
VideoFileOutput.release()
print('done')
Actually the problem is with the model you were using.
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
Basically the model Faster-rcnn-inception-resnet-v2 will take more time.
You can refer the link to know the speed for the model
I am trying to use the pretrained faster_rcnn_inception_resnet_v2_atrous_oid. The code is modified from the official Quick Start notebook. When I use other models like faster_rcnn_nas_coco_2017_11_08, everything works. However, when I change to faster_rcnn_inception_resnet_v2_atrous_oid, I got the following error:
runfile('D:/python/tf/models-master/research/object_detection/Learn_faster.py', wdir='D:/python/tf/models-master/research/object_detection')
Reloaded modules: utils, utils.label_map_util, utils.visualization_utils
downloaded
Traceback (most recent call last):
File "e:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2898, in run_code
self.showtraceback()
File "e:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 1826, in showtraceback
self._showtraceback(etype, value, stb)
File "e:\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 554, in _showtraceback
dh.parent_header, ident=topic)
File "e:\Anaconda3\lib\site-packages\jupyter_client\session.py", line 712, in send
to_send = self.serialize(msg, ident)
File "e:\Anaconda3\lib\site-packages\jupyter_client\session.py", line 607, in serialize
content = self.pack(content)
File "e:\Anaconda3\lib\site-packages\jupyter_client\session.py", line 103, in <lambda>
ensure_ascii=False, allow_nan=False,
File "e:\Anaconda3\lib\site-packages\zmq\utils\jsonapi.py", line 43, in dumps
s = s.encode('utf8')
UnicodeEncodeError: 'utf-8' codec can't encode character '\udcd5' in position 2098: surrogates not allowed
The code is:
import numpy as np
import os
import six.moves.urllib as urllib
import tarfile
import tensorflow as tf
from matplotlib import pyplot as plt
from PIL import Image
if tf.__version__ != '1.4.0':
raise ImportError('Please upgrade your tensorflow installation to v1.4.0!')
from utils import label_map_util
from utils import visualization_utils as vis_util
# What model to download.
MODEL_NAME = 'faster_rcnn_inception_resnet_v2_atrous_oid_2017_11_08'#'faster_rcnn_nas_coco_2017_11_08'#'faster_rcnn_resnet101_coco_2017_11_08' #'faster_rcnn_nas_coco_2017_11_08' 'rfcn_resnet101_coco_2017_11_08'# , , 'ssd_inception_v2_coco_2017_11_08'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'oid_bbox_trainable_label_map')#'mscoco_label_map.pbtxt')
NUM_CLASSES = 545
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
print("downloaded")
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
file_name = os.path.basename(file.name)
if 'frozen_inference_graph.pb' in file_name:
tar_file.extract(file, os.getcwd())
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def load_image_into_numpy_array(image):
(im_width, im_height) = image.size
return np.array(image.getdata()).reshape(
(im_height, im_width, 3)).astype(np.uint8)
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 7) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess:
# Definite input and output Tensors for detection_graph
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
for image_path in TEST_IMAGE_PATHS:
image = Image.open(image_path)
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = load_image_into_numpy_array(image)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
(boxes, scores, classes, num) = sess.run(
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8)
plt.figure(figsize=IMAGE_SIZE)
plt.imshow(image_np)
PATH_TO_LABELS = os.path.join('data', 'oid_bbox_trainable_label_map')
should be
PATH_TO_LABELS = os.path.join('data', 'oid_bbox_trainable_label_map.pbtxt')
I am using tensorflow object detection api to do some semi real time object detection tasks.
The images will be taken by camera at a speed of 2 images/sec. Each image will be cropped into 4 small images so in total I need to process 8 images/sec.
My detection model has been exported into a frozen graph (.pb file) and loaded in GPU memory. Then I load images to numpy arrays to feed them into my model.
The detection itself only takes about 0.1 sec/image, however, loading each image takes about 0.45 sec.
The script I am using was revised from the code samples provided by object detection api(link), it reads each image and convert them into numpy array and then feed into detection models. The most time consumming part of this process is load_image_into_numpy_array, it takes almost 0.45 seconds.
The script is in below:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import timeit
import scipy.misc
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from utils import label_map_util
from utils import visualization_utils as vis_util
# Path to frozen detection graph. This is the actual model that is used for the
# object detection.
PATH_TO_CKPT = 'animal_detection.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'animal_label_map.pbtxt')
NUM_CLASSES = 1
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def,name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map,
max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def load_image_into_numpy_array(image):
(im_width, im_height) = image.size
return np.array(image.getdata()).reshape(
(im_height, im_width, 3)).astype(np.uint8)
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the
# images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test'
TEST_IMAGE_PATHS = [
os.path.join(PATH_TO_TEST_IMAGES_DIR,'image{}.png'.format(i)) for i in range(1, 10) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
with detection_graph.as_default():
with tf.Session(graph=detection_graph, config=config) as sess:
for image_path in TEST_IMAGE_PATHS:
start = timeit.default_timer()
image = Image.open(image_path)
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = load_image_into_numpy_array(image)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
end = timeit.default_timer()
print(end-start)
start = timeit.default_timer()
# Each box represents a part of the image where a particular object was detected.
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
stop = timeit.default_timer()
print (stop - start)
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=2)
I am thinking of a more efficient way to load images that are produced by camera, the first thought is to avoid numpy array and try to use tensorflow native ways to load images, but I have no idea where to get start since I am very new to tensorflow.
If I could find some tensorflow way to load images, maybe I could take 4 images into 1 batch and feed them into my model so that I might get some improvement in speed.
An immature idea is try to save 4 small images cropped from 1 raw image into a tf_record file, and load tf_record file as one batch to feed the model, but I have no idea how to achieve that.
Any help will be appreciated.
I found one solution that can reduce image loading from 0.4 second to 0.01 second. I will post answer here in case if someone also has same problem.
Instead of using PIL.Image and numpy, we could use imread in opencv.
I also managed to batch images so that we can achieve a better speedup.
The script goes as follow:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tensorflow as tf
import timeit
import cv2
from collections import defaultdict
from utils import label_map_util
from utils import visualization_utils as vis_util
MODEL_PATH = sys.argv[1]
IMAGE_PATH = sys.argv[2]
BATCH_SIZE = int(sys.argv[3])
# Path to frozen detection graph. This is the actual model that is used for the
# object detection.
PATH_TO_CKPT = os.path.join(MODEL_PATH, 'frozen_inference_graph.pb')
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'animal_label_map.pbtxt')
NUM_CLASSES = 1
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def,name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map,
max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
PATH_TO_TEST_IMAGES_DIR = IMAGE_PATH
TEST_IMAGE_PATHS = [
os.path.join(PATH_TO_TEST_IMAGES_DIR,'image{}.png'.format(i)) for i in range(1, 129) ]
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
with detection_graph.as_default():
with tf.Session(graph=detection_graph, config=config) as sess:
for i in range(0, len(TEST_IMAGE_PATHS), BATCH_SIZE):
images = []
start = timeit.default_timer()
for j in range(0, BATCH_SIZE):
image = cv2.imread(TEST_IMAGE_PATHS[i+j])
image = np.expand_dims(image, axis=0)
images.append(image)
image_np_expanded = np.concatenate(images, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
stop = timeit.default_timer()
print (stop - start)