why is my object detection bounding box error.. bounding box always detected anything but not move... the accuracy changed - tensorflow

this is my code for train model for object detection. i used google colab. using ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.config. i have an error when try to detect image. its not detect my picture but always show the bounding box. bounding box is not moving but the accuracy changed
%cd /content/models/research/object_detection
#Loading the saved_model
import tensorflow as tf
import time
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from PIL import Image
from google.colab.patches import cv2_imshow
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
IMAGE_SIZE = (12, 8) # Output display size as you want
import matplotlib.pyplot as plt
PATH_TO_SAVED_MODEL="/mydrive/customTF2/data/inference_graph/saved_model"
print('Loading model...', end='')
# Load saved model and build the detection function
detect_fn=tf.saved_model.load(PATH_TO_SAVED_MODEL)
print('Done!')
#Loading the label_map
category_index=label_map_util.create_category_index_from_labelmap("/mydrive/customTF2/data/label_map.pbtxt",use_display_name=True)
#category_index=label_map_util.create_category_index_from_labelmap([path_to_label_map],use_display_name=True)
def load_image_into_numpy_array(path):
return np.array(Image.open(path))
image_path = "/mydrive/papaya_test_images/pp.jpg"
#print('Running inference for {}... '.format(image_path), end='')
image_np = load_image_into_numpy_array(image_path)
# The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
input_tensor = tf.convert_to_tensor(image_np)
# The model expects a batch of images, so add an axis with `tf.newaxis`.
input_tensor = input_tensor[tf.newaxis, ...]
detections = detect_fn(input_tensor)
# All outputs are batches tensors.
# Convert to numpy arrays, and take index [0] to remove the batch dimension.
# We're only interested in the first num_detections.
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
for key, value in detections.items()}
detections['num_detections'] = num_detections
# detection_classes should be ints.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
image_np_with_detections = image_np.copy()
viz_utils.visualize_boxes_and_labels_on_image_array(
image_np_with_detections,
detections['detection_boxes'],
detections['detection_classes'],
detections['detection_scores'],
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=200,
min_score_thresh=.07, # Adjust this value to set the minimum probability boxes to be classified as True
agnostic_mode=False)
%matplotlib inline
plt.figure(figsize=IMAGE_SIZE, dpi=200)
plt.axis("off")
plt.imshow(image_np_with_detections)
plt.show()

Related

Loading and testing a Tensorflow 2 trained model

i already was able to train a custom TF2 model using this tutorial:
https://neptune.ai/blog/how-to-train-your-own-object-detector-using-tensorflow-object-detection-api
Now im getting stuck with testing this model. The script i use for this is also from a turoial and i changed the paths etc but it still doesnt work... I tried and tried and tried for many hours now but at the time i just got demotivated...
I can resolve many errors but the current one not, maybe anyone can help me. Im quite new to object detection..
import numpy as np
import os
import six as urllib # import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
import cv2
cap = cv2.VideoCapture(1)
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# ## Object detection imports
# Here are the imports from the object detection module.
# In[3]:
from object_detection.utils import label_map_util # from utils import label_map_util
from object_detection.utils import visualization_utils as vis_util # from utils import visualization_utils as vis_util
# # Model preparation
# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
#
# By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
# In[4]:
# What model to download.
MODEL_NAME = 'D:/VSCode/Machine_Learning_Tests/Tensorflow/workspace/exported_models/first_model/saved_model' # MODEL_NAME = 'inference_graph'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/saved_model.pb' # PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'D:/VSCode/Machine_Learning_Tests/Tensorflow/workspace/data/label_map.pbtxt' # PATH_TO_LABELS = 'training/labelmap.pbtxt'
NUM_CLASSES = 1
# ## Load a (frozen) Tensorflow model into memory.
# In[6]:
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.compat.v1.GraphDef() # od_graph_def = tf.GraphDef()
with tf.compat.v2.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: # with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine
# In[7]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# ## Helper code
# In[8]:
def load_image_into_numpy_array(image):
(im_width, im_height) = image.size
return np.array(image.getdata()).reshape(
(im_height, im_width, 3)).astype(np.uint8)
# # Detection
# In[9]:
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'images/test/'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(3, 8) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12,8)
# In[10]:
with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess:
while True:
image_np = np.array(cv2.imread('Test.jpg'))
cv2.imshow('image',image_np)
cv2.waitKey(1)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8)
cv2.imshow('object detection', cv2.resize(image_np, (800,600)))
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
Thats the code i use to try testing the model
And this is the current error:
Traceback (most recent call last):
File "d:\VSCode\Machine_Learning_Tests\Tensorflow\test\object_detection_tutorial_wwwPythonProgrammingNet__mitBild.py", line 65, in <module>
od_graph_def.ParseFromString(serialized_graph)
google.protobuf.message.DecodeError: Error parsing message with type 'tensorflow.GraphDef'

I am running out of 25gb ram on google colab

This is my code below I am making a multilabel classification model using 8000 x-ray images can someone here help me this is my code below most of the ram is used while loading the images itself and only 10 epochs are able to run.
Can someone tell me what changes do I need to make to this code for it to run and generate the model.
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import *
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import cv2
import os
# construct the argument parser and parse the arguments
# initialize the initial learning rate, number of epochs to train for,
# and batch size
INIT_LR = 1e-3
EPOCHS = 40
BS = 66
# grab the list of images in our dataset directory, then initialize
# the list of data (i.e., images) and class images
print("[INFO] loading images...")
imagePaths = list(paths.list_images('/content/drive/My Drive/testset/'))
data = []
labels = []
# loop over the image paths
for imagePath in imagePaths:
# extract the class label from the filename
label = imagePath.split(os.path.sep)[-2]
# load the image, swap color channels, and resize it to be a fixed
# 224x224 pixels while ignoring aspect ratio
image = cv2.imread(imagePath)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (224, 224))
# update the data and labels lists, respectively
data.append(image)
labels.append(label)
# convert the data and labels to NumPy arrays while scaling the pixel
# intensities to the range [0, 255]
data = np.array(data) / 255.0
labels = np.array(labels)
# perform one-hot encoding on the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
# partition the data into training and testing splits using 80% of
# the data for training and the remaining 20% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels,
test_size=0.20, stratify=labels, random_state=42)
# initialize the training data augmentation object
trainAug = ImageDataGenerator(
rotation_range=15,
fill_mode="nearest")
# load the VGG16 network, ensuring the head FC layer sets are left
# off
baseModel = VGG16(weights="imagenet", include_top=False,
input_tensor=Input(shape=(224, 224, 3)))
# construct the head of the model that will be placed on top of the
# the base model
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(4, 4))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(64, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(3, activation="softmax")(headModel)
# place the head FC model on top of the base model (this will become
# the actual model we will train)
model = Model(inputs=baseModel.input, outputs=headModel)
# loop over all layers in the base model and freeze them so they will
# *not* be updated during the first training process
for layer in baseModel.layers:
layer.trainable = False
# compile our model
print("[INFO] compiling model...")
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
# train the head of the network
print("[INFO] training head...")
H = model.fit(
trainAug.flow(trainX, trainY, batch_size=BS),
steps_per_epoch=len(trainX) // BS,
validation_data=(testX, testY),
validation_steps=len(testX) // BS,
epochs=EPOCHS)
# make predictions on the testing set
print("[INFO] evaluating network...")
predIdxs = model.predict(testX, batch_size=BS)
# for each image in the testing set we need to find the index of the
# label with corresponding largest predicted probability
predIdxs = np.argmax(predIdxs, axis=1)
# show a nicely formatted classification report
print(classification_report(testY.argmax(axis=1), predIdxs,
target_names=lb.classes_))
# compute the confusion matrix and and use it to derive the raw
# accuracy, sensitivity, and specificity
cm = confusion_matrix(testY.argmax(axis=1), predIdxs)
total = sum(sum(cm))
acc = (cm[0, 0] + cm[1, 1]) / total
sensitivity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
specificity = cm[1, 1] / (cm[1, 0] + cm[1, 1])
# show the confusion matrix, accuracy, sensitivity, and specificity
print(cm)
print("acc: {:.4f}".format(acc))
print("sensitivity: {:.4f}".format(sensitivity))
print("specificity: {:.4f}".format(specificity))
# plot the training loss and accuracy
N = EPOCHS
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), H.history["accuracy"], label="train_acc",color='green')
plt.plot(np.arange(0, N), H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy on COVID-19 Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig('/content/drive/My Drive/setcovid/plot2.png')
# serialize the model to disk
print("[INFO] saving COVID-19 detector model...")
model.save('/content/drive/My Drive/setcovid/model2',save_format="h5" )
You can try to generate TFRecords from this data, store them into your drive and then feed them into your model through batches instead of directly loading them to memory. I would recommend the Hvass Laboratories YouTube channel, tensorflow tutorials playlist, tutorial number 18, TFRecords and Dataset API and the Dataset api tensorflow's official documentation.

Tensorflow object detection API tutorial error

After struggling with compatibility issues between Tensorflow 2.00 and the object detection API, I downgraded to Tensorflow 1.15 to be able to train my own model. after completing the training I modified the jupyter notebook included in the Tensorflow object detection API repo to test on my own images but I keep getting this error:
Traceback (most recent call last):
File "object_detection_tutorial_converted.py", line 254, in <module>
show_inference(detection_model, image_path)
File "object_detection_tutorial_converted.py", line 235, in show_inference
output_dict = run_inference_for_single_image(model, image_np)
File "object_detection_tutorial_converted.py", line 203, in run_inference_for_single_image
num_detections = int(output_dict.pop('num_detections'))
TypeError: int() argument must be a string, a bytes-like object or a number, not 'Tensor'
Here's my modified jupyter notebook
import os
import pathlib
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from IPython.display import display
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
# patch tf1 into `utils.ops`
utils_ops.tf = tf.compat.v1
# Patch the location of gfile
tf.gfile = tf.io.gfile
def load_model(model_name):
model_dir = pathlib.Path(model_name)/"saved_model"
model = model = tf.compat.v2.saved_model.load(str(model_dir), None)
model = model.signatures['serving_default']
return model
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'training/label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = pathlib.Path('test_images')
TEST_IMAGE_PATHS = sorted(list(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg")))
TEST_IMAGE_PATHS
model_name = 'devices_graph'
detection_model = load_model(model_name)
print(detection_model.inputs)
detection_model.output_dtypes
detection_model.output_shapes
def run_inference_for_single_image(model, image):
image = np.asarray(image)
# The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
input_tensor = tf.convert_to_tensor(image)
# The model expects a batch of images, so add an axis with `tf.newaxis`.
input_tensor = input_tensor[tf.newaxis,...]
# Run inference
output_dict = model(input_tensor)
# All outputs are batches tensors.
# Convert to numpy arrays, and take index [0] to remove the batch dimension.
# We're only interested in the first num_detections.
num_detections = int(output_dict.pop('num_detections'))
output_dict = {key:value[0, :num_detections].numpy()
for key,value in output_dict.items()}
output_dict['num_detections'] = num_detections
# detection_classes should be ints.
output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
# Handle models with masks:
if 'detection_masks' in output_dict:
# Reframe the the bbox mask to the image size.
detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
output_dict['detection_masks'], output_dict['detection_boxes'],
image.shape[0], image.shape[1])
detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
tf.uint8)
output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
return output_dict
# Run it on each test image and show the results:
def show_inference(model, image_path):
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = np.array(Image.open(image_path))
# Actual detection.
output_dict = run_inference_for_single_image(model, image_np)
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
output_dict['detection_boxes'],
output_dict['detection_classes'],
output_dict['detection_scores'],
category_index,
instance_masks=output_dict.get('detection_masks_reframed', None),
use_normalized_coordinates=True,
line_thickness=8)
display(Image.fromarray(image_np))
for image_path in TEST_IMAGE_PATHS:
show_inference(detection_model, image_path)
First you need to create an inference of the model using the script in the below link and later load the "frozen_inference_graph.pb" file/model, We need to give full path not just the folder path.
https://github.com/tensorflow/models/blob/master/research/object_detection/export_inference_graph.py
example path
MODEL_PATH = '/home/sumanh/tf_models/Archive/model/ssd_inception_v2_coco_2018_01_28/190719/frozen_inference_graph.pb'
That's strange this worked for tensorflow 2.0.0 for me. Can you send console log

No module named 'object_detection' on Spyder with W10

I use Python 3.6 with Anaconda and use the Spyder editor on my system which is a standard desktop with Windows 10. I set up TensorFlow Object Detection API as instructed in
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md.
Since the formal installation instructions are in a Linux nature, I also got help from
https://medium.com/#rohitrpatil/how-to-use-tensorflow-object-detection-api-on-windows-102ec8097699.
At the end, I wanted to test the system I already set up by running an already supported test file "object_detection_tutorial.pynb" on Jupyter notebook. It immediately gave the error:
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-10-34f5cdda911a> in <module>
15 # This is needed since the notebook is stored in the object_detection folder.
16 sys.path.append("..")
---> 17 from object_detection.utils import ops as utils_ops
18
19 if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
ModuleNotFoundError: No module named 'object_detection'
I couldn't find a solution for the error even though many times discussed on Github and here. I decided to go with Spyder, and test the code right in there. It gave error for the line
%matplotlib inline
in the code. After some research, I found that this is a Jupyter-ish command thus I commented it out. Instead I added
matplotlib.use('TkAgg')
plt.show()
Final structure of the official test code I've been testing on Spyder is
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import matplotlib
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops
if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')
# This is needed to display the images.
# %matplotlib inline
from utils import label_map_util
from utils import visualization_utils as vis_util
# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
file_name = os.path.basename(file.name)
if 'frozen_inference_graph.pb' in file_name:
tar_file.extract(file, os.getcwd())
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
def load_image_into_numpy_array(image):
(im_width, im_height) = image.size
return np.array(image.getdata()).reshape(
(im_height, im_width, 3)).astype(np.uint8)
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
def run_inference_for_single_image(image, graph):
with graph.as_default():
with tf.Session() as sess:
# Get handles to input and output tensors
ops = tf.get_default_graph().get_operations()
all_tensor_names = {output.name for op in ops for output in op.outputs}
tensor_dict = {}
for key in [
'num_detections', 'detection_boxes', 'detection_scores',
'detection_classes', 'detection_masks'
]:
tensor_name = key + ':0'
if tensor_name in all_tensor_names:
tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
tensor_name)
if 'detection_masks' in tensor_dict:
# The following processing is only for single image
detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
# Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image.shape[1], image.shape[2])
detection_masks_reframed = tf.cast(
tf.greater(detection_masks_reframed, 0.5), tf.uint8)
# Follow the convention by adding back the batch dimension
tensor_dict['detection_masks'] = tf.expand_dims(
detection_masks_reframed, 0)
image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
# Run inference
output_dict = sess.run(tensor_dict,
feed_dict={image_tensor: image})
# all outputs are float32 numpy arrays, so convert types as appropriate
output_dict['num_detections'] = int(output_dict['num_detections'][0])
output_dict['detection_classes'] = output_dict[
'detection_classes'][0].astype(np.int64)
output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
output_dict['detection_scores'] = output_dict['detection_scores'][0]
if 'detection_masks' in output_dict:
output_dict['detection_masks'] = output_dict['detection_masks'][0]
return output_dict
for image_path in TEST_IMAGE_PATHS:
image = Image.open(image_path)
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = load_image_into_numpy_array(image)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
output_dict['detection_boxes'],
output_dict['detection_classes'],
output_dict['detection_scores'],
category_index,
instance_masks=output_dict.get('detection_masks'),
use_normalized_coordinates=True,
line_thickness=8)
plt.figure(figsize=IMAGE_SIZE)
plt.imshow(image_np)
matplotlib.use('TkAgg')
plt.show()
You can see the last two lines that are added by me.
When I run this code, it gives no error, however a figure window opens and never shows a figure in it. When I hover mouse cursor on it, it shows up busy all the time.
I've tried many suggestions but I couldn't figure things out. I already created a system environment variable
PYTHON_PATH
and added values of
C:\Users\user\models;
C:\Users\user\models\research;
C:\Users\user\models\research\slim;
C:\Users\user\models\research\object_detection;
C:\Users\user\models\research\object_detection\utils;
C:\Neon-ProgramData\Anaconda3;
C:\Neon-ProgramData\Anaconda3\Scripts;
C:\Neon-ProgramData\Anaconda3\Library\bin;
I also correctly compiled proto files with protoc.exe and confirmed that .py files are sitting there.
In Anaconda, I've created an environment for TensorFlow works and TF also works normally.
I'm completely lost in the problem. I think I did the installation correctly and tried to use all suggestions the internet gave to me. I want to test and use this API and need help about where I got stuck.

More efficient way of loading images for detection

I am using tensorflow object detection api to do some semi real time object detection tasks.
The images will be taken by camera at a speed of 2 images/sec. Each image will be cropped into 4 small images so in total I need to process 8 images/sec.
My detection model has been exported into a frozen graph (.pb file) and loaded in GPU memory. Then I load images to numpy arrays to feed them into my model.
The detection itself only takes about 0.1 sec/image, however, loading each image takes about 0.45 sec.
The script I am using was revised from the code samples provided by object detection api(link), it reads each image and convert them into numpy array and then feed into detection models. The most time consumming part of this process is load_image_into_numpy_array, it takes almost 0.45 seconds.
The script is in below:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import timeit
import scipy.misc
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from utils import label_map_util
from utils import visualization_utils as vis_util
# Path to frozen detection graph. This is the actual model that is used for the
# object detection.
PATH_TO_CKPT = 'animal_detection.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'animal_label_map.pbtxt')
NUM_CLASSES = 1
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def,name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map,
max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def load_image_into_numpy_array(image):
(im_width, im_height) = image.size
return np.array(image.getdata()).reshape(
(im_height, im_width, 3)).astype(np.uint8)
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the
# images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test'
TEST_IMAGE_PATHS = [
os.path.join(PATH_TO_TEST_IMAGES_DIR,'image{}.png'.format(i)) for i in range(1, 10) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
with detection_graph.as_default():
with tf.Session(graph=detection_graph, config=config) as sess:
for image_path in TEST_IMAGE_PATHS:
start = timeit.default_timer()
image = Image.open(image_path)
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = load_image_into_numpy_array(image)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
end = timeit.default_timer()
print(end-start)
start = timeit.default_timer()
# Each box represents a part of the image where a particular object was detected.
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
stop = timeit.default_timer()
print (stop - start)
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=2)
I am thinking of a more efficient way to load images that are produced by camera, the first thought is to avoid numpy array and try to use tensorflow native ways to load images, but I have no idea where to get start since I am very new to tensorflow.
If I could find some tensorflow way to load images, maybe I could take 4 images into 1 batch and feed them into my model so that I might get some improvement in speed.
An immature idea is try to save 4 small images cropped from 1 raw image into a tf_record file, and load tf_record file as one batch to feed the model, but I have no idea how to achieve that.
Any help will be appreciated.
I found one solution that can reduce image loading from 0.4 second to 0.01 second. I will post answer here in case if someone also has same problem.
Instead of using PIL.Image and numpy, we could use imread in opencv.
I also managed to batch images so that we can achieve a better speedup.
The script goes as follow:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tensorflow as tf
import timeit
import cv2
from collections import defaultdict
from utils import label_map_util
from utils import visualization_utils as vis_util
MODEL_PATH = sys.argv[1]
IMAGE_PATH = sys.argv[2]
BATCH_SIZE = int(sys.argv[3])
# Path to frozen detection graph. This is the actual model that is used for the
# object detection.
PATH_TO_CKPT = os.path.join(MODEL_PATH, 'frozen_inference_graph.pb')
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'animal_label_map.pbtxt')
NUM_CLASSES = 1
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def,name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map,
max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
PATH_TO_TEST_IMAGES_DIR = IMAGE_PATH
TEST_IMAGE_PATHS = [
os.path.join(PATH_TO_TEST_IMAGES_DIR,'image{}.png'.format(i)) for i in range(1, 129) ]
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
with detection_graph.as_default():
with tf.Session(graph=detection_graph, config=config) as sess:
for i in range(0, len(TEST_IMAGE_PATHS), BATCH_SIZE):
images = []
start = timeit.default_timer()
for j in range(0, BATCH_SIZE):
image = cv2.imread(TEST_IMAGE_PATHS[i+j])
image = np.expand_dims(image, axis=0)
images.append(image)
image_np_expanded = np.concatenate(images, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
stop = timeit.default_timer()
print (stop - start)