PiCamera MMAL error on Raspberry Pi Zero W - TensorFlow

I am building a face-mask detection project on a Raspberry Pi Zero W, but I can't track down the cause of an error.
This is my TensorFlow and OpenCV code:
# import the necessary packages
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import picamera
#from picamera import sleep
import numpy as np
import imutils
import time
import cv2
import os
def detect_and_predict_mask(frame, faceNet, maskNet):
    """Detect faces in ``frame`` and classify each one as mask / no mask.

    Args:
        frame: BGR image array whose first two dimensions are
            (height, width).
        faceNet: OpenCV DNN face detector (must provide ``setInput`` and
            ``forward``).
        maskNet: classifier whose ``predict`` yields one
            ``(mask, withoutMask)`` score pair per face.

    Returns:
        A 2-tuple ``(locs, preds)``. ``locs`` is a list of
        ``(startX, startY, endX, endY)`` boxes clipped to the frame and
        ``preds`` holds the prediction for each box, in the same order.
        Both are empty when no face passes the confidence filter.
    """
    # grab the dimensions of the frame and then construct a blob from it
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224),
                                 (104.0, 177.0, 123.0))

    # pass the blob through the network and obtain the face detections;
    # forward() only sees this frame once it has been bound with setInput()
    faceNet.setInput(blob)
    detections = faceNet.forward()

    # initialize our list of faces, their corresponding locations,
    # and the list of predictions from our face mask network
    faces = []
    locs = []
    preds = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the detection
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence <= 0.5:
            continue

        # compute the (x, y)-coordinates of the bounding box for
        # the object
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")

        # ensure the bounding boxes fall within the dimensions of
        # the frame
        (startX, startY) = (max(0, startX), max(0, startY))
        (endX, endY) = (min(w - 1, endX), min(h - 1, endY))

        # extract the face ROI; a degenerate box yields an empty slice,
        # which cvtColor/resize cannot process, so skip it
        face = frame[startY:endY, startX:endX]
        if face.size == 0:
            continue

        # convert the ROI from BGR to RGB channel ordering, resize it to
        # 224x224, and preprocess it
        face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
        face = cv2.resize(face, (224, 224))
        face = img_to_array(face)
        face = preprocess_input(face)

        # add the face and bounding box to their respective lists; they
        # are appended together so locs and preds stay aligned
        faces.append(face)
        locs.append((startX, startY, endX, endY))

    # only make predictions if at least one face was detected
    if len(faces) > 0:
        # for faster inference we'll make batch predictions on *all*
        # faces at the same time rather than one-by-one predictions
        # in the above `for` loop
        faces = np.array(faces, dtype="float32")
        preds = maskNet.predict(faces, batch_size=32)

    # return a 2-tuple of the face locations and their corresponding
    # predictions
    return (locs, preds)
# load our serialized face detector model from disk
prototxtPath = r"/home/pi/Desktop/pi/face_detector/deploy.prototxt"
weightsPath = r"/home/pi/Desktop/pi/face_detector/res10_300x300_ssd_iter_140000.caffemodel"
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)

# load the face mask detector model from disk
maskNet = load_model("./mask_detector.model")

# initialize the video stream
print("[INFO] starting video stream...")
# Open the camera exactly ONCE. The original code also created a
# picamera.PiCamera() after VideoStream(src=0) had already claimed the
# sensor; the second open is what failed with
# "PiCameraMMALError: Failed to enable connection: Out of resources".
# NOTE(review): src=0 goes through the V4L2 driver; if frames should come
# straight from the CSI camera, imutils also offers
# VideoStream(usePiCamera=True) - use one or the other, never both.
vs = VideoStream(src=0).start()
# give the camera sensor time to warm up before reading frames
time.sleep(2.0)

# loop over the frames from the video stream
while True:
    # grab the frame from the threaded video stream and resize it
    # to have a maximum width of 400 pixels
    frame = vs.read()
    if frame is None:
        # the reader thread has not delivered a frame yet
        continue
    frame = imutils.resize(frame, width=400)

    # detect faces in the frame and determine if they are wearing a
    # face mask or not
    (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)

    # loop over the detected face locations and their corresponding
    # predictions
    for (box, pred) in zip(locs, preds):
        # unpack the bounding box and predictions
        (startX, startY, endX, endY) = box
        (mask, withoutMask) = pred

        # determine the class label and color we'll use to draw
        # the bounding box and text
        label = "Mask" if mask > withoutMask else "No Mask"
        color = (0, 255, 0) if label == "Mask" else (0, 0, 255)

        # include the probability in the label
        label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)

        # display the label and bounding box rectangle on the output
        # frame
        cv2.putText(frame, label, (startX, startY - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
        cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
When I run this TensorFlow and OpenCV code on the Raspberry Pi Zero W, I get the following error:
mmal: mmal_vc_port_enable: failed to enable port vc.null_sink:in:0(OPQV): ENOSPC
mmal: mmal_port_enable: failed to enable connected port (vc.null_sink:in:0(OPQV))0x34600e0 (ENOSPC)
mmal: mmal_connection_enable: output port couldn't be enabled
Traceback (most recent call last):
File "detect_mask_video.py", line 88, in <module>
cap = picamera.PiCamera()
File "/usr/lib/python3/dist-packages/picamera/camera.py", line 433, in __init__
File "/usr/lib/python3/dist-packages/picamera/camera.py", line 513, in _init_preview
self, self._camera.outputs[self.CAMERA_PREVIEW_PORT])
File "/usr/lib/python3/dist-packages/picamera/renderers.py", line 558, in __init__
File "/usr/lib/python3/dist-packages/picamera/mmalobj.py", line 2212, in enable
prefix="Failed to enable connection")
File "/usr/lib/python3/dist-packages/picamera/exc.py", line 184, in mmal_check
raise PiCameraMMALError(status, prefix)
picamera.exc.PiCameraMMALError: Failed to enable connection: Out of resources
I searched for this error on Google but could not fix it. Please help me.
What I've tried so far:
- updating the Raspberry Pi
- increasing the GPU memory allocation
- reconnecting the PiCamera

Enter the following command in the terminal:
sudo modprobe bcm2835-v4l2
This loads the driver that exposes the MMAL camera as a standard V4L2 (Video for Linux) device.


Nan Loss during training - MaskRCNN

I am training a custom Dataset sample_data where each image annotation is in the Pascal VOC XML format.
When training the MaskRCNN model from the matterport repo for TensorFlow 1.15 and the maskrcnn for Tensorflow 2.2.0
config for the matterport using tensorflow-cpu :
When running the model (both versions) with tensorflow-cpu, data generation is fast (almost instant) and training proceeds as expected with sensible loss values.
But when using tensorflow-gpu,
model loading takes very long, the first epoch only starts after another 7-10 minutes, and the reported loss is NaN.
I've tried to:
lower the learning rate to 1e-5,
turn multiprocessing off,
set workers = 1,
change the optimizer to Adam.
System Specs:
i5 12400f,
12gb Ram,
12Gb RTX 3060,
all cudnn and cudatoolkit version according to tensorflow documentation installed.
Training Code :
from os import listdir
import imgaug
import numpy as np
from xml.etree import ElementTree
from numpy import zeros
from numpy import asarray
from mrcnn.utils import Dataset
from matplotlib import pyplot
from mrcnn.visualize import display_instances
from mrcnn.utils import extract_bboxes
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
import mrcnn.model as mrmodel
import warnings
import tensorflow as tf
import time
# gpu_available = tf.config.list_physical_devices('GPU')
gpu_available = tf.test.is_gpu_available()
class CornDataset(Dataset):
    """Maize pest/disease dataset backed by Pascal VOC XML box annotations.

    Images are read from ``<dataset_dir>/images/`` and annotations from
    ``<dataset_dir>/annots/``; an image and its XML file share the same
    numeric stem.
    """

    # Annotation label -> class id registered in load_dataset(). The
    # alternative spellings of the healthy class all map to id 5.
    LABEL_TO_CLASS_ID = {
        'fall-armyworm-larva': 1,
        'fall-armyworm-larval-damage': 2,
        'fall-armyworm-frass': 3,
        'fall-armyworm-egg': 4,
        'healthy-maize': 5,
        'healthy-images': 5,
        'none-healthy': 5,
        'maize-streak-disease': 6,
    }

    # Image ids below this value form the training split; ids at or above
    # it form the test/validation split.
    SPLIT_ID = 6770

    # load the dataset definitions
    def load_dataset(self, dataset_dir, is_train=True):
        """Register the classes and the images of one split.

        Args:
            dataset_dir: directory containing ``images/`` and ``annots/``.
            is_train: True to register the training split (ids below
                SPLIT_ID), False for the test/validation split.

        Raises:
            ValueError: if a file stem in ``images/`` is not an integer.
        """
        # define classes
        self.add_class("dataset", 1, "fall-armyworm-larva")
        self.add_class("dataset", 2, "fall-armyworm-larval-damage")
        self.add_class("dataset", 3, "fall-armyworm-frass")
        self.add_class("dataset", 4, "fall-armyworm-egg")
        self.add_class("dataset", 5, "healthy-maize")
        self.add_class("dataset", 6, "maize-streak-disease")
        # define data locations
        images_dir = dataset_dir + '/images/'
        annotations_dir = dataset_dir + '/annots/'
        # find all images
        for filename in listdir(images_dir):
            # extract image id: ".jpeg" is five characters long, every
            # other extension is assumed to be four (".jpg", ".png")
            if filename[-4:] != 'jpeg':
                image_id = filename[:-4]
            else:
                image_id = filename[:-5]
            # skip validation images if we are building the train set
            if is_train and int(image_id) >= self.SPLIT_ID:
                continue
            # skip training images if we are building the test/val set
            if not is_train and int(image_id) < self.SPLIT_ID:
                continue
            img_path = images_dir + filename
            ann_path = annotations_dir + image_id + '.xml'
            # add to dataset
            self.add_image('dataset', image_id=image_id, path=img_path,
                           annotation=ann_path,
                           class_ids=[0, 1, 2, 3, 4, 5, 6])

    # extract bounding boxes from an annotation file
    def extract_boxes(self, filename):
        """Parse one Pascal VOC XML file.

        Args:
            filename: path of the XML annotation file.

        Returns:
            ``(boxes, width, height)`` where each box is
            ``[xmin, ymin, xmax, ymax, name]`` and width/height come from
            the file's ``size`` element.
        """
        # load and parse the file
        tree = ElementTree.parse(filename)
        # get the root of the document
        root = tree.getroot()
        # extract each bounding box
        boxes = list()
        for box in root.findall('.//object'):
            name = box.find('name').text  # label name travels with the box
            xmin = int(box.find('./bndbox/xmin').text)
            ymin = int(box.find('./bndbox/ymin').text)
            xmax = int(box.find('./bndbox/xmax').text)
            ymax = int(box.find('./bndbox/ymax').text)
            coors = [xmin, ymin, xmax, ymax, name]
            # without this append the method always returned no boxes
            boxes.append(coors)
        # extract image dimensions
        width = int(root.find('.//size/width').text)
        height = int(root.find('.//size/height').text)
        return boxes, width, height

    # load the masks for an image
    def load_mask(self, image_id):
        """Build rectangular instance masks from the image's boxes.

        Args:
            image_id: index into ``self.image_info``.

        Returns:
            ``(masks, class_ids)``: ``masks`` is a uint8 array of shape
            ``[height, width, instances]`` holding 1 inside each box and 0
            elsewhere; ``class_ids`` is an int32 array with one class id
            per mask channel.
        """
        # get details of image
        info = self.image_info[image_id]
        # define box file location
        path = info['annotation']
        # load XML
        boxes, w, h = self.extract_boxes(path)
        # keep only boxes with a known label so that every mask channel
        # has a matching class id
        known = [box for box in boxes if box[4] in self.LABEL_TO_CLASS_ID]
        # create one array for all masks, each on a different channel
        masks = zeros([h, w, len(known)], dtype='uint8')
        # create masks
        class_ids = list()
        for i, box in enumerate(known):
            row_s, row_e = box[1], box[3]
            col_s, col_e = box[0], box[2]
            # masks are binary; the class is carried by class_ids, not by
            # the pixel value written into the mask
            masks[row_s:row_e, col_s:col_e, i] = 1
            # box[4] holds the class name
            class_ids.append(self.LABEL_TO_CLASS_ID[box[4]])
        return masks, asarray(class_ids, dtype='int32')

    # load an image reference
    def image_reference(self, image_id):
        """Return the file path registered for ``image_id``."""
        info = self.image_info[image_id]
        return info['path']
validset_dir = 'validation/'
# NOTE(review): `dataset_dir` is not assigned anywhere in the visible code
# (validset_dir above is otherwise unused); it must be defined before this
# point or the two load_dataset() calls raise NameError.
# train set
train_set = CornDataset()
train_set.load_dataset(dataset_dir, is_train=True)
# prepare() has to run after loading and before the dataset is used;
# NOTE(review): per the Mask R-CNN Dataset API it builds image_ids and
# class_names - confirm against the mrcnn version in use.
train_set.prepare()
print('Train: %d' % len(train_set.image_ids))
# test/val set
test_set = CornDataset()
test_set.load_dataset(dataset_dir, is_train=False)
test_set.prepare()
print('Test: %d' % len(test_set.image_ids))
import random
# randrange() excludes the upper bound; randint(0, len(...)) could return
# len(...) itself, which is one past the last valid image index
num = random.randrange(len(train_set.image_ids))
# define image id
image_id = num
# load the image
image = train_set.load_image(image_id)
# load the masks and the class ids
mask, class_ids = train_set.load_mask(image_id)
# extract bounding boxes from the masks
bbox = extract_bboxes(mask)
# display image with masks and bounding boxes
display_instances(image, bbox, mask, class_ids, train_set.class_names)
class CornConfig(Config):
    """Mask R-CNN configuration for the maize pest/disease dataset."""

    # define the name of the configuration
    NAME = "corn_cfg"
    # number of classes (background + 5 Diseases + 1 Healthy)
    NUM_CLASSES = 1 + 5 + 1
    # number of training steps per epoch
    # NOTE(review): the original STEPS_PER_EPOCH value is not visible in
    # this file, so the base Config default applies until it is set here.
    # Skip detections with < 90% confidence
    DETECTION_MIN_CONFIDENCE = 0.9
# prepare config
# A single CornConfig instance is shared by the training and the inference
# model built below.
config = CornConfig()
import os
ROOT_DIR = "/home/mehathab/Desktop/maskrcnn_drY-run"
# Directory to save logs and trained model
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
# define the model
# Two MaskRCNN objects are created with the same config and log directory:
# one in 'training' mode and one in 'inference' mode.
model = MaskRCNN(mode='training', model_dir=DEFAULT_LOGS_DIR, config=config)
model_inference = MaskRCNN(mode="inference", config=config, model_dir=DEFAULT_LOGS_DIR)
# load weights (mscoco) and exclude the output layers
# Only the training model receives the pre-trained weights here; the four
# excluded layers are left at their initial values.
# NOTE(review): presumably they are excluded because their shapes depend on
# the number of classes - confirm against the mrcnn documentation.
WEIGHT_PATH = 'mask_rcnn_coco.h5'
model.load_weights(WEIGHT_PATH, by_name=True,
exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])
# train weights (output layers or 'heads')
# The direct train() call below is disabled in the original.
# history = model.train(train_set, test_set, learning_rate=config.LEARNING_RATE, epochs=100, layers='3+')
mean_average_precision_callback = mrmodel.MeanAveragePrecisionCallback(model,

Camera calibration python

Good evening. I'm trying to calibrate a camera. I followed the code posted on the OpenCV website, but when I run it the script iterates over the images I gave it and then, instead of producing the calibration parameters, fails with the following error message:
error: (-215:Assertion failed) nimages > 0 in function 'cv::calibrateCameraRO'
#!/usr/bin/env python
import cv2 as cv
import numpy as np
import os
import glob
# Defining the dimensions of checkerboard (inner corners per row, column).
# NOTE(review): the original definition is not visible in this file;
# (6, 9) is a placeholder - set it to the inner-corner count of the board
# actually photographed, otherwise no corners are found.
CHECKERBOARD = (6, 9)
# image size in pixels, passed to calibrateCamera as (width, height)
size = (1376, 917)
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# Defining the world coordinates for 3D points
objp = np.zeros((CHECKERBOARD[0] * CHECKERBOARD[1], 3), np.float32)
objp[:, :2] = np.mgrid[0:CHECKERBOARD[0], 0:CHECKERBOARD[1]].T.reshape(-1, 2)
#prev_img_shape = None

# Creating vector to store vectors of 3D points for each checkerboard image
objpoints = []
# Creating vector to store vectors of 2D points for each checkerboard image
imgpoints = []

# Extracting path of individual image stored in a given directory
images = glob.glob('*.jpeg')
for image in images:
    img = cv.imread(image)
    if img is None:
        # unreadable file: cvtColor would fail on None, so skip it
        print("could not read", image)
        continue
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # Find the chess board corners
    # If desired number of corners are found in the image then ret = true
    ret, corners = cv.findChessboardCorners(gray, CHECKERBOARD, None)

    # If desired number of corners are detected,
    # we refine the pixel coordinates and display
    # them on the images of checker board
    if ret == True:
        # record the board's 3D model points for this view
        objpoints.append(objp)
        # refining pixel coordinates for given 2d points.
        corners2 = cv.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
        # record the matching 2D image points; without these two appends
        # calibrateCamera receives empty lists and asserts nimages > 0
        imgpoints.append(corners2)
        # Draw and display the corners
        cv.drawChessboardCorners(img, CHECKERBOARD, corners2, ret)
        cv.imshow('img', img)
        # imshow only renders while waitKey is pumping GUI events
        cv.waitKey(500)

cv.destroyAllWindows()

# Fail with a readable message instead of the OpenCV assertion when no
# image produced a detection
if not objpoints:
    raise SystemExit("No checkerboard detected in any image - check "
                     "CHECKERBOARD and the '*.jpeg' pattern.")

# Performing camera calibration by
# passing the value of known 3D points (objpoints)
# and corresponding pixel coordinates of the
# detected corners (imgpoints)
ret, mtx, dist, rvecs, tvecs = cv.calibrateCamera(objpoints, imgpoints, size, None, None)
print("\n camera Calibrated", ret)
print("\nCamera matrix:\n", mtx)
print("\ndist:\n", dist)
print("\nrotation vector : \n", rvecs)
print("\n translation vector : \n", tvecs)

Tensorflow lite only using the first item in the labelmap.txt file when identifying items

I have installed tensorflow 1.15 and created a custom model. I converted it into a .tflite file so tensorflow lite can read it. Then I ran the following code:
import os
import argparse
import cv2
import numpy as np
import sys
import glob
import importlib.util
# Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5)
parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', default=None)
parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', default=None)
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true')
args = parser.parse_args()

MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
use_TPU = args.edgetpu
IM_NAME = args.image
IM_DIR = args.imagedir

# --image and --imagedir are mutually exclusive: stop instead of carrying on
if (IM_NAME and IM_DIR):
    print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.')
    sys.exit()

# If neither an image nor a folder is specified, default to 'test1.jpg'
if (not IM_NAME and not IM_DIR):
    IM_NAME = 'test1.jpg'

# Prefer the lightweight tflite_runtime package when it is installed,
# otherwise fall back to the interpreter bundled with full TensorFlow
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
    from tflite_runtime.interpreter import Interpreter
    if use_TPU:
        from tflite_runtime.interpreter import load_delegate
else:
    from tensorflow.lite.python.interpreter import Interpreter
    if use_TPU:
        from tensorflow.lite.python.interpreter import load_delegate

# If using Edge TPU and the graph name was left at its default, switch to
# the Edge TPU model file name
if use_TPU:
    if (GRAPH_NAME == 'detect.tflite'):
        GRAPH_NAME = 'edgetpu.tflite'

# Get path to current working directory
CWD_PATH = os.getcwd()

# Define path to images and grab all image filenames
if IM_DIR:
    PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
    images = glob.glob(PATH_TO_IMAGES + '/*')
elif IM_NAME:
    PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
    images = glob.glob(PATH_TO_IMAGES)

# Path to .tflite file, which contains the model used for object detection
PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

# Load the label map
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]

# A leading '???' placeholder entry shifts every label by one; drop it
if labels and labels[0] == '???':
    del(labels[0])

# Load the TensorFlow Lite model (with the Edge TPU delegate if requested)
if use_TPU:
    interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)

# Tensors must be allocated before any set_tensor / invoke call
interpreter.allocate_tensors()

# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5

# The order of the output tensors differs between models exported by TF1
# and TF2; reading the wrong tensor as "classes" mislabels every detection.
# NOTE(review): the name test and index mapping follow the upstream
# EdjeElectronics script - confirm against this model's output_details.
outname = output_details[0]['name']
if 'StatefulPartitionedCall' in outname:
    boxes_idx, classes_idx, scores_idx = 1, 3, 0
else:
    boxes_idx, classes_idx, scores_idx = 0, 1, 2

# Loop over every image and perform detection
for image_path in images:
    # Load image and resize to expected shape [1xHxWx3]
    image = cv2.imread(image_path)
    if image is None:
        # not an image or unreadable: skip rather than crash in cvtColor
        print('Could not read image: %s' % image_path)
        continue
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)

    # Normalize pixel values if using a floating (non-quantized) model
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Run the model on this image; without set_tensor + invoke the output
    # tensors read below never contain results for the current image
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0] # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0] # Class index of detected objects
    scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0] # Confidence of detected objects

    # Draw a box and label for every detection above the threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
            # Coordinates can fall outside the image; clamp with max()/min()
            ymin = int(max(1,(boxes[i][0] * imH)))
            xmin = int(max(1,(boxes[i][1] * imW)))
            ymax = int(min(imH,(boxes[i][2] * imH)))
            xmax = int(min(imW,(boxes[i][3] * imW)))
            cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)

            object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
            label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
            cv2.rectangle(image, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
            cv2.putText(image, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

    # All the results have been drawn on the image, now display the image
    cv2.imshow('Object detector', image)

    # Press any key to continue to next image, or press 'q' to quit
    if cv2.waitKey(0) == ord('q'):
        break

# Clean up
cv2.destroyAllWindows()
Now, my custom model seems to work. It located the items on the image correctly but it labels everything with the first item on the labelmap.txt. For example:
The model identifies the remotes in the images but labels them as "key" because it is the first thing in the labelmap.txt. I don't know why this is happening, can someone please help me. I am sorry if anything is unclear. Please let me know and I will try my best to clarify a little better. Thank you.
I followed the https://github.com/EdjeElectronics/TensorFlow-Lite-Object-Detection-on-Android-and-Raspberry-Pi.

Why am I getting "IndexError: list index out of range" when training on the cloud?

I resorted to using the cloud training workflow. Given the product I got, I would have expected to drop directly into the code that I have that works with other tflite models, but the cloud produced model doesn't work. I get "index out of range" when asking for interpreter.get_tensor parameters.
Here is my code, basically a modified example, where I can ingest a video and produce a video with results.
import argparse
import cv2
import numpy as np
import sys
import importlib.util
# `os` is used below for path handling but is missing from the imports above
import os

# Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
                    required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
                    default='detect.tflite')
#                   default='/tmp/detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
                    default='labelmap.txt')
#                   default='/tmp/coco_labels.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
                    default=0.5)
parser.add_argument('--video', help='Name of the video file',
                    default='test.mp4')
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
                    action='store_true')
args = parser.parse_args()

MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
VIDEO_NAME = args.video
min_conf_threshold = float(args.threshold)
use_TPU = args.edgetpu

# Import TensorFlow libraries
# If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow
# If using Coral Edge TPU, import the load_delegate library
pkg = importlib.util.find_spec('tensorflow')
# the author forces the full-TensorFlow branch regardless of the lookup
pkg = True
if pkg is None:
    from tflite_runtime.interpreter import Interpreter
    if use_TPU:
        from tflite_runtime.interpreter import load_delegate
else:
    from tensorflow.lite.python.interpreter import Interpreter
    if use_TPU:
        from tensorflow.lite.python.interpreter import load_delegate

# If using Edge TPU, assign filename for Edge TPU model
if use_TPU:
    # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
    if (GRAPH_NAME == 'detect.tflite'):
        GRAPH_NAME = 'edgetpu.tflite'

# Get path to current working directory
CWD_PATH = os.getcwd()

# Path to video file
VIDEO_PATH = os.path.join(CWD_PATH, VIDEO_NAME)

# Path to .tflite file, which contains the model that is used for object detection
PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

# Load the label map
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]

# Have to do a weird fix for label map if using the COCO "starter model" from
# https://www.tensorflow.org/lite/models/object_detection/overview
# First label is '???', which has to be removed.
if labels and labels[0] == '???':
    del(labels[0])

# Load the Tensorflow Lite model.
# If using Edge TPU, use special load_delegate argument
if use_TPU:
    interpreter = Interpreter(model_path=PATH_TO_CKPT,
                              experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)

# Tensors must be allocated before any set_tensor / invoke call
interpreter.allocate_tensors()

# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5

# The order of the output tensors differs between models exported by TF1
# and TF2.
# NOTE(review): the name test and index mapping follow the upstream
# EdjeElectronics script - confirm against this model's output_details.
outname = output_details[0]['name']
if 'StatefulPartitionedCall' in outname:
    boxes_idx, classes_idx, scores_idx = 1, 3, 0
else:
    boxes_idx, classes_idx, scores_idx = 0, 1, 2

# A model that is not an object-detection model (e.g. an image classifier)
# exposes fewer output tensors; fail with a clear message instead of
# "IndexError: list index out of range" deep inside the frame loop
required_outputs = max(boxes_idx, classes_idx, scores_idx) + 1
if len(output_details) < required_outputs:
    sys.exit('Model %s exposes %d output tensor(s) but this script needs at '
             'least %d (boxes, classes, scores); it does not look like an '
             'object-detection model.'
             % (PATH_TO_CKPT, len(output_details), required_outputs))

# Open video file
video = cv2.VideoCapture(VIDEO_PATH)
imW = video.get(cv2.CAP_PROP_FRAME_WIDTH)
imH = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
# The writer's frame size must match the frames written to it, so derive it
# from the input video instead of hard-coding 1920x1080
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(
    'M', 'J', 'P', 'G'), 10, (int(imW), int(imH)))

while video.isOpened():
    # Acquire frame and resize to expected shape [1xHxWx3]
    ret, frame = video.read()
    if not ret:
        # no frame returned: end of file or read failure
        print('Reached the end of the video!')
        break
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0] # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0] # Class index of detected objects
    scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0] # Confidence of detected objects
    print (boxes)
    print (classes)
    print (scores)
    #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            ymin = int(max(1,(boxes[i][0] * imH)))
            xmin = int(max(1,(boxes[i][1] * imW)))
            ymax = int(min(imH,(boxes[i][2] * imH)))
            xmax = int(min(imW,(boxes[i][3] * imW)))
            cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 4)

            # Draw label
            object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
            label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0],
                          label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX,
                        0.7, (0, 0, 0), 2) # Draw label text

    # All the results have been drawn on the frame, so it's time to display it.
    cv2.imshow('Object detector', frame)
    #output_rgb = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    # the frame is still BGR here, which is what VideoWriter expects
    out.write(frame)

    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break

# Clean up
video.release()
out.release()
cv2.destroyAllWindows()
Here is what the print statements should look like when using the canned tflite model:
[32. 76. 56. 76. 0. 61. 74. 0. 0. 0.]
[0.609375 0.48828125 0.44921875 0.44921875 0.4140625 0.40234375
0.37890625 0.3125 0.3125 0.3125 ]
[[-0.01923192 0.17330796 0.747546 0.8384144 ]
[ 0.01866053 0.5023282 0.39603746 0.6143299 ]
[ 0.01673795 0.47382414 0.34407628 0.5580931 ]
[ 0.11588445 0.78543806 0.8778869 1.0039229 ]
[ 0.8106107 0.70675755 1.0080075 0.89248717]
[ 0.84941524 0.06391776 1.0006479 0.28792098]
[ 0.05543692 0.53557926 0.40413857 0.62823087]
[ 0.07051808 -0.00938512 0.8822515 0.28100258]
[ 0.68205094 0.33990026 0.9940187 0.6020821 ]
[ 0.08010477 0.01998334 0.6011186 0.26135433]]
Here is the error when presented with the cloud created model:
File "tflite_vid.py", line 124, in <module>
classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
IndexError: list index out of range
So I would kindly ask that someone explain either how to develop a TFLite model with TF2 in Python or how to get the cloud to generate a usable TFLite model. Please, oh please, do not point me in a direction that entails wandering through Internet examples unless they are the actual gospel on how to do this.
In output_details[1], the index [1] is what is out of range. Your model probably has only one output tensor, but the code tries to access a second one.
For more usage about Python code, please refer to https://www.tensorflow.org/lite/guide/inference#load_and_run_a_model_in_python for guidance.


I am implementing a Faster RCNN v2 Inception in Tensorflow Object Detection API. To remove redundant overlapping detections, I read that NMS should be applied.
One way of doing this is adjusting the NMS IOU Threshold in the config file first_stage_nms_iou_threshold.
What is this parameter exactly? To what value should this parameter be adjusted to (default value is 0.7)
Why is it called first_stage_nms_iou_threshold? Why first stage only?
Is there another easy and more effective way of removing redundant detections?
I can't answer your first and second questions, but I had the same problem with overlapping bounding boxes and used the following code to fix them manually. You have to know the x1, y1, x2, y2 coordinates of the overlapping bounding boxes.
# import the necessary packages
from nms import non_max_suppression_slow
import numpy as np
import cv2
# path to your image
# and the coordinates (x1, y1, x2, y2) of the overlapping bounding boxes
images = [
    ("path/to/your/image", np.array([
        (664, 0, 988, 177),
        (670, 10, 1000, 188),
        (685, 20, 1015, 193),
        (47, 100, 357, 500),
        (55, 105, 362, 508),
        (68, 120, 375, 520),
        (978, 80, 1093, 206)]))]

# loop over the images
for (imagePath, boundingBoxes) in images:
    # load the image and clone it
    print("[x] %d initial bounding boxes" % (len(boundingBoxes)))
    image = cv2.imread(imagePath)
    if image is None:
        # wrong path or unreadable file: report it instead of failing
        # on image.copy()
        print("[x] could not read image %s, skipping" % imagePath)
        continue
    orig = image.copy()

    # loop over the bounding boxes for each image and draw them
    # (coordinates are cast to plain ints before being handed to OpenCV)
    for (startX, startY, endX, endY) in boundingBoxes:
        cv2.rectangle(orig, (int(startX), int(startY)),
                      (int(endX), int(endY)), (0, 0, 255), 2)

    # perform non-maximum suppression on the bounding boxes
    pick = non_max_suppression_slow(boundingBoxes, 0.3)
    print("[x] after applying non-maximum, %d bounding boxes" % (len(pick)))

    # loop over the picked bounding boxes and draw them
    for (startX, startY, endX, endY) in pick:
        cv2.rectangle(image, (int(startX), int(startY)),
                      (int(endX), int(endY)), (0, 255, 0), 2)

    # display the images; the windows only render while waitKey runs
    cv2.imshow("Original", orig)
    cv2.imshow("After NMS", image)
    cv2.waitKey(0)
You also need the following helper, saved as nms.py so that the import above works:
# import the necessary packages
import numpy as np
def non_max_suppression_slow(boxes, overlapThresh):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Boxes are visited in descending order of their bottom-right
    y-coordinate. Each visited box is kept, and every remaining box whose
    intersection with it covers more than ``overlapThresh`` of that
    remaining box's own area is discarded.

    Args:
        boxes: 2-D NumPy array with one ``(x1, y1, x2, y2)`` row per box.
        overlapThresh: overlap ratio above which a box is suppressed.

    Returns:
        The kept rows of ``boxes`` in the order they were picked, or an
        empty list when ``boxes`` is empty.
    """
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list, add the index
        # value to the list of picked indexes, then initialize
        # the suppression list (i.e. indexes that will be deleted)
        # using the last index
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        suppress = [last]

        # loop over all indexes in the indexes list
        for pos in range(0, last):
            # grab the current index
            j = idxs[pos]

            # find the largest (x, y) coordinates for the start of
            # the bounding box and the smallest (x, y) coordinates
            # for the end of the bounding box
            xx1 = max(x1[i], x1[j])
            yy1 = max(y1[i], y1[j])
            xx2 = min(x2[i], x2[j])
            yy2 = min(y2[i], y2[j])

            # compute the width and height of the intersection
            w = max(0, xx2 - xx1 + 1)
            h = max(0, yy2 - yy1 + 1)

            # compute the ratio of overlap between the intersection
            # and the area of the box being tested
            overlap = float(w * h) / area[j]

            # if there is sufficient overlap, suppress the
            # current bounding box
            if overlap > overlapThresh:
                suppress.append(pos)

        # delete all indexes from the index list that are in the
        # suppression list (always includes `last`, so the loop ends)
        idxs = np.delete(idxs, suppress)

    # return only the bounding boxes that were picked
    return boxes[pick]