Camera calibration in Python

Good evening. I'm trying to calibrate a camera. I followed the code posted on the OpenCV website, but when I run it, the code iterates through the images I gave it and then fails to produce the calibration parameters. Here is the error message I get:
error: (-215:Assertion failed) nimages > 0 in function 'cv::calibrateCameraRO'
#!/usr/bin/env python
import cv2 as cv
import numpy as np
import os
import glob
# Defining the dimensions of checkerboard
CHECKERBOARD = (10, 7)
size = (1376, 917)
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# Defining the world coordinates for 3D points
objp = np.zeros((CHECKERBOARD[0] * CHECKERBOARD[1], 3), np.float32)
objp[:, :2] = np.mgrid[0:CHECKERBOARD[0], 0:CHECKERBOARD[1]].T.reshape(-1, 2)
#prev_img_shape = None
# Creating vector to store vectors of 3D points for each checkerboard image
objpoints = []
# Creating vector to store vectors of 2D points for each checkerboard image
imgpoints = []
# Extracting path of individual image stored in a given directory
images = glob.glob('*.jpeg')
for image in images:
    print(image)
    img = cv.imread(image)
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # Find the chessboard corners
    # If the desired number of corners is found in the image, ret = True
    ret, corners = cv.findChessboardCorners(gray, CHECKERBOARD, None)
    """
    If the desired number of corners is detected,
    we refine the pixel coordinates and display
    them on the checkerboard images
    """
    if ret == True:
        objpoints.append(objp)
        # Refining pixel coordinates for the given 2D points
        corners2 = cv.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
        imgpoints.append(corners2)  # append the refined corners, not the raw ones
        # Draw and display the corners
        cv.drawChessboardCorners(img, CHECKERBOARD, corners2, ret)
        cv.imshow('img', img)
        cv.waitKey(1000)
cv.destroyAllWindows()
"""
Performing camera calibration by
passing the value of known 3D points (objpoints)
and corresponding pixel coordinates of the
detected corners (imgpoints)
"""
ret, mtx, dist, rvecs, tvecs = cv.calibrateCamera(objpoints, imgpoints, size, None, None)
print("\n camera Calibrated", ret)
print("\nCamera matrix:\n", mtx)
print("\ndist:\n", dist)
print("\nrotation vector : \n", rvecs)
print("\n translation vector : \n", tvecs)

Related

How to get intel realsense D435i camera serial numbers from frames for multiple cameras?

I have initialized one pipeline for two cameras and am getting color and depth images from it.
The problem is that I cannot find the camera serial numbers for the corresponding frames, to determine which camera captured them.
Below is my code:
import pyrealsense2 as rs
import numpy as np
import cv2
import logging
import time
# Configure depth and color streams...
pipeline_1 = rs.pipeline()
config_1 = rs.config()
config_1.enable_device('938422072752')
config_1.enable_device('902512070386')
config_1.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
config_1.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
# Start streaming from both cameras
pipeline_1.start(config_1)
try:
    while True:
        # Camera 1
        # Wait for a coherent pair of frames: depth and color
        frames_1 = pipeline_1.wait_for_frames()
        depth_frame_1 = frames_1.get_depth_frame()
        color_frame_1 = frames_1.get_color_frame()
        if not depth_frame_1 or not color_frame_1:
            continue
        # Convert images to numpy arrays
        depth_image_1 = np.asanyarray(depth_frame_1.get_data())
        color_image_1 = np.asanyarray(color_frame_1.get_data())
        # Apply colormap on depth image (image must be converted to 8-bit per pixel first)
        depth_colormap_1 = cv2.applyColorMap(cv2.convertScaleAbs(depth_image_1, alpha=0.5), cv2.COLORMAP_JET)
        # Camera 2
        # Wait for a coherent pair of frames: depth and color
        frames_2 = pipeline_1.wait_for_frames()
        depth_frame_2 = frames_2.get_depth_frame()
        color_frame_2 = frames_2.get_color_frame()
        if not depth_frame_2 or not color_frame_2:
            continue
        # Convert images to numpy arrays
        depth_image_2 = np.asanyarray(depth_frame_2.get_data())
        color_image_2 = np.asanyarray(color_frame_2.get_data())
        # Apply colormap on depth image (image must be converted to 8-bit per pixel first)
        depth_colormap_2 = cv2.applyColorMap(cv2.convertScaleAbs(depth_image_2, alpha=0.5), cv2.COLORMAP_JET)
        # Stack all images horizontally
        images = np.hstack((color_image_1, depth_colormap_1, color_image_2, depth_colormap_2))
        # Show images from both cameras
        cv2.namedWindow('RealSense', cv2.WINDOW_NORMAL)
        cv2.imshow('RealSense', images)
        cv2.waitKey(20)
finally:
    pipeline_1.stop()
How can I find the camera serial numbers after wait_for_frames(), to determine which camera captured the depth and color images?
I adapted your code and combined it with the C++ example posted by nayab to compose the following code, which grabs the color image (only) from multiple RealSense cameras and stacks them horizontally:
import pyrealsense2 as rs
import numpy as np
import cv2
import logging
import time
realsense_ctx = rs.context() # The context encapsulates all of the devices and sensors, and provides some additional functionalities.
connected_devices = []
# get serial numbers of connected devices:
for i in range(len(realsense_ctx.devices)):
    detected_camera = realsense_ctx.devices[i].get_info(rs.camera_info.serial_number)
    connected_devices.append(detected_camera)
pipelines = []
configs = []
for i in range(len(realsense_ctx.devices)):
    pipelines.append(rs.pipeline())  # one pipeline for each device
    configs.append(rs.config())  # one config for each device
    configs[i].enable_device(connected_devices[i])
    configs[i].enable_stream(rs.stream.color, 1920, 1080, rs.format.bgr8, 30)
    pipelines[i].start(configs[i])
try:
    while True:
        images = []
        for i in range(len(pipelines)):
            print("waiting for frame at cam", i)
            frames = pipelines[i].wait_for_frames()
            color_frame = frames.get_color_frame()
            images.append(np.asanyarray(color_frame.get_data()))
        # Stack all images horizontally
        image_composite = images[0]
        for i in range(1, len(images)):
            image_composite = np.hstack((image_composite, images[i]))  # keep accumulating into the same name
        # Show images from both cameras
        cv2.namedWindow('RealSense', cv2.WINDOW_NORMAL)
        cv2.imshow('RealSense', image_composite)
        cv2.waitKey(20)
finally:
    for i in range(len(pipelines)):
        pipelines[i].stop()
This will look for the connected devices and find the serial numbers.
They are saved in a list and you can use them to start the available cameras.
# Configure depth and color streams...
realsense_ctx = rs.context()
connected_devices = []
for i in range(len(realsense_ctx.devices)):
    detected_camera = realsense_ctx.devices[i].get_info(rs.camera_info.serial_number)
    connected_devices.append(detected_camera)
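If you also want to recover the serial number per running pipeline after start(), the pipeline's active profile exposes its device. A short sketch, assuming the pipelines list from the code above (verify these calls against your pyrealsense2 version):

# Sketch: map each running pipeline back to the serial number of its device.
for i in range(len(pipelines)):
    device = pipelines[i].get_active_profile().get_device()
    serial = device.get_info(rs.camera_info.serial_number)
    print("pipeline", i, "-> serial", serial)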

PiCamera MMAL error on Raspberry Pi Zero W

I'm doing a mask-detection project on a Raspberry Pi Zero W, but I can't track down this error.
This is my TensorFlow and OpenCV code:
# import the necessary packages
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import picamera
#from picamera import sleep
import numpy as np
import imutils
import time
import cv2
import os
def detect_and_predict_mask(frame, faceNet, maskNet):
    # grab the dimensions of the frame and then construct a blob
    # from it
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224),
        (104.0, 177.0, 123.0))
    # pass the blob through the network and obtain the face detections
    faceNet.setInput(blob)
    detections = faceNet.forward()
    print(detections.shape)
    # initialize our list of faces, their corresponding locations,
    # and the list of predictions from our face mask network
    faces = []
    locs = []
    preds = []
    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the detection
        confidence = detections[0, 0, i, 2]
        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence > 0.5:
            # compute the (x, y)-coordinates of the bounding box for
            # the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            # ensure the bounding boxes fall within the dimensions of
            # the frame
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))
            # extract the face ROI, convert it from BGR to RGB channel
            # ordering, resize it to 224x224, and preprocess it
            face = frame[startY:endY, startX:endX]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = img_to_array(face)
            face = preprocess_input(face)
            # add the face and bounding boxes to their respective
            # lists
            faces.append(face)
            locs.append((startX, startY, endX, endY))
    # only make predictions if at least one face was detected
    if len(faces) > 0:
        # for faster inference we'll make batch predictions on *all*
        # faces at the same time rather than one-by-one predictions
        # in the above `for` loop
        faces = np.array(faces, dtype="float32")
        preds = maskNet.predict(faces, batch_size=32)
    # return a 2-tuple of the face locations and their corresponding
    # predictions
    return (locs, preds)
# load our serialized face detector model from disk
prototxtPath = r"/home/pi/Desktop/pi/face_detector/deploy.prototxt"
weightsPath = r"/home/pi/Desktop/pi/face_detector/res10_300x300_ssd_iter_140000.caffemodel"
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
# load the face mask detector model from disk
maskNet = load_model("./mask_detector.model")
# initialize the video stream
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
cap = picamera.PiCamera()
# loop over the frames from the video stream
while cap.isOpened():
    # grab the frame from the threaded video stream and resize it
    # to have a maximum width of 400 pixels
    frame = vs.read()
    frame = imutils.resize(frame, width=400)
    # detect faces in the frame and determine if they are wearing a
    # face mask or not
    (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
    # loop over the detected face locations and their corresponding
    # predictions
    for (box, pred) in zip(locs, preds):
        # unpack the bounding box and predictions
        (startX, startY, endX, endY) = box
        (mask, withoutMask) = pred
        # determine the class label and color we'll use to draw
        # the bounding box and text
        label = "Mask" if mask > withoutMask else "No Mask"
        color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
        # include the probability in the label
        label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
        # display the label and bounding box rectangle on the output
        # frame
        cv2.putText(frame, label, (startX, startY - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
        cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF
    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        cap.close()
        break
# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
I run this TensorFlow and OpenCV code on the Raspberry Pi Zero W, but this error keeps bothering me:
mmal: mmal_vc_port_enable: failed to enable port vc.null_sink:in:0(OPQV): ENOSPC
mmal: mmal_port_enable: failed to enable connected port (vc.null_sink:in:0(OPQV))0x34600e0 (ENOSPC)
mmal: mmal_connection_enable: output port couldn't be enabled
Traceback (most recent call last):
  File "detect_mask_video.py", line 88, in <module>
    cap = picamera.PiCamera()
  File "/usr/lib/python3/dist-packages/picamera/camera.py", line 433, in __init__
    self._init_preview()
  File "/usr/lib/python3/dist-packages/picamera/camera.py", line 513, in _init_preview
    self, self._camera.outputs[self.CAMERA_PREVIEW_PORT])
  File "/usr/lib/python3/dist-packages/picamera/renderers.py", line 558, in __init__
    self.renderer.inputs[0].connect(source).enable()
  File "/usr/lib/python3/dist-packages/picamera/mmalobj.py", line 2212, in enable
    prefix="Failed to enable connection")
  File "/usr/lib/python3/dist-packages/picamera/exc.py", line 184, in mmal_check
    raise PiCameraMMALError(status, prefix)
picamera.exc.PiCameraMMALError: Failed to enable connection: Out of resources
I searched for this error on Google but I can't fix it... Please help me. :D
What I've done so far:
- Updated the Raspberry Pi
- Increased memory
- Reconnected the PiCamera
- Entered the following command in the terminal, to access the MMAL device as a standard V4L (Video for Linux) device:
sudo modprobe bcm2835-v4l2
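One detail worth checking in the code above: the camera is opened twice, once by VideoStream(src=0).start() (through the bcm2835-v4l2 driver) and again by picamera.PiCamera(). The MMAL ENOSPC / "Out of resources" error is typically what picamera raises when the camera is already claimed, or when GPU memory is too low. A sketch of the main loop with the redundant PiCamera handle removed, assuming the imutils VideoStream is the handle you want to keep:

# Sketch: keep a single camera handle; the picamera.PiCamera() call is dropped
# because VideoStream(src=0) has already claimed the camera device.
vs = VideoStream(src=0).start()
time.sleep(2.0)  # let the sensor warm up
while True:
    frame = vs.read()
    if frame is None:
        break
    frame = imutils.resize(frame, width=400)
    (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
    # ... drawing code unchanged ...
    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cv2.destroyAllWindows()
vs.stop()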

Why am I getting "IndexError: list index out of range" when training on the cloud?

I resorted to using the cloud training workflow. Given the model it produced, I would have expected to drop directly into my code that works with other tflite models, but the cloud-produced model doesn't work: I get "index out of range" when calling interpreter.get_tensor on the output details.
Here is my code, basically a modified example, where I can ingest a video and produce a video with results.
import argparse
import cv2
import numpy as np
import os  # needed for os.getcwd() / os.path.join below
import sys
import importlib.util
# Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
                    required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
                    default='model.tflite')
#                   default='/tmp/detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
                    default='dict.txt')
#                   default='/tmp/coco_labels.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
                    default=0.5)
parser.add_argument('--video', help='Name of the video file',
                    default='test.mp4')
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
                    action='store_true')
args = parser.parse_args()
MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
VIDEO_NAME = args.video
min_conf_threshold = float(args.threshold)
use_TPU = args.edgetpu
# Import TensorFlow libraries
# If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow
# If using Coral Edge TPU, import the load_delegate library
pkg = importlib.util.find_spec('tensorflow')
pkg = True
if pkg is None:
    from tflite_runtime.interpreter import Interpreter
    if use_TPU:
        from tflite_runtime.interpreter import load_delegate
else:
    from tensorflow.lite.python.interpreter import Interpreter
    if use_TPU:
        from tensorflow.lite.python.interpreter import load_delegate
# If using Edge TPU, assign filename for Edge TPU model
if use_TPU:
    # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
    if (GRAPH_NAME == 'detect.tflite'):
        GRAPH_NAME = 'edgetpu.tflite'
# Get path to current working directory
CWD_PATH = os.getcwd()
# Path to video file
VIDEO_PATH = os.path.join(CWD_PATH,VIDEO_NAME)
# Path to .tflite file, which contains the model that is used for object detection
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
# Load the label map
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]
# Have to do a weird fix for label map if using the COCO "starter model" from
# https://www.tensorflow.org/lite/models/object_detection/overview
# First label is '???', which has to be removed.
if labels[0] == '???':
    del(labels[0])
# Load the Tensorflow Lite model.
# If using Edge TPU, use special load_delegate argument
if use_TPU:
    interpreter = Interpreter(model_path=PATH_TO_CKPT,
                              experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    print(PATH_TO_CKPT)
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
# Open video file
video = cv2.VideoCapture(VIDEO_PATH)
imW = video.get(cv2.CAP_PROP_FRAME_WIDTH)
imH = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(
    'M', 'J', 'P', 'G'), 10, (1920, 1080))
while video.isOpened():
    # Acquire frame and resize to expected shape [1xHxWx3]
    ret, frame = video.read()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)
    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std
    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects
    print(boxes)
    print(classes)
    print(scores)
    #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)
    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 4)
            # Draw label
            object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i]*100))  # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
            label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0],
                label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 0, 0), 2)  # Draw label text
    # All the results have been drawn on the frame, so it's time to display it.
    cv2.imshow('Object detector', frame)
    #output_rgb = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    out.write(frame)
    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break
# Clean up
video.release()
out.release()
cv2.destroyAllWindows()
Here is what the print statements should look like when using the canned tflite model:
[32. 76. 56. 76. 0. 61. 74. 0. 0. 0.]
[0.609375 0.48828125 0.44921875 0.44921875 0.4140625 0.40234375
0.37890625 0.3125 0.3125 0.3125 ]
[[-0.01923192 0.17330796 0.747546 0.8384144 ]
[ 0.01866053 0.5023282 0.39603746 0.6143299 ]
[ 0.01673795 0.47382414 0.34407628 0.5580931 ]
[ 0.11588445 0.78543806 0.8778869 1.0039229 ]
[ 0.8106107 0.70675755 1.0080075 0.89248717]
[ 0.84941524 0.06391776 1.0006479 0.28792098]
[ 0.05543692 0.53557926 0.40413857 0.62823087]
[ 0.07051808 -0.00938512 0.8822515 0.28100258]
[ 0.68205094 0.33990026 0.9940187 0.6020821 ]
[ 0.08010477 0.01998334 0.6011186 0.26135433]]
Here is the error when presented with the cloud created model:
File "tflite_vid.py", line 124, in <module>
classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
IndexError: list index out of range
So I would kindly ask that someone explain either how to develop a TFLite model with TF2 and Python, or how to get the cloud to generate a usable TFLite model. Please do not point me at Internet examples unless they are the actual gospel on how to do this.
In output_details[1], the [1] is the index that is out of range: your model may have only one output, but the code tries to access a second one.
For more on using the Python API, please refer to https://www.tensorflow.org/lite/guide/inference#load_and_run_a_model_in_python for guidance.
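Before assuming the boxes/classes/scores layout, you can dump the output details the cloud-produced model actually exposes; a short sketch reusing the names from the question's code:

# Sketch: print every output tensor the model declares. If only one line prints,
# output_details[1] is out of range, which matches the error above.
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
for d in interpreter.get_output_details():
    print(d['index'], d['name'], d['shape'], d['dtype'])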

How can I draw a rectangle from the points I want, using ROI?

Hello, I'm a beginner in OpenCV.
I have a maze image and I wrote a maze-solver code. For this code to work, I need to get a photo like the one in the picture.
I want to select the contours of the white area using an ROI, but I could not.
When I try the ROI method, I get a plain rectangle with a black area selected.
https://i.stack.imgur.com/Ty5BX.png -----> this is my code result
https://i.stack.imgur.com/S7zuJ.png --------> I want to this result
import cv2
import numpy as np
#import image
image = cv2.imread('rt4.png')
#grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#cv2.imshow('gray', gray)
#cv2.waitKey(0)
#binary
#ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV)
threshold = 150
thresh = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]
cv2.namedWindow('second', cv2.WINDOW_NORMAL)
cv2.imshow('second', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
#dilation
kernel = np.ones((1,1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.namedWindow('dilated', cv2.WINDOW_NORMAL)
cv2.imshow('dilated', img_dilation)
cv2.waitKey(0)
cv2.destroyAllWindows()
#find contours
im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#sort contours
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
list = []
for i, ctr in enumerate(sorted_ctrs):
    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)
    # Getting ROI
    roi = image[y:y+h, x:x+w]
    a = w - x
    b = h - y
    list.append((a, b, x, y, w, h))
    # show ROI
    #cv2.imshow('segment no:'+str(i),roi)
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    #cv2.waitKey(0)
    if w > 15 and h > 15:
        cv2.imwrite('home/Desktop/output/{}.png'.format(i), roi)
cv2.namedWindow('marked areas', cv2.WINDOW_NORMAL)
cv2.imshow('marked areas', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = np.float32(gray)
dst = cv2.cornerHarris(gray, 2, 3, 0.04)
#result is dilated for marking the corners, not important
dst = cv2.dilate(dst, None)
image[dst > 0.01*dst.max()] = [0, 0, 255]
cv2.imshow('dst', image)
if cv2.waitKey(0) & 0xff == 27:
    cv2.destroyAllWindows()
list.sort()
print(list[len(list)-1])
I misunderstood your question earlier, so I'm rewriting my answer.
As @Silencer has already stated, you could use the drawContours method. You can do it as follows:
import cv2
import numpy as np
#import image
im = cv2.imread('Maze2.png')
gaus = cv2.GaussianBlur(im, (5, 5), 1)
# mask1 = cv2.dilate(gaus, np.ones((15, 15), np.uint8))
mask2 = cv2.erode(gaus, np.ones((5, 5), np.uint8))
imgray = cv2.cvtColor(mask2, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)
im2, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
maxArea1 = 0
maxI1 = 0
for i in range(len(contours)):
    area = cv2.contourArea(contours[i])
    epsilon = 0.01 * cv2.arcLength(contours[i], True)
    approx = cv2.approxPolyDP(contours[i], epsilon, True)
    if area > maxArea1:
        maxArea1 = area
        maxI1 = i  # remember the index of the largest contour
        print(maxArea1)
        print(maxI1)
cv2.drawContours(im, contours, maxI1, (0, 255, 255), 3)
cv2.imshow("yay", im)
cv2.imshow("gray", imgray)
cv2.waitKey(0)
cv2.destroyAllWindows()
I used it on the maze image above and got the right answer. You can add additional filters, or decrease the area using an ROI, to reduce the discrepancy, but it wasn't required here.
Hope it helps!
A simple solution to just draw a slanted rectangle would be to use cv2.polylines. Based on your result, I'm assuming you already have the coordinates of the vertices of the area; let's call them [x1,y1], [x2,y2], [x3,y3], [x4,y4]. The polylines function draws a line from vertex to vertex to create a closed polygon.
import cv2
import numpy as np
#List coordinates of vertices as an array
pts = np.array([[x1,y1],[x2,y2],[x3,y3],[x4,y4]], np.int32)
pts = pts.reshape((-1,1,2))
#Draw lines from vertex to vertex
cv2.polylines(image, [pts], True, (255,0,0))

About using tf.image.crop_and_resize

I'm working on an ROI pooling layer for Fast R-CNN, and I'm used to using TensorFlow. I found that tf.image.crop_and_resize can act as the ROI pooling layer.
But I've tried many times and cannot get the result I expected. Or is the true result exactly what I got?
Here is my code:
import cv2
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
img_path = r'F:\IMG_0016.JPG'
img = cv2.imread(img_path)
img = img.reshape([1,580,580,3])
img = img.astype(np.float32)
#img = np.concatenate([img,img],axis=0)
img_ = tf.Variable(img) # img shape is [1,580,580,3]
boxes = tf.Variable([[100,100,300,300],[0.5,0.1,0.9,0.5]])
box_ind = tf.Variable([0,0])
crop_size = tf.Variable([100,100])
#b = tf.image.crop_and_resize(img,[[0.5,0.1,0.9,0.5]],[0],[50,50])
c = tf.image.crop_and_resize(img_,boxes,box_ind,crop_size)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
a = c.eval(session=sess)
plt.imshow(a[0])
plt.imshow(a[1])
I've attached my original image and the results: a0, a1.
If I'm doing it wrong, can anyone teach me how to use this function? Thanks.
Actually, there's no problem with Tensorflow here.
From the doc of tf.image.crop_and_resize (emphasis is mine) :
boxes: A Tensor of type float32. A 2-D tensor of shape [num_boxes, 4].
The i-th row of the tensor specifies the coordinates of a box in the
box_ind[i] image and is specified in normalized coordinates [y1, x1,
y2, x2]. A normalized coordinate value of y is mapped to the image
coordinate at y * (image_height - 1), so as the [0, 1] interval of
normalized image height is mapped to [0, image_height - 1] in image
height coordinates. We do allow y1 > y2, in which case the sampled
crop is an up-down flipped version of the original image. The width
dimension is treated similarly. Normalized coordinates outside the [0,
1] range are allowed, in which case we use extrapolation_value to
extrapolate the input image values.
The boxes argument needs normalized coordinates. That's why you get a black box with your first set of coordinates [100,100,300,300] (not normalized, and no extrapolation value provided), and not with your second set [0.5,0.1,0.9,0.5].
As for why matplotlib shows you gibberish on your second attempt: it's just because you're using the wrong datatype.
Quoting the matplotlib documentation of plt.imshow (emphasis is mine):
All values should be in the range [0 .. 1] for floats or [0 .. 255]
for integers. Out-of-range values will be clipped to these bounds.
As you're using floats outside the [0, 1] range, matplotlib clips your values to 1. That's why you get those colored pixels (solid red, solid green, solid blue, or a mix of these). Cast your array to uint8 to get an image that makes sense.
plt.imshow(a[1].astype(np.uint8))
Edit:
As requested, I will dive a bit more into tf.image.crop_and_resize.
[When providing non-normalized coordinates and no extrapolation value], why do I just get a blank result?
Quoting the doc :
Normalized coordinates outside the [0, 1] range are allowed, in which
case we use extrapolation_value to extrapolate the input image values.
So, normalized coordinates outside [0, 1] are allowed. But they still need to be normalized!
With your example, [100,100,300,300], the coordinates you provide make up the red square, and your original image is the little green dot in the upper-left corner! The default value of the extrapolation_value argument is 0, so the values outside the frame of the original image are inferred as [0,0,0], hence the black.
But if your use case needs another value, you can provide it: the pixels will take an RGB value of extrapolation_value % 256 on each channel. This option is useful if the zone you need to crop is not fully included in your original image. (A possible use case would be sliding windows, for example.)
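Following the quoted doc, converting the pixel box to normalized coordinates just means dividing each value by (image_size - 1); a sketch for the 580x580 image from the question:

# Sketch: normalize the pixel box [y1, x1, y2, x2] = [100, 100, 300, 300]
# for a 580x580 image, since y maps to the pixel y * (image_height - 1).
h = w = 580
norm_box = [100 / (h - 1), 100 / (w - 1), 300 / (h - 1), 300 / (w - 1)]
c = tf.image.crop_and_resize(img_, [norm_box], [0], [100, 100])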
It seems that tf.image.crop_and_resize expects pixel values in the range [0,1].
Changing your code to
test = tf.image.crop_and_resize(image=image_np_expanded/255., ...)
solved the problem for me.
Yet another variant is to use tf.central_crop function.
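For example, tf.image.central_crop keeps a given fraction of the image around its center, which avoids the box bookkeeping entirely; a one-line sketch using the 3-D image from the question's array:

# Sketch: keep the central 50% of the image instead of specifying a box.
cropped = tf.image.central_crop(img[0], central_fraction=0.5)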
Below is a concrete example of the tf.image.crop_and_resize API (TF version 1.14).
import tensorflow as tf
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
tf.enable_eager_execution()
def single_data_2(img_path):
    img = tf.read_file(img_path)
    img = tf.image.decode_bmp(img, channels=1)
    img_4d = tf.expand_dims(img, axis=0)
    processed_img = tf.image.crop_and_resize(img_4d,
        boxes=[[0.4529, 0.72, 0.4664, 0.7358]], crop_size=[64, 64], box_ind=[0])
    processed_img_2 = tf.squeeze(processed_img, 0)
    raw_img_3 = tf.squeeze(img_4d, 0)
    return raw_img_3, processed_img_2

def plot_two_image(raw, processed):
    fig = plt.figure(figsize=(35, 35))
    raw_ = fig.add_subplot(1, 2, 1)
    raw_.set_title('Raw Image')
    raw_.imshow(raw, cmap='gray')
    processed_ = fig.add_subplot(1, 2, 2)
    processed_.set_title('Processed Image')
    processed_.imshow(processed, cmap='gray')
    plt.show()  # show the figure when running as a script
img_path = 'D:/samples/your_bmp_image.bmp'
raw_img, process_img = single_data_2(img_path)
print(raw_img.dtype,process_img.dtype)
print(raw_img.shape,process_img.shape)
raw_img=tf.squeeze(raw_img,-1)
process_img=tf.squeeze(process_img,-1)
print(raw_img.dtype,process_img.dtype)
print(raw_img.shape,process_img.shape)
plot_two_image(raw_img,process_img)
Below is my working code; the output image is not black. This may be of help to someone.
for idx in range(len(bboxes)):
    if bscores[idx] >= Threshold:
        # Region of Interest
        y_min = int(bboxes[idx][0] * im_height)
        x_min = int(bboxes[idx][1] * im_width)
        y_max = int(bboxes[idx][2] * im_height)
        x_max = int(bboxes[idx][3] * im_width)
        class_label = category_index[int(bclasses[idx])]['name']
        class_labels.append(class_label)
        bbox.append([x_min, y_min, x_max, y_max, class_label, float(bscores[idx])])
        # Crop Image - Working Code
        cropped_image = tf.image.crop_to_bounding_box(image, y_min, x_min, y_max - y_min, x_max - x_min).numpy().astype(np.int32)
        # encode_jpeg encodes a tensor of type uint8 to string
        output_image = tf.image.encode_jpeg(cropped_image)
        # decode_jpeg decodes the string tensor to a tensor of type uint8
        #output_image = tf.image.decode_jpeg(output_image)
        score = bscores[idx] * 100
        file_name = tf.constant(OUTPUT_PATH + image_name[:-4] + '_' + str(idx) + '_' + class_label + '_' + str(round(score)) + '%' + '_' + os.path.splitext(image_name)[1])
        writefile = tf.io.write_file(file_name, output_image)