Tesseract and multiple line license plates: How can I get characters from a two line license plate? - python-tesseract

i tried getting individual characters from the image and passing them through the ocr, but the result is jumbled up characters. Passing the whole image is at least returning the characters in order but it seems like the ocr is trying to read all the other contours as well.
example image:
Image being used
The result : 6A7J7B0
Desired result : AJB6779
The code
img = cv2.imread("data/images/car6.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# resize image to three times as large as original for better readability
gray = cv2.resize(gray, None, fx = 3, fy = 3, interpolation = cv2.INTER_CUBIC)
# perform gaussian blur to smoothen image
blur = cv2.GaussianBlur(gray, (5,5), 0)
# threshold the image using Otsus method to preprocess for tesseract
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
# create rectangular kernel for dilation
rect_kern = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
# apply dilation to make regions more clear
dilation = cv2.dilate(thresh, rect_kern, iterations = 1)
# find contours of regions of interest within license plate
try:
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
except:
ret_img, contours, hierarchy = cv2.findContours(dilation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# sort contours left-to-right
sorted_contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])
# create copy of gray image
im2 = gray.copy()
# create blank string to hold license plate number
plate_num = ""
# loop through contours and find individual letters and numbers in license plate
for cnt in sorted_contours:
x,y,w,h = cv2.boundingRect(cnt)
height, width = im2.shape
# if height of box is not tall enough relative to total height then skip
if height / float(h) > 6: continue
ratio = h / float(w)
# if height to width ratio is less than 1.5 skip
if ratio < 1.5: continue
# if width is not wide enough relative to total width then skip
if width / float(w) > 15: continue
area = h * w
# if area is less than 100 pixels skip
if area < 100: continue
# draw the rectangle
rect = cv2.rectangle(im2, (x,y), (x+w, y+h), (0,255,0),2)
# grab character region of image
roi = thresh[y-5:y+h+5, x-5:x+w+5]
# perfrom bitwise not to flip image to black text on white background
roi = cv2.bitwise_not(roi)
# perform another blur on character region
roi = cv2.medianBlur(roi, 5)
try:
text = pytesseract.image_to_string(roi, config='-c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ --psm 8 --oem 3')
# clean tesseract text by removing any unwanted blank spaces
clean_text = re.sub('[\W_]+', '', text)
plate_num += clean_text
except:
text = None
if plate_num != None:
print("License Plate #: ", plate_num)

For me psm mode 11 worked able to detect single line and multi as well
pytesseract.image_to_string(img, lang='eng', config='--oem 3 --psm 11').replace("\n", ""))
11 Sparse text. Find as much text as possible in no particular order.

If you want to extract license plate number from two rows you can replace following line:
sorted_contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0] + cv2.boundingRect(ctr)[1] * img.shape[1] )
with
sorted_contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])

Related

Jumbled text on pymupdf textbox creation

I've created a textbox redaction on pymupdf that seems to work perfectly.
But when viewing it on Mac OS, the numbers appear incorrect and jumbled. Anyone have an idea what could change a pdf's view for an identical file across OS?
def apply_overlay(
page, new_area, variable, fontsize, color, align, font, is_column=False
):
col = fitz.utils.getColor("white")
variable_area = copy.deepcopy(new_area)
variable_area.y1 = new_area.y0 + fontsize + 3
redaction = page.addRedactAnnot(
variable_area, fill=col, text=" "
) # flags not available
else:
redaction = page.addRedactAnnot(
new_area, fill=col, text=" "
)
page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_NONE)
writer = fitz.TextWriter(page.rect, color=color)
assignment
writer.fill_textbox(
new_area, variable, fontsize=fontsize, warn=True, align=align, font=font
)
writer.write_text(page)
# To show what happened, draw the rectangles, etc.
shape = page.newShape()
shape.drawRect(new_area) # the rect within which we had to stay
shape.finish(stroke_opacity=0) # show in red color
shape.commit()
shape = page.newShape()
shape.drawRect(writer.text_rect) # the generated TextWriter rectangle
shape.drawCircle(writer.last_point, 2) # coordinates of end of text
shape.finish(stroke_opacity=0) # show with blue color
shape.commit()
return shape

Pytesseract OCR with different colors

I am trying to read this type of image with pytesseract but I have some issue with the part in yellow because the color transformation that works for other chracters won't work for those in yellow boxes. Also I want to keep the " numbers fo each row well split.
Any idea how I could manage that?
Thanks
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# invert = 255 - thresh
# OCR
data = pytesseract.image_to_string(thresh, config="--psm 6")
print(data)
cv2.imshow("thresh", thresh)
# cv2.imshow("invert", invert)
cv2.waitKey()
Returns: '> SKAPOVALOY 4 (15\nRINDERKNECH 6 [EY 15\n'

Pigs counting when crossing a line using OpenCV

I'm trying to count the number of piglets that enter and leave a zone. This is important because, in my project, there is a balance underneath the zone that computes the weight of the animals. My goal is to find the pig's weight, so, to achieve that, I will count the number of piglets that enter the zone, and if this number is zero, I have the pig's weight, and according to the number of piglets that get in I will calculate the weight of each as well.
But the weight history is for the future. Currently, I need help in the counting process.
The video can be seen here. The entrance occurs from the minute 00:40 until 02:00 and the exit starts on the minute 03:54 and goes all the way through the video because the piglets start, at this point, to enter and exit the zone.
I've successfully counted the entrance with the code below. I defined a region of interest, very small, and filter the pigs according to their colors. It works fine until the piglets start to move around and get very active, leaving and entering the zone all the time.
I'm out of ideas to proceed with this challenge. If you have any suggestions, please, tell me!
Thanks!!
import cv2
FULL_VIDEO_PATH = "PATH TO THE FULL VIDEO"
MAX_COLOR = (225, 215, 219)
MIN_COLOR = (158, 141, 148)
def get_centroid(x, y, w, h):
x1 = int(w / 2)
y1 = int(h / 2)
cx = x + x1
cy = y + y1
return cx, cy
def filter_mask(frame):
# create a copy from the ROI to be filtered
ROI = (frame[80:310, 615:620]).copy()
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
# create a green rectangle on the structure that creates noise
thicker_line_filtered = cv2.rectangle(ROI, (400, 135), (0, 165), (20, 200, 20), -1)
closing = cv2.morphologyEx(thicker_line_filtered, cv2.MORPH_CLOSE, kernel)
opening = cv2.morphologyEx(closing, cv2.MORPH_OPEN, kernel)
dilation = cv2.dilate(opening, kernel, iterations=2)
# Filter the image according to the colors
segmented_line = cv2.inRange(dilation, MIN_COLOR, MAX_COLOR)
# Resize segmented line only for plot
copy = cv2.resize(segmented_line, (200, 400))
cv2.imshow('ROI', copy)
return segmented_line
def count_pigs():
cap = cv2.VideoCapture(FULL_VIDEO_PATH)
ret, frame = cap.read()
total_pigs = 0
frames_not_seen = 0
last_center = 0
is_position_ok = False
is_size_ok = False
total_size = 0
already_counted = False
while ret:
# Window interval used for counting
count_window_interval = (615, 0, 620, 400)
# Filter frame
fg_mask = filter_mask(frame)
# Draw a line on the frame, which represents when the pigs will be counted
frame_with_line = cv2.line(frame, count_window_interval[0:2], count_window_interval[2:4],(0,0,255), 1)
contours, _ = cv2.findContours(fg_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# If no contour is found, increments the variable
if len(contours) == 0:
frames_not_seen += 1
# If no contours are found within 5 frames, set last_center to 0 to generate the position difference when
# a new counter is found.
if frames_not_seen > 5:
last_center = 0
for c in contours:
frames_not_seen = 0
# Find the contour coordinates
(x, y, w, h) = cv2.boundingRect(c)
# Calculate the rectangle's center
centroid = get_centroid(x, y, w, h)
# Get the moments from the contour to calculate its size
moments = cv2.moments(c)
# Get contour's size
size = moments['m00']
# Sum the size until count the current pig
if not already_counted:
total_size += size
# If the difference between the last center and the current one is bigger than 80 - which means a new pig
# enter the counting zone - set the position ok and set the already_counted to False to mitigate noises
# with significant differences to be counted
if abs(last_center - centroid[1]) > 80:
is_position_ok = True
already_counted = False
# Imposes limits to the size to evaluate if the contour is consistent
# Min and Max value determined experimentally
if 1300 < total_size < 5500:
is_size_ok = True
# If all conditions are True, count the pig and reset all of them.
if is_position_ok and is_size_ok and not already_counted:
is_position_ok = False
is_size_ok = False
already_counted = True
total_size = 0
total_pigs += 1
last_center = centroid[1]
frame_with_line = cv2.putText(frame_with_line, f'Pigs: {total_pigs}', (100, 370) , cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,0), 2)
cv2.imshow('Frame', frame_with_line)
cv2.moveWindow('ROI', 1130, 0)
cv2.moveWindow('Frame', 0, 0)
k = cv2.waitKey(15) & 0xff
if k == 27:
break
elif k == 32:
cv2.waitKey() & 0xff
ret, frame = cap.read()
cv2.destroyAllWindows()
cap.release()
if __name__ == '__main__':
count_pigs()

Combining multiple values from database into one image

I'm trying to take 5 consecutive pixels from each image of a database, and position them consecutively to create a new image of 250x250px. all images in the database are 250x250px.
The Numpy array I'm getting has only 250 items in it, although the database has about 13,000 photos in it. Can someone help me spot the problem?
Current output for 'len(new_img_pxl)' = 250
Illustration
#edit:
from imutils import paths
import cv2
import numpy as np
# access database
database_path = list(paths.list_images('database'))
#grey scale database
img_gray = []
x = -5
y = 0
r = 0
new_img_pxl = []
# open as grayscale, resize
for img_path in database_path:
img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
img_resize = cv2.resize(img, (250, 250))
img_gray.append(img_resize)
# take five consecutive pixel from each image
for item in img_gray:
x += 5
y += 5
five_pix = item[[r][x:y]]
for pix in five_pix:
new_img_pxl.append(pix)
if y == 250:
r += 1
x = -5
y = 0
# convert to array
new_img_pxl_array = np.array(new_img_pxl)
reshape_new_img = new_img_pxl_array.reshape(25,10)
# Convert the pixels into an array using numpy
array = np.array(reshape_new_img, dtype=np.uint8)
new_img_output = cv2.imwrite('new_output_save/001.png',reshape_new_img)
your bug is in the second loop.
for item in img_gray:
for every image (i) in the list img_gray you do:
for a in item:
for each row (j) in the image (i), extract 5 pixels and append them to new_img_pxl.
the first bug is that you don't take just 5 pixels from each image, you take 5 pixels from each row of each image.
your 2nd bug is that after extracting 250 pixels the values of the variables x and y are higher than 250 (the length of a row). As a result, when you try to access the pixels [250:255] and so on you get 'None'.
If I understand your intentions, then the way you should have implemented this is as follows:
r = 0
# As Mark Setchell suggested, you might want to change iterating
# over a list of images to iterating over the list of paths
# for img_path in database_path:
for item in img_gray:
# As Mark Setchell suggested, you might wat to load and
# process your image here, overwriting the past image and
# having the memory released
x += 5
y += 5
# when you finish a row jump to the next?
if x==250:
x = 0
y = 5
r+=1
# not sure what you wanna do when you get to the end of the image.
# roll back to the start?
if r==249 && x==250:
r = 0
x = 0
y = 5
five_pix = a[r, x:y]
for pix in five_pix:
new_img_pxl.append(pix)

Find 7 vertices of a box using openCV

I don't know if this question have been repeating in here. If yes then i'm sorry..
I have a box that positioned to see H,W,L view. I understand steps to get vertices however most of the examples in the net only describes how to get 4 vertices from 2D plane. So my question is, how if we want to get 7 vertices (like the pic above) and handle it in numpy? How to differentiate between upper points and lower points?
I will be using Python to determine this.
Here's my attempt to get the 8 corners of the 3d rectangle. I masked on the saturation channel of the HSV color space since that separates out white.
I used findContours to get the contour of the box and then used approxPolyDP to get a six-point approximation (the six visible corners).
From there I approximated the two "hidden" corners via a parallelogram approximation. For each point I looked two points behind and created a fourth point that would make a parallelogram with that side. I then took the centroid of these parallelogram points to guess the corner. I hoped that taking the centroid of the points would help even out the error between the parallelogram assumption and the perspective warping, but it did a poor job.
If you need a better approximation there are probably ways to estimate the perspective warping to get the corners.
import cv2
import numpy as np
import random
def tup(point):
return (int(point[0]), int(point[1]));
# load image
img = cv2.imread("box.jpg");
# reduce size to fit on screen
scale = 0.25;
h,w = img.shape[:2];
h = int(scale*h);
w = int(scale*w);
img = cv2.resize(img, (w,h));
copy = np.copy(img);
# convert to hsv
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV);
h,s,v = cv2.split(hsv);
# make mask
mask = cv2.inRange(s, 30, 255);
# dilate and erode to get rid of small holes
kernel = np.ones((5,5), np.uint8);
mask = cv2.dilate(mask, kernel, iterations = 1);
mask = cv2.erode(mask, kernel, iterations = 1);
# contours # OpenCV 3.4, in OpenCV 2 or 4 it returns (contours, _)
_, contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE);
contour = contours[0]; # just take the first one
# approx until 6 points
num_points = 999999;
step_size = 0.01;
percent = step_size;
while num_points >= 6:
# get number of points
epsilon = percent * cv2.arcLength(contour, True);
approx = cv2.approxPolyDP(contour, epsilon, True);
num_points = len(approx);
# increment
percent += step_size;
# step back and get the points
# there could be more than 6 points if our step size misses it
percent -= step_size * 2;
epsilon = percent * cv2.arcLength(contour, True);
approx = cv2.approxPolyDP(contour, epsilon, True);
# draw contour
cv2.drawContours(img, [approx], -1, (0,0,200), 2);
# draw points
for point in approx:
point = point[0]; # drop extra layer of brackets
center = (int(point[0]), int(point[1]));
cv2.circle(img, center, 4, (150, 200, 0), -1);
# do parallelogram approx to get the two "hidden" corners to complete our 3d rectangle
proposals = [];
size = len(approx);
for a in range(size):
# get points backwards
two = approx[a - 2][0];
one = approx[a - 1][0];
curr = approx[a][0];
# get vector from one -> two
dx = two[0] - one[0];
dy = two[1] - one[1];
hidden = [curr[0] + dx, curr[1] + dy];
proposals.append([hidden, curr, a, two]);
# debug draw
c = np.copy(copy);
cv2.circle(c, tup(two), 4, (255, 0, 0), -1);
cv2.circle(c, tup(one), 4, (0,255,0), -1);
cv2.circle(c, tup(curr), 4, (0,0,255), -1);
cv2.circle(c, tup(hidden), 4, (255,255,0), -1);
cv2.line(c, tup(two), tup(one), (0,0,200), 1);
cv2.line(c, tup(curr), tup(hidden), (0,0,200), 1);
cv2.imshow("Mark", c);
cv2.waitKey(0);
# draw proposals
for point in proposals:
point = point[0];
center = (point[0], point[1]);
cv2.circle(img, center, 4, (200, 100, 0), -1);
# group points and sum up points
hidden_corners = [[0,0], [0,0]];
for point in proposals:
# get index and update hidden corners
index = point[2] % 2;
pos = point[0];
hidden_corners[index][0] += pos[0];
hidden_corners[index][1] += pos[1];
# divide to get centroid
hidden_corners[0][0] /= 3.0;
hidden_corners[0][1] /= 3.0;
hidden_corners[1][0] /= 3.0;
hidden_corners[1][1] /= 3.0;
# draw new points
for point in proposals:
# unpack
pos = point[0];
parent = point[1];
index = point[2] % 2;
source = point[3];
# draw
color = [random.randint(0, 150) for a in range(3)];
cv2.line(img, tup(hidden_corners[index]), tup(parent), (0,0,200), 2);
cv2.line(img, tup(pos), tup(parent), color, 1);
cv2.line(img, tup(pos), tup(source), color, 1);
cv2.circle(img, tup(hidden_corners[index]), 4, (200, 200, 0), -1);
# show
cv2.imshow("Image", img);
cv2.imshow("Mask", mask);
cv2.waitKey(0);