I was using pytesseract and am facing this error in google colab - python-tesseract

My code:
#Read the number plate
from PIL import Image
import pytesseract
text = pytesseract.image_to_string(Cropped, config='--psm 11')
#Cropped is the gray scale matrix
print("Detected Number is:",text)
cv2.imshow('image',img)
cv2.imshow('Cropped',Cropped)
cv2.waitKey(0)
cv2.destroyAllWindows()
ERROR:
----> 4 text = pytesseract.image_to_string(Cropped, config='--psm 11')
5
KeyError: 'PNG'

Related

OCR in the background while displaying findings in GUI

I am trying to run a OCR function in the background while displaying the findings in a GUI.
The OCR is working fine, but I can't seem to get the GUI started.
I think the issues is that there is not function to start the GUI, but I have not would a solution.
import time
import cv2
import mss
import numpy
import pytesseract
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import PySimpleGUI as sg
import os
import threading
from concurrent.futures import ThreadPoolExecutor
from PIL import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
def ocr_function():
with mss.mss() as mss_instance:
mon = mss_instance.monitors[0]
screenshot = mss_instance.grab(mon) #Read all monitor(s)
with mss.mss() as sct:
while True:
im = numpy.asarray(sct.grab(mon))
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
text = pytesseract.image_to_string(im) #image to text
time.sleep(5) #One screenshot per 5 seconds
os.system('cls') #Clear output
print(text) #Print the output text
return text #Return text to function
#plt.show()
Output = ocr_function
t = threading.Thread(target=ocr_function) #Create a thread for the OCR function
t.start() #Start the OCR thread
Output = df.sort_values(by=['Match_Acc.','D-level', 'R-level'], ascending=[False, False, False]) # Sort columes
Output = Output[Output['Match_Acc.'] >= 1]
font = ('Areal', 11)
sg.theme('BrownBlue')
data = Output
headings = ['Result', 'Column1','Column2','Column3','D-level','R-level','n_matches','nan','nonnan','Match_Acc.']
df = pd.DataFrame(data)
headings = df.columns.tolist()
data = df.values.tolist()
layout = [[sg.Table(data, headings=headings, justification='left', key='-TABLE-')],
[sg.Button('Run'), sg.Button('Exit')]]
sg.Window("Overview", layout).read(close=True)

"im2col_out_cpu" not implemented for 'Byte'

I am trying to generate overlap patches from image size (112,112) but i am unable to do so. I have already tried a lot but it didn't work out.
**Code**
import torch
import numpy as np
import torch.nn as nn
from torch import nn
from PIL import Image
import cv2
import os
import math
import torch.nn.functional as F
import torchvision.transforms as T
from timm import create_model
from typing import List
import matplotlib.pyplot as plt
from torchvision import io, transforms
from utils_torch import Image, ImageDraw
from torchvision.transforms.functional import to_pil_image
IMG_SIZE = 112
# PATCH_SIZE = 64
resize = transforms.Resize((IMG_SIZE, IMG_SIZE))
img = resize(io.read_image("Adam_Brody_233.png"))
img = img.to(torch.float32)
image_size = 112
patch_size = 28
ac_patch_size = 12
pad = 4
img = img.unsqueeze(0)
soft_split = nn.Unfold(kernel_size=(ac_patch_size, ac_patch_size), stride=(patch_size, patch_size), padding=(pad, pad))
patches = soft_split(img).transpose(1, 2)
fig, ax = plt.subplots(16, 16)
for i in range(16):
for j in range(16):
sub_img = patches[:, i, j]
ax[i][j].imshow(to_pil_image(sub_img))
ax[i][j].axis('off')
plt.show()
Traceback
Traceback (most recent call last):
File "/home/cvpr/Documents/OPVT/unfold_ours.py", line 32, in <module>
patches = soft_split(img).transpose(1, 2)
File "/home/cvpr/anaconda3/envs/OPVT/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/cvpr/anaconda3/envs/OPVT/lib/python3.7/site-packages/torch/nn/modules/fold.py", line 295, in forward
self.padding, self.stride)
File "/home/cvpr/anaconda3/envs/OPVT/lib/python3.7/site-packages/torch/nn/functional.py", line 3831, in unfold
_pair(dilation), _pair(padding), _pair(stride))
RuntimeError: "im2col_out_cpu" not implemented for 'Byte'
Yes this is an open issue in PyTorch. A simple fix is just to convert your image tensor from ints to floats you can do it like this:
img = img.to(torch.float32)
This should solve your problem

How to recognize six characters from an image by using pytesseract

I can't understand how to recognize it.
Hope someone can get me out.
import cv2
import numpy as np
import pytesseract
from PIL import Image
image = cv2.imread('b.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Morph open to remove noise and invert image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
invert = 255 - opening
data = pytesseract.image_to_string(image, lang='eng', config='--psm 10')
print(data)

Displaying an image in grayscale on matplot lib [duplicate]

I'm trying to use matplotlib to read in an RGB image and convert it to grayscale.
In matlab I use this:
img = rgb2gray(imread('image.png'));
In the matplotlib tutorial they don't cover it. They just read in the image
import matplotlib.image as mpimg
img = mpimg.imread('image.png')
and then they slice the array, but that's not the same thing as converting RGB to grayscale from what I understand.
lum_img = img[:,:,0]
I find it hard to believe that numpy or matplotlib doesn't have a built-in function to convert from rgb to gray. Isn't this a common operation in image processing?
I wrote a very simple function that works with the image imported using imread in 5 minutes. It's horribly inefficient, but that's why I was hoping for a professional implementation built-in.
Sebastian has improved my function, but I'm still hoping to find the built-in one.
matlab's (NTSC/PAL) implementation:
import numpy as np
def rgb2gray(rgb):
r, g, b = rgb[:,:,0], rgb[:,:,1], rgb[:,:,2]
gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
return gray
How about doing it with Pillow:
from PIL import Image
img = Image.open('image.png').convert('L')
img.save('greyscale.png')
If an alpha (transparency) channel is present in the input image and should be preserved, use mode LA:
img = Image.open('image.png').convert('LA')
Using matplotlib and the formula
Y' = 0.2989 R + 0.5870 G + 0.1140 B
you could do:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
def rgb2gray(rgb):
return np.dot(rgb[...,:3], [0.2989, 0.5870, 0.1140])
img = mpimg.imread('image.png')
gray = rgb2gray(img)
plt.imshow(gray, cmap=plt.get_cmap('gray'), vmin=0, vmax=1)
plt.show()
You can also use scikit-image, which provides some functions to convert an image in ndarray, like rgb2gray.
from skimage import color
from skimage import io
img = color.rgb2gray(io.imread('image.png'))
Notes: The weights used in this conversion are calibrated for contemporary CRT phosphors: Y = 0.2125 R + 0.7154 G + 0.0721 B
Alternatively, you can read image in grayscale by:
from skimage import io
img = io.imread('image.png', as_gray=True)
Three of the suggested methods were tested for speed with 1000 RGBA PNG images (224 x 256 pixels) running with Python 3.5 on Ubuntu 16.04 LTS (Xeon E5 2670 with SSD).
Average run times
pil : 1.037 seconds
scipy: 1.040 seconds
sk : 2.120 seconds
PIL and SciPy gave identical numpy arrays (ranging from 0 to 255). SkImage gives arrays from 0 to 1. In addition the colors are converted slightly different, see the example from the CUB-200 dataset.
SkImage:
PIL :
SciPy :
Original:
Diff :
Code
Performance
run_times = dict(sk=list(), pil=list(), scipy=list())
for t in range(100):
start_time = time.time()
for i in range(1000):
z = random.choice(filenames_png)
img = skimage.color.rgb2gray(skimage.io.imread(z))
run_times['sk'].append(time.time() - start_time)
start_time = time.time()
for i in range(1000):
z = random.choice(filenames_png)
img = np.array(Image.open(z).convert('L'))
run_times['pil'].append(time.time() - start_time)
start_time = time.time()
for i in range(1000):
z = random.choice(filenames_png)
img = scipy.ndimage.imread(z, mode='L')
run_times['scipy'].append(time.time() - start_time)
for k, v in run_times.items():
print('{:5}: {:0.3f} seconds'.format(k, sum(v) / len(v)))
Output
z = 'Cardinal_0007_3025810472.jpg'
img1 = skimage.color.rgb2gray(skimage.io.imread(z)) * 255
IPython.display.display(PIL.Image.fromarray(img1).convert('RGB'))
img2 = np.array(Image.open(z).convert('L'))
IPython.display.display(PIL.Image.fromarray(img2))
img3 = scipy.ndimage.imread(z, mode='L')
IPython.display.display(PIL.Image.fromarray(img3))
Comparison
img_diff = np.ndarray(shape=img1.shape, dtype='float32')
img_diff.fill(128)
img_diff += (img1 - img3)
img_diff -= img_diff.min()
img_diff *= (255/img_diff.max())
IPython.display.display(PIL.Image.fromarray(img_diff).convert('RGB'))
Imports
import skimage.color
import skimage.io
import random
import time
from PIL import Image
import numpy as np
import scipy.ndimage
import IPython.display
Versions
skimage.version
0.13.0
scipy.version
0.19.1
np.version
1.13.1
You can always read the image file as grayscale right from the beginning using imread from OpenCV:
img = cv2.imread('messi5.jpg', 0)
Furthermore, in case you want to read the image as RGB, do some processing and then convert to Gray Scale you could use cvtcolor from OpenCV:
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
The fastest and current way is to use Pillow, installed via pip install Pillow.
The code is then:
from PIL import Image
img = Image.open('input_file.jpg').convert('L')
img.save('output_file.jpg')
The tutorial is cheating because it is starting with a greyscale image encoded in RGB, so they are just slicing a single color channel and treating it as greyscale. The basic steps you need to do are to transform from the RGB colorspace to a colorspace that encodes with something approximating the luma/chroma model, such as YUV/YIQ or HSL/HSV, then slice off the luma-like channel and use that as your greyscale image. matplotlib does not appear to provide a mechanism to convert to YUV/YIQ, but it does let you convert to HSV.
Try using matplotlib.colors.rgb_to_hsv(img) then slicing the last value (V) from the array for your grayscale. It's not quite the same as a luma value, but it means you can do it all in matplotlib.
Background:
http://matplotlib.sourceforge.net/api/colors_api.html
http://en.wikipedia.org/wiki/HSL_and_HSV
Alternatively, you could use PIL or the builtin colorsys.rgb_to_yiq() to convert to a colorspace with a true luma value. You could also go all in and roll your own luma-only converter, though that's probably overkill.
Using this formula
Y' = 0.299 R + 0.587 G + 0.114 B
We can do
import imageio
import numpy as np
import matplotlib.pyplot as plt
pic = imageio.imread('(image)')
gray = lambda rgb : np.dot(rgb[... , :3] , [0.299 , 0.587, 0.114])
gray = gray(pic)
plt.imshow(gray, cmap = plt.get_cmap(name = 'gray'))
However, the GIMP converting color to grayscale image software has three algorithms to do the task.
you could do:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
def rgb_to_gray(img):
grayImage = np.zeros(img.shape)
R = np.array(img[:, :, 0])
G = np.array(img[:, :, 1])
B = np.array(img[:, :, 2])
R = (R *.299)
G = (G *.587)
B = (B *.114)
Avg = (R+G+B)
grayImage = img.copy()
for i in range(3):
grayImage[:,:,i] = Avg
return grayImage
image = mpimg.imread("your_image.png")
grayImage = rgb_to_gray(image)
plt.imshow(grayImage)
plt.show()
If you're using NumPy/SciPy already you may as well use:
scipy.ndimage.imread(file_name, mode='L')
Use img.Convert(), supports “L”, “RGB” and “CMYK.” mode
import numpy as np
from PIL import Image
img = Image.open("IMG/center_2018_02_03_00_34_32_784.jpg")
img.convert('L')
print np.array(img)
Output:
[[135 123 134 ..., 30 3 14]
[137 130 137 ..., 9 20 13]
[170 177 183 ..., 14 10 250]
...,
[112 99 91 ..., 90 88 80]
[ 95 103 111 ..., 102 85 103]
[112 96 86 ..., 182 148 114]]
With OpenCV its simple:
import cv2
im = cv2.imread("flower.jpg")
# To Grayscale
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
cv2.imwrite("grayscale.jpg", im)
# To Black & White
im = cv2.threshold(im, 127, 255, cv2.THRESH_BINARY)[1]
cv2.imwrite("black-white.jpg", im)
I came to this question via Google, searching for a way to convert an already loaded image to grayscale.
Here is a way to do it with SciPy:
import scipy.misc
import scipy.ndimage
# Load an example image
# Use scipy.ndimage.imread(file_name, mode='L') if you have your own
img = scipy.misc.face()
# Convert the image
R = img[:, :, 0]
G = img[:, :, 1]
B = img[:, :, 2]
img_gray = R * 299. / 1000 + G * 587. / 1000 + B * 114. / 1000
# Show the image
scipy.misc.imshow(img_gray)
When the values in a pixel across all 3 color channels (RGB) are same then that pixel will always be in grayscale format.
One of a simple & intuitive method to convert a RGB image to Grayscale is by taking the mean of all color channels in each pixel and assigning the value back to that pixel.
import numpy as np
from PIL import Image
img=np.array(Image.open('sample.jpg')) #Input - Color image
gray_img=img.copy()
for clr in range(img.shape[2]):
gray_img[:,:,clr]=img.mean(axis=2) #Take mean of all 3 color channels of each pixel and assign it back to that pixel(in copied image)
#plt.imshow(gray_img) #Result - Grayscale image
Input Image:
Output Image:
image=myCamera.getImage().crop(xx,xx,xx,xx).scale(xx,xx).greyscale()
You can use greyscale() directly for the transformation.

HOG +SVM training with iniria dataset, TypeError: samples is not a numpy array, neither a scalar

I'm working on pedestrian detection with a team. I am trying to figure out an error that keeps showing up that says "TypeError: samples is not a numpy array, neither a scalar" which when appear points to the line of code that is svm.train(X_data, cv2.ml.ROW_SAMPLE, labels12)
i tried following dozens of online guides but i still couldn't solve the problem, and im also very new to this
import cv2
import numpy as np
from skimage import feature
from skimage import exposure
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# training
X_data = []
labels1 = []
label = []
files = glob.glob ("new_pos_1/crop*.PNG")
for myFile in files:
# print(myFile)
image = cv2.imread(myFile,)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
X_data.append (image)
labels1.append('Pedestrian')
print('X_data shape:', np.array(X_data).shape)
labels12 = np.array([labels1])
print('labels12 shape:',np.array(labels12).shape)
print('labels shape:', np.array(labels1).shape)
#Testing
Y_data = []
files = glob.glob ("new_pos_1/person*.PNG")
for myFile in files:
# print(myFile)
image = cv2.imread (myFile)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
Y_data.append (image)
label.append('Pedestrian')
print('Y_data shape:', np.array(Y_data).shape)
print('label shape:', np.array(label).shape)
hog_features = []
for image in np.array(X_data):
(fd, hogImage) = feature.hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2),
transform_sqrt=True, block_norm="L2-Hys", visualise=True)
hogImage = exposure.rescale_intensity(hogImage, out_range=(0, 255))
hogImage = hogImage.astype("uint8")
hog_features.append(fd)
print("I'm done hogging")
print(hog_features)
svm = cv2.ml.SVM_create()
svm.setKernel(cv2.ml.SVM_LINEAR)
svm.setType(cv2.ml.SVM_C_SVC)
svm.setC(2.67)
svm.setGamma(5.383)
print("Done initializing SVM parameters")
# Train SVM on training data
svm.train(X_data, cv2.ml.ROW_SAMPLE, labels12)
print("Done trainning")
svm.save('svm_data.dat')
print("SAVED.")
#testResponse = svm.predict(testData)[1].ravel()
cv2.waitKey(0)
The line at the beginning that says labels12 = np.array([labels1]) i used to try and fix the error that showed up to no avail.
This is the original website that helped me write this code: https://www.learnopencv.com/handwritten-digits-classification-an-opencv-c-python-tutorial/
you should also do X_data2 = np.array([X_data]) and call svm.train(X_data2, cv2.ml.ROW_SAMPLE, labels12)