How to use a trained YOLOv7 best.pt file as a "vehicle plate detection" application in Python?

I have trained a model using YOLOv7 with this Colab notebook: https://colab.research.google.com/drive/1X9A8odmK4k6l26NDviiT6dd6TgR-piOa#scrollTo=nD-uPyQ_2jiN. After training I got a file named best.pt, and I want to use it as a service/application in Python. Below is a YOLOv3 example I created earlier; it uses a .weights file, so how can I do the same with a .pt file?
import cv2
import numpy as np
import time
import os

def getPhoto():
    ROOT_DIR = os.path.dirname(__file__)
    URL = "http://192.168.1.3:4747/video"
    PC_CAM = 0
    net = cv2.dnn.readNet(
        f"{ROOT_DIR}\\yolov3_custom_final.weights",
        f"{ROOT_DIR}\\yolov3_custom.cfg",
    )
    classes = []
    with open(f"{ROOT_DIR}\\classes.txt", "r") as f:
        classes = f.read().splitlines()
    timeElapsed = 0
    wCam, hCam = 640, 360
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(2, 3))
    cap = cv2.VideoCapture(URL)
    cap.set(3, wCam)
    cap.set(4, hCam)
    # show a live preview until "c" is pressed, then detect on the last frame
    while True:
        _, img = cap.read()
        cv2.imshow("Detection Screen", img)
        if cv2.waitKey(1) == ord("c"):
            break
    height, width, _ = img.shape
    blob = cv2.dnn.blobFromImage(
        img, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False
    )
    net.setInput(blob)
    output_layers_names = net.getUnconnectedOutLayersNames()
    layerOutputs = net.forward(output_layers_names)
    boxes = []
    confidences = []
    class_ids = []
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.6:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.4)
    if len(indexes) > 0:
        for i in indexes.flatten():
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            color = colors[class_ids[i]]  # index by class id, not detection index, to stay within the 2 colors
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv2.putText(
                img,
                label + " " + confidence,
                (x, y + 20),
                font,
                2,
                (255, 255, 0),
                2,
            )
    cv2.imwrite(f"{ROOT_DIR}\\DetectedPhoto.jpg", img)
    print("Image Saved")

getPhoto()
I want to detect vehicle plates using the .pt file, crop the detected plates, and save them as .jpeg files.

You can do this with detect.py inside the yolov7 folder. Run:
python detect.py --weights best.pt --source image.jpg
You can get yolov7 by cloning the repository:
git clone https://github.com/WongKinYiu/yolov7.git
Inside yolov7 you will find detect.py; the annotated output is saved under runs/detect by default.
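If you want to run best.pt from your own Python service and also crop the detected plates to .jpeg (which detect.py alone does not do), here is a minimal sketch. It assumes the yolov7 repo's torch.hub 'custom' entry point loads user weights and returns a YOLOv5-style Detections object with an .xyxy attribute; the paths and the 0.5 confidence cutoff are placeholders:
import cv2
import torch

# Load your trained weights through the yolov7 hub entry point
# (assumes hubconf.py from the cloned repo; paths are placeholders).
model = torch.hub.load('WongKinYiu/yolov7', 'custom', 'best.pt')

img = cv2.imread('image.jpg')    # OpenCV loads BGR
results = model(img[..., ::-1])  # the model expects RGB input

# results.xyxy[0]: one [x1, y1, x2, y2, confidence, class] row per detection
for i, (x1, y1, x2, y2, conf, cls) in enumerate(results.xyxy[0].tolist()):
    if conf < 0.5:  # placeholder confidence threshold
        continue
    plate = img[int(y1):int(y2), int(x1):int(x2)]  # crop the plate region
    cv2.imwrite(f'plate_{i}.jpeg', plate)          # save each crop as .jpeg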

Related

Automatic annotation for yolo not working

I am trying to generate annotations for image files I have created for training. I paste an object image on top of a background image and record the x, y coordinates of the location where the object image is pasted.
The bounding box for the pasted object is calculated as (x, (x+w), y, (y+h)):
box = (x, (x+w), y, (y+h))  # w, h are the width and height of the object image
I am converting this to a YOLO annotation using this function:
def convert_boxes_to_yolo(box, frame):
    # frame is a tuple containing background image width and height
    # x = box[0][0]
    # y = box[0][1]
    # w = box[1][0] - box[0][0]
    # h = box[1][1] - box[0][1]
    x, y, w, h = box
    print(frame.shape)
    xc = float((x + w/2.0) / frame.shape[1])
    yc = float((y + h/2.0) / frame.shape[0])
    wc = float(w / frame.shape[1])
    hc = float(h / frame.shape[0])
    return (str(xc), str(yc), str(wc), str(hc))
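One detail worth double-checking here: the box is built above as (x, (x+w), y, (y+h)), but the function unpacks it as x, y, w, h = box, so the second slot (x+w) lands in y and the third slot (y) lands in w. A sketch of an unpacking consistent with that construction (a hypothetical correction, assuming the box really is (x1, x2, y1, y2)):
# box = (x1, x2, y1, y2) per the construction above
x1, x2, y1, y2 = box
x, y = x1, y1           # top-left corner
w, h = x2 - x1, y2 - y1  # width and height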
and I am using this function to plot the bounding box, which looks correct:
import cv2
import matplotlib.pyplot as plt
from PIL import Image  # needed for Image.fromarray below

img = cv2.imread('Omen_6_image_generated.png')
dh, dw, _ = img.shape
# dh, dw = (35, 400)

fl = open('Omen_6_image_generated.txt', 'r')
data = fl.readlines()
fl.close()

for dt in data:
    # Split string to float
    _, x, y, w, h = map(float, dt.split())
    # Taken from https://github.com/pjreddie/darknet/blob/810d7f797bdb2f021dbe65d2524c2ff6b8ab5c8b/src/image.c#L283-L291
    # via https://stackoverflow.com/questions/44544471/how-to-get-the-coordinates-of-the-bounding-box-in-yolo-object-detection#comment102178409_44592380
    l = int((x - w / 2) * dw)
    r = int((x + w / 2) * dw)
    t = int((y - h / 2) * dh)
    b = int((y + h / 2) * dh)
    if l < 0:
        l = 0
    if r > dw - 1:
        r = dw - 1
    if t < 0:
        t = 0
    if b > dh - 1:
        b = dh - 1
    cv2.rectangle(img, (l, t), (r, b), (0, 0, 255), 1)

image = Image.fromarray(img.astype('uint8'), 'RGB')
image.show()
The bounding box is plotted correctly, but the online annotation tools are not able to parse the file.
For example, the plotting code above correctly draws the bounding box for the shared image and annotation file below, but an AI annotation tool like https://www.makesense.ai/ cannot parse it, and the same image also looks wrong in labelImg.
Link to both the image and the YOLO file:
https://drive.google.com/drive/folders/13ZTVrzswtcvXRBo6kJAhiITxx-IzOi-_?usp=sharing

I am not able to render 2D images of a 3D point cloud

I am trying to render 2D images of point clouds from different viewpoints and save them as images.
I found code online that does the same thing for meshes. I tweaked it a little to import the 3D point cloud, but the code does not work and produces black images. Please help me with this. I am open to using another library if you know a solution; I just want to render the 2D images. Thank you.
Code:
import os.path
import math
import sys

import bpy  # this script runs inside Blender, which provides bpy

C = bpy.context
D = bpy.data
scene = D.scenes['Scene']

# cameras: a list of camera positions
# a camera position is defined by two parameters: (theta, phi),
# where we fix the "r" of (r, theta, phi) in spherical coordinate system.
# 5 orientations: front, right, back, left, top
cameras = [
    (60, 0), (60, 90), (60, 180), (60, 270),
    (0, 0)
]
# 12 orientations around the object with 30-deg elevation
# cameras = [(60, i) for i in range(0, 360, 30)]

render_setting = scene.render

# output image size = (W, H)
w = 500
h = 500
render_setting.resolution_x = w
render_setting.resolution_y = h


def main():
    argv = sys.argv
    argv = argv[argv.index('--') + 1:]
    if len(argv) != 2:
        print('phong.py args: <3d mesh path> <image dir>')
        exit(-1)
    model = argv[0]
    image_dir = argv[1]
    # blender has no native support for off files
    # install_off_addon()
    # init_camera()
    fix_camera_to_origin()
    do_model(model, image_dir)


def install_off_addon():
    try:
        # bpy.ops.preferences.addon_install(
        #     overwrite=False,
        #     filepath=os.path.dirname(__file__) +
        #     '/blender-off-addon/import_off.py'
        # )
        bpy.ops.preferences.addon_enable(module='import_off')
    except Exception as e:
        print(e)
        print("""Import blender-off-addon failed.
Did you pull the blender-off-addon submodule?
$ git submodule update --recursive --remote
""")
        exit(-1)


def init_camera():
    cam = D.objects['Camera']
    # select the camera object
    scene.objects.active = cam
    cam.select = True
    # set the rendering mode to orthogonal and scale
    C.object.data.type = 'ORTHO'
    C.object.data.ortho_scale = 2.


def fix_camera_to_origin():
    origin_name = 'Origin'
    # create origin
    try:
        origin = D.objects[origin_name]
    except KeyError:
        bpy.ops.object.empty_add(type='SPHERE')
        D.objects['Empty'].name = origin_name
        origin = D.objects[origin_name]
    origin.location = (0, 0, 0)
    cam = D.objects['Camera']
    # scene.objects.active = cam
    # cam.select = True
    if 'Track To' not in cam.constraints:
        bpy.ops.object.constraint_add(type='TRACK_TO')
    cam.constraints['Track To'].target = origin
    cam.constraints['Track To'].track_axis = 'TRACK_NEGATIVE_Z'
    cam.constraints['Track To'].up_axis = 'UP_Y'


def do_model(path, image_dir):
    name = load_model(path)
    center_model(name)
    normalize_model(name)
    image_subdir = os.path.join(image_dir, name)
    for i, c in enumerate(cameras):
        move_camera(c)
        render()
        save(image_subdir, '%s.%d' % (name, i))
    # delete_model(name)


def load_model(path):
    d = os.path.dirname(path)
    ext = path.split('.')[-1]
    name = os.path.basename(path).split('.')[0]
    # handle weird object naming by Blender for stl files
    if ext == 'stl':
        name = name.title().replace('_', ' ')
    if name not in D.objects:
        print('loading :' + name)
        if ext == 'stl':
            bpy.ops.import_mesh.stl(filepath=path, directory=d,
                                    filter_glob='*.stl')
        elif ext == 'off':
            bpy.ops.import_mesh.off(filepath=path, filter_glob='*.off')
        elif ext == 'obj':
            bpy.ops.import_scene.obj(filepath=path, filter_glob='*.obj')
        else:
            bpy.ops.import_mesh.ply(filepath=path, filter_glob='*.ply')
    return name


def delete_model(name):
    for ob in scene.objects:
        if ob.type == 'MESH' and ob.name.startswith(name):
            ob.select = True
        else:
            ob.select = False
    bpy.ops.object.delete()


def center_model(name):
    bpy.ops.object.origin_set(type='GEOMETRY_ORIGIN')
    D.objects[name].location = (0, 0, 0)


def normalize_model(name):
    obj = D.objects[name]
    dim = obj.dimensions
    print('original dim:' + str(dim))
    if max(dim) > 0:
        dim = dim / max(dim)
        obj.dimensions = dim
    print('new dim:' + str(dim))


def move_camera(coord):
    def deg2rad(deg):
        return deg * math.pi / 180.

    r = 3.
    theta, phi = deg2rad(coord[0]), deg2rad(coord[1])
    loc_x = r * math.sin(theta) * math.cos(phi)
    loc_y = r * math.sin(theta) * math.sin(phi)
    loc_z = r * math.cos(theta)
    D.objects['Camera'].location = (loc_x, loc_y, loc_z)


def render():
    bpy.ops.render.render()


def save(image_dir, name):
    path = os.path.join(image_dir, name + '.png')
    D.images['Render Result'].save_render(filepath=path)
    print('save to ' + path)


if __name__ == '__main__':
    main()
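Since you mentioned being open to another library: a point cloud has no faces, so a plain Blender render of imported points can come out black unless the points are given renderable geometry (e.g. instanced spheres or a material). As an alternative, a minimal sketch with Open3D's off-screen visualizer (the file names and window size are placeholder assumptions):
import open3d as o3d

# Load the point cloud (path is a placeholder)
pcd = o3d.io.read_point_cloud('cloud.ply')

vis = o3d.visualization.Visualizer()
vis.create_window(visible=False, width=500, height=500)
vis.add_geometry(pcd)
vis.poll_events()
vis.update_renderer()
# Render the current viewpoint straight to a PNG
vis.capture_screen_image('view0.png', do_render=True)
vis.destroy_window()
You can change the viewpoint between captures through vis.get_view_control() (e.g. its rotate method) to reproduce the multi-camera loop.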

Set timer on detected object

I'm using YOLO to detect objects, but I want to set a timer for the detection. Can anyone help me?
I want the detection to run only for a limited time for my project.
I've tried my best, but I have no idea how to do it.
Here is my code:
import cv2 as cv
import numpy as np

cap = cv.VideoCapture(0)
whT = 320
confThreshold = 0.1
nmsThreshold = 0.4

classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
    classNames = [line.strip() for line in f.readlines()]

modelConfiguration = "yolov4.cfg"
modelWeights = "yolov4.weights"
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)


def findObjects(outputs, img):
    hT, wT, cT = img.shape
    bbox = []
    classIds = []
    confs = []
    for output in outputs:
        for det in output:
            scores = det[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                w, h = int(det[2]*wT), int(det[3]*hT)
                x, y = int((det[0]*wT) - w/2), int((det[1]*hT) - h/2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(float(confidence))
    indices = cv.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
    font = cv.FONT_HERSHEY_PLAIN
    for i in indices:
        label = str(classNames[classIds[i]])
        x, y, w, h = bbox[i]
        # print(x, y, w, h)
        cv.rectangle(img, (x, y), (x+w, y+h), (255, 0, 255), 2)
        cv.putText(img, label, (x, y + 30), font, 3, (0, 0, 0), 3)
        print("Vehicle type: " + label)
        # cv.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%', (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)


while True:
    success, img = cap.read()
    blob = cv.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
    net.setInput(blob)
    layersNames = net.getLayerNames()
    outputNames = [(layersNames[i - 1]) for i in net.getUnconnectedOutLayers()]
    outputs = net.forward(outputNames)
    findObjects(outputs, img)
    cv.imshow('Image', img)
    key = cv.waitKey(1)
    if key == 27:
        break

cap.release()
cv.destroyAllWindows()
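One simple way to add the timer is to bound the main loop by wall-clock time with the standard time module. A sketch of the reworked loop (the 30-second limit is an arbitrary example):
import time

TIME_LIMIT_S = 30  # arbitrary example: stop detecting after 30 seconds
start = time.time()
while True:
    success, img = cap.read()
    if not success:
        break
    if time.time() - start > TIME_LIMIT_S:
        print("Time limit reached, stopping detection")
        break
    # ... existing blob / net.forward / findObjects / cv.imshow code ...
    if cv.waitKey(1) == 27:
        break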

How to pass a continuous video stream to PyQt5 using QThread correctly for face recognition?

I want to pass a continuous video stream to a PyQt5 QLabel named label_cam so it shows up in the UI, using QThread, but it keeps failing to show the video stream on the QLabel. The stream will later be used to recognize people.
I have been trying to connect the change_Pixmap signal from the VideoThread class to the set_image function for the label_cam object, but I guess the code flow or variable assignment is wrong. Below is my code.
class FaceRecogScreen(QDialog):
    def __init__(self):
        super(FaceRecogScreen, self).__init__()
        uic.loadUi("face_recog.ui", self)
        self.update_date_time()
        self.pushButton_back.clicked.connect(self.back_to_main3)
        self.load_model()

    def load_model(self):
        self.prototxt = "deploy.prototxt.txt"
        self.model = "res10_300x300_ssd_iter_140000.caffemodel"
        print("[INFORMATION] Loading model....")
        self.net = cv2.dnn.readNetFromCaffe(self.prototxt, self.model)
        weight = "facenet_keras_weights.h5"
        self.model2 = load_model('FaceNetModel.h5')
        self.model2.load_weights(weight)
        self.collected_encodings = pickle.loads(open('face_encoding.pickle', "rb").read())
        infile = open('face_encoding', 'rb')
        data = pickle.load(infile)
        self.knownEncodings, self.knownNames = data['encodings'], data['names']
        self.knownEncodings = np.array(self.knownEncodings)
        self.knownNames = np.array(self.knownNames)
        self.clf = svm.SVC(gamma="scale", probability=True, tol=0.01)
        self.clf.fit(self.knownEncodings, self.knownNames)
        # self.label_cam = VideoLabel()
        self.thread = VideoThread(self)
        self.thread.change_Pixmap.connect(self.set_image)
        # call the run() function in VideoThread class
        self.thread.start()
        # self.thread.change_Pixmap.connect(self.label_cam.set_image)
        # # call the run() function in VideoThread class
        # self.thread.start()
        # layout = self.layout()
        # layout.addWidget(self.label_cam)
        # self.thread.run.start()

    def update_date_time(self):
        # Get the current date and time
        date_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.label_datetime.setText(date_time)
        # Update the date and time in the table

    def back_to_main3(self):
        pushButton_back = WelcomeScreen()
        widget.addWidget(pushButton_back)
        widget.setCurrentIndex(widget.currentIndex()+1)

    def set_image(self, frame):
        self.setPixmap(QPixmap.fromImage(frame))


class VideoThread(QtCore.QThread):
    change_Pixmap = QtCore.pyqtSignal(QtGui.QImage)

    def run(self):
        cap = cv2.VideoCapture(1)
        while True:
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                h, w, ch = frame.shape
                bytesPerLine = ch * w
                convertToQtFormat = QtGui.QImage(frame.data, w, h, bytesPerLine, QtGui.QImage.Format_RGB888)
                p = convertToQtFormat.scaled(640, 480, QtCore.Qt.KeepAspectRatio)
                self.change_Pixmap.emit(p)
                (h, w) = frame.shape[:2]
                blob = cv2.dnn.blobFromImage(cv2.resize(
                    frame, (160, 160)), 1.0, (300, 300), (104, 177, 123))
                self.net.setInput(blob)
                detections = self.net.forward()
                self.frame = frame
                # self.frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                cv2.normalize(frame, None, 0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F)
                pixels = np.expand_dims(frame, axis=0)
                encode = self.model2.predict(pixels)
                face_name = []
                for encoding in encode:
                    name = self.clf.predict([encoding])
                    face_name.extend(name)
                for i in range(0, detections.shape[2]):
                    confidence = detections[0, 0, i, 2]
                    if confidence < 0.5:
                        continue
                    box = detections[0, 0, i, 3:7]*np.array([w, h, w, h])
                    (startX, startY, endX, endY) = box.astype("int")
                    text = "{:.2f}%".format(confidence*100)
                    y = startY - 10 if startY - 10 > 10 else startY*10
                    if name == 'unknown':
                        cv2.rectangle(frame, (startX, y), (endX, endY), (0, 0, 255), 2)
                        cv2.putText(frame, name, (startX, startY),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
                    else:
                        cv2.rectangle(frame, (startX, y), (endX, endY), (0, 255, 0), 2)
                        cv2.putText(frame, name[0], (startX, startY),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
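One concrete mismatch worth noting (not a full fix, just the receiving end of the signal): set_image is defined on the QDialog and calls self.setPixmap, but QDialog has no setPixmap method; QLabel does. The slot should update the label loaded from the .ui file instead. A sketch, assuming the QLabel is named label_cam as described:
def set_image(self, frame):
    # Update the QLabel from the .ui file; QDialog itself has no setPixmap()
    self.label_cam.setPixmap(QtGui.QPixmap.fromImage(frame))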

ValueError: cannot reshape array of size 692224 into shape (1,3,416,416) - YOLOv5 CPU error

I was trying to run my custom YOLOv5 model on CPU and got this error.
This is the GitHub page I used: https://github.com/Amelia0911/onnxruntime-for-yolov5
import onnxruntime
from models.utils import *
import time

IMAGE_SIZE = (416, 416)
CONF_TH = 0.3
NMS_TH = 0.45
CLASSES = 80

model = onnxruntime.InferenceSession("models_train/bestnone.onnx")
anchor_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
stride = [8, 16, 32]


def draw(img, boxinfo, dst, id):
    for *xyxy, conf, cls in boxinfo:
        label = '{}|{}'.format(int(cls), '%.2f' % conf)
        plot_one_box(xyxy, img, label=label, color=[0, 0, 255])
    cv2.imencode('.jpg', img)[1].tofile(dst)


def detect(image):
    img = cv2.resize(image, IMAGE_SIZE)
    img = img.transpose(2, 0, 1)
    dataset = (img, image)
    img = dataset[0].astype('float32')
    img_size = [dataset[0].shape[1], dataset[0].shape[2]]
    img /= 255.0
    img = img.reshape(1, 3, img_size[0], img_size[1])
    inputs = {model.get_inputs()[0].name: img}
    pred = torch.tensor(model.run(None, inputs)[0])
    anchor = torch.tensor(anchor_list).float().view(3, -1, 2)
    area = img_size[0]*img_size[1]
    size = [int(area/stride[0]**2), int(area/stride[1]**2), int(area/stride[2]**2)]
    feature = [[int(j/stride[i]) for j in img_size] for i in range(3)]
    y = []
    y.append(pred[:, :size[0]*3, :])
    y.append(pred[:, size[0]*3:size[0]*3+size[1]*3, :])
    y.append(pred[:, size[0]*3+size[1]*3:, :])
    grid = []
    for k, f in enumerate(feature):
        grid.append([[i, j] for j in range(f[0]) for i in range(f[1])])
    z = []
    for i in range(3):
        src = y[i]
        xy = src[..., 0:2] * 2. - 0.5
        wh = (src[..., 2:4] * 2) ** 2
        dst_xy = []
        dst_wh = []
        for j in range(3):
            dst_xy.append((xy[:, j*size[i]:(j+1)*size[i], :] + torch.tensor(grid[i])) * stride[i])
            dst_wh.append(wh[:, j*size[i]:(j+1)*size[i], :] * anchor[i][j])
        src[..., 0:2] = torch.from_numpy(np.concatenate((dst_xy[0], dst_xy[1], dst_xy[2]), axis=1))
        src[..., 2:4] = torch.from_numpy(np.concatenate((dst_wh[0], dst_wh[1], dst_wh[2]), axis=1))
        z.append(src.view(1, -1, CLASSES+5))  # 85
    pred = torch.cat(z, 1)
    pred = nms(pred, CONF_TH, NMS_TH)
    for det in pred:
        if det is not None and len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], dataset[1].shape).round()
    if det is None:
        return np.array([])
    return det


if __name__ == '__main__':
    import time
    src = 'Temp-640x640.jpg'
    t1 = time.time()
    img = cv2.imdecode(np.fromfile(src, dtype=np.uint8), -1)
    print(IMAGE_SIZE)
    results = detect(img)
    t2 = time.time()
    print(results)
    print("onnxruntime time = ", t2 - t1)
    if results is not None and len(results):
        draw(img, results, 'dst3.jpg', str(id))
    print('Down!')
When I run this code I get the following error:
  File "C:\Users\acer\.spyder-py3\metallic surface defect detection\3_onnx_cpu_detec.py", line 85, in <module>
    results = detect(img)
  File "C:\Users\acer\.spyder-py3\metallic surface defect detection\3_onnx_cpu_detec.py", line 30, in detect
    img = img.reshape(1, 3, img_size[0], img_size[1])
ValueError: cannot reshape array of size 692224 into shape (1,3,416,416)
I think it is a color channel issue. I have tried to fix it, but it doesn't work. If someone knows how to fix it, please let me know. Thanks in advance.
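The arithmetic supports the color-channel suspicion: 692224 = 416 × 416 × 4, so the decoded image has four channels rather than three. cv2.imdecode(..., -1) means IMREAD_UNCHANGED, which keeps an alpha channel when the source file has one (JPEGs normally don't, so the file here may actually be a PNG despite its extension). A minimal sketch of two possible fixes, reusing the file name from the question:
import cv2
import numpy as np

# Option 1: decode as 3-channel BGR instead of IMREAD_UNCHANGED (-1)
img = cv2.imdecode(np.fromfile('Temp-640x640.jpg', dtype=np.uint8), cv2.IMREAD_COLOR)

# Option 2: keep the original decode and drop the alpha channel afterwards
# img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)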