I am trying to create a screen recorder in Python. I have written one that writes its output to an
output.mp4 file, but when I open the file, Windows says that the format is not readable. Here is my code:
import pyautogui
import cv2
import numpy as np
# Screen recorder: repeatedly grabs the desktop with pyautogui, appends each
# frame to output.mp4 and shows a live preview until 'q' is pressed.

# Measure the real screenshot size instead of hard-coding it: the writer must
# be opened with exactly the (width, height) of the frames it will receive.
# The previous hard-coded (640, 480) did not match the captured frames.
first_shot = np.array(pyautogui.screenshot())
height, width = first_shot.shape[:2]
resolution = (width, height)

# "mp4v" is the same FourCC value as the 0x7634706d literal used previously.
codec = cv2.VideoWriter_fourcc(*"mp4v")
# Playback rate written into the file (the value actually passed before).
fps = 20.0

out = cv2.VideoWriter('output.mp4', codec, fps, resolution)
if not out.isOpened():
    raise RuntimeError("could not open output.mp4 for writing")

cv2.namedWindow("Live", cv2.WINDOW_NORMAL)
cv2.resizeWindow("Live", 480, 270)

try:
    while True:
        img = pyautogui.screenshot()
        # pyautogui returns RGB data while OpenCV works in BGR order.
        frame = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        out.write(frame)
        cv2.imshow('Live', frame)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always finalise the file; an unreleased writer can leave it unplayable.
    out.release()
    cv2.destroyAllWindows()
Can anyone tell me why it is not working and how to debug it? Thank you.
Related
Goal: Avoid file write/read operations.
Task: Generate an RGBA image as shown in the picture below (img1 from the code).
Issue: Without the file write and read operations, I get a black image, as shown in the picture below (img2 from the code).
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
# Input mask; download it from:
# https://drive.google.com/file/d/1R9MEeK-7vUM59An-frFtZv2dtTw-jhs7/view?usp=sharing
bin_mask = np.load("bin_mask.npy")

# Method 1: round-trip through a PNG on disk. Works, but needs a file
# write followed by a file read.
plt.imsave('img1.png', bin_mask, cmap=cm.gray)
img1 = Image.open('img1.png')

# Method 2: apply the colormap in memory, with no file involved. The result
# differs from img1.
# Ref: https://stackoverflow.com/questions/10965417/how-to-convert-a-numpy-array-to-pil-image-applying-matplotlib-colormap
rgba_float = cm.gray(bin_mask)
rgba_bytes = np.uint8(rgba_float * 255)
img2 = Image.fromarray(rgba_bytes)

# Observed unique values of img1: [0, 255]; dtype=uint8
# Observed unique values of img2: [0, 1, 255]; dtype=uint8
images_equal = img1 == img2
print("img1 same as img2: ", images_equal)  # False
This task seems trivial at first sight, but I'm not sure why it's behaving this way.
Any suggestions would be appreciated. Thanks in advance.
Both images give the same result where the bin_mask value is 0, and different results where it is 1.
# Convert both images to arrays once so single pixels can be compared.
img1_pixels = np.array(img1)
img2_pixels = np.array(img2)

# Last row / last column: the mask is 0 there and both methods agree.
print('Bin mask={}, img1={}, img2={}'.format(bin_mask[-1][-1], img1_pixels[-1][-1], img2_pixels[-1][-1]))
# Bin mask=0, img1=[ 0 0 0 255], img2=[ 0 0 0 255]

# First row / first column: the mask is 1 there and the methods disagree.
print('Bin mask={}, img1={}, img2={}'.format(bin_mask[0][0], img1_pixels[0][0], img2_pixels[0][0]))
# Bin mask=1, img1=[255 255 255 255], img2=[ 1 1 1 255]
Looking further, cm.gray(1) gives (0.00392156862745098, 0.00392156862745098, 0.00392156862745098, 1.0) while cm.gray(255) gives (1,1,1,1), because an integer input is used as an index into the colormap's 256-entry lookup table rather than as an intensity between 0 and 1. So you should multiply bin_mask by 255 if you are looking for the same result.
The following lines produce the same content for img1 and img3.
# Scale the 0/1 mask to 0/255 before handing it to the colormap.
scaled_mask = bin_mask * 255
img3 = Image.fromarray(np.uint8(cm.gray(scaled_mask) * 255))
print(img1 == img3)  # result will be false, since this is not correct way to compare data in Image
# Compare the pixel data itself instead of the Image objects.
img1_data = list(img1.getdata())
img3_data = list(img3.getdata())
print(img1_data == img3_data)  # result is True
However, your approach takes much more time than OpenCV. You can do the same thing with OpenCV as follows.
# Build the 0/255 uint8 mask first, then let OpenCV expand it to four channels.
mask_u8 = bin_mask.astype(np.uint8) * 255
img3 = cv2.cvtColor(np.array(mask_u8), cv2.COLOR_GRAY2RGBA)
matches_img1 = np.all(img3 == np.array(img1))
print('Result from Opencv=', matches_img1)  # true
Refer to the full code below to compare the time taken by your method with mine.
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import cv2
import time
from PIL import Image
# download file from here: https://drive.google.com/file/d/1R9MEeK-7vUM59An-frFtZv2dtTw-jhs7/view?usp=sharing
bin_mask = np.load("/home/jagdish/Downloads/bin_mask.npy")

# Method 1 (reference): does an unnecessary file write/read but works.
plt.imsave('img1.png', bin_mask, cmap=cm.gray)
img1 = Image.open('img1.png')

# Method 2: no file write; the mask is scaled to 0/255 before the colormap.
# Ref: https://stackoverflow.com/questions/10965417/how-to-convert-a-numpy-array-to-pil-image-applying-matplotlib-colormap
# Your way
img3 = Image.fromarray(np.uint8(cm.gray(bin_mask * 255) * 255))
print('Comparing image class=', img1 == img3)
print('Comparing content of Image=', list(img1.getdata()) == list(img3.getdata()))

# OpenCV way
img3 = cv2.cvtColor(np.array(bin_mask.astype(np.uint8) * 255), cv2.COLOR_GRAY2RGBA)
print('Result from Opencv=', np.all(img3 == np.array(img1)))

# Time 1000 conversions with each approach; both loops print elapsed
# milliseconds. The matplotlib loop times the corrected expression
# (bin_mask * 255) so that the two loops produce the same image.
start_time = time.perf_counter()
for _ in range(1000):
    img3 = Image.fromarray(np.uint8(cm.gray(bin_mask * 255) * 255))
print((time.perf_counter() - start_time) * 1000)

start_time = time.perf_counter()
for _ in range(1000):
    img3 = cv2.cvtColor(np.array(bin_mask.astype(np.uint8) * 255), cv2.COLOR_GRAY2RGBA)
print((time.perf_counter() - start_time) * 1000)
Here is the time comparison:
Using matplotlib to process 1000 images: 920 ms
Using OpenCV to process 1000 images: 94 ms
I can't work out how to get it recognized.
I hope someone can help me out.
import cv2
import numpy as np
import pytesseract
from PIL import Image
# Read a single-character image, clean it up and run Tesseract on the result.
image = cv2.imread('b.png')
if image is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError("could not read image 'b.png'")

# Grayscale -> light blur -> inverted Otsu threshold.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morph open to remove noise and invert image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
invert = 255 - opening

# Feed the preprocessed image to Tesseract; previously the untouched input
# image was passed, so none of the preprocessing above had any effect.
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 10')
print(data)
Cell one consists of the following code:
# Notebook cell: fetch the compressed dlib 68-point landmark model with wget.
!wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 # DOWNLOAD LINK
# Decompress the archive.
# NOTE(review): assumes the download above landed in /content — confirm the working directory.
!bunzip2 /content/shape_predictor_68_face_landmarks.dat.bz2
# Path of the decompressed model file.
datFile = "/content/shape_predictor_68_face_landmarks.dat"
Cell two consists of the following code:
import cv2
import dlib
# Live face-landmark preview: detect faces in each camera frame and draw the
# 68 dlib landmark points on them until Esc (key code 27) is pressed.
hog_face_detector = dlib.get_frontal_face_detector()
dlib_facelandmark = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("could not open camera 0")

try:
    while True:
        ok, frame = cap.read()
        if not ok:
            # No frame was delivered (camera unplugged or stream ended).
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = hog_face_detector(gray)
        for face in faces:
            face_landmarks = dlib_facelandmark(gray, face)
            for n in range(0, 68):
                x = face_landmarks.part(n).x
                y = face_landmarks.part(n).y
                cv2.circle(frame, (x, y), 1, (0, 255, 255), 1)

        cv2.imshow("Face Landmarks", frame)
        key = cv2.waitKey(1)
        if key == 27:
            break
finally:
    # Free the camera and close the preview window even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()
I need landmarks on live video, not on a captured image. If possible, please share your Colab link after running the code.
I have a CSV file with URLs and box coordinates (x coordinate of the top-left corner, y coordinate of the top-left corner, x coordinate of the bottom-right corner and y coordinate of the bottom-right corner). I would like to acquire each image, crop it based on the coordinates, resize it to 256x256 and then save it. Unfortunately, downloading the whole database and then creating a separate one with cropped images is difficult because of the size of the database. Therefore, it is necessary to create the image database with cropped images from the beginning. Another way is to save the image, then crop it and overwrite the initial image (and then move on to the next one with i += 1).
Would the current approach work, or should I use a different method? Additionally, how would I save the acquired images to a specified folder? Currently they are downloaded to the same folder as the script.
import urllib.request
import csv
import numpy as np
import pandas as pd
from io import BytesIO
import requests
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# Download every image listed in data_test.csv, resize it to 256x256, save it
# as "<index>.png" in the current directory and preview the original.
# Column 0 is used as the row label in messages, column 1 as the image URL.
with open("data_test.csv", 'r', newline='') as csvfile:
    # Let the csv module split the rows so quoted fields (for example URLs
    # containing commas) are handled correctly.
    reader = csv.reader(csvfile)
    # pop header row (1st row in csv)
    header = next(reader, None)
    # running index used for the output file names
    i = 0
    for row in reader:
        if not row:
            # blank line in the file
            continue
        # check if we have an image URL
        if len(row) > 1 and row[1].strip():
            response = requests.get(row[1])
            img = Image.open(BytesIO(response.content))
            # NOTE: a PIL image cannot be sliced like an array (img[y0:y1, x0:x1]);
            # cropping needs img.crop((left, upper, right, lower)).
            new_img = img.resize((256, 256))
            new_img.save(str(i) + ".png")
            plt.imshow(img)
            plt.show()
            print("Image saved for {0}".format(row[0]))
            i += 1
        else:
            print("No result for {0}".format(row[0]))
Any further recommendations are welcome.
Edit: The latest version gives me this error:
crop_img = img[105:105+173,315:315+370]
TypeError: 'JpegImageFile' object is not subscriptable
I solved the problem using BytesIO and some cropping/resizing techniques.
import csv
from io import BytesIO
import requests
from PIL import Image
import matplotlib.pyplot as plt
# CSV file listing, per row: label, image URL, left x, top y, right x, bottom y.
CSV_PATH = "data_test.csv"
# Size every cropped image is resized to before saving.
OUTPUT_SIZE = (256, 256)


def parse_crop_box(fields):
    """Return the crop box stored in columns 2-5 of a CSV row.

    Args:
        fields: One CSV row as a sequence of strings.

    Returns:
        A ``(left_x, top_y, right_x, bottom_y)`` tuple of ints, in the order
        expected by ``Image.crop``.

    Raises:
        ValueError: If the row has fewer than six columns or a coordinate is
            not an integer.
    """
    if len(fields) < 6:
        raise ValueError("expected at least 6 columns, got {0}".format(len(fields)))
    # int() tolerates surrounding whitespace such as a trailing newline.
    return tuple(int(value) for value in fields[2:6])


def main():
    """Download, crop and resize every image listed in the CSV file.

    Each result is saved as "<index>.png" in the current directory, where the
    index counts the rows that had an image URL.
    """
    with open(CSV_PATH, 'r', newline='') as csvfile:
        # Use the csv module for splitting so quoted fields (for example URLs
        # containing commas) are parsed correctly.
        reader = csv.reader(csvfile)
        # pop header row (1st row in csv)
        next(reader, None)
        i = 0
        for row in reader:
            if not row:
                # blank line in the file
                continue
            # check if we have an image URL
            if len(row) < 2 or not row[1].strip():
                print("No result for {0}".format(row[0]))
                continue
            response = requests.get(row[1])
            img = Image.open(BytesIO(response.content))
            # im.crop(box): 4-tuple of left, upper, right and lower pixel coordinate
            crop = img.crop(parse_crop_box(row))
            new_img = crop.resize(OUTPUT_SIZE)
            new_img.save(str(i) + ".png")
            print("Image saved for {0}".format(row[0]))
            i += 1


if __name__ == "__main__":
    main()
I hope it helps someone. Any optimization recommendations are still welcome.
I am trying to integrate SimpleCV with PyQt4, with mixed success. I am able to see a webcam capture in PyQt4 through SimpleCV, and I can modify the image with SimpleCV and it shows up fine in PyQt4, but when I try to add a geometric shape or text to the image, it does not show in PyQt4. If I run the SimpleCV code on its own, it works fine. Can someone help me understand why it is not working? By the way, as you can see, I am new to PyQt4 and SimpleCV. Here is the code that I currently have.
#!/usr/bin/env python
import os
import sys
import signal
from PyQt4 import uic, QtGui, QtCore
from webcamGUI3 import *
from SimpleCV import *
class Webcam(QtGui.QMainWindow):
    """Main window that shows SimpleCV camera frames in the lblWebcam label."""

    def __init__(self, parent=None):
        """Build the UI, open camera 0 at 640x480 and start the refresh timer."""
        # Initialise the class actually being subclassed (was QWidget.__init__).
        QtGui.QMainWindow.__init__(self, parent)
        self.MainWindow = Ui_MainWindow()
        self.MainWindow.setupUi(self)
        self.webcam = Camera(0, {"width": 640, "height": 480})

        # Call show_frame on every timer tick (1 ms interval).
        self.timer = QtCore.QTimer()
        self.connect(self.timer, QtCore.SIGNAL('timeout()'), self.show_frame)
        self.timer.start(1)

    def show_frame(self):
        """Grab one frame, add a filled red circle and display it in the label."""
        ipl_image = self.webcam.getImage()
        ipl_image.dl().circle((150, 75), 50, Color.RED, filled=True)
        # NOTE(review): the circle is added through dl(); the bitmap below is
        # read without applyLayers(), which presumably is why the overlay does
        # not appear in the Qt window — confirm.
        data = ipl_image.getBitmap().tostring()
        # 3 bytes per pixel, hence a row stride of 3 * width.
        image = QtGui.QImage(data, ipl_image.width, ipl_image.height, 3 * ipl_image.width, QtGui.QImage.Format_RGB888)
        pixmap = QtGui.QPixmap()
        # Swap the red and blue channels before display.
        pixmap.convertFromImage(image.rgbSwapped())
        self.MainWindow.lblWebcam.setPixmap(pixmap)
# Script entry point: create the Qt application, show the webcam window and
# hand the event loop's return code back to the shell.
if __name__ == "__main__":
    app = QtGui.QApplication(sys.argv)
    webcam = Webcam()
    webcam.show()
    sys.exit(app.exec_())
Any ideas?
I got it, my friend. It is very simple; you just add:
# Replace the image with the result of applyLayers() before its bitmap is read.
# NOTE(review): presumably this merges the drawing layers into the pixel data — confirm against the SimpleCV docs.
ipl_image = ipl_image.applyLayers()
For example:
# Fragment of a frame-display method body (expects `self` in scope).
# Grab a frame, binarize and invert it.
ipl_image = self.webcam.getImage().binarize().invert()
# Draw a red rectangle and some text on the frame.
ipl_image.drawRectangle(30, 50, 100, 100, color=Color.RED, width=3)
ipl_image.drawText('ola galera', 80, 190, fontsize=50)
# Apply the layers before reading the bitmap so the drawings are included.
ipl_image = ipl_image.applyLayers()
data = ipl_image.getBitmap().tostring()
# 3 bytes per pixel, hence a row stride of 3 * width.
image = QtGui.QImage(data, ipl_image.width, ipl_image.height, 3 * ipl_image.width, QtGui.QImage.Format_RGB888)
pixmap = QtGui.QPixmap()
# Swap the red and blue channels before display.
pixmap.convertFromImage(image.rgbSwapped())
self.MainWindow.label.setPixmap(pixmap)