I am trying to run a OCR function in the background while displaying the findings in a GUI.
The OCR is working fine, but I can't seem to get the GUI started.
I think the issues is that there is not function to start the GUI, but I have not would a solution.
import time
import cv2
import mss
import numpy
import pytesseract
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import PySimpleGUI as sg
import os
import threading
from concurrent.futures import ThreadPoolExecutor
from PIL import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
def ocr_function():
with mss.mss() as mss_instance:
mon = mss_instance.monitors[0]
screenshot = mss_instance.grab(mon) #Read all monitor(s)
with mss.mss() as sct:
while True:
im = numpy.asarray(sct.grab(mon))
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
text = pytesseract.image_to_string(im) #image to text
time.sleep(5) #One screenshot per 5 seconds
os.system('cls') #Clear output
print(text) #Print the output text
return text #Return text to function
#plt.show()
Output = ocr_function
t = threading.Thread(target=ocr_function) #Create a thread for the OCR function
t.start() #Start the OCR thread
Output = df.sort_values(by=['Match_Acc.','D-level', 'R-level'], ascending=[False, False, False]) # Sort columes
Output = Output[Output['Match_Acc.'] >= 1]
font = ('Areal', 11)
sg.theme('BrownBlue')
data = Output
headings = ['Result', 'Column1','Column2','Column3','D-level','R-level','n_matches','nan','nonnan','Match_Acc.']
df = pd.DataFrame(data)
headings = df.columns.tolist()
data = df.values.tolist()
layout = [[sg.Table(data, headings=headings, justification='left', key='-TABLE-')],
[sg.Button('Run'), sg.Button('Exit')]]
sg.Window("Overview", layout).read(close=True)
Here is the code sample that you can find in the 'https://realpython.com/'
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
data = pd.read_csv("avocado.csv")
data = data.query("type == 'conventional' and region == 'Albany'")
data["Date"] = pd.to_datetime(data["Date"], format="%Y-%m-%d")
data.sort_values("Date", inplace=True)
app = dash.Dash(__name__)
However, when I run this code, two graphs show up irrelevantly.
Where can I find the codes that generate these graphs? Why do these graphs occur?
I am trying to create a simple image classification tool.
I would like the code below to work with classifying images. It works fine when it is a non image NumPy array.
#https://e2eml.school/images_to_numbers.html
import numpy as np
from sklearn.utils import Bunch
from PIL import Image
monkey = [1]
dog = [2]
example_animals = Bunch(data = np.array([monkey,dog]),target = np.array(['monkey','dog']))
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=2) #with KMeans you get to pre specify the number of Clusters
KModel = kmeans.fit(example_animals.data) #fit a model using the training data , in this case original example animal data passed through
import pandas as pd
crosstab = pd.crosstab(example_animals.target,KModel.labels_)
print(crosstab)
I have looked into how to make an image into a NumPy array at https://e2eml.school/images_to_numbers.html
The code below where I have converted images to NumPy array doesn't work.
When run it gets the following error
** 'setting an array element with a sequence'**
#https://e2eml.school/images_to_numbers.html
import numpy as np
from sklearn.utils import Bunch
from PIL import Image
monkey = np.asarray(Image.open("monkey.jpg"))
dog = np.asarray(Image.open("dog.jpeg"))
example_animals = Bunch(data = np.array([monkey,dog]),target = np.array(['monkey','dog']))
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=2) #with KMeans you get to pre specify the number of Clusters
KModel = kmeans.fit(example_animals.data) #fit a model using the training data , in this case original example animal data passed through
import pandas as pd
crosstab = pd.crosstab(example_animals.target,KModel.labels_)
print(crosstab)
I would appreciate any insight how I fix the error 'setting an array element with a sequence' so that the images will be compatible with the sklearn processing.
You need to be sure that your images "monkey.jpg" and "dog.jpeg" have the same number of pixels. Otherwise, you will have to resize the images to have the same size. Moreover, the data of your Bunch object need to be of shape (n_samples, n_features) (you can check the documentation https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn.cluster.KMeans.fit)
You need to be aware that you use an unserpervised learning model (Kmeans). So the output of the model is not directly "monkey" or "dog".
I found the solution to error setting an array element with a sequence
Kmeans requires the data arrays for comparison need to be the same size.
This means if importing pictures, the pictures need to be resized, converted into a numpy array (a format that is compatible with Kmeans) and finally made into a 1 dimensional array.
#https://e2eml.school/images_to_numbers.html
#https://machinelearningmastery.com/how-to-load-and-manipulate-images-for-deep-learning-in-python-with-pil-pillow/
import numpy as np
from matplotlib import pyplot as plt
from sklearn.utils import Bunch
from PIL import Image
from sklearn.cluster import KMeans
import pandas as pd
monkey = Image.open("monkey.jpg")
dog = Image.open("dog.jpeg")
#resize pictures
monkey1 = monkey.resize((180,220))
dog1 = dog.resize((180,220))
#make pictures into numpy array
monkey2 = np.asarray(monkey1)
dog2 = np.asarray(dog1)
#https://www.quora.com/How-do-I-convert-image-data-from-2D-array-to-1D-using-python
#make numpy array into 1 dimensional array
monkey3 = monkey2.reshape(-1)
dog3 = dog2.reshape(-1)
example_animals = Bunch(data = np.array([monkey3,dog3]),target = np.array(['monkey','dog']))
kmeans = KMeans(n_clusters=2) #with KMeans you get to pre specify the number of Clusters
KModel = kmeans.fit(example_animals.data) #fit a model using the training data , in this case original example food data passed through
crosstab = pd.crosstab(example_animals.target,KModel.labels_)
print(crosstab)
I am unable to display plot using Bokeh. I am reading the data from dataframe. Here's a snippet of my Python code.
I am new to Bokeh. I tried following some of the examples from the User Guide. I'm unable to figure out what's going wrong here. Please advise.
import datetime
import pandas
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.models import ColumnDataSource
PATH_TO_CSV = "Sample_Data.csv"
output_notebook()
data = pd.read_csv(PATH_TO_CSV, index_col=False)
data['timestamp'] = pd.to_datetime(data['timestamp']).dt.strftime("%H:%M:%S")
source = ColumnDataSource(data)
p = figure(plot_width=400, plot_height=400, x_axis_type="datetime")
p.line('timestamp', 'event_msg', source=source)
show(p)
Here's sample .csv
event_msg,timestamp
Created,2019-03-02 13:19:44.164562-0700
Created,2019-03-02 13:20:32.212323-0700
Created,2019-03-02 13:20:56.582761-0700
Modified,2019-03-02 13:21:48.021752-0700
Deleted,2019-03-02 13:22:16.938382-0700
Modified,2019-03-02 13:22:22.139714-0700
Permission changed,2019-03-02 13:24:20.195975-0700
Deleted,2019-03-02 13:33:53.049900-0700
Modified,2019-03-02 13:33:56.266113-0700
Deleted,2019-03-02 13:33:59.757584-0700
I am seeing completely blank plot. Ideally, I am interested in plotting different line plots based on the event messages.
You should convert your time like this:
data['timestamp'] = pd.to_datetime(data['timestamp'])
So your code should look like (tested with Bokeh v1.1.0):
import os
import datetime
import pandas as pd
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.models import ColumnDataSource
PATH_TO_CSV = "Sample_Data.csv"
output_notebook()
data = pd.read_csv(os.path.join(os.path.dirname(__file__), PATH_TO_CSV), index_col = False)
data['timestamp'] = pd.to_datetime(data['timestamp'])
source = ColumnDataSource(data)
p = figure(plot_width = 400, plot_height = 400, x_axis_type = "datetime", y_range = data['event_msg'].unique())
p.line('timestamp', 'event_msg', source = source)
show(p)
Result:
I understand that there have been one or two other questions posted that are related but not exactly what I need. I'm building this gui that activates a module by clicking a button. This python module that gets activated by pushing the button generates heatmaps from multiple pandas dataframes and saves those images, which in turn is then saved into an xlsx using pandas ExcelWriter.
I've tried to implement QThread, as other stackoverflow examples tried to explain similar problems but I continue getting this error: "It is not safe to use pixmaps outside the GUI thread". I understand that technically I'm not creating the heatmap inside the MAIN gui thread but I thought with QThread that I am still inside "a" gui thread. These dataframes that the heatmaps are based off of can be of a large size at times and I am somewhat grasping the concept of sending a signal to the main gui thread when a heatmap is to be created and have the heatmap function inside the main gui class...but I fear that will be troublesome later in passing so much data around..this is more like pipelining than threading. I just want this working thread to create these images and save them and then take those saved files and save them into an xlsx without interrupting the main gui..
(NOTE: This is a simplified version, in the real program there will be several of these threads created almost simultaneously and inside each thread several heatmaps will be created)
---main.py---
import sys
from MAIN_GUI import *
from PyQt4 import QtGui, QtCore
from excel_dummy import *
if __name__=="__main__":
app = QtGui.QApplication(sys.argv)
class MAIN_GUI(QtGui.QMainWindow):
def __init__(self):
super(MAIN_GUI, self).__init__()
self.uiM = Ui_MainWindow()
self.uiM.setupUi(self)
self.connect(self.uiM.updateALL_Button,QtCore.SIGNAL('clicked()'),self.newThread)
def newThread(self):
Excelify = excelify()
Excelify.start()
self.connect(Excelify,QtCore.SIGNAL('donethread(QString)'),(self.done))
def done(self):
print('done')
main_gui = MAIN_GUI()
main_gui.show()
main_gui.raise_()
sys.exit(app.exec_())
---excel_dummy.py---
import os, pandas as pd
from pandas import ExcelWriter
import numpy as np
import seaborn.matrix as sm
from PyQt4 import QtCore
from PyQt4.QtCore import QThread
from matplotlib.backends.backend_agg import FigureCanvas
from matplotlib.figure import Figure
import time
class excelify(QThread):
def __init__(self):
QThread.__init__(self)
def run(self):
path = 'home/desktop/produced_files'
with ExcelWriter(path + '/final.xlsx', engine='xlsxwriter') as writer:
workbook = writer.book
worksheet = workbook.add_worksheet()
heatit = self.heatmap()
worksheet.insert_image('C3',path + '/' + 'heat.jpg')
worksheet.write(2, 2, 'just write something')
writer.save()
print('file size: %s "%s"' % (os.stat(path).st_size, path))
time.slee(0.3)
self.emit(QtCore.SIGNAL('donethread(QString)'),'')
def heatmap(self):
df = pd.DataFrame(np.array([[1,22222,33333],[2,44444,55555],[3,44444,22222],[4,55555,33333]]),columns=['hour','in','out'])
dfu = pd.DataFrame(df.groupby([df.in,df.hour]).size())
dfu.reset_index(inplace=True)
dfu.rename(columns={'0':'Count'})
dfu.columns=['in','hour','Count']
dfu_2 = dfu.copy()
mask=0
fig = Figure()
ax = fig.add_subplot(1,1,1)
canvas = FigureCanvas(fig)
df_heatmap = dfu_2.pivot('in','hour','Count').fillna(0)
sm.heatmap(df_heatmap,ax=ax,square=True,annot=False,mask=mask)
fig.savefig(path + '/' + heat.jpg')
---MAIN_GUI.py---
from PyQt4 import QtCore,QtGui
try:
_fromUtf8 = QtCore.QString.fromUtf8
except AttributeError:
def _fromUtf8(s):
return s
try:
_encoding = QtGui.QApplication.unicodeUTF8
def _translate(context, text, disambig):
return QtGui.QApplication.translate(context, text, disambig, _encoding)
except AttributeError:
def _translate(context, text, disambig):
return QtGui.QApplication.translate(context, text, disambig)
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
MainWindow.setObjectName(_fromUtf8("MainWindow"))
MainWindow.resize(320,201)
self.centralwidget = QtGui.QWidget(MainWindow)
self.centralwidget.setObjectName(_fromUtf8("centralwidget"))
self.updateALL_Button = QtGui.QPushButton(self.centralwidget)
self.updateALL_Button.setGeometry(QtCore.QRect(40,110,161,27))
self.updateALL_Button.setFocusPolicy(QtCore.Qt.NoFocus)
self.updateALL_Button.setObjectName(_fromUtf8("Options_updateALL_Button"))
MainWindow.setCentralWidget(self.centralwidget)
self.menubar = QtGui.QMenuBar(MainWindow)
self.menubar.setGeometry(QtCore.QRect(0, 0, 320, 24))
self.menubar.setObjectName(_fromUtf8("menubar"))
MainWindow.setMenuBar(self.menubar)
self.statusbar = QtGui.QStatusBar(MainWindow)
self.statusbar.setObjectName(_fromUtf8("statusbar"))
MainWindow.setStatusBar(self.statusbar)
self.retranslateUi(MainWindow)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
def retranslateUi(self,MainWindow):
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow", None))
self.updateALL_Button.setText(_translate("MainWindow", "updateALL", None))
Even though you are explicitely using the Agg backend to generate your figure, it looks like Seaborn is still using the default backend on your system, which is most likely Qt4Agg, an interactive backend. We want Seaborn to use a non-interactive backend instead to avoid any error (see matplotlib documentation for more details about backends). To do so, tell Matplotlib in your imports to use the Agg backend and import Seaborn after Matplotlib.
You will also need to save your figure as a png, since jpg is not supported by the Agg backend. Unless you have some specific reasons for using jpg, png is usually a better format for graphs.
Finally, you could use a memory buffer instead of saving your images to a temporary file before saving them in an Excel Workbook. I haven't tested it, but it will probably be faster if you are working with large files.
Below is a MWE I've written which includes the aformentioned points and which does not give any error on my system in Python3.4:
import pandas as pd
import time
from pandas import ExcelWriter
import numpy as np
from PyQt4 import QtCore, QtGui
import matplotlib as mpl
mpl.use('Agg')
from matplotlib.backends.backend_agg import FigureCanvas
import seaborn.matrix as sm
try: # Python 2 (not tested)
from cStringIO import StringIO as BytesIO
except ImportError: # Python 3
from io import BytesIO
class MAIN_GUI(QtGui.QWidget):
def __init__(self):
super(MAIN_GUI, self).__init__()
self.worker = Excelify()
btn = QtGui.QPushButton('Run')
disp = QtGui.QLabel()
self.setLayout(QtGui.QGridLayout())
self.layout().addWidget(btn, 0, 0)
self.layout().addWidget(disp, 2, 0)
self.layout().setRowStretch(1, 100)
btn.clicked.connect(self.worker.start)
self.worker.figSaved.connect(disp.setText)
class Excelify(QtCore.QThread):
figSaved = QtCore.pyqtSignal(str)
def run(self):
self.figSaved.emit('Saving figure to Workbook.')
t1 = time.clock()
image_data = self.heatmap()
with ExcelWriter('final.xlsx', engine='xlsxwriter') as writer:
wb = writer.book
ws = wb.add_worksheet()
ws.insert_image('C3', 'heat.png', {'image_data': image_data})
writer.save()
t2 = time.clock()
self.figSaved.emit('Done in %f sec.' % (t2-t1))
def heatmap(self):
df = pd.DataFrame(np.array([[1, 22222, 33333], [2, 44444, 55555],
[3, 44444, 22222], [4, 55555, 33333]]),
columns=['hour', 'in', 'out'])
dfu = pd.DataFrame(df.groupby([df.out, df.hour]).size())
dfu.reset_index(inplace=True)
dfu.rename(columns={'0': 'Count'})
dfu.columns = ['in', 'hour', 'Count']
fig = mpl.figure.Figure()
fig.set_canvas(FigureCanvas(fig))
ax = fig.add_subplot(111)
df_heatmap = dfu.pivot('in', 'hour', 'Count').fillna(0)
sm.heatmap(df_heatmap, ax=ax, square=True, annot=False, mask=0)
buf= BytesIO()
fig.savefig(buf, format='png')
return(buf)
if __name__ == '__main__':
import sys
app = QtGui.QApplication(sys.argv)
w = MAIN_GUI()
w.show()
w.setFixedSize(200, 100)
sys.exit(app.exec_())