Accessing methods within a class from bokeh FileInput widget - pandas

I am working on a Bokeh serve UI and am running into trouble interfacing a class (and its methods) with the FileInput widget. I am using a class (in this example, called "EIS_data") which, when instantiated, loads a file using pd.read_csv. The EIS_data class also has a method to plot the data in a particular way, and I'd like to be able to load the pandas dataframe and call and manipulate the data using the methods already in place in the class.
So far, I have been able to load the data successfully using the FileInput widget, but I can't figure out how to access the dataframe again once it's loaded in. In a standalone Jupyter notebook, I could run d = EIS_data("filename") and then ```d.plot''' to load the data into a pandas dataframe and then plot it according to the method defined in the EIS_data class, but I can't figure out how to replicate this in the UI code once the data are loaded using the FileInput widget.
Is there a way I can interface this with Bokeh widgets, such that I could simply add d.plot() to curdoc()? I have found a workaround using ColumnDataSource, but it seems a shame to redefine plotting methods and data handling when they are already defined in the class. Below are minimal working examples of the UI code and the class definition.
UI Code:
import numpy as np
import pandas as pd
from eis_analysis_trimmed import EIS_data
import bokeh
from bokeh.io import curdoc
from bokeh import layouts
from bokeh.layouts import column,row,gridplot
from bokeh.plotting import figure
from bokeh.models import *
import base64
import io
## Instantiate the EIS_data class for loading data
def load_data(f):
return EIS_data(f)
## updater function called to load data with FileInput widget
## Must be decoded using base64
def load_file(attr, old, new):
decoded = base64.b64decode(new)
d = io.BytesIO(decoded)
dat = load_data(d)
print(dat.df)
print(dat)
print("EIS Data Uploaded Successfully")
return dat
f_load = Paragraph(text="""Load Data""",height=15)
f = FileInput()
f.on_change('value',load_file)
curdoc().add_root(column(f))
and here is the EIS_data class:
import numpy as np
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
from bokeh.plotting import figure, show
from bokeh.models import LinearAxis, Range1d
from bokeh.resources import INLINE
import bokeh.io
#locally include javascript dependencies in html
bokeh.io.output_notebook(INLINE)
class EIS_data:
def __init__(self, file_name, delimiter='\t',
header=0, f_low=None, f_high=None):
#load eis data into a pandas dataframe
eis_data = pd.read_csv(file_name, delimiter=delimiter, header=header)
#iterate through all of the columns and check to see
#if all of the values in that column are null
#if they are, then remove that column
for c in eis_data.columns:
if eis_data[c].isnull().all():
eis_data = eis_data.drop([c], axis=1)
#make sure that the data are imported as floats and not strings
eis_data = eis_data[['freq/Hz', 'Re(Z)/Ohm', '-Im(Z)/Ohm']]
eis_data['freq/Hz'] = pd.to_numeric(eis_data['freq/Hz'])
eis_data['Re(Z)/Ohm'] = pd.to_numeric(eis_data['Re(Z)/Ohm'])
eis_data['-Im(Z)/Ohm'] = pd.to_numeric(eis_data['-Im(Z)/Ohm'])
self.df = eis_data.sort_values(by='freq/Hz')
def plot(self, fit_vals = None):
plot = figure(title="Nyquist Plot",
x_axis_label='Re(Z) Ohm',
y_axis_label='-Im(Z) Ohm',
plot_width=600,
plot_height=600)
plot.circle(self.df['Re(Z)/Ohm'], self.df['-Im(Z)/Ohm'],
size=7, color='navy', name='Data')
return plot
EDIT: Adding the workaround using ColumnDataSource
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import *
from bokeh.models.widgets import FileInput
import base64
import io
from eis_analysis2 import EIS_data
# Instantiate the EIS_data class for loading data
def load_data(data):
return EIS_data(data)
# updater function called to load data with FileInput widget
# Must be decoded using base64
def load_file(attr, old, new):
decoded = base64.b64decode(new)
d = io.BytesIO(decoded)
dat = load_data(d)
dat_df = dat.df
# Replace plot data with data from newly-loaded file
source.data = dict(freq=dat_df[dat_df.columns[0]], reZ=dat_df[dat_df.columns[1]], imZ=dat_df[dat_df.columns[2]])
#phase,mag = bode_calc(reZ,imZ)
print(dat_df)
print("EIS Data Uploaded Successfully")
# Create Column Data Source that will be used by the plot
source = ColumnDataSource(data=dict(freq=[], reZ=[], imZ=[]))
##Make the nyquist plot
nyq_plot = figure(title="Nyquist Plot",
x_axis_label='Re(Z) Ohm',
y_axis_label='-Im(Z) Ohm',
plot_width=600,
plot_height=600)
nyq_plot.circle(x="reZ", y="imZ",source=source,size=7, color='navy', name='Data')
f = FileInput()
f.on_change('value', load_file)
layout = column(f, nyq_plot)
curdoc().add_root(layout)

Related

OCR in the background while displaying findings in GUI

I am trying to run a OCR function in the background while displaying the findings in a GUI.
The OCR is working fine, but I can't seem to get the GUI started.
I think the issues is that there is not function to start the GUI, but I have not would a solution.
import time
import cv2
import mss
import numpy
import pytesseract
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import PySimpleGUI as sg
import os
import threading
from concurrent.futures import ThreadPoolExecutor
from PIL import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
def ocr_function():
with mss.mss() as mss_instance:
mon = mss_instance.monitors[0]
screenshot = mss_instance.grab(mon) #Read all monitor(s)
with mss.mss() as sct:
while True:
im = numpy.asarray(sct.grab(mon))
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
text = pytesseract.image_to_string(im) #image to text
time.sleep(5) #One screenshot per 5 seconds
os.system('cls') #Clear output
print(text) #Print the output text
return text #Return text to function
#plt.show()
Output = ocr_function
t = threading.Thread(target=ocr_function) #Create a thread for the OCR function
t.start() #Start the OCR thread
Output = df.sort_values(by=['Match_Acc.','D-level', 'R-level'], ascending=[False, False, False]) # Sort columes
Output = Output[Output['Match_Acc.'] >= 1]
font = ('Areal', 11)
sg.theme('BrownBlue')
data = Output
headings = ['Result', 'Column1','Column2','Column3','D-level','R-level','n_matches','nan','nonnan','Match_Acc.']
df = pd.DataFrame(data)
headings = df.columns.tolist()
data = df.values.tolist()
layout = [[sg.Table(data, headings=headings, justification='left', key='-TABLE-')],
[sg.Button('Run'), sg.Button('Exit')]]
sg.Window("Overview", layout).read(close=True)

How to find the code that generates graphs?

Here is the code sample that you can find in the 'https://realpython.com/'
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
data = pd.read_csv("avocado.csv")
data = data.query("type == 'conventional' and region == 'Albany'")
data["Date"] = pd.to_datetime(data["Date"], format="%Y-%m-%d")
data.sort_values("Date", inplace=True)
app = dash.Dash(__name__)
However, when I run this code, two graphs show up irrelevantly.
Where can I find the codes that generate these graphs? Why do these graphs occur?

Using Sklearn with NumPy and Images and get this error 'setting an array element with a sequence'

I am trying to create a simple image classification tool.
I would like the code below to work with classifying images. It works fine when it is a non image NumPy array.
#https://e2eml.school/images_to_numbers.html
import numpy as np
from sklearn.utils import Bunch
from PIL import Image
monkey = [1]
dog = [2]
example_animals = Bunch(data = np.array([monkey,dog]),target = np.array(['monkey','dog']))
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=2) #with KMeans you get to pre specify the number of Clusters
KModel = kmeans.fit(example_animals.data) #fit a model using the training data , in this case original example animal data passed through
import pandas as pd
crosstab = pd.crosstab(example_animals.target,KModel.labels_)
print(crosstab)
I have looked into how to make an image into a NumPy array at https://e2eml.school/images_to_numbers.html
The code below where I have converted images to NumPy array doesn't work.
When run it gets the following error
** 'setting an array element with a sequence'**
#https://e2eml.school/images_to_numbers.html
import numpy as np
from sklearn.utils import Bunch
from PIL import Image
monkey = np.asarray(Image.open("monkey.jpg"))
dog = np.asarray(Image.open("dog.jpeg"))
example_animals = Bunch(data = np.array([monkey,dog]),target = np.array(['monkey','dog']))
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=2) #with KMeans you get to pre specify the number of Clusters
KModel = kmeans.fit(example_animals.data) #fit a model using the training data , in this case original example animal data passed through
import pandas as pd
crosstab = pd.crosstab(example_animals.target,KModel.labels_)
print(crosstab)
I would appreciate any insight how I fix the error 'setting an array element with a sequence' so that the images will be compatible with the sklearn processing.
You need to be sure that your images "monkey.jpg" and "dog.jpeg" have the same number of pixels. Otherwise, you will have to resize the images to have the same size. Moreover, the data of your Bunch object need to be of shape (n_samples, n_features) (you can check the documentation https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn.cluster.KMeans.fit)
You need to be aware that you use an unserpervised learning model (Kmeans). So the output of the model is not directly "monkey" or "dog".
I found the solution to error setting an array element with a sequence
Kmeans requires the data arrays for comparison need to be the same size.
This means if importing pictures, the pictures need to be resized, converted into a numpy array (a format that is compatible with Kmeans) and finally made into a 1 dimensional array.
#https://e2eml.school/images_to_numbers.html
#https://machinelearningmastery.com/how-to-load-and-manipulate-images-for-deep-learning-in-python-with-pil-pillow/
import numpy as np
from matplotlib import pyplot as plt
from sklearn.utils import Bunch
from PIL import Image
from sklearn.cluster import KMeans
import pandas as pd
monkey = Image.open("monkey.jpg")
dog = Image.open("dog.jpeg")
#resize pictures
monkey1 = monkey.resize((180,220))
dog1 = dog.resize((180,220))
#make pictures into numpy array
monkey2 = np.asarray(monkey1)
dog2 = np.asarray(dog1)
#https://www.quora.com/How-do-I-convert-image-data-from-2D-array-to-1D-using-python
#make numpy array into 1 dimensional array
monkey3 = monkey2.reshape(-1)
dog3 = dog2.reshape(-1)
example_animals = Bunch(data = np.array([monkey3,dog3]),target = np.array(['monkey','dog']))
kmeans = KMeans(n_clusters=2) #with KMeans you get to pre specify the number of Clusters
KModel = kmeans.fit(example_animals.data) #fit a model using the training data , in this case original example food data passed through
crosstab = pd.crosstab(example_animals.target,KModel.labels_)
print(crosstab)

Blank Bokeh plot when reading data from dataframe and using time on x-axis

I am unable to display plot using Bokeh. I am reading the data from dataframe. Here's a snippet of my Python code.
I am new to Bokeh. I tried following some of the examples from the User Guide. I'm unable to figure out what's going wrong here. Please advise.
import datetime
import pandas
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.models import ColumnDataSource
PATH_TO_CSV = "Sample_Data.csv"
output_notebook()
data = pd.read_csv(PATH_TO_CSV, index_col=False)
data['timestamp'] = pd.to_datetime(data['timestamp']).dt.strftime("%H:%M:%S")
source = ColumnDataSource(data)
p = figure(plot_width=400, plot_height=400, x_axis_type="datetime")
p.line('timestamp', 'event_msg', source=source)
show(p)
Here's sample .csv
event_msg,timestamp
Created,2019-03-02 13:19:44.164562-0700
Created,2019-03-02 13:20:32.212323-0700
Created,2019-03-02 13:20:56.582761-0700
Modified,2019-03-02 13:21:48.021752-0700
Deleted,2019-03-02 13:22:16.938382-0700
Modified,2019-03-02 13:22:22.139714-0700
Permission changed,2019-03-02 13:24:20.195975-0700
Deleted,2019-03-02 13:33:53.049900-0700
Modified,2019-03-02 13:33:56.266113-0700
Deleted,2019-03-02 13:33:59.757584-0700
I am seeing completely blank plot. Ideally, I am interested in plotting different line plots based on the event messages.
You should convert your time like this:
data['timestamp'] = pd.to_datetime(data['timestamp'])
So your code should look like (tested with Bokeh v1.1.0):
import os
import datetime
import pandas as pd
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.models import ColumnDataSource
PATH_TO_CSV = "Sample_Data.csv"
output_notebook()
data = pd.read_csv(os.path.join(os.path.dirname(__file__), PATH_TO_CSV), index_col = False)
data['timestamp'] = pd.to_datetime(data['timestamp'])
source = ColumnDataSource(data)
p = figure(plot_width = 400, plot_height = 400, x_axis_type = "datetime", y_range = data['event_msg'].unique())
p.line('timestamp', 'event_msg', source = source)
show(p)
Result:

How to implement QThread correctly with matplotlib and pyplot

I understand that there have been one or two other questions posted that are related but not exactly what I need. I'm building this gui that activates a module by clicking a button. This python module that gets activated by pushing the button generates heatmaps from multiple pandas dataframes and saves those images, which in turn is then saved into an xlsx using pandas ExcelWriter.
I've tried to implement QThread, as other stackoverflow examples tried to explain similar problems but I continue getting this error: "It is not safe to use pixmaps outside the GUI thread". I understand that technically I'm not creating the heatmap inside the MAIN gui thread but I thought with QThread that I am still inside "a" gui thread. These dataframes that the heatmaps are based off of can be of a large size at times and I am somewhat grasping the concept of sending a signal to the main gui thread when a heatmap is to be created and have the heatmap function inside the main gui class...but I fear that will be troublesome later in passing so much data around..this is more like pipelining than threading. I just want this working thread to create these images and save them and then take those saved files and save them into an xlsx without interrupting the main gui..
(NOTE: This is a simplified version, in the real program there will be several of these threads created almost simultaneously and inside each thread several heatmaps will be created)
---main.py---
import sys
from MAIN_GUI import *
from PyQt4 import QtGui, QtCore
from excel_dummy import *
if __name__=="__main__":
app = QtGui.QApplication(sys.argv)
class MAIN_GUI(QtGui.QMainWindow):
def __init__(self):
super(MAIN_GUI, self).__init__()
self.uiM = Ui_MainWindow()
self.uiM.setupUi(self)
self.connect(self.uiM.updateALL_Button,QtCore.SIGNAL('clicked()'),self.newThread)
def newThread(self):
Excelify = excelify()
Excelify.start()
self.connect(Excelify,QtCore.SIGNAL('donethread(QString)'),(self.done))
def done(self):
print('done')
main_gui = MAIN_GUI()
main_gui.show()
main_gui.raise_()
sys.exit(app.exec_())
---excel_dummy.py---
import os, pandas as pd
from pandas import ExcelWriter
import numpy as np
import seaborn.matrix as sm
from PyQt4 import QtCore
from PyQt4.QtCore import QThread
from matplotlib.backends.backend_agg import FigureCanvas
from matplotlib.figure import Figure
import time
class excelify(QThread):
def __init__(self):
QThread.__init__(self)
def run(self):
path = 'home/desktop/produced_files'
with ExcelWriter(path + '/final.xlsx', engine='xlsxwriter') as writer:
workbook = writer.book
worksheet = workbook.add_worksheet()
heatit = self.heatmap()
worksheet.insert_image('C3',path + '/' + 'heat.jpg')
worksheet.write(2, 2, 'just write something')
writer.save()
print('file size: %s "%s"' % (os.stat(path).st_size, path))
time.slee(0.3)
self.emit(QtCore.SIGNAL('donethread(QString)'),'')
def heatmap(self):
df = pd.DataFrame(np.array([[1,22222,33333],[2,44444,55555],[3,44444,22222],[4,55555,33333]]),columns=['hour','in','out'])
dfu = pd.DataFrame(df.groupby([df.in,df.hour]).size())
dfu.reset_index(inplace=True)
dfu.rename(columns={'0':'Count'})
dfu.columns=['in','hour','Count']
dfu_2 = dfu.copy()
mask=0
fig = Figure()
ax = fig.add_subplot(1,1,1)
canvas = FigureCanvas(fig)
df_heatmap = dfu_2.pivot('in','hour','Count').fillna(0)
sm.heatmap(df_heatmap,ax=ax,square=True,annot=False,mask=mask)
fig.savefig(path + '/' + heat.jpg')
---MAIN_GUI.py---
from PyQt4 import QtCore,QtGui
try:
_fromUtf8 = QtCore.QString.fromUtf8
except AttributeError:
def _fromUtf8(s):
return s
try:
_encoding = QtGui.QApplication.unicodeUTF8
def _translate(context, text, disambig):
return QtGui.QApplication.translate(context, text, disambig, _encoding)
except AttributeError:
def _translate(context, text, disambig):
return QtGui.QApplication.translate(context, text, disambig)
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
MainWindow.setObjectName(_fromUtf8("MainWindow"))
MainWindow.resize(320,201)
self.centralwidget = QtGui.QWidget(MainWindow)
self.centralwidget.setObjectName(_fromUtf8("centralwidget"))
self.updateALL_Button = QtGui.QPushButton(self.centralwidget)
self.updateALL_Button.setGeometry(QtCore.QRect(40,110,161,27))
self.updateALL_Button.setFocusPolicy(QtCore.Qt.NoFocus)
self.updateALL_Button.setObjectName(_fromUtf8("Options_updateALL_Button"))
MainWindow.setCentralWidget(self.centralwidget)
self.menubar = QtGui.QMenuBar(MainWindow)
self.menubar.setGeometry(QtCore.QRect(0, 0, 320, 24))
self.menubar.setObjectName(_fromUtf8("menubar"))
MainWindow.setMenuBar(self.menubar)
self.statusbar = QtGui.QStatusBar(MainWindow)
self.statusbar.setObjectName(_fromUtf8("statusbar"))
MainWindow.setStatusBar(self.statusbar)
self.retranslateUi(MainWindow)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
def retranslateUi(self,MainWindow):
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow", None))
self.updateALL_Button.setText(_translate("MainWindow", "updateALL", None))
Even though you are explicitely using the Agg backend to generate your figure, it looks like Seaborn is still using the default backend on your system, which is most likely Qt4Agg, an interactive backend. We want Seaborn to use a non-interactive backend instead to avoid any error (see matplotlib documentation for more details about backends). To do so, tell Matplotlib in your imports to use the Agg backend and import Seaborn after Matplotlib.
You will also need to save your figure as a png, since jpg is not supported by the Agg backend. Unless you have some specific reasons for using jpg, png is usually a better format for graphs.
Finally, you could use a memory buffer instead of saving your images to a temporary file before saving them in an Excel Workbook. I haven't tested it, but it will probably be faster if you are working with large files.
Below is a MWE I've written which includes the aformentioned points and which does not give any error on my system in Python3.4:
import pandas as pd
import time
from pandas import ExcelWriter
import numpy as np
from PyQt4 import QtCore, QtGui
import matplotlib as mpl
mpl.use('Agg')
from matplotlib.backends.backend_agg import FigureCanvas
import seaborn.matrix as sm
try: # Python 2 (not tested)
from cStringIO import StringIO as BytesIO
except ImportError: # Python 3
from io import BytesIO
class MAIN_GUI(QtGui.QWidget):
def __init__(self):
super(MAIN_GUI, self).__init__()
self.worker = Excelify()
btn = QtGui.QPushButton('Run')
disp = QtGui.QLabel()
self.setLayout(QtGui.QGridLayout())
self.layout().addWidget(btn, 0, 0)
self.layout().addWidget(disp, 2, 0)
self.layout().setRowStretch(1, 100)
btn.clicked.connect(self.worker.start)
self.worker.figSaved.connect(disp.setText)
class Excelify(QtCore.QThread):
figSaved = QtCore.pyqtSignal(str)
def run(self):
self.figSaved.emit('Saving figure to Workbook.')
t1 = time.clock()
image_data = self.heatmap()
with ExcelWriter('final.xlsx', engine='xlsxwriter') as writer:
wb = writer.book
ws = wb.add_worksheet()
ws.insert_image('C3', 'heat.png', {'image_data': image_data})
writer.save()
t2 = time.clock()
self.figSaved.emit('Done in %f sec.' % (t2-t1))
def heatmap(self):
df = pd.DataFrame(np.array([[1, 22222, 33333], [2, 44444, 55555],
[3, 44444, 22222], [4, 55555, 33333]]),
columns=['hour', 'in', 'out'])
dfu = pd.DataFrame(df.groupby([df.out, df.hour]).size())
dfu.reset_index(inplace=True)
dfu.rename(columns={'0': 'Count'})
dfu.columns = ['in', 'hour', 'Count']
fig = mpl.figure.Figure()
fig.set_canvas(FigureCanvas(fig))
ax = fig.add_subplot(111)
df_heatmap = dfu.pivot('in', 'hour', 'Count').fillna(0)
sm.heatmap(df_heatmap, ax=ax, square=True, annot=False, mask=0)
buf= BytesIO()
fig.savefig(buf, format='png')
return(buf)
if __name__ == '__main__':
import sys
app = QtGui.QApplication(sys.argv)
w = MAIN_GUI()
w.show()
w.setFixedSize(200, 100)
sys.exit(app.exec_())