matplotlib in gtk window with i18n (gettext) support - matplotlib

I am trying to show a matplotlib plot with axes labeled using gettext's _("label") construct. Trying to create a minimal example, I came up with the following python code. It runs fine through the NULLTranslations() like this:
python mpl_i18n_test.py
But when I switch to japanese, I get nothing but small squares in the plot -- though on the command-line, the translations look fine:
LANG=ja_JP.utf8 python mpl_i18n_test.py
Here is the file mpl_i18n_test.py
Note that this requires the mona-sazanami font installed, and the various python modules: pygtk, numpy, matplotlib, gettext and polib
So my question: Is there some trick to getting matplotlib play nicely with gettext? Am I missing something obvious here? Thank you.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import gtk
import numpy as np
import matplotlib as mpl
from matplotlib.figure import Figure
from matplotlib.backends.backend_gtkagg import \
FigureCanvasGTKAgg as FigureCanvas
from matplotlib.backends.backend_gtkagg import \
NavigationToolbar2GTKAgg as NavigationToolbar
import locale
import gettext
import polib
mpl.rcParams['font.family'] = 'mona-sazanami'
def append(po, msg):
occurances = []
for i,l in enumerate(open(__file__,'r')):
if "_('"+msg[0]+"')" in l:
occurances += [(__file__,str(i+1))]
entry = polib.POEntry(msgid=msg[0],
msgstr=msg[1],
occurrences=occurances)
print msg
print occurances
po.append(entry)
def generate_ja_mo_file():
po = polib.POFile()
msgs = [
(u'hello', u'こんにちは'),
(u'good-bye', u'さようなら'),
]
for msg in msgs:
append(po, msg)
po.save('mpl_i18n_test.po')
po.save_as_mofile('mpl_i18n_test.mo')
return 'mpl_i18n_test.mo'
def initialize():
'''prepare i18n/l10n'''
locale.setlocale(locale.LC_ALL, '')
loc,enc = locale.getlocale()
lang,country = loc.split('_')
l = lang.lower()
if l == 'ja':
filename = generate_ja_mo_file()
trans = gettext.GNUTranslations(open(filename, 'rb'))
else:
trans = gettext.NullTranslations()
trans.install()
if __name__ == '__main__':
initialize() # provides _() method for translations
win = gtk.Window(gtk.WINDOW_TOPLEVEL)
win.connect("destroy", lambda x: gtk.main_quit())
win.connect("delete_event", lambda x,y: False)
win.set_default_size(400,300)
win.set_title("Test of unicode in plot")
fig = Figure()
fig.subplots_adjust(bottom=.14)
ax = fig.add_subplot(1,1,1)
xx = np.linspace(0,10,100)
yy = xx*xx + np.random.normal(0,1,100)
ax.plot(xx,yy)
print 'hello --> ', _('hello')
print 'good-bye --> ', _('good-bye')
ax.set_title(u'こんにちは')
ax.set_xlabel(_('hello'))
ax.set_ylabel(_('good-bye'))
can = FigureCanvas(fig)
tbar = NavigationToolbar(can,None)
vbox = gtk.VBox()
vbox.pack_start(can, True, True, 0)
vbox.pack_start(tbar, False, False, 0)
win.add(vbox)
win.show_all()
gtk.main()

A solution I found was to merely specify unicode when the translation is "installed." It was a one-line change:
trans.install(unicode=True)
I will add that this is only needed in python 2.7, but not needed in python 3. Looks like python 2.6 and earlier still have issues with this

Related

OCR in the background while displaying findings in GUI

I am trying to run a OCR function in the background while displaying the findings in a GUI.
The OCR is working fine, but I can't seem to get the GUI started.
I think the issues is that there is not function to start the GUI, but I have not would a solution.
import time
import cv2
import mss
import numpy
import pytesseract
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import PySimpleGUI as sg
import os
import threading
from concurrent.futures import ThreadPoolExecutor
from PIL import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
def ocr_function():
with mss.mss() as mss_instance:
mon = mss_instance.monitors[0]
screenshot = mss_instance.grab(mon) #Read all monitor(s)
with mss.mss() as sct:
while True:
im = numpy.asarray(sct.grab(mon))
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
text = pytesseract.image_to_string(im) #image to text
time.sleep(5) #One screenshot per 5 seconds
os.system('cls') #Clear output
print(text) #Print the output text
return text #Return text to function
#plt.show()
Output = ocr_function
t = threading.Thread(target=ocr_function) #Create a thread for the OCR function
t.start() #Start the OCR thread
Output = df.sort_values(by=['Match_Acc.','D-level', 'R-level'], ascending=[False, False, False]) # Sort columes
Output = Output[Output['Match_Acc.'] >= 1]
font = ('Areal', 11)
sg.theme('BrownBlue')
data = Output
headings = ['Result', 'Column1','Column2','Column3','D-level','R-level','n_matches','nan','nonnan','Match_Acc.']
df = pd.DataFrame(data)
headings = df.columns.tolist()
data = df.values.tolist()
layout = [[sg.Table(data, headings=headings, justification='left', key='-TABLE-')],
[sg.Button('Run'), sg.Button('Exit')]]
sg.Window("Overview", layout).read(close=True)

how to plot labels TFRecords in histogram

Hello i have many files of TFRecords. i use python tensorflow and want to plot in one histogram all labels.
TFRecords is pair of (image,label)
so how i can extract all the labels ?
i have try to extract labels and have success plot several batches
all_label = []
for image, label in ds_train.take(10):
all_label.append(label)
sns.distplot(all_label)
Maybe something like this.
import re
#import pdftotext
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from io import StringIO
def convert_pdf_to_txt(path):
rsrcmgr = PDFResourceManager()
retstr = StringIO()
codec = 'utf-8'
laparams = LAParams()
device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
fp = open(path, 'rb')
interpreter = PDFPageInterpreter(rsrcmgr, device)
password = ""
maxpages = 0
caching = True
pagenos=set()
for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password,caching=caching, check_extractable=True):
interpreter.process_page(page)
text = retstr.getvalue()
fp.close()
device.close()
retstr.close()
return text
with open('C:\\Users\\Finance10K.txt') as f:
clean_cont = f.read().splitlines()
clean_cont
doc=[i.replace('\xe2\x80\x9c','') for i in clean_cont ]
doc=[i.replace('\xe2\x80\x9d','') for i in doc ]
doc=[i.replace('\xe2\x80\x99s','') for i in doc ]
docs = [x for x in doc if x != ' ']
docss = [x for x in docs if x != '']
doc
docs
docss
financedoc=[re.sub("[^a-zA-Z]+", " ", s) for s in docss]
financedoc
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import numpy as np
import pandas as pd
#%pylab
#%matplotlib inline
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
vect=CountVectorizer(ngram_range=(1,1),stop_words='english')
fin=vect.fit_transform(financedoc)
fin
pd.DataFrame(fin.toarray(),columns=vect.get_feature_names())
lda=LatentDirichletAllocation(n_components=5)
lda.fit_transform(fin)
lda_dtf=lda.fit_transform(fin)
sorting=np.argsort(lda.components_)[:,::-1]
features=np.array(vect.get_feature_names())
import mglearn
mglearn.tools.print_topics(topics=range(5), feature_names=features,
sorting=sorting, topics_per_chunk=5, n_words=10)
#from __future__ import print_function
import pyLDAvis
import pyLDAvis.sklearn
pyLDAvis.enable_notebook()
zit=pyLDAvis.sklearn.prepare(lda,fin,vect)
pyLDAvis.show(zit)

How to implement QThread correctly with matplotlib and pyplot

I understand that there have been one or two other questions posted that are related but not exactly what I need. I'm building this gui that activates a module by clicking a button. This python module that gets activated by pushing the button generates heatmaps from multiple pandas dataframes and saves those images, which in turn is then saved into an xlsx using pandas ExcelWriter.
I've tried to implement QThread, as other stackoverflow examples tried to explain similar problems but I continue getting this error: "It is not safe to use pixmaps outside the GUI thread". I understand that technically I'm not creating the heatmap inside the MAIN gui thread but I thought with QThread that I am still inside "a" gui thread. These dataframes that the heatmaps are based off of can be of a large size at times and I am somewhat grasping the concept of sending a signal to the main gui thread when a heatmap is to be created and have the heatmap function inside the main gui class...but I fear that will be troublesome later in passing so much data around..this is more like pipelining than threading. I just want this working thread to create these images and save them and then take those saved files and save them into an xlsx without interrupting the main gui..
(NOTE: This is a simplified version, in the real program there will be several of these threads created almost simultaneously and inside each thread several heatmaps will be created)
---main.py---
import sys
from MAIN_GUI import *
from PyQt4 import QtGui, QtCore
from excel_dummy import *
if __name__=="__main__":
app = QtGui.QApplication(sys.argv)
class MAIN_GUI(QtGui.QMainWindow):
def __init__(self):
super(MAIN_GUI, self).__init__()
self.uiM = Ui_MainWindow()
self.uiM.setupUi(self)
self.connect(self.uiM.updateALL_Button,QtCore.SIGNAL('clicked()'),self.newThread)
def newThread(self):
Excelify = excelify()
Excelify.start()
self.connect(Excelify,QtCore.SIGNAL('donethread(QString)'),(self.done))
def done(self):
print('done')
main_gui = MAIN_GUI()
main_gui.show()
main_gui.raise_()
sys.exit(app.exec_())
---excel_dummy.py---
import os, pandas as pd
from pandas import ExcelWriter
import numpy as np
import seaborn.matrix as sm
from PyQt4 import QtCore
from PyQt4.QtCore import QThread
from matplotlib.backends.backend_agg import FigureCanvas
from matplotlib.figure import Figure
import time
class excelify(QThread):
def __init__(self):
QThread.__init__(self)
def run(self):
path = 'home/desktop/produced_files'
with ExcelWriter(path + '/final.xlsx', engine='xlsxwriter') as writer:
workbook = writer.book
worksheet = workbook.add_worksheet()
heatit = self.heatmap()
worksheet.insert_image('C3',path + '/' + 'heat.jpg')
worksheet.write(2, 2, 'just write something')
writer.save()
print('file size: %s "%s"' % (os.stat(path).st_size, path))
time.slee(0.3)
self.emit(QtCore.SIGNAL('donethread(QString)'),'')
def heatmap(self):
df = pd.DataFrame(np.array([[1,22222,33333],[2,44444,55555],[3,44444,22222],[4,55555,33333]]),columns=['hour','in','out'])
dfu = pd.DataFrame(df.groupby([df.in,df.hour]).size())
dfu.reset_index(inplace=True)
dfu.rename(columns={'0':'Count'})
dfu.columns=['in','hour','Count']
dfu_2 = dfu.copy()
mask=0
fig = Figure()
ax = fig.add_subplot(1,1,1)
canvas = FigureCanvas(fig)
df_heatmap = dfu_2.pivot('in','hour','Count').fillna(0)
sm.heatmap(df_heatmap,ax=ax,square=True,annot=False,mask=mask)
fig.savefig(path + '/' + heat.jpg')
---MAIN_GUI.py---
from PyQt4 import QtCore,QtGui
try:
_fromUtf8 = QtCore.QString.fromUtf8
except AttributeError:
def _fromUtf8(s):
return s
try:
_encoding = QtGui.QApplication.unicodeUTF8
def _translate(context, text, disambig):
return QtGui.QApplication.translate(context, text, disambig, _encoding)
except AttributeError:
def _translate(context, text, disambig):
return QtGui.QApplication.translate(context, text, disambig)
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
MainWindow.setObjectName(_fromUtf8("MainWindow"))
MainWindow.resize(320,201)
self.centralwidget = QtGui.QWidget(MainWindow)
self.centralwidget.setObjectName(_fromUtf8("centralwidget"))
self.updateALL_Button = QtGui.QPushButton(self.centralwidget)
self.updateALL_Button.setGeometry(QtCore.QRect(40,110,161,27))
self.updateALL_Button.setFocusPolicy(QtCore.Qt.NoFocus)
self.updateALL_Button.setObjectName(_fromUtf8("Options_updateALL_Button"))
MainWindow.setCentralWidget(self.centralwidget)
self.menubar = QtGui.QMenuBar(MainWindow)
self.menubar.setGeometry(QtCore.QRect(0, 0, 320, 24))
self.menubar.setObjectName(_fromUtf8("menubar"))
MainWindow.setMenuBar(self.menubar)
self.statusbar = QtGui.QStatusBar(MainWindow)
self.statusbar.setObjectName(_fromUtf8("statusbar"))
MainWindow.setStatusBar(self.statusbar)
self.retranslateUi(MainWindow)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
def retranslateUi(self,MainWindow):
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow", None))
self.updateALL_Button.setText(_translate("MainWindow", "updateALL", None))
Even though you are explicitely using the Agg backend to generate your figure, it looks like Seaborn is still using the default backend on your system, which is most likely Qt4Agg, an interactive backend. We want Seaborn to use a non-interactive backend instead to avoid any error (see matplotlib documentation for more details about backends). To do so, tell Matplotlib in your imports to use the Agg backend and import Seaborn after Matplotlib.
You will also need to save your figure as a png, since jpg is not supported by the Agg backend. Unless you have some specific reasons for using jpg, png is usually a better format for graphs.
Finally, you could use a memory buffer instead of saving your images to a temporary file before saving them in an Excel Workbook. I haven't tested it, but it will probably be faster if you are working with large files.
Below is a MWE I've written which includes the aformentioned points and which does not give any error on my system in Python3.4:
import pandas as pd
import time
from pandas import ExcelWriter
import numpy as np
from PyQt4 import QtCore, QtGui
import matplotlib as mpl
mpl.use('Agg')
from matplotlib.backends.backend_agg import FigureCanvas
import seaborn.matrix as sm
try: # Python 2 (not tested)
from cStringIO import StringIO as BytesIO
except ImportError: # Python 3
from io import BytesIO
class MAIN_GUI(QtGui.QWidget):
def __init__(self):
super(MAIN_GUI, self).__init__()
self.worker = Excelify()
btn = QtGui.QPushButton('Run')
disp = QtGui.QLabel()
self.setLayout(QtGui.QGridLayout())
self.layout().addWidget(btn, 0, 0)
self.layout().addWidget(disp, 2, 0)
self.layout().setRowStretch(1, 100)
btn.clicked.connect(self.worker.start)
self.worker.figSaved.connect(disp.setText)
class Excelify(QtCore.QThread):
figSaved = QtCore.pyqtSignal(str)
def run(self):
self.figSaved.emit('Saving figure to Workbook.')
t1 = time.clock()
image_data = self.heatmap()
with ExcelWriter('final.xlsx', engine='xlsxwriter') as writer:
wb = writer.book
ws = wb.add_worksheet()
ws.insert_image('C3', 'heat.png', {'image_data': image_data})
writer.save()
t2 = time.clock()
self.figSaved.emit('Done in %f sec.' % (t2-t1))
def heatmap(self):
df = pd.DataFrame(np.array([[1, 22222, 33333], [2, 44444, 55555],
[3, 44444, 22222], [4, 55555, 33333]]),
columns=['hour', 'in', 'out'])
dfu = pd.DataFrame(df.groupby([df.out, df.hour]).size())
dfu.reset_index(inplace=True)
dfu.rename(columns={'0': 'Count'})
dfu.columns = ['in', 'hour', 'Count']
fig = mpl.figure.Figure()
fig.set_canvas(FigureCanvas(fig))
ax = fig.add_subplot(111)
df_heatmap = dfu.pivot('in', 'hour', 'Count').fillna(0)
sm.heatmap(df_heatmap, ax=ax, square=True, annot=False, mask=0)
buf= BytesIO()
fig.savefig(buf, format='png')
return(buf)
if __name__ == '__main__':
import sys
app = QtGui.QApplication(sys.argv)
w = MAIN_GUI()
w.show()
w.setFixedSize(200, 100)
sys.exit(app.exec_())

using ginput in embedded matplotlib figure in PyQt4

I'm trying to use the 'ginput' to measure distance in a matplotlib figure by allowing the user to mouse click the locations. I am able to do this independently in the matplotlib figure, but I'm having problems when I tried to set the figure onto a matplotlib canvas and then embed it into PyQt4 widget. Below is my code, most of which were taken from the matplotlib examples. My solution will be to click a set of locations, and pass the (x,y) coordinates to the 'dist_calc' function to get the distance.
import sys
from PyQt4 import QtGui
from matplotlib.backends.backend_qt4agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.backends.backend_qt4agg import NavigationToolbar2QT as NavigationToolbar
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import random
import numpy as np
class Window(QtGui.QWidget):
def __init__(self, parent=None):
super(Window, self).__init__(parent)
self.fig = Figure((6.5, 5.0), tight_layout=True)
self.ax = self.fig.add_subplot(111)
self.canvas = FigureCanvas(self.fig)
self.toolbar = NavigationToolbar(self.canvas, self)
self.button = QtGui.QPushButton('Plot')
self.button.clicked.connect(self.plot)
self.ndist = QtGui.QPushButton('Measure')
self.ndist.clicked.connect(self.draw_line)
self.toolbar.addWidget(self.button)
self.toolbar.addWidget(self.ndist)
self.fig.tight_layout()
layout = QtGui.QVBoxLayout()
layout.addWidget(self.toolbar)
layout.addWidget(self.canvas)
self.setLayout(layout)
def plot(self):
data = [random.random() for i in range(20)]
self.ax.hold(False)
self.ax.plot(data, '*-')
self.canvas.draw()
def draw_line(self):
self.xy = plt.ginput(0)
x = [p[0] for p in self.xy]
y = [p[1] for p in self.xy]
self.ax.plot(x,y)
self.ax.figure.canvas.draw()
self.get_dist(x, y)
def get_dist(self, xpts, ypts):
npts = len(xpts)
distArr = []
for i in range(npts-1):
apt = [xpts[i], ypts[i]]
bpt = [xpts[i+1], ypts[i+1]]
dist =self.calc_dist(apt,bpt)
distArr.append(dist)
tdist = np.sum(distArr)
print(tdist)
def calc_dist(self,apt, bpt):
apt = np.asarray(apt)
dist = np.sum((apt - bpt)**2)
dist = np.sqrt(dist)
return dist
if __name__ == '__main__':
app = QtGui.QApplication(sys.argv)
main = Window()
main.show()
sys.exit(app.exec_())
According to this comment by one of the lead Matplotlib developers, you must not import pyplot when you're embedding Matplotlib in Qt. Pyplot sets up its own gui, mainloop and canvas, which interfere with the Qt event loop.
Changing the line self.xy = plt.ginput(0) into self.xy = self.fig.ginput(0) did not help but gave an insightful error:
AttributeError: 'FigureCanvasQTAgg' object has no attribute 'manager'
Figure.show works only for figures managed by pyplot, normally created by pyplot.figure().
In short, I don't think this is possible. ginput is a blocking function and seems only to be implemented for a Matplotlib event loop. I'm afraid that you will have to build the functionality you want using Matplotlib mouse events, which do work when embedding in PyQt. Just be sure not to use pyplot!
Edit: I just remembered, perhaps the LassoSelector is what you need.

How to fix the python multiprocessing matplotlib savefig() issue?

I want to speed up matplotlib.savefig() for many figures by multiprocessing module, and trying to benchmark the performance between parallel and sequence.
Below is the codes:
# -*- coding: utf-8 -*-
"""
Compare the time of matplotlib savefig() in parallel and sequence
"""
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing
import time
def gen_fig_list(n):
''' generate a list to contain n demo scatter figure object '''
plt.ioff()
fig_list = []
for i in range(n):
plt.figure();
dt = np.random.randn(5, 4);
fig = plt.scatter(dt[:,0], dt[:,1], s=abs(dt[:,2]*1000), c=abs(dt[:,3]*100)).get_figure()
fig.FM_figname = "img"+str(i)
fig_list.append(fig)
plt.ion()
return fig_list
def savefig_worker(fig, img_type, folder):
file_name = folder+"\\"+fig.FM_figname+"."+img_type
fig.savefig(file_name, format=img_type, dpi=fig.dpi)
return file_name
def parallel_savefig(fig_list, folder):
proclist = []
for fig in fig_list:
print fig.FM_figname,
p = multiprocessing.Process(target=savefig_worker, args=(fig, 'png', folder)) # cause error
proclist.append(p)
p.start()
for i in proclist:
i.join()
if __name__ == '__main__':
folder_1, folder_2 = 'Z:\\A1', 'Z:\\A2'
fig_list = gen_fig_list(10)
t1 = time.time()
parallel_savefig(fig_list,folder_1)
t2 = time.time()
print '\nMulprocessing time : %0.3f'%((t2-t1))
t3 = time.time()
for fig in fig_list:
savefig_worker(fig, 'png', folder_2)
t4 = time.time()
print 'Non_Mulprocessing time: %0.3f'%((t4-t3))
And I meet problem "This application has requested the Runtime to terminate it in an unusual way. Please contact the application's support team for more information." error caused by p = multiprocessing.Process(target=savefig_worker, args=(fig, 'png', folder)) .
Why ? And how to solve it ?
(Windows XP + Python: 2.6.1 + Numpy: 1.6.2 + Matplotlib: 1.2.0)
EDIT: (add error msg on python 2.7.3)
When run on IDLE of python 2.7.3, it gives below error msg:
>>>
img0
Traceback (most recent call last):
File "C:\Documents and Settings\Administrator\desktop\mulsavefig_pilot.py", line 61, in <module>
proc.start()
File "d:\Python27\lib\multiprocessing\process.py", line 130, in start
File "d:\Python27\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "d:\Python27\lib\pickle.py", line 748, in save_global
(obj, module, name))
PicklingError: Can't pickle <function notify_axes_change at 0x029F5030>: it's not found as matplotlib.backends.backend_qt4.notify_axes_change
EDIT: (My solution demo)
inspired by Matplotlib: simultaneous plotting in multiple threads
# -*- coding: utf-8 -*-
"""
Compare the time of matplotlib savefig() in parallel and sequence
"""
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing
import time
def gen_data(fig_qty, bubble_qty):
''' generate data for fig drawing '''
dt = np.random.randn(fig_qty, bubble_qty, 4)
return dt
def parallel_savefig(draw_data, folder):
''' prepare data and pass to worker '''
pool = multiprocessing.Pool()
fig_qty = len(draw_data)
fig_para = zip(range(fig_qty), draw_data, [folder]*fig_qty)
pool.map(fig_draw_save_worker, fig_para)
return None
def fig_draw_save_worker(args):
seq, dt, folder = args
plt.figure()
fig = plt.scatter(dt[:,0], dt[:,1], s=abs(dt[:,2]*1000), c=abs(dt[:,3]*100), alpha=0.7).get_figure()
plt.title('Plot of a scatter of %i' % seq)
fig.savefig(folder+"\\"+'fig_%02i.png' % seq)
plt.close()
return None
if __name__ == '__main__':
folder_1, folder_2 = 'A1', 'A2'
fig_qty, bubble_qty = 500, 100
draw_data = gen_data(fig_qty, bubble_qty)
print 'Mulprocessing ... ',
t1 = time.time()
parallel_savefig(draw_data, folder_1)
t2 = time.time()
print 'Time : %0.3f'%((t2-t1))
print 'Non_Mulprocessing .. ',
t3 = time.time()
for para in zip(range(fig_qty), draw_data, [folder_2]*fig_qty):
fig_draw_save_worker(para)
t4 = time.time()
print 'Time : %0.3f'%((t4-t3))
print 'Speed Up: %0.1fx'%(((t4-t3)/(t2-t1)))
You can try to move all of the matplotlib code(including the import) to a function.
Make sure you don't have a import matplotlib or import matplotlib.pyplot as plt at the top of your code.
create a function that does all the matplotlib including the import.
Example:
import numpy as np
from multiprocessing import pool
def graphing_function(graph_data):
import matplotlib.pyplot as plt
plt.figure()
plt.hist(graph_data.data)
plt.savefig(graph_data.filename)
plt.close()
return
pool = Pool(4)
pool.map(graphing_function, data_list)
It is not really a bug, per-say, more of a limitation.
The explanation is in the last line of your error mesage:
PicklingError: Can't pickle <function notify_axes_change at 0x029F5030>: it's not found as matplotlib.backends.backend_qt4.notify_axes_change
It is telling you that elements of the figure objects can not be pickled, which is how MultiProcess passes data between the processes. The objects are pickled in the main processes, shipped as pickles, and then re-constructed on the other side. Even if you fixed this exact issue (maybe by using a different backend, or stripping off the offending function (which might break things in other ways)) I am pretty sure there are core parts of Figure, Axes, or Canvas objects that can not be pickled.
As #bigbug point to, an example of how to get around this limitation, Matplotlib: simultaneous plotting in multiple threads. The basic idea is that you push your entire plotting routine off to the sub-process so you only push numpy arrays an maybe some configuration information across the process boundry.