Hello, I have many TFRecord files. I use TensorFlow in Python and want to plot all the labels in one histogram.
Each TFRecord example is a pair of (image, label).
So how can I extract all the labels?
I have tried extracting the labels and managed to plot a few batches:
all_label = []
for image, label in ds_train.take(10):
    all_label.append(label)
sns.distplot(all_label)
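To get every label into one histogram, one option is to iterate the whole dataset instead of take(10). A minimal sketch, assuming ds_train is a tf.data.Dataset of (image, label) pairs parsed from the TFRecord files and eager execution is enabled:

import numpy as np
import seaborn as sns

# Collect every label by iterating the full dataset (all TFRecord files once).
all_labels = np.concatenate([label.numpy().ravel() for _, label in ds_train])

# distplot is deprecated in recent seaborn; histplot handles discrete labels well.
sns.histplot(all_labels, discrete=True)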
Maybe something like this.
import re
#import pdftotext
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from io import StringIO

def convert_pdf_to_txt(path):
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    codec = 'utf-8'
    laparams = LAParams()
    device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
    fp = open(path, 'rb')
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    password = ""
    maxpages = 0
    caching = True
    pagenos = set()
    for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password,
                                  caching=caching, check_extractable=True):
        interpreter.process_page(page)
    text = retstr.getvalue()
    fp.close()
    device.close()
    retstr.close()
    return text
with open('C:\\Users\\Finance10K.txt') as f:
    clean_cont = f.read().splitlines()
clean_cont

doc = [i.replace('\xe2\x80\x9c', '') for i in clean_cont]
doc = [i.replace('\xe2\x80\x9d', '') for i in doc]
doc = [i.replace('\xe2\x80\x99s', '') for i in doc]
docs = [x for x in doc if x != ' ']
docss = [x for x in docs if x != '']
doc
docs
docss
financedoc = [re.sub("[^a-zA-Z]+", " ", s) for s in docss]
financedoc
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import numpy as np
import pandas as pd
#%pylab
#%matplotlib inline
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
vect=CountVectorizer(ngram_range=(1,1),stop_words='english')
fin=vect.fit_transform(financedoc)
fin
pd.DataFrame(fin.toarray(),columns=vect.get_feature_names())
lda=LatentDirichletAllocation(n_components=5)
lda.fit_transform(fin)
lda_dtf=lda.fit_transform(fin)
sorting=np.argsort(lda.components_)[:,::-1]
features=np.array(vect.get_feature_names())
import mglearn
mglearn.tools.print_topics(topics=range(5), feature_names=features,
                           sorting=sorting, topics_per_chunk=5, n_words=10)
#from __future__ import print_function
import pyLDAvis
import pyLDAvis.sklearn
pyLDAvis.enable_notebook()
zit=pyLDAvis.sklearn.prepare(lda,fin,vect)
pyLDAvis.show(zit)
I am trying to run an OCR function in the background while displaying the findings in a GUI.
The OCR is working fine, but I can't seem to get the GUI started.
I think the issue is that there is no function to start the GUI, but I have not found a solution.
import time
import os
import threading
from concurrent.futures import ThreadPoolExecutor

import cv2
import mss
import numpy
import pytesseract
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import PySimpleGUI as sg

pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
def ocr_function():
    with mss.mss() as mss_instance:
        mon = mss_instance.monitors[0]
        screenshot = mss_instance.grab(mon)  # Read all monitor(s)
    with mss.mss() as sct:
        while True:
            im = numpy.asarray(sct.grab(mon))
            plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
            text = pytesseract.image_to_string(im)  # image to text
            time.sleep(5)  # One screenshot per 5 seconds
            os.system('cls')  # Clear output
            print(text)  # Print the output text
            return text  # Return text to function
            #plt.show()
Output = ocr_function
t = threading.Thread(target=ocr_function) #Create a thread for the OCR function
t.start() #Start the OCR thread
Output = df.sort_values(by=['Match_Acc.','D-level', 'R-level'], ascending=[False, False, False]) # Sort columns
Output = Output[Output['Match_Acc.'] >= 1]
font = ('Areal', 11)
sg.theme('BrownBlue')
data = Output
headings = ['Result', 'Column1','Column2','Column3','D-level','R-level','n_matches','nan','nonnan','Match_Acc.']
df = pd.DataFrame(data)
headings = df.columns.tolist()
data = df.values.tolist()
layout = [[sg.Table(data, headings=headings, justification='left', key='-TABLE-')],
[sg.Button('Run'), sg.Button('Exit')]]
sg.Window("Overview", layout).read(close=True)
Hello,
"sparklines" does not work in my code.
To begin with, I did not manage to install it, so I found a function that I call "sparkline_test". Nevertheless, the images that should be embedded in the table end up outside it. Something is wrong.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from io import BytesIO
from itertools import islice
import base64
I cannot import sparklines:
#import sparklines
df = sns.load_dataset('titanic')

def percentile_90(x):
    return x.quantile(.9)

from scipy.stats import trim_mean

def trim_mean_10(x):
    return trim_mean(x, 0.1)

def largest(x):
    return x.nlargest(1)

def sparkline_str(x):
    bins = np.histogram(x)[0]
    sl = ''.join(sparklines(bins))
    return sl
def sparkline_test(data, figsize=(4, 0.25), **kwags):
    data = list(data)
    fig, ax = plt.subplots(1, 1, figsize=figsize, **kwags)
    ax.plot(data)
    for k, v in ax.spines.items():
        v.set_visible(False)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.plot(len(data) - 1, data[len(data) - 1], 'r.')
    ax.fill_between(range(len(data)), data, len(data) * [min(data)], alpha=0.1)
    img = BytesIO()
    plt.savefig(img, transparent=True, bbox_inches='tight')
    img.seek(0)
    plt.show()
    # plt.close()
    return base64.b64encode(img.read()).decode("utf-8")
def sparkline_str(x):
    bins = np.histogram(x)[0]
    sl = ''.join(sparkline_test(bins))
    return sl

agg_func_largest = {
    'fare': [percentile_90, trim_mean_10, largest, sparkline_test]
    #'fare': [percentile_90, trim_mean_10, largest]
}

df.groupby(['class', 'embark_town']).agg(agg_func_largest)
that produces:
What is expected is:
Something is wrong... but what?
Do you have any idea?
Regards,
Atapalou
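One possible direction (a sketch, assuming this runs in a Jupyter notebook, with plt.show() removed and plt.close(fig) used instead inside sparkline_test so the figures are not also drawn outside the table): wrap the base64 payload in an <img> tag and render the aggregated frame as HTML, so the image lands inside the table.

from IPython.display import HTML

def sparkline_img(x):
    # Reuse sparkline_test but wrap its base64 output in an HTML <img> tag.
    return '<img src="data:image/png;base64,{}"/>'.format(sparkline_test(x))

agg = df.groupby(['class', 'embark_town']).agg(
    {'fare': [percentile_90, trim_mean_10, largest, sparkline_img]})
HTML(agg.to_html(escape=False))  # escape=False keeps the <img> tags as HTML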
ALL software version info
Python 3.7.4;
On iMac (21.5-inch, 2017);
Using IDLE.
Description of expected behavior and the observed behavior
Problem: Matplotlib and Holoviews produce different bin distributions for the same data.
Complete, minimal, self-contained example code that reproduces the issue
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
wine = load_wine()
print("Feature Names : ", wine.feature_names)
print("\nTarget Names : ", wine.target_names)
wine_df = pd.DataFrame(wine.data, columns = wine.feature_names)
wine_df["Target"] = wine.target
wine_df["Target"] = ["Class_1" if typ==0 else "Class_2" if typ==1 else "Class_3" for typ in wine_df["Target"]]
print("\nDataset Size : ", wine_df.shape)
print(wine_df.head())
Target1=wine_df.query('Target == "Class_1"')
Target2=wine_df.query('Target == "Class_2"')
Target3=wine_df.query('Target == "Class_3"')
x = Target1['proline']
y = Target2['proline']
z = Target3['proline']
plt.hist(x, bins=20,histtype='bar',color='blue',alpha=0.7,label='Class_1')
plt.hist(y, bins=20,histtype='bar',color='red',alpha=0.7,label='Class_2')
plt.hist(z, bins=20,histtype='bar',color='orange',alpha=0.7,label='Class_3')
plt.xlabel('proline')
plt.ylabel('Frequency')
plt.title('Malic Acid Distribution')
plt.legend(frameon=False)
plt.tight_layout()
plt.savefig("Test", dpi=300)
plt.show()
import holoviews as hv
hv.extension('bokeh')
from bokeh.plotting import show
from holoviews import dim, opts
import hvplot.pandas
hist=wine_df.hvplot.hist(y="proline", by="Target", width=600, height=400, ylim=(0,16), alpha=0.7, bins=20, ylabel="Frequency", title="Malic Acid Distribution")
show(hv.render(hist))
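If the point is just to make the two plots comparable, here is a sketch of one approach (my assumption being that each library currently chooses its own bin edges): compute one shared set of edges from the full proline column and pass it to both. Recent hvplot versions also accept an explicit list of edges for bins.

import numpy as np

edges = np.histogram_bin_edges(wine_df["proline"], bins=20)  # one shared set of edges

plt.hist(x, bins=edges, histtype='bar', color='blue', alpha=0.7, label='Class_1')
plt.hist(y, bins=edges, histtype='bar', color='red', alpha=0.7, label='Class_2')
plt.hist(z, bins=edges, histtype='bar', color='orange', alpha=0.7, label='Class_3')

hist = wine_df.hvplot.hist(y="proline", by="Target", bins=list(edges),
                           width=600, height=400, alpha=0.7, ylabel="Frequency")
show(hv.render(hist))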
I am now trying to compute a Fast Fourier Transform of the signal in Python and plot it. I have a problem with len() here. How can I fix this? And does anyone have other ideas about computing the Fast Fourier Transform?
Exception has occurred: TypeError
object of type 'method' has no len()
That is my problem.
from PyQt5.QtWidgets import *
from PyQt5 import QtCore, QtGui, QtWidgets
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
from matplotlib import animation
import matplotlib.pyplot as plt
import random
import datetime
import serial
import time
import numpy as np
from collections import deque
import threading

x = 0
value = [0]
ser = serial.Serial('com5', 9600)
class scope:
    def data(self):
        if ser.readable():
            time.sleep(0.01)
            reciving = ser.readline(ser.inWaiting())
            str = reciving.decode()
            if len(str) > 0:
                if str[:1] == 'X':
                    value[0] = str[1:]
                    #print(float(value[5]))
        time.sleep(0.5)
        x = float(value[0])
        return x
s = scope()
n = len(s.data)
Ts = 0.01
Fs = 1/Ts
# length of the signal
k = np.arange(n)
T = n/Fs
freq = k/T # two sides frequency range
freq = freq[range(int(n/2))] # one side frequency range
Y = np.fft.fft(x)/n # fft computing and normalization
Y = Y[range(int(n/2))]
fig, ax = plt.subplots(2, 1)
ax.plot(freq, abs(Y), 'r', linestyle=' ', marker='^')
ax.set_xlabel('Freq (Hz)')
ax.set_ylabel('|Y(freq)|')
#ax.vlines(freq, [0], abs(Y))
ax.grid(True)
t = threading.Thread(target= s.data)
t.daemon = True
t.start()
plt.show()
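For the TypeError itself: n = len(s.data) measures the bound method, not any data, so it has no len(). A rough sketch of one way around it (the sample count N is a made-up value, and the blocking serial reads are assumed to behave as in the class above): call the method repeatedly, collect the samples in a list, and run the FFT on the list.

N = 256                                  # hypothetical number of samples to record
samples = [s.data() for _ in range(N)]   # call the method so len() sees real data
n = len(samples)

Y = np.fft.fft(samples) / n              # FFT of the recorded samples, normalized
freq = np.fft.fftfreq(n, d=Ts)[:n // 2]  # one-sided frequency axis

fig, ax = plt.subplots()
ax.plot(freq, np.abs(Y[:n // 2]), 'r^')
ax.set_xlabel('Freq (Hz)')
ax.set_ylabel('|Y(freq)|')
plt.show()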
I am trying to show a matplotlib plot with axes labeled using gettext's _("label") construct. Trying to create a minimal example, I came up with the following Python code. It runs fine through NullTranslations() like this:
python mpl_i18n_test.py
But when I switch to Japanese, I get nothing but small squares in the plot, though on the command line the translations look fine:
LANG=ja_JP.utf8 python mpl_i18n_test.py
Here is the file mpl_i18n_test.py.
Note that this requires the mona-sazanami font to be installed, as well as the Python modules pygtk, numpy, matplotlib, gettext and polib.
So my question: is there some trick to getting matplotlib to play nicely with gettext? Am I missing something obvious here? Thank you.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import gtk
import numpy as np
import matplotlib as mpl
from matplotlib.figure import Figure
from matplotlib.backends.backend_gtkagg import \
FigureCanvasGTKAgg as FigureCanvas
from matplotlib.backends.backend_gtkagg import \
NavigationToolbar2GTKAgg as NavigationToolbar
import locale
import gettext
import polib
mpl.rcParams['font.family'] = 'mona-sazanami'
def append(po, msg):
    occurances = []
    for i, l in enumerate(open(__file__, 'r')):
        if "_('" + msg[0] + "')" in l:
            occurances += [(__file__, str(i + 1))]
    entry = polib.POEntry(msgid=msg[0],
                          msgstr=msg[1],
                          occurrences=occurances)
    print msg
    print occurances
    po.append(entry)

def generate_ja_mo_file():
    po = polib.POFile()
    msgs = [
        (u'hello', u'こんにちは'),
        (u'good-bye', u'さようなら'),
    ]
    for msg in msgs:
        append(po, msg)
    po.save('mpl_i18n_test.po')
    po.save_as_mofile('mpl_i18n_test.mo')
    return 'mpl_i18n_test.mo'

def initialize():
    '''prepare i18n/l10n'''
    locale.setlocale(locale.LC_ALL, '')
    loc, enc = locale.getlocale()
    lang, country = loc.split('_')
    l = lang.lower()
    if l == 'ja':
        filename = generate_ja_mo_file()
        trans = gettext.GNUTranslations(open(filename, 'rb'))
    else:
        trans = gettext.NullTranslations()
    trans.install()
if __name__ == '__main__':
    initialize()  # provides _() method for translations
    win = gtk.Window(gtk.WINDOW_TOPLEVEL)
    win.connect("destroy", lambda x: gtk.main_quit())
    win.connect("delete_event", lambda x, y: False)
    win.set_default_size(400, 300)
    win.set_title("Test of unicode in plot")
    fig = Figure()
    fig.subplots_adjust(bottom=.14)
    ax = fig.add_subplot(1, 1, 1)
    xx = np.linspace(0, 10, 100)
    yy = xx * xx + np.random.normal(0, 1, 100)
    ax.plot(xx, yy)
    print 'hello --> ', _('hello')
    print 'good-bye --> ', _('good-bye')
    ax.set_title(u'こんにちは')
    ax.set_xlabel(_('hello'))
    ax.set_ylabel(_('good-bye'))
    can = FigureCanvas(fig)
    tbar = NavigationToolbar(can, None)
    vbox = gtk.VBox()
    vbox.pack_start(can, True, True, 0)
    vbox.pack_start(tbar, False, False, 0)
    win.add(vbox)
    win.show_all()
    gtk.main()
A solution I found was to merely specify unicode when the translation is "installed." It was a one-line change:
trans.install(unicode=True)
I will add that this is only needed in Python 2.7, not in Python 3. It looks like Python 2.6 and earlier still have issues with this.
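If the same script has to run under both interpreters, a small guard along these lines (a sketch, not part of the original answer) keeps the call compatible:

import sys

if sys.version_info[0] < 3:
    trans.install(unicode=True)  # Python 2: ask gettext to return unicode strings
else:
    trans.install()              # Python 3: the unicode keyword no longer exists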