only when building a .exe from working code: AttributeError: Can only use .dt accessor with datetimelike values - pandas

I have a working python script based on pandas.
Converting a similar script into a .exe worked at my computer at work. Unfortunately this isn't the case for my computer at home. I tried pyinstaller and py2exe and both bring up this error.
It seems to me that the conversion puts up a number of errors(I already fixed some of them), so it's not ultimately about the datetime issue I think.
import pandas as pd
import os
import glob
from datetime import datetime
import shutil
import os.path
try:
parentfolder = os.path.dirname(__file__)
parentfolder = os.path.abspath(os.path.join(parentfolder, '..'))#parentfolder der skriptdatei
except NameError: # We are the main py2exe script, not a module
import sys
parentfolder = os.path.dirname(sys.argv[0])
parentfolder = os.path.abspath(os.path.join(parentfolder, '..'))#parentfolder der skriptdatei
today = datetime.now()
day1 = today.strftime("%d-%m-%Y")
time1= today.strftime("%d-%m-%Y_%H-%M-%S")
day1=day1+'_cleaned'
logname="logfile_" + time1 + ".txt"
resultfolder=os.path.join(parentfolder, day1)
logfile = os.path.join(resultfolder, logname)
if os.path.exists(resultfolder):
shutil.rmtree(resultfolder) #deletes folder and all subfolders
os.makedirs(resultfolder)
pd.set_option('display.max_columns', 5)
pd.set_option('display.max_colwidth', 99)
f = open(logfile, "w")
f.close()
all_files = glob.glob(parentfolder + "/*.xls")
filecounter=0
first_run_counter=0
first_day_counter=0
for filename in all_files:
file_name=(os.path.splitext(os.path.basename(filename))[0])
writepath = os.path.join(resultfolder, '{}.xlsx'.format(str(file_name)+"_py"))
writer = pd.ExcelWriter(writepath, engine = 'xlsxwriter')
with open(logfile, "a") as file:
file.write("{} \n".format(str(file_name)))
filecounter += 1
if filecounter > 1:
print("WARNING, JUST CONVERT 1 FILE")
break
list1 = []
dfs_by_day= []
df = pd.read_excel(filename,header=None,parse_dates=False)#ohne header einlesen ,decimal=","
#df=df.convert_dtypes(convert_string=True)
df_help=df.copy()
df_help[1] = df_help[1].astype(str)
df_help[0] = df_help[0].astype(str)
#####datei ordnen,filtern etc
df.dropna(axis=0,how='any',thresh=None,subset=None,inplace=True)#löscht zeilen mit leeren zellen
df.drop_duplicates(inplace=True) #dropt auch doppelte header?!
df.reset_index(drop=True, inplace=True)
new_header = df.iloc[0] #grab the first row for the header
df = df[1:] #take the data less the header row
df.columns = new_header#nimmt 2, reihe als header
df = df.sort_values(['Date (MM/DD/YYYY)','Time (HH:mm:ss)'], ascending=[True,True])
df.reset_index(drop=True, inplace=True)
df.rename(columns={'Date (MM/DD/YYYY)':'Date (DD/MM/YYYY)'}, inplace=True)
#df['Date (DD/MM/YYYY)']=df['Date (DD/MM/YYYY)'].astype(str)#WICHTIG! datumsangabe unabhängig von / oder . machen
#df['Date (DD/MM/YYYY)'] = df['Date (DD/MM/YYYY)'].str.replace('/','.')#/ mit . ersetzen
df_help2=df.copy() #deepcopy vom noch nicht datetime, aber getrimmten dataframe
#################################################################### datei in tage aufspalten
##df_help2['Date (DD/MM/YYYY)'] = pd.to_datetime(df_help2['Date (DD/MM/YYYY)'],format='%d.%m.%Y')#EVTL FORMAT EINFÜGEN ,format='%d.%m.%Y'
df_help2['next day'] = (df_help2['Date (DD/MM/YYYY)'].diff()).dt.days > 0 #ob neue zeile=neuer tag
###############datumsangabe unabhängig von / oder . machen
for i in range(df_help2.shape[0]):
if df_help2.at[i,'next day'] == True:
list1.append(i)
#spaltalgorithmus gesamtfile in tage
l_mod = [0] + list1 + [df.shape[0]]
dfs_by_day = [df.iloc[l_mod[n]:l_mod[n+1]] for n in range(len(l_mod)-1)]
################################################################# tage in runs aufspalten
for j in dfs_by_day:
memo=0
run_counter=1
df1 = j
df1=df1.reset_index(drop=True)
df_help4 = df1.iloc[0:1,0:2].reset_index(drop=True).copy()
df1['Date (DD/MM/YYYY)'] = df1['Date (DD/MM/YYYY)'].dt.strftime('%d.%m.%Y')
list3=[]
dfdate= str(df1.at[0,'Date (DD/MM/YYYY)'])
print(dfdate)
df_help3=df1.copy() #deepcopy für tageszeitanalyse/runs
df_help3['Time (HH:mm:ss)'] = pd.to_datetime(df_help3['Time (HH:mm:ss)'],format='%H:%M:%S')
df_help3['next run'] = (df_help3['Time (HH:mm:ss)'].diff()).dt.seconds > 2000
df_help3.reset_index(drop=True, inplace=True)
for i in range(df_help3.shape[0]):
if df_help3.at[i,'next run'] == True:
list3.append(i)
###algorithmus spaltet tag in runs auf
l_mod2 = [0] + list3 + [df1.shape[0]]
dfs_by_run = [df1.iloc[l_mod2[n]:l_mod2[n+1]] for n in range(len(l_mod2)-1)]
for k in dfs_by_run:
df_run = k
df_run['Depth m'] = pd.to_numeric(df_run['Depth m'])
df_run['depth rounded'] = df_run['Depth m'].astype(int) #rundet
df_run=df_run.reset_index(drop=True)
df_run = df_run.drop_duplicates(subset=['depth rounded'], keep='last')#letzter wert
del df_run['depth rounded']
df_run=df_run.dropna(axis=0,how='any',thresh=2)
df_run=df_run.reset_index(drop=True)
run_name = str(dfdate) +'_run' + str(run_counter)
#####sensortoresultfile
if first_run_counter==0:
last_df=df_run.copy()
last_df=last_df[0:0]
last_df=last_df.append(df_run)
first_run_counter+=1
with open(logfile, "a") as file:
file.write("{0} has {1} last measurement(s) \n".format(run_name,df_run.shape[0]))
run_counter+=1
#alle daten raw aber mit sensor und header pro tag
df_help4['Time (HH:mm:ss)'] = df_help4['Time (HH:mm:ss)'].astype(str)
df_help4['Date (DD/MM/YYYY)'] = df_help4['Date (DD/MM/YYYY)'].astype(str)
for i in range(df_help.shape[0]):
if df_help4.at[0,'Date (DD/MM/YYYY)'] == df_help.at[i,0]:
if df_help4.at[0,'Time (HH:mm:ss)'] == df_help.at[i,1]:
memo=i
break
for n in reversed(list(range(memo))):
if df_help.at[n,3] == 'SENSOR SERIAL NUMBER:':
sensor_info=df_help.iloc[n:n+1,:]
sensor_info.reset_index(drop=True,inplace=True)
break
sensor_info.at[0,0:2]='-'
df1 = df1.columns.to_frame().T.append(df1, ignore_index=True)#fügt header als zeile ganz oben hinzu
df1.columns = range(len(df1.columns))#header neu 0 bis n
if first_day_counter==0:
raw_df=df1.copy()
raw_df=raw_df[0:0]
sensor_info.columns= range(len(df1.columns))
df1 = pd.concat([df1.iloc[:(0)], sensor_info, df1.iloc[0:]]).reset_index(drop=True)
raw_df=raw_df.append(df1)
first_day_counter += 1
last_df.to_excel(writer, sheet_name='{}'.format("last"),header=False, index = False)
#raw_df['Date (DD/MM/YYYY)'] = raw_df['Date (DD/MM/YYYY)'].dt.strftime('%d.%m.%Y')
raw_df.to_excel(writer, sheet_name='{}'.format("raw"),header=False, index = False)
writer.save()
with open(logfile, "a") as file:
file.write("total number of last measurements: {} \n".format(last_df.shape[0]))
file.write("total number of raw measurements: {} \n".format(raw_df.shape[0]))
f.close()
error:
Traceback (most recent call last):
File "tsk-py-convert.py", line 95, in <module>
File "pandas\core\generic.pyc", line 5458, in __getattr__
File "pandas\core\accessor.pyc", line 180, in __get__
File "pandas\core\indexes\accessors.pyc", line 494, in __new__
AttributeError: Can only use .dt accessor with datetimelike values

Within spyder the code was using an old pandas version (0.23.4). My code doesn't seem to work with a new version. I had the latest pandas version pip installed on windows and now manually installed the version of anaconda (0.23.4).
I can now run the code thorugh cmd, IDLE and the .exe that is created with pyinstaller works!

Related

Selenium Issue with Exec?

I am running a webscraper with selenium to get some data on the NBA. I have urls to get to the websites for each of the 30 teams, but when I run the code it only gets through a few of the urls and then crashes with the errors below being shown:
#web scraper
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
import pandas as pd
import os
class NBAScraper:
def __init__(self):
#part 1
url = "https://www.nba.com/teams"
HTML = requests.get(url)
soup = BeautifulSoup(HTML.text, 'html.parser')
text = str(soup.find_all("a", "Anchor_anchor__cSc3P TeamFigureLink_teamFigureLink__uqnNO"))
ids = []
for i in range(0, 30):
hr = text.find("stats")
ids.append(text[(hr+11):(hr+21)])
text = text[(hr+22):]
#part 2
names = []
for j in range(0, 30):
url2 = "https://www.nba.com/stats/team/"+str(ids[j])+"/advanced"
HTML2 = requests.get(url2)
soup2 = BeautifulSoup(HTML2.text, 'html.parser')
##div class="TeamHeader_name__MmHlP
name = str(soup2.find("div", "TeamHeader_name__MmHlP"))
ni = name.find("div>")
ni2 = name.find("<!")
name1 = name[(ni+4):ni2]
name = name[ni2:]
ni3 = name.find("<div>")
name = name[(ni3+5):]
ni4 = name.find("</div>")
name2 = name[:ni4]
n = name1 + " " + name2
names.append(n)
##tbody class="Crom_body__UYOcU"
#part 3
offrtg = []
defrtg = []
reb = []
tov = []
efg = []
for k in range(0, 30):
self.driver = webdriver.Chrome()
url3 = "https://www.nba.com/stats/team/"+str(ids[k])+"/advanced"
self.driver.get(url3)
rndrhtml = self.driver.page_source
self.driver.close()
#self.driver.quit()
soup3 = BeautifulSoup(rndrhtml, 'html.parser')
ovrall = str(soup3.find("tbody", "Crom_body__UYOcU").find_all("td"))
for d in range(0, 13):
di = ovrall.find("<td>")
ovrall = ovrall[(di+4):]
#conditions
if d == 2:
di2 = ovrall.find("</td>")
offrtg.append(float(ovrall[:di2]))
elif d == 3:
di2 = ovrall.find("</td>")
defrtg.append(float(ovrall[:di2]))
elif d == 10:
di2 = ovrall.find("</td>")
reb.append(float(ovrall[:di2]))
elif d == 11:
di2 = ovrall.find("</td>")
tov.append(float(ovrall[:di2]))
elif d == 12:
di2 = ovrall.find("</td>")
efg.append(float(ovrall[:di2]))
#writing to excel
os.remove(r"C:\Users\jackm\OneDrive\Desktop\NBA\NBASTATS.xlsx")
d = {'Name': names, 'OFFRTG': offrtg, 'DEFRTG': defrtg, 'REB': reb,
'TOV': tov, 'EFG': efg}
df = pd.DataFrame(data=d)
df.to_excel(r"C:\Users\jackm\OneDrive\Desktop\NBA\NBASTATS.xlsx", sheet_name="STATS")
NBAScraper()
I tried to play around with the closing and quitting functions for the driver, or put the driver in a separate function and run it outside the class, but none of that worked. I realized through some testing that even if it's not inside a loop, selenium will throw the error for a url but run it fine the second time. I tried using implicit waits to solve this but to no avail.
Traceback (most recent call last):
File "C:\Program Files\Spyder\pkgs\spyder_kernels\py3compat.py", line 356, in compat_exec
exec(code, globals, locals)
File "c:\users\jackm\spyder\nba.py", line 104, in <module>
NBAScraper()
File "c:\users\jackm\spyder\nba.py", line 71, in __init__
ovrall = str(soup3.find("tbody", "Crom_body__UYOcU").find_all("td"))
AttributeError: 'NoneType' object has no attribute 'find_all'

Multithread and AttributeError: 'NoneType' object has no attribute 'groups'

We wrote this code in order to plot the data conteined in a txt file:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import re
import numpy as np
import os
names = ['CH','LG','HG','Ts(ns)','ToT(ns)']
righe_primo_header = 5
righe_header = 5
canali = 64
# input file
infile = 'Run1_list.txt'
# determinare numero di righe, poi di eventi nel file di input
stream = os.popen('wc -l '+ infile)
nrighe = stream.read()
match = re.match(r" (\S+)\s*", nrighe, re.I)
items = match.groups()
nrighe = float(items[0])
#print( 'nrighe = ',nrighe)
# numero di blocchi di dati da leggere
ntrigger = (nrighe - righe_primo_header) / (canali + righe_header) - 1
ntrigger = int( ntrigger)
print('trovati ',ntrigger,' eventi')
ncanali_histo = int(np.sqrt(ntrigger))
ncanali_histo = 4096
events = []
file1 = open( infile, 'r')
for line in range(righe_primo_header-1):
line = file1.readline()
#print('saltiamo riga ', line)
line=file1.readline()
for trigger in range(ntrigger):
#while line:
for lineh in range(righe_header):
line = file1.readline()
#print('saltiamo ',line)
for canale in range(canali):
line = file1.readline()
#print(' elaboriamo ',line)
match = re.match(r"(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+", line, re.I)
temparray = []
if match:
items = match.groups()
#print(items)
for colonna in range(len(items)):
col = items[colonna]
if col == '-':
valore = 0
else:
valore = float(items[colonna])
temparray.append( valore )
#print('blocco ', trigger, ' colonna ', colonna, ' ', items[colonna],' -> ',valore)
#print('temparray = ',temparray)
events.append(temparray)
file1.close()
print('ultimo trigger ID letto: ', trigger)
#print('events = ',events)
df = pd.DataFrame( events, columns = names)
print(df)
# istogramma di HG per canale fissato
canale = 44
plot_df = df.loc[ df['CH'] == canale ]
print('plot_df per istogramma:')
print(plot_df)
plot_df.hist(column='HG', bins=ncanali_histo)
plt.title('Multiphoton spectrum HG channel ' + str(canale) )
# seleziona un evento
evento = 3
plot_df = df[ (canali * evento):(canali*evento + canali) ]
print('plot_df per scatter plot:')
print(plot_df)
plot_df.plot.scatter(x='CH', y='HG', c='red')
plt.title('HG vs CH event ' + str(evento) )
plt.show()
This code perfectly works in MacOs but not in Linux and Windows (of course becouse we dont use wc command, no problem) and we get the following error:
Traceback (most recent call last):
File "Read_list.py", line 20, in <module>
items = match.groups()
AttributeError: 'NoneType' object has no attribute 'groups'
Why this errors happens?
Then, the txt file is of the order of GB, how can i run the code using the multithread? Can you help me?
I upload a small example of data here (see raw): https://pastebin.com/raw/PjVYc3vn
I resolve the firt issue:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import re
import numpy as np
import os
names = ['CH','LG','HG','Ts(ns)','ToT(ns)']
righe_primo_header = 5
righe_header = 5
canali = 64
# input file
infile = 'Run1_list.txt'
# determinare numero di righe, poi di eventi nel file di input
stream = os.popen('wc -l '+ infile)
nrighe = stream.read()
#print( 'nrighe = ',nrighe)
match = re.match(r"\s*(\S+) (.*)", nrighe, re.I)
#print( match)
items = match.groups()
nrighe = float(items[0])
#print( 'nrighe = ',nrighe)
# numero di blocchi di dati da leggere
ntrigger = (nrighe - righe_primo_header) / (canali + righe_header) - 1
ntrigger = int( ntrigger)
print('trovati ',ntrigger,' eventi')
ncanali_histo = int(np.sqrt(ntrigger))
ncanali_histo = 4096
events = []
file1 = open( infile, 'r')
for line in range(righe_primo_header-1):
line = file1.readline()
#print('saltiamo riga ', line)
line=file1.readline()
for trigger in range(ntrigger):
#while line:
for lineh in range(righe_header):
line = file1.readline()
#print('saltiamo ',line)
for canale in range(canali):
line = file1.readline()
#print(' elaboriamo ',line)
match = re.match(r"(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+", line, re.I)
temparray = []
if match:
items = match.groups()
#print(items)
for colonna in range(len(items)):
col = items[colonna]
if col == '-':
valore = 0
else:
valore = float(items[colonna])
temparray.append( valore )
#print('blocco ', trigger, ' colonna ', colonna, ' ', items[colonna],' -> ',valore)
#print('temparray = ',temparray)
events.append(temparray)
file1.close()
print('ultimo trigger ID letto: ', trigger)
#print('events = ',events)
df = pd.DataFrame( events, columns = names)
print('Il dataframe totale è il seguente: ', df)
# istogramma di HG per canale fissato
canale = 44
plot_df = df.loc[ (df['CH'] == canale) | (df['CH'] == 50)]
print('Il dataframe selezionato è il seguente: ', plot_df)
pd.options.mode.chained_assignment = None # default='warn'
plot_df['HG'][df['CH']==44] *= (1.096)
fig = px.histogram(plot_df, x='HG', color='CH', barmode='overlay', opacity=0.8, title='Multiphoton spectrum HG channel')
fig.update_traces(xbins=dict(start=0.0, end=4096.0, size=1))
fig.show()
# seleziona un evento
evento = 3
plot_df2 = df[(canali * evento):(canali*evento + canali)]
print('Il dataframe per lo scatter plot HG vs Ch relativo all evento ',evento, 'è il seguente: ', plot_df2)
fig2 = px.scatter(plot_df2, x='CH', y='HG', title='HG vs CH event ' + str(evento) )
fig2.show()
Now, how can i compile it in multithread?

AttributeError:'str' object has no attribute 'unique' (Pandas.unique)

In my script, I use pandas module. When I execute my file.py - everything works well. But I've converted my file.py to file.exe with auto-py-to-exe and got an error: AttributeError:'str' object has no attribute 'unique'. It's strange because it worked normally. The line where becomes an error: wells=list(file[0].unique()). Who knows this issue, please help.
import tkinter as tk
import tkinter.filedialog as fd
import pandas as pd
import os
import datetime
from datetime import datetime, date
import numpy as np
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 80)
pd.set_option('display.max_rows', 200)
pd.set_option('display.width', 800)
def resource_path(relative_path):
try:
base_path = sys._MEIPASS
except Exception:
base_path = os.path.abspath(".")
return os.path.join(base_path, relative_path)
def open():
global file_excel, name
file_excel = fd.askopenfilename(initialdir='/Desktop', title='Открыть файл', filetypes = [("Excel", "*.xlsx")])
name = os.path.basename(file_excel)
name=os.path.splitext(name)[0]
file_excel=pd.read_excel(file_excel, skiprows=[0], header=None)
win.destroy()
return file_excel, name
win = tk.Tk()
path = resource_path("image.png")
photo = tk.PhotoImage(file=path)
win.iconphoto(False, photo)
win.config(bg='#FFC')
win.title('Конвертация в формат .ev')
win.geometry('400x130+500+500')
win.resizable(False, False)
label_1 = tk.Label(win, text = 'Выберите файл с испытаниями скважин:',
bg = '#FFC',
font=('Arial', 10, 'bold'),
padx=20,
pady=10).pack()
btn_1 = tk.Button(win, text = 'Выбрать Excel',
command = open,
activebackground = '#6F6',
font=('Arial', 12, 'bold'),
padx=20,
pady=10,
relief = tk.RAISED,
bd=2).pack()
win.mainloop()
wells=list(file_excel[0].unique())
file_excel[1] = pd.to_datetime(file_excel[1], errors='coerce').dt.strftime("%d/%m/%Y")
file_excel[4] = np.where(file_excel[1].str, 'Perforation', np.nan)
file_excel.iloc[:,[2,3]]=file_excel.iloc[:,[2,3]].abs()
col_list = list(file_excel)
col_list[4], col_list[2] = col_list[2], col_list[4]
file_excel.columns = col_list
Perforation=pd.DataFrame(data=None)
for i in wells:
well_name=pd.DataFrame({'WELLNAME '+i}, columns=[1])
Perforation=Perforation.append(well_name)
Perforation=Perforation.append(file_excel.iloc[:,[1,2,3,4]][file_excel.iloc[:,0]==i])
Perforation=Perforation.append(pd.Series(dtype = 'object'), ignore_index=True)
def SaveFile():
Save=fd.asksaveasfile(mode='w',defaultextension=".ev", initialfile=name)
Save.write(Perforation.to_string(index=False, header=False, na_rep=' '))
win.destroy()
win = tk.Tk()
path = resource_path("image.png")
photo = tk.PhotoImage(file=path)
win.iconphoto(False, photo)
win.config(bg='#FFC')
win.title('Конвертация в формат .ev')
win.geometry('400x130+500+500')
win.resizable(False, False)
label_1 = tk.Label(win, text = 'Сохранение:',
bg = '#FFC',
font=('Arial', 10, 'bold'),
padx=20,
pady=10).pack()
btn_1 = tk.Button(win, text = 'Сохранить как',
command = SaveFile,
activebackground = '#6F6',
font=('Arial', 12, 'bold'),
padx=20,
pady=10,
relief = tk.RAISED,
bd=2).pack()
win.mainloop()
type of file[0]
Error screen
When I created virtual env I should have added openpyxl module. And I made it and everything is fine now

TypeError: POST data should be bytes or an iterable of bytes. It cannot be of type str

My Code.
#!/usr/bin/env python
#coding: utf-8
userid="NicoNicoCreate#gmail.com"
passwd="********"
import sys, re, cgi, urllib, urllib.request, urllib.error, http.cookiejar, xml.dom.minidom, time, urllib.parse
import simplejson as json
def getToken():
html = urllib.request.urlopen("http://www.nicovideo.jp/my/mylist").read()
for line in html.splitlines():
mo = re.match(r'^\s*NicoAPI\.token = "(?P<token>[\d\w-]+)";\s*',line)
if mo:
token = mo.group('token')
break
assert token
return token
def mylist_create(name):
cmdurl = "http://www.nicovideo.jp/api/mylistgroup/add"
q = {}
q['name'] = name.encode("utf-8")
q['description'] = ""
q['public'] = 0
q['default_sort'] = 0
q['icon_id'] = 0
q['token'] = token
cmdurl += "?" + urllib.parse.urlencode(q).encode("utf-8")
j = json.load( urllib.request.urlopen(cmdurl), encoding='utf-8')
return j['id']
def addvideo_tomylist(mid,smids):
for smid in smids:
cmdurl = "http://www.nicovideo.jp/api/mylist/add"
q = {}
q['group_id'] = mid
q['item_type'] = 0
q['item_id'] = smid
q['description'] = u""
q['token'] = token
cmdurl += "?" + urllib.parse.urlencode(q).encode("utf-8")
j = json.load( urllib.request.urlopen(cmdurl), encoding='utf-8')
time.sleep(0.5)
#Login
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(http.cookiejar.CookieJar()))
urllib.request.install_opener(opener)
urllib.request.urlopen("https://secure.nicovideo.jp/secure/login",
urllib.parse.urlencode( {"mail":userid, "password":passwd}) ).encode("utf-8")
#GetToken
token = getToken()
#MakeMylist&AddMylist
mid = mylist_create(u"Testlist")
addvideo_tomylist(mid, ["sm9","sm1097445", "sm1715919" ] )
MyError.
Traceback (most recent call last):
File "Nico3.py", line 48, in <module>
urllib.parse.urlencode( {"mail":userid, "password":passwd}) ).encode("utf-8")
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 463, in open
req = meth(req)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1170, in do_request_
raise TypeError(msg)
TypeError: POST data should be bytes or an iterable of bytes. It cannot be of type str.
I've tried encode but it did not help.
I'm japanese accademic students.
It was not able to be settled by my knowledge.
I am aware of this similar question, TypeError: POST data should be bytes or an iterable of bytes. It cannot be str, but am too new for the answer to be much help.
You paren is in the wrong place so you are not actually encoding:
.urlencode({"mail":userid, "password":passwd}).encode("utf-8")) # <- move inside

How to use the PyPy as the notebook interpreter?

I Have a Script for data extraction from some CSV files and bifurcating the Data into different excel files. I using Ipython for the that and I m sure it using CPython as the Default interpreter.
But the script is taking too much time for the whole process to finish. Can someone please help to how use that script using the PyPy as i heard it is much faster than CPython.
Script is something like this:
import pandas as pd
import xlsxwriter as xw
import csv
import pymsgbox as py
file1 = "vDashOpExel_Change_20150109.csv"
file2 = "vDashOpExel_T3Opened_20150109.csv"
path = "C:\Users\Abhishek\Desktop\Pandas Anlaysis"
def uniq(words):
seen = set()
for word in words:
l = word.lower()
if l in seen:
continue
seen.add(l)
yield word
def files(file_name):
df = pd.read_csv( path + '\\' + file_name, sep=',', encoding = 'utf-16')
final_frame = df.dropna(how='all')
file_list = list(uniq(list(final_frame['DOEClient'])))
return file_list, final_frame
def fill_data(f_list, frame1=None, frame2=None):
if f_list is not None:
for client in f_list:
writer = pd.ExcelWriter(path + '\\' + 'Accounts'+ '\\' + client + '.xlsx', engine='xlsxwriter')
if frame1 is not None:
data1 = frame1[frame1.DOEClient == client] # Filter the Data
data1.to_excel(writer,'Change',index=False, header=True) # Importing the Data to Excel File
if frame2 is not None:
data2 = frame2[frame2.DOEClient == client] # Filter the Data
data2.to_excel(writer,'Opened',index=False, header=True) # Importing the Data to Excel File
else:
py.alert('Please enter the First Parameter !!!', 'Error')
list1, frame1 = files(file1)
list2, frame2 = files(file2)
final_list = set(list1 + list2)