In my script, I use pandas module. When I execute my file.py - everything works well. But I've converted my file.py to file.exe with auto-py-to-exe and got an error: AttributeError:'str' object has no attribute 'unique'. It's strange because it worked normally. The line where becomes an error: wells=list(file[0].unique()). Who knows this issue, please help.
import tkinter as tk
import tkinter.filedialog as fd
import pandas as pd
import os
import datetime
from datetime import datetime, date
import numpy as np
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 80)
pd.set_option('display.max_rows', 200)
pd.set_option('display.width', 800)
def resource_path(relative_path):
try:
base_path = sys._MEIPASS
except Exception:
base_path = os.path.abspath(".")
return os.path.join(base_path, relative_path)
def open():
global file_excel, name
file_excel = fd.askopenfilename(initialdir='/Desktop', title='Открыть файл', filetypes = [("Excel", "*.xlsx")])
name = os.path.basename(file_excel)
name=os.path.splitext(name)[0]
file_excel=pd.read_excel(file_excel, skiprows=[0], header=None)
win.destroy()
return file_excel, name
win = tk.Tk()
path = resource_path("image.png")
photo = tk.PhotoImage(file=path)
win.iconphoto(False, photo)
win.config(bg='#FFC')
win.title('Конвертация в формат .ev')
win.geometry('400x130+500+500')
win.resizable(False, False)
label_1 = tk.Label(win, text = 'Выберите файл с испытаниями скважин:',
bg = '#FFC',
font=('Arial', 10, 'bold'),
padx=20,
pady=10).pack()
btn_1 = tk.Button(win, text = 'Выбрать Excel',
command = open,
activebackground = '#6F6',
font=('Arial', 12, 'bold'),
padx=20,
pady=10,
relief = tk.RAISED,
bd=2).pack()
win.mainloop()
wells=list(file_excel[0].unique())
file_excel[1] = pd.to_datetime(file_excel[1], errors='coerce').dt.strftime("%d/%m/%Y")
file_excel[4] = np.where(file_excel[1].str, 'Perforation', np.nan)
file_excel.iloc[:,[2,3]]=file_excel.iloc[:,[2,3]].abs()
col_list = list(file_excel)
col_list[4], col_list[2] = col_list[2], col_list[4]
file_excel.columns = col_list
Perforation=pd.DataFrame(data=None)
for i in wells:
well_name=pd.DataFrame({'WELLNAME '+i}, columns=[1])
Perforation=Perforation.append(well_name)
Perforation=Perforation.append(file_excel.iloc[:,[1,2,3,4]][file_excel.iloc[:,0]==i])
Perforation=Perforation.append(pd.Series(dtype = 'object'), ignore_index=True)
def SaveFile():
Save=fd.asksaveasfile(mode='w',defaultextension=".ev", initialfile=name)
Save.write(Perforation.to_string(index=False, header=False, na_rep=' '))
win.destroy()
win = tk.Tk()
path = resource_path("image.png")
photo = tk.PhotoImage(file=path)
win.iconphoto(False, photo)
win.config(bg='#FFC')
win.title('Конвертация в формат .ev')
win.geometry('400x130+500+500')
win.resizable(False, False)
label_1 = tk.Label(win, text = 'Сохранение:',
bg = '#FFC',
font=('Arial', 10, 'bold'),
padx=20,
pady=10).pack()
btn_1 = tk.Button(win, text = 'Сохранить как',
command = SaveFile,
activebackground = '#6F6',
font=('Arial', 12, 'bold'),
padx=20,
pady=10,
relief = tk.RAISED,
bd=2).pack()
win.mainloop()
type of file[0]
Error screen
When I created virtual env I should have added openpyxl module. And I made it and everything is fine now
Related
I am running a webscraper with selenium to get some data on the NBA. I have urls to get to the websites for each of the 30 teams, but when I run the code it only gets through a few of the urls and then crashes with the errors below being shown:
#web scraper
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
import pandas as pd
import os
class NBAScraper:
def __init__(self):
#part 1
url = "https://www.nba.com/teams"
HTML = requests.get(url)
soup = BeautifulSoup(HTML.text, 'html.parser')
text = str(soup.find_all("a", "Anchor_anchor__cSc3P TeamFigureLink_teamFigureLink__uqnNO"))
ids = []
for i in range(0, 30):
hr = text.find("stats")
ids.append(text[(hr+11):(hr+21)])
text = text[(hr+22):]
#part 2
names = []
for j in range(0, 30):
url2 = "https://www.nba.com/stats/team/"+str(ids[j])+"/advanced"
HTML2 = requests.get(url2)
soup2 = BeautifulSoup(HTML2.text, 'html.parser')
##div class="TeamHeader_name__MmHlP
name = str(soup2.find("div", "TeamHeader_name__MmHlP"))
ni = name.find("div>")
ni2 = name.find("<!")
name1 = name[(ni+4):ni2]
name = name[ni2:]
ni3 = name.find("<div>")
name = name[(ni3+5):]
ni4 = name.find("</div>")
name2 = name[:ni4]
n = name1 + " " + name2
names.append(n)
##tbody class="Crom_body__UYOcU"
#part 3
offrtg = []
defrtg = []
reb = []
tov = []
efg = []
for k in range(0, 30):
self.driver = webdriver.Chrome()
url3 = "https://www.nba.com/stats/team/"+str(ids[k])+"/advanced"
self.driver.get(url3)
rndrhtml = self.driver.page_source
self.driver.close()
#self.driver.quit()
soup3 = BeautifulSoup(rndrhtml, 'html.parser')
ovrall = str(soup3.find("tbody", "Crom_body__UYOcU").find_all("td"))
for d in range(0, 13):
di = ovrall.find("<td>")
ovrall = ovrall[(di+4):]
#conditions
if d == 2:
di2 = ovrall.find("</td>")
offrtg.append(float(ovrall[:di2]))
elif d == 3:
di2 = ovrall.find("</td>")
defrtg.append(float(ovrall[:di2]))
elif d == 10:
di2 = ovrall.find("</td>")
reb.append(float(ovrall[:di2]))
elif d == 11:
di2 = ovrall.find("</td>")
tov.append(float(ovrall[:di2]))
elif d == 12:
di2 = ovrall.find("</td>")
efg.append(float(ovrall[:di2]))
#writing to excel
os.remove(r"C:\Users\jackm\OneDrive\Desktop\NBA\NBASTATS.xlsx")
d = {'Name': names, 'OFFRTG': offrtg, 'DEFRTG': defrtg, 'REB': reb,
'TOV': tov, 'EFG': efg}
df = pd.DataFrame(data=d)
df.to_excel(r"C:\Users\jackm\OneDrive\Desktop\NBA\NBASTATS.xlsx", sheet_name="STATS")
NBAScraper()
I tried to play around with the closing and quitting functions for the driver, or put the driver in a separate function and run it outside the class, but none of that worked. I realized through some testing that even if it's not inside a loop, selenium will throw the error for a url but run it fine the second time. I tried using implicit waits to solve this but to no avail.
Traceback (most recent call last):
File "C:\Program Files\Spyder\pkgs\spyder_kernels\py3compat.py", line 356, in compat_exec
exec(code, globals, locals)
File "c:\users\jackm\spyder\nba.py", line 104, in <module>
NBAScraper()
File "c:\users\jackm\spyder\nba.py", line 71, in __init__
ovrall = str(soup3.find("tbody", "Crom_body__UYOcU").find_all("td"))
AttributeError: 'NoneType' object has no attribute 'find_all'
I need some assistance, I have written an application that queries all records in a table and prints it out but when I run the program it prints it out in the IDE, and trying to get it to print in the lower label.
from __future__ import print_function
from ast import Lambda
import sqlalchemy as sa
import pandas as pd
import tkinter as tk
from tkinter import *
from PIL import ImageTk, Image
server = 'ABQSQ03t'
username = 'TECO//AAPWP'
password = '*****##'
timeout = '60'
database = 'NMGC_PIM_DEV'
driver = 'ODBC+Driver+17+for+SQL+Server'
engine = sa.create_engine(
f'mssql+pyodbc://{server}/{database}?username={username}?password={password}?timeout=
{timeout}&driver={driver}')
cn = engine.connect()
root = tk.Tk()
root.title("Compliance SQL Backend Reporting")
HEIGHT = 800
WIDTH = 1000
#background image
canv = tk.Canvas(root, width=80, height=80, bg='white')
canv.place(relwidth=1, relheight=1)
img = ImageTk.PhotoImage(Image.open("NMGC_Emera_color240x75.png")) # PIL solution
canv.create_image(20, 20, anchor='nw', image=img)
background_label= tk.Label(root, image=img)
background_label.place(x=0,y=0, relwidth=1, relheight=1)
#button Functionaility
#def test_function(entry):
#print("Button Clicked you typed:", entry)
def test_function(entry):
sql = pd.read_sql(f'Select * from [dbo].[{entry}]', con=cn)
df=pd.DataFrame(sql)
final_str = print(sql)
#Guid setup
canvas = tk.Canvas(root, height = HEIGHT, width=WIDTH)
canvas.pack()
frame = tk.Frame(root, bg='#7B94AD', bd=5)
frame.place(relx=0.5, rely=0.1, relwidth=0.75, relheight=0.1, anchor='n')
entry = tk.Entry(frame, bg='white')
entry.place(relwidth=0.65, relheight=1)
button = tk.Button(frame, text='Generate Report', font = 40, command=lambda:
test_function(entry.get()))
button.place(relx=0.7, relheight=1, relwidth=0.3)
lower_frame = tk.Frame(root, bg='#7B94AD', bd=10)
lower_frame.place(relx=0.5, rely=0.25, relwidth=0.75, relheight=0.6, anchor='n')
label=tk.Label(lower_frame, bg='white', textvariable=test_function)
label.place(relwidth=1, relheight=1)
root.mainloop()
Sample dataframe headers:
APPGROUPID APPTITLE DESCRIPTION ORGAREANAME CONGROUPNAME DISPLAYLEVELCOUNT CreatedDate CreatedBy ModifiedDate ModifiedBy
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys # keys içerisinden enter yapabilmesini sağlıyoruz
browser = webdriver.Chrome("C:/Users/EMRE/Desktop/SCRAPE/chromedriver_win32/chromedriver.exe")
import pandas as pd
browser.get("http://event.ybu.edu.tr/kulupler/")
import csv
#browser.fullscreen_window()
#time.sleep(2)
#for i in range(6):
#browser.execute_script('window.scrollTo(0,document.body.scrollHeight)') #sayfayı aşağıya doğru çekmek için
#time.sleep(1)
Kulup_button = browser.find_element_by_xpath("/html/body/div[2]/div[2]/div[1]/a/div/div[1]/div") #ilk kulüp için sonra değiştir
Kulup_button.click()
time.sleep(1)
for i in range(1):
browser.execute_script('window.scrollTo(0,document.body.scrollHeight)') #sayfayı aşağıya doğru çekmek için
time.sleep(1)
kulupnames = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[1]/td[2]")
kulupList=[]
for kulupname in kulupnames:
kulupList.append(kulupname.text)
mails = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-orange.btn-social")
MailList=[]
for mail in mails:
MailList.append(mail.text)
FacebookAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-blue.btn-social")
FacebookList=[]
for FacebookAdress in FacebookAdresses:
FacebookList.append(FacebookAdress.text)
TwitterAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-aqua")
TwitterList=[]
for TwitterAdress in TwitterAdresses:
TwitterList.append(TwitterAdress.text)
InstagramAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-light-blue")
InstagramList=[]
for InstagramAdress in InstagramAdresses:
InstagramList.append(InstagramAdress.text)
AkademikDanismanlar = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[2]/td[2]")
DanismanList=[]
for AkademikDanisman in AkademikDanismanlar:
DanismanList.append(AkademikDanisman.text)
KulupBaskanlari = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[3]/td[2]")
BaskanList=[]
for KulupBaskani in KulupBaskanlari:
BaskanList.append(KulupBaskani.text)
ToplamUyeler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[4]/td[2]")
UyeList=[]
for Uye in ToplamUyeler:
UyeList.append(Uye.text)
Etkinlikler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[5]/td[2]")
EtkinlikList=[]
for Etkinlik in Etkinlikler:
EtkinlikList.append(Etkinlik.text)
time.sleep(5)
browser.quit()
DataFile = csv.writer(open('AYBU.csv','w'))
DataFile.writerow(['KulupAdi','MailAdresi','FacebookAdresi','TwitterAdresi','InstagramAdres','AkademikDanisman','KulupBaskani','ToplamUyeSayisi','ToplamEtkinlikSayisi'])
DataFile.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
liste = ['kulupList','MailList','FacebookList','TwitterList','InstagramList','DanismanList','BaskanList','UyeList','EtkinlikList']
df = pd.DataFrame(data = liste)
liste.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
I am trying to save my variable list as dataframe to csv.
You have a couple flaws in your code that I can see.
I took your code and made it work and I'll explain how:
import csv
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys # keys içerisinden enter yapabilmesini sağlıyoruz
browser = webdriver.Chrome()
time.sleep(5)
browser.get("http://event.ybu.edu.tr/kulupler/")
Kulup_button = browser.find_element_by_xpath("/html/body/div[2]/div[2]/div[1]/a/div/div[1]/div") #ilk kulüp için sonra değiştir
Kulup_button.click()
time.sleep(1)
for _ in range(1):
browser.execute_script('window.scrollTo(0,document.body.scrollHeight)') #sayfayı aşağıya doğru çekmek için
time.sleep(1)
kulupnames = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[1]/td[2]")
kulupList = [kulupname.text for kulupname in kulupnames]
mails = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-orange.btn-social")
MailList = [mail.text for mail in mails]
FacebookAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.bg-blue.btn-social")
FacebookList = [FacebookAdress.text for FacebookAdress in FacebookAdresses]
TwitterAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-aqua")
TwitterList = [TwitterAdress.text for TwitterAdress in TwitterAdresses]
InstagramAdresses = browser.find_elements_by_css_selector("#bilgiler > a.btn.btn-social.bg-light-blue")
InstagramList = [InstagramAdress.text for InstagramAdress in InstagramAdresses]
AkademikDanismanlar = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[2]/td[2]")
DanismanList = [
AkademikDanisman.text for AkademikDanisman in AkademikDanismanlar
]
KulupBaskanlari = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[3]/td[2]")
BaskanList = [KulupBaskani.text for KulupBaskani in KulupBaskanlari]
ToplamUyeler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[4]/td[2]")
UyeList = [Uye.text for Uye in ToplamUyeler]
Etkinlikler = browser.find_elements_by_xpath("/html/body/div[2]/div[2]/section/div/div[2]/div/div[1]/div/div[1]/table/tbody/tr[5]/td[2]")
EtkinlikList = [Etkinlik.text for Etkinlik in Etkinlikler]
time.sleep(5)
browser.quit()
with open('AYBU.csv','w') as datafile:
DataFile = csv.writer(datafile)
DataFile.writerow(['KulupAdi','MailAdresi','FacebookAdresi','TwitterAdresi','InstagramAdres','AkademikDanisman','KulupBaskani','ToplamUyeSayisi','ToplamEtkinlikSayisi'])
liste = [kulupList,MailList,FacebookList,TwitterList,InstagramList,DanismanList,BaskanList,UyeList,EtkinlikList]
df = pd.DataFrame(data = liste)
df.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
The key changes here are at the bottom (don't mind the clean up of the generators).
DataFile = csv.writer(open('AYBU.csv','w'))
DataFile.writerow(['KulupAdi','MailAdresi','FacebookAdresi','TwitterAdresi','InstagramAdres','AkademikDanisman','KulupBaskani','ToplamUyeSayisi','ToplamEtkinlikSayisi'])
DataFile.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
liste = ['kulupList','MailList','FacebookList','TwitterList','InstagramList','DanismanList','BaskanList','UyeList','EtkinlikList']
df = pd.DataFrame(data = liste)
liste.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
This code doesnt work.
with open('AYBU.csv','w') as datafile:
DataFile = csv.writer(datafile)
DataFile.writerow(['KulupAdi','MailAdresi','FacebookAdresi','TwitterAdresi','InstagramAdres','AkademikDanisman','KulupBaskani','ToplamUyeSayisi','ToplamEtkinlikSayisi'])
liste = [kulupList,MailList,FacebookList,TwitterList,InstagramList,DanismanList,BaskanList,UyeList,EtkinlikList]
df = pd.DataFrame(data = liste)
df.to_csv("AYBU.csv", index = False, encoding='utf-8-sig')
You had 'list' as strings.
pandas is able to use .to_csv but csv.writer is not.
I am scraping names, prices and images from this website. There are 8 items in total, but in the DF I would like to filter only the items that contain the pattern "Original Zaino Antifurto". When I try to apply the bp_filter to the DF I get an error, probably due to hidden characters.
Does anyone know how to filter for this pattern avoiding the error?
import requests
from bs4 import BeautifulSoup
import pandas as pd
url_xd = 'https://www.xd-design.com/it-it/catalogsearch/result/?q=Bobby+Original+Zaino+Antifurto'
req_xd = requests.get(url_xd)
pars_xd = BeautifulSoup(req_xd.content, 'html.parser')
con_xd = pars_xd.find_all('div', class_ = 'product details product-item-details')
names_xd = []
prices_xd = []
picts_xd = []
for container in con_xd:
name = container.find("a", class_="product-item-link").text
names_xd.append(name)
for container in con_xd:
price = container.find("span", class_="price").text
prices_xd.append(price)
for container in con_xd:
pict = container.find("a").get("href")
picts_xd.append(pict)
bp_xd = pd.DataFrame({'(XD-Design) Item_Name': names_xd,
'Item_Price_EUR': prices_xd,
'Link_to_Pict': picts_xd })
bp_xd['Item_Price_EUR'] = bp_xd['Item_Price_EUR'].str.replace('€','').str.replace(',','.').astype(float)
bp_xd['(XD-Design) Item_Name'] = bp_xd['(XD-Design) Item_Name'].str.strip()
bp_filter = bp_xd['(XD-Design) Item_Name'][bp_xd['(XD-Design) Item_Name'].str.contains('Original Zaino Antifurto')]
# bp_xd[bp_filter]
Here you have the fixed working code
import requests
from bs4 import BeautifulSoup
import pandas as pd
url_xd = 'https://www.xd-design.com/it-it/catalogsearch/result/?q=Bobby+Original+Zaino+Antifurto'
req_xd = requests.get(url_xd)
pars_xd = BeautifulSoup(req_xd.content, 'html.parser')
con_xd = pars_xd.find_all('div', class_ = 'product details product-item-details')
names_xd = [c.find("a", class_="product-item-link").text for c in con_xd]
prices_xd = [c.find("span", class_="price").text for c in con_xd]
picts_xd = [c.find("a").get("href") for c in con_xd]
df = pd.DataFrame({'(XD-Design) Item_Name': names_xd,
'Item_Price_EUR': prices_xd,
'Link_to_Pict': picts_xd })
df['Item_Price_EUR'] = df['Item_Price_EUR'].str.replace('€','').str.replace(',','.').astype(float)
df['(XD-Design) Item_Name'] = df['(XD-Design) Item_Name'].str.strip()
df = df.loc[df['(XD-Design) Item_Name'].apply(lambda x: 1 if 'Original Zaino Antifurto' in x else 0) == 1]
would anyone advise me how to adjust the X axis to better display the date on this graph?
from math import pi
import pandas as pd
from bokeh.io import show
from bokeh.models import LinearColorMapper, BasicTicker, PrintfTickFormatter, ColorBar
from bokeh.plotting import figure
#cesta k souboru
path = "C://Users//Zemi4//Desktop//zpr3//all2.csv"
#nacteni dataframu
data = pd.read_csv(path, delimiter = ",")
data['Cas'] = data['Cas'].astype(str)
data = data.set_index('Cas')
data.columns.name = 'Mistnost'
times = list(data.index)
rooms = list(data.columns)
df = pd.DataFrame(data.stack(), columns=['float']).reset_index()
colors = ['#440154', '#404387', '#29788E', '#22A784', '#79D151', '#FDE724', '#FCFEA4', '#FBA40A', '#DC5039']
mapper = LinearColorMapper(palette=colors, low=df.float.min(), high=df.float.max())
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
p = figure(title="Heatmap ({0} - {1})".format(times[0], times[-1]),
x_range=times, y_range=list(reversed(rooms)),
x_axis_location="above", plot_width=1500, plot_height=900,
tools=TOOLS, toolbar_location='below',
tooltips=[('Time: ', '#Cas'), ('Temperature: ', '#float'), ('Room: ', '#Mistnost')],
x_axis_type='datetime')
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "5pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3
p.rect(x="Cas", y="Mistnost", width=1, height=1,
source=df,
fill_color={'field': 'float', 'transform': mapper},
line_color=None)
color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="5pt",
ticker=BasicTicker(desired_num_ticks=len(colors)),
formatter=PrintfTickFormatter(format="%f"),
label_standoff=6, border_line_color=None, location=(0, 0))
p.add_layout(color_bar, 'right')
show(p) # show the pl
Try: p.xaxis[0].ticker.desired_num_ticks = <number_ticks_you_want_to_display>.
Or apply a specific ticker (see Bokeh docs) like you did for the ColorBar.