Not scraping xpath correctly - selenium

I'm trying to use the following XPath expressions on this page, but they are not returning the data correctly.
groups = ".//*[contains(#class, 'sl-CouponParticipantWithBookCloses_Name ')]"
xp_bp1 = ".//following::div[contains(#class,'sl-MarketCouponValuesExplicit33')][./div[contains(#class,'gl-MarketColumnHeader')][.='1']]//span[#class='gl-ParticipantOddsOnly_Odds']"
The current output is:
[['3.00'], ['3.00'], ['3.00'], ...]
Desired:
[['3.00'], ['1.30'], ['1.25'], ...]
[Screenshot in the original post: the data I am after]
Script:
import csv
import time
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException

driver = webdriver.Chrome()
driver.set_window_size(1024, 600)
driver.maximize_window()
driver.get('https://www.bet365.com.au/#/AC/B1/C1/D13/E108/F16/S1/')
time.sleep(10)

groups = ".//*[contains(@class, 'sl-CouponParticipantWithBookCloses_Name ')]"
# //div[contains(@class, 'gl-ParticipantOddsOnlyDarker gl-ParticipantOddsOnly gl-Participant_General sl-MarketCouponAdvancedBase_LastChild ')]
xp_bp1 = ".//following::div[contains(@class,'sl-MarketCouponValuesExplicit33')][./div[contains(@class,'gl-MarketColumnHeader')][.='1']]//span[@class='gl-ParticipantOddsOnly_Odds']"

while True:
    try:
        time.sleep(2)
        data = []
        for elem in driver.find_elements_by_xpath(groups):
            try:
                bp1 = elem.find_element_by_xpath(xp_bp1).text
            except NoSuchElementException:
                bp1 = None
            data.append([bp1])
        print(data)
        url1 = driver.current_url
        with open('test.csv', 'a', newline='', encoding="utf-8") as outfile:
            writer = csv.writer(outfile)
            for row in data:
                writer.writerow(row + [url1])
    except TimeoutException:
        pass
    except NoSuchElementException as ex:
        print(ex)
        break
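A likely cause: xp_bp1 starts with the following:: axis, so when it is evaluated relative to each name element it can keep matching the same first odds span in document order (all the column-1 odds sit after all the names in the DOM). A minimal positional workaround, sketched under the assumption that the names and the column-1 odds render in the same order (class names taken from the question, untested against the live page):

# Collect names and column-1 odds separately, then pair them by position.
names_xp = "//*[contains(@class, 'sl-CouponParticipantWithBookCloses_Name')]"
odds_xp = ("//div[contains(@class,'sl-MarketCouponValuesExplicit33')]"
           "[./div[contains(@class,'gl-MarketColumnHeader')][.='1']]"
           "//span[@class='gl-ParticipantOddsOnly_Odds']")

names = [e.text for e in driver.find_elements_by_xpath(names_xp)]
odds = [e.text for e in driver.find_elements_by_xpath(odds_xp)]

# zip() keeps the pairing positional instead of re-searching from each name.
data = [[name, price] for name, price in zip(names, odds)]
print(data)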

Related

page_source in selenium changes

I'm trying to build a crawler for YouTube and encountered strange behavior.
In the following code, driver.page_source is obtained from Selenium and passed to BeautifulSoup for parsing.
The problem is that the length of driver.page_source changes between consecutive reads.
How can this happen? Does anyone have an idea about this?
elif 'src' in seq:
    print('video-src')
    print(seq['src'])
    soup = bs(driver.page_source, "html.parser")
    print('driver.page_source length=' + str(len(driver.page_source)))
    f = open('test.txt', 'w', encoding='UTF-8')
    f.write(driver.page_source)
    f.close()
    print('driver.page_source length=' + str(len(driver.page_source)))
    tag = '<span dir="auto" class="style-scope yt-formatted-string">'
    find_start = driver.page_source.find(tag + '댓글')  # '댓글' means 'comments'
    print('driver.page_source length=' + str(len(driver.page_source)))
    tag_value = driver.page_source[find_start:find_start + 200]
    print('driver.page_source length=' + str(len(driver.page_source)))
    p = re.compile(r'>([\d,]+)<')
    m = p.search(tag_value)
    if m:
        print(m.group(1))
        video[item['name']] = m.group(1)
    else:
        print('error')
        print(tag_value)
The output:
driver.page_source length=4103114
driver.page_source length=4102392
driver.page_source length=4102392
driver.page_source length=4103129
The page_source can change as elements are loaded later.
Instead of checking the page_source length, you can save the different driver.page_source snapshots to text files and compare them to understand what is different. One way to do so is with difflib:
import difflib

# Save two snapshots of the page source to files.
source1 = driver.page_source
with open("file1.txt", "w") as file1:
    file1.write(source1)

source2 = driver.page_source
with open("file2.txt", "w") as file2:
    file2.write(source2)

with open('file1.txt') as file_1:
    file_1_text = file_1.readlines()

with open('file2.txt') as file_2:
    file_2_text = file_2.readlines()

# Find and print the diff:
for line in difflib.unified_diff(
        file_1_text, file_2_text, fromfile='file1.txt',
        tofile='file2.txt', lineterm=''):
    print(line)
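If the goal is simply to parse the page once it has settled, another option (a sketch of my own, not part of the original answer) is to poll until consecutive page_source snapshots stop changing in length:

import time

def wait_for_stable_source(driver, stable_checks=3, interval=1.0, timeout=30):
    # Poll driver.page_source until its length is unchanged for
    # `stable_checks` consecutive reads, or until the timeout expires.
    deadline = time.time() + timeout
    last_len = -1
    stable = 0
    while time.time() < deadline:
        cur_len = len(driver.page_source)
        stable = stable + 1 if cur_len == last_len else 0
        if stable >= stable_checks:
            break
        last_len = cur_len
        time.sleep(interval)
    return driver.page_source  # whatever is present once stable or timed out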

Trying to fetch option chain data from NSE... but getting an error using ChromeDriver

Below is the code with which I am trying to fetch option chain data, but I am getting errors.
Any help will be appreciated, thanks.
Also, if someone can help me with better code to record tick data, that would be great.
import time
import datetime
import pandas as pd
from bs4 import BeautifulSoup, SoupStrainer
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

df = pd.DataFrame(columns=['SNO','call OI','call CHNG IN OI','call VOLUME','call IV','call LTP','call CHNG','call BID QTY','call BID PRICE','call ASK PRICE','call ASK QTY','STRIKE PRICE','put BID QTY','put BID PRICE','put ASK PRICE','put ASK QTY','put CHNG','put LTP','put IV','put VOLUME','put CHNG IN OI','put OI'])

chrome_options = Options()
chrome_options.add_argument("--log-level=3")
chrome_options.add_argument('--headless')
# earlier attempt: from selenium.webdriver import Chrome as uc; chrome_options = uc.ChromeOptions()
driver = webdriver.Chrome(executable_path=r"C:\Users\rohit taparia\Downloads\chromedriver_win32\chromedriver.exe", options=chrome_options)

driver.get('https://www.nseindia.com/get-quotes/derivatives?symbol=BANKNIFTY')
driver.minimize_window()
time.sleep(3)

for j in range(0, 50):
    print(j)
    # refresh the page and read the data again
    driver.refresh()
    continue_link1 = driver.find_element_by_xpath('''//*[@id="subtab-derivatives"]/div[2]/nav/div/div/a[2]''')
    time.sleep(10)
    filter_tag = SoupStrainer("table")
    continue_link1.click()
    time.sleep(3)
    rtime = str(driver.find_element_by_xpath('''//*[@id="asondate"]''').text)
    if rtime == '':
        continue
    print(rtime)
    page = driver.page_source
    soup = BeautifulSoup(page, "html.parser", parse_only=filter_tag)
    gdp_table = soup.find("table", attrs={"id": "optionChainTable-indices"})
    gdp_table_data = gdp_table.tbody.find_all("tr")
    if len(gdp_table_data) == 1:
        continue
    for i in range(0, len(gdp_table_data)):
        list1 = []
        for td in gdp_table_data[i].find_all("td"):
            # remove any newlines and extra spaces from left and right
            cell_text = td.text
            if cell_text is None or cell_text == '':
                cell_text = '0'
            cell_text = cell_text.replace(',', '')
            list1.append(cell_text)
        if len(list1) > 0:
            list1 = ['0' if v == '-' else v for v in list1]
        else:
            continue
        del list1[0]
        del list1[-1]
        list1 = list(map(float, list1))
        list1.insert(0, rtime)
        df.loc[len(df)] = list1

df.to_excel("option-data.xlsx")
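The fixed time.sleep() calls are a common source of flaky errors on a slow-loading site like NSE. A sketch using Selenium's explicit waits instead (the asondate id comes from the question's XPath; everything else is standard Selenium):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 20)
# Wait until the "as on" timestamp element exists and is non-empty
# before parsing the table, instead of sleeping a fixed 10 seconds.
rtime_elem = wait.until(EC.presence_of_element_located((By.ID, "asondate")))
wait.until(lambda d: rtime_elem.text.strip() != "")
rtime = rtime_elem.text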

Selenium StaleElementReferenceException and related wait commands do not work

When I run the program below, the first option2[opt2].click() works well, but the second option2[opt2].click() raises StaleElementReferenceException.
I tried to solve this problem with time.sleep(), implicitly_wait(), and WebDriverWait(browser, 30).until(EC.presence_of_element_located((By.XPATH, {Xpath}))), but none of them work.
What is the problem?
(If you run my program, you can understand my code easily. I am a beginner in programming and Korean, so if the explanation is not enough... sorry. Please help me.)
from selenium import webdriver
import pandas as pd
import time
import random

options = webdriver.ChromeOptions()
options.add_argument("user-agent={myagent}")
browser = webdriver.Chrome(options=options)
browser.get("https://yeyak.seoul.go.kr/web/main.do")  # go to the first page
browser.maximize_window()

# Crawl the Nowon-gu "space facilities", "cultural experience", and "education courses" categories.
# Column names (district, category, target, place, usage period, application period,
# selection method, capacity, application limit, cancellation period, fee,
# reservation method, contact phone).
all_column_name = ['구', '카테고리', '대상', '장소', '이용기간', '접수기간', '선별방법',
                   '모집정원', '신청제한', '취소기간', '이용요금', '예약방법', '문의전화']
all_data = []

# Select Nowon-gu (it is already selected; clicking it again raises an error).
browser.find_element_by_xpath("//*[@id=\"sch_loc\"]/option[11]").click()
time.sleep(1)
for i in range(2, 5):
    # Select the category (space facilities / cultural experience / education courses).
    browser.find_element_by_xpath(f"//*[@id='contents']/div[1]/div[1]/div/div/div[1]/ul/li[{i}]").click()
    time.sleep(1)
    category_name = browser.find_element_by_xpath(f"//*[@id='contents']/div[1]/div[1]/div/div/div[1]/ul/li[{i}]").text
    district = browser.find_element_by_xpath("//*[@id=\"contents\"]/div[1]/div[1]/div/div/div[2]/div[3]/div[1]/select/option[11]").text  # save the district name
    # Facility selection button (sub-topic).
    button2 = browser.find_element_by_xpath("//*[@id=\"contents\"]/div[1]/div[1]/div/div/div[2]/div[3]/div[2]/select")
    option2 = button2.find_elements_by_tag_name("option")
    if len(option2) == 1:  # if there are no extra options, move on to the next category
        continue
    for opt2 in range(1, len(option2)+1):
        print(option2[opt2].text)
        option2[opt2].click()
        time.sleep(1)
        # Facility detail selection button.
        button3 = browser.find_element_by_xpath("//*[@id=\"contents\"]/div[1]/div[1]/div/div/div[2]/div[3]/div[3]/select")
        option3 = button3.find_elements_by_tag_name("option")
        if len(option3) == 1:  # if there are no extra options, move on to the next category
            continue
        for opt3 in range(1, len(option3)+1):
            small_data = []
            option3[opt3].click()
            time.sleep(1)
            # Click the reservation button.
            browser.find_element_by_xpath("//*[@id=\"contents\"]/div[1]/div[1]/div/div/div[2]/div[3]/button").click()
            time.sleep(1)
            # Get the data from the table.
            table = browser.find_element_by_xpath("//*[@id=\"aform\"]/div[1]/div[2]/ul")
            rows = table.find_elements_by_tag_name("li")
            small_data.append(district)       # add the district name
            small_data.append(category_name)  # add the category name
            for row in rows:
                small_data.append(row.text.split("\n")[1])
            all_data.append(small_data)
            time.sleep(random.uniform(2, 3))
            browser.get("https://yeyak.seoul.go.kr/web/main.do")
            time.sleep(random.uniform(2, 3))
            browser.find_element_by_xpath("//*[@id=\"sch_loc\"]/option[11]").click()  # click Nowon-gu
            time.sleep(random.uniform(2, 3))
            browser.find_element_by_xpath(f"//*[@id='contents']/div[1]/div[1]/div/div/div[1]/ul/li[{i}]").click()  # click the category
            time.sleep(random.uniform(2, 3))
            browser.find_element_by_xpath(f"//*[@id=\"contents\"]/div[1]/div[1]/div/div/div[2]/div[3]/div[2]/select/option[{opt2}]")
            time.sleep(random.uniform(2, 3))
            if opt3 == len(option3)+1:
                break
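The option2 list goes stale as soon as the page re-renders after a click or a navigation, so the second option2[opt2].click() touches a dead element. A common pattern (a sketch, reusing the select XPath from the question) is to re-locate the select and its options on every iteration instead of reusing the old list:

SELECT2_XPATH = "//*[@id='contents']/div[1]/div[1]/div/div/div[2]/div[3]/div[2]/select"

# Count the options once, then re-find them fresh on each pass so a
# re-rendered page never leaves us holding a stale reference.
n_options = len(browser.find_element_by_xpath(SELECT2_XPATH).find_elements_by_tag_name("option"))
for idx in range(1, n_options):  # option[0] is typically the placeholder
    fresh_option = browser.find_element_by_xpath(SELECT2_XPATH).find_elements_by_tag_name("option")[idx]
    print(fresh_option.text)
    fresh_option.click()
    time.sleep(1)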

Selenium PATH Difficulty [duplicate]

This question already has answers here:
WebDriverException: Message: 'chromedriver' executable needs to be in PATH while setting UserAgent through Selenium Chromedriver python
selenium.common.exceptions.WebDriverException: Message: 'chromedriver' executable needs to be in PATH error with Headless Chrome
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import ElementNotInteractableException, NoSuchElementException, StaleElementReferenceException
from random import randint, randrange
#from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import time
import random

BESTBUY_URL = 'https://www.bestbuy.com/site/sony-playstation-5-digital-edition-console/6430161.p?skuId=6430161'
BESTBUY_TEST_URL = 'https://www.bestbuy.com/site/sony-playstation-5-dualsense-wireless-controller-cosmic-red/6464309.p?skuId=6464309'
WAIT_TIME = 7
PRICE_LIMIT = 500

class Zac:
    def __init__(self, username, password):
        self.username = username
        self.password = password
        #binary = FirefoxBinary("/Applications/Firefox.app/Contents/MacOS/firefox-bin")
        #self.driver = webdriver.Firefox("Applications/Firefox.app")
        driver = webdriver.Chrome(executable_path="Applications/Google Chrome.app")
        driver = webdriver.Chrome('/path/to/chromedriver')  # Optional argument; if not specified, will search PATH.

    def signIn(self):
        driver = self.driver
        ## Username
        username_elem = driver.find_element_by_xpath("_____")
        username_elem.clear()
        username_elem.send_keys(self.username)
        time.sleep(randint(int(WAIT_TIME/2), WAIT_TIME))
        username_elem.send_keys(Keys.RETURN)
        time.sleep(randint(int(WAIT_TIME/2), WAIT_TIME))
        ## Password
        password_elem = driver.find_element_by_xpath("_____")
        password_elem.clear()
        password_elem.send_keys(self.password)
        time.sleep(randint(int(WAIT_TIME/2), WAIT_TIME))
        password_elem.send_keys(Keys.RETURN)
        time.sleep(randint(int(WAIT_TIME/2), WAIT_TIME))

    def findProduct(self):
        driver = self.driver
        driver.get(BESTBUY_TEST_URL)
        time.sleep(randint(int(WAIT_TIME/2), WAIT_TIME))
        isAvailable = self.isProductAvailable()
        if isAvailable == 'Sold Out':
            time.sleep(randint(int(WAIT_TIME/2), WAIT_TIME))
            self.findProduct()
        elif isAvailable <= PRICE_LIMIT:
            buy_now = driver.find_element_by_name('submit.buy-now')
            buy_now.click()
            time.sleep(randint(int(WAIT_TIME/2), WAIT_TIME))
            self.signIn()
            ## Place Order
            place_order = driver.find_element_by_name('placeYourOrder1').text
            time.sleep(randint(int(WAIT_TIME/2), WAIT_TIME))
            ## place_order.click()
            ## time.sleep(randint(int(WAIT_TIME/2), WAIT_TIME))
        else:
            time.sleep(randint(int(WAIT_TIME/2), WAIT_TIME))
            self.findProduct()

    def isProductAvailable(self):
        driver = self.driver
        available = driver.find_element_by_class_name('a-color-price').text
        if available == 'Currently unavailable.':
            print(f'***** AVAILABLE: {available}')
            return available
        else:
            print(f'***** PRICE: {available}')
            return float(available[1:])  ## $123.22 -> 123.22

    def closeBrowser(self):
        self.driver.close()

if __name__ == '__main__':
    shopBot = Zac(username="_____", password="_____")
    shopBot.findProduct()
    shopBot.closeBrowser()
For some reason, it is saying that the Google Chrome.app executable needs to be in PATH. I am trying to make a scalping bot, and this seems to be my biggest problem area. I am open to using whichever browser is easiest; as you can see in the code, I first tried this with Firefox. If anybody could help, that would be greatly appreciated.
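That error usually means Selenium was handed the browser bundle ("Applications/Google Chrome.app") instead of the chromedriver binary. A sketch of the likely fix (the /usr/local/bin/chromedriver path is a placeholder; point it wherever chromedriver actually lives), which also stores the driver on self.driver so signIn() and findProduct() can reach it:

from selenium import webdriver

class Zac:
    def __init__(self, username, password):
        self.username = username
        self.password = password
        # executable_path must point at the chromedriver binary,
        # not at Google Chrome.app; the path below is a placeholder.
        self.driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver')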

X-Path - How To Pull This Field?

My code goes into a webpage and clicks on a record, which then drops down other records.
Is there a way to use XPath to pull all of these drop-down titles?
Currently, I copied the first drop-down title's full XPath, and it is only pulling the first one.
That is fine, but how do I pull all the entry titles that drop down?
My current code only works for the first line.
from selenium import webdriver
import time

driver = webdriver.Chrome()
for x in range(1, 2):
    driver.get(f'https://library.iaslc.org/conference-program?product_id=24&author=&category=&date=&session_type=&session=&presentation=&keyword=&available=&cme=&page={x}')
    time.sleep(4)
    productlist_length = len(driver.find_elements_by_xpath("//div[@class='accordin_title']"))
    for i in range(1, productlist_length + 1):
        product = driver.find_element_by_xpath("(//div[@class='accordin_title'])[" + str(i) + "]")
        title = product.find_element_by_xpath('.//h4').text.strip()
        buttonToClick = product.find_element_by_xpath('.//div[@class="sign"]')
        buttonToClick.click()
        time.sleep(5)
        dropDownTitle = product.find_element_by_xpath('//*[@id="accordin"]/div/ul/li[1]/div[2]/div/ul/li/div[1]/div[3]/h4').text  # this line is the full xpath
        print(dropDownTitle)
Can you check with the below code?
# try to execute it in maximized mode; sometimes the element is overlaid
driver.maximize_window()
for x in range(1, 5):
    driver.get(f'https://library.iaslc.org/conference-program?product_id=24&author=&category=&date=&session_type=&session=&presentation=&keyword=&available=&cme=&page={x}')
    time.sleep(4)
    productlist_length = len(driver.find_elements_by_xpath("//div[@class='accordin_title']"))
    for i in range(1, productlist_length + 1):
        product = driver.find_element_by_xpath("(//div[@class='accordin_title'])[" + str(i) + "]")
        title = product.find_element_by_xpath('.//h4').text.strip()
        # At some point the dropdown doesn't display any records and throws
        # ClickInterceptedException, so use ActionChains to move to the element.
        buttonToClick = product.find_element_by_xpath('.//*[@class="info_right"]/h4')
        action = ActionChains(driver)
        action.move_to_element(buttonToClick).click().perform()
        time.sleep(5)
        # If you just provide the index of the li, it will print the title.
        dropDownTitle = product.find_element_by_xpath("//*[@id='accordin']/div/ul/li[" + str(i) + "]/div[1]/div[3]/h4").text
        print(dropDownTitle)
Imports:
from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
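An alternative sketch (assuming every expanded record shares the same accordin markup as the question's full XPath): once a record is open, a single find_elements call with an un-indexed li can collect all the dropped-down titles at once:

# Grab every drop-down title under the expanded record in one call.
titles = [e.text for e in driver.find_elements_by_xpath(
    "//*[@id='accordin']/div/ul/li/div[2]/div/ul/li/div[1]/div[3]/h4")]
print(titles)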