I am trying to use selenium to scrape dynamic webpages.
Here, I tried to print all the authors in the website
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
# Start Chrome using the driver binary that webdriver_manager installs.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get("https://quotes.toscrape.com/js")
# "author" is a single class name, so a class-name locator accepts it.
elements = driver.find_elements_by_class_name("author")
for i in elements:
    # The loop body must be indented; print the text of each matched element.
    print(i.text)
driver.quit()
Which worked pretty well and printed me the right result:
Albert Einstein
J.K. Rowling
Albert Einstein
Jane Austen
Marilyn Monroe
Albert Einstein
André Gide
Thomas A. Edison
Eleanor Roosevelt
Steve Martin
But when I try to use a similar code for another website
I get an error:
selenium.common.exceptions.InvalidArgumentException: Message: invalid argument: invalid locator
(Session info: chrome=98.0.4758.102)
This is my second code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
url = 'https://www.myperfume.co.il/155567-%D7%9B%D7%9C-%D7%94%D7%9E%D7%95%D7%AA%D7%92%D7%99%D7%9D-%D7%9C%D7%92%D7%91%D7%A8?order=up_title'
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get(url)
# NOTE: "title text-center" contains a space, i.e. two class names. This is
# the call that raises InvalidArgumentException ("invalid locator"); it is
# kept as-is because it is the failing line the question asks about.
elements = driver.find_elements_by_class_name("title text-center")
for i in elements:
    print(i.text)
driver.quit()
What I am trying to do in this code is to print all the names of the perfumes in the webpage.
After inspecting, I saw that all of the names are in elements whose class is 'title text-center'.
How can I fix my code?
title text-center are actually 2 class names title and text-center.
In order to locate elements by 2 class names you have to use XPath or CSS Selector.
So, instead of
elements = driver.find_elements_by_class_name("title text-center")
You can use
elements = driver.find_elements_by_xpath("//h3[@class='title text-center']")
Or
elements = driver.find_elements_by_css_selector("h3.title.text-center")
Also, you should add waits to access the web elements only when they are loaded and ready.
This should be done with Expected Conditions explicit waits, as following:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url = 'https://www.myperfume.co.il/155567-%D7%9B%D7%9C-%D7%94%D7%9E%D7%95%D7%AA%D7%92%D7%99%D7%9D-%D7%9C%D7%92%D7%91%D7%A8?order=up_title'
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
try:
    wait = WebDriverWait(driver, 20)
    driver.get(url)
    # Wait (up to 20 s) until an <h3> carrying both classes is visible before
    # collecting the elements.
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "h3.title.text-center")))
    # `By` is already imported, so use the generic locator API;
    # `find_elements_css_selector` is not a WebDriver method.
    elements = driver.find_elements(By.CSS_SELECTOR, "h3.title.text-center")
    for i in elements:
        print(i.text)
finally:
    # Close the browser even if the wait times out or a lookup fails.
    driver.quit()
This error message...
selenium.common.exceptions.InvalidArgumentException: Message: invalid argument: invalid locator
...implies that the locator strategy you have used is not a valid locator strategy as By.CLASS_NAME takes a single classname as an argument.
To print all the names of the perfumes on the webpage, you can use a list comprehension with the following locator strategy:
Using css_selector and get_attribute("innerHTML"):
driver.get("https://www.myperfume.co.il/155567-%D7%9B%D7%9C-%D7%94%D7%9E%D7%95%D7%AA%D7%92%D7%99%D7%9D-%D7%9C%D7%92%D7%91%D7%A8?order=up_title")
# Gather the raw inner HTML of every <h3> with class "title", then print the
# whole list at once.
title_markup = []
for heading in driver.find_elements_by_css_selector("h3.title"):
    title_markup.append(heading.get_attribute("innerHTML"))
print(title_markup)
Ideally you need to induce WebDriverWait for visibility_of_all_elements_located() and you can use the following Locator Strategy:
Using CSS_SELECTOR and the text attribute:
driver.get("https://www.myperfume.co.il/155567-%D7%9B%D7%9C-%D7%94%D7%9E%D7%95%D7%AA%D7%92%D7%99%D7%9D-%D7%9C%D7%92%D7%91%D7%A8?order=up_title")
# Wait up to 20 s for all matching headings to become visible, then print
# their rendered text as a list.
visible_headings = WebDriverWait(driver, 20).until(
    EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "h3.title"))
)
print([heading.text for heading in visible_headings])
Console Output:
[' 212 וי אי פי לגבר א.ד.ט 212 vip for men e.d.t ', ' 212 ניו יורק לגבר א.ד.ט 212 nyc for men e.d.t ', ' 212 סקסי לגבר א.ד.ט 212 sexy men e.d.t ', ' אברקרומבי פירס 100 מל א.ד.ק Abercrombie & Fitch Fierce 100 ml e.d.c ', ' אברקרומבי פירס 50 מל א.ד.ק Abercrombie & Fitch Fierce 50 ml e.d.c ', ' אברקרומבי פירס גודל ענק 200 מל א.ד.ק Abercrombie & Fitch Fierce 200 ml e.d.c ', ' אברקרומבי פירסט אינסטינקט לגבר א.ד.ט Abercrombie & Fitch First Instinct e.d.t ', ' אגואיסט א.ד.ט Egoiste e.d.t ', ' אגואיסט פלטינום א.ד.ט Egoiste Platinum e.d.t ', ' או דה בלנק א.ד.ט Eau De Blanc e.d.t ', ' או דה פרש א.ד.ט Eau Fraiche e.d.t ', ' אובסיישן לגבר א.ד.ט Obsession for men e.d.t ']
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Related
I am trying to extract the percentage values (Target Rate Probability) from an interactive chart with preselected conditions "13 Dec23" and "Historical" on the following website: https://www.cmegroup.com/markets/interest-rates/cme-fedwatch-tool.html?redirect=/trading/interest-rates/countdown-to-fomc.html. The percentage probabilities only appear when I hover over the respective data points with a mouse.
I have tried the following:
XPath of tried element: /html/body/form/div[3]/div[2]/div[3]/div[1]/div/div/div[1]/div/div[3]/div[3]/div/div/div/div[1]/div/svg/g[5]/g[2]/path[2]
1. Method Approach: Webdriverwait in order for the elements to load on the page
1. Method Error: TimeoutException
2. Method Approach:
driver.execute_script("document.evaluate('/html/body/form/div[3]/div[2]/div[3]/div[1]/div/div/div[1]/div/div[3]/div[3]/div/div/div/div[1]/div/svg/g[5]/g[2]/path[2]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.style.display = 'block';")
2. Method Error:
selenium.common.exceptions.JavascriptException: Message: javascript error: Cannot read properties of null (reading 'style')
3. Method Approach:
driver.execute_script("document.evaluate('/html/body/form/div[3]/div[2]/div[3]/div[1]/div/div/div[1]/div/div[3]/div[3]/div/div/div/div[1]/div/svg/g[5]/g[2]/path[2]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.style.visibility = 'visible';")
3. Method Error:
selenium.common.exceptions.JavascriptException: Message: javascript error: Cannot read properties of null (reading 'style')
EC: Invisibility of element is True
**Code:**
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import datetime as dt
import time
driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
driver.get('https://www.cmegroup.com/markets/interest-rates/cme-fedwatch-tool.html?redirect=/trading/interest-rates/countdown-to-fomc.html')
time.sleep(10)
page_source=driver.page_source
time.sleep(5)
# All lookups below are performed inside this frame.
driver.switch_to.frame('cmeIframe-jtxelq2f')
time.sleep(5)
driver.find_element(By.XPATH,"/html/body/form/div[3]/div[2]/div[3]/div[1]/div/div/div[1]/div/div[3]/div[3]/div/div/ul/li[8]/a").click()
time.sleep(5)
driver.find_element(By.CSS_SELECTOR,"#ctl00_MainContent_ucViewControl_IntegratedFedWatchTool_lbHistorical").click()
time.sleep(10)
# 175-200 target rate probability
action = webdriver.ActionChains(driver)
CSS_Selector = '#highcharts-986wlrn-17 > svg > g.highcharts-series-group > g.highcharts-markers.highcharts-series-0.highcharts-line-series.highcharts-color-0.highcharts-tracker > path.highcharts-halo.highcharts-color-0'
# Wait for the element
wait = WebDriverWait(driver, 10)
# Check, if the element is invisible (IT IS)
print(wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, CSS_Selector))))
#Change the data-z-index', '4' to put infront of the other elements (DOESNT WORK!)
driver.execute_script("document.querySelector('#highcharts-986wlrn-17 > svg > g.highcharts-series-group > g.highcharts-markers.highcharts-series-0.highcharts-line-series.highcharts-color-0.highcharts-tracker > path.highcharts-halo.highcharts-color-0').setAttribute('data-z-index', '4')")
time.sleep(10)
#Doesnt find the first element of 175-200 (DOESNT WORK!)
element = driver.find_element(By.CSS_SELECTOR,'#highcharts-986wlrn-17 > svg > g.highcharts-series-group > g.highcharts-markers.highcharts-series-0.highcharts-line-series.highcharts-color-0.highcharts-tracker > path.highcharts-halo.highcharts-color-0')
# Move the mouse over the very left element first element 175-200 (DOESNT WORK!)
action.move_to_element(element)
action.perform()
#Determining the location and size in order for the mouse to move along the interactive chart for 175-200
loc = element.location
size = element.size
#moving to right end of the interactive chart for 175-200 (DOESNT WORK!)
action.move_to_element_with_offset(element,510, 0).perform() #found 510 when inspecting the chart metrics
#first date (found on the very left of the chart) (DOESNT WORK!)
driver.find_element(By.XPATH,'/html/body/form/div[3]/div[2]/div[3]/div[1]/div/div/div[1]/div/div[3]/div[3]/div/div/div/div[1]/div/svg/g[10]/g/text/tspan[1]')
#first value (found on the very left of the chart) (DOESNT WORK!)
driver.find_element(By.XPATH,'/html/body/form/div[3]/div[2]/div[3]/div[1]/div/div/div[1]/div/div[3]/div[3]/div/div/div/div[1]/div/svg/g[5]/g[2]/path[1]')
# Date layout shared by the limit and the tooltip header. The previous format
# ('%a, %B %-m, %B') repeated %B and used %-m, which strptime cannot parse.
# NOTE(review): assumes tooltip dates look like 'Thu, May 5, 2022' - confirm
# against the live chart (use %B instead of %b if months are spelled out).
date_format = '%a, %b %d, %Y'
#setting the limit of when to break out of the while loop
limit = dt.datetime.strptime('Thu, May 5, 2022', date_format)
pace = -5
dictionary = {}
while True:
    #moving back to the very left first element of the interactive chart
    action.move_by_offset(pace, 0).perform() # moving by a pace of -5 (-5 is trial value)
    date = driver.find_element(By.CSS_SELECTOR,'#highcharts-986wlrn-17 > svg > g.highcharts-tooltip > g.highcharts-label.highcharts-tooltip-header.highcharts-tooltip-box > text').text
    value = driver.find_element(By.CSS_SELECTOR,'#highcharts-986wlrn-17 > svg > g.highcharts-tooltip > g.highcharts-label.highcharts-tooltip-box.highcharts-color-0 > text').text
    if dt.datetime.strptime(date, date_format) < limit:
        break
    # add results to dictionary (the first value seen for a date wins)
    dictionary.setdefault(date, value)
driver.quit()
[![Interactive Line Chart][1]][1]
[1]: https://i.stack.imgur.com/ZDHQt.jpg
The code below assumes that you already clicked on "13 Dec23" and "Historical":
import time
import pyautogui # pip install pyautogui
# sleep 3 seconds so that you can switch to the browser and move the mouse to the start position
time.sleep(3)
# start position: put mouse on chart's left border, for example between 0% and 20%
start_pos = pyautogui.position()
# this works only if the tooltip containing the percentage values was already been displayed once
old_values = driver.find_element(By.CSS_SELECTOR, 'g.highcharts-tooltip').text.replace('\n','').split('●')
data = []
# Sweep the pointer to the right in 3-pixel steps and record the tooltip
# contents every time they change.
for k in range(500):
    pyautogui.moveTo(start_pos.x + 3*k , start_pos.y)
    new_values = driver.find_element(By.CSS_SELECTOR, 'g.highcharts-tooltip').text.replace('\n','').split('●')
    if new_values != old_values:
        old_values = new_values
        data.append(new_values)
    # uncomment this if you want the mouse to move slower
    # time.sleep(.1)
data will then be a list of lists, where each list contains the day label and the corresponding percentage values (as strings)
[['Thu, May 5, 2022',
'350-375: 17.18%',
'325-350: 28.11%',
'300-325: 27.01%',
'250-275: 4.39%',
'275-300: 14.80%',
'225-250: 0.65%',
'375-400: 6.27%',
'400-425: 1.36%',
'425-450: 0.17%'],
['Fri, May 6, 2022',
'350-375: 15.54%',
'325-350: 26.14%',
'300-325: 26.94%',
'250-275: 6.08%',
'275-300: 16.73%',
'225-250: 1.24%',
'375-400: 5.71%',
'400-425: 1.29%',
'425-450: 0.17%',
'200-225: 0.14%'],
... etc ...
My code goes into a webpage, clicks on a record, which then drops other records.
Is there a way to use xPath to pull all of these drop-down titles?
Currently, I copied the first drop down titles full xpath, and its only pulling the first one.
That is fine, but how do I pull all entry titles that drop down?
My current code is specifically only for the first line
from selenium import webdriver
import time
driver = webdriver.Chrome()
for x in range (1,2):
    driver.get(f'https://library.iaslc.org/conference-program?product_id=24&author=&category=&date=&session_type=&session=&presentation=&keyword=&available=&cme=&page={x}')
    time.sleep(4)
    # Count the accordion rows so each one can be re-located by its 1-based index.
    productlist_length = len(driver.find_elements_by_xpath("//div[@class='accordin_title']"))
    for i in range(1, productlist_length + 1):
        product = driver.find_element_by_xpath("(//div[@class='accordin_title'])[" + str(i) + "]")
        title = product.find_element_by_xpath('.//h4').text.strip()
        buttonToClick = product.find_element_by_xpath('.//div[@class="sign"]')
        buttonToClick.click()
        time.sleep(5)
        # NOTE: this XPath starts with "//" and hard-codes li[1], so it is
        # resolved against the whole document, not relative to `product`.
        dropDownTitle=product.find_element_by_xpath('//*[@id="accordin"]/div/ul/li[1]/div[2]/div/ul/li/div[1]/div[3]/h4').text #this line is the full xpath
        print(dropDownTitle)
So can you check with the below line of code
# Maximize the window first; in a small window the element is sometimes overlaid.
driver.maximize_window()
for x in range (1,5):
    driver.get(f'https://library.iaslc.org/conference-program?product_id=24&author=&category=&date=&session_type=&session=&presentation=&keyword=&available=&cme=&page={x}')
    time.sleep(4)
    productlist_length = len(driver.find_elements_by_xpath("//div[@class='accordin_title']"))
    for i in range(1, productlist_length + 1):
        product = driver.find_element_by_xpath("(//div[@class='accordin_title'])[" + str(i) + "]")
        title = product.find_element_by_xpath('.//h4').text.strip()
        # At some point the dropdown shows no records and a plain click raises
        # ClickInterceptedException, so move to the element with an
        # ActionChains and click through it instead.
        buttonToClick = product.find_element_by_xpath('.//*[@class="info_right"]/h4')
        action = ActionChains(driver)
        action.move_to_element(buttonToClick).click().perform()
        time.sleep(5)
        # Index the <li> with the loop counter so each row's title is printed.
        dropDownTitle=product.find_element_by_xpath("//*[@id='accordin']/div/ul/li["+str(i)+"]/div[1]/div[3]/h4").text
        print(dropDownTitle)
Imports:
from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
Output
In this section of my text, I am looping to select all check boxes across a number of pages (successful). When the next button is no longer enabled (and thus no more pages to turn, try command), I want to click the apply button. There are two different apply buttons, a visible and invisible one. The way my text is written, I receive an element not interactable exception, and thus my timeout exception isn't seen through. That's unexpected because the button I am trying to click is clearly visible and clickable (manually).
Original website: https://icem.data-archive.ac.uk/#step2
Does anyone have an idea what could be causing this element not interactable? I have attached a photo of the HTML of the two applied buttons: https://i.stack.imgur.com/xBl9X.png
Note: introducing WebDriverWait on the exception command just induces another timeoutexception.
if county_pop < 1000000:
    while True:
        # Tick every checkbox shown on the current page of the modal.
        checklist = webD.find_elements_by_xpath('//*[@class="modal-content"]//input[@class="ng-pristine ng-valid"]')
        for elem in checklist:
            elem.click()
        try:
            # Advance while a "Next" link whose parent is not disabled is clickable.
            WebDriverWait(webD, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[not(contains(@class, "disabled"))]/a[.="Next"]'))).click()
        except TimeoutException:
            # No further page: click the button in the "visible-xs" column.
            webD.find_element_by_xpath('//div[@class="modal-content"]//div[@class="col-sm-3 text-left visible-xs"]/button').click()
Script until then:
import selenium
import time
from selenium import webdriver as wd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException, NoSuchWindowException
from selenium.webdriver.common.action_chains import ActionChains
import os

chrome_path = r'C:\webdrivers\chromedriver.exe'
webD=wd.Chrome(executable_path=chrome_path)
webD.get('https://beta.ukdataservice.ac.uk/myaccount')
WebDriverWait(webD, 20).until(EC.element_to_be_clickable(
    (By.XPATH, '//*[@id="myaccount-login"]/div/div/div/div/div/div[2]/div/div/div/form/div/p/button'))).click()
# Credentials are read from the environment rather than hard-coded in the
# script; set UKDS_USERNAME and UKDS_PASSWORD before running.
WebDriverWait(webD, 20).until(EC.element_to_be_clickable(
    (By.XPATH, '//*[@id="username"]'))).send_keys(os.environ['UKDS_USERNAME'])
WebDriverWait(webD, 20).until(EC.element_to_be_clickable(
    (By.XPATH, '//*[@id="password"]'))).send_keys(os.environ['UKDS_PASSWORD'])
webD.find_element_by_xpath('/html/body/div/div/div[2]/div[1]/form/div[6]/button').click()
webD.get('https://icem.data-archive.ac.uk/#step1')
# NOTE(review): the nesting below is reconstructed from the loop comments
# ("YEARS LOOP BEGINS", "COUNTIES LOOP BEGINS", ...) - confirm against the
# original script.
#YEARS LOOP BEGINS
for year in [1851, 1861, 1881, 1891, 1901, 1911]:
    webD.find_element_by_xpath("//b[@class='ng-binding' and text()='{}']/preceding-sibling::input[@type='radio']".format(str(year))).click()
    # Continuing to England & Step 2, Opening Counties Variable, More Variables
    WebDriverWait(webD, 20).until(EC.element_to_be_clickable((By.XPATH, '//b[@id = "country_england"]/preceding-sibling::input'))).click()
    webD.find_element_by_xpath('//html/body/div/section/section[1]/article[2]/div/div/button').click()
    WebDriverWait(webD, 20).until(EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="county_category"]/button[1]'))).click()
    WebDriverWait(webD, 20).until(EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="county_category"]/div[2]/div/div[2]/button'))).click()
    # COUNTIES LOOP BEGINS
    conto = 1
    count = conto
    while count > 10: #Getting to the correct county page
        # Was `webD_find_element_by_link_text`, an undefined name.
        webD.find_element_by_link_text('Next').click()
        count = count - 10
    for i in range(count,11):
        #Clicking correct county
        WebDriverWait(webD, 20).until(EC.element_to_be_clickable((By.XPATH, '/html/body/div[3]/div/div/div[3]/div[{}]/label/input'.format(count)))).click()
        #Adding 1 to absolute count, conto
        conto = conto + 1
        #Store variables
        county_text = webD.find_element_by_xpath('/html/body/div[3]/div/div/div[3]/div[{}]/label'.format(count)).text
        # The label ends with the population in parentheses, e.g. "(1,234)".
        county_pop = int(county_text.split("(")[-1].replace(")","").replace(",", ""))
        print(county_pop)
        #Apply County Selection, open HISCO & More Variables
        WebDriverWait(webD, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(.,'Apply')]"))).click()
        WebDriverWait(webD, 20).until(EC.presence_of_element_located((By.XPATH, '//span[@class = "ice-allow-download-alert ng-animate ng-hide-remove ng-hide-remove-active"]')))
        WebDriverWait(webD, 20).until(EC.invisibility_of_element_located((By.XPATH, '//span[@class = "ice-allow-download-alert ng-animate ng-hide-remove ng-hide-remove-active"]')))
        WebDriverWait(webD, 20).until(EC.presence_of_element_located((By.XPATH, '//b[text()[contains(.,"HISCO classified occupation")]]/..'))).click()
        WebDriverWait(webD, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="workhistoryunit_category"]/div[2]/div/div[2]/button'))).click()
        #HISCO OCCUPATION SELECTION LOOP
        if county_pop < 1000000:
            # NOTE: this loop has no exit condition; it is the loop the
            # question is asking about.
            while True:
                checklist = webD.find_elements_by_xpath('//*[@class="modal-content"]//input[@class="ng-pristine ng-valid"]')
                for elem in checklist:
                    elem.click()
                try:
                    WebDriverWait(webD, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[not(contains(@class, "disabled"))]/a[.="Next"]'))).click()
                except TimeoutException:
                    webD.find_element_by_xpath('//div[@class="modal-content"]//div[@class="col-sm-3 text-left visible-xs"]/button').click()
# Loop flag: cleared once there is no further page to advance to.
temp = True
if county_pop < 1000000:
    while temp:
        checklist = webD.find_elements_by_xpath(
            '//*[@class="modal-content"]//input[@class="ng-pristine ng-valid"]')
        for elem in checklist:
            elem.click()
        try:
            WebDriverWait(webD, 10).until(EC.element_to_be_clickable(
                (By.XPATH, '//*[not(contains(@class, "disabled"))]/a[.="Next"]'))).click()
        except TimeoutException:
            # "Next" never became clickable within 10 s: wait for the first
            # clickable "Apply" button, click it, and leave the loop.
            WebDriverWait(webD, 10).until(EC.element_to_be_clickable(
                (By.XPATH, '//button[contains(text(),"Apply")]'))).click()
            temp = False
In the while loop you had True, which makes it run infinitely. Change it to a variable (temp) and set it to False as soon as the Next button is no longer clickable.
Also use WebDriverWait and click on the first Apply button.
I am trying to learn scraping with selenium while parsing the page_source with "html.parser" of BS4 soup. I have all the Tags that contain h2 tag and a class name, but extracting the text in between doesn't seem to work.
import os
import re
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup as soup
opts = webdriver.ChromeOptions()
opts.binary_location = os.environ.get('GOOGLE_CHROME_BIN', None)
opts.add_argument("--headless")
opts.add_argument("--disable-dev-shm-usage")
opts.add_argument("--no-sandbox")
browser = webdriver.Chrome(executable_path="chromedriver", options=opts)
url1='https://www.animechrono.com/date-a-live-series-watch-order'
browser.get(url1)
# Parse the rendered page source with BeautifulSoup.
req = browser.page_source
sou = soup(req, "html.parser")
h = sou.find_all('h2', class_='heading-5')
p = sou.find_all('div', class_='text-block-5')
for i in range(len(h)):
    # NOTE: `==` is a comparison whose result is discarded, so `h` keeps its
    # Tag objects. Left unchanged on purpose: it is the behaviour the
    # question describes.
    h[i] == h[i].getText()
for j in range(len(p)):
    p[j] = p[j].getText()
print(h)
print(p)
browser.quit()
My Output :
[<h2 class="heading-5">Season 1</h2>, <h2 class="heading-5">Date to Date OVA</h2>, <h2 class="heading-5">Season 2</h2>, <h2 class="heading-5">Kurumi Star Festival OVA</h2>, <h2 class="heading-5">Date A Live Movie: Mayuri Judgement</h2>, <h2 class="heading-5">Season 3</h2>, <h2 class="heading-5">Date A Bullet: Dead or Bullet Movie</h2>, <h2 class="heading-5">Date A Bullet: Nightmare or Queen Movie</h2>]
['Episodes 1-12', 'Date to Date OVA', 'Episodes 1-10', 'Kurumi Star Festival OVA', 'Date A Live Movie: Mayuri Judgement', 'Episodes 1-12', 'Date A Bullet: Dead or Bullet Movie', 'Date A Bullet: Nightmare or Queen Movie']
Add these lines before browser.quit():
h = [elem.text for elem in h]
print(h)
Full code:
import os
import re
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup as soup
opts = webdriver.ChromeOptions()
opts.binary_location = os.environ.get('GOOGLE_CHROME_BIN', None)
opts.add_argument("--headless")
opts.add_argument("--disable-dev-shm-usage")
opts.add_argument("--no-sandbox")
browser = webdriver.Chrome(executable_path="chromedriver", options=opts)
url1='https://www.animechrono.com/date-a-live-series-watch-order'
browser.get(url1)
# Parse the rendered page source with BeautifulSoup.
req = browser.page_source
sou = soup(req, "html.parser")
h = sou.find_all('h2', class_='heading-5')
p = sou.find_all('div', class_='text-block-5')
# Replace each Tag with its text content.
p = [block.getText() for block in p]
h = [elem.text for elem in h]
print(h)
browser.quit()
Output:
['Season 1', 'Date to Date OVA', 'Season 2', 'Kurumi Star Festival OVA', 'Date A Live Movie: Mayuri Judgement', 'Season 3', 'Date A Bullet: Dead or Bullet Movie', 'Date A Bullet: Nightmare or Queen Movie']
I'm trying to use the following xpath for this page but it is not loading correctly.
groups = ".//*[contains(@class, 'sl-CouponParticipantWithBookCloses_Name ')]"
xp_bp1 = ".//following::div[contains(@class,'sl-MarketCouponValuesExplicit33')][./div[contains(@class,'gl-MarketColumnHeader')][.='1']]//span[@class='gl-ParticipantOddsOnly_Odds']"
The output currently is..
[['3.00'], ['3.00'], ['3.00'] etc,,
Desired:
[['3.00'], ['1.30'], ['1.25'] etc,,
Data I am after
Script:
import csv
import time
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException
driver = webdriver.Chrome()
driver.set_window_size(1024, 600)
driver.maximize_window()
driver.get('https://www.bet365.com.au/#/AC/B1/C1/D13/E108/F16/S1/')
driver.get('https://www.bet365.com.au/#/AC/B1/C1/D13/E108/F16/S1/')
time.sleep(10)
groups = ".//*[contains(@class, 'sl-CouponParticipantWithBookCloses_Name ')]"
#//div[contains(@class, 'gl-ParticipantOddsOnlyDarker gl-ParticipantOddsOnly gl-Participant_General sl-MarketCouponAdvancedBase_LastChild ')]
xp_bp1 = ".//following::div[contains(@class,'sl-MarketCouponValuesExplicit33')][./div[contains(@class,'gl-MarketColumnHeader')][.='1']]//span[@class='gl-ParticipantOddsOnly_Odds']"
# Poll the page repeatedly, appending one CSV row per participant each pass.
while True:
    try:
        time.sleep(2)
        data = []
        for elem in driver.find_elements_by_xpath(groups):
            try:
                bp1 = elem.find_element_by_xpath(xp_bp1).text
            except Exception:
                # Best effort: record a missing price as None. `Exception`
                # (not a bare except) so Ctrl+C can still stop the loop.
                bp1 = None
            data.append([bp1])
        print(data)
        url1 = driver.current_url
        with open('test.csv', 'a', newline='', encoding="utf-8") as outfile:
            writer = csv.writer(outfile)
            for row in data:
                writer.writerow(row + [url1])
    except TimeoutException as ex:
        pass
    except NoSuchElementException as ex:
        print(ex)
        break