I'm pretty sure this is a website-specific issue, because the same code (with a modified XPath) works on other sites. I'm trying to collect all the PDF links on the website listed in the code below, but driver.find_elements_by_xpath(xpath) yields an empty list [].
Code:
def scrape_url(url):
    xpath = '//*[@class="panel-body"]//a'
    urls = set()
    options = Options()
    options.headless = True
    # change filepath of chromedriver
    driver = webdriver.Chrome(options=options, executable_path=r'C:\Users\User\Desktop\chromedriver')
    try:
        driver.get(url)
        all_href_elements = driver.find_elements_by_xpath(xpath)
        print("all_href_elements", all_href_elements)  # <-- empty list []
        for href_element in all_href_elements:
            article_url_text = href_element.text
            print(article_url_text)
            if article_url_text == "PDF":
                article_url = href_element.get_attribute('href')
                print(article_url_text, article_url)
                if article_url:
                    urls.add(article_url)
                    print("num of urls", len(urls))
    except Exception as e:
        print(e)
        print(url)

url = 'https://www.govinfo.gov/committee/senate-armedservices?path=/browsecommittee/chamber/senate/committee/armedservices/collection/BILLS/congress/106'
scrape_url(url)
But using the Chrome extension XPath Helper, the same XPath query does return results. I think it might be because the links are generated dynamically and don't exist until the pane is "opened." But shouldn't loading the URL cause the pane to be "open" by the time the web driver reads the page?
How would I get around this?
Thanks
Just use an explicit wait for the elements:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

all_href_elements = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.XPATH, xpath))
)
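If nothing ever appears, the wait raises TimeoutException, so a guarded version of the loop in scrape_url might look like this (a sketch; the 10-second timeout is an arbitrary choice):

from selenium.common.exceptions import TimeoutException

try:
    all_href_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.XPATH, xpath))
    )
except TimeoutException:
    all_href_elements = []  # nothing matched within the timeout

for href_element in all_href_elements:
    ...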
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

Path = r"C:\WebDriverEdge\chromedriver.exe"
service = Service(Path)
options = Options()
options.add_argument('--user-data-dir=C:\\Users\\Admin\\AppData\\Local\\Google\\Chrome\\User Data\\')
options.add_argument("--profile-directory=Profile 1")

# connect to driver
driver = webdriver.Chrome(service=service, options=options)
driver.get("https://open.spotify.com/search")

x_path = '//*[@id="main"]/div/div[2]/div[1]/header/div[3]/div/div/form/input'
search = driver.find_element(By.XPATH, x_path)
action = webdriver.ActionChains(driver)
action.move_to_element(search).send_keys("Let me").perform()
I'm trying to click on the search bar at Spotify and use it to search. My problem is that when I'm already logged in, my code fails with "unable to find element", but without signing in I can fill the search bar easily. I don't know why. Has anyone run into this before? Thanks in advance.
P.S.: the XPath is still the same.
I'd rather use the form[role='search'] input CSS selector with an explicit wait, like below:
driver.get("https://open.spotify.com/search")
driver.maximize_window()
wait = WebDriverWait(driver, 20)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "form[role='search'] input"))).send_keys('Let me')
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
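If the field is present but not yet interactable after logging in, waiting for clickability instead of visibility is a common variation; a minimal sketch (the 20-second timeout and the selector reuse are assumptions):

search = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "form[role='search'] input"))
)
search.send_keys('Let me')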
I am trying to select the Download button on the page, using the following code:
from selenium import webdriver
from selenium.webdriver.common.by import By
import datetime

d_ref = datetime.date.today()

chrome_options = webdriver.ChromeOptions()
prefs = {'download.default_directory': 'D:\\User\\Download'}
chrome_options.add_experimental_option('prefs', prefs)
driver = webdriver.Chrome('D:\\User\\Download\\chromedriver.exe', options=chrome_options)

driver.get('https://www.anbima.com.br/pt_br/informar/sistema-reune.htm')
# driver.maximize_window()
driver.execute_script("window.scrollTo(0, 320);")
driver.switch_to.frame(0)
# driver.find_element(By.NAME, "Dt_Ref").clear()
# driver.find_element(By.NAME, "Dt_Ref").send_keys(d_ref.strftime('%d%m%Y'))
dropdown = driver.find_element(By.ID, "TpInstFinanceiro")
dropdown.find_element(By.XPATH, "//option[. = 'C F F']").click()
driver.find_element(By.CSS_SELECTOR, "fieldset:nth-child(3) input:nth-child(1)").click()
The CSS selector fieldset:nth-child(3) highlights Financial Instrument, and fieldset:nth-child(3) input:nth-child(1) does not highlight any element in the DOM. The selector below does highlight the Download option in the DOM:
fieldset:nth-child(5) input:nth-child(4)
Better to close the Cookie pop-up first so it does not block interaction with other elements, and use explicit waits.

# Imports
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver.get("https://www.anbima.com.br/pt_br/informar/sistema-reune.htm")
wait = WebDriverWait(driver, 20)

# Click Proceed on the Cookie pop-up
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@class='LGPD_ANBIMA_global_sites__text__btn']"))).click()

# Switch to the iframe
wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[@class]")))

# Select the Download option
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "fieldset:nth-child(5) input:nth-child(4)"))).click()
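Since the end goal is a file download, it may also help to point Chrome at a download directory before creating the driver, as the question attempted; a minimal sketch (the directory path is just an example):

from selenium import webdriver

chrome_options = webdriver.ChromeOptions()
# route downloads to a known folder instead of the default Downloads directory
chrome_options.add_experimental_option('prefs', {'download.default_directory': 'D:\\User\\Download'})
driver = webdriver.Chrome(options=chrome_options)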
With the help of the community I have been able to develop a piece of code that prints a line from a webpage. However, I now want the code to print that piece of text for every webpage matching a certain XPath selector. How can this be done?
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
import time

driver = webdriver.Chrome("C:\\Program Files (x86)\\chromedriver.exe")
driver.get('https://www.flashscore.com/')
wait = WebDriverWait(driver, 20)
driver.maximize_window()  # For maximizing window
time.sleep(2)
driver.find_element_by_id('onetrust-reject-all-handler').click()
matchpages = driver.find_elements_by_xpath("//*[@class='preview-ico icon--preview']//*[name()='use']")
for matchpage in matchpages:
    matchpage.click()
    new_window = driver.window_handles[1]
    original_window = driver.window_handles[0]
    driver.switch_to.window(driver.window_handles[1])
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.previewShowMore.showMore"))).click()
    main = driver.find_element(By.XPATH, "//div[@class='previewLine' and ./b[text()='Hot stat:']]").text
    main = main.replace('Hot stat:', '')
    print(main)
    driver.close()
    driver.switch_to.window(original_window)
I think the following line selects the first 'preview' page:

new_window = driver.window_handles[1]

However, this needs to be adjusted to cover all the 'preview' pages on flashscore.com. Furthermore, the following lines should also be run in each opened window, as I would like to print these lines to get a quick overview of the hot stats of that day:

main = driver.find_element(By.XPATH, "//div[@class='previewLine' and ./b[text()='Hot stat:']]").text
main = main.replace('Hot stat:', '')
print(main)
Thanks in advance! : )
The code you provided was close. I ended up changing a few things, such as:
Used webdriver-manager instead of a locally installed driver
Used service and options within webdriver.Chrome()
Used XPath for most of the elements
Code is below.
NOTE that I had to click through to the next day to get PREVIEW buttons to test with; that part sits between two comment markers, remove it if not needed.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
options = Options()
options.add_argument('--no-sandbox')
options.add_argument('--disable-extensions')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--ignore-certificate-errors')
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)
url = 'https://www.flashscore.com/'
driver.get(url)
wait = WebDriverWait(driver, 20)
driver.maximize_window() # For maximizing window
wait.until(ec.visibility_of_element_located((By.XPATH, "/html/body/div[6]/div[1]/div/div[1]/div[2]/div[4]/div[2]/div/section/div/div/div[2]")))
driver.find_element(By.ID, 'onetrust-reject-all-handler').click()
# Put this Code in so I could test (clicks next so I had PREVIEW buttons to click)
driver.find_element(By.XPATH, "/html/body/div[6]/div[1]/div/div[1]/div[2]/div[4]/div[2]/div/div[1]/div[2]/div/div[3]").click()
#
wait.until(ec.visibility_of_element_located((By.XPATH, "/html/body/div[6]/div[1]/div/div[1]/div[2]/div[4]/div[2]/div/section/div/div/div[2]")))
# Changed this to find all svg tags with the class of preview-ico icon--preview
matchpages = driver.find_elements(By.XPATH, "//*[local-name()='svg' and @class='preview-ico icon--preview']/..")
# Loop through those elements found
for matchpage in matchpages:
    try:
        matchpage.click()
        # Switch to pop-up window
        driver.switch_to.window(driver.window_handles[1])
        wait.until(ec.visibility_of_element_located((By.XPATH, "/html/body/div[2]/div/div[7]/div[1]")))
        # Click on the show more
        driver.find_element(By.XPATH, "/html/body/div[2]/div/div[7]/div[2]/div[3]").click()
        # Get text of Hot stat element
        main = driver.find_element(By.XPATH, "/html/body/div[2]/div/div[7]/div[2]/div[6]").text
        main = main.replace('Hot stat:', '')
        print(main)
        # Scroll to close window and click it
        close = driver.find_element(By.XPATH, "//*[contains(text(), 'Close window')]")
        driver.execute_script("arguments[0].scrollIntoView();", close)
        driver.find_element(By.XPATH, "//*[contains(text(), 'Close window')]").click()
        # Switch back to main window
        driver.switch_to.window(driver.window_handles[0])
    # Handle timeout
    except TimeoutException:
        close = driver.find_element(By.XPATH, "//*[contains(text(), 'Close window')]")
        driver.execute_script("arguments[0].scrollIntoView();", close)
        driver.find_element(By.XPATH, "//*[contains(text(), 'Close window')]").click()
        driver.switch_to.window(driver.window_handles[0])
    # Handle no element found
    except NoSuchElementException:
        close = driver.find_element(By.XPATH, "//*[contains(text(), 'Close window')]")
        driver.execute_script("arguments[0].scrollIntoView();", close)
        driver.find_element(By.XPATH, "//*[contains(text(), 'Close window')]").click()
        driver.switch_to.window(driver.window_handles[0])

driver.quit()
EDIT
To handle either a Hot streak or a Hot stat text field, add an if/elif statement after grabbing the "main" text field:

main = driver.find_element(By.XPATH, "/html/body/div[2]/div/div[7]/div[2]/div[6]").text
if 'Hot stat:' in main:
    main = main.replace('Hot stat:', '')
elif 'Hot streak:' in main:
    main = main.replace('Hot streak:', '')
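If more prefixes turn up later, a loop keeps the cleanup in one place; a small sketch (the prefix tuple is an assumption):

# strip whichever known prefix appears in the text
for prefix in ('Hot stat:', 'Hot streak:'):
    if prefix in main:
        main = main.replace(prefix, '')
        break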
I'm trying to use Python (Selenium, BeautifulSoup, and XPath) to scrape a span with an itemprop equal to "description", but every time I run the code, the "try" fails and it prints out the "except" error.
I do see the element in the code when I inspect elements on the page.
Line that isn't getting the desired response:
quick_overview = soup.find_element_by_xpath("//span[contains(@itemprop, 'description')]")
Personally, I think you should just keep working with Selenium:

quick_overview = driver.find_element_by_xpath("//span[contains(@itemprop, 'description')]")

and add .text at the end to get the text content. To actually use soup to parse this out you would likely need a wait condition from Selenium first, so there is no real point.
However, should you decide to integrate bs4, then you need to change your function to work with the actual HTML from driver.page_source and parse that, then switch to select_one to grab your item. Then ensure you are returning from the function and assigning the result to a new soup object.
from bs4 import BeautifulSoup
from selenium import webdriver  # links w/ browser and carries out actions
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

PATH = "C:\\Program Files (x86)\\chromedriver_win32\\chromedriver.exe"
baseurl = "http://www.waytekwire.com"
skus_to_find_test = ['WL16-8', 'WG18-12']

driver = webdriver.Chrome(PATH)
driver.get(baseurl)

def use_driver_current_html(driver):
    soup = BeautifulSoup(driver.page_source, 'lxml')
    return soup

for sku in skus_to_find_test:
    search_bar = driver.find_element_by_id('themeSearchText')
    search_bar.send_keys(sku)
    search_bar.send_keys(Keys.RETURN)
    try:
        product_url = driver.find_elements_by_xpath(f"//div[contains(@class, 'itemDescription')]//h3//a[contains(text(), '{sku}')]")[0]
        product_url.click()
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//span[contains(@itemprop, 'description')]")))
        soup = use_driver_current_html(driver)
        try:
            quick_overview = soup.select_one("span[itemprop=description]").text
            print(quick_overview)
        except:
            print('No Quick Overview Found.')
    except:
        print('Product not found.')
I want to read a CSV file of URLs and feed them into a Selenium webdriver command one by one. The addresses are all in one column of the CSV. I managed to loop through the URLs with no problem, but I can't make it work with the webdriver: it always fails with "selenium.common.exceptions.InvalidArgumentException: Message: invalid argument", even though the printed URLs look fine. The only odd thing is that when I copy and paste a URL from the print command into my editor, it shows up with a leading blank: ' http://[url].com'. I tried to remove the space with strip, but that had no effect on the printout. After hours of trying I feel pretty lost with this.
I use the following code:
from csv import reader
from selenium import webdriver

with open('urls.csv', 'r', encoding='utf-8') as read_obj:
    reader = reader(read_obj)
    for row in reader:
        URL = "'" + str(row[0]) + "'"
        print(URL)
        options = webdriver.ChromeOptions()
        options.headless = True
        driver = webdriver.Chrome(options=options)
        driver.get(URL)
        driver.quit()
Thank you for your help!
This code works for me:

#!/usr/bin/env python3
# coding: utf-8
import csv

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)

source = 'myfile.csv'
contents = []

with open(source, 'rt', encoding='utf-8-sig') as cp_csv:
    cp_url = csv.reader(cp_csv)
    for row in cp_url:
        url = row[0]
        contents.append(url)

for url in contents:
    driver.get(url)
    # do something here

driver.quit()
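As for the leading blank mentioned in the question: strip each cell before handing it to driver.get, not just before printing; a small sketch (assuming the stray characters come from the CSV cell itself):

url = row[0].strip()      # drop leading/trailing whitespace
url = url.strip("'\"")    # drop stray wrapping quotes, if any
driver.get(url)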