Selenium: How to use a while loop to click a link if it exists?

I am trying to write a Python program that uses Selenium to click a button to go to the next page if the button is clickable. This is because I am web scraping from varying amounts of pages.
I have tried to use a while loop that checks the href attribute, but the code doesn't click the button, nor does it return an error. If I simply write button.click() without the while loop or the conditional check for the href attribute, the program clicks the button correctly.
My code also has a while loop condition of "variable is not None". Is this a valid usage of "is not"? My logic is for the program to click the button to go to the next page whenever the button has an href attribute available.
Code:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
import numpy as np
import pandas as pd
PATH = r"C:\Program Files (x86)\chromedriver.exe"
wd = webdriver.Chrome(PATH)
wd.get("https://profiles.ucr.edu/app/home/search;name=;org=Physics%20and%20Astronomy;title=;phone=;affiliation=Faculty")
time.sleep(1)
button = wd.find_element_by_xpath("//a[@aria-label='Next page']")
#<a tabindex="0" aria-label="Next page" class="ng-star-inserted" style=""> Next <span class="show-for-sr">page</span></a>
href_data = button.get_attribute('href')
while href_data is not None:
    time.sleep(0.5)
    button.click()
    href_data = button.get_attribute('href')
Would anyone here be willing to assist me with this? I understand that Selenium requires the user to download a webdriver, so I apologize for any difficulties with testing my code.
Thank you, ExactPlace441

To loop until all pages have been clicked:
wd.get('https://profiles.ucr.edu/app/home/search;name=;org=Physics%20and%20Astronomy;title=;phone=;affiliation=Faculty')
wait = WebDriverWait(wd, 10)
while True:
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@aria-label='Next page']"))).click()
        time.sleep(5)
    except:
        break
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
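As a side note, the bare except above also hides unrelated failures. Since the wait raises TimeoutException when no clickable "Next page" link remains, a slightly tighter variant of the same loop (a sketch with a narrower catch, otherwise identical logic) would be:
from selenium.common.exceptions import TimeoutException

while True:
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@aria-label='Next page']"))).click()
        time.sleep(5)
    except TimeoutException:
        break  # no clickable "Next page" link left, so this was the last page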

I faced the same problem, so I used geckodriver (Selenium Firefox) instead of Chrome. My code worked perfectly in Selenium Firefox, but the same code did not work in Selenium Chrome. Without the while loop I had no problem clicking the button in Chrome, but the click stopped working once I added the while loop. After switching to geckodriver (Selenium Firefox) my problem was solved. Here is an example of a while loop that you can use. It will keep clicking the button until the button disappears, i.e. until the last page is reached.
i = 1
try:
    while i < 2:
        button_element = driver.find_element_by_xpath("give your button xpath")
        button_element.click()  # the loop keeps clicking until the button's xpath disappears from the page
except:
    pass  # when the button's xpath disappears, the error is ignored and execution continues with the next section of the code
Here I modified your code:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import numpy as np
import pandas as pd
driver = webdriver.Firefox()
driver.maximize_window()
url = "https://profiles.ucr.edu/app/home/search;name=;org=Physics%20and%20Astronomy;title=;phone=;affiliation=Faculty"
driver.get(url)
timeout = 20
# This collects the data from the first page
containers = WebDriverWait(driver, timeout).until(EC.visibility_of_all_elements_located((By.XPATH, '//div[@class="column ng-star-inserted"]')))
for container in containers:
    name = container.find_element_by_css_selector('.header-details h5')  # scraping the name from each page
    print(name.text)
i = 1
try:
    while i < 2:  # look for the "next page" button on every page and keep clicking it until the last page is reached
        next_page_button = driver.find_element_by_xpath("//li[@class='pagination-next ng-star-inserted']")
        next_page_button.click()
        # this collects the data from the second page through the last page
        containers = WebDriverWait(driver, timeout).until(EC.visibility_of_all_elements_located((By.XPATH, '//div[@class="column ng-star-inserted"]')))
        for container in containers:
            name = container.find_element_by_css_selector('.header-details h5')  # scraping the name from each page
            print(name.text)
        time.sleep(3)
except:
    pass  # if a page doesn't have a "next page" button, the code ends without an error

Related

Selenium: how to see full DOM structure?

I've seen similar posts about this question where the resolution was to use WebDriverWait. But I still kept getting an exception error saying the selector is not present.
Even when I printed driver.execute_script("return document.body.innerHTML;") at the end of my code, the full DOM didn't show up; it looks exactly like the page source, but I need the rest of the HTML elements.
from multiprocessing.connection import wait
from ntpath import join
import os
from xml.dom.minidom import Element
from selenium import webdriver
from selenium.webdriver.common.by import By ## Used for grabbing elements by
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import time
os.environ['PATH'] += r";C:\seleniumDrivers"
chrome = webdriver.Chrome()
chrome.get("https://jklm.fun/XSNM")
chrome.implicitly_wait(10)
name = chrome.find_element(By.CLASS_NAME, "nickname")
name.clear()
name.send_keys("Mr.Roboto")
btn = chrome.find_element(By.XPATH, '/html/body/div[2]/div[3]/form/div[2]/button')
btn.click()
join_btn = WebDriverWait(chrome, 1000000).until(EC.presence_of_element_located(
    chrome.find_element(
        By.XPATH, '/html/body/div[2]/div[3]/div[1]/div[1]/button')))
#join_btn = chrome.find_element(By.XPATH, '/html/body/div[2]/div[3]/div[1]/div[1]/button')
#join_btn = chrome.find_element(By.CSS_SELECTOR, 'button[data-text="joinGame"]')
join_btn.click()
The element was in an iframe. I used chrome.switch_to.frame() to reach it.
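For completeness, a minimal sketch of that fix, assuming the button sits in the page's first iframe (the frame index, and the data-text selector taken from the commented-out line above, are assumptions):
# switch into the iframe that contains the button (frame index 0 is an assumption)
chrome.switch_to.frame(0)
join_btn = WebDriverWait(chrome, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, 'button[data-text="joinGame"]')))
join_btn.click()
chrome.switch_to.default_content()  # return to the top-level document afterwards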

Selenium trouble finding button

I am trying to select the Download button on the page. I am using the following code:
from selenium import webdriver
from selenium.webdriver.common.by import By
import datetime
d_ref = datetime.date.today()
driver = webdriver.Chrome('D:\\User\\Download\\chromedriver.exe')
chrome_options = webdriver.ChromeOptions()
prefs = {'download.default_directory' : 'D:\\User\\Download' }
chrome_options.add_experimental_option('prefs', prefs)
driver.get('https://www.anbima.com.br/pt_br/informar/sistema-reune.htm')
# driver.maximize_window()
driver.execute_script("window.scrollTo(0, 320);")
driver.switch_to.frame(0)
# driver.find_element(By.NAME, "Dt_Ref").clear()
# driver.find_element(By.NAME, "Dt_Ref").send_keys(d_ref.strftime('%d%m%Y'))
dropdown = driver.find_element(By.ID, "TpInstFinanceiro")
dropdown.find_element(By.XPATH, "//option[. = 'C F F']").click()
driver.find_element(By.CSS_SELECTOR, "fieldset:nth-child(3) input:nth-child(1)").click()
The CSS selector fieldset:nth-child(3) highlights Financial Instrument, and fieldset:nth-child(3) input:nth-child(1) does not highlight any element in the DOM.
The CSS selector below highlights the Download option in the DOM:
fieldset:nth-child(5) input:nth-child(4)
It is better to close the cookie pop-up first in order to interact with the other elements. Use explicit waits.
# Imports
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver.get("https://www.anbima.com.br/pt_br/informar/sistema-reune.htm")
wait = WebDriverWait(driver,20)
# Click on Proceed on the Cookie pop-up
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@class='LGPD_ANBIMA_global_sites__text__btn']"))).click()
# Switch to Iframe
wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[@class]")))
# Select Download option
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,"fieldset:nth-child(5) input:nth-child(4)"))).click()

Get text inside the href link inside the span marker using Selenium

How can I extract the text that is displayed as part of the link inside the span marker?
<span class="pull-left w-100 font30 medium_blue_type mb10"><a href='/XLY'>XLY</a></span> <span class="w-100">Largest Allocation</span>
Output:
XLY
I've tried several approaches, among them:
elems = driver.find_elements_by_class_name("span.pull-left.w-100.font30.medium_blue_type.mb10")
elems = driver.find_element_by_xpath('.//span[@class = "pull-left w-100 font30 medium_blue_type mb10"]')
but can't get it working. The website is https://www.etf.com/stock/TSLA.
EDIT:
Is it possible to do it without opening the window in the browser, e.g. using the "headless" option?
op = webdriver.ChromeOptions()
op.add_argument('headless')
driver = webdriver.Chrome(CHROME_DRIVER_PATH, options=op)
If you prefer a text-based locator, you can use the one below:
//span[text()='Largest Allocation']/../span
You should click on the cookies "I Understand" button first.
Make use of explicit waits.
So your effective code would be:
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://www.etf.com/stock/TSLA")
try:
    wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "I Understand"))).click()
    print("Clicked on I understand button")
except:
    pass
txt = wait.until(EC.visibility_of_element_located((By.XPATH, "//span[text()='Largest Allocation']/../span"))).text
print(txt)
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Output:
Clicked on I understand button
XLY
Process finished with exit code 0
If you are looking for locators not based on text, use the below line of code:
txt = wait.until(EC.visibility_of_element_located((By.XPATH, "(//span[contains(@class,'medium_blue_type')]//a)[2]"))).text
There are several possible problems here:
Maybe you are missing a delay.
The locator you are using may not be unique.
The way you are extracting the attribute value from the returned web element may be wrong.
The web element may be inside an iframe, etc.
Based on the currently shared information, you can try adding a wait and extracting the web element value as follows:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
wait = WebDriverWait(driver, 20)
href = wait.until(EC.visibility_of_element_located((By.XPATH, "//span[@class='pull-left w-100 font30 medium_blue_type mb10']/a"))).get_attribute("href")
Use the following xpath to identify the href link.
//div[./span[text()='Largest Allocation']]//a
You need to induce some delay to get the element.
Use WebDriverWait() and wait for visibility of the element.
To get the text:
print(WebDriverWait(driver,10).until(EC.visibility_of_element_located((By.XPATH, "//div[./span[text()='Largest Allocation']]//a"))).text)
To get the href:
print(WebDriverWait(driver,10).until(EC.visibility_of_element_located((By.XPATH, "//div[./span[text()='Largest Allocation']]//a"))).get_attribute("href"))
You need to import the libraries below:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

Selenium won't click on all buttons only a few

I'm trying to click the buttons on the left panel of the web page I'm scraping. However, with Selenium it seems to click only a few of these buttons. I added a time.sleep between each click, which did not make a difference.
I just get the following error:
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"(//div[@class='toggle-bottom-filter'])[7]"}
I have double-checked that XPath and it does exist on the website, so I'm not certain why it's undetectable. Any ideas?
Here's my script:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Chrome()
driver.get("https://www.theparking.eu/#!/used-cars/")
wait=WebDriverWait(driver,15)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#bloc-filter")))
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,"div[class='sd-cmp-25TOo'] span[class='sd-cmp-16t61 sd-cmp-2JYyd sd-cmp-3cRQ2']"))).click()
#WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH,"//iframe[#title='Electric car drivers will soon no longer be able to charge their cars here.']")))
stuff = []
for more in range(1, 9):
    time.sleep(2)
    driver.find_element(By.XPATH, f"(//div[@class='toggle-bottom-filter'])[{more}]").click()
data = driver.page_source
# ... parse with beautifulsoup
Not all of the 9 elements you are trying to click are initially visible; you first have to scroll each element into view and only then click it.
I see there are 11 elements matching the //div[@class='toggle-bottom-filter'] locator there, so you should possibly change the for loop to for more in range(1, 12):
I think the following code should work better:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
driver = webdriver.Chrome()
driver.get("https://www.theparking.eu/#!/used-cars/")
wait=WebDriverWait(driver,15)
actions = ActionChains(driver)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#bloc-filter")))
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,"div[class='sd-cmp-25TOo'] span[class='sd-cmp-16t61 sd-cmp-2JYyd sd-cmp-3cRQ2']"))).click()
#WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH,"//iframe[#title='Electric car drivers will soon no longer be able to charge their cars here.']")))
stuff = []
for more in range(1, 12):
    time.sleep(2)
    button = driver.find_element(By.XPATH, f"(//div[@class='toggle-bottom-filter'])[{more}]")
    actions.move_to_element(button).perform()
    time.sleep(0.5)
    button.click()
    time.sleep(0.5)
data = driver.page_source
# ... parse with beautifulsoup

How to scroll to the end Selenium Python

I'm trying to scroll to the end of this page: https://www.binance.com/it/pos
When I get to the page, I click the button 'Show all 77 products', which opens a popup that only partially shows its elements. This is my current code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
def getpage(driver):
    driver.get('https://www.binance.com/it/pos')
    sleep(3)
    driver.find_element_by_xpath('//div[@id="savings-lending-pos-expend"]').click()
    sleep(2)
    elem = driver.find_element_by_xpath('//div[@class="css-n1ers"]')
    elem.send_keys(Keys.END)
driver = webdriver.Firefox()
getpage(driver)
I have tried almost everything to make it work. Apart from the solution in the code above, I tried the following solutions with no success:
driver.execute_script("window.scrollTo(0, Y)")
and
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
and in this solution I didn't understand which element ("label") to use:
label.sendKeys(Keys.PAGE_DOWN);
I tried almost all the solutions I found but none worked. I hope you can help me. Thank you.
Try the approach below and confirm:
You can try to find each row and apply scrollIntoView to it.
# Imports required for explicit waits:
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time

def getpage(driver):
    driver.get('https://www.binance.com/it/pos')
    wait = WebDriverWait(driver, 30)
    wait.until(EC.element_to_be_clickable((By.ID, "savings-lending-pos-expend"))).click()  # "show more" option
    i = 0
    try:
        while True:
            options = driver.find_elements_by_xpath("//div[@id='modal-wrapper']/div")  # find the rows
            driver.execute_script("arguments[0].scrollIntoView(true);", options[i])
            time.sleep(1)
            i += 1
    except IndexError as e:
        print("Exception: {}".format(e.args[-1]))
    print(i)
getpage(driver)
You can use ActionChains
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

actions = ActionChains(driver)
actions.send_keys(Keys.PAGE_DOWN).perform()
That will make the page scroll down similar to pressing the Page Down key.
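Note that send_keys in an ActionChains sequence goes to whichever element currently has focus, so for a popup you may need to click inside it first. A sketch, assuming the //div[@id='modal-scroller'] container used in the answer below:
popup = driver.find_element_by_xpath("//div[@id='modal-scroller']")  # popup container (assumed locator)
actions.move_to_element(popup).click().send_keys(Keys.PAGE_DOWN).perform()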
This solution worked:
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from time import sleep
def getpage(driver):
    driver.get('https://www.binance.com/it/pos')
    wait = WebDriverWait(driver, 30)
    wait.until(EC.element_to_be_clickable((By.ID, "savings-lending-pos-expend"))).click()
    sleep(5)
    pop_up_window = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.XPATH, "//div[@id='modal-scroller']")))
    while True:
        driver.execute_script('arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;', pop_up_window)
        sleep(1)
driver = webdriver.Firefox()
getpage(driver)
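One caveat: the while True loop above scrolls forever and the function never returns. A possible stop condition (a sketch, not part of the original answer) is to break once scrollTop stops changing, which means the bottom of the popup has been reached:
last_top = -1
while True:
    top = driver.execute_script(
        'arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;'
        ' return arguments[0].scrollTop;', pop_up_window)
    if top == last_top:  # the scroll position no longer changes: end of the list
        break
    last_top = top
    sleep(1)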