I'm trying to scrape a website that has a link which produces a popup, and I want to scrape that popup. If I click on the product itself it gives me the information, but then I would have to call back() a million times; if I can get the popup instead, I can scrape the info, close the popup, and move on to the next product.
m/RlY2k.jpg
These are just some of the things I've tried:
# Click the element addressed by an absolute XPath.
# Positional predicates must be bracketed (div[2]); a bare "div2" is parsed as
# a tag name and matches nothing.
quick = driver.find_element_by_xpath("/html/body/div[2]/div[2]/div/div[3]/div[3]/div[5]/ul[2]/li[2]/div")
quick.click()
//*[@id="js_proList"]/ul[1]/li[1]/div/div[1]/span
//body[1]/div[1]/div[1]/div[1]/div[2]/div[2]/div[5]/ul[1]/li[1]/div[1]/div[1]/span[1]
//span[contains(@xpath,'1')]
This the code.
<div class="goods_img pr fast-btn-hover js_goodsHoverImg" data-goods-id="475526308" xpath="1"> <span data-logsss-const-value="" data-href="/m-goods-a-fast-id-7684901.htm" class="fast-buy js_fast_buy">QUICK SHOP</span>
That Quick Shop button is hidden; you need to hover over the product first, and only then does it become interactable.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from time import sleep
from selenium.webdriver.common.action_chains import ActionChains
driver = webdriver.Chrome(executable_path="D:/chromedriver.exe")
driver.get('https://www.rosegal.com/plus-size-tops-120/')
driver.maximize_window()

# Get all "QUICK SHOP" buttons. XPath tests attributes with "@", not "#".
quick_shop_as_list = driver.find_elements(By.XPATH, "//*[@class='fast-buy js_fast_buy']")

# Each button only becomes clickable after hovering over its product tile,
# so hover over the tile with the same index before clicking the button.
for i, quick_shop in enumerate(quick_shop_as_list):
    # Tiles are queried again on every pass (presumably to avoid holding
    # stale references once a popup has been opened and closed).
    current_product_to_hover_over = driver.find_elements(
        By.XPATH, "//div[@class='goods_img pr fast-btn-hover js_goodsHoverImg']"
    )
    ActionChains(driver).move_to_element(current_product_to_hover_over[i]).perform()
    sleep(1)
    quick_shop.click()
    sleep(1)

    # The popup content lives inside an iframe; switch into it to read it.
    iframe = driver.find_element(By.CLASS_NAME, 'xubox_iframe')
    driver.switch_to.frame(iframe)

    # Do the scraping: print the text of the whole "page" container.
    popup_div = driver.find_element(By.XPATH, "//div[@id='page']")
    print(popup_div.text)

    # Leave the iframe and close the popup before moving to the next product.
    driver.switch_to.default_content()
    driver.find_element(By.XPATH, "//*[contains(@class,'xubox_close')]").click()
    sleep(1)

# NOTE: some scrolling will probably be required at some point, in case
# move_to_element fails for tiles outside the viewport.
Related
I am trying to scrape this web page: https://whalewisdom.com/filer/fisher-asset-management-llc#tabholdings_tab_link
I would like to set up my Python Selenium code so that it correctly selects "50" in the items-per-page dropdown.
But my code clicks on the wrong button. Where is the error in my code?
# Start Firefox with an explicit browser binary and geckodriver path.
options = webdriver.FirefoxOptions()
options.binary_location = r'C:/Users/Mozilla Firefox/firefox.exe'
driver = webdriver.Firefox(executable_path='C:/geckodriver.exe', options=options)
driver.get('https://whalewisdom.com/filer/fisher-asset-management-llc#tabholdings_tab_link')

# Wait for the quarter label to become visible, then scroll it into view.
# XPath tests attributes with "@" (e.g. @id), not "#".
quarter_label = WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.XPATH, "//label[@id='qtr-1-label']"))
)
driver.execute_script("return arguments[0].scrollIntoView(true);", quarter_label)

# NOTE: this locator matches on class only. If several elements share the
# class, the wait returns the first match, which may not be the intended
# page-size dropdown.
WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.XPATH, "//*[@class='btn btn-default dropdown-toggle']"))
).click()
thank you for your help.
-ag
Your code clicked on the wrong button because the page has multiple elements with exactly the same class, and you are fetching the first one and clicking on it.
I also see that the page sometimes shows a popup, which may make other elements non-interactable. So we want to close the popup first (if it appears) and then move ahead.
Using Chrome driver
Setup and Imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
# REPLACE YOUR CHROME PATH HERE
chrome_path = r"C:\Users\hpoddar\Desktop\Tools\chromedriver_win32\chromedriver.exe"
# Launch Chrome through a driver service bound to the chromedriver binary above.
driver = webdriver.Chrome(service=Service(chrome_path))
Fetch the page
# Load the holdings page in the browser session.
# NOTE(review): the URL literal starts with a space - confirm it is intentional.
holdings_url = ' https://whalewisdom.com/filer/fisher-asset-management-llc#tabholdings_tab_link'
driver.get(holdings_url)
Close the popup(if appeared)
# Dismiss the popup if it shows up within 10 seconds; carry on if it does not.
# XPath tests attributes with "@" (e.g. @id), not "#".
try:
    popup = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.XPATH, "//a[@id='dfwid-close-184302']"))
    )
except TimeoutException:
    print("No Popup appeared on the page")
else:
    popup.click()
Click on dropdown and the menu item 50
# Open the dropdown, then pick the "50" entry from its menu.
dropdown = driver.find_element(By.CSS_SELECTOR, '.btn-group.dropdown')
dropdown.click()
# The menu entry may not be interactable until the dropdown has opened, so
# wait for it to become clickable instead of looking it up immediately.
ele50 = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '//li[@role="menuitem"]/a[contains(text(), "50")]'))
)
ele50.click()
Output
The above code clicks on item 50
Using Firefox driver
The imports would be the same as above, and the following code also remains the same, with just a minute change.
# The Firefox driver needs the Firefox flavour of Service, not the Chrome one.
from selenium.webdriver.firefox.service import Service as FirefoxService

# REPLACE YOUR FIREFOX DRIVER PATH HERE
firefoxpath = r'C:\Users\hpoddar\Desktop\Tools\firefoxdriver\geckodriver.exe'
s = FirefoxService(firefoxpath)
driver = webdriver.Firefox(service=s)
driver.get(' https://whalewisdom.com/filer/fisher-asset-management-llc#tabholdings_tab_link')

# Dismiss the popup if it shows up within 10 seconds; carry on if it does not.
# XPath tests attributes with "@" (e.g. @id), not "#".
try:
    popup = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.XPATH, "//a[@id='dfwid-close-184302']"))
    )
except TimeoutException:
    print("No Popup appeared on the page")
else:
    popup.click()

# Open the dropdown and click its "50" menu entry.
dropdown = driver.find_element(By.CSS_SELECTOR, '.btn-group.dropdown')
dropdown.click()
ele50 = driver.find_element(By.XPATH, '//li[@role="menuitem"]/a[contains(text(), "50")]')
ele50.click()
Output
which similarly clicks on the desired element
I'm trying to scrape data from https://in.puma.com/in/en/mens/mens-new-arrivals . The complete data is loaded when the show all button is clicked.
I used selenium to generate the click and load the rest of the page, however - I'm getting an error
"TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: "
See my code below.
from selenium.webdriver.support import expected_conditions as EC
import time
from lxml import etree as et
chrome_driver_path = "driver/chromedriver"
url = 'https://in.puma.com/in/en/mens/mens-new-arrivals'

# Start Chrome with a driver binary resolved by ChromeDriverManager.
browser = webdriver.Chrome(ChromeDriverManager().install())
browser.get(url)

# Locator for the "load more" button. XPath tests attributes with "@", not "#".
# NOTE(review): the attribute value looks auto-generated - confirm it is the
# same on every page load before relying on it.
x_path_to_load_more = '//*[@data-component-id="a_tspn9cqoeth"]'

# Scroll to the bottom of the page so the button is brought into view.
browser.execute_script("window.scrollTo(0,document.body.scrollHeight)")

# Wait up to 10 seconds for the button to be present in the DOM, then click it.
button_locate = wait(browser, 10).until(
    EC.presence_of_element_located((By.XPATH, x_path_to_load_more))
)
button_locate.click()
The xpath is not correct, try
# Match any <button> whose text content contains "Show All".
x_path_to_load_more = "//button[contains(., 'Show All')]"
To verify the effectiveness of the xpath inspect the page, open the find bar with Command + F or Control + F and paste your xpath
@data-component-id seems to be dynamic. That means the value will be different (not "a_tspn9cqoeth") each time you open the page. Try searching by another attribute value:
# "Show more" button inside the centered container; XPath attributes use "@".
x_path_to_load_more = '//div[@class="text-center"]/button[contains(@class, "show-more-button")]'
or
# "Show all" button inside the centered container; XPath attributes use "@".
x_path_to_load_all = '//div[@class="text-center"]/button[contains(@class, "show-all-button")]'
Also it's better to use EC.element_to_be_clickable instead of EC.presence_of_element_located
UPDATE
Since click on button might be intercepted by Cookies footer try to scroll page down before making click:
from selenium.webdriver.common.keys import Keys
# Press END on the page body to scroll to the bottom before clicking.
# The session object in the script being extended is named `browser`.
browser.find_element('xpath', '//body').send_keys(Keys.END)
I am trying to write a Python program that uses Selenium to click a button to go to the next page if the button is clickable. This is because I am web scraping from varying amounts of pages.
I have tried to use a while loop that checks the href attribute, but the code doesn't click the button, nor does it return an error. If I simply write button.click(), but without a while loop or conditional check for the href attribute, then the program clicks the button correctly.
My code also has a while loop condition of "variable is not None". Is this a valid usage of "is not"? My logic is for the program to click the button to go to the next page if there is an href available from the to click.
Code:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
import numpy as np
import pandas as pd
# Raw string: the backslashes in the Windows path must stay literal
# ("\P" and "\c" are invalid escape sequences in a normal string).
PATH = r"C:\Program Files (x86)\chromedriver.exe"
wd = webdriver.Chrome(PATH)
wd.get("https://profiles.ucr.edu/app/home/search;name=;org=Physics%20and%20Astronomy;title=;phone=;affiliation=Faculty")
time.sleep(1)

# XPath tests attributes with "@" (e.g. @aria-label), not "#".
button = wd.find_element_by_xpath("""//a[@aria-label='Next page']""")
#<a tabindex="0" aria-label="Next page" class="ng-star-inserted" style=""> Next <span class="show-for-sr">page</span></a>
href_data = button.get_attribute('href')

# NOTE: the anchor shown above has no href attribute. If get_attribute
# returns None for it, this loop body never runs and nothing is clicked.
while href_data is not None:
    time.sleep(0.5)
    button.click()
    href_data = button.get_attribute('href')
Would anyone here be willing to assist me with this? I understand that Selenium requires the user to download a webdriver, so I apologize for any difficulties with testing my code.
Thank you, ExactPlace441
To loop until all pages were clicked.
from selenium.common.exceptions import WebDriverException

wd.get('https://profiles.ucr.edu/app/home/search;name=;org=Physics%20and%20Astronomy;title=;phone=;affiliation=Faculty')
wait = WebDriverWait(wd, 10)

# Keep clicking "Next page" until it can no longer be clicked.
while True:
    try:
        # XPath tests attributes with "@" (e.g. @aria-label), not "#".
        wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@aria-label='Next page']"))).click()
    except WebDriverException:
        # Covers the wait timing out and click failures, without also
        # swallowing KeyboardInterrupt the way a bare "except:" would.
        break
    time.sleep(5)
Import
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
I faced the same problem, so I used GeckoDriver (Selenium with Firefox) instead of Chrome. My code worked perfectly with Firefox, but the same code did not work with Chrome. Without the while loop I had no problem clicking the button in Chrome, but it stopped working once I added the while loop. After switching to GeckoDriver (Selenium with Firefox) my problem was solved. Here is an example of a while loop that you can use. It keeps clicking the button until the button disappears or the last page is reached.
from selenium.common.exceptions import WebDriverException

# Click the button repeatedly until it can no longer be found or clicked.
while True:
    try:
        button_element = driver.find_element_by_xpath("give your button xpath")
        button_element.click()
    except WebDriverException:
        # The button is gone (or not clickable): stop and continue with the
        # next section of the script.
        break
Here I modified your code:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import numpy as np
import pandas as pd
from selenium.common.exceptions import WebDriverException


def _print_names(driver, timeout):
    """Wait for the profile containers on the current page and print each name."""
    # XPath tests attributes with "@" (e.g. @class), not "#".
    containers = WebDriverWait(driver, timeout).until(
        EC.visibility_of_all_elements_located((By.XPATH, '//div[@class="column ng-star-inserted"]'))
    )
    for container in containers:
        name = container.find_element(By.CSS_SELECTOR, '.header-details h5')
        print(name.text)


driver = webdriver.Firefox()
driver.maximize_window()
url = "https://profiles.ucr.edu/app/home/search;name=;org=Physics%20and%20Astronomy;title=;phone=;affiliation=Faculty"
driver.get(url)
timeout = 20

# Collect the data from the first page.
_print_names(driver, timeout)

# Click the "next page" button and scrape again, page after page, until the
# button can no longer be found or clicked or the results fail to appear.
while True:
    try:
        next_page_button = driver.find_element(By.XPATH, "//li[@class='pagination-next ng-star-inserted']")
        next_page_button.click()
        _print_names(driver, timeout)
    except WebDriverException:
        # No further page to move to: finish without raising.
        break
    time.sleep(3)
I have an html which looks as following:
<div class="v-window-outerheader"><div class="v-window-maximizebox" tabindex="0" role="button" aria-label="maximize button" id="38_window_maximizerestore"></div>
<div class="v-window-closebox" tabindex="0" role="button" aria-label="close button" id="38_window_close"></div>
This code is for a message box which pops up on logging into the website.
I have been using the following code to click on the X on top right to close the message pop:
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.common.keys import Keys
path_to_chromedriver = r'C:\chromedriver' # change path as needed
browser = webdriver.Chrome(executable_path=path_to_chromedriver)
wait = WebDriverWait(browser, 10)
browser.get("https://ftrcenter.pjm.com/ftrcenter/pages/secure/")

# Wait up to 10 seconds for the close box to be present, then click it.
# XPath tests attributes with "@" (e.g. @id), not "#".
wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="37_window_close"]'))).click()
But, the ID number 37_window_close changes every so often. Today the id is 38_window_close
Rest of the html code remains same. How do I modify my code so that my code doesn't break due to this change in ID?
The pop up box appears as following, where X is on top right to close it:
Use aria-label="close button" attribute.
# Locate the close box by its aria-label, which does not contain the changing
# number. Wait until it is clickable, since presence alone does not guarantee that.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@aria-label="close button"]'))).click()
Or class attribute.
# Locate the close box by its class, which does not contain the changing
# number. Wait until it is clickable, since presence alone does not guarantee that.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@class="v-window-closebox"]'))).click()
Or use contains with id
# Match only the constant "_window_close" part of the id, so the changing
# numeric prefix is ignored. Wait until the element is clickable before clicking.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[contains(@id,"_window_close")]'))).click()
For dynamic IDs, You can match the substring instead of the exact string.
Use Below XPath:
//div[contains(@id,"_window_close")]
OR CSS:
div[id*="_window_close"]
Hope this helps :)
There is a website which shows links on a map (map layer currently can't be shown but links can be shown as points).
To view this website, this must be followed: (Pictures 1-2-3 also shows the way)
Firstly, click this website 'http://svtbilgi.dsi.gov.tr/Sorgu.aspx',
Secondly, choose '15. Kizilirmak Havzasi' from 'Havza' tab,
Finally, click the 'sorgula' button.
After the final stage, you should view the website ('http://svtbilgi.dsi.gov.tr/HaritaNew.aspx') where the points can be shown on a map.
Normally, I can use selenium to download web pages or can grab all links using different libraries. However, these methods can't obtain the links because they are embedded almost in a secret way.
I would like to download all the links that these points have.
For example, this script doesn't continue after 'parent_handle = driver.current_window_handle' line. I don't know why?
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
driver = webdriver.Firefox(executable_path=r'D:\geckodriver.exe')
driver.get("http://svtbilgi.dsi.gov.tr/Sorgu.aspx")

# Choose the basin in the "Havza" dropdown and submit the query.
driver.find_element_by_id("ctl00_hld1_cbHavza").click()
Select(driver.find_element_by_id("ctl00_hld1_cbHavza")).select_by_visible_text("15. Kizilirmak Havzasi")
driver.find_element_by_id("ctl00_hld1_cbHavza").click()
driver.find_element_by_id("ctl00_hld1_btnListele").click()

# Remember the main window so it can be told apart from the windows that
# the clicks below open.
parent_handle = driver.current_window_handle
all_urls = []

# XPath tests attributes with "@" (e.g. @id), not "#".
# NOTE: find_elements does not wait; it returns an empty list if the icons
# are not in the DOM yet, in which case the loop below does nothing.
all_images = driver.find_elements_by_xpath("//div[contains(@id,'OL_Icon')]/img")
for image in all_images:
    image.click()
    for handle in driver.window_handles:
        if handle == parent_handle:
            continue
        # Visit the newly opened window, record its URL, close it, and
        # return to the main window.
        driver.switch_to.window(handle)
        WebDriverWait(driver, 5).until(lambda d: d.execute_script('return document.readyState') == 'complete')
        all_urls.append(driver.current_url)
        driver.close()
        driver.switch_to.window(parent_handle)
Why not click them one by one and then get the URL of the opened window, using driver.current_url (getCurrentUrl() in the Java bindings)?
In the below code, first I wait for all the images and then perform the click action using ActionChains class since the normal Selenium click() wasn't working.
Complete code in Python -
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome(executable_path=r'D:\Test automation\chromedriver.exe')
driver.get("http://svtbilgi.dsi.gov.tr/Sorgu.aspx")

# Choose the basin in the "Havza" dropdown and submit the query.
driver.find_element(By.ID, "ctl00_hld1_cbHavza").click()
Select(driver.find_element(By.ID, "ctl00_hld1_cbHavza")).select_by_visible_text("15. Kizilirmak Havzasi")
driver.find_element(By.ID, "ctl00_hld1_btnListele").click()

# Remember the main window so it can be told apart from the windows that
# the clicks below open.
parent_handle = driver.current_window_handle
driver.maximize_window()
all_urls = []

# Wait up to 15 seconds for the map icons to be present in the DOM.
# XPath tests attributes with "@" (e.g. @id), not "#".
all_images = WebDriverWait(driver, 15).until(
    EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@id,'OL_Icon')]/img"))
)
print(len(all_images))

for image in all_images:
    # Move to the icon and click it through an action chain rather than
    # calling image.click() directly.
    webdriver.ActionChains(driver).move_to_element(image).click(image).perform()
    for handle in driver.window_handles:
        if handle == parent_handle:
            continue
        # Visit the newly opened window, record its URL, close it, and
        # return to the main window.
        driver.switch_to.window(handle)
        WebDriverWait(driver, 15).until(lambda d: d.execute_script('return document.readyState') == 'complete')
        all_urls.append(driver.current_url)
        driver.close()
        driver.switch_to.window(parent_handle)

print(all_urls)