I am working on an automation task that clicks through some web pages.
What I want is to keep the Chrome browser open after my scripted work is done. I've done this with time.sleep(), but I think there must be other ways. Are there any good ideas?
import datetime
import time
from selenium import webdriver
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument("disable-gpu")
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
options.add_experimental_option("excludeSwitches", ["enable-logging"])
options.add_experimental_option("detach", True)
options.add_experimental_option("excludeSwitches", ["enable-automation"])
driver = webdriver.Chrome(options=options)
driver.get("https://example.com")
driver.maximize_window()
driver.find_element(By.XPATH, '//*[#id="USER_ID"]').send_keys("id")
driver.find_element(By.XPATH, '//*[#id="PWD"]').send_keys("pwd")
driver.find_element(By.XPATH, '//*[#id="btn_Login"]').click()
WebDriverWait(driver, 10000).until(
EC.presence_of_element_located((By.XPATH, '//*[#id="jqg_grd_basket_1"]')))
driver.find_element(By.XPATH, '//*[#id="cb_grd_basket"]').click()
enrollment_time = datetime.datetime(2023, 2, 6, 10, 20, 1, 0)
current_time = datetime.datetime.now()
wait_time = enrollment_time - current_time
time.sleep(wait_time.total_seconds())
driver.find_element(By.XPATH, '//*[#id="btn_basketSave"]').click()
alert = Alert(driver)
print(alert.text)
time.sleep(999999)
As long as you don't call driver.close() or driver.quit(), the browser will stay open until you exit it manually.
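In fact, your options already include the "detach" experimental option, which is the cleaner way to get this behavior: with it enabled, chromedriver leaves the browser running after the Python process exits, so the trailing time.sleep(999999) can be dropped. A minimal sketch:
from selenium import webdriver

options = webdriver.ChromeOptions()
# "detach" tells chromedriver to leave the browser window open
# after the script finishes, so no long sleep is needed
options.add_experimental_option("detach", True)

driver = webdriver.Chrome(options=options)
driver.get("https://example.com")
# the script ends here; the Chrome window stays open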
I have a problem. I am trying to extract the email addresses from a website.
To extract an email address, I have to click on the email icon in order for it to appear. Once I click on the icon, a new "popup" appears.
I have tried using Selenium's get_attribute for data-mailto-token and data-mailto-vector, but without any success. How can I extract the email addresses with Python from these so-called "popups"? Any help would be greatly appreciated!
Kind regards
Linus
I have tried using Selenium and looked into further libraries for cross-platform access, but without any success.
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import ElementClickInterceptedException
from selenium.webdriver.support.select import Select
from bs4 import BeautifulSoup
import requests
import re
#card_small = driver.find_elements_by_class_name("Card small")
i_num = 1
list_links = []
list_links_all = []
num_inc = 1
for i_p in range(0,14):
    url = "https://www.hotelleriesuisse.ch/de/branche-und-politik/branchenverzeichnis/hotel-page-"+str(num_inc)+"?filterValues=QWN0aXZlLEluYWN0aXZlOzs7OzQsMzs7Ozs7OzQ5LDEzLDUsNDU7&cHash=30901b0e3080a928cd0ad32522e81b3f"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'}
    driver = webdriver.Chrome(ChromeDriverManager().install())
    driver.get(url)
    time.sleep(5)
    driver.find_element_by_css_selector("body > div.cc-window.cc-banner.cc-type-info.cc-theme-block.cc-bottom.cc-visible > div > div.cc-actions > a.cc-btn.cc-allow").click()
    try:
        driver.execute_script("window.scrollTo(0,2150)")
        target = driver.find_elements_by_tag_name("a")
        for i in target:
            list_links.append(i.get_attribute("href"))
        for i in range(10,22):
            url_new = list_links[i]
            print(url_new)
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'}
            page = requests.get(url_new, headers=headers)
            soup = BeautifulSoup(page.text, 'html.parser')
            name = soup.find('span',class_="Avatar--name")
            address = soup.find_all('span', class_="Button--label")
            phone = soup.find_all('span', class_="Button--label")
            if name != None:
                name_text = soup.find('span', class_="Avatar--name").text
                #print(name_text)
            if address != None:
                for i in address:
                    search=i.select("span p")
                    if search != []:
                        print(search[0].text)
            if phone != None:
                for i in phone:
                    match = re.search("[+]\d{2} \d{2} \d{3} \d{2} \d{2}",i.text)
                    if match !=None:
                        print(match.group())
            time.sleep(5)
            driver.get(url_new)
            try:
                driver.execute_script("window.scrollTo(0,900)")
                time.sleep(5)
                element=driver.find_element_by_link_text("E-Mail")
                info = element.get_attribute("data-mailto-token")
                print(info)
                element.click()
            except NoSuchElementException:
                pass
        list_links = []
        num_inc = num_inc + 1
        i_num = i_num + 1
        driver.close()
        """
        driver.find_element_by_css_selector("#main-content > section.CardGrid > nav > a.Button.nolabel.primary.Pagination--button.Pagination--next").click()
        time.sleep(5)
        print("This is the end of page: "+str(i_num))
        i_num = i_num + 1
        time.sleep(5)
        """
    except ElementClickInterceptedException:
        break
The email address can be obtained by decrypting the combination of data-mailto-token and data-mailto-vector values found in the button.
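The exact scheme depends on the site's JavaScript, so the following is only a hypothetical sketch: it assumes the token is base64-encoded AES-CBC ciphertext, that data-mailto-vector is the IV, and that the key can be recovered from the site's scripts. Every name and parameter here is an assumption, not the site's confirmed mechanism.
from base64 import b64decode
from Crypto.Cipher import AES  # pip install pycryptodome

def decrypt_mailto(token_b64: str, vector_b64: str, key: bytes) -> str:
    # Assumed layout: AES-CBC with data-mailto-vector as the IV and PKCS#7 padding.
    cipher = AES.new(key, AES.MODE_CBC, iv=b64decode(vector_b64))
    padded = cipher.decrypt(b64decode(token_b64))
    return padded[:-padded[-1]].decode("utf-8")  # strip the padding bytes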
I am new to crawling and I am trying to crawl the https://www.stradivarius.com/tr/en/woman/clothing/shop-by-product/sweatshirts-c1390587.html webpage. Sometimes I can get hrefs, but generally the code gives me an empty list. Do you have any suggestions?
These are the packages:
import requests
from tqdm import tqdm
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import *
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import json
import pandas as pd
import warnings
warnings.filterwarnings(action='ignore')
from unidecode import unidecode
import re
import time
from webdriver_manager.chrome import ChromeDriverManager
browser = webdriver.Chrome(ChromeDriverManager().install())
urlist = []
browser.get('https://www.stradivarius.com/tr/kad%C4%B1n/giyim/%C3%BCr%C3%BCne-g%C3%B6re-al%C4%B1%C5%9Fveri%C5%9F/sweatshi%CC%87rt-c1390587.html')
html = browser.page_source
soup = BeautifulSoup(html)
browser.implicitly_wait(90)
product_links=soup.find_all('a', {'id':'hrefRedirectProduct'})
for a in product_links:
    urlist.append(product_links["href"])
It's possible the data hasn't rendered yet. You have the .implicitly_wait(90), but it comes after you've already pulled the html, so you need to move it up in your code.
urlist = []
browser.get('https://www.stradivarius.com/tr/kad%C4%B1n/giyim/%C3%BCr%C3%BCne-g%C3%B6re-al%C4%B1%C5%9Fveri%C5%9F/sweatshi%CC%87rt-c1390587.html')
browser.implicitly_wait(90) #<--- wait for the page to render BEFORE...
html = browser.page_source  # ...grabbing the html source
soup = BeautifulSoup(html, 'html.parser')
product_links = soup.find_all('a', {'id':'hrefRedirectProduct'})
for a in product_links:
    urlist.append(a["href"])  # append from the loop variable, not the whole ResultSet
A better solution may be to go after the data from the source.
Does this include your desired href?
import requests
import pandas as pd
url = 'https://www.stradivarius.com/itxrest/2/catalog/store/54009571/50331068/category/1390587/product?languageId=-43&appId=1'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36'}
jsonData = requests.get(url, headers=headers).json()
df = pd.DataFrame(jsonData['products'])
Output:
print(df['productUrl'])
0 kolej-sweatshirt-l06710711
1 oversize-hard-rock-cafe-baskl-sweatshirt-l0670...
2 oversize-hard-rock-cafe-baskl-sweatshirt-l0670...
3 oversize-hard-rock-cafe-kapusonlu-sweatshirt-l...
4 fermuarl-sweatshirt-l06521718
...
60 fermuarl-oversize-kapusonlu-sweatshirt-l06765643
61 dikisli-basic-sweatshirt-l06519703
62 jogging-fit-pantolon-ve-sweatshirt-seti-l01174780
63 naylon-sweatshirt-l08221191
64 dikisli-basic-sweatshirt-l06519703
Name: productUrl, Length: 65, dtype: object
I was trying to run this script to automate clicking on the product list on a webpage using Selenium, but this "raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:" error occurs every time. What am I doing wrong here? Expecting your guidance.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_path = which('chromedriver')
driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
driver.set_window_size(1920, 1080)
driver.get('https://www.galaxus.ch/search?q=5010533606001')
product_tab = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//article[@class='panelProduct panelLayout_panelLayout__BDQ6_ view_product__3AOqY']/a"))).click()
time.sleep(10)
driver.close()
output
PS G:\Python_Practice\scrapy_practice\test> & C:/Users/raisu/anaconda3/envs/Scrapy_Workspace2/python.exe g:/Python_Practice/scrapy_practice/test/test.py
[21628:13792:0911/125833.339:ERROR:gpu_init.cc(441)] Passthrough is not supported, GL is disabled
DevTools listening on ws://127.0.0.1:56456/devtools/browser/1d6d20ce-ecb9-44f7-be6e-1dbe1373526a
Traceback (most recent call last):
File "g:/Python_Practice/scrapy_practice/test/test.py", line 18, in <module>
product_tab = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//article[@class='panelProduct panelLayout_panelLayout__BDQ6_ view_product__3AOqY']/a"))).click()
File "C:\Users\raisu\anaconda3\envs\Scrapy_Workspace2\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
Instead of using the XPath, I've used a CSS_SELECTOR, and I also changed the wait condition to element-to-be-visible:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_path = which('chromedriver')
driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
driver.set_window_size(1920, 1080)
driver.get('https://www.galaxus.ch/search?q=5010533606001')
#time.sleep(5) use this only if the wait is not working
wait = WebDriverWait(driver, 20)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'article > a')))
driver.find_element(By.CSS_SELECTOR,'article > a').click()
driver.close()
I'm trying to grab the price from this page: https://www.eq3.com/ca/en/product/cjv8cke45026q01786nahx8uf/lighting/lighting/pendant-lamps/nelson-bell-bubble-pendant-lamp?cjv49km02036401865sece5be=cjv49km0503650186hc90zvnq
It should be getting $1,376.15 CAD; however, I'm getting some other text from the page, and sometimes it doesn't work at all and gives me:
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
Here's my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

DRIVER_PATH = '/usr/bin/chromedriver'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
driver = webdriver.Chrome(executable_path=DRIVER_PATH, options=options)
XPATH = '//*[@id="app"]/main/div/div/section[1]/div/div[3]/div/span[1]'
url = 'https://www.eq3.com/ca/en/product/cjv8cke45026q01786nahx8uf/lighting/lighting/pendant-lamps/nelson-bell-bubble-pendant-lamp?cjv49km02036401865sece5be=cjv49km0503650186hc90zvnq'
driver.get(url)
price = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, XPATH)))
print(price)
driver.quit()
Try that out:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC
DRIVER_PATH = '/usr/bin/chromedriver'
options = Options()
options.add_argument('--headless')
driver = webdriver.Chrome(executable_path=DRIVER_PATH, options=options)
url = "https://www.eq3.com/ca/en/product/cjv8cke45026q01786nahx8uf/lighting/lighting/pendant-lamps/nelson-bell-bubble-pendant-lamp?cjv49km02036401865sece5be=cjv49km0503650186hc90zvnq"
driver.get(url)
el = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//span[contains(@class,'MuiTypography-root') and contains(@class,'MuiTypography-h3')][1]")))
text = el.text
print(text)
driver.quit()
I have it as wait-until-clickable so I can keep reloading on my own, but I want it to just do it on its own, because constantly reloading isn't fun and is a waste of time.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get('link.com')
wait = WebDriverWait(driver, 1000)
AddToKart = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="content"]/div/div/div[1]/div[3]/div[1]/section[2]/section/div[15]/div/div[2]/button'))).click()
wait = WebDriverWait(driver, 5)
count = 1
while(count==1):
    count=0
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="content"]/div/div/div[1]/div[3]/div[1]/section[2]/section/div[15]/div/div[2]/button'))).click()
    except Exception as e:
        print(e)
        count=1
        try:
            driver.refresh()
        except Exception as e:
            print(e)
Just wrap it in a while loop with a try/except.
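A slightly tighter version of the same idea catches the specific TimeoutException and breaks out once the click succeeds (XPath and names taken from the snippet above):
from selenium.common.exceptions import TimeoutException

wait = WebDriverWait(driver, 5)
while True:
    try:
        # retry until the button becomes clickable, then click it
        wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="content"]/div/div/div[1]/div[3]/div[1]/section[2]/section/div[15]/div/div[2]/button'))).click()
        break  # clicked successfully, stop retrying
    except TimeoutException:
        driver.refresh()  # reload the page and try again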