I have made a YouTube automation bot. I am getting the error: unable to locate element (for the XPath of the subscribe button).
Here is my code:
from selenium import webdriver
from selenium import common
from selenium.webdriver.common import keys
from webdriver_manager.firefox import GeckoDriverManager
import time


class actions:
    def __init__(self, email, password):
        self.email = email
        self.password = password
        profile = webdriver.FirefoxProfile()
        profile.set_preference("dom.webdriver.enabled", False)
        profile.set_preference('useAutomationExtension', False)
        profile.update_preferences()
        driver = webdriver.Firefox(
            executable_path=GeckoDriverManager().install(), firefox_profile=profile)
        self.bot = driver
        # self.bot.maximize_window()
        self.bot.set_window_size(400, 700)
        self.is_logged_in = False

    def login(self):
        bot = self.bot
        bot.get("https://accounts.google.com/signin/v2/identifier?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Den%26next%3Dhttps%253A%252F%252Fwww.youtube.com%252F&hl=en&ec=65620&flowName=GlifWebSignIn&flowEntry=ServiceLogin")
        time.sleep(5)
        try:
            email = bot.find_element_by_name('identifier')
        except common.exceptions.NoSuchElementException:
            time.sleep(5)
            email = bot.find_element_by_name('identifier')
        email.clear()
        email.send_keys(self.email + keys.Keys.RETURN)
        time.sleep(5)
        try:
            password = bot.find_element_by_name('password')
        except common.exceptions.NoSuchElementException:
            time.sleep(5)
            password = bot.find_element_by_name('password')
        password.clear()
        password.send_keys(self.password + keys.Keys.RETURN)
        time.sleep(5)
        self.is_logged_in = True

    def kill(self):
        bot = self.bot
        bot.quit()

    def subscribe(self, url):
        if not self.is_logged_in:
            return
        bot = self.bot
        bot.get(url)
        time.sleep(4)
        try:
            value = bot.find_element_by_xpath(
                '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[7]/div[2]/ytd-video-secondary-info-renderer/div/div/div/ytd-subscribe-button-renderer/tp-yt-paper-button').get_attribute('aria-label')
            value = value.split()
        except:
            bot.execute_script(
                'window.scrollTo(0,document.body.scrollHeight/3.5)')
            time.sleep(3)
            value = bot.find_element_by_xpath(
                '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[7]/div[2]/ytd-video-secondary-info-renderer/div/div/div/ytd-subscribe-button-renderer/tp-yt-paper-button').get_attribute('aria-label')
            value = value.split(':')
        if value[0] == "Subscribe":
            try:
                bot.find_element_by_xpath(
                    '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[7]/div[2]/ytd-video-secondary-info-renderer/div/div/div/ytd-subscribe-button-renderer/tp-yt-paper-button').click()
                time.sleep(3)
            except:
                bot.execute_script(
                    'window.scrollTo(0,document.body.scrollHeight/3.5)')
                time.sleep(3)
                bot.find_element_by_xpath(
                    '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[7]/div[2]/ytd-video-secondary-info-renderer/div/div/div/ytd-subscribe-button-renderer/tp-yt-paper-button').click()
                time.sleep(3)
How can I resolve this issue? I am not able to understand where things are going wrong. Should I try finding elements by id or other locators instead of XPath?
Or is there a problem with any of the software?
Please help me out.
Always use relative XPaths in your tests. Using absolute XPaths will cause regular test failures.
Refer to this tutorial about writing relative XPaths: https://www.guru99.com/xpath-selenium.html
This extension will help you write relative XPaths: https://chrome.google.com/webstore/detail/chropath/ljngjbnaijcbncmcnjfhigebomdlkcjo
You can write XPaths in different ways using functions like text(), starts-with(), and contains(), so you can also locate elements by their visible text.
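As an illustration only, a relative locator plus an explicit wait could look like the sketch below. It reuses the bot driver from the question; the tag and attribute names are assumptions about YouTube's current markup, so verify them in the browser inspector first.

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Illustrative sketch: anchor on the subscribe-button renderer instead of the full absolute path.
wait = WebDriverWait(bot, 10)
subscribe = wait.until(EC.element_to_be_clickable(
    (By.XPATH, "//ytd-subscribe-button-renderer//tp-yt-paper-button")))
# Or match on an attribute with contains(), for example:
# (By.XPATH, "//tp-yt-paper-button[contains(@aria-label, 'Subscribe')]")
subscribe.click()

The explicit wait also replaces most of the fixed time.sleep() calls, since it polls until the element is actually clickable.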
I need to collect all links from a webpage, as seen below, which also has a "load more news" button. I wrote my script, but it gives only the links from the first page, as if it does not click on the load more news button. I updated some Selenium attributes. I really don't know why I cannot get all the links by clicking on the load_more button.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
import json
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)
url = "..."
base_url = "..."
driver.get(url)
outlinks = []
wait = WebDriverWait(driver, 90)
load_more_button = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'a.listing-load-more-btn[title="Load More News"]')))
num_links = 0
while True:
    links = driver.find_elements(By.CSS_SELECTOR, 'a.text-truncate')
    num_links_new = len(links)
    if num_links_new > num_links:
        num_links = num_links_new
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        load_more_button = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'a.listing-load-more-btn[title="Load More News"]')))
        if load_more_button.is_displayed():
            load_more_button.click()
            sleep(10)
    else:
        break

new_links = driver.find_elements(By.CSS_SELECTOR, 'a.text-truncate')
for link in new_links:
    href = link.get_attribute('href')
    full_url = base_url + href
    enurl = full_url.replace("ar-ae", "en")
    outlinks.append(enurl)
print(outlinks)

data = json.dumps(outlinks)
with open('outlinks.json', 'w') as f:
    f.write(data)

driver.close()
Although you have tagged selenium, this is a much better way to handle it.
Whenever you click on the "load more" button, it sends a POST request to:
https://www.mofaic.gov.ae/api/features/News/NewsListPartialView
So, you can just get all the data from there directly using the requests/BeautifulSoup modules. There's no need for Selenium, and the process will be much faster!
import requests
from bs4 import BeautifulSoup
data = {
    "CurrentPage": "1",
    "CurrentRenderId": "{439EC71A-4231-45C8-B075-975BD41099A7}",
    "CategoryID": "{f9048938-c577-4caa-b1d9-ae1b7a5f1b20}",
    "PageSize": "6",
}

BASE_URL = "https://www.mofaic.gov.ae"
POST_URL = "https://www.mofaic.gov.ae/api/features/News/NewsListPartialView"

response = requests.post(
    POST_URL,
    data=data,
)

for page in range(1, 10):  # <-- Increase this number to get more articles - simulates the "load more" button.
    data["CurrentPage"] = page
    response = requests.post(
        POST_URL,
        data=data,
    )
    soup = BeautifulSoup(response.text, "html.parser")
    for link in soup.select("a.text-truncate"):
        print(BASE_URL + link["href"])
Prints (truncated):
https://www.mofaic.gov.ae/ar-ae/mediahub/news/2023/1/2/02-01-2023-uae-leaders
https://www.mofaic.gov.ae/ar-ae/mediahub/news/2023/1/2/02-01-2023-uae-vatican
https://www.mofaic.gov.ae/ar-ae/mediahub/news/2023/1/2/02-01-2023-uae-fm
https://www.mofaic.gov.ae/ar-ae/mediahub/news/2023/1/1/01-01-2022-uae-cuba
https://www.mofaic.gov.ae/ar-ae/mediahub/news/2023/1/1/01-01-2022-uae-sudan
https://www.mofaic.gov.ae/ar-ae/mediahub/news/2023/1/1/01-01-2023-uae-israel
My code:
# Open website
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
import time

profile_path = r'C:\Users\XXX\AppData\Local\Mozilla\Firefox\Profiles\ndefault-release'
options = Options()
options.binary_location = r'C:\Program Files\Mozilla Firefox\firefox.exe'
options.set_preference('profile', profile_path)
options.add_argument("--no-sandbox")
service = Service(r'C:\Users\XXX\geckodriver.exe')
driver = Firefox(service=service, options=options)

# declaration of variables
name = "x"
suffix = "x"
start_number = 1
end_number = 1000

for i in range(start_number, end_number):
    driver.get('https://www.bauhaus.info/gewinnspiel')
    time.sleep(3)
    # driver.find_element(by=By.XPATH, value="/div/div/div/div/div[2]/div/div[2]/div/div/div/button").click()
    element = driver.execute_script("""return document.querySelector('#usercentrics-root').shadowRoot.querySelector("button[data-testid='uc-accept-all-button']")""")
    element.click()
    time.sleep(1)
    driver.switch_to.frame(0)
    time.sleep(1)
    driver.find_element(by=By.XPATH, value="/html/body/section/div/div/div/div[2]/div/div/div[1]/div[1]/form/div/div[3]/a").click()
    time.sleep(2)
If I run the XPath in the Firefox console I get the right output:
Array [ a.btn.btn-primary ]
But the white window does not open; it only opens if I click on it manually.
shadow_host = driver.find_element(By.ID, "usercentrics-root")
if driver.name == 'firefox':
    shadow_root = driver.execute_script('return arguments[0].shadowRoot.children', shadow_host)[0]
else:
    shadow_root = shadow_host.shadow_root

WebDriverWait(shadow_root, 30).until(EC.presence_of_element_located(
    (By.CSS_SELECTOR, "button[data-testid='uc-accept-all-button']"))).click()

iframe = WebDriverWait(driver, 30).until(EC.presence_of_element_located(
    (By.CSS_SELECTOR, "iframe[title=' blackweekgewinnspiel']")))
driver.switch_to.frame(iframe)

canvas = driver.find_element(By.CSS_SELECTOR, "canvas")
driver.execute_script("arguments[0].scrollIntoView(true);", canvas)
driver.execute_script("arguments[0].click();", canvas)
driver.switch_to.default_content()
Two points to note:
the Accept Cookies popup is in a shadow DOM, for which Chrome + Selenium 4.1 has a ready-made solution, but Firefox will also work with Selenium 4 via execute_script.
the roulette wheel is in an iframe, so you must switch to it before clicking.
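For reference, the Chrome-only path mentioned in the first point can be isolated into a short sketch (same selectors as above; Selenium 4.1+ exposes the shadow root directly on the host element):

# Sketch of the Selenium 4 shadow-root shortcut on Chrome; selectors reused from the snippet above.
shadow_host = driver.find_element(By.ID, "usercentrics-root")
shadow_root = shadow_host.shadow_root  # a ShadowRoot that supports CSS-selector lookups
shadow_root.find_element(By.CSS_SELECTOR, "button[data-testid='uc-accept-all-button']").click()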
In the past I have often run into problems when a website is "lazy loading".
It helped when I searched for the id like this:
element = driver.find_element_by_id ("analyst-estimate")
driver.execute_script ("arguments[0].scrollIntoView();", element)
Now I have seen that this does not work for every site.
On the following site everything works fine:
link = "https://www.gurufocus.com/stock/AAPL/summary"
options = Options ()
options.add_argument ('--headless')
options.add_experimental_option ('excludeSwitches', ['enable-logging'])
path = os.path.abspath (os.path.dirname (sys.argv[0]))
if platform == "win32": cd = '/chromedriver.exe'
elif platform == "linux": cd = '/chromedriver_linux'
elif platform == "darwin": cd = '/chromedriver'
driver = webdriver.Chrome (path + cd, options=options)
driver.get (link) # Read link
time.sleep (2) # Wait till the full site is loaded
element = driver.find_element_by_id ("analyst-estimate")
driver.execute_script ("arguments[0].scrollIntoView();", element)
time.sleep (1)
But on another site (also searching by an id) it is not working at all:
link = "https://finance.yahoo.com/quote/MSFT/analysis?p=MSFT"
options = Options ()
options.add_argument ('--headless')
options.add_experimental_option ('excludeSwitches', ['enable-logging'])
path = os.path.abspath (os.path.dirname (sys.argv[0]))
if platform == "win32": cd = '/chromedriver.exe'
elif platform == "linux": cd = '/chromedriver_linux'
elif platform == "darwin": cd = '/chromedriver'
driver = webdriver.Chrome (path + cd, options=options)
driver.get (link) # Read link
time.sleep (2) # Wait till the full site is loaded
element = driver.find_element_by_id ("YDC-Col1")
# element = driver.find_element_by_id ("Col2-4-QuoteModule-Proxy")
# element = driver.find_element_by_id ("app")
driver.execute_script ("arguments[0].scrollIntoView();", element)
time.sleep (1)
Why is this not working for the second website?
It's the exact same code - why is it not finding the id? It exists on the webpage.
There is an accept-cookies pop-up displayed before the page is loaded; you have to click that first:
WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.NAME, "agree"))).click()
WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.ID, "YDC-Col1")))
Before testing something in headless mode, check in non-headless mode to see the actual behavior, and if it fails only in headless mode, take a screenshot to know the state of the website during the failure.
You can take a screenshot like this:
try:
    link = "https://finance.yahoo.com/quote/MSFT/analysis?p=MSFT"
    options = ChromeOptions()
    options.add_argument('--headless')
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(options=options)
    driver.get(link)  # Read link
    time.sleep(2)  # Wait till the full site is loaded
    element = driver.find_element_by_id("YDC-Col1")
    # element = driver.find_element_by_id("Col2-4-QuoteModule-Proxy")
    # element = driver.find_element_by_id("app")
    driver.execute_script("arguments[0].scrollIntoView();", element)
    time.sleep(1)
except:
    driver.get_screenshot_as_file("a.jpeg")
time.sleep() isn't very reliable for waiting for page loads; switch over to WebDriver waits. Also, the page doesn't seem to finish loading in 2 seconds.
wait = WebDriverWait(driver, 5)
wait.until(EC.presence_of_element_located((By.ID, "YDC-Col1")))
Another issue could be running headless without setting a window size.
options.add_argument('--headless')
options.add_argument("--window-size=1920,1080")
Import
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
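Put together, the pieces in this answer might be combined as in the sketch below; it is untested against the current Yahoo page and simply reuses the locators already shown above (the consent page is only served in some regions, hence the try/except).

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

driver.get("https://finance.yahoo.com/quote/MSFT/analysis?p=MSFT")
wait = WebDriverWait(driver, 10)
try:
    # Dismiss the consent pop-up first if it appears
    wait.until(EC.element_to_be_clickable((By.NAME, "agree"))).click()
except Exception:
    pass  # no consent page was shown
element = wait.until(EC.presence_of_element_located((By.ID, "YDC-Col1")))
driver.execute_script("arguments[0].scrollIntoView();", element)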
I am trying to log in to GitHub using selenium-webdriver.
Whenever I run my Python script, GitHub asks me for a verification code to verify my device, which leads to an unsuccessful login, because it is not possible for me to check my mail and enter the verification code every time. Is there something wrong with my code?
Here is my code:
from selenium import webdriver
from importlib import reload
import time
import sys

reload(sys)
# Firefox used
driver = webdriver.Chrome()
# base url
driver.get("http://github.com/login")

username = driver.find_element_by_id("login_field")
password = driver.find_element_by_id("password")
# password and username need to go into these values
username.send_keys("username")
time.sleep(1)
password.send_keys("password")
time.sleep(1)
login_form = driver.find_element_by_xpath("//input[@value='Sign in']")
time.sleep(1)
login_form.click()
time.sleep(1)

# These are some of the most popular users on github
prepend = ["jashkenas", "ruanyf", "substack", "kennethreitz", "jlord", "daimajia", "mdo", "schacon", "mattt",
           "sindresorhus", "defunkt", "douglascrockford", "mbostock", "jeresig",
           "mojombo", "addyosmani", "paulirish", "vczh", "romannurik", "tenderlove", "chriscoyier", "johnpapa",
           "josevalim",
           "charliesome", "CoderMJLee", "ry", "antirez", "muan", "isaacs", "angusshire",
           "hadley", "hakimel", "yyx990803", "fat", "fabpot", "ibireme", "tekkub",
           "BYVoid", "laruence", "onevcat", "tpope", "mrdoob", "LeaVerou", "chrisbanes", "wycats", "lifesinger",
           "cloudwu", "mitsuhiko", "michaelliao", "ryanb", "clowwindy", "JacksonTian", "yinwang0", "Trinea",
           "pjhyett", "dhh", "gaearon"]

for user in prepend:
    for t in range(1, 100):
        string = "https://github.com/{}/followers?page={}".format(user, t)
        driver.get(string)
        time.sleep(1)
        # make sure to pick the correct directory to save the files to
        # follow_button = driver.find_elements_by_xpath("//button[@type='submit']")
        follow_button = driver.find_elements_by_xpath("//button[@aria-label='Follow this person']")
        # Once page is loaded this clicks all buttons for follow
        try:
            for i in follow_button:
                i.submit()
        except:
            pass
        time.sleep(1)
driver.close()
I have a program to download photos from various websites. Each URL is formed by appending codes to the end of the address; these codes come from a dataframe with 8,583 rows.
The sites use JavaScript, so I use Selenium to access the src of the photos, and I download them with urllib.request.urlretrieve.
Example of a photo site: http://divulgacandcontas.tse.jus.br/divulga/#/candidato/2018/2022802018/PB/150000608817
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from bs4 import BeautifulSoup
import time
import urllib.request, urllib.parse, urllib.error
# Root URL of the site that is accessed to fetch the photo link
url_raiz = 'http://divulgacandcontas.tse.jus.br/divulga/#/candidato/2018/2022802018/'
# Accesses the dataframe that has the "sequencial" type codes
candidatos = pd.read_excel('candidatos_2018.xlsx',sheet_name='Sheet1', converters={'sequencial': lambda x: str(x), 'cpf': lambda x: str(x),'numero_urna': lambda x: str(x)})
# Function that opens each page and takes the link from the photo
def pegalink(url):
    profile = webdriver.FirefoxProfile()
    browser = webdriver.Firefox(profile)
    browser.get(url)
    time.sleep(10)
    html = browser.page_source
    soup = BeautifulSoup(html, "html.parser")
    browser.close()
    link = soup.find("img", {"class": "img-thumbnail img-responsive dvg-cand-foto"})['src']
    return link

# Function that downloads the photo and saves it with the code name "cpf"
def baixa_foto(nome, url):
    urllib.request.urlretrieve(url, nome)

# Iteration in the dataframe
for num, row in candidatos.iterrows():
    cpf = (row['cpf']).strip()
    uf = (row['uf']).strip()
    print(cpf)
    print("-/-")
    sequencial = (row['sequencial']).strip()
    # Creates full page address
    url = url_raiz + uf + '/' + sequencial
    link_foto = pegalink(url)
    baixa_foto(cpf, link_foto)
Please, I am looking for guidance on:
Putting a try/except block in place to wait for the page to load (I'm having errors reading the src - after many hits the site takes more than ten seconds to load).
I would also like to record all possible errors - in a file or dataframe - so I can note the "sequencial" code that gave an error and continue the program.
Would anyone know how to do it? The guidelines below were very useful, but I was unable to move forward.
I put part of the data I use and the program in a folder, if you want to look: https://drive.google.com/drive/folders/1lAnODBgC5ZUDINzGWMcvXKTzU7tVZXsj?usp=sharing
Put your code within:

try:
    WebDriverWait(browser, 30).until(lambda d: page_has_loaded())
    # here goes your code
except Exception:
    print("This is an unexpected condition!")
For waiting until the page has loaded:

def page_has_loaded():
    page_state = browser.execute_script('return document.readyState;')
    return page_state == 'complete'

The 30 above is the timeout in seconds. You can adjust it as per your need.
Approach 2:

class wait_for_page_load(object):
    def __init__(self, browser):
        self.browser = browser

    def __enter__(self):
        self.old_page = self.browser.find_element_by_tag_name('html')

    def page_has_loaded(self):
        new_page = self.browser.find_element_by_tag_name('html')
        return new_page.id != self.old_page.id

    def __exit__(self, *_):
        WebDriverWait(self.browser, 30).until(lambda d: self.page_has_loaded())
def pegalink(url):
    profile = webdriver.FirefoxProfile()
    browser = webdriver.Firefox(profile)
    browser.get(url)
    try:
        with wait_for_page_load(browser):
            html = browser.page_source
            soup = BeautifulSoup(html, "html.parser")
            browser.close()
            link = soup.find("img", {"class": "img-thumbnail img-responsive dvg-cand-foto"})['src']
    except Exception:
        print("This is an unexpected condition!")
        print("Erro em: ", url)
        link = "Erro"
    return link
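For the second part of the question - recording the failing "sequencial" codes and continuing - a minimal sketch could wrap each iteration in try/except and write the failures out at the end. It reuses candidatos, url_raiz, pegalink, and baixa_foto from the question; the error-log filename is just an example.

# Sketch only: collect failures per row, keep iterating, then dump them to a CSV.
erros = []
for num, row in candidatos.iterrows():
    cpf = row['cpf'].strip()
    uf = row['uf'].strip()
    sequencial = row['sequencial'].strip()
    url = url_raiz + uf + '/' + sequencial
    try:
        link_foto = pegalink(url)
        baixa_foto(cpf, link_foto)
    except Exception as e:
        # Record the failing code and move on to the next row
        erros.append({'sequencial': sequencial, 'url': url, 'erro': str(e)})

# Save the collected errors so the failed rows can be retried later
pd.DataFrame(erros).to_csv('erros_download.csv', index=False)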