XPath is printing a repeating value - Selenium

I want to get the "data-id" value for each row of this webpage.
I copied the XPath of the first row, but it just prints the "data-id" value from the first row x times. The expected result is that each row's own data-id gets printed.
Expected Output:
"514"
"515"
"516"
...
...
import time
from selenium import webdriver

driver = webdriver.Chrome()
driver.get('https://www.abstractsonline.com/pp8/#!/10517/sessions/#timeSlot=Apr08/1')
page_source = driver.page_source
element = driver.find_elements_by_xpath('.//li[@class="result clearfix"]')
for el in element:
    id = el.find_element_by_xpath('/html/body/div[1]/div[2]/div/div/div[2]/div[2]/div/div[2]/div[2]/ul/li[1]/div[1]/div[1]/h1').get_attribute("data-id")
    print(id)

Try something like this:
import time
from selenium import webdriver

driver = webdriver.Chrome()
driver.get('https://www.abstractsonline.com/pp8/#!/10517/sessions/#timeSlot=Apr08/1')
# page_source = driver.page_source
element = driver.find_elements_by_xpath('.//li[@class="result clearfix"]//h1')
for el in element:
    id = el.get_attribute("data-id")
    print(id)
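Note that newer Selenium releases (4.3+) have removed the find_elements_by_* helpers. A minimal sketch of the same fix using the By locator API (same URL and XPath as above):

from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get('https://www.abstractsonline.com/pp8/#!/10517/sessions/#timeSlot=Apr08/1')
# Find every <h1> inside a result row and read its data-id attribute
for el in driver.find_elements(By.XPATH, '//li[@class="result clearfix"]//h1'):
    print(el.get_attribute("data-id"))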

Related

Web Scrape - Scrape Fields with Same Tags

I get to a page with many rows of data per page.
My code reaches each row, and I am able to scrape the title of each row.
However, all the data after that appears to have the same tag names (for example the author, the program number, etc.).
Given this, how do I scrape all the data within each row?
from selenium import webdriver
from bs4 import BeautifulSoup
import time

driver = webdriver.Chrome()
baseURL = 'https://index.mirasmart.com/aan2022/'
for x in range(1, 3):
    driver.get(f'https://index.mirasmart.com/aan2022/SearchResults.php?pg={x}')
    time.sleep(3)
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')
    eachRow = soup.find_all('div', class_='full search-result')
    for item in eachRow:
        title = item.find('h2').text
        print(title)
You could use the :-soup-contains() CSS selector to target fields by their label text:
for result in soup.select('.detail'):
    print('title: ', result.find_previous('h2').text)
    for item in ['Author:', 'Session Name:', 'Author Disclosures:', 'Topic:', 'Program Number:', 'Author Institution:']:
        try:
            print(item, result.select_one(f'.cell:-soup-contains("{item}")').find_next('span').text)
        except:
            print(f'{item} not found')
    print()
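Here is a minimal sketch of how that selector could slot into the paging loop from the question. The .detail and .cell class names come from the answer above; the rest of the page structure is assumed, not verified, and :-soup-contains() needs a reasonably recent soupsieve/BeautifulSoup:

from selenium import webdriver
from bs4 import BeautifulSoup
import time

# Field labels taken from the answer above
FIELDS = ['Author:', 'Session Name:', 'Author Disclosures:', 'Topic:',
          'Program Number:', 'Author Institution:']

driver = webdriver.Chrome()
for x in range(1, 3):
    driver.get(f'https://index.mirasmart.com/aan2022/SearchResults.php?pg={x}')
    time.sleep(3)  # crude fixed wait, as in the question
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    for result in soup.select('.detail'):
        print('title:', result.find_previous('h2').text)
        for label in FIELDS:
            cell = result.select_one(f'.cell:-soup-contains("{label}")')
            span = cell.find_next('span') if cell else None
            print(label, span.text if span else 'not found')
        print()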

Unable to locate element: XPath

I have made a YouTube automation bot. I am getting the error "unable to locate element" (for the XPath of the subscribe button).
Here is my code:
from selenium import webdriver
from selenium import common
from selenium.webdriver.common import keys
from webdriver_manager.firefox import GeckoDriverManager
import time

class actions:
    def __init__(self, email, password):
        self.email = email
        self.password = password
        profile = webdriver.FirefoxProfile()
        profile.set_preference("dom.webdriver.enabled", False)
        profile.set_preference('useAutomationExtension', False)
        profile.update_preferences()
        driver = webdriver.Firefox(
            executable_path=GeckoDriverManager().install(), firefox_profile=profile)
        self.bot = driver
        # self.bot.maximize_window()
        self.bot.set_window_size(400, 700)
        self.is_logged_in = False

    def login(self):
        bot = self.bot
        bot.get("https://accounts.google.com/signin/v2/identifier?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Den%26next%3Dhttps%253A%252F%252Fwww.youtube.com%252F&hl=en&ec=65620&flowName=GlifWebSignIn&flowEntry=ServiceLogin")
        time.sleep(5)
        try:
            email = bot.find_element_by_name('identifier')
        except common.exceptions.NoSuchElementException:
            time.sleep(5)
            email = bot.find_element_by_name('identifier')
        email.clear()
        email.send_keys(self.email + keys.Keys.RETURN)
        time.sleep(5)
        try:
            password = bot.find_element_by_name('password')
        except common.exceptions.NoSuchElementException:
            time.sleep(5)
            password = bot.find_element_by_name('password')
        password.clear()
        password.send_keys(self.password + keys.Keys.RETURN)
        time.sleep(5)
        self.is_logged_in = True

    def kill(self):
        bot = self.bot
        bot.quit()

    def subscribe(self, url):
        if not self.is_logged_in:
            return
        bot = self.bot
        bot.get(url)
        time.sleep(4)
        try:
            value = bot.find_element_by_xpath(
                '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[7]/div[2]/ytd-video-secondary-info-renderer/div/div/div/ytd-subscribe-button-renderer/tp-yt-paper-button').get_attribute('aria-label')
            value = value.split()
        except:
            bot.execute_script(
                'window.scrollTo(0,document.body.scrollHeight/3.5)')
            time.sleep(3)
            value = bot.find_element_by_xpath(
                '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[7]/div[2]/ytd-video-secondary-info-renderer/div/div/div/ytd-subscribe-button-renderer/tp-yt-paper-button').get_attribute('aria-label')
            value = value.split(':')
        if value[0] == "Subscribe":
            try:
                bot.find_element_by_xpath(
                    '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[7]/div[2]/ytd-video-secondary-info-renderer/div/div/div/ytd-subscribe-button-renderer/tp-yt-paper-button').click()
                time.sleep(3)
            except:
                bot.execute_script(
                    'window.scrollTo(0,document.body.scrollHeight/3.5)')
                time.sleep(3)
                bot.find_element_by_xpath(
                    '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[7]/div[2]/ytd-video-secondary-info-renderer/div/div/div/ytd-subscribe-button-renderer/tp-yt-paper-button').click()
                time.sleep(3)
How can I resolve this issue? I am not able to understand where things are going wrong. Should I try finding elements by id or some other way instead of XPath?
Or is there a problem with some software?
Please help me out.
Always use relative XPaths in your tests. Using absolute XPaths will cause regular test failures.
Refer to this tutorial about writing relative XPaths: https://www.guru99.com/xpath-selenium.html
This extension will help you write relative XPaths: https://chrome.google.com/webstore/detail/chropath/ljngjbnaijcbncmcnjfhigebomdlkcjo
You can also write XPaths in different ways using functions like text(), starts-with(), and contains(), so you can locate elements by their visible text as well.
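As a concrete illustration of that advice, here is a minimal sketch of a relative locator for the subscribe button. The ytd-subscribe-button-renderer tag comes from the absolute XPath in the question, but YouTube's DOM changes often, so treat the selector as an assumption to verify in devtools:

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# 'bot' is the webdriver instance from the question's class; the relative
# XPath anchors on the component tag instead of the full path from /html.
subscribe_btn = WebDriverWait(bot, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, "//ytd-subscribe-button-renderer//tp-yt-paper-button")))
if (subscribe_btn.get_attribute("aria-label") or "").startswith("Subscribe"):
    subscribe_btn.click()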

Separating the scraped result of an active webpage into an array using Python

I am trying to scrape stock data, but even though I'm using "find elements by id", the result is one block of text.
I have tried various methods, such as find elements by xpath, etc.
I also tried to build an array containing all the IDs by finding the 'target' attribute so I could loop through it, but I wasn't successful, so I had to hard-code each ID.
import json
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

url = 'http://www.tsetmc.com/Loader.aspx?ParTree=15131F'
delay = 100
driver = webdriver.Chrome()
driver.get(url)
WebDriverWait(driver, delay)
zapna = driver.find_elements_by_id(id_='43479730079120887')
renik = driver.find_elements_by_id(id_='33854964748757477')
retko = driver.find_elements_by_id(id_='3823243780502959')
rampna = driver.find_elements_by_id(id_='67126881188552864')
mafakher = driver.find_elements_by_id(id_='4247709727327181')
for ii in retko:
    print(ii.text, "\n")
driver.close()
and the result is:
رتكوكنترل‌خوردگي‌تكين‌كو2,1512.531M63.044 B25,14523,88824,900-245-0.9724,907-238-0.9523,88825,699-749-33.2512,55324,90024,9035,4601
What I expect is:
رتكو
كنترل‌خوردگي‌تكين‌كو
2,151
2.531M
63.044 B
25,145
23,888
24,900
-245
-0.97
24,907
-238
-0.95
23,888
25,699
-749
-33.25
1
2,553
24,900
24,903
5,460
1
Any idea?
You just have to go one layer deeper (using, for example, xpath) and iterate through the children:
for ii in retko:
    targets = ii.find_elements_by_xpath('.//*')
    for target in targets:
        print(target.text)
Output:
رتكو
رتكو
كنترل‌خوردگي‌تكين‌كو
كنترل‌خوردگي‌تكين‌كو
3,149
3.235M
3.235M
etc.
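The values appear twice because './/*' matches nested wrappers as well as the elements that actually hold the text. If those duplicates are unwanted, here is a small sketch that keeps only leaf elements (elements with no children), under the assumption that each value lives in a leaf cell:

for ii in retko:
    # './/*[not(*)]' selects descendants without child elements (leaves),
    # so nested wrappers are not printed a second time
    for target in ii.find_elements_by_xpath('.//*[not(*)]'):
        if target.text:
            print(target.text)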

Selenium navigation keeps looping (Python)

I've just started using Selenium to scrape a table from a webpage, so I implemented the page navigation with Selenium. But the result keeps looping when I run the code. I'm pretty sure I wrote the code wrong. What should I fix so that the Selenium navigation works?
import requests
import csv
import time
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

browser = webdriver.Chrome()
browser.get('https://dir.businessworld.com.my/15/posts/16-Computers-The-Internet')
# url = requests.get("https://dir.businessworld.com.my/15/posts/16-Computers-The-Internet/")
soup = bs(browser.page_source)
filename = "C:/Users/User/Desktop/test.csv"
csv_writer = csv.writer(open(filename, 'w'))
pages_remaining = True
while pages_remaining:
    for tr in soup.find_all("tr"):
        data = []
        # for headers ( entered only once - the first time - )
        for th in tr.find_all("th"):
            data.append(th.text)
        if data:
            print("Inserting headers : {}".format(','.join(data)))
            csv_writer.writerow(data)
            continue
        for td in tr.find_all("td"):
            if td.a:
                data.append(td.a.text.strip())
            else:
                data.append(td.text.strip())
        if data:
            print("Inserting data: {}".format(','.join(data)))
            csv_writer.writerow(data)
    try:
        # Checks if there are more pages with links
        next_link = driver.find_element_by_xpath('//*[@id="content"]/div[3]/table/tbody/tr/td[2]/table/tbody/tr/td[6]/a ]')
        next_link.click()
        time.sleep(30)
    except NoSuchElementException:
        rows_remaining = False
Check if there is a Next button present on the page; if so, click it, otherwise exit the while loop:
if len(browser.find_elements_by_xpath("//a[contains(.,'Next')]")) > 0:
    browser.find_element_by_xpath("//a[contains(.,'Next')]").click()
else:
    break
No need to use time.sleep(); use WebDriverWait() instead.
Code:
import csv
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://dir.businessworld.com.my/15/posts/16-Computers-The-Internet')
WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "table.postlisting")))
filename = "C:/Users/User/Desktop/test.csv"
csv_writer = csv.writer(open(filename, 'w'))
pages_remaining = True
while pages_remaining:
    WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "table.postlisting")))
    # Re-parse the page source on every iteration so each new page's rows are read
    soup = bs(browser.page_source, 'html.parser')
    for tr in soup.find_all("tr"):
        data = []
        # for headers ( entered only once - the first time - )
        for th in tr.find_all("th"):
            data.append(th.text)
        if data:
            print("Inserting headers : {}".format(','.join(data)))
            csv_writer.writerow(data)
            continue
        for td in tr.find_all("td"):
            if td.a:
                data.append(td.a.text.strip())
            else:
                data.append(td.text.strip())
        if data:
            print("Inserting data: {}".format(','.join(data)))
            csv_writer.writerow(data)
    if len(browser.find_elements_by_xpath("//a[contains(.,'Next')]")) > 0:
        browser.find_element_by_xpath("//a[contains(.,'Next')]").click()
    else:
        break
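One further hardening step worth considering (my suggestion, not part of the answer above): after clicking Next, wait for the old table to go stale so the next page_source read cannot race the page load:

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

old_table = browser.find_element(By.CSS_SELECTOR, "table.postlisting")
browser.find_element(By.XPATH, "//a[contains(.,'Next')]").click()
# Blocks until the previous page's table is detached from the DOM
WebDriverWait(browser, 10).until(EC.staleness_of(old_table))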

Java Selenium - Can't select date from calendar popup

I'm new to automation with Selenium WebDriver. I'm trying to book a flight on a travel website (ryanair.com).
I'm getting stuck on a popup datepicker. I can use .sendKeys to enter the day only: clicking any field of the date inputs triggers the popup calendar, so if the input format is dd/mm/yyyy and I want to enter "20042019", it only enters 20 in the dd part, then automatically autocompletes the current month and year and opens the calendar popup.
I've read a few articles saying that these calendars are usually of two types:
1. iframe
2. datepicker - I think the one on ryanair is a datepicker, based on the XPath below
//div[@id='datetimepicker_dateview']//table//tbody//td[not(contains(@class,'k-other-month'))]
Maybe that's the wrong XPath? But I think it's correct.
I have tried to find the list of dates to book using:
List<WebElement> list_AllDateToBook = driver.findElements(By.xpath("//div[@id='datetimepicker_dateview']//table//tbody//td[not(contains(@class,'k-other-month'))]"));
System.out.println("size of list is : " + list_AllDateToBook.size());
System.out.println("list is : " + list_AllDateToBook);
Output is:
size of list is : 0
list is : []
When I use XPath to enter the date into the date field, it works for the first input:
WebElement day = driver.findElement(By.xpath("//*[@id='row-dates-pax']/div[1]/div/div[1]/div/div[2]/div[2]/div/input[1]"));
However, when I change the XPath to the second input, it won't enter the second value (the month):
WebElement day = driver.findElement(By.xpath("//*[@id='row-dates-pax']/div[1]/div/div[1]/div/div[2]/div[2]/div/input[2]"));
A sample of the datepicker HTML is below (it's too long to add it all!)
<core-datepicker class="start-date" default-date="" start-date="18-03-2019" end-date="28-03-2020" highlight-from="20-03-2019" highlight-to="20-04-2019" end-show="true" fly-out="true" value="dateRange.startDate" cb-event-id="cal-selector:select-start-date" unavailable-dates="dateRange.unavailabiltyStartDates" selected-month-view="dateRange.startDateSelectedMonthView" show-month-toggle="::dateRange.showMonthToggle" show-single-month=""><!----><div ng-class="::{'has-monthly-toggle': isMonthToggleVisible()}"><div bindonce="" class="datepicker-wrapper r scrollable value-selected" ng-class="{ scrollable: !device.isPhone(), mobile: device.isPhone(), 'value-selected': value, 'six-rows': checkIf6Rows()}" style="transition-timing-function: cubic-bezier(0.1, 0.57, 0.1, 1); transition-duration: 0ms; transform: translate(0px, 0px) translateZ(0px);"><!----><!----><ul ng-if="!device.isPhone()"><!----><li bindonce="" ng-repeat="i in _monthViews" ng-class="{ 'starting-month': checkIfIsSame(getDate(i), highlightFrom, 'month'), 'selected-month': checkIfIsSame(getDate(i), value, 'month'), 'highlight-on': canHighlight(i) }" class="calendar-view starting-month selected-month"><h1 class="month-name">March 2019</h1><ul class="week-days"><!---->
I'm really stuck here. Any advice would be great.
Thanks
Here is working code in Python. Please see the approach I followed and simulate the same in Java.
import os
import time
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def get_full_path_to_folder(folderName):
    folders = os.path.abspath(os.pardir).split(os.sep)
    folderPath = ''
    for folder in folders:
        if folderPath == '':
            folderPath = folder
        else:
            folderPath = folderPath + "\\" + folder
        if os.path.isdir(os.path.join(folderPath, folderName)):
            return os.path.join(folderPath, folderName)

def wait_for_element_present(locator_type, locator):
    if locator_type == 'xpath':
        return wait.until(EC.presence_of_element_located((By.XPATH, locator)))
    elif locator_type == "css":
        return wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, locator)))

chrome_options = ChromeOptions()
chrome_options.add_argument("--incognito")
driver = webdriver.Chrome(executable_path=os.path.join(get_full_path_to_folder('drivers'), "chromedriver.exe"))
driver.implicitly_wait(10)
wait = WebDriverWait(driver, 10)

url = "https://www.ryanair.com/us/en/"
# go to url
driver.get(url)

# ================ you should focus on the code below =======================
# close the cookie pop-up, otherwise the date and country elements are not
# interactable for selenium
wait_for_element_present('css', ".cookie-popup__close").click()
# click on the destination edit box
driver.find_element_by_css_selector(".route-selector-destination").click()
# select destination country
destiCountry = wait_for_element_present('xpath', "//div[@ng-repeat='option in $ctrl.firstOptions track by option.id' and text()=' Italy']")
destiCountry.click()
# select destination airport
desti = wait_for_element_present('xpath', "//span[contains(@ng-if,'secondOptionsEnabled') and .='Naples']")
desti.click()
# select outbound date
dateEle = wait_for_element_present('css', "li[data-id='24-03-2019']")
if dateEle.value_of_css_property('font-size') == '15px':
    dateEle.click()
# select inbound date
dateEle = wait_for_element_present('css', "li[data-id='20-04-2019']")
if dateEle.value_of_css_property('font-size') == '15px':
    dateEle.click()
# hurray, the date selection is successful
Please try with:
li[data-id='12-04-2019'] span
Note: extend the selector down to the span tag. Just tried it via the console and it works:
document.querySelector("li[data-id='12-04-2019'] span").click()
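For completeness, the same check can be driven from Selenium in Python. A minimal sketch (the data-id value is the one from this answer; the selector and page state, e.g. the cookie pop-up being closed first, are assumptions to verify against the live site):

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

driver = webdriver.Chrome()
driver.get("https://www.ryanair.com/us/en/")
# Wait for the calendar cell's inner <span> and click it, mirroring
# document.querySelector("li[data-id='12-04-2019'] span").click()
cell = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "li[data-id='12-04-2019'] span")))
cell.click()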