Make a DataFrame of the sub-links and their titles on a home page - Selenium

I want to get each sub-link and its title from the home page of a website. I tried to do it with Selenium but only got the home-page link. Please help me get all sub-links and their titles, like: (link0, title0), (link1, title1), (link2, title2), (link3, title3), ..., (linkN, titleN).
I tried the code below, but it does not work:
# Collect the article links from the Naver cafe home page into a simple
# {'URL': [...], 'Title': [...]} structure (e.g. to build a DataFrame later).
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

tem_data = {'URL': [],  # make database
            'Title': []}
driver = webdriver.Edge()
driver.get("https://m.cafe.naver.com/ca-fe/minivelobike")
# XPath attribute tests use '@', not '#': "//a[#class=...]" is invalid XPath.
URL = [my_elem.get_attribute("href") for my_elem in
       WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located(
           (By.XPATH, "//a[@class='txt_area' and @href]")))]
Title = "some code to get 'tit"  # TODO: get title of each URL
tem_data['URL'].append(URL)
tem_data['Title'].append(Title)
print(tem_data)

Related

Can't determine the element on Spotify bar search

# Open Chrome with an existing user profile and type a query into the
# Spotify search box.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

Path = r"C:\WebDriverEdge\chromedriver.exe"
service = Service(Path)
options = Options()
options.add_argument('--user-data-dir=C:\\Users\\Admin\\AppData\\Local\\Google\\Chrome\\User Data\\')
options.add_argument("--profile-directory=Profile 1")
# connect to driver
driver = webdriver.Chrome(service=service, options=options)
driver.get("https://open.spotify.com/search")
# XPath attributes need '@' — the scraped '#id' is invalid XPath syntax.
x_path = '//*[@id="main"]/div/div[2]/div[1]/header/div[3]/div/div/form/input'
search = driver.find_element(By.XPATH, x_path)
action = webdriver.ActionChains(driver)
action.move_to_element(search).send_keys("Let me").perform()
I am trying to click on the search bar at Spotify and use it to search. My problem is that when I am already logged in my code fails with the error "unable to find element", but without signing in I can fill the search bar easily.
I don't know why. Has anyone run into this before? Thanks in advance.
P.S.: the XPath is still the same.
I'd rather use the form[role='search'] input CSS selector with an explicit wait, like below:
driver.get("https://open.spotify.com/search")
driver.maximize_window()
wait = WebDriverWait(driver, 20)
# Target the <input> inside the search form: sending keys to the <form>
# element itself does not focus the text field.
wait.until(EC.visibility_of_element_located(
    (By.CSS_SELECTOR, "form[role='search'] input"))).send_keys('Let me')
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Get link and title of sub link in a homepage

I want to get a link inside the homepage of the website,
I tried to get it by selenium but only get the homepage link.
please help me to get all links inside.
Code trials:
# Print the href of every anchor on the cafe home page.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

driver = webdriver.Edge()
driver.get('https://m.cafe.naver.com/ca-fe/minivelobike')
time.sleep(7)
# Selenium 4 removed find_elements_by_xpath(); use find_elements(By.XPATH, ...).
# The XPath attribute test is '@href', not '#href'.
elems = driver.find_elements(By.XPATH, "//a[@href]")
for elem in elems:
    print(elem.get_attribute("href"))
driver.close()
Snapshot of the links:
To print the values of the href attributes of the articles you need to induce WebDriverWait for visibility_of_all_elements_located(), and you can use either of the following locator strategies:
Using CSS_SELECTOR:
# Wait for the article anchors to become visible, then print their hrefs.
driver.get("https://m.cafe.naver.com/ca-fe/minivelobike")
waiter = WebDriverWait(driver, 20)
article_links = waiter.until(
    EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "a.txt_area[href]")))
print([link.get_attribute("href") for link in article_links])
Using XPATH:
driver.get("https://m.cafe.naver.com/ca-fe/minivelobike")
# '@' (not '#') is the XPath shorthand for the attribute axis.
print([my_elem.get_attribute("href") for my_elem in
       WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located(
           (By.XPATH, "//a[@class='txt_area' and @href]")))])
Console Output:
['https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074719&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074718&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074717&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074716&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074715&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074714&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074713&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074712&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074711&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074710&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074709&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074708&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074707&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074704&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074703&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074702&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074701&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074700&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074699&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074698&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074697&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074696&boardtype=L', 
'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074695&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074694&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074693&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074692&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074691&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074690&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074689&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074688&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074687&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074686&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074685&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074682&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074681&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074680&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074679&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074678&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074677&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074676&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074675&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074674&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074673&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074672&boardtype=L', 
'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074671&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074670&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074669&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074668&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074667&boardtype=L', 'https://m.cafe.naver.com/ArticleRead.nhn?clubid=11853711&articleid=1074666&boardtype=L']
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
I think this should work:
# Walk the article list (inside the element with id "ct") and print each
# list item's anchor href.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time

driver = webdriver.Edge()
driver.get('https://m.cafe.naver.com/ca-fe/minivelobike')
time.sleep(7)
wrapper = driver.find_element(By.ID, "ct")
main_ul = wrapper.find_element(By.TAG_NAME, "ul")
for li in main_ul.find_elements(By.TAG_NAME, "li"):
    try:
        anchor_tag = li.find_element(By.TAG_NAME, "a")
        href = anchor_tag.get_attribute("href")
        print(href)
    except NoSuchElementException:
        # Catch only the "no <a> in this <li>" case; a bare except would
        # also swallow unrelated errors (e.g. KeyboardInterrupt).
        print("Anchor tag doesnt exist")

Take the title of each sub-link on the home page

I've already got the sub-links on the home page, but I am trying to get the title for each sub-link.
Can anyone help me? Thank you!
# Collect the article links on the cafe home page into a
# {'URL': [...], 'Title': [...]} structure.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

tem_data = {'URL': [],  # make database
            'Title': []}
driver = webdriver.Edge()
driver.get("https://m.cafe.naver.com/ca-fe/minivelobike")
# XPath attribute tests use '@class' / '@href' ('#' is invalid XPath).
URL = [my_elem.get_attribute("href") for my_elem in
       WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located(
           (By.XPATH, "//a[@class='txt_area' and @href]")))]
Title = "some code to get 'tit"  # get title of each URL
tem_data['URL'].append(URL)
tem_data['Title'].append(Title)
print(tem_data)
Keyword_seach = ('xxx.db')  # NOTE(review): unused here — presumably a DB file name for later; confirm
The below should get you the titles:
# Collect the visible text of each article-title element (<strong class="tit">).
Title = [my_elem.text for my_elem in driver.find_elements(By.CSS_SELECTOR, "strong.tit")]
The thing about Korean characters is that if you want to view them in your terminal, you will need to set your output encoding to UTF-8:
import sys
# Korean titles need a UTF-8-capable stream; reconfigure stdout before printing.
sys.stdout.reconfigure(encoding='utf-8')
print(tem_data['Title'])

Get text inside the href link inside the span marker using Selenium

How to extract the text which is displayed as part of the link inside the span marker.
<span class="pull-left w-100 font30 medium_blue_type mb10"><a href='/XLY'>XLY</a></span> <span class="w-100">Largest Allocation</span>
Output:
XLY
I've tried several approaches, among all, using
# Selenium 4 removed the find_element(s)_by_* helpers, and
# find_elements_by_class_name took a single class name, not a CSS selector —
# use By.CSS_SELECTOR / By.XPATH instead ('@class', not '#class', in XPath).
elems = driver.find_elements(By.CSS_SELECTOR, "span.pull-left.w-100.font30.medium_blue_type.mb10")
elems = driver.find_element(By.XPATH, './/span[@class = "pull-left w-100 font30 medium_blue_type mb10"]')
but can't get it working. The website is https://www.etf.com/stock/TSLA.
EDIT:
Is it possible to do it without opening the window in the browser, e.g. using "headless" option?
from selenium.webdriver.chrome.service import Service

op = webdriver.ChromeOptions()
# Modern Chrome expects the full flag name '--headless'.
op.add_argument('--headless')
# Selenium 4 removed the positional executable path; wrap it in a Service.
driver = webdriver.Chrome(service=Service(CHROME_DRIVER_PATH), options=op)
If you prefer to have a text-based locators, you can use the below:
//span[text()='Largest Allocation']/../span
You should click on the cookies I understand button first.
Make use of explicit waits.
So your effective code would be:
from selenium.common.exceptions import TimeoutException

# Accept the cookie banner (when present), then read the text of the
# "Largest Allocation" value.
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://www.etf.com/stock/TSLA")
try:
    wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "I Understand"))).click()
    print("Clicked on I understand button")
except TimeoutException:
    # The banner is not always shown; catching only the wait timeout keeps
    # this best-effort without hiding unrelated failures.
    pass
txt = wait.until(EC.visibility_of_element_located(
    (By.XPATH, "//span[text()='Largest Allocation']/../span"))).text
print(txt)
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Output:
Clicked on I understand button
XLY
Process finished with exit code 0
If you are looking for locators not based on text, use the below line of code:
txt = wait.until(EC.visibility_of_element_located((By.XPATH, "(//span[contains(#class,'medium_blue_type')]//a)[2]"))).text
There are several possible problems here:
Maybe you are missing a delay
The locator you are using may be not unique
I can see here you are extracting the attribute value from the returned web element
The web element can be inside iframe etc.
Based on currently shared information you can try adding a wait and extracting the web element value as following:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 20)
# Fixes vs. the original line: '@class' instead of the invalid '#class',
# single quotes inside the double-quoted XPath (the nested double quotes were
# a syntax error), and the href is read from the <a> child — the <span>
# itself carries no href attribute.
href = wait.until(EC.visibility_of_element_located(
    (By.XPATH, "//span[@class='pull-left w-100 font30 medium_blue_type mb10']/a"))).get_attribute("href")
Use the following xpath to identify the href link.
//div[./span[text()='Largest Allocation']]//a
You need to induce some delay to get the element.
Use WebDriverWait() and wait for visibility of the element.
To get the text:
print(WebDriverWait(driver,10).until(EC.visibility_of_element_located((By.XPATH, "//div[./span[text()='Largest Allocation']]//a"))).text)
To get the href:
print(WebDriverWait(driver,10).until(EC.visibility_of_element_located((By.XPATH, "//div[./span[text()='Largest Allocation']]//a"))).get_attribute("href"))
you need to import below libraries.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

Selenium will not click class id

I am trying to have selenium to do the following:
Open a website
Click on the search box
Type "Seattle" in the search box
Select the first result from the suggested results
My code fails at Step 2.
The class id for the search box is "class = input_search ng-pristine ng-valid ng-empty ng-touched"
Here's my code:
# Open wego.here.com, click the search box, type "Seattle" and submit.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome()
url = 'https://wego.here.com/'
driver.get(url)
# Selenium 4 removed find_element_by_css_selector(); use find_element(By..., ...).
# Locate the element once: re-finding by '.ng-empty' after typing fails,
# because Angular drops that class as soon as the field has content.
search_box = driver.find_element(By.CSS_SELECTOR, '.input_search.ng-pristine.ng-valid.ng-empty.ng-touched')
search_box.click()
search_box.send_keys('Seattle')
search_box.send_keys(Keys.ENTER)
Any suggestions would be greatly appreciated!
ADDITIONAL QUESTION
Thanks to @Prophet I was able to get the first auto-click and auto-fill done, but when I try to do the same task with a different search box, it does not work. Please refer to the following code that I added to the existing code:
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "button.btn"))).send_keys(Keys.ENTER)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input.itinerary_item_input_0"))).click()
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input.itinerary_item_input_0"))).send_keys('Chicago')
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input.itinerary_item_input_0"))).send_keys(Keys.ENTER)
button.btn did work, but not the input.itinerary_item_input_0. Here is the source screenshot:
You are using a wrong locator.
ng-empty and ng-touched may not be there all the times.
So instead of
driver.find_element_by_css_selector('.input_search.ng-pristine.ng-valid.ng-empty.ng-touched').click()
Try using this:
driver.find_element_by_css_selector('input.input_search').click()
input.input_search is an unique, stable locator for that element.
Also, you have to add a delay, preferably to use the expected conditions, as following:
# Open the HERE Maps home page and click the search input once it is visible.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 20)
url = 'https://wego.here.com/'
driver.get(url)
search_input = wait.until(
    EC.visibility_of_element_located((By.CSS_SELECTOR, "input.input_search")))
search_input.click()
# Type the query, then pick the first suggestion from the dropdown.
wait = WebDriverWait(driver, 60)
driver.get('https://wego.here.com/')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input.input_search"))).send_keys("Seattle")
# XPath uses '@class' — the scraped '#class' is not valid XPath syntax.
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='dropdown_list']/div[1]"))).click()
This will select the first option after sending a string to input tag.
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC