I'm building a web scraper using Selenium. I was able to read the data from the table on the first and second pages, but I cannot read the data on the following pages. Can anybody help me?
Below are the error I get and the code I am using.
NoSuchElementException: Message: no such element: Unable to locate element:
{"method":"xpath","selector":"//table[1]/tbody[1]/tr[@class='painel' and 1]/td[2]/a[1 and @href='javascript:pesquisar(2);']"}
(Session info: headless chrome=86.0.4240.75)
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import os
import json
url = 'https://www.desaparecidos.pr.gov.br/desaparecidos/desaparecidos.do?action=iniciarProcesso&m=false'
option = Options()
driver = webdriver.Chrome('chromedriver', options=option)  # chrome_options was never defined; pass the Options instance created above
driver.get(url)
time.sleep(5)
lista = driver.find_element_by_xpath('//*[@id="list_tabela"]/tbody')
lista_text = lista.text
print(lista_text)
driver.implicitly_wait(5)
driver.find_element_by_xpath("//table[1]/tbody[1]/tr[@class='painel' and 1]/td[2]/a[1 and @href='javascript:pesquisar(2);']").click()
time.sleep(5)
lista = driver.find_element_by_xpath('//*[@id="list_tabela"]/tbody')
lista_text = lista.text
print(lista_text)
driver.implicitly_wait(10)
driver.find_element_by_xpath("//table[1]/tbody[1]/tr[@class='painel' and 1]/td[2]/a[3 and @href='javascript:pesquisar(3);']").click()
time.sleep(10)
lista = driver.find_element_by_xpath('//*[@id="list_tabela"]/tbody')
lista_text = lista.text
print(lista_text)
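A possible fix is to loop over the pages and locate the next pager link by its href instead of hard-coding one XPath per page. This is a minimal sketch under the assumption that the pager keeps rendering links of the form href="javascript:pesquisar(n);" (the locator ids come from the code above; the waits are crude):
# Minimal sketch (assumes pager links keep the javascript:pesquisar(n) href pattern):
# print each page's table, then click the link to the next page until none is left.
page = 2
while True:
    lista = driver.find_element_by_xpath('//*[@id="list_tabela"]/tbody')
    print(lista.text)
    next_links = driver.find_elements_by_xpath(
        "//a[@href='javascript:pesquisar(%d);']" % page)
    if not next_links:  # no link to the next page: last page reached
        break
    next_links[0].click()
    time.sleep(5)       # crude wait for the table to reload
    page += 1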
I am trying to click the [+] button in the INSSIST extension using Selenium, but I just get an error every time. I just want to know if it is possible to click it; I can't figure it out.
For the code to work, make sure you have the Chrome INSSIST extension installed and have closed all instances of Chrome; otherwise Selenium will not load it correctly, because it is a background extension as well.
Here is the online Instagram alternative called INSSIST:
https://chrome.google.com/webstore/detail/inssist-web-client-for-in/bcocdbombenodlegijagbhdjbifpiijp
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
chrome_options.add_argument(r"user-data-dir=C:\\Users\\Alexander Smith\\AppData\\Local\\Google\\Chrome\\User Data")
chrome_options.add_argument('profile-directory=Profile 2')
driver = webdriver.Chrome(executable_path=r"C:\\Users\\Alexander Smith\\Desktop\\chromedriver.exe", options=chrome_options)
driver.get("chrome-extension://bcocdbombenodlegijagbhdjbifpiijp/inssist.html#instagram.com/")
print("Good")
create = WebDriverWait(driver, 9).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="mount_0_0_3c"]/div/div/div/div[1]/div/div/div/div[1]/div[1]/div[1]/div/div/div/div/div[1]/div')))
print("Good")
ActionChains(driver).move_to_element(create).perform()  # without .perform() the chain is built but never executed
print("Good")
create.send_keys(Keys.RETURN)
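Two things worth checking: auto-generated ids like mount_0_0_3c usually change between page loads, so an XPath anchored on one is fragile; and if the element is located but the click still errors, a JavaScript click often sidesteps interactability problems. A minimal sketch reusing the create element from above:
# Minimal sketch: fall back to a JavaScript click when the normal
# click()/send_keys path raises an interactability error.
driver.execute_script("arguments[0].click();", create)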
I need a way to automatically scroll a browser window that I've opened using Selenium. I am currently using pyautogui, but this requires the window to be selected. This is running on a virtual machine that I'm remoting into, and when I toggle off the VM to work on my main desktop the code does not scroll, so I need a way to auto-scroll a non-active window.
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import numpy as np
from pynput.mouse import Button, Controller
import win32api
import win32con
import pyautogui
import pandas as pd
import re
from selenium.common.exceptions import TimeoutException
pyautogui.FAILSAFE = False
s = Service(r'\\homedirpva1a01\USERSNC$\603225\Edgedriver\msedgedriver.exe')
options = webdriver.EdgeOptions()
options.page_load_strategy = 'eager'  # must be set on an instance and passed to the driver; assigning to the Options class after launch has no effect
browser = webdriver.Edge(service=s, options=options)
time.sleep(5)
linklist=[]
browser.get('https://turo.com/us/en/search?country=US&defaultZoomLevel=11&delivery=false&deliveryLocationType=googlePlace&endDate=01%2F25%2F2023&endTime=10%3A00&isMapSearch=false&itemsPerPage=200&latitude=40.735657&location=Newark%2C%20New%20Jersey%2C%20USA&locationType=CITY&longitude=-74.1723667&pickupType=ALL&placeId=ChIJHQ6aMnBTwokRc-T-3CrcvOE&region=NJ&sortType=RELEVANCE&startDate=01%2F22%2F2023&startTime=10%3A00&useDefaultMaximumDistance=true')
time.sleep(3)
print('start4434')
i4434=1
elems4434=[]
while i4434 < 50:
    if 'An error occurred while searching, please try again' in browser.page_source:
        listsave = ['i4434', 'i4434']
        save = pd.DataFrame(listsave)
        save.to_csv(r'\\PATH\savetest.csv')
        exit()
    elif 'vehicle-card-link-box' not in browser.page_source:
        listsave = ['i4434', 'i4434']
        save = pd.DataFrame(listsave)
        save.to_csv(r'\\PATH\savetest.csv')
        break
    else:
        elems4434 = browser.find_elements(By.XPATH, '//a[@href]')
        distinctlist = list(set(linklist))
        frame = pd.DataFrame(distinctlist)
        frame.to_csv(r'\\PATH\turolinklisttest.csv')
        listsave = ['i4434', 'i4434']
        save = pd.DataFrame(listsave)
        save.to_csv(r'\\PATH\savetest.csv')
        for elem in elems4434:
            if 'car-rental/united-states' in elem.get_attribute('href'):
                linklist.append(elem.get_attribute('href'))
            if 'suv-rental/united-states' in elem.get_attribute('href'):
                linklist.append(elem.get_attribute('href'))
    pyautogui.scroll(-1000)
    time.sleep(3)
    i4434 += 1
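Since pyautogui.scroll() turns the OS mouse wheel and therefore needs the window focused, one focus-independent alternative is to scroll inside the page itself with JavaScript. A minimal sketch that could replace the pyautogui.scroll(-1000) call above:
# Minimal sketch: scroll the document via JavaScript so it works even
# when the browser window is not the active (focused) window.
browser.execute_script("window.scrollBy(0, 1000);")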
I am struggling to keep the session open all the time. The code executes, but only when the link is present.
driver.refresh
doesn't seem to keep the page refreshed (note that refresh is a method, so it must be called as driver.refresh()).
Is there a way to refresh the page every couple of seconds?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
user_name = "username"
password = "password"
driver = webdriver.Chrome()
driver.get('https://mywebsite.com/boardslots.aspx?date=07%2F31%2F2022')
element = driver.find_element(By.ID,"txtUsername")
element.send_keys(user_name)
element = driver.find_element(By.ID,"txtPassword")
element.send_keys(password)
element.send_keys(Keys.RETURN)
while True:
    try:
        driver.find_element(By.LINK_TEXT, "Claim")
    except NoSuchElementException:
        driver.refresh()  # refresh is a method: without the parentheses it never runs
    else:
        driver.find_element(By.LINK_TEXT, "Claim").click()
        try:
            # element = WebDriverWait(driver, 1)
            element = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.XPATH,
                    '/html/body/form/div[3]/div[3]/table/tbody/tr[2]/td/table/tbody/tr[2]/td[9]/a')))
        finally:
            driver.find_element(By.ID, "btnSubmitSingle").click()
            # /html/body/form/div[3]/div[3]/table/tbody/tr[2]/td/table/tbody/tr[2]/td[9]/a
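To refresh every couple of seconds instead of hammering the server in a tight loop, one option is to sleep between refreshes. A minimal sketch of just the polling part, assuming the login above succeeded:
import time

# Minimal sketch: poll for the "Claim" link, refreshing every few seconds.
while True:
    try:
        driver.find_element(By.LINK_TEXT, "Claim").click()
        break
    except NoSuchElementException:
        time.sleep(3)      # pause a couple of seconds between attempts
        driver.refresh()   # then reload the page and look again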
I would like to stay logged in to a site which requires a login about every 2 hours. My idea is to open a parallel session, log in, and inject the cookies into the first session.
To achieve this, I first tried to create a small example that uses the cookies of webdriver Chrome session 1 in webdriver Chrome session 2, but a new login is asked for.
Thanks in advance for your help.
import os
import pickle
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
option = webdriver.ChromeOptions()
option.add_argument("--no-sandbox")
driver = webdriver.Chrome(options=option)
driver.get("https://BANK_WEB_SITE.com/login/")
sleep(5)
LOGIN()  # placeholder for the site-specific login steps
pickle.dump(driver.get_cookies(), open("cookies.pkl", "wb"))
option = webdriver.ChromeOptions()
option.add_argument("--no-sandbox")
driver2 = webdriver.Chrome(options=option)
for cookie in cookies:
driver.add_cookie(cookie)
driver.refresh()
sleep(5)
driver2.get("https://BANK_WEB_SITE.com/MY_PORTFOLIO")
I solved the issue with the code below. The solution was:
1) fix the mistake by changing "driver.add_cookie(cookie)" to "driver2.add_cookie(cookie)"
2) load the web page before adding the cookies, since add_cookie() only accepts cookies for the domain of the page currently loaded
import os
import pickle
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
option = webdriver.ChromeOptions()
option.add_argument("--no-sandbox")
driver = webdriver.Chrome(options=option)
driver.get("https://BANK_WEB_SITE.com/login/")
sleep(5)
LOGIN()
pickle.dump(driver.get_cookies(), open("cookies.pkl", "wb"))
option = webdriver.ChromeOptions()
option.add_argument("--no-sandbox")
driver2 = webdriver.Chrome(options=option)
driver2.get("https://BANK_WEB_SITE.com/MY_PORTFOLIO/")
cookies = pickle.load(open("cookies.pkl", "rb"))
for cookie in cookies:
driver2.add_cookie(cookie)
driver2.get("https://BANK_WEB_SITE.com/MY_PORTFOLIO/")
As my code shows, I am trying to automate the process of logging in and other things using Selenium in Python 3.7. I am stuck as it is showing "AttributeError: element_to_be_clickable has no object click" on the line botton_to_click.click().
from bs4 import BeautifulSoup
from bs4.element import Tag
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
base = 'https://www.wsj.com'
url = 'https://www.wsj.com/search/term.html?KEYWORDS=cybersecurity&min-date=2018/04/01&max-date=2019/03/31&isAdvanced=true&daysback=90d&andor=AND&sort=date-desc&source=wsjarticle,wsjpro&page=1'
browser = webdriver.Safari(executable_path='/usr/bin/safaridriver')
browser.get(url)
browser.find_element_by_id('editions-select').click()
browser.find_element_by_id('na,us').click()
botton_to_click = WebDriverWait(browser, 10).until(EC.element_to_be_clickable, ((By.XPATH,"//button[@type='button' and contains(.,'Sign In')]")))
botton_to_click.click()
browser.find_element_by_id('username').send_keys('##$%*&^%##$')
browser.find_element_by_id('password').send_keys('##*$%^!#')
browser.find_element_by_id('basic-login').click()
browser.find_element_by_id('masthead-container').click()
browser.find_element_by_id('searchInput').send_keys('cybersecurity')
browser.find_element_by_name('ADVANCED SEARCH').click()
browser.find_element_by_id('dp1560924131783').send_keys('2018/04/01')
browser.find_element_by_id('dp1560924131784').send_keys('2019/03/31')
browser.find_element_by_id('wsjblogs').click()
browser.find_element_by_id('wsjvideo').click()
browser.find_element_by_id('interactivemedia').click()
browser.find_element_by_id('sitesearch').click()
browser.close()
Thanks.
Remove the comma after element_to_be_clickable as given below; it may resolve your issue. until() expects a single callable: with the comma, the condition and the locator tuple are passed as two separate arguments, so the wait returns an expected-condition object instead of a web element, and calling .click() on it raises the AttributeError. Change
botton_to_click = WebDriverWait(browser, 10).until(EC.element_to_be_clickable, ((By.XPATH,"//button[@type='button' and contains(.,'Sign In')]")))
to
botton_to_click = WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH,"//button[@type='button' and contains(.,'Sign In')]")))