I have been trying to rotate some IPs with this piece of code. It didn't work. It still gave me my own IP. Could anyone help me check if there is anything wrong with it?
This is my code:
import random
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
ips = ["185.199.228.220:7300", "185.199.231.45:8382"]
def rand_proxy():
    proxy = random.choice(ips)
    return proxy

def myip_now():
    chrome_options = webdriver.ChromeOptions()
    proxy = rand_proxy()
    chrome_options.add_argument(f'--proxy-server = {proxy}')
    driver = webdriver.Chrome(options = chrome_options)
    driver.get("https://myexternalip.com/raw")
    print(proxy)
    time.sleep(10)
    driver.quit()
myip_now()
What I expected was that https://myexternalip.com/raw, opened by the bot-controlled browser, would show either 185.199.228.220:7300 or 185.199.231.45:8382.
There seem to be some minor issues with the blank spaces. Chrome expects the argument in the form --proxy-server=<host:port>, with no spaces around the =, so tweak your code a bit by removing the extra spaces (swapping the single quotes for double quotes is optional) as follows:
import random
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
ips = ["185.199.228.220:7300", "185.199.231.45:8382"]
def rand_proxy():
    proxy = random.choice(ips)
    return proxy

def myip_now():
    chrome_options = webdriver.ChromeOptions()
    proxy = rand_proxy()
    chrome_options.add_argument(f"--proxy-server={proxy}")
    driver = webdriver.Chrome(options=chrome_options)
    driver.get("https://myexternalip.com/raw")
    print(proxy)
    time.sleep(10)
    driver.quit()
myip_now()
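If you want to double-check which IP the site actually reports (rather than only printing the chosen proxy), you can read the page body before quitting the driver. This is just a sketch on top of the code above; note also that free public proxies like the ones in the question are often dead or refused, in which case Chrome falls back to a direct connection and you keep seeing your own IP.

# Sketch: read what https://myexternalip.com/raw actually shows.
# Drop this in right after driver.get(...) inside myip_now().
reported_ip = driver.find_element(By.TAG_NAME, "body").text
print(f"proxy chosen: {proxy} / IP reported by the site: {reported_ip}")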
References
You can find a couple of relevant, detailed discussions in:
How to rotate Selenium webrowser IP address
f-strings in Python
Python 3's f-Strings: An Improved String Formatting Syntax (Guide)
Related
I am trying to scrape data from Google Finance with the following link:
https://www.google.com/finance/quote/ACN:NYSE
The section I am trying to fetch is on the right side, containing information like market cap, P/E ratio, etc.
Earlier I thought it was rendered by JavaScript and wrote the following snippet:
# imports implied by the snippet (not shown in the original question)
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located

base_url = 'https://www.google.com/finance/quote/'  # URL from the question
suffix = 'ACN:NYSE'

class_name = 'gyFHrc'
options = Options()
options.headless = True
service = Service('/usr/local/bin/geckodriver')
browser = Firefox(service=service, options=options)
browser.get(base_url + suffix)
wait = WebDriverWait(browser, 15)
wait.until(presence_of_element_located((By.CLASS_NAME, class_name)))  # <--line 58
stuff = browser.find_elements(By.CLASS_NAME, class_name)
print(f'stuff-->{stuff}')
for elem in stuff:
    html = elem.get_attribute("outerHTML")
    # print(f'html:{html}')
I get the following error:
File "scraping_google_finance_js.py", line 58, in <module>
wait.until(presence_of_element_located((By.CLASS_NAME, class_name)))
File "/Users/me/opt/anaconda3/envs/scraping/lib/python3.10/site-packages/selenium/webdriver/support/wait.py", line 90, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
Stacktrace:
WebDriverError#chrome://remote/content/shared/webdriver/Errors.jsm:183:5
NoSuchElementError#chrome://remote/content/shared/webdriver/Errors.jsm:395:5
element.find/</<#chrome://remote/content/marionette/element.js:300:16
Later, I realised that this was plain HTML and I can use BeautifulSoup as follows:
class_name = 'gyFHrc'
soup = BeautifulSoup(html, 'html.parser')
box_rows = soup.find_all("div", class_name)
print(box_rows)
for row in box_rows:
    print(type(row), str(row.contents[1].contents))
This worked, with the following output:
<class 'bs4.element.Tag'> ['$295.14']
<class 'bs4.element.Tag'> ['$289.67 - $298.00']
<class 'bs4.element.Tag'> ['$261.77 - $417.37']
.....
The question is: why did it not work with Selenium? Did I do something wrong, or does Selenium only work with JavaScript sites?
Clearly, time to load the page was not the problem, as BeautifulSoup could fetch and parse the page.
The error selenium.common.exceptions.TimeoutException says the element you are trying to load or find was not found within the given time.
This usually happens when Selenium can't find the desired tag or element, but in your case the element was there, so most likely the page is simply slow to load it within the wait; increasing the wait time should help.
I checked the code with a few changes and it worked for me, so it's probably an issue with the element not loading in time.
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome(ChromeDriverManager().install())
class_name = "gyFHrc"
driver.get("https://www.google.com/finance/quote/ACN:NYSE")
wait = WebDriverWait(driver, 15)
wait.until(EC.presence_of_element_located((By.CLASS_NAME, class_name))) # <--line 58
stuff = driver.find_elements(By.CLASS_NAME, class_name)
print(f"stuff-->{stuff}")
for elem in stuff:
    html = elem.get_attribute("outerHTML")
    print(f"html:{html}")
Result
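As a small follow-up (not part of the original answer): if you also want the label/value text out of each matched row, similar to what the BeautifulSoup version printed, you can extend the loop above. The exact structure of the gyFHrc rows comes from the question, so treat the line-splitting below as an assumption.

for elem in stuff:
    row_text = elem.text            # visible text of one gyFHrc row (label + value)
    parts = row_text.split("\n")    # assumption: label and value sit on separate rendered lines
    print(parts)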
I'm trying to use Python (Selenium, BeautifulSoup, and XPath) to scrape a span with an itemprop equal to "description", but every time I run the code, the "try" fails and it prints out the "except" error.
I do see the element in the code when I inspect elements on the page.
Line that isn't getting the desired response:
quick_overview = soup.find_element_by_xpath("//span[contains(@itemprop, 'description')]")
Personally, I think you should just keep working with Selenium:
quick_overview = driver.find_element_by_xpath("//span[contains(@itemprop, 'description')]")
and add .text to the end to get the text content.
To actually use soup to parse this out, you would likely need a wait condition from Selenium first, so there is no real point.
However, should you decide to integrate bs4, then you need to change your function to work with the actual HTML from driver.page_source and parse that, then switch to select_one to grab your item. Then ensure you are returning from the function and assigning to a new soup object.
from bs4 import BeautifulSoup
from selenium import webdriver  # links w/ browser and carries out actions
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

PATH = r"C:\Program Files (x86)\chromedriver_win32\chromedriver.exe"
baseurl = "http://www.waytekwire.com"
skus_to_find_test = ['WL16-8', 'WG18-12']

driver = webdriver.Chrome(PATH)
driver.get(baseurl)

def use_driver_current_html(driver):
    # parse the browser's current HTML with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, 'lxml')
    return soup

for sku in skus_to_find_test:  # search for each SKU in turn
    search_bar = driver.find_element_by_id('themeSearchText')
    search_bar.send_keys(sku)
    search_bar.send_keys(Keys.RETURN)
    try:
        product_url = driver.find_elements_by_xpath(f"//div[contains(@class, 'itemDescription')]//h3//a[contains(text(), '{sku}')]")[0]
        product_url.click()
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//span[contains(@itemprop, 'description')]")))
        soup = use_driver_current_html(driver)
        try:
            quick_overview = soup.select_one("span[itemprop=description]").text
            print(quick_overview)
        except:
            print('No Quick Overview Found.')
    except:
        print('Product not found.')
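A side note on the selector choice above: select_one("span[itemprop=description]") is a CSS attribute selector and targets essentially the same element as the XPath used in the wait condition. The two lines below are interchangeable ways of grabbing it (a sketch, reusing names from the code above):

# CSS attribute selector via BeautifulSoup, after re-parsing driver.page_source:
quick_overview = soup.select_one("span[itemprop=description]").text
# equivalent XPath attribute test directly in Selenium:
quick_overview = driver.find_element(By.XPATH, "//span[@itemprop='description']").text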
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import lxml
import openpyxl as op
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
# from lxml

html_text = 'https://store.steampowered.com/app/354400/Tadpole_Treble/'
# wb = op.load_workbook(
#     'https://1drv.ms/x/s!Aiw1ZpHhhvdugd0Z_eEBU_KzDlxiuA?e=cWevHn')

driver = webdriver.Chrome(
    executable_path='C:/Users/atif/Downloads/chromedriver.exe')
driver.get(html_text)
driver.implicitly_wait(10)

names = driver.find_elements_by_xpath('//div[@class="persona_name"]/a')
print(len(names))
for name in names:
    print(name.text)

# body = soup.body
# titles = headers.find_all('a', class_='title fw500 ellipsis')
# for h in headers:
#     # title = h.find('a', class_='title fw500 ellipsis').text
#     print(h.a['href'])
# a_links = body.find_all("a")

driver.close()
This is the output I am getting:
[20380:14344:0416/154733.584:ERROR:device_event_log_impl.cc(214)]
[15:47:33.584] Bluetooth: bluetooth_adapter_winrt.cc:1072 Getting
Default Adapter failed.
0  <= result showing zero
It seems your code is correct, but the elements are not loaded at that point; that's why you are getting an empty list. The reviews load only once you scroll down to the end of the page. I have tried the below code and it works for me. Do let me know whether it works for you.
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get('https://store.steampowered.com/app/354400/Tadpole_Treble')

# the reviewer names only render after the page has been scrolled to the bottom
driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
sleep(5)

ReviewersNames = driver.find_elements_by_xpath("//div[@class=\"persona_name\"]/a")
for reviewer in ReviewersNames:
    print(reviewer.text)
Note - you can also use an explicit wait instead of the fixed sleep.
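For example, a sketch of that explicit-wait variant, reusing the wait object defined above (the scroll is most likely still needed, since the reviews only render after scrolling):

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
# wait up to 10 seconds for at least one reviewer name to appear instead of sleeping
reviewers = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class='persona_name']/a")))
for reviewer in reviewers:
    print(reviewer.text)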
Please mark it as answer if it resolves your problem.
I want to read a CSV file of URLs and feed them into a Selenium webdriver command (one by one). The addresses are all in one column of the CSV. I managed to loop through the URLs with no problem; however, I can't make it work with the webdriver. It always returns the same error message, "selenium.common.exceptions.InvalidArgumentException: Message: invalid argument", even though the printed URLs look fine. The only funny thing is that when I copy and paste the URL from the print command into my editor, it shows up with a leading blank: ' http://[url].com'. I tried to remove the space with strip, but that had no effect on the printout. After hours of trying I feel pretty lost with this.
I use the following code:
# imports implied by the snippet (not shown in the question)
from csv import reader
from selenium import webdriver

with open('urls.csv', 'r', encoding='utf-8') as read_obj:
    reader = reader(read_obj)
    for row in reader:
        URL = "'" + str((row[0])) + "'"
        print(URL)
        options = webdriver.ChromeOptions()
        options.headless = True
        driver = webdriver.Chrome(options=options)
        driver.get(URL)
        driver.quit()
Thank you for your help!
This code works for me:
#!/usr/bin/env python3
# coding: utf-8
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)

source = 'myfile.csv'
contents = []  # collect the URLs read from the CSV

with open(source, 'rt', encoding='utf-8-sig') as cp_csv:
    cp_url = csv.reader(cp_csv)
    for row in cp_url:
        url = row[0]
        contents.append(url)

for url in contents:
    driver.get(url)
    # do something here

driver.quit()
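One more thing worth calling out: the question mentions a leading blank in the printed URL, and the question's code also wraps each value in literal quote characters (URL = "'" + str((row[0])) + "'"). Either of those can make the string an invalid URL for driver.get(). A minimal sketch of just the relevant lines, assuming the rest of the loop stays as above:

url = row[0].strip()      # drop any stray leading/trailing whitespace from the CSV cell
if url:                   # skip empty rows
    driver.get(url)       # pass the bare URL, without adding extra quote characters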
I'm pretty sure this is a website-specific thing, because I've tried my code (with the XPath modified) on other sites and it works. I'm trying to get all the PDF links on the website listed in the code below.
driver.find_elements_by_xpath(xpath) yields an empty list []
Code:
# imports implied by the snippet (not shown in the question)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def scrape_url(url):
    xpath = '//*[@class="panel-body"]//a'
    options = Options()
    options.headless = True
    # change filepath of chromedriver
    driver = webdriver.Chrome(options=options, executable_path=r'C:\Users\User\Desktop\chromedriver')
    try:
        driver.get(url)
        all_href_elements = driver.find_elements_by_xpath(xpath)
        print("all_href_elements", all_href_elements)  # <-- empty list []
        for href_element in all_href_elements:
            article_url_text = href_element.text
            print(article_url_text)
            if article_url_text == "PDF":
                article_url = href_element.get_attribute('href')
                print(article_url_text, article_url)
                if article_url:
                    self.urls.add(article_url)  # self.urls is defined elsewhere in the asker's class
                    print("num of urls", len(self.urls))
    except Exception as e:
        print(e)
        print(url)

url = 'https://www.govinfo.gov/committee/senate-armedservices?path=/browsecommittee/chamber/senate/committee/armedservices/collection/BILLS/congress/106'
scrape_url(url)
But according to the Chrome extension XPath Helper, the XPath query should return something. I think it might be because the URLs are dynamic and aren't generated until the pane is "opened." But loading the URL should cause the pane to be "open" for the web driver to get the links, no?
How would I get around this?
Thanks
Just use an explicit wait for the elements:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

all_href_elements = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.XPATH, xpath))
)
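For completeness, here is a sketch of how that explicit wait could slot into the scrape_url function from the question, reusing the imports above; the TimeoutException handling is an assumption, not part of the original answer:

from selenium.common.exceptions import TimeoutException

try:
    driver.get(url)
    all_href_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.XPATH, xpath))
    )
    for href_element in all_href_elements:
        if href_element.text == "PDF":
            print(href_element.get_attribute('href'))
except TimeoutException:
    print("no matching links appeared on", url)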