Web scraping yahoo finance with selenium

Web scraping yahoo finance with selenium - selenium

It's driving me nuts, but I really can't find the answer to my problem. I have written some code with Python and Selenium to scrape the Yahoo Finance news for Daimler, but it simply doesn't work. I always get the following message in PyCharm:
selenium.common.exceptions.InvalidSelectorException: Message: invalid selector: An invalid or illegal selector was specified
(Session info: chrome=85.0.4183.121)
But I am quite sure that the selector I chose is the only appropriate one. Here is my code:
from selenium import webdriver
import pandas as pd
url = 'https://finance.yahoo.com/quote/DAI.DE?p=DAI.DE&.tsrc=fin-srch'
driver = webdriver.Chrome('C:/Users/Startklar/Desktop/CFDS/chromedriver.exe')
driver.get(url)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
articles = driver.find_elements_by_class_name('js-stream-content Pos(r)')
for article in articles:
source = article.find_element_by_xpath('.//*[#id="quoteNewsStream-0-Stream"]/ul/li[3]/div/div/div[2]/div/span[1]').text
title = article.find_element_by_xpath('//*[#id="quoteNewsStream-0-Stream"]/ul/li[5]/div/div/div[2]/h3/a').text
text = article.find_element_by_xpath('//*[#id="quoteNewsStream-0-Stream"]/ul/li[5]/div/div/div[2]/p').text
date = article.find_element_by_xpath('.//*[#id="quoteNewsStream-0-Stream"]/ul/li[5]/div/div/div[1]/div/span[2]').text
print(source,title,text,date)
What's wrong? I would really appreciate some help!
Thx a lot
Maybe it is useful to see the whole error message:
Traceback (most recent call last):
File "C:/Users/Startklar/PycharmProjects/test/venv/Selenium Test.py", line 15, in <module>
articles = driver.find_elements_by_css_selector('li.js-stream-content Pos(r)')
File "C:\Users\Startklar\PycharmProjects\test\venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 614, in find_elements_by_css_selector
return self.find_elements(by=By.CSS_SELECTOR, value=css_selector)
File "C:\Users\Startklar\PycharmProjects\test\venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 1007, in find_elements
'value': value})['value'] or []
File "C:\Users\Startklar\PycharmProjects\test\venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "C:\Users\Startklar\PycharmProjects\test\venv\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.InvalidSelectorException: Message: invalid selector: An invalid or illegal selector was specified
(Session info: chrome=85.0.4183.121)
that's the latest code by the way
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url = 'https://finance.yahoo.com/quote/DAI.DE?p=DAI.DE&.tsrc=fin-srch'
driver = webdriver.Chrome('C:/Users/Startklar/Desktop/CFDS/chromedriver.exe')
driver.get(url)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
articles = WebDriverWait(driver, 100).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "li.js-stream-content")))
for article in articles:
try:
source = article.find_element_by_xpath('//div/div/div[2]/div/span[1]').text
title = article.find_element_by_xpath('//div/div/div[2]/h3/a').text
text = article.find_element_by_xpath('//div/div/div[2]/p').text
date = article.find_element_by_xpath('//div/div/div[2]/div/span[2]').text
print(source,title,text,date+'/n')
except:
print("")

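Your XPaths and article selector were off. A class-name locator accepts only a single class name, so a compound value such as 'js-stream-content Pos(r)' is rejected as an invalid selector; select the list items with a CSS selector instead and wait until they are present: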
from selenium.common.exceptions import NoSuchElementException

# Wait (up to 30 s) until the news list items are present in the DOM.
articles = WebDriverWait(driver, 30).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "li.js-stream-content"))
)
for article in articles:
    try:
        # The leading "." anchors each XPath at the current <li>. An XPath that
        # starts with "//" is evaluated against the whole document, so every
        # iteration would return the first match on the page.
        source = article.find_element(By.XPATH, './/div/div/div[2]/div/span[1]').text
        title = article.find_element(By.XPATH, './/div/div/div[2]/h3/a').text
        text = article.find_element(By.XPATH, './/div/div/div[2]/p').text
        date = article.find_element(By.XPATH, './/div/div/div[1]/div/span[2]').text
        print(source, title, text, date + '\n')
    except NoSuchElementException:
        # A list item that lacks one of the nodes above is skipped with a
        # blank line; any other error is allowed to surface.
        print("")
Import
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

Related

Selenium just opens Chrome and stops the program using ChromeDriverManager

My problem is that I want to execute the whole script, but it just opens Chrome and then stops. What could be the reason for this? I installed every required package and placed the chromedriver in the same directory as the script file. [![enter image description here][1]][1]
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time
driver = webdriver.Chrome(ChromeDriverManager().install())
time.sleep(5)
# Öffne die angegebene URL
driver.get("https://www.nike.com/de/launch/t/air-force-1-07-fresh")
# Warte bis die Seite geladen ist
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CLASS_NAME, "size-grid-button")))
# Scrolle nach unten
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# Warte 3 Sekunden
import time
time.sleep(3)
# Wähle Größe 9 aus
size_button = driver.find_element_by_xpath('//*\[#class="size-grid-button" and contains(text(),"9")\]')
size_button.click()
# Drücke den Kauf-Knopf
buy_button = driver.find_element_by_class_name("buying-tools-cta-button")
buy_button.click()
# Gib die Nachricht "Zugriff erfolgt" zurück
print("Zugriff erfolgt")]
Snapshot:

In case you are using Selenium 4, you need to wrap the driver path returned by:
ChromeDriverManager().install()
in a Service object and pass it with the service keyword, as follows:
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get("https://www.nike.com/de/launch/t/air-force-1-07-fresh")

How to attach screenshot in pytest-html report with coftest.py?

I want to attach a screenshot to my HTML report but I haven't found any good resource on how to use the conftest.py file. I created the coftest.py file inside the pytest folder with the following code:
import pytest
@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Attach a URL and a screenshot to the pytest-html entry of each test.

    Runs as a hook wrapper: the report is built by the wrapped hook
    implementations and obtained from ``outcome`` after the ``yield``.
    Extras are only added for the "call" phase of a test.
    """
    pytest_html = item.config.pluginmanager.getplugin("html")
    outcome = yield
    report = outcome.get_result()
    extra = getattr(report, "extra", [])
    # NOTE(review): this path is fixed, while the test writes its screenshot
    # into a directory named after the current timestamp — confirm the file
    # actually exists at report time.
    image = "D:/Selenium/Insights/2022-11-02_00-13-18/error_page.png"
    if report.when == "call":
        # Always add the URL and the screenshot to the report. The helpers
        # live in ``pytest_html.extras`` (plural).
        extra.append(pytest_html.extras.url("http://www.example.com/"))
        extra.append(pytest_html.extras.image(image))
        xfail = hasattr(report, "wasxfail")
        if (report.skipped and xfail) or (report.failed and not xfail):
            # Only on failure: attach the screenshot again.
            # NOTE(review): this duplicates the unconditional image above —
            # keep whichever of the two is intended.
            extra.append(pytest_html.extras.image(image))
        # NOTE(review): newer pytest-html releases read ``report.extras``
        # instead of ``report.extra`` — check the installed version.
        report.extra = extra
And my test.py file is:
import time
from os import getenv
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from dotenv import load_dotenv
from Login_actions import Login_activities
from Insights_actions import Insights_activities
from Locators import Locators
import pytest, os
from datetime import datetime
# Test class for the Insights flow: a setup routine that starts Chrome with a
# per-run download directory, and one end-to-end test that logs in, submits an
# insight and checks that a .pptx file was downloaded.
# NOTE(review): all indentation was lost in this snippet; the nesting has to be
# restored before it can run.
class Test_Insights():
# NOTE(review): the next line looks like a decorator whose leading "@" was
# lost (presumably "@pytest.fixture") — confirm against the original post.
#pytest.fixture
def test_setup(self):
#make new directory for downloads
new_dir = r"D:\Selenium\Insights\{timestamp}".format(timestamp=datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
# print(new_dir)
if not os.path.exists(new_dir):
os.makedirs(new_dir)
# The timestamped directory is kept on the instance; test_check_in reads it.
self.saved_dir=new_dir
prefs = {"download.default_directory": new_dir, "download.directory_upgrade": True, "download.prompt_for_download": False}
#intiating chrome browser instance
options=Options()
options.add_argument('--start-maximized')
# options.add_argument('--headless')
options.add_experimental_option("prefs", prefs)
self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=options)
#load credentials
load_dotenv()
self.username = getenv("TOP_USERNAME")
self.password = getenv("TOP_PWD")
#exiting ceremonies
# NOTE(review): the bare yield hands no value to the test, while
# test_check_in reads test_setup.username / test_setup.password; the
# credentials are stored on self above.
yield
self.driver.close()
self.driver.quit()
print("Test executed")
def test_check_in(self, test_setup):
driver=self.driver
# login_url="https://tilt-sso.preprod.crto.in/" separate login page
# url="https://tilt-orange360.preprod.crto.in/insights/home"
url="https://tilt-sso.preprod.crto.in/auth?code=5515f8b0-4b64-4da4-b506-e6a6a3f81b23&scope=cn%20dn%20mail%20uid%20umsId&state=eyJyZWRpcmVjdF91cmkiOiJcL2hvbWUiLCJub25jZSI6IktaTFBxczU5T3lQUWJaRUp0OFhBQWZvZDNueDhPaENDbGlJWVRqZ08ifQ%3D%3D"
driver.get(url)
try:
welcome_text = driver.find_element(by=By.XPATH, value="//div[contains(text(),'Criteo')]")
assert welcome_text
login_actions = Login_activities(driver)
login_actions.enter_username(test_setup.username)
login_actions.enter_password(test_setup.password)
login_actions.login()
# Wait until the browser has been redirected to the Insights home page.
page_load_wait = WebDriverWait(driver, timeout=30).until(
EC.url_to_be("https://tilt-orange360.preprod.crto.in/insights/home"))
if (page_load_wait):
WebDriverWait(driver, timeout=20).until(
EC.visibility_of_element_located((By.XPATH, Locators.welcome_text)))
WebDriverWait(driver, timeout=20).until(EC.element_to_be_clickable((By.XPATH, Locators.run_insight)))
insights_actions = Insights_activities(driver)
insights_actions.insights_search("Check-In")
insights_actions.search_partners("BOOKINGIT")
insights_actions.smart_date_30days()
insights_actions.submit_insights()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, Locators.success_mesg)))
# submit_verify = driver.find_element(by=By.XPATH, value=Locators.success_mesg)
# assert(submit_verify)
print("Submission successful")
insights_actions.download_file()
time.sleep(20)
print(self.saved_dir)
# Collect the .pptx files found in this run's download directory.
arr=[]
arr+=[file for file in os.listdir(self.saved_dir) if file.endswith('.pptx')]
# NOTE(review): arr[0] is read before the emptiness checks below, so an
# empty listing raises IndexError on this line.
print("File in the directory: " + arr[0])
# NOTE(review): arr is never refreshed inside this loop, so it cannot end
# once entered.
while not arr:
time.sleep(5)
if arr:
print("Insights completed. File downloaded successfully")
else:
print("File not available")
raise NoSuchElementException
# NOTE(review): the bare except also catches the AssertionError and any
# AttributeError/IndexError raised above, which hides the original failure.
except:
if driver.find_element(by=By.XPATH,value=Locators.error_page):
# The screenshot goes into this run's timestamped directory.
driver.get_screenshot_as_file('{dir}/error_page.png'.format(dir=self.saved_dir))
print("500 Internal server error")
Error_page=driver.current_url
print("The error page: "+Error_page)
raise NoSuchElementException
I do not know why it is not working. The documentation (https://pytest-html.readthedocs.io/en/latest/user_guide.html#enhancing-reports) does not have much information. I really need help here, please.

163 INFO: UPX is not available. selenium pyinstaller one file.exe

I've been reading almost all the posts related to this topic, but I can't find a solution.
My folder path is : C:\Users\User\Desktop\Data Analytics Arg\py_inst
Inside the folder I created a virtual env., I added the chromedriver.exe and my script as it can be seen in the image:
this is my script:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
def resource_path(relative_path):
    """Return the absolute path of a resource shipped with the program.

    When the program runs from a PyInstaller bundle, the path is resolved
    against the folder recorded in ``sys._MEIPASS``; otherwise it is resolved
    against the current working directory.

    Args:
        relative_path: Path of the resource relative to the base folder.

    Returns:
        The base folder joined with ``relative_path``.
    """
    import os
    import sys

    # PyInstaller creates a temp folder and stores its path in _MEIPASS; the
    # attribute does not exist in a normal interpreter run.
    base_path = getattr(sys, "_MEIPASS", os.path.abspath("."))
    return os.path.join(base_path, relative_path)
try :
url = "https://pinterest.com"
driver_path = "chromedriver.exe"
#Instanciamos la clase de Options para definir ciertas opciones:
options = Options()
options.add_argument("--lang=es") #lenguaje que queremos utilizar
#options.add_argument("--headless") # utilizar un navegador sin cabeza
options.add_argument("--log-level=3")# omite los warnings en la consola
#definimos ntro.webdriver:
driver = webdriver.Chrome(executable_path=driver_path, options=options)
driver.get(url)
time.sleep(2)
buttons = driver.find_elements_by_css_selector("button[data-test-id='page-scroll-arrow']")
for button in buttons:
opacity = button.get_attribute("style").split(";")[0][-1]
if opacity is '1':
button.click()
time.sleep(3)
texto = driver.find_element_by_css_selector('h2.unauth-homepage-signup-title').text
with open('result.txt','w') as file:
file.write(texto)
driver.close()
except Exception as e:
print(e)
I added creationflags=CREATE_NO_WINDOW to the service.py file (C:\Users\User\Desktop\Data Analytics Arg\py_inst\venv\Lib\site-packages\selenium\webdriver\common\service.py) and imported CREATE_NO_WINDOW from subprocess, like this:
from subprocess import CREATE_NO_WINDOW, DEVNULL
import errno
import os
import subprocess
from platform import system
from subprocess import PIPE, CREATE_NO_WINDOW
from time import sleep
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common import utils
and :
self.process = subprocess.Popen(cmd, env=self.env,
close_fds=system() != 'Windows',
stdout=self.log_file,
stderr=self.log_file,
stdin=PIPE,
creationflags=self.creationflags,
creationflags=CREATE_NO_WINDOW)
All of this comes from this post:
hide_chrome_answer
Finally, via terminal I try to create one .exe file :
pyinstaller --add-data "chromedriver.exe;." --windowed --onefile prueba_1.py
But I got this error :
124 INFO: PyInstaller: 4.7
124 INFO: Python: 3.10.0
147 INFO: Platform: Windows-10-10.0.19042-SP0
149 INFO: wrote C:\Users\User\Desktop\Data Analytics Arg\py_inst\prueba_1.spec
153 INFO: UPX is not available.
script 'C:\Users\User\Desktop\Data Analytics Arg\py_inst\prueba_1.py' not found
and now my folder is like this:
If I run the script after the changes in service.py, I get this message:
c:\Users\User\Desktop\Data Analytics Arg\py_inst\prueba_script.py:41: SyntaxWarning:
"is" with a literal. Did you mean "=="?
if opacity is '1':
Traceback (most recent call last):
File "c:\Users\User\Desktop\Data Analytics Arg\py_inst\prueba_script.py", line 1, in
<module>
from selenium import webdriver
File "C:\Users\User\Desktop\Data Analytics Arg\py_inst\venv\lib\site-
packages\selenium\webdriver\__init__.py", line 18, in <module>
from .firefox.webdriver import WebDriver as Firefox # noqa
File "C:\Users\User\Desktop\Data Analytics Arg\py_inst\venv\lib\site-
packages\selenium\webdriver\firefox\webdriver.py", line 31, in <module>
from .service import DEFAULT_EXECUTABLE_PATH, Service
File "C:\Users\User\Desktop\Data Analytics Arg\py_inst\venv\lib\site-
packages\selenium\webdriver\firefox\service.py", line 20, in <module>
from selenium.webdriver.common import (service, utils)
File "C:\Users\User\Desktop\Data Analytics Arg\py_inst\venv\lib\site-
packages\selenium\webdriver\common\service.py", line 77
creationflags=CREATE_NO_WINDOW)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Could someone help me please???.

This error message...
UPX is not available
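...implies that PyInstaller cannot find upx.exe to compress the executable. Note that this line is only an INFO message; the build shown above stops at the next line, because the script name passed to PyInstaller (prueba_1.py) was not found.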
Solution
In order to get rid of this message, you need to download UPX for your system configuration. In case of a 64-bit Windows OS you need to download:
In case you have downloaded UPX and saved it in D:\ you need to run:
pyinstaller main.py --key 123456 -n test -F -w --upx-dir D:\

Selenium: selenium.common.exceptions.TimeoutException: Message: error

I was trying to run this script to automate clicking on the product list in the web page using Selenium, but this error occurs every time: "raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: error". What am I doing wrong here? I would appreciate your guidance.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
chrome_options = Options()
chrome_options.add_argument('__headless')
chrome_path = which('chromedriver')
driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
driver.set_window_size(1920, 1080)
driver.get('https://www.galaxus.ch/search?q=5010533606001')
product_tab = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//article[#class='panelProduct panelLayout_panelLayout__BDQ6_ view_product__3AOqY']/a"))).click()
time.sleep(10)
driver.close()
output
PS G:\Python_Practice\scrapy_practice\test> [21628:13792:0911/125833.339:ERROR:gpu_init.cc(441)] Passthrough is not supported, GL is disabled
> & C:/Users/raisu/anaconda3/envs/Scrapy_Workspace2/python.exe g:/Python_Practice/scrapy_practice/test/test.py
DevTools listening on ws://127.0.0.1:56456/devtools/browser/1d6d20ce-ecb9-44f7-be6e-1dbe1373526a
Traceback (most recent call last):
File "g:/Python_Practice/scrapy_practice/test/test.py", line 18, in <module>
product_tab = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//article[#class='panelProduct panelLayout_panelLayout__BDQ6_ view_product__3AOqY']/a"))).click()
File "C:\Users\raisu\anaconda3\envs\Scrapy_Workspace2\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:

Instead of using the xpath, I've used a CSS_SELECTOR and also I changed the wait condition for element to be visible
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
chrome_options = Options()
# Chrome switches start with two hyphens; '__headless' is not a recognised
# switch, so the browser would not run headless.
chrome_options.add_argument('--headless')
chrome_path = which('chromedriver')
driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
try:
    driver.set_window_size(1920, 1080)
    driver.get('https://www.galaxus.ch/search?q=5010533606001')
    #time.sleep(5) use this only if the wait is not working
    wait = WebDriverWait(driver, 20)
    # The wait returns the element once it is visible, so it can be clicked
    # directly instead of being located a second time.
    product_link = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'article > a')))
    product_link.click()
finally:
    # quit() ends the whole session, including the chromedriver process, even
    # when the wait times out.
    driver.quit()

Xpath grabbing wrong text

I'm trying to grab the price from this page: https://www.eq3.com/ca/en/product/cjv8cke45026q01786nahx8uf/lighting/lighting/pendant-lamps/nelson-bell-bubble-pendant-lamp?cjv49km02036401865sece5be=cjv49km0503650186hc90zvnq
It should be getting $1,376.15 CAD; however, I'm getting some other text from the page, and sometimes it doesn't work at all and gives me:
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
Here's my code:
DRIVER_PATH = '/usr/bin/chromedriver'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
driver = webdriver.Chrome(executable_path=DRIVER_PATH, options=options)
XPATH = '//*[#id="app"]/main/div/div/section[1]/div/div[3]/div/span[1]'
url = 'https://www.eq3.com/ca/en/product/cjv8cke45026q01786nahx8uf/lighting/lighting/pendant-lamps/nelson-bell-bubble-pendant-lamp?cjv49km02036401865sece5be=cjv49km0503650186hc90zvnq'
driver.get(url)
price = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, XPATH)))
print(price)
driver.quit()

Try that out:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC
DRIVER_PATH = '/usr/bin/chromedriver'
options = Options()
options.add_argument('--headless')
driver = webdriver.Chrome(executable_path=DRIVER_PATH, options=options)
url = "https://www.eq3.com/ca/en/product/cjv8cke45026q01786nahx8uf/lighting/lighting/pendant-lamps/nelson-bell-bubble-pendant-lamp?cjv49km02036401865sece5be=cjv49km0503650186hc90zvnq"
try:
    driver.get(url)
    # XPath attribute tests are written with "@": select the first span whose
    # class list contains both MuiTypography-root and MuiTypography-h3.
    el = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//span[contains(@class,'MuiTypography-root') and contains(@class,'MuiTypography-h3')][1]")))
    # Print the element's text rather than the WebElement object itself.
    text = el.text
    print(text)
finally:
    # Always end the browser session, even if the wait times out.
    driver.quit()

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Web scraping yahoo finance with selenium - selenium

Related

Selenium just opens Chrome and stops the program using ChromeDriverManager

How to attach screenshot in pytest-html report with coftest.py?

163 INFO: UPX is not available. selenium pyinstaller one file.exe

Selenium: selenium.common.exceptions.TimeoutException: Message: error

Xpath grabbing wrong text

Categories

Resources