Chrome headless driver never makes it to website - selenium

Otherwise: Does anyone have any good pyppeteer guides they can link me to?
Hi guys, I'm trying to run Chrome headless. This is my code:
def init_chrome(profile, headless, proxy: str = "159.197.220.31:5594"):
    """Create and return a configured Chrome WebDriver.

    Args:
        profile: Name of the sub-directory under ``browser-profiles/chrome/``
            that is passed to Chrome as its user data directory.
        headless: When truthy, Chrome is started with ``--headless``.
        proxy: ``host:port`` string passed to Chrome via ``--proxy-server``.

    Returns:
        The started ``webdriver.Chrome`` instance, with a 5 second implicit
        wait already applied.
    """
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument("--headless")
        print('headless set')
    options.binary_location = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
    options.add_argument("--remote-debugging-port=8000")
    options.add_argument(f'--proxy-server={proxy}')
    options.add_argument(f"user-data-dir=browser-profiles/chrome/{profile}")
    options.add_argument("--disable-blink-features")
    options.add_argument("--disable-notifications")
    # Experimental options are stored one value per name, so calling
    # add_experimental_option("excludeSwitches", ...) twice would keep only
    # the second list. Both switches are therefore passed in a single call.
    options.add_experimental_option(
        "excludeSwitches", ["enable-automation", "disable-popup-blocking"])
    options.add_experimental_option('useAutomationExtension', False)
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36")
    driver = webdriver.Chrome(options=options,
                              executable_path=fr'{getcwd()}\chromedriver.exe')
    print('driver launched')
    # Injected into every new document so that navigator.webdriver reads as
    # undefined in page scripts.
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => undefined
            })
        """
    })
    # driver.set_window_size(1200, 900)
    driver.implicitly_wait(5)
    return driver
This is the output:
headless set
driver launched
As you can see, the driver is launched, but it never goes to the page. It just sits there and eats resources. What am I doing wrong? For context, this works fine when headless=False.

I suppose you need [--no-sandbox] to bypass the OS security model, and if you are using Windows, add
[--disable-gpu] to enable headless mode.
It should look like this:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Placeholder: replace with the location of your chromedriver executable.
CHROMEDRIVER_PATH = r"C:\path\to\chromedriver.exe"

options = Options()
options.headless = True
options.add_argument('--no-sandbox')
options.add_argument('--disable-gpu')
options.add_argument('start-maximized')
driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, options=options)
driver.get("http://google.com/")
driver.quit()

Related

undetected_chromedriver working in full mode but failing in headless mode (cloudflare)

I am trying to open a website using undetected_chromedriver in headless mode. The website uses Cloudflare. Without headless mode the script works, but when I set headless = True it shows a captcha.
# Reproduction script: open a page with undetected_chromedriver in normal
# (non-headless) mode and save a screenshot of the result.
import ssl
import time
# Replace ssl's default HTTPS context factory with the unverified one, i.e.
# certificate verification is skipped for contexts created through it.
ssl._create_default_https_context = ssl._create_unverified_context
import undetected_chromedriver as uc
from selenium import webdriver
import os
options = webdriver.ChromeOptions()
options.add_argument('--user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"')
driver = uc.Chrome(options=options)
# driver = webdriver.Chrome()
# 'website_url' is a placeholder for the real target URL.
driver.get('website_url')
# import time
# Wait 10 seconds before capturing the screenshot.
time.sleep(10)
driver.save_screenshot('sample.png')
Now if I set headless = True, it's showing a captcha
# Reproduction script: same flow as the non-headless run, but with
# options.headless set to True before the driver is created.
import ssl
import time
# Replace ssl's default HTTPS context factory with the unverified one, i.e.
# certificate verification is skipped for contexts created through it.
ssl._create_default_https_context = ssl._create_unverified_context
import undetected_chromedriver as uc
from selenium import webdriver
import os
options = webdriver.ChromeOptions()
options.add_argument('--user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"')
# The only difference from the working run: headless mode is switched on.
options.headless = True
driver = uc.Chrome(options=options)
# 'website_url' is a placeholder for the real target URL.
driver.get('website_url')
# import time
# Wait 10 seconds before capturing the screenshot.
time.sleep(10)
driver.save_screenshot('sample.png')
How can I make it undetectable by Cloudflare?
Add this Chrome option as well:
options.add_argument('--disable-blink-features=AutomationControlled')
This should prevent detection.

Website Loading Only once then access denied - Selenium Webdriver

We are trying to access a website
Url=https://www.nseindia.com/option-chain using selenium.
However, it loads only once; if we reload it, we get an access denied error.
CODE-
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

opts = Options()
user_agent = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
              'AppleWebKit/537.36 (KHTML, like Gecko) '
              'Chrome/39.0.2171.95 Safari/537.36')
opts.add_argument(f'user-agent={user_agent}')
opts.add_argument('--disable-infobars')
# The options must be handed to the driver; without options=opts the
# user-agent and switches configured above are never applied.
browser = webdriver.Chrome(ChromeDriverManager().install(), options=opts)
browser.get('https://www.nseindia.com/option-chain')
# Long sleep keeps the script (and the browser session) alive.
time.sleep(1000)
Some websites use anti-bot protection that can detect your bot thanks to some differences between an automated browser and a standard browser.
You should try adding these settings:
opts.add_argument('--disable-blink-features=AutomationControlled')
opts.add_experimental_option('useAutomationExtension', False)
opts.add_experimental_option("excludeSwitches", ["enable-automation"])
If this doesn't work, try Undetected Chromedriver; it works like the standard chromedriver, but patches it with more settings to increase stealthiness.
By the way, your user-agent looks a little bit outdated; you should use a newer one matching your chromedriver version, like this one: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36
# Additional Chrome switches and experimental options. `options` is not
# created in this fragment; it must be an existing ChromeOptions-style object
# defined before these lines run.
options.add_argument("--disable-infobars")
options.add_argument("--disable-notifications")
options.add_argument("--disable-default-apps")
options.add_argument("--disable-web-security")
options.add_argument("--disable-site-isolation-trials")
options.add_argument("--disable-logging")
options.add_argument("--disable-bundled-ppapi-flash")
options.add_argument("--disable-gpu-compositing")
options.add_argument("--disable-gpu-shader-disk-cache")
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_argument("--disable-extensions")
options.add_argument("--log-level=3")
# options.add_argument("--window-size=600,600")
# options.page_load_strategy = 'none'
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

ERR_EMPTY_RESPONSE with selenium using proxy (chrome webdriver)

I've been trying to scrape some websites and decided to use proxies. For a while it went fine, I guess, but now I'm getting ERR_EMPTY_RESPONSE quite often, and only once every 10 or so tries will the page load correctly, without changing the code, of course.
When I'm not running headless and I get an empty response, I can click the "reload" button on the chrome webdriver and the page will sometimes load.
It's got something to do with the fact that I use a proxy but I can't really work out why it's working sometimes and why it gives me an empty response so often.
Has anyone else run into a similar problem?
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# WebDriverWait is used below and must be imported explicitly.
from selenium.webdriver.support.ui import WebDriverWait

PROXY = "35.230.111.164" + ":" + "8888"
# Route HTTP, FTP and SSL traffic through the same manually configured proxy.
webdriver.DesiredCapabilities.CHROME["proxy"] = {
    "httpProxy": PROXY,
    "ftpProxy": PROXY,
    "sslProxy": PROXY,
    "noProxy": None,
    "proxyType": "MANUAL",
    "autodetect": False,
}
options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_argument("--window-size=1920,1080")
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
options.add_argument(f"user-agent={user_agent}")
driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, 10)
driver.get(
    "https://whatismyipaddress.com/"
)
driver.implicitly_wait(10)
#driver.find_element_by_xpath('//*[#id="footer_tc_privacy_button_2"]').click()
I have experienced problems with proxy + chrome/chromedriver forever now, and the only solution I came up with was trying to detect the error and reload programmatically, something like this:
# find_elements_* returns an empty list when nothing matches (the singular
# find_element_* would raise instead), so the result works as a presence check.
err = driver.find_elements_by_css_selector('#main-frame-error .error-code')
# found chrome err! lets reload...
if err:
    driver.get(driver.current_url)

'--window-size=' not working with ChromeDriver

I can't seem to get the '--window-size=' switch working for ChromeDriver for Selenium. Any ideas? Just trying to randomize the window size.
from selenium import webdriver

TEST_URL = 'http://localhost:8000'
options = webdriver.ChromeOptions()
# options.add_argument('headless')
# Chrome expects the size as "width,height"; the "1920x1080" form is not
# recognised, which is why the switch appeared to have no effect.
options.add_argument('--window-size=1920,1080')
options.add_argument("disable-gpu")
options.add_argument('disable-infobars')
options.add_argument('--disable-extensions')
options.add_experimental_option("excludeSwitches", ['enable-automation'])
options.add_experimental_option('w3c', False)
options.add_argument("user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")
driver = webdriver.Chrome('chromedriver', options=options)
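The --window-size argument does not seem to work here in Python Selenium; note that Chrome expects the dimensions separated by a comma (--window-size=1920,1080), not an "x".
Alternatively, you can use set_window_size.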
driver.set_window_size(1920, 1080)
Call this right after creating browser.

User agent not changing with firefox selenium getting setcapabilities issue

This code was working fine, but after switching from Chrome to Firefox it now gives me an error.
Please help me find the solution.
FirefoxOptions options = new FirefoxOptions();
// Firefox's private-browsing switch is -private; --incognito is a Chrome switch.
options.addArguments("-private");
// "start-maximized" is also a Chrome switch and is dropped here; maximize the
// window with driver.manage().window().maximize() once the driver exists.
// options.addArguments("--user-agent=Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac\n" + "OS X; en-us) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53");
// Firefox has no --user-agent argument; the user agent is overridden through
// the general.useragent.override preference instead.
options.addPreference("general.useragent.override", "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 6P Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83 Mobile Safari/537.36");
// The capability key for Firefox options is the FIREFOX_OPTIONS constant.
// `capabilities` is declared outside this snippet.
capabilities.setCapability(FirefoxOptions.FIREFOX_OPTIONS, options);
System.setProperty("webdriver.gecko.driver","/Users/abcd/Downloads/geckodriver");
WebDriver driver = null;
Please help ....
I can sense several issues:
driver.manage().window().maximize();
to browse incognito with Firefox use
from selenium.webdriver.firefox.options import Options
options = Options()
options.add_argument("-private")
or see: Python/Selenium incognito/private mode
adjusting the user agent is more difficult: https://stackoverflow.com/a/42171906/8291949
Basically, there is an about:config option general.useragent.override, which you can specify in the user.js file in the profile folder with a line like:
user_pref("general.useragent.override", "Mozilla/5.0 AppleWebKit/537.36…")
And then use capabilities to load the profile. Regarding capabilities see: Can't open browser with Selenium after Firefox update