I'm using Scrapy to scrape sites that require login, but I'm not sure exactly which fields I need to save and load in order to keep the session.
With selenium I'm doing the following to save the cookies:
import pickle
import selenium.webdriver
# Launch Firefox and load the site so the browser holds that site's cookies.
driver = selenium.webdriver.Firefox()
driver.get("http://www.google.com")
# Persist the cookies; the context manager guarantees the file is flushed and closed.
with open("cookies.pkl", "wb") as cookie_file:
    pickle.dump(driver.get_cookies(), cookie_file)
And this to load them:
import pickle
import selenium.webdriver
driver = selenium.webdriver.Firefox()
# Load the site first so the cookies are added while its page is the current document.
driver.get("http://www.google.com")
# NOTE: only unpickle files you created yourself; pickle can execute arbitrary code on load.
with open("cookies.pkl", "rb") as cookie_file:
    cookies = pickle.load(cookie_file)
# Restore every saved cookie into the running browser session.
for cookie in cookies:
    driver.add_cookie(cookie)
This works just fine. Is it possible to do exactly the same thing using Scrapy?
Send a request using cookies:
# Attach the cookies to the outgoing request through the `cookies` argument.
request_with_cookies = Request(url="http://www.example.com", cookies={'currency': 'USD', 'country': 'UY'})
Get cookies from response:
# Servers deliver cookies in "Set-Cookie" response headers, one header per cookie,
# so collect every occurrence instead of indexing a single "Cookies" header.
cookies_from_response = [header.decode() for header in response.headers.getlist('Set-Cookie')]
Related
The crypto exchange hotbit.io disabled some important endpoints I was using. Now I have to use the Cloudflare protected ones.
My Idea was to solve Cloudflare with selenium/undetected_chromedriver and pass the cookies to my session.
Code:
import time
import requests
import undetected_chromedriver as uc
# Create a browser and let it solve the Cloudflare challenge.
driver = uc.Chrome()
url = "https://www.hotbit.io/"
driver.get(url)
time.sleep(5)
# Store the cookies generated by the browser.
request_cookies_browser = driver.get_cookies()
print(request_cookies_browser)
s = requests.Session()
# Copy the browser cookies into the session. A plain loop is used because
# set() is called purely for its side effect, not to build a list.
for cookie in request_cookies_browser:
    s.cookies.set(cookie['name'], cookie['value'])
print(s.cookies)
# NOTE: this request goes out with requests' default User-Agent, not the browser's.
resp = s.get(url)
print(resp.text)
I changed some values so I'm not leaking anything
Output:
Cookies: https://pastebin.com/u7sGvjae
html-hotbit: https://pastebin.com/6sAfhWtv
It looks like all cookies are set correctly but I'm still on the Cloudflare solving page with my session.
Anyone has a solution to "bypass" the Cloudflare page and access the website with requests?
Looks like cookies AND user-agent are needed. To be precise only the cf_clearance cookie AND user-agent are needed.
The solution would look something like this:
import time
import undetected_chromedriver as uc
import requests
# Create the driver with selenium/undetected_chromedriver.
driver = uc.Chrome()
driver.get("https://www.hotbit.io/")
time.sleep(5)  # 5s sleep, so the driver can solve cloudflare
# Get the cookies and the user agent from the browser.
brCookies = driver.get_cookies()
ua = driver.execute_script("return navigator.userAgent")
# Pick out the cf_clearance cookie; fail with a clear message when it is absent
# instead of an opaque IndexError on an empty list.
cf_cookies = next(
    (cookie['value'] for cookie in brCookies if cookie['name'] == 'cf_clearance'),
    None,
)
if cf_cookies is None:
    raise RuntimeError(
        "cf_clearance cookie not found; the Cloudflare challenge was probably not solved in time"
    )
cookies = {"cf_clearance": cf_cookies}
headers = {"user-agent": ua}
# Create a session and fetch the site with the browser's clearance cookie and user agent.
session = requests.session()
res = session.get("https://www.hotbit.io/", headers=headers, cookies=cookies)
I am adding chrome options this way and it works if I use proxy ip authentication.
# Build the Chrome options: headless mode plus a proxy given as host:port without credentials.
options = webdriver.ChromeOptions()
options.headless = True
options.add_argument('--proxy-server=92.128.165.143:3399')
# Start the driver with those options (`uc` is presumably undetected_chromedriver — confirm).
driver = uc.Chrome(options=options)
However, I have a proxy with authentication in this format:
http://username:password@91.92.128.165.143:3399
If I add it like
# Proxy URL with the credentials embedded as user:password@host:port.
options.add_argument('--proxy-server=http://username:password@91.92.128.165.143:3399')
it doesn't work. How could I add it with username/password? This applies only to undetected chrome driver.
I think I've already achieved this with the help of selenium-wire, but it didn't work with a Kivy GUI. For plain scripting you can carry on like this, but if you want to use it with Kivy you will definitely get an error.
from ast import Try
from pathlib import Path
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.proxy import *
from seleniumwire import undetected_chromedriver as uc
from fake_useragent import UserAgent
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from pyclick import HumanClicker
import pyautogui
import time
clicked = False
# Proxy endpoint and credentials.
hostname = "188.74.183.126"
port = "8395"
proxy_username = "wclmiemy"
proxy_password = "a9hoxl4phkzr"
# selenium-wire options. The credentials go in the URL's userinfo part, which is
# separated from the host by "@"; a "#" would start a URL fragment and lose the
# host and port entirely.
chrome_options = {
    'proxy': {
        'http': f'http://{proxy_username}:{proxy_password}@{hostname}:{port}',
        'https': f'https://{proxy_username}:{proxy_password}@{hostname}:{port}',
        'no_proxy': 'localhost,127.0.0.1',
    }
}
def delete_cache(driver):
    """Open Chrome's clear-browsing-data page in a new tab and click the match of clear_data.png.

    The click target is found on screen by pyautogui from the image file
    "clear_data.png".
    """
    # Spawn an extra tab so the main one is left untouched.
    driver.execute_script("window.open('')")
    # The tab that was just opened is the last handle in the list.
    newest_tab = driver.window_handles[-1]
    driver.switch_to.window(newest_tab)
    # Navigate to the Chrome settings page for clearing browser data.
    driver.get('chrome://settings/clearBrowserData')
    pyautogui.click("clear_data.png")
if __name__ == '__main__':
    # NOTE: account credentials are hard-coded here and not used further in this snippet.
    email = "moqaddasmehran5@gmail.com"
    password = "moqaddaszaheenzaheen"
    # Pick a random user-agent string for this browser session.
    ua = UserAgent()
    userAgent = ua.random
    # `webdriver` is never imported in this script; the selenium-wire wrapper
    # module exposes ChromeOptions itself, so take it from there.
    options = uc.ChromeOptions()
    options.add_argument(f'user-agent={userAgent}')
    # options.add_argument('--ignore-certificate-errors-spki-list')
    # # options.add_argument('--ignore-ssl-errors')
    options.add_argument("--disable-infobars")
    # The proxy configuration is passed through seleniumwire_options.
    browser = uc.Chrome(
        driver_executable_path="chromedriver",
        seleniumwire_options=chrome_options,
        options=options,
        use_subprocess=True
    )
    browser.maximize_window()
    browser.get('https://www.youtube.com/watch?v=uPxkrGL0l7U')
```
This code is kind of messy because I'm in a hurry, but I hope you will be able to modify it. You will also get an SSL certificate that you need to import into Chrome; that's it, you will definitely get it working.
Use the following code to add proxy with username and password:
from selenium import webdriver
# Proxy URL with the credentials embedded as user:password@host:port.
PROXY = "http://username:password@91.92.128.165.143:3399"
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument(f'--proxy-server={PROXY}')
# Pass the options through `options=`; the `chrome_options=` keyword is deprecated
# and no longer accepted by recent Selenium 4 releases.
driver = webdriver.Chrome(options=chrome_options)
edit:
I found this related question: "How to set proxy with authentication in selenium chromedriver python?"
I am trying to send requests through proxies. If I use plain IP:Port proxies, my script works fine, but if I use an IP:Port proxy that also asks for a username and password, my script is unable to connect to the website.
How can I set up proxies properly with Firefox Selenium?
Here is the code:
import time
from stem import Signal
from stem.control import Controller
from selenium.webdriver import Firefox
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
# profile_path = r'C:\Users\hp\Desktop\temp\FirefoxPortable\Data'
options=Options()
# options.set_preference('profile', profile_path)
# Configure a SOCKS proxy (host, port, credentials, remote DNS) through Firefox preferences.
options.set_preference('network.proxy.type', 1)
options.set_preference('network.proxy.socks', 'myhostgoes here')
options.set_preference('network.proxy.socks_port',12323)
# NOTE(review): verify that Firefox actually honours these two credential preferences — confirm against Firefox docs.
options.set_preference("network.proxy.socks_username",'myusername goes here')
options.set_preference("network.proxy.socks_password",'Password goes here')
options.set_preference('network.proxy.socks_remote_dns', True)
# Point Selenium at the local geckodriver executable.
service = Service(r'C://SeleniumDrivers/geckodriver.exe')
# First browser session: load the page and keep it open for 15 seconds.
driver = Firefox(service=service, options=options)
driver.get("https://www.whoer.net")
time.sleep(15)
driver.close()
# Second browser session, reusing the same service and options objects.
driver = Firefox(service=service, options=options)
driver.get("https://www.whoer.net")
I'm currently using the script below to upload TikTok videos. However, when running the script I get the error message "Too many attempts. Try again later." regardless of which login method I use, and rotating headers doesn't seem to fix the error. Any advice?
import time
import random
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager as CM
import undetected_chromedriver.v2 as uc
import undetected_chromedriver as uc
print('=====================================================================================================')
print('Heyy, you have to login manully on tiktok, so the bot will wait you 1 minute for loging in manually!')
print('=====================================================================================================')
time.sleep(8)
print('Running bot now, get ready and login manually...')
time.sleep(4)
# NOTE(review): this undetected-chromedriver instance is replaced by the
# webdriver.Chrome instance created a few lines below — confirm which one is intended.
bot = uc.Chrome()
options = webdriver.ChromeOptions()
# Fixed: the original called `option.add_argument`, which raises NameError.
options.add_argument("--profile-directory=Default")
bot = webdriver.Chrome(options=options, executable_path=CM().install())
bot.set_window_size(1680, 900)
bot.get('https://www.tiktok.com/login')
# Press Ctrl+'-' twice (browser zoom-out shortcut).
ActionChains(bot).key_down(Keys.CONTROL).send_keys('-').key_up(Keys.CONTROL).perform()
ActionChains(bot).key_down(Keys.CONTROL).send_keys('-').key_up(Keys.CONTROL).perform()
print('Waiting 50s for manual login...')
time.sleep(50)
# After the manual login window, move on to the upload page.
bot.get('https://www.tiktok.com/upload/?lang=en')
time.sleep(3)
def check_exists_by_xpath(driver, xpath):
    """Return True when an element matching *xpath* can be found by *driver*, else False."""
    try:
        # find_element(By.XPATH, ...) is the supported lookup; the
        # find_element_by_xpath shortcut was removed in Selenium 4.
        driver.find_element(By.XPATH, xpath)
    except NoSuchElementException:
        return False
    return True
def upload(video_path):
    """Upload *video_path* through the TikTok upload page, repeating forever.

    Each pass selects the file, types the caption lines read from
    "caption.txt", clicks the post button, and then clicks the follow-up
    button in the portal dialog so the next pass can start. Uses the
    module-level ``bot`` driver.
    """
    # XPath attribute tests are written with "@" ("[@id=...]"); "[#id=...]" is not valid XPath.
    file_input_xpath = '//*[@id="main"]/div[2]/div/div[2]/div[2]/div/div/input'
    caption_xpath = (
        '//*[@id="main"]/div[2]/div/div[2]/div[3]/div[1]/div[1]/div[2]'
        '/div/div[1]/div/div/div/div/div/div/span'
    )
    post_xpath = '//*[@id="main"]/div[2]/div/div[2]/div[3]/div[5]/button[2]'
    portal_xpath = '//*[@id="portal-container"]/div/div/div[1]/div[2]'

    while True:
        # Hand the video file to the page's file input.
        file_uploader = bot.find_element(By.XPATH, file_input_xpath)
        file_uploader.send_keys(video_path)

        # Focus the caption editor.
        caption = bot.find_element(By.XPATH, caption_xpath)
        bot.implicitly_wait(10)
        ActionChains(bot).move_to_element(caption).click(caption).perform()

        # Type each line of caption.txt followed by RETURN.
        with open(r"caption.txt", "r") as f:
            tags = [line.strip() for line in f]
        for tag in tags:
            ActionChains(bot).send_keys(tag).perform()
            time.sleep(2)
            ActionChains(bot).send_keys(Keys.RETURN).perform()
            time.sleep(1)

        time.sleep(5)
        bot.execute_script("window.scrollTo(150, 300);")
        time.sleep(5)

        # Wait for the post button to become visible, then submit.
        post = WebDriverWait(bot, 100).until(
            EC.visibility_of_element_located((By.XPATH, post_xpath)))
        post.click()
        time.sleep(30)

        if check_exists_by_xpath(bot, portal_xpath):
            reupload = WebDriverWait(bot, 100).until(
                EC.visibility_of_element_located((By.XPATH, portal_xpath)))
            reupload.click()
        else:
            # The portal dialog did not appear: retry the post every 10 minutes until it does.
            print('Unknown error cooldown')
            while True:
                time.sleep(600)
                post.click()
                time.sleep(15)
                if check_exists_by_xpath(bot, portal_xpath):
                    break

        if check_exists_by_xpath(bot, portal_xpath):
            reupload = WebDriverWait(bot, 100).until(
                EC.visibility_of_element_located((By.XPATH, portal_xpath)))
            reupload.click()

        time.sleep(1)
# ================================================================
# Path of the video that you want to upload to TikTok.
# Please edit the path, because it is different for everyone.
upload(r"C:\Users\redi\Videos\your-video-here.mov")
# ================================================================
Does anyone have a workaround for the TikTok block?
While you are attempting to log in, re-login, or create an account, the TikTok platform may wrongly identify you as a bot or spam, and for the next 5 minutes or so you may keep getting the "Too many attempts. Try again later." error message when trying to sign in to TikTok, as follows.
To avoid getting detected as a bot you can use your default user login profile and you can use the following solution:
# Build the Chrome options used to start the browser.
options = Options()
options.add_argument("start-maximized")
# Drop the "enable-automation" switch and the automation extension
# (the "Chrome is controlled by automated test software" banner).
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
# Avoid detection: disable the AutomationControlled blink feature.
options.add_argument('--disable-blink-features=AutomationControlled')
# Use the default Chrome user profile from the local user-data directory.
options.add_argument("--profile-directory=Default")
options.add_argument("--user-data-dir=C:/Users/Admin/AppData/Local/Google/Chrome/User Data")
# Start Chrome with these options and open the TikTok login page.
bot = webdriver.Chrome(options=options, executable_path=CM().install())
bot.get('https://www.tiktok.com/login')
I want to sign in to my Google Account and enable a Google API and extract the developer's key. My main task is to automate this process.
Everyone knows that you can't log into the Google Account using an automated browser. I did manage to do that using scrapy splash.
import re
import time
import base64
import scrapy
from scrapy_splash import SplashRequest
from selenium import webdriver
class GoogleScraperSpider(scrapy.Spider):
    """Log in to a Google account through Splash, then replay the session cookies in Chrome."""

    name = 'google_scraper'

    # Lua script run by Splash: opens the login URL, types the e-mail and the
    # password into the input field, clicks the "next" buttons, and returns
    # the cookies, the page HTML and a PNG screenshot.
    script = """
    function main(splash)
      splash:init_cookies(splash.args.cookies)
      local url = splash.args.url
      local youtube_url = "https://console.cloud.google.com/apis/library/youtube.googleapis.com"
      assert(splash:go(url))
      assert(splash:wait(1))
      splash:set_viewport_full()
      local search_input = splash:select('.whsOnd.zHQkBf')
      search_input:send_text("xxxxxxxxxxx@gmail.com")
      assert(splash:wait(1))
      splash:runjs("document.getElementById('identifierNext').click()")
      splash:wait(5)
      local search_input = splash:select('.whsOnd.zHQkBf')
      search_input:send_text("xxxxxxxx")
      assert(splash:wait(1))
      splash:runjs("document.getElementById('passwordNext').click()")
      splash:wait(5)
      return {
        cookies = splash:get_cookies(),
        html = splash:html(),
        png = splash:png()
      }
    end
    """

    def start_requests(self):
        """Yield the single Splash request that executes the login script."""
        url = 'https://accounts.google.com'
        yield SplashRequest(url, self.parse, endpoint='execute', session_id="1", args={'lua_source': self.script})

    def parse(self, response):
        """Save the screenshot, copy the Splash cookies into Chrome and open the API page."""
        # Store the screenshot returned by the Lua script.
        imgdata = base64.b64decode(response.data['png'])
        with open('image.png', 'wb') as file:
            file.write(imgdata)

        # Fall back to an empty list so a response without cookies does not crash the loop.
        cookies = response.data.get("cookies") or []
        driver = webdriver.Chrome("./chromedriver")
        for cookie in cookies:
            # A leading dot marks a domain-wide cookie; visit the "www" host of that domain.
            if cookie["domain"].startswith("."):
                url = f"https://www{cookie['domain']}"
            else:
                url = f"https://{cookie['domain']}"
            # Load a page on the cookie's domain before adding the cookie.
            driver.get(url)
            driver.add_cookie(cookie)

        driver.get("https://console.cloud.google.com/apis/library/youtube.googleapis.com")
        time.sleep(5)
In the parse function I'm trying to retrieve those cookies and add them to my chromedriver to bypass the login process so I can move ahead to enabling the API and extracting the key but I always face the login page in the chromedriver.
Your help would be most appreciated.
Thanks.
Try using pickle to save the cookies instead. Just use any Python console to save the cookies with this code:
import pickle
# Pause until the manual login in the browser window is finished.
input('press enter when logged')
# pickle writes bytes, so the file must be opened in binary write mode
# (the default text read mode cannot receive the dump).
with open('cookies.pkl', 'wb') as cookie_file:
    pickle.dump(driver.get_cookies(), cookie_file)
Then you get the cookies.pkl file with the Google login data. Import it in your code using:
import pickle
# pickle data is binary, so open the file in binary read mode.
# NOTE: only unpickle files you created yourself; pickle can execute arbitrary code on load.
with open('cookies.pkl', 'rb') as cookie_file:
    cookies = pickle.load(cookie_file)
for cookie in cookies:
    # The WebDriver method is add_cookie (singular) and takes one cookie dict per call.
    driver.add_cookie(cookie)
driver.refresh()
# rest of work here...
Refresh the driver so that the cookies take effect.