How to HTTP 'Keep-Alive' in Python 3.2 with urllib

I am trying to keep an HTTP connection alive with urllib.request in Python 3.2.3, using this code:
handler = urllib.request.HTTPHandler()
opener = urllib.request.build_opener(handler)
opener.addheaders = [("connection", "keep-alive"), ("Cookie", cookie_value)]
r = opener.open(url)
But when I watch the connection in Wireshark, the request goes out with "Connection: close", although the Cookie header is set:
Host: url
Cookie: cookie-value
Connection: close
What do I have to do to make the header say Connection: keep-alive?

If you need something more automatic than plain http.client, this might help, though it's not threadsafe.
from http.client import HTTPConnection, HTTPSConnection
import select

connections = {}

def request(method, url, body=None, headers={}, **kwargs):
    scheme, _, host, path = url.split('/', 3)
    h = connections.get((scheme, host))
    if h and select.select([h.sock], [], [], 0)[0]:
        h.close()
        h = None
    if not h:
        Connection = HTTPConnection if scheme == 'http:' else HTTPSConnection
        h = connections[(scheme, host)] = Connection(host, **kwargs)
    h.request(method, '/' + path, body, headers)
    return h.getresponse()

def urlopen(url, data=None, *args, **kwargs):
    resp = request('POST' if data else 'GET', url, data, *args, **kwargs)
    assert resp.status < 400, (resp.status, resp.reason, resp.read())
    return resp
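A possible usage example for the helper above (hypothetical URL); each response is read before the next call so the cached connection can be reused:
resp = urlopen('http://example.com/')
print(resp.status, resp.read()[:80])
resp = urlopen('http://example.com/')  # reuses the cached connection
print(resp.status, resp.read()[:80])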

I keep the connection alive by using http.client:
import http.client
conn = http.client.HTTPConnection(host, port)
conn.request(method, url, body, headers)
The headers parameter just takes a dict, and the body can still be built with urllib.parse.urlencode, so you can set the Cookie header yourself with http.client (see the sketch below).
Reference: the official http.client documentation.
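A minimal sketch of that approach (hostname, path, and cookie value are made up); note that each response must be read fully before the same connection can be reused:
import http.client
import urllib.parse

body = urllib.parse.urlencode({'q': 'test'})
headers = {
    'Connection': 'keep-alive',
    'Cookie': 'sessionid=abc123',  # hypothetical cookie value
    'Content-Type': 'application/x-www-form-urlencoded',
}

conn = http.client.HTTPConnection('example.com', 80)
conn.request('POST', '/search', body, headers)
resp = conn.getresponse()
print(resp.status, resp.read()[:80])

# As long as the server honours keep-alive, the same connection object can
# issue another request without reconnecting.
conn.request('GET', '/', headers={'Connection': 'keep-alive'})
print(conn.getresponse().status)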

Related

Modified Scrapy middleware blocks the main program

I replaced the request library inside a Scrapy downloader middleware, but the new library is synchronous, so the crawl loses Scrapy's asynchronous performance. The library that sends the requests cannot be made asynchronous. What should I do?
Code snippet:
import pyhttpx
from scrapy.http import HtmlResponse
from scrapy.utils.project import get_project_settings

class Ja3DownloaderMiddleware:
    def __init__(self):
        self.session = pyhttpx.HttpSession(browser_type='chrome')

    def process_request(self, request, spider):
        proxies = {
            'https': '127.0.0.1:7890',
            'http': '127.0.0.1:7890',
        }
        if request.method == 'GET':
            response = self.session.get(request.url,
                                        headers=get_project_settings().get('DEFAULT_REQUEST_HEADERS'),
                                        proxies=proxies)
        elif request.method == 'POST':
            response = self.session.post(request.url,
                                         headers=get_project_settings().get('DEFAULT_REQUEST_HEADERS'),
                                         proxies=proxies)
        else:
            raise ValueError(f"Unknown request method: {request.method}")
        # body = str.encode(response.body)
        return HtmlResponse(response.request.url, body=response.text, encoding='utf-8', request=request)
I tried to make it asynchronous, but this library does not support it. I want the middleware not to block the crawl after the change.
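One possible workaround (an untested sketch, not from the original post; the _fetch helper name is made up) is to push the blocking pyhttpx call onto Twisted's thread pool with deferToThread, so the reactor is not stalled while the request runs. This assumes your Scrapy version accepts a Deferred returned from process_request, since the downloader middleware chain is Deferred-based.
import pyhttpx
from twisted.internet.threads import deferToThread
from scrapy.http import HtmlResponse
from scrapy.utils.project import get_project_settings

class Ja3DownloaderMiddleware:
    def __init__(self):
        self.session = pyhttpx.HttpSession(browser_type='chrome')

    def _fetch(self, request):
        # Runs in a worker thread, so blocking here does not stall the reactor.
        headers = get_project_settings().get('DEFAULT_REQUEST_HEADERS')
        proxies = {'https': '127.0.0.1:7890', 'http': '127.0.0.1:7890'}
        if request.method == 'GET':
            response = self.session.get(request.url, headers=headers, proxies=proxies)
        else:
            response = self.session.post(request.url, headers=headers, proxies=proxies)
        return HtmlResponse(response.request.url, body=response.text,
                            encoding='utf-8', request=request)

    def process_request(self, request, spider):
        # Returning a Deferred here is an assumption to verify against your
        # Scrapy version; the HtmlResponse is delivered when it fires.
        return deferToThread(self._fetch, request)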

How to set up a Scrapy proxy with authorization?

My middleware:
from w3lib.http import basic_auth_header

class CustomProxyMiddleware(object):
    def process_request(self, request, spider):
        request.meta['proxy'] = "111.11.11.111:1111"
        request.headers['Proxy - Authorization'] = basic_auth_header('login', 'password')
My settings:
DOWNLOADER_MIDDLEWARES = {
    'my_project.middlewares.CustomProxyMiddleware': 350,
    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 400,
}
After launching, I get an error:
scrapy.core.downloader.handlers.http11.TunnelError: Could not open CONNECT tunnel with proxy 217.29.53.106:51725 [{'status': 407, 'reason': b'Proxy Authentication Required'}]
What is the reason, and how do I fix it? (I am using valid HTTPS proxies.)
Try changing the header name to Proxy-Authorization (without the spaces):
request.headers['Proxy-Authorization'] = basic_auth_header('login', 'password')
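Putting it together, the middleware from the question might look like this (a sketch of the corrected version; the scheme prefix on the proxy address is an assumption that matches the usual meta['proxy'] format):
from w3lib.http import basic_auth_header

class CustomProxyMiddleware(object):
    def process_request(self, request, spider):
        # Placeholder proxy address from the question, with an explicit scheme.
        request.meta['proxy'] = "http://111.11.11.111:1111"
        request.headers['Proxy-Authorization'] = basic_auth_header('login', 'password')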
proxy = {
    'http': 'http://{user}:{password}@{host}:{port}',
    'https': 'https://{user}:{password}@{host}:{port}',
}
yield scrapy.Request(url=url, meta={'proxy': proxy['https']})
Doesn't this work?

asyncio stream vs synchronous stream in socket communication with a react native app

The objective: an ESP32 running MicroPython acts as the server, while an Android app acts as the client. With the synchronous stream code I can communicate successfully, but after switching to asyncio I cannot: app-to-ESP32 traffic works, but the app fails to retrieve the JSON output from the server (I tried plain text strings too). The app-side code is unchanged between the synchronous and asyncio versions.
Desired output:
response = {
    'error': 'invalid request',
    'status': 'retry'
}
Synchronous side:
conn.send('HTTP/1.1 200 OK\n')
conn.send('Content-Type: application/json\n')
conn.send('Connection: close\n\n')
conn.sendall(ujson.dumps(response))
Asyncio side:
swriter.write(ujson.dumps(response))
await swriter.drain()
React Native side:
fetch('http://192.168.0.110')
  .then(response => response.json())
  .then((responseJson) => {
    const data1 = responseJson;
    console.log('getting data from fetch', data1)
    setData({ data1 });
    onConnectionMessage(data1);
  })
With the synchronous code I was able to retrieve the JSON output sent from the ESP32 to the Android app (React Native), but the same flow using asyncio fails. What am I doing wrong?
Sample asyncio server-side code:
import usocket as socket
import uasyncio as asyncio
import uselect as select
import ujson
from heartbeat import heartbeat  # Optional LED flash

class Server:
    def __init__(self, host='0.0.0.0', port=80, backlog=5, timeout=10):
        self.host = host
        self.port = port
        self.backlog = backlog
        self.timeout = timeout

    async def run(self):
        print('Awaiting client connection.')
        self.cid = 0
        asyncio.create_task(heartbeat(100))
        self.server = await asyncio.start_server(self.run_client, self.host, self.port, self.backlog)
        while True:
            await asyncio.sleep(100)

    async def run_client(self, sreader, swriter):
        self.cid += 1
        print('Got connection from client', self.cid)
        try:
            while True:
                try:
                    res = await asyncio.wait_for(sreader.readline(), self.timeout)
                except asyncio.TimeoutError:
                    res = b''
                if res == b'':
                    raise OSError
                print('Received {} from client {}'.format(ujson.loads(res.rstrip()), self.cid))
                response = {
                    'error': 'invalid request',
                    'status': 'retry'
                }
                swriter.write(ujson.dumps(response))
                await swriter.drain()  # Echo back
        except OSError:
            pass
        print('Client {} disconnect.'.format(self.cid))
        await sreader.wait_closed()
        print('Client {} socket closed.'.format(self.cid))

    async def close(self):
        print('Closing server')
        self.server.close()
        await self.server.wait_closed()
        print('Server closed.')

server = Server()
try:
    asyncio.run(server.run())
except KeyboardInterrupt:
    print('Interrupted')  # This mechanism doesn't work on Unix build.
finally:
    asyncio.run(server.close())
    _ = asyncio.new_event_loop()
Found the problem: I changed asyncio.wait_for(sreader.readline(), self.timeout) to asyncio.wait_for(sreader.read(2048), self.timeout). Now the client receives the JSON output immediately after the socket is closed.
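For what it's worth, one visible difference between the two versions above is that the synchronous handler sends an HTTP status line and headers before the body, while the asyncio handler writes only the raw JSON, so fetch() on the client never sees a complete HTTP response. A sketch (untested, meant to sit inside run_client and using the same names as the code above) that mirrors the synchronous reply:
response = {'error': 'invalid request', 'status': 'retry'}
body = ujson.dumps(response)
# Write a full HTTP response the way the synchronous code does; Content-Length
# lets the client know when the body ends without waiting for the socket to close.
swriter.write('HTTP/1.1 200 OK\r\n')
swriter.write('Content-Type: application/json\r\n')
swriter.write('Content-Length: {}\r\n'.format(len(body)))
swriter.write('Connection: close\r\n\r\n')
swriter.write(body)
await swriter.drain()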

aiohttp: how do I retrieve the peer certificate?

I want to get the certificate hash, but I have no idea how to get the server's peer certificate, either from the request or from the response. The server I send the request to sets the Connection: close header, so retrieving the original SSL socket from the response doesn't work.
Currently no way, sorry.
You can check a cert hash easily though: https://docs.aiohttp.org/en/stable/client_advanced.html#ssl-control-for-tcp-sockets
The following example uses a SHA-256 fingerprint check:
fingerprint = b'...'  # the SHA-256 digest of the DER-encoded certificate (32 bytes, i.e. 256/8)
r = await session.get('https://example.com',
                      ssl=aiohttp.Fingerprint(fingerprint))
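If you need to obtain that digest in the first place, one way (a sketch using only the standard library; host and port are placeholders) is to fetch the server certificate with ssl and hash its DER encoding:
import ssl
import hashlib

# Fetch the certificate as PEM, convert it to DER, and take the SHA-256 digest.
pem = ssl.get_server_certificate(('example.com', 443))
der = ssl.PEM_cert_to_DER_cert(pem)
fingerprint = hashlib.sha256(der).digest()  # 32 raw bytes, suitable for aiohttp.Fingerprint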
I've come up with this solution/hack:
import aiohttp

class WrappedResponseClass(aiohttp.ClientResponse):
    def __init__(self, *args, **kwargs):
        super(WrappedResponseClass, self).__init__(*args, **kwargs)
        self._peer_cert = None

    async def start(self, connection, read_until_eof=False):
        try:
            self._peer_cert = connection.transport._ssl_protocol._extra['ssl_object'].getpeercert(True)
        except Exception:
            pass
        return await super(WrappedResponseClass, self).start(connection, read_until_eof)

    @property
    def peer_cert(self):
        return self._peer_cert

session = aiohttp.ClientSession(otherargs..., response_class=WrappedResponseClass)
The following works for me with aiohttp 3.8.3:
async with aiohttp.ClientSession() as session:
    r = await session.get('https://bbc.com')
    cert = r.connection.transport.get_extra_info('peercert')
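Building on that answer, a sketch of how one might compute the certificate hash itself (an assumption-laden example: it relies on the connection still being attached to the response and on the standard asyncio 'ssl_object' extra; as the question notes, it may fail if the server closes the connection):
import asyncio
import hashlib
import aiohttp

async def cert_sha256(url):
    async with aiohttp.ClientSession() as session:
        r = await session.get(url)
        ssl_obj = r.connection.transport.get_extra_info('ssl_object')
        der = ssl_obj.getpeercert(binary_form=True)  # DER-encoded peer certificate
        await r.read()
        return hashlib.sha256(der).hexdigest()

print(asyncio.run(cert_sha256('https://bbc.com')))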

Why does this Python Reddit API call fail?

If someone could point me in the right direction I would be very grateful. I am trying to use the requests module to interact with Reddit's API, but for some reason I am getting HTTP 403 status codes. I believe I am able to log in successfully (since I get HTTP 200), but I cannot perform a successful API request afterwards. I am very new to this, so please use small words. Thanks.
import requests
import json
from pprint import pprint

# URLs
url_base = r'https://www.reddit.com'
url_login = '/api/login'
url_me = '/api/v1/me'

# Credentials
user = '__kitten_mittens__'
passwd = 'password'
params = {'user': user,
          'passwd': passwd,
          'api_type': 'json'}
headers = {'user-agent': '/u/__kitten_mittens__ practice api bot'}

# Set up the session/headers
client = requests.session()
client.headers = headers

response = client.post(url_base + url_login, data=params)
j = json.loads(response.content.decode('utf-8'))
mh = j['json']['data']['modhash']
print("The modhash for {USER} is {mh}".format(USER=user, mh=mh))

# THIS CODE GIVES HTTP STATUS CODE 403
response = client.get(url_base + url_me)
me_json = json.loads(response.content.decode('utf-8'))
pprint(me_json)
EDIT: I fixed the missing single quote from the password field. That was not part of the problem.
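One thing commonly suggested for the legacy cookie-based API (a hedged sketch, not a confirmed fix: the X-Modhash header and its effect on this endpoint are assumptions to verify) is to send the modhash returned by /api/login back on later requests, reusing the same session so the reddit_session cookie is also sent:
import requests

client = requests.session()
client.headers = {'user-agent': '/u/__kitten_mittens__ practice api bot'}

login = client.post('https://www.reddit.com/api/login',
                    data={'user': '__kitten_mittens__', 'passwd': 'password',
                          'api_type': 'json'})
mh = login.json()['json']['data']['modhash']

# Attach the modhash for later calls; X-Modhash is the header the legacy API
# documents for this purpose (treat this as an assumption to verify).
client.headers['X-Modhash'] = mh
me = client.get('https://www.reddit.com/api/v1/me')
print(me.status_code, me.text[:200])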