Python Selenium causing error when screenshotting when running headless - selenium

I have a selenium app that does some work and then saves a screenshot of an image as png:
img = driver.find_element_by_xpath('//div[#id="qrcode"]/img')
with open('image.png', 'wb') as f:
f.write(img.screenshot_as_png)
and then it will be texted to me.
This was all working fine until I introduced headless:
chrome_options = Options()
chrome_options.add_argument('--headless')
driver = webdriver.Chrome(executable_path="C:/chromedriver.exe", options=chrome_options)
Now, for some reason, it saves only the top half of the image. I took the headless argument away, and it works perfectly. Any suggestions?

It is not the Screenshotting that is causing the error. The resolution of headless browser is just simply different than the normal browser.
You can manually adjust your Selenium driver window size by this command. Try changing the size and see if you can get the full resolution image.
chrome_options.add_argument("window-size=1400,600")
You may also try:
chrome_options.add_argument("--start-maximized")

While #Hammad solutions seems a reasonable fix, even if that did not work and you are interested in request module then you can try the below code to take the screenshot.
import requests
path = 'target.jpg'
response = requests.get("Image SRC/URL here", stream=True)
if response.status_code == 200:
with open(path, 'wb') as file:
for pic in response:
file.write(pic)
also in place of URL here, you can pass
img = driver.find_element_by_xpath('//div[#id="qrcode"]/img').get_attribute('src')

Some websites have anti-scraping mechanisms that involves in detecting the webdriver property of the browser. When you enable the headless mode for Chrome this property is not set by the browser. Thus indication to websites that the origin of the request is through a bot or a program.
You can try to execute javascript that can set the webdriver property for your browser in headless mode.
However, please also note that this is just one of many mechanisms used by websites to detect bots or programs.
You may also check this answer
Here is a sample code I wrote using pyppeteer library.
import asyncio
from pyppeteer import launch
# from pyvirtualdisplay import Display
from argparse import ArgumentParser
class HTMLRetriever(object):
_page_source = None
_title = None
def __init__(self, url):
self.url = url
async def load(self):
# with Display(backend='xvfb') as disp:
await self._init_browser()
await self._init_webpage()
await self._connect_website()
await self._take_snapshot()
#classmethod
async def _init_display(cls):
cls.disp = Display(backend='xvfb')
#classmethod
async def _init_browser(cls):
cls.browser = await launch(headless=True, args=[
"--no-sandbox",
"--single-process",
"--disable-dev-shm-usage",
"--no-zygote",
'--user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"'
])
#classmethod
async def _init_webpage(cls):
cls.page = await cls.browser.newPage()
await asyncio.sleep(1)
await cls.page.setJavaScriptEnabled(True)
#classmethod
async def _init_webpage_properties(cls):
await cls.page.evaluate('''() =>{
Object.defineProperties(navigator,{
webdriver:{
get: () => false
}
})
}''')
await cls.page.evaluate('''() =>{
Object.defineProperties(window,{
chrome:{
get: () => true
}
})
}''')
async def _connect_website(self):
await self.page.goto(self.url, {'waitUntil': 'networkidle2', 'timeout': 60000})
await asyncio.sleep(6)
self._title = await self.page.evaluate('''() => {
return document.title
}''')
self._page_source = await self.page.content()
async def _take_snapshot(self):
await self.page.screenshot({'path': f"snapshots/{self.url.strip('https://').strip('http://').replace('.', '_').replace('/','-')}.png"})
#property
def page_source(self):
return self._page_source
#property
def title(self):
return self._title
async def close(self):
await self.browser.close()
async def main():
parser = ArgumentParser(description='A tool to obtain HTMl of a web URL')
parser.add_argument('-u', '--url', dest='url', type=str, required=True, metavar='URL',
help='URL of the website for which HTML is to be retrieved')
args = parser.parse_args()
kwargs = vars(args)
if not kwargs.get('url') is None:
retriever = HTMLRetriever(url=kwargs.get('url'))
await retriever.load()
print(retriever.title)
await retriever.close()
if __name__ == '__main__':
asyncio.get_event_loop().run_until_complete(main())

Related

Is flask_restful compatible with virtual environments?

I am making an API with Flask-RESTFUL, but when I make the POST
http://127.0.0.1:5000/bot?id_articulo=1&url_articulo=www.wiki.org
I get the message
"message": "The browser (or proxy) sent a request that this server could not understand."
My python code is
from flask import Flask
from flask_restful import Resource, Api, reqparse
import pandas as pd
app = Flask(__name__)
api = Api(app)
class Bot(Resource):
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('id_articulo' , required=True, type=int)
parser.add_argument('url_articulo', required=True, type=str)
args = parser.parse_args()
print(args)
data_articulo = pd.read_csv('articulos.csv')
print(data_articulo)
if args['url_articulo'] in list(data_articulo['url']):
return {
'mensage': f"El artículo '{args['url_articulo']}' ya existe."
}, 409
else:
nueva_columna = pd.DataFrame({
'id_articulo': [args['id_articulo']],
'url': [args['url_articulo']],
})
data_articulo = data_articulo.append(nueva_columna, ignore_index=True)
data_articulo.to_csv('articulos.csv', index=False)
return {'data': data_articulo.to_dict()}, 200
api.add_resource(Bot, '/bot', methods=['POST'])
if __name__ == '__main__':
app.run()
Now, I noticed that the error message is thrown only when I am in a virtual environment whose requirements.txt is
aniso8601==9.0.1
click==8.1.3
colorama==0.4.5
Flask==2.1.2
Flask-RESTful==0.3.9
importlib-metadata==4.12.0
itsdangerous==2.1.2
Jinja2==3.1.2
joblib==1.1.0
MarkupSafe==2.1.1
numpy==1.23.1
pandas==1.4.3
python-dateutil==2.8.2
pytz==2022.1
six==1.16.0
Werkzeug==2.1.2
zipp==3.8.0
By this far, I don't have a clue about what is going on and it makes me think that the flask_restful library have issues with virtual environments and I would like to know how to make this work properly in one.

asyncio stream vs synchronous stream in socket communication with a react native app

Objective is esp32 running micropython acts as a server while android app acts as a client. Before asyncio stream I am able to communicate successfully, but after switching to asyncio i fail to do so, only android app to esp32 is successful but app is failing to retrieve json output from server and I even tried text strings too . App side code remains unchanged for both synchronous/asyncio codes.
Desired output:
response = {
'error': 'invalid request',
'status': 'retry'
}
synchronous side:
conn.send('HTTP/1.1 200 OK\n')
conn.send('Content-Type: application/json\n')
conn.send('Connection: close\n\n')
conn.sendall(ujson.dumps(response ))
asyncio side:
swriter.write(ujson.dumps(response ))
await swriter.drain()
react native side:
fetch( 'http://192.168.0.110' )
.then(response => response.json())
.then((responseJson) => {
const data1 = responseJson;
console.log('getting data from fetch', data1)
setData({ data1 });
onConnectionMessage(data1);
})
synchronous way I was able to retrieve the json output sent from esp32 to android app(react native), but the same code using asyncio failed. What am I doing wrong?
sample asyncio server side code is:
import usocket as socket
import uasyncio as asyncio
import uselect as select
import ujson
from heartbeat import heartbeat # Optional LED flash
class Server:
def __init__(self, host='0.0.0.0', port=80, backlog=5, timeout=10):
self.host = host
self.port = port
self.backlog = backlog
self.timeout = timeout
async def run(self):
print('Awaiting client connection.')
self.cid = 0
asyncio.create_task(heartbeat(100))
self.server = await asyncio.start_server(self.run_client, self.host, self.port, self.backlog)
while True:
await asyncio.sleep(100)
async def run_client(self, sreader, swriter):
self.cid += 1
print('Got connection from client', self.cid)
try:
while True:
try:
res = await asyncio.wait_for(sreader.readline(), self.timeout)
except asyncio.TimeoutError:
res = b''
if res == b'':
raise OSError
print('Received {} from client {}'.format(ujson.loads(res.rstrip()), self.cid))
response = {
'error': 'invalid request',
'status': 'retry'
}
swriter.write(ujson.dumps(response))
await swriter.drain() # Echo back
except OSError:
pass
print('Client {} disconnect.'.format(self.cid))
await sreader.wait_closed()
print('Client {} socket closed.'.format(self.cid))
async def close(self):
print('Closing server')
self.server.close()
await self.server.wait_closed()
print('Server closed.')
server = Server()
try:
asyncio.run(server.run())
except KeyboardInterrupt:
print('Interrupted') # This mechanism doesn't work on Unix build.
finally:
asyncio.run(server.close())
_ = asyncio.new_event_loop()
got the error: asyncio.wait_for(sreader.readline(), self.timeout)------> changed to
asyncio.wait_for(sreader.read(2048), self.timeout). Now client is recieving json output immediately after closing the socket

Error while trying to run the Client Code

enter image description herePlease I am following a guide to create a Server that responds to the text a Client sends to it but in the reversed order. The Server code runs without any issue but the Client code is not working, its is giving the error message: "OSError:[WinError 10022] An invalid argument was supplied"
Please I need help to resolve this.
Below is the code for both the Server and the Client:
#server.py
import asyncio
import socket
# Get the default "event loop" that we will run
loop = asyncio.get_event_loop()
# notice our new "async" before the function definition
async def server_method():
server = socket.socket()
server.bind(('localhost', 6789))
server.listen(1)
# await for a new client
# The event loop can run other code while we wait here!
client, _ = await loop.sock_accept(server)
# await for some data
data = await loop.sock_recv(client, 1024)
data = data[::-1]
# await for sending the data
await loop.sock_sendall(client, data)
# Close both the Server and Client connections
server.close()
client.close()
if __name__ == '__main__':
# run the loop until "server_method" is complete
loop.run_until_complete(server_method())
#client.py
import asyncio
import socket
loop = asyncio.get_event_loop()
addr = ('localhost', 6789)
async def client_method():
message = b'Hello Server!\n'
client = socket.socket()
client.setblocking(False)
# await to establish a connection
await loop.sock_connect(client, ('localhost', 6789))
# await asyncio.get_event_loop().sock_connect(client, addr)
# await to send the message
print('Sending', message)
await loop.sock_sendall(client, message)
# await to receive a response
response = loop.sock_recv(client, 1024)
print('Server replied', response)
client.close()
if __name__ == '__main__':
loop.run_until_complete(client_method())

Send file generated by pdfkit from Flask

I'm generating a PDF file using pdfkit. As I do it in memory, I use the following code:
result = pdfkit.from_string(html, False)
result is bytes type, then I want Flask to send it to the client to be downloaded:
response = make_response(result)
response.headers.set('Content-Type', 'application/pdf')
response.headers.set(
'Content-Disposition', 'attachment', filename= 'report.pdf')
return response
I take it on the client side (JavaScript, React) and try to save:
FileDownload(response.data, 'myfile.pdf')
But the file is always empty with weird title somewhere inside. I think the problem is with encoding but I can't figure out what exactly to do.
This worked for me and produced a valid pdf.
import pdfkit
app = Flask(__name__)
#app.route("/")
def index():
pdf = pdfkit.from_string('Hello!', False)
response = make_response(pdf)
response.headers.set('Content-Type', 'application/pdf')
response.headers.set('Content-Disposition', 'inline', filename='report.pdf')
return response
if __name__ == "__main__":
app.run(host="127.0.0.1", port=8080, debug=True)

Unable to emit after rabbitmq channel.start_consuming() call in flask-socketio handler

I'm trying to listen to a rabbitmq queue from within a flask-socketio event handler so I can send realtime notifications to a web app. My setup so far:
Server
import pika
import sys
from flask import Flask, request
from flask_socketio import SocketIO, emit, disconnect
app = Flask(__name__)
app.config['SECRET_KEY'] = 'not-so-secret'
socketio = SocketIO(app)
def is_authenticated():
return True
def rabbit_callback(ch, method, properties, body):
socketio.emit('connect', {'data': 'yes'})
print "body: ", body
#socketio.on('connect')
def connected():
emit('notification', {'data': 'Connected'})
creds = pika.PlainCredentials(
username="username",
password="password")
params = pika.ConnectionParameters(
host="localhost",
credentials=creds,
virtual_host="/")
connection = pika.BlockingConnection(params)
# This is one channel inside the connection
channel = connection.channel()
# Declare the exchange we're going to use
exchange_name = 'user'
channel.exchange_declare(exchange=exchange_name,
type='topic')
channel.queue_declare(queue='notifications')
channel.queue_bind(exchange='user',
queue='notifications',
routing_key='#')
channel.basic_consume(rabbit_callback,
queue='notifications',
no_ack=True)
channel.start_consuming()
if __name__ == '__main__':
socketio.run(app, port=8082)
Browser
<script type="text/javascript" charset="utf-8">
var socket = io.connect('http://' + document.domain + ':8082');
socket.on('connect', function(resp) {
console.log(resp);
});
socket.on('disconnect', function(resp) {
console.log(resp);
});
socket.on('error', function(resp) {
console.log(resp);
});
socket.on('notification', function(resp) {
console.log(resp);
});
</script>
If I comment out the "channel.start_consuming()" line at the bottom of the server code and load the browser page, I connect successfully to flask-socketio and I see {data: "Connected"} in my console.
When I uncomment the line, I do not see {data: "Connected"} in my console. Nevertheless, when I send a message to the notifications queue, the rabbit_callback function fires. I see my message printed to the server console, but the emit call doesn't seem to work. There are no errors on the server or in the browser. Any advice is much appreciated.
Thanks!
I had the same problem using eventlet and I just solved adding:
import eventlet
eventlet.monkey_patch()
,at the beginning of my source code.
Anyway my code is a bit different and using the start_background_task method:
import pika
from threading import Lock
from flask import Flask, render_template, session, request, copy_current_request_context
from flask_socketio import SocketIO, emit, join_room, leave_room, \
close_room, rooms, disconnect
app = Flask(__name__, static_url_path='/static')
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, async_mode=async_mode)
thread = None
thread_lock = Lock()
#socketio.on('connect', namespace='/test')
def test_connect():
global thread
with thread_lock:
if thread is None:
thread = socketio.start_background_task(target=get_messages)
emit('my_response', {'data': 'Connected', 'count': 0})
print('connected')
def get_messages():
channel = connect_rabbitmq()
channel.start_consuming()
def connect_rabbitmq():
cred = pika.credentials.PlainCredentials('username', 'password')
conn_param = pika.ConnectionParameters(host='yourhostname',
credentials=cred)
connection = pika.BlockingConnection(conn_param)
channel = connection.channel()
channel.exchange_declare(exchange='ncs', exchange_type='fanout')
result = channel.queue_declare(exclusive=True)
queue_name = result.method.queue
channel.queue_bind(exchange='myexchangename', queue=queue_name)
channel.basic_consume(callback, queue=queue_name, no_ack=True)
return channel
Hope this helps...