StaticLiveServerTestCase not logging in even though Client().login() succeeds - django-testing

I'm trying to use Selenium with StaticLiveServerTestCase so that the Django server is started automatically during the tests.
Even though Client().login() returns success, I still can't get past the login page (by default, a logged-in user is redirected to the homepage).
After some debugging it turns out that request.user is AnonymousUser, while User.objects.all() is <QuerySet [<User: USERNAME>]>.
So the user was created successfully, but it is not logged in for some reason.
Here is the code:
class SeleniumTestCase(StaticLiveServerTestCase):
    def setUp(self):
        self.initialize()

    def tearDown(self):
        self.selenium.quit()

    def initialize(self):
        self.create_test_user()
        self.run_chrome()
        self.login()

    def run_chrome(self):
        chrome_options = Options()
        chrome_options.add_argument("--start-maximized")
        self.selenium = webdriver.Chrome(chrome_options=chrome_options)
        self.wait = WebDriverWait(self.selenium, 10)

    @staticmethod
    def create_test_user():
        user = User.objects.create_superuser(
            username=USERNAME,
            email=EMAIL,
            is_active=True,
            password=PASSWORD)
        user.save()

    def login(self):
        login_success = self.client.login(username=USERNAME, password=PASSWORD)
        self.assertTrue(login_success)
        self.selenium.get(self.live_server_url)

    def test1(self):
        self.assertTrue(True)
If I sign in via the GUI on my website (through Selenium) I can get past the login page, but with self.client.login I can't.
Any ideas?
Thanks

I was having this problem myself and initially thought it might be an anomaly of the clicks or something in the form. Note, though, that LiveServerTestCase.setUpClass() creates extra threads, so some actions become isolated from one another and will seem to have no effect on Django's back-end database.
I needed to call setUpClass() to get my fixtures, but I didn't want that thread interference just yet.
The solution for me was to make my own call to load the fixtures, then proceed with the rest of super's setup:
class TestSalesUI(StaticLiveServerTestCase, TestCase):
    hidden_fixtures = ["bevrly/fixtures/fish_dinners.json",
                       "wine/fixtures/wine_sizes.json"]

    @classmethod
    def setUpClass(cls):
        for db_name in cls._databases_names(include_mirrors=False):
            try:
                call_command('loaddata', *cls.hidden_fixtures, **{
                    'verbosity': 0,
                    'commit': False,
                    'database': db_name,
                })
            except Exception:
                cls._rollback_atomics(cls.cls_atomics)
                raise
        User = get_user_model()
        cls.carpenter = User.objects.get(username="carpenter")
        # Password needs to be set now rather than in a fixture, for new hashing.
        cls.carpenter.set_password(CARPENTER_PASSWORD)
        cls.carpenter.save()
        super(TestSalesUI, cls).setUpClass()  # New threads start here
        cls.driver = webdriver.Chrome()
        cls.driver.implicitly_wait(10)  # ⏳☕🖵
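As a side note on the original question: self.client.login() only logs in Django's test client; the Selenium browser keeps its own cookie jar and never sees that session. A common workaround (a minimal sketch reusing the question's names, not part of the answer above) is to copy the test client's session cookie into the browser, e.g. as a replacement for the question's login() method:
from django.conf import settings

# inside SeleniumTestCase from the question:
def login(self):
    login_success = self.client.login(username=USERNAME, password=PASSWORD)
    self.assertTrue(login_success)
    # The browser must already be on the right domain before a cookie can be added.
    self.selenium.get(self.live_server_url)
    session_cookie = self.client.cookies[settings.SESSION_COOKIE_NAME]
    self.selenium.add_cookie({
        'name': settings.SESSION_COOKIE_NAME,
        'value': session_cookie.value,
        'path': '/',
    })
    self.selenium.refresh()  # reload so the server sees the authenticated session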


Session Auth in Django-Rest-Framework: is this really what I have to do to make it CSRF safe?

First off, this code works; it just doesn't feel as clean as it should for something so simple.
Background:
I'm trying to make a custom login API endpoint in DRF that will be consumed by a React frontend. It seems you have to manually force a CSRF cookie to be sent in DRF, so that's what I have done.
I didn't want to send over a Django form because it didn't seem RESTful, but this is the only method I could find to avoid that. Please let me know if this is clean code.
serializers.py
from rest_framework import serializers
from django.contrib.auth import get_user_model  # If used custom user model

UserModel = get_user_model()

class UserSerializer(serializers.ModelSerializer):
    password = serializers.CharField(write_only=True)

    def create(self, validated_data):
        user = UserModel.objects.create_user(
            username=validated_data['username'],
            password=validated_data['password'],
            email=validated_data['email'],
        )
        return user

    class Meta:
        model = UserModel
        # Tuple of serialized model fields (see link [2])
        fields = ("id", "username", "email", "password",)
View.py
from rest_framework import permissions
from django.contrib.auth import get_user_model  # If used custom user model
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from .serializers import UserSerializer
from django.utils.decorators import method_decorator
from django.views.decorators.csrf import ensure_csrf_cookie, csrf_protect

class CreateUserView(APIView):
    model = get_user_model()
    permission_classes = [
        permissions.AllowAny  # Or anon users can't register
    ]
    serializer_class = UserSerializer

    @method_decorator(ensure_csrf_cookie)
    def get(self, request, format=None):
        return Response(status=status.HTTP_200_OK)

    @method_decorator(csrf_protect)
    def post(self, request, format=None):
        serializer = UserSerializer(data=request.data)
        if serializer.is_valid():
            serializer.create(serializer.validated_data)
            return Response(serializer.data, status=status.HTTP_201_CREATED)
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
CSRF is enforced by Django, not DRF. And as noted, CSRF protection only really matters once the user is logged in.
Login and registration actions do not need to be CSRF protected, because the password is required and cannot be guessed by the attacker in a CSRF attack scenario.
Also, per the Django docs, GET views are not protected by CSRF. However, GET actions should not change the state of your application. If that is not the case, and you are able to implement CSRF protection on your frontend (which is possible for a REST app, but not with a default Django app), you can manually protect the view with your decorator.
This is mainly not a DRF issue but a Django issue.
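For illustration only, here is a sketch of how a client could consume the two views above: hit the GET endpoint first so ensure_csrf_cookie sets the csrftoken cookie, then echo it back in the X-CSRFToken header on the POST (the URL is a made-up placeholder):
import requests

API_URL = "http://localhost:8000/api/users/"   # hypothetical route for CreateUserView

session = requests.Session()
session.get(API_URL)                           # ensure_csrf_cookie sets the cookie
csrf_token = session.cookies.get("csrftoken")  # default Django CSRF cookie name

response = session.post(
    API_URL,
    json={"username": "alice", "email": "alice@example.com", "password": "s3cret"},
    headers={"X-CSRFToken": csrf_token},       # header checked by csrf_protect
)
print(response.status_code, response.json())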

Why is SQLALCHEMY_DATABASE_URI set to "sqlite:///:memory:" when I set it to a path in my Config?

I am learning Flask by following chapter 4 of Miguel Grinberg's mega-tutorial. When I run any Flask command from the Anaconda command prompt I get an error that includes "Neither SQLALCHEMY_DATABASE_URI nor SQLALCHEMY_BINDS is set.", and as a result an SQLite database is created in memory.
But I have created a Config object that sets SQLALCHEMY_DATABASE_URI, SECRET_KEY and SQLALCHEMY_TRACK_MODIFICATIONS, and I have tested the Python separately; it all works.
I have tried everything I can think of, including testing snippets of code separately, at least 8 hours searching the web, and trawling through Grinberg's posts; nothing works. One person, Graham (post #29), seems to have had the same problem, but Grinberg does not give a useful answer.
Here is my app's __init__.py:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate
from config import Config
app = Flask(__name__)
app.config.from_object(Config)
db = SQLAlchemy(app)
migrate = Migrate(app, db)
from app import routes, models
Here is my config; it works when run separately.
import os

basedir = os.path.abspath(os.path.dirname(__file__))

class Config(object):
    SECRET_KEY = os.environ.get('SECRET_KEY') or 'you-will-never-guess'
    SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL') or \
        'sqlite:///' + os.path.join(basedir, 'app.db')
    SQLALCHEMY_TRACK_MODIFICATIONS = False
For completeness, here are my routes and models:
from flask import render_template, flash, redirect, url_for
from app import app
from app.forms import LoginForm

@app.route('/')
@app.route('/index')
def index():
    user = {'username': 'Miguel'}
    posts = [
        {
            'author': {'username': 'John'},
            'body': 'Beautiful day in Portland!'
        },
        {
            'author': {'username': 'Susan'},
            'body': 'The Avengers movie was so cool!'
        }
    ]
    return render_template('index.html', title='Home', user=user, posts=posts)

@app.route('/login', methods=['GET', 'POST'])
def login():
    form = LoginForm()
    if form.validate_on_submit():
        flash('Login requested for user {}, remember_me={}'.format(
            form.username.data, form.remember_me.data))
        return redirect(url_for('index'))
    return render_template('login.html', title='Sign In', form=form)
and
from datetime import datetime
from app import db

class User(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(64), index=True, unique=True)
    email = db.Column(db.String(120), index=True, unique=True)
    password_hash = db.Column(db.String(128))
    posts = db.relationship('Post', backref='author', lazy='dynamic')

    def __repr__(self):
        return '<User {}>'.format(self.username)

class Post(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    body = db.Column(db.String(140))
    timestamp = db.Column(db.DateTime, index=True, default=datetime.utcnow)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'))

    def __repr__(self):
        return '<Post {}>'.format(self.body)
What should happen is that when I run a command like
> flask db init
or
> flask db migrate -m "users table"
the command should complete successfully, because SQLALCHEMY_DATABASE_URI should point at the app directory and the SQLite database should be app.db.
Instead I get error messages stating that SQLALCHEMY_DATABASE_URI is not set and that it has therefore been set to "sqlite:///:memory:".
My app needs a persistent database! Why aren't SQLALCHEMY_DATABASE_URI and SQLALCHEMY_TRACK_MODIFICATIONS being set?
This problem has gone away by itself, but since others may experience it I decided to describe the work-around I used, to save them some frustration. I think the original problem may have been due to the sequence in which I was importing packages/modules and initialising classes/objects in my __init__.py.
The workaround is to comment out the original config statement and set the config variables, including the SQLite database path, directly in __init__.py:
### app.config.from_object(Config)
app.config["SECRET_KEY"] = os.environ.get('SECRET_KEY') or 'you-will-never-guess'
app.config["SQLALCHEMY_DATABASE_URI"] = os.environ.get('DATABASE_URL') or \
'sqlite:///' + 'C:\\...path...\\app.db'
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
...
The workaround can probably be backed off a little by using
import os
basedir = os.path.abspath(os.path.dirname(__file__))
...
SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL') or \
'sqlite:///' + os.path.join(basedir, 'app.db')
...
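One quick way to confirm whether the Config object is actually being picked up (a debugging sketch, not part of the original answer) is to print the values after the app is created:
# Run inside "flask shell", or as a one-off script from the project root.
from app import app

print(app.config.get("SQLALCHEMY_DATABASE_URI"))
print(app.config.get("SQLALCHEMY_TRACK_MODIFICATIONS"))
If this prints the expected sqlite:/// path but the flask db commands still fall back to an in-memory database, the command is probably importing a different app instance (e.g. via FLASK_APP) than the one configured here.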

Scrapy: How to write a UserAgentMiddleware?

I want to write a UserAgentMiddleware for Scrapy.
The docs say:
Middleware that allows spiders to override the default user agent.
In order for a spider to override the default user agent, its user_agent attribute must be set.
docs:
https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#module-scrapy.downloadermiddlewares.useragent
But there is no example, and I have no idea how to write one.
Any suggestions?
You can look at the built-in one in your Scrapy install path, e.g.:
/Users/tarun.lalwani/.virtualenvs/project/lib/python3.6/site-packages/scrapy/downloadermiddlewares/useragent.py
"""Set User-Agent header per spider or use a default value from settings"""
from scrapy import signals

class UserAgentMiddleware(object):
    """This middleware allows spiders to override the user_agent"""

    def __init__(self, user_agent='Scrapy'):
        self.user_agent = user_agent

    @classmethod
    def from_crawler(cls, crawler):
        o = cls(crawler.settings['USER_AGENT'])
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        return o

    def spider_opened(self, spider):
        self.user_agent = getattr(spider, 'user_agent', self.user_agent)

    def process_request(self, request, spider):
        if self.user_agent:
            request.headers.setdefault(b'User-Agent', self.user_agent)
You can see an example of setting a random user agent below:
https://github.com/alecxe/scrapy-fake-useragent/blob/master/scrapy_fake_useragent/middleware.py
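Also note that, as the built-in middleware above shows (spider_opened falls back to getattr(spider, 'user_agent', ...)), you often don't need to write any middleware at all: setting the attribute on the spider is enough. A minimal sketch with assumed names:
import scrapy

class MySpider(scrapy.Spider):
    name = "my_spider"                                  # hypothetical spider name
    user_agent = "Mozilla/5.0 (my custom user agent)"   # picked up by UserAgentMiddleware
    start_urls = ["https://example.com"]

    def parse(self, response):
        self.logger.info("Fetched %s with the overridden UA", response.url)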
First visit some website and get some of the newest user agents. Then, in your standard middleware, do something like this (this is the same place you would set up your own proxy settings): grab a random UA from the text file and put it in the headers. This is sloppy, just to show an example; you would want to import random at the top and also make sure to close useragents.txt when you are done with it. I would probably just load the user agents into a list at the top of the module (a cleaner variant is sketched after this code block).
from scrapy import signals

class GdataDownloaderMiddleware(object):

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        # Called for each request that goes through the downloader
        # middleware.
        user_agents = open('useragents.txt', 'r')
        user_agents = user_agents.readlines()
        import random
        user_agent = random.choice(user_agents).strip()  # strip the trailing newline
        request.headers.setdefault(b'User-Agent', user_agent)
        # Must either:
        # - return None: continue processing this request
        # - or return a Response object
        # - or return a Request object
        # - or raise IgnoreRequest: process_exception() methods of
        #   installed downloader middleware will be called
        return None

    def process_response(self, request, response, spider):
        # Called with the response returned from the downloader.
        # Must either:
        # - return a Response object
        # - return a Request object
        # - or raise IgnoreRequest
        return response

    def process_exception(self, request, exception, spider):
        # Called when a download handler or a process_request()
        # (from other downloader middleware) raises an exception.
        # Must either:
        # - return None: continue processing this exception
        # - return a Response object: stops process_exception() chain
        # - return a Request object: stops process_exception() chain
        pass

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)
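The cleaner variant described above might look like this (a sketch using the same hypothetical useragents.txt file, loaded once at startup instead of on every request; it still has to be enabled in DOWNLOADER_MIDDLEWARES like any other downloader middleware):
import random

class RandomUserAgentMiddleware(object):
    def __init__(self, user_agents):
        self.user_agents = user_agents

    @classmethod
    def from_crawler(cls, crawler):
        # Load the list once and close the file properly.
        with open('useragents.txt') as f:
            user_agents = [line.strip() for line in f if line.strip()]
        return cls(user_agents)

    def process_request(self, request, spider):
        request.headers.setdefault(b'User-Agent', random.choice(self.user_agents))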

Scrapy: how to remove a URL from the httpcache or prevent it being added to the cache

I am using the latest Scrapy version, v1.3.
I crawl a website page by page, by following the URLs in the pagination. On some pages, the website detects that I am using a bot and returns an error inside the HTML. Since it is still a successful request, the page gets cached, and when I run the crawl again I get the same error back from the cache.
What I need is a way to prevent that page from getting into the cache. Or, if I cannot do that, I need to remove it from the cache after I notice the error in the parse method. Then I could retry and get the correct page.
I have a partial solution: I yield all requests with "dont_cache": False in meta so that they use the cache, and where I detect the error and retry the request, I put dont_filter=True along with "dont_cache": True to make sure I get a fresh copy of the erroneous URL.
def parse(self, response):
    page = response.meta["page"] + 1
    html = Selector(response)
    counttext = html.css('h2#s-result-count::text').extract_first()
    if counttext is None:
        page = page - 1
        yield Request(url=response.url, callback=self.parse,
                      meta={"page": page, "dont_cache": True}, dont_filter=True)
I also tried a custom retry middleware, where I managed to get it running before the cache, but I couldn't read response.body successfully. I suspect that it is compressed somehow, as it is binary data.
class CustomRetryMiddleware(RetryMiddleware):
    def process_response(self, request, response, spider):
        with open('debug.txt', 'wb') as outfile:
            outfile.write(response.body)
        html = Selector(text=response.body)
        url = response.url
        counttext = html.css('h2#s-result-count::text').extract_first()
        if counttext is None:
            log.msg("Automated process error: %s" % url, level=log.INFO)
            reason = 'Automated process error %d' % response.status
            return self._retry(request, reason, spider) or response
        return response
Any suggestion is appreciated.
Thanks
Mehmet
The middleware responsible for request/response caching is HttpCacheMiddleware. Under the hood it is driven by cache policies: special classes which decide which requests and responses should or shouldn't be cached. You can implement your own cache policy class and use it with the setting
HTTPCACHE_POLICY = 'my.custom.cache.Class'
More information in docs: https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
Source code of built-in policies: https://github.com/scrapy/scrapy/blob/master/scrapy/extensions/httpcache.py#L18
Thanks to mizhgun, I managed to develop a solution using a custom policy.
Here is what I did:
from scrapy.utils.httpobj import urlparse_cached

class CustomPolicy(object):
    def __init__(self, settings):
        self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES')
        self.ignore_http_codes = [int(x) for x in settings.getlist('HTTPCACHE_IGNORE_HTTP_CODES')]

    def should_cache_request(self, request):
        return urlparse_cached(request).scheme not in self.ignore_schemes

    def should_cache_response(self, response, request):
        return response.status not in self.ignore_http_codes

    def is_cached_response_fresh(self, response, request):
        if "refresh_cache" in request.meta:
            return False
        return True

    def is_cached_response_valid(self, cachedresponse, response, request):
        if "refresh_cache" in request.meta:
            return False
        return True
And when I catch the error (after caching has occurred, of course):
def parse(self, response):
    html = Selector(response)
    counttext = html.css('selector').extract_first()
    if counttext is None:
        yield Request(url=response.url, callback=self.parse,
                      meta={"refresh_cache": True}, dont_filter=True)
When you add refresh_cache to meta, it can be caught in the custom policy class.
Don't forget to add dont_filter, otherwise the second request will be filtered out as a duplicate.
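For completeness, the custom policy still has to be activated in the project settings (the module path below is an assumption; point it at wherever CustomPolicy actually lives):
# settings.py
HTTPCACHE_ENABLED = True
HTTPCACHE_POLICY = 'myproject.policies.CustomPolicy'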

Setting referer in Selenium

I'm working with the Selenium remote driver to automate actions on a site. I can open the page I need directly by engineering the URL, as the site's URL schema is very consistent. This speeds up the script, as it does not have to work through several pages before it gets to the one it needs.
To make the automation seem organic, is there a way to set a referer page in Selenium?
If you're checking the referrer on the server, then using a proxy (as mentioned in other answers) is the way to go.
However, if you need access to the referrer in JavaScript, using a proxy will not work. To set the JavaScript referrer I did the following:
Go to the referral website
Inject this JavaScript onto the page via the Selenium API: document.write('<script>window.location.href = "<my website>";</script>')
I'm using a Python wrapper around Selenium, so I cannot provide the exact call you need to inject the code in your language, but it should be easy to find.
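In plain Python Selenium, that two-step trick could look roughly like this (both URLs are placeholders; the actual referrer seen by the next page still depends on the site's referrer policy):
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://referring-site.example/")   # 1. visit the referral page
# 2. navigate away via JavaScript so document.referrer on the next page
#    points at the referral page
driver.execute_script("window.location.href = arguments[0];",
                      "https://my-website.example/")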
What you are looking for is referer spoofing.
Selenium does not have a built-in method to do this; however, it can be accomplished by using a proxy such as Fiddler.
Fiddler also provides an API-only version of the FiddlerCore component, and programmatic access to all of the proxy's settings and data, thus allowing you to modify the headers of the HTTP request.
Here is a solution in Python to do exactly that:
https://github.com/j-bennet/selenium-referer
I described the use case and the solution in the README. I think the GitHub repo won't go anywhere, but I'll quote the relevant pieces here just in case.
The solution uses libmproxy to implement a proxy server that only does one thing: it adds a Referer header. The header is specified as a command line parameter when running the proxy. Code:
# -*- coding: utf-8 -*-
"""
Proxy server to add a specified Referer: header to the request.
"""
from optparse import OptionParser
from libmproxy import controller, proxy
from libmproxy.proxy.server import ProxyServer

class RefererMaster(controller.Master):
    """
    Adds a specified referer header to the request.
    """

    def __init__(self, server, referer):
        """
        Init the proxy master.
        :param server: ProxyServer
        :param referer: string
        """
        controller.Master.__init__(self, server)
        self.referer = referer

    def run(self):
        """
        Basic run method.
        """
        try:
            print('Running...')
            return controller.Master.run(self)
        except KeyboardInterrupt:
            self.shutdown()

    def handle_request(self, flow):
        """
        Adds a Referer header.
        """
        flow.request.headers['referer'] = [self.referer]
        flow.reply()

    def handle_response(self, flow):
        """
        Does not do anything extra.
        """
        flow.reply()

def start_proxy_server(port, referer):
    """
    Start proxy server and return an instance.
    :param port: int
    :param referer: string
    :return: RefererMaster
    """
    config = proxy.ProxyConfig(port=port)
    server = ProxyServer(config)
    m = RefererMaster(server, referer)
    m.run()

if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option("-r", "--referer", dest="referer",
                      help="Referer URL.")
    parser.add_option("-p", "--port", dest="port", type="int",
                      help="Port number (int) to run the server on.")
    popts, pargs = parser.parse_args()
    start_proxy_server(popts.port, popts.referer)
Then, in the setUp() method of the test, the proxy server is started as an external process using pexpect, and stopped in tearDown(). A method called proxy() returns the proxy settings to configure the Firefox driver with:
# -*- coding: utf-8 -*-
import os
import sys
import pexpect
import unittest
from selenium.webdriver.common.proxy import Proxy, ProxyType
import utils

class ProxyBase(unittest.TestCase):
    """
    We have to use our own proxy server to set a Referer header, because Selenium does not
    allow to interfere with request headers.
    This is the base class. Change `proxy_referer` to set different referers.
    """

    base_url = 'http://www.facebook.com'
    proxy_server = None
    proxy_address = '127.0.0.1'
    proxy_port = 8888
    proxy_referer = None
    proxy_command = '{0} {1} --referer {2} --port {3}'

    def setUp(self):
        """
        Create the environment.
        """
        print('\nSetting up.')
        self.start_proxy()
        self.driver = utils.create_driver(proxy=self.proxy())

    def tearDown(self):
        """
        Cleanup the environment.
        """
        print('\nTearing down.')
        utils.close_driver(self.driver)
        self.stop_proxy()

    def proxy(self):
        """
        Create proxy settings for our Firefox profile.
        :return: Proxy
        """
        proxy_url = '{0}:{1}'.format(self.proxy_address, self.proxy_port)
        p = Proxy({
            'proxyType': ProxyType.MANUAL,
            'httpProxy': proxy_url,
            'ftpProxy': proxy_url,
            'sslProxy': proxy_url,
            'noProxy': 'localhost, 127.0.0.1'
        })
        return p

    def start_proxy(self):
        """
        Start the proxy process.
        """
        if not self.proxy_referer:
            raise Exception('Set the proxy_referer in child class!')
        python_path = sys.executable
        current_dir = os.path.dirname(__file__)
        proxy_file = os.path.normpath(os.path.join(current_dir, 'referer_proxy.py'))
        command = self.proxy_command.format(
            python_path, proxy_file, self.proxy_referer, self.proxy_port)
        print('Running the proxy command:')
        print(command)
        self.proxy_server = pexpect.spawnu(command)
        self.proxy_server.expect_exact(u'Running...', 2)

    def stop_proxy(self):
        """
        Stop the proxy process.
        """
        print('Stopping proxy server...')
        self.proxy_server.close(True)
        print('Proxy server stopped.')
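A child test class then only needs to set proxy_referer; a hypothetical usage sketch:
class TestWithCustomReferer(ProxyBase):
    proxy_referer = 'http://www.referring-site.example/'

    def test_homepage(self):
        # The driver created in setUp() already routes through the referer proxy.
        self.driver.get(self.base_url)
        self.assertTrue(self.driver.title)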
I wanted my unit tests to start and stop the proxy server without any user interaction, and could not find any Python samples doing that. Which is why I created the github repo (link above).
Hope this helps someone.
Not sure if I understand your question correctly, but if you want to override your HTTP requests there is no way to do it directly with WebDriver. You must run your requests through a proxy. I prefer using BrowserMob; you can get it through Maven or similar.
ProxyServer server = new ProxyServer(proxy_port); // net.lightbody.bmp.proxy.ProxyServer
server.start();
server.setCaptureHeaders(true);

Proxy proxy = server.seleniumProxy(); // org.openqa.selenium.Proxy
proxy.setHttpProxy("localhost").setSslProxy("localhost");

server.addRequestInterceptor(new RequestInterceptor() {
    @Override
    public void process(BrowserMobHttpRequest browserMobHttpRequest, Har har) {
        browserMobHttpRequest.addRequestHeader("Referer", "blabla");
    }
});

// configure it as a desired capability
DesiredCapabilities capabilities = new DesiredCapabilities();
capabilities.setCapability(CapabilityType.PROXY, proxy);

// start the driver
driver = new FirefoxDriver(capabilities);
Or blacklist/whitelist anything:
server.blacklistRequests("https?://.*\\.google-analytics\\.com/.*", 410);
server.whitelistRequests("https?://*.*.yoursite.com/.*. https://*.*.someOtherYourSite.*".split(","), 200);