Move from Spider to CrawlSpider - scrapy

I tried to move from the general spider to CrawlSpider to make use of rules. However,
my crawler doesn't work anymore that way. Do you see what I did wrong?
BEFORE:
class GitHubSpider(scrapy.Spider):
    """Crawl GitHub user-search result pages and emit one item per profile."""

    name = "github"
    start_urls = [
        "https://github.com/search?p=1&q=React+Django&type=Users",
    ]

    def parse(self, response):
        """Follow each profile link on a result page, then the next-page link."""
        profile_hrefs = response.css("a.mr-1::attr(href)")
        for profile_request in response.follow_all(
            profile_hrefs, self.parse_engineer
        ):
            yield profile_request

        # Pagination responses are handled by this same callback.
        next_page_hrefs = response.css(".next_page::attr(href)")
        for page_request in response.follow_all(next_page_hrefs, self.parse):
            yield page_request

    def parse_engineer(self, response):
        """Yield the stripped username text found on a profile page."""
        raw_username = response.css(".vcard-username::text").get()
        yield {
            "username": raw_username.strip(),
        }
NEW (not working):
# CrawlSpider variant from the question, reported as not working.
class GitHubSpider(CrawlSpider):
name = "github"
start_urls = [
"https://github.com/search?p=1&q=React+Django&type=Users",
]
rules = (
Rule(
# NOTE(review): the working version further down passes the bare selector
# "a.mr-1" to restrict_css, without the "::attr(href)" suffix used here.
LinkExtractor(restrict_css=("a.mr-1::attr(href)")),
callback="parse_engineer",
),
# NOTE(review): likewise, the working version uses ".next_page" without
# "::attr(href)".
Rule(LinkExtractor(restrict_css=(".next_page::attr(href)"))),
)
def parse_engineer(self, response):
# Emit the stripped text of the .vcard-username element as "username".
yield {
"username": response.css(".vcard-username::text").get().strip(),
}

Now, it's working:
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
class GitHubSpider(CrawlSpider):
    """Crawl GitHub user-search results using CrawlSpider rules.

    The first rule extracts links from elements matching ``a.mr-1`` and
    sends each response to ``parse_engineer``. The second rule extracts
    links from ``.next_page`` and has no callback.
    """

    name = "github"
    # Domains must be string literals; the bare name ``github.com`` would
    # raise NameError as soon as the class body is executed.
    allowed_domains = ["github.com"]
    start_urls = [
        "https://github.com/search?p=1&q=React+Django&type=Users"
    ]

    rules = (
        # restrict_css receives a plain CSS selector (no ``::attr(href)``).
        Rule(LinkExtractor(restrict_css="a.mr-1"), callback="parse_engineer"),
        Rule(LinkExtractor(restrict_css=".next_page")),
    )

    def parse_engineer(self, response):
        """Yield the stripped username text found on a profile page."""
        yield {
            "username": response.css(".vcard-username::text").get().strip()
        }

Related

Karate: How to paste and run scenario for each param from array response inside scenario?

I have a range of values, like taskId, extracted from DB.
# Read the task_id column of the tasks selected by status from the database.
* def getTasks = db.readRows('SELECT task_id from tasks t WHERE t.status = \'IN_PROGRES\'
')
# Collect the task_id value of every returned row into one list.
* def getIds = get getTasks[*].task_id
# NOTE(review): this step has no keyword; presumably `path` was intended — confirm.
* 'task', 'setUser'
# Request payload; "task_id" is still a blank placeholder at this point.
* request
"""
[{
"task_id": " ",
"assignedUser": {
"user": "someValue"
}
}
]
"""
* method post
* status 200
# Expected response shape: four counters, each matched with '#number'.
* def responseResult =
"""
{
"totalCount": '#number',
"successCount": '#number',
"skippedCount": '#number',
"failedCount": '#number',
}
"""
* match response == responseResult
I need to take each value from that list and put it into "task_id".
Could you please clarify how to handle this case?
If you mean trying to create a JSON array from a bunch of values that is easy.
# Build a JSON array by pushing individual values onto an initially empty list.
* def response = { foo: 1, bar: 2 }
* def task_ids = []
* task_ids.push(response.foo)
* task_ids.push(response.bar)
# The list now holds both values, in insertion order.
* match task_ids == [1, 2]
When it comes to JSON manipulation, think of Karate as just like JavaScript.

how to manage variable inside of json value string?

I have a variable cardholder in my karate-config file.
I assigned it to a new variable, entrID.
The main point is that I am building the JSON as a string.
# Copy the configured cardholder value into entrID.
* def entrID = cardholder
# NOTE(review): "entrID" below is literal text inside the escaped string,
# not a reference to the variable defined above.
* def requestContactHistoryAdd =
"""
{
"RequestBody": "{ \"ENTR_ID\" : \"entrID\", \"BHVR_ID\" : \"VRU\", }"
}
"""
Now how can i provide it inside of my json RequestBody?
EDIT: You seem to have a very badly designed API, where the JSON has an embedded string (which itself looks like JSON).
Please note I am using a string type below: https://github.com/intuit/karate#type-conversion
You can do this:
# Convert the inner object to a string first, then embed that string as the
# value of RequestBody.
* def entrID = 'foo'
* string temp = { "ENTR_ID" : "#(entrID)", "BHVR_ID" : "VRU" }
# note that you could have done this:
# def temp = '{ "ENTR_ID" : "' + entrID + '", "BHVR_ID" : "VRU" }'
* def body = { RequestBody: '#(temp)' }
* print body
Which gives you:
08:17:25.671 [main] INFO com.intuit.karate - [print] {
"RequestBody": "{\"ENTR_ID\":\"foo\",\"BHVR_ID\":\"VRU\"}"
}
I also solved it like this:
# Build the inner JSON text by string concatenation, then reference it from
# the outer object through #(bodyValue).
* def entrID = someValueFromSomeWhere
* def bodyValue = "{ \"ENTR_ID\":\"" + entrID + "\", \"BHVR_ID\" : \"VRU\" }"
* def requestContactHistoryAdd =
"""
{
"RequestBody": "#(bodyValue)"
}
"""
We can also do it this way:
# Same approach, concatenating the source value directly without an
# intermediate entrID variable.
* def bodyValue = "{ \"ENTR_ID\":\"" + someValueFromSomeWhere + "\", \"BHVR_ID\" : \"VRU\" }"
* def requestContactHistoryAdd =
"""
{
"RequestBody": "#(bodyValue)"
}
"""

Error "u'social' is not a registered namespace" in basic integration of Auth0 with Django 1.8

I have a basic functional integration of Auth0 with Django 1.9 for user authentication, obtained from https://auth0.com/docs/quickstart/backend/django, which uses Python 2.7 and works fine.
But I want to change the Django version to 1.8. To do this I made some changes, mostly in settings, but I'm missing something.
When I try to access http://127.0.0.1:8000/login/auth0 I get the error: NoReverseMatch at /login/auth0. u'social' is not a registered namespace
That URL matches social_django.urls.
The code is:
Settings.py
from dotenv import load_dotenv, find_dotenv
import os
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

DEBUG = True
ALLOWED_HOSTS = []

# Application definition
INSTALLED_APPS = (
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'social_django',
    'auth0login'
)

MIDDLEWARE_CLASSES = (
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.security.SecurityMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
)

ROOT_URLCONF = 'webappexample.urls'

# Single Django template engine; templates are looked up inside app
# directories (APP_DIRS) and no extra DIRS are configured.
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'webappexample.wsgi.application'

# SQLite database file stored next to the project, under BASE_DIR.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
    }
}

# Internationalisation
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_L10N = True
USE_TZ = True

STATIC_URL = '/static/'
# ********************************
# Load variables from a .env file when one is found, so the AUTH0_* values
# read from os.environ below can come from it.
ENV_FILE = find_dotenv()
if ENV_FILE:
    load_dotenv(ENV_FILE)

# SOCIAL AUTH AUTH0 BACKEND CONFIG
SOCIAL_AUTH_TRAILING_SLASH = False
SESSION_COOKIE_SECURE = False
CSRF_COOKIE_SECURE = False
SECURE_SSL_REDIRECT = False

SOCIAL_AUTH_AUTH0_KEY = os.environ.get('AUTH0_CLIENT_ID')
SOCIAL_AUTH_AUTH0_SECRET = os.environ.get('AUTH0_CLIENT_SECRET')
SOCIAL_AUTH_AUTH0_SCOPE = [
    'openid',
    'profile',
]
SOCIAL_AUTH_AUTH0_DOMAIN = os.environ.get('AUTH0_DOMAIN')

# Prefer an explicit AUTH0_AUDIENCE; otherwise fall back to the domain's
# /userinfo URL when a domain is configured. AUDIENCE stays None if neither
# is available, in which case no extra argument is set.
AUDIENCE = None
if os.environ.get('AUTH0_AUDIENCE'):
    AUDIENCE = os.environ.get('AUTH0_AUDIENCE')
elif SOCIAL_AUTH_AUTH0_DOMAIN:
    AUDIENCE = 'https://' + SOCIAL_AUTH_AUTH0_DOMAIN + '/userinfo'
if AUDIENCE:
    SOCIAL_AUTH_AUTH0_AUTH_EXTRA_ARGUMENTS = {'audience': AUDIENCE}

# An ordered sequence, not a set: a set literal has no defined iteration
# order, so which backend gets tried first would be arbitrary.
AUTHENTICATION_BACKENDS = (
    'auth0login.auth0backend.Auth0',
    'django.contrib.auth.backends.ModelBackend',
)

LOGIN_URL = '/login/auth0'
LOGIN_REDIRECT_URL = '/dashboard'
SOCIAL_AUTH_URL_NAMESPACE = 'social'
urls.py
from django.conf.urls import url, include
from . import views
urlpatterns = [
    url(r'^$', views.index),
    url(r'^dashboard$', views.dashboard),
    url(r'^logout$', 'django.contrib.auth.views.logout', {'next_page': '/'}),
    url(r'^', include('django.contrib.auth.urls')),
    # Register the social-auth URLs under the 'social' namespace explicitly.
    # SOCIAL_AUTH_URL_NAMESPACE is set to 'social', and without a registered
    # namespace of that name URL reversing fails with
    # "u'social' is not a registered namespace".
    url(r'^', include('social_django.urls', namespace='social')),
]
views.py
from django.shortcuts import render
from django.contrib.auth.decorators import login_required
from django.contrib.auth import logout as log_out
from django.conf import settings
from django.http import HttpResponseRedirect, HttpResponse
import json
from django.utils.http import urlencode
def index(request):
    """Render index.html with the current username, or None when anonymous."""
    current_user = request.user
    username = current_user.username if current_user.is_authenticated() else None
    return render(request, 'index.html', {'username': username})
@login_required
def dashboard(request):
    """Render dashboard.html with the logged-in user's Auth0 profile data.

    The view must be decorated with ``login_required``: it reads
    ``user.social_auth``, which only exists for an authenticated user, so
    anonymous visitors have to be redirected to the login page instead.
    """
    user = request.user
    auth0user = user.social_auth.get(provider='auth0')
    userdata = {
        'user_id': auth0user.uid,
        'name': user.first_name,
        'picture': auth0user.extra_data['picture']
    }
    return render(request, 'dashboard.html', {
        'auth0User': auth0user,
        # Pretty-printed JSON so the template can show it verbatim.
        'userdata': json.dumps(userdata, indent=4)
    })
index.html
{% extends 'layout.html' %}
{% block content %}
<div class="login-page clearfix">
<div class="login-box auth0-box before">
<img src="https://i.cloudup.com/StzWWrY34s.png" />
<h3>Auth0 Example</h3>
{% if username == None %}
<p> No se encuentra logeado ningún usuario.</p>
<a class="btn btn-primary" href="/login/auth0">Login</a><br>
{% else %}
<p> Se encuentra logeado el usuario: {{ username }}.</p>
<a class="btn btn-primary" href="/logout">Logout</a><br>
{% endif %}
</div>
</div>
{% endblock content %}
auth0backend.py
from urllib2 import urlopen
from jose import jwt
from social_core.backends.oauth import BaseOAuth2
class Auth0(BaseOAuth2):
    """OAuth2 authentication backend for Auth0."""

    name = 'auth0'
    SCOPE_SEPARATOR = ' '
    ACCESS_TOKEN_METHOD = 'POST'
    EXTRA_DATA = [
        ('picture', 'picture')
    ]

    # Each value below is built once, echoed to stdout and returned. A
    # parenthesised single-argument print produces the same output as the
    # Python 2 print statement.

    def authorization_url(self):
        """Return the /authorize URL for the configured DOMAIN."""
        endpoint = 'https://' + self.setting('DOMAIN') + '/authorize'
        print(endpoint)
        return endpoint

    def access_token_url(self):
        """Return the /oauth/token URL for the configured DOMAIN."""
        endpoint = 'https://' + self.setting('DOMAIN') + '/oauth/token'
        print(endpoint)
        return endpoint

    def get_user_id(self, details, response):
        """Return the 'user_id' entry of the user details."""
        user_id = details['user_id']
        print(user_id)
        return user_id

    def get_user_details(self, response):
        """Decode the id_token and map its claims to user details."""
        # Obtain JWT and the keys to validate the signature
        id_token = response.get('id_token')
        jwks = urlopen('https://' + self.setting('DOMAIN') + '/.well-known/jwks.json')
        issuer = 'https://' + self.setting('DOMAIN') + '/'
        audience = self.setting('KEY')  # CLIENT_ID
        payload = jwt.decode(id_token, jwks.read(), algorithms=['RS256'], audience=audience, issuer=issuer)
        user_details = {'username': payload['nickname'],
                        'first_name': payload['name'],
                        'picture': payload['picture'],
                        'user_id': payload['sub']}
        print(user_details)
        return user_details
Looking at your urls.py, I don't see the route you are looking to leverage. Have you combed through your app, as well as your Auth0 application dashboard, to confirm that any changes made are reflected there too? That would be the first step I would recommend to resolve this issue. I hope this helps you in your quest!

graphQL vue this.$apollo.query doesn't work with parameter (variables)

I have an application in Vuejs, and a function that downloads questionnaire via graphQL. Function works perfect until I add variables to the query.
The working code of function is:
// Download the questionnaire through the "questionnaire" Apollo client.
// In this version language "en" and version 1 are hard-coded in the query text.
downloadQuestionnaire() {
console.log("downloadQuestionnaire: " + questionnaireVersion);
this.$apollo
.query({
query: gql`
query questionnaire {
questionnaire(inputParams: { language: "en", version: 1 }) {
sections {
section
cssClass
remainingItemsType
endingMessageText
questions {
qId
label
question
inputType
possibleAnswers {
paId
text
}
multipleAnswersAccepted
individualFormat
answers
}
}
}
}
`,
client: "questionnaire",
// NOTE(review): the query above declares no variables, so its text never
// references this value.
variables: {
version: questionnaireVersion
}
})
// Hand the returned sections to copyQuestionnaie.
.then(data => {
// this.sections = data.questionnaire;
// console.log(data);
this.copyQuestionnaie(data.data.questionnaire.sections);
// console.log(JSON.stringify(data.data.questionnaire.sections));
})
// Keep the error on the component and show it in an alert.
.catch(error => {
this.error = error;
alert("E " + error);
});
},
and I need to parametrise the version in the the query, by changing it to:
downloadQuestionnaire() {
console.log("downloadQuestionnaire: " + questionnaireVersion);
this.$apollo
.query({
query: gql`
query questionnaire($version: Int) {
questionnaire(
inputParams: { language: "en", version: $version }
) {
sections {
section
cssClass
remainingItemsType
endingMessageText
questions {
qId
label
question
inputType
possibleAnswers {
paId
text
}
multipleAnswersAccepted
individualFormat
answers
}
}
}
}
`,
client: "questionnaire",
variables: {
version: 1
}
})
.then(data => {
// this.sections = data.questionnaire;
// console.log(data);
this.copyQuestionnaie(data.data.questionnaire.sections);
// console.log(JSON.stringify(data.data.questionnaire.sections));
})
.catch(error => {
this.error = error;
alert("E " + error);
console.log("ERROR: " + error);
});
},
And then I get the error:
ERROR: Error: Network error: Response not successful: Received status code 400
I was trying to use the same syntax as in example here.
Am I injecting the parameters in the wrong way, or am I overlooking a typo?
Update:
Below is the schema python code for the backend:
import graphene
from .db import get_questionnaire_in_dict, getAnswers, putAnswers
class InputParam(graphene.InputObjectType):  # type: ignore
    """Input object for the ``questionnaire`` query: language and version."""

    # Both fields are declared as required.
    language = graphene.String(required=True)
    version = graphene.Int(required=True)
class PossibleAnswer(graphene.ObjectType):  # type: ignore
    """Possible answer: a pair of answer id and answer text."""

    paId = graphene.String(description="Answer id")
    text = graphene.String(description="Answer text")

    def __init__(self, paId: str, text: str) -> None:
        """Store the answer id and its text."""
        self.paId = paId
        self.text = text

    def display(self) -> None:
        """Print self content."""
        # This type only stores paId and text; it has no label/question
        # attributes, so printing those would raise AttributeError.
        print("paId: {paId},\nText: {text}".format(
            paId=self.paId, text=self.text))
class Question(graphene.ObjectType):  # type: ignore
    """Question object."""

    qId = graphene.String()
    label = graphene.String(description="Translated question label")
    question = graphene.String(description="Translated question")
    # qPointer = graphene.Field(QuestionItems)
    multipleAnswersAccepted = graphene.Boolean()
    possibleAnswers = graphene.List(PossibleAnswer)
    answers = graphene.List(graphene.String)
    inputType = graphene.String(description="HTML input type")
    individualFormat = graphene.String()

    def __init__(self, questionObj):
        """Populate the question from a mapping.

        Reads the keys "qPointer", "label", "question", "inputType",
        "multipleAnswersAccepted" and "possibleAnswersPointer";
        "individualFormat" is optional and defaults to None.
        """
        self.qId = questionObj["qPointer"]
        self.label = questionObj["label"]
        self.question = questionObj["question"]
        self.inputType = questionObj["inputType"]
        self.multipleAnswersAccepted = questionObj["multipleAnswersAccepted"]
        if "individualFormat" in questionObj:
            self.individualFormat = questionObj["individualFormat"]
        else:
            self.individualFormat = None
        if questionObj["possibleAnswersPointer"]:
            self.possibleAnswers = []
            # The position of each entry is used as its answer id.
            for key, value in enumerate(questionObj["possibleAnswersPointer"]):
                self.addPossibleAnswer(PossibleAnswer(key, value))
        else:
            self.possibleAnswers = None
        self.answers = []

    def display(self):
        """Print the input type, label and question text."""
        print("Question {inputType}".format(inputType=self.inputType))
        # There is no qPointer attribute on this object (the field is
        # commented out above), so print the stored label/question directly
        # instead of delegating to self.qPointer.display().
        print("Label: {label},\nQuestion: {question}".format(
            label=self.label, question=self.question))

    def addPossibleAnswer(self, possibleAnswer):
        """Append one possible answer to this question."""
        self.possibleAnswers.append(possibleAnswer)
class Section(graphene.ObjectType):
    """One questionnaire section together with its questions."""

    section = graphene.String()
    css_class = graphene.String()
    # Must be an instance (note the call): assigning the bare graphene.Int
    # class does not declare a field like the sibling attributes do.
    remainingItemsCount = graphene.Int()
    remainingItemsType = graphene.String()
    endingMessageText = graphene.String()
    questions = graphene.List(graphene.NonNull(Question))

    def __init__(self, sectionObj):
        """Populate the section from a mapping.

        Reads the keys "section", "class", "remainingItemsCount",
        "remainingItemsType" and "endingMessageText"; questions start empty
        and are added through ``addQuestion``.
        """
        self.section = sectionObj["section"]
        self.css_class = sectionObj["class"]
        self.remainingItemsCount = sectionObj["remainingItemsCount"]
        self.remainingItemsType = sectionObj["remainingItemsType"]
        self.endingMessageText = sectionObj["endingMessageText"]
        self.questions = []

    def display(self):
        """Print the section name and its CSS class."""
        print("Section {section}, class: {css_class}".format(
            section=self.section, css_class=self.css_class))

    def addQuestion(self, question):
        """Append one question to this section."""
        self.questions.append(question)
class Questionnaire(graphene.ObjectType):  # type: ignore
    """A questionnaire: its language plus an ordered list of sections."""

    lang = graphene.String()
    sections = graphene.List(Section)

    def __init__(self, lang):
        """Take the language from ``lang.language``; sections start empty."""
        self.sections = []
        self.lang = lang.language

    def addSection(self, section):
        """Append one section to the questionnaire."""
        self.sections.append(section)
class AnswersInputParam(graphene.InputObjectType):  # type: ignore
    """Input object for the ``answers`` query: user id and version."""

    # Both fields are declared as required.
    userId = graphene.String(required=True)
    version = graphene.Int(required=True)
class Answer(graphene.ObjectType):  # type: ignore
    """The answers given to a single question."""

    qId = graphene.String()
    answers = graphene.List(graphene.String)

    def __init__(self, answerObj):
        """Copy "qId" and "answers" from the mapping, logging it first."""
        message = "Answer creator: {}".format(answerObj)
        print(message)
        self.qId = answerObj["qId"]
        self.answers = answerObj["answers"]
class Answers(graphene.ObjectType):  # type: ignore
    """All answers stored for one user and questionnaire version."""

    userId = graphene.String()
    version = graphene.Int()
    answers = graphene.List(Answer)

    def __init__(self, answersObj, userId, version):
        """Wrap every entry of ``answersObj`` in an ``Answer``."""
        self.userId = userId
        self.version = version
        self.answers = []
        # print("answersObj[\"answers\"]: {}".format(answersObj))
        for position, raw_answer in enumerate(answersObj):
            # Debug trace of each entry before it is converted.
            print("_XXX_: {idx}={val}".format(idx=position, val=raw_answer))
            self.addAnswer(Answer(raw_answer))

    def addAnswer(self, answer):
        """Append one answer to the collection."""
        self.answers.append(answer)
class SaveAnswers(graphene.Mutation):
    """Mutation that stores a user's answers and returns them."""

    class Arguments:
        userId = graphene.String(required=True)
        version = graphene.Int(required=True)
        answers = graphene.JSONString(required=True)

    Output = Answers

    def mutate(self, info, answers, version, userId):
        """Persist the answers with putAnswers, then return them as ``Answers``."""
        putAnswers(userId, version, answers)
        saved = Answers(answers, userId, version)
        return saved
class Mutation(graphene.ObjectType):
    """Root mutation type; exposes the ``SaveAnswers`` mutation."""

    save_answers = SaveAnswers.Field()
class Query(graphene.ObjectType):
    """Root query type exposing ``answers`` and ``questionnaire``."""

    answers = graphene.Field(
        Answers, inputParams=AnswersInputParam(required=True))
    questionnaire = graphene.Field(
        Questionnaire, inputParams=InputParam(required=True))

    def resolve_answers(self, info, inputParams):
        """Load the stored answers for the given user id and version."""
        user_id = inputParams.userId
        version = inputParams.version
        stored = getAnswers(user_id, version)
        return Answers(stored, user_id, version)

    def resolve_questionnaire(self, info, inputParams):
        """Build the questionnaire for the requested language and version."""
        result = Questionnaire(inputParams)
        structure = get_questionnaire_in_dict(
            inputParams.language, inputParams.version)
        for section_data in structure.values():
            section = Section(sectionObj=section_data)
            # section.display()
            for key, value in section_data.items():
                # Only the "questions" entry contributes Question objects.
                if key != "questions":
                    continue
                for question_data in value:
                    section.addQuestion(
                        question=Question(questionObj=question_data))
            result.addSection(section)
        return result
# Schema built from the Query and Mutation root types.
schema1 = graphene.Schema(query=Query, mutation=Mutation)
# sample calls
# mutation{
# saveAnswers
# (
# userId: "U123",
# version: 1,
# answers: "[{\"qId\":\"s1q4\",\"answers\":\"0\"},{\"qId\":\"s2q1\",\"answers\":\"1\"},{\"qId\":\"s2q10\",\"answers\":[\"1\",\"3\"]}]"
# ) {
# userId
# version
# answers {
# qId
# answers
# }
# }
# }
# {
# answers(inputParams: {userId: "U123", version: 1})
# {
# answers{
# qId
# answers
# }
# }
# }

How to get logged username in template

I am using Django 1.8 with Python 3.4
I have no idea why my template profile.html doesn't show my username :/
profile.html
{% load staticfiles %}
<link rel="stylesheet" type="text/css" href="{% static 'accounts/css/style.css' %}" />
{% block content %}
<h2>My profile</h2>
<p>{{ request.user.username }}</p>
{% endblock %}
views.py
from django.contrib.auth import authenticate, login
from django.contrib.auth.forms import UserCreationForm, AuthenticationForm
from django.core.context_processors import csrf
from django.http import HttpResponseRedirect
from django.shortcuts import render, render_to_response
def login_view(request):
    """Log the user in on a valid POST; otherwise render the login form.

    A successful login of an active user redirects to /accounts/profile.
    Any other request (GET, bad credentials, disabled account) renders
    accounts/login.html with an empty AuthenticationForm; no error message
    is produced yet for the failure cases.
    """
    if request.method == 'POST':
        # .get() avoids a server error when a field is missing from the
        # submitted form; empty credentials simply fail to authenticate.
        username = request.POST.get('username', '')
        password = request.POST.get('password', '')
        user = authenticate(username=username, password=password)
        if user is not None and user.is_active:
            login(request, user)
            return HttpResponseRedirect('/accounts/profile')
    args = {}
    # render_to_response does not add the CSRF token itself.
    args.update(csrf(request))
    args['form'] = AuthenticationForm()
    return render_to_response('accounts/login.html', args)
def my_view(request):
    """Authenticate the POSTed credentials and redirect on success.

    Returns None when authentication fails or the account is inactive.
    """
    credentials = {
        'username': request.POST['username'],
        'password': request.POST['password'],
    }
    user = authenticate(**credentials)
    if user is None:
        # Return an 'invalid login' error message.
        return None
    print(request.user)
    if not user.is_active:
        # Return a 'disabled account' error message
        return None
    login(request, user)
    return HttpResponseRedirect('/accounts/profile')
def profile(request):
    """Render the profile page for the current user.

    Uses ``render`` so the template is rendered with the request available
    (the template reads ``request.user.username``), and passes the context
    as a dict rather than a bare string.
    """
    context = {'username': request.user.username}
    return render(request, 'accounts/profile.html', context)
def register_user(request):
    """Create a user from a valid POST; otherwise render the sign-up form.

    On an invalid POST the bound form is rendered again so its validation
    errors reach the template instead of being replaced by a blank form.
    """
    if request.method == 'POST':
        form = UserCreationForm(request.POST)
        if form.is_valid():
            form.save()
            return HttpResponseRedirect('/accounts/register_success')
    else:
        form = UserCreationForm()
    args = {}
    # render_to_response does not add the CSRF token itself.
    args.update(csrf(request))
    args['form'] = form
    return render_to_response('accounts/register_user.html', args)
def register_success(request):
    """Render the static registration-success page."""
    template_name = 'accounts/register_success.html'
    return render_to_response(template_name)
What's the best way to get user information from a django template?
Add django.template.context_processors.request to the context_processors option of the TEMPLATES setting in your settings.py file:
# Template engine configuration; the `request` context processor is the
# entry this answer asks to add.
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',  # add this line
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]