I'm trying to make a test for this view:
def author_detail(request, pk):
author = get_object_or_404(Author, pk=pk)
blog = author.blog_set.all()
paginator = Paginator(blog, 1)
page_number = request.GET.get('page')
page_obj = paginator.get_page(page_number)
context = {
'author': author,
'page_obj': page_obj,
}
return render(request, 'blog/author_detail.html', context=context)
The view is working normally. My problem is when I'm going to try to test this view. Here my test:
class AuthorDetailViewTest(TestCase):
def setUp(self):
user = User.objects.create(username='user01', password='123456')
self.author_instance = Author.objects.create(
user=user, date_of_birth='1998-09-08', bio='I am user01')
topic = Topic.objects.create(name='Testing')
Blog.objects.create(title='My blog', content="It's my blog")
Blog.author = self.author_instance
Blog.topic = topic
# The author.blog_set.all() are returning an empty QuerySet
# This problem are only happening in the tests, not in the view
def test_pagination_first_page(self):
response = self.client.get(
reverse('author-detail', kwargs={'pk':self.author_instance.pk}))
self.assertEqual(len(response.context['page_obj']), 1)
The result are:
FAIL: test_pagination_first_page (blog.tests.test_views.AuthorDetailViewTest)
-------------------------------------------------------------------
Traceback (most recent call last):
File "/home/carlos/problem/venv_01/the_blog/blog/tests/test_views.py", line 189,in test_pagination_first_page
self.assertEqual(len(response.context['page_obj']), 1)
AssertionError: 0 != 1
----------------------------------------------------------------------
The len(response.context['page_obj']) is equal 0. It should be at least 1, because I created one Blog object. When I print the QuerySet of author.blog_set.all(), the returned QuerySet are empty (<QuerySet []>). I think that the problem is in the creation of the Blog model, because the author and topic fields are ManyToManyField.
As I mentioned before, my problem is in the test, not in the view. The view is working normally.
The last 3 lines of the following code snippet have some issues:
def setUp(self):
user = User.objects.create(username='user01', password='123456')
self.author_instance = Author.objects.create(
user=user, date_of_birth='1998-09-08', bio='I am user01')
topic = Topic.objects.create(name='Testing')
Blog.objects.create(title='My blog', content="It's my blog")
Blog.author = self.author_instance
Blog.topic = topic
The blog object is created but never returned/fetched
Blog model is being used to connect author and topic. Instead, the blog object should be used.
Author and Topic are M2M on Blog. The new objects should be added via add method. See How to add data into ManyToMany field? for additional context.
Solution:
def setUp(self):
user = User.objects.create(username='user01', password='123456')
author = Author.objects.create(
user=user, date_of_birth='1998-09-08', bio='I am user01')
blog = Blog.objects.create(
title='My blog', content="It's my blog")
blog.author.add(author)
blog.topic.add(topic)
It worked.
Related
I've got a django rest framework view that passes data from a POST to another function that returns a result.
I want to store that result in the database so that you can GET a previous result or results.
Currently, I've got the "nornir_result" defined in my serializer and model.
The serializer field is set to required=False (because the result isn't known when the payload is passed in).
Then, to populate/save this information for later GETs, save(nornir_result=nornir_result) gets called during the POST.
Is my approach correct? Passing additional data to save() to store the results of logic triggered by a POST so it can be viewed later?
# views.py
class F5AuditList(APIView):
def get(self, request, format=None):
audits = F5Audit.objects.all()
serializer = F5AuditSerializer(audits, many=True)
return Response(serializer.data)
def post(self, request, format=None):
serializer = F5AuditSerializer(data=request.data)
if serializer.is_valid():
nornir_result = audit.django_result(request.data)
serializer.save(
audit_id=create_unique_number(), nornir_result=nornir_result
)
return Response(serializer.data, status=status.HTTP_200_OK)
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
# serializers.py
class F5AuditSerializer(serializers.Serializer):
audit_id = serializers.IntegerField(required=False)
created = serializers.DateTimeField(required=False)
devices = serializers.DictField()
audit_type = serializers.CharField(
max_length=None, style={"base_template": "textarea.html"}, required=False
)
nornir_result = serializers.CharField(
max_length=None, style={"base_template": "textarea.html"}, required=False
)
# models.py
class F5Audit(Document):
audit_id = fields.IntField(primary_key=True)
created = fields.DateTimeField(default=datetime.datetime.utcnow)
audit_type = fields.StringField(max_length=100, default="type of f5 audit")
devices = fields.DictField()
nornir_result = fields.StringField(max_length=1000)
'''
import scrapy
from ..items import GooddealItem
class FarmtoolsSpider(scrapy.Spider):
name = 'farmtools'
allowed_domains = ['www.gooddeal.com']
start_urls = ['https://www.gooddeal.com/all?
source=private&sort=publishdate%20desc']
def parse(self, response):
items = GooddealItem()
rows = response.xpath('//ul[#class="card-collection"]/li')
for row in rows:
link = row.xpath('.//a/#href').get() #this is the full link.
link_split = link.split('/')[-1] #this splits the url link th first time.
linkid = link_split.split('?')[0] #this splits it the second time.
title = row.xpath('.//div[1]/p[#class="card__body-title"]/text()').get()
county = row.xpath('.//a/div/div[2]/div[1]/ul[#class="card__body-keyinfo"]/li[contains(text(),"min")]/following-sibling::node()/text()').get()
price = row.xpath('.//p[#class="card__price"]/span[1]/text()').get()
subcat = row.xpath('.//a/div/div[2]/div[1]/p[2]/text()[2]').get()
zero = row.xpath('.//a/div/div[2]/div[1]/ul[#class="card__body-keyinfo"]/li[contains(text(),"min")]/text()').get()
if zero == '0 min':
items['linkid'] = linkid
items['title'] = title
items['county'] = county
items['price'] = price
items['subcat'] = subcat
items['zero'] = zero
items['link'] = link
yield response.follow(url = link, callback=self.parse_item_page)
def parse_item_page(self, response):
items = GooddealItem()
rows = response.xpath('/html/body[1]')
for row in rows:
category = row.xpath('.//main/div/div[1]/div/div[1]/div/nav/span/a[1]/span/text()').get(),
views = row.xpath('.//main/div/div[1]/div/div[2]/div[2]/div[1]/div[3]/div[1]/div/div[1]/div/div/span[2]/text()').get(),
seller_id = row.xpath('.//main/div/div[1]/div/div[2]/div[2]/div[2]/div[3]/div/div[1]/div[1]/div[2]/a/#href').get(),
seller_ads = row.xpath('.//main/div/div[1]/div/div[2]/div[2]/div[2]/div[3]/div/div[2]/div/dl[3]/dd/text()').get(),
lifetime_ads = row.xpath('//main/div/div[1]/div/div[2]/div[2]/div[2]/div[3]/div/div[2]/div/dl[4]/dd/text()').get()
items['category'] = category
items['views'] = views
items['seller_id'] = seller_id
items['seller_ads'] = seller_ads
items['lifetime_ads'] = lifetime_ads
yield items
'''
I'm stuck on this as it's my first attempt. When I run the code I'm just getting back:
2020-07-12 22:53:21 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.gooddeal.com/dogs-for-sale/dachshunds/25348559>
{'category': (None,),
'lifetime_ads': None,
'seller_ads': (None,),
'seller_id': (None,),
'views': (None,)}
Any help will be appreciated, thanks
I'm assuming you want the data scraped in parse method to be joined together with the data scraped in the parse_item_page.
If you are using Scrapy v1.7+ you can use cb_kwargs when building the request.
This parameter receives a dict with arbitrary data that will be used as argument in the callback function. So you would have to do something like this in your request:
...
yield response.follow(url = link, callback=self.parse_item_page, cb_kwargs={'scraped_item': items})
For this to work, you also need to change the callback function to receive this parameter. Like this:
def parse_item_page(self, response, scraped_item):
...
Scrapy will take care of sending the scraped_item when calling the parse_item_page.
If you are using Scrapy v1.6 or older:
You will need to use the meta parameter. This method still works in more recent versions, but cb_kwargs(solution above) are preferable.
When building the request you will use the meta parameter to include some arbitrary data in the request. The data will be accessible in the response object that the callback function receives. Your request should look like this:
...
yield response.follow(url = link, callback=self.parse_item_page, meta={'scraped_item': items})
In this case you will access the data by calling response.meta:
def parse_item_page(self, response):
items = response.meta.get('scraped_item') #response.meta is a dict
...
I'm trying to scrape data from amazon India website. I am not able collect response and parse the elements using the yield() method when:
1) I have to move from product page to review page
2) I have to move from one review page to another review page
Product page
Review page
Code flow:
1) customerReviewData() calls the getCustomerRatingsAndComments(response)
2) The getCustomerRatingsAndComments(response)
finds the URL of the review page and call the yield request method with getCrrFromReviewPage(request) as callback method, with url of this review page
3) getCrrFromReviewPage() gets new response of the firstreview page and scrape all the elements from the first review page (page loaded) and add it to customerReviewDataList[]
4) get URL of the next page if it exists and recursively call getCrrFromReviewPage() method, and crawl elements from next page, until all the review page is crawled
5) All the reviews gets added to the customerReviewDataList[]
I have tried playing around with yield() changing the parameters and also looked up the scrapy documentation for yield() and Request/Response yield
# -*- coding: utf-8 -*-
import scrapy
import logging
customerReviewDataList = []
customerReviewData = {}
#Get product name in <H1>
def getProductTitleH1(response):
titleH1 = response.xpath('normalize-space(//*[#id="productTitle"]/text())').extract()
return titleH1
def getCustomerRatingsAndComments(response):
#Fetches the relative url
reviewRelativePageUrl = response.css('#reviews-medley-footer a::attr(href)').extract()[0]
if reviewRelativePageUrl:
#get absolute URL
reviewPageAbsoluteUrl = response.urljoin(reviewRelativePageUrl)
yield Request(url = reviewPageAbsoluteUrl, callback = getCrrFromReviewPage())
self.log("yield request complete")
return len(customerReviewDataList)
def getCrrFromReviewPage():
userReviewsAndRatings = response.xpath('//div[#id="cm_cr-review_list"]/div[#data-hook="review"]')
for userReviewAndRating in userReviewsAndRatings:
customerReviewData[reviewTitle] = response.css('#cm_cr-review_list .review-title span ::text').extract()
customerReviewData[reviewDescription] = response.css('#cm_cr-review_list .review-text span::text').extract()
customerReviewDataList.append(customerReviewData)
reviewNextPageRelativeUrl = response.css('#cm_cr-pagination_bar .a-pagination .a-last a::attr(href)')[0].extract()
if reviewNextPageRelativeUrl:
reviewNextPageAbsoluteUrl = response.urljoin(reviewNextPageRelativeUrl)
yield Request(url = reviewNextPageAbsoluteUrl, callback = getCrrFromReviewPage())
class UsAmazonSpider(scrapy.Spider):
name = 'Test_Crawler'
allowed_domains = ['amazon.in']
start_urls = ['https://www.amazon.in/Philips-Trimmer-Cordless-Corded-QT4011/dp/B00JJIDBIC/ref=sr_1_3?keywords=philips&qid=1554266853&s=gateway&sr=8-3']
def parse(self, response):
titleH1 = getProductTitleH1(response),
customerReviewData = getCustomerRatingsAndComments(response)
yield{
'Title_H1' : titleH1,
'customer_Review_Data' : customerReviewData
}
I'm getting the following response:
{'Title_H1': (['Philips Beard Trimmer Cordless and Corded for Men QT4011/15'],), 'customer_Review_Data': <generator object getCustomerRatingsAndComments at 0x048AC630>}
The "Customer_review_Data" should be a list of dict of title and review
I am not able to figure out as to what mistake I am doing here.
When I use the log() or print() to see what data is captured in customerReviewDataList[], unable to see the data in the console either.
I am able to scrape all the reviews in customerReviewDataList[], if they are present in the product page,
In this scenario where I have to use the yield function I am getting the output stated above like this [https://ibb.co/kq8w6cf]
This is the kind of output I am looking for:
{'customerReviewTitle': ['Difficult to find a charger adapter'],'customerReviewComment': ['I already have a phillips trimmer which was only cordless. ], 'customerReviewTitle': ['Good Product'],'customerReviewComment': ['Solves my need perfectly HK']}]}
Any help is appreciated. Thanks in advance.
You should complete the Scrapy tutorial. The Following links section should be specially helpful to you.
This is a simplified version of your code:
def data_request_iterator():
yield Request('https://example.org')
class MySpider(Spider):
name = 'myspider'
start_urls = ['https://example.com']
def parse(self, response):
yield {
'title': response.css('title::text').get(),
'data': data_request_iterator(),
}
Instead, it should look like this:
class MySpider(Spider):
name = 'myspider'
start_urls = ['https://example.com']
def parse(self, response):
item = {
'title': response.css('title::text').get(),
}
yield Request('https://example.org', meta={'item': item}, callback=self.parse_data)
def parse_data(self, response):
item = response.meta['item']
# TODO: Extend item with data from this second response as needed.
yield item
If I want to create a program using Python2.7 where:
1) There are three classes, namely Tagging, Commenting, and Posting
2) The self.content for both Tagging and Commenting classes will be sent to the self.compact of class Posting
class Tagging: # Handles Tagging - Create new Tags
def __init__(self):
self.content = []
self.initialTag = ""
def doTag(self): #Tag people
self.initialTag = raw_input("Name to Tag: ")
self.content.append(self.initialTag)
#Tagging can only be done if the user created new post.
class Commenting: #Handles Commenting - Create new Comments
def __init__(self):
self.content = []
self.initialComment = ""
def doComment(self): #Commenting on Posts
self.initialComment = raw_input("Comment: ")
self.content.append(self.initialComment)
#Commenting can only be done on Posts. No Post means no Comment. (Same goes to Tags)
class Posting: #Handles Posting - Create new Posts
def __init__(self):
self.content = [] #Content of the post
self.initialPost = ""
self.compact = [] #Post that contains the Post, Comments, and Tags
#How do I do this?
def doPost(self):
self.initialPost = raw_input("Post: ")
self.content.append(self.initialPost)
I tried inheriting class Posting to both class Tagging and class Commenting but I think using Inheritance just for one single variable of class Posting is illogical.
Can anyone suggest me a better way?
And additional question: Are the class Tagging and class Commenting having Aggregation relationship to class Posting? Or is it an Association relationship? (word definition by UML)
How about this, just example code in "__ main __" part:
class Tagging: # Handles Tagging - Create new Tags
def __init__(self):
self.content = []
self.initialTag = ""
def doTag(self): #Tag people
self.initialTag = raw_input("Name to Tag: ")
self.content.append(self.initialTag)
#Tagging can only be done if the user created new post.
class Commenting: #Handles Commenting - Create new Comments
def __init__(self):
self.content = []
self.initialComment = ""
def doComment(self): #Commenting on Posts
self.initialComment = raw_input("Comment: ")
self.content.append(self.initialComment)
#Commenting can only be done on Posts. No Post means no Comment. (Same goes to Tags)
class Posting: #Handles Posting - Create new Posts
def __init__(self, TaggingContent, CommentingContent):
self.content = [] #Content of the post
self.initialPost = ""
self.compact = TaggingContent + CommentingContent #Post that contains the Post, Comments, and Tags
#How do I do this?
def doPost(self):
self.initialPost = raw_input("Post: ")
self.content.append(self.initialPost)
if __name__ == "__main__":
T = Tagging()
C = Commenting()
##Do stuff here with tagging and commenting....
P = Posting(T.content, C.content)
#Do stuff with posting
That way you have the content from Tagging and Commenting into compact from Posting, or am I wrong about what you need?
If you want to ensure in OOP that a set of classes provides obeys a certain contract you normally define an interface.
Python does not provide interfaces directly, but instead it's common to use duck-typeing by the use of something like isinstance or hasattr, which means, that if your object has a content-property use it, if not, raise an error.
Another possiblity to emulate interfaces is available since Python 2.6 by means of Abstract Base Classes
Hope this helps.
I'm a newbee to python and django and I can't figure out what I'm doing wrong here.
I have a Site object:
class Site (models.Model):
domain = models.CharField(max_length=30)
support_status = models.CharField(max_length=20, choices= SITE_SUPPORTED_STATUS, blank=False)
requests = models.IntegerField()
objects = SiteManager()
def __unicode__(self):
return u'%s %s' % (self.domain, self.support_status)
And a SiteManager object
class SiteManager(models.Manager):
def supported_site_counts(self):
i = self.filter(support_status__iexact="SUPPORTED").count()
return i
From the console, the method "supported_site_counts()" works just fine
>>(InteractiveConsole)
>>> from bookmark.models import Site, SiteManager
>>> Site.objects.supported_site_counts()
>>>>2012-05-18 18:09:20,027 DEBUG (0.001) SELECT COUNT(*) FROM "bookmark_site" WHERE
>>>>"bookmark_site"."support_status" LIKE SUPPORTED ESCAPE '\' ; args=(u'SUPPORTED',)
>>>>2012-05-18 18:09:20,028 DEBUG Got 1 supported site
>>>>1
But when it's called from a testcase, the count returns as 0
class SiteManagerTest(unittest.TestCase):
def test_supported_site_counts(self):
self.x = False
self.count = Site.objects.supported_site_counts()
logging.debug(self.count)
This is probably because the tests will set up a database separate from your development database to run the tests in. You will need to put testing data in to the testing database, either programmatically or using fixtures.