How to encode the csv file using APIviews - api

How to encode the csv file while importing the csv file data into database using Rest django APIview.
class UploadViewSet(APIView):
parser_classes = (MultiPartParser, FormParser)
permission_classes = (AllowAny,)
serializer_class= UploadSerializer
def post(self, request, *args, **kwargs):
serializer = UploadSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
file = serializer.validated_data['File']
reader = csv.reader(file('file','r'))
for row in reader():
new_company = Company(
name=row['name'],
hr_name=row['hr_name'],
hr_email=row['hr_email'],
hr_verified=row['hr_verified'],
user_id=row['user_id'],
primary_phone=row['primary_phone'],
comments=row['comments'],
)
new_company.save()
return Response({"status": "success"},status.HTTP_201_CREATED)

Related

drf post file to database

MODELS.PY
class MyFile(models.Model):
id = models.AutoField(primary_key=True)
in_file = models.FileField(upload_to=join_path('front/files', 'input'))
def __str__(self):
return self.in_file.name[18:]
SERIALIZERS.PY
class MyFileSerializer(serializers.ModelSerializer):
in_file = serializers.FileField(
max_length = 10000000,
allow_empty_file = False,
use_url = False,
)
class Meta:
model = MyFile
fields = ['in_file']
def create(self, validated_data):
file = validated_data.pop('in_file')
return MyFile.objects.create(in_file=file)
class DocumentChangeAPIView(APIView):
parser_classes = (FileUploadParser, FormParser)
def post(self, request):
serializer = MyFileSerializer(data=request.data)
print(request.data)
if serializer.is_valid(raise_exception=True):
serializer.save()
return Response(serializer.data)
enter image description here
printing this response
{'file': <InMemoryUploadedFile: samandar_resume.pdf (multipart/form-data; boundary=--------------------------689093269296817855921257)>}

No adapter found for objects of type: 'itemadapter.adapter.ItemAdapter'

I want to change the names of images downloaded from a webpage. I want to use standard names given by the website as opposed to cleaning the request url for it.
I have the following pipeline.py
from itemadapter import ItemAdapter
from scrapy.pipelines.images import ImagesPipeline
class ScrapyExercisesPipeline:
def process_item(self, item, spider):
adapter = ItemAdapter(item)
return adapter
class DownfilesPipeline(ImagesPipeline):
def file_path(self, request, response=None, info=None, item=None):
adapter = ScrapyExercisesPipeline().process_item()[0]
image_name: str = f'{adapter}.jpg'
return image_name
This produces the following error:
raise TypeError(f"No adapter found for objects of type: {type(item)} ({item})")
TypeError: No adapter found for objects of type: <class 'itemadapter.adapter.ItemAdapter'> (<ItemAdapter for ScrapyExercisesItem(name='unknown267', images=['https://bl-web-assets.britishland.com/live/meadowhall/s3fs-public/styles/retailer_thumbnail/public/retailer/boots_1.jpg?qQ.NHRs04tdmGxoyZKerRHcrhCImB3JH&itok=PD5LxLmS&cb=1657061667-curtime&v=1657061667-curtime'])>)
scraper.py:
import scrapy
from scrapy_exercises.items import ScrapyExercisesItem
class TestSpider(scrapy.Spider):
name = 'test'
#allowed_domains = ['x']
start_urls = ['https://www.meadowhall.co.uk/eatdrinkshop?page=1']
def start_requests(self):
for url in self.start_urls:
yield scrapy.Request(
url=url,
callback=self.parse,
cb_kwargs = {'pg':0}
)
def parse(self, response,pg):
pg=0
content_page = response.xpath("//div[#class='view-content']//div")
for cnt in content_page:
image_url = cnt.xpath(".//img//#src").get()
image_name = cnt.xpath(".//img//#alt").get()
if image_url != None:
pg+=1
items = ScrapyExercisesItem()
if image_name == '':
items['name'] = 'unknown'+f'{pg}'
items['images'] = [image_url]
yield items
else:
items['name'] = image_name
items['images'] = [image_url]
yield items
settings.py
ITEM_PIPELINES = {
#'scrapy.pipelines.images.ImagesPipeline': 1,
'scrapy_exercises.pipelines.ScrapyExercisesPipeline':45,
'scrapy_exercises.pipelines.DownfilesPipeline': 55
}
from pathlib import Path
import os
BASE_DIR = Path(__file__).resolve().parent.parent
IMAGES_STORE = os.path.join(BASE_DIR, 'images')
IMAGES_URLS_FIELD = 'images'
IMAGES_RESULT_FIELD = 'results'
You are calling on a pipeline from within your pipeline while that pipeline is also registered in your settings to be run as a pipeline. It would be simpler to just extract the name field from your item in your DownFilesPipeLine and return it.
Change your pipelines.py file to:
from itemadapter import ItemAdapter
from scrapy.pipelines.images import ImagesPipeline
class DownfilesPipeline(ImagesPipeline):
def file_path(self, request, response=None, info=None, item=None):
return item['name'] + '.jpg'
You also need to turn off the ScrapyExercisesPipeline in your settings

showing error " list indices must be integers or slices, not str "

showing error like "list indices must be integers or slices, not str"
Here MY APIView
class UploadViewSet(APIView):
parser_classes = (MultiPartParser, FormParser)
permission_classes = (AllowAny,)
def post(self, request, *args, **kwargs):
file = request.POST.get("content")
data = StringIO(file)
reader = csv.reader(data, delimiter=',')
for row in reader:
new_company = Company(
name=row['name'],
hr_name=row['hr_name'],
hr_email=row['hr_email'],
hr_verified=row['hr_verified'],
user_id=row['user_id'],
primary_phone=row['primary_phone'],
comments=row['comments'],
)
new_company.save()
return Response({"status": "success"},status.HTTP_201_CREATED)

CsvItemExporter for multiple files in custom item pipeline not exporting all items

I have created an item pipeline as an answer to this question.
It is supposed to create a new file for every page according to the page_no value set in the item. This works mostly fine.
The problem is with the last csv file generated by the pipeline/item exporter, page-10.csv.
The last 10 values are not exported, so the file stays empty.
What could be the reason for this behaviour?
pipelines.py
from scrapy.exporters import CsvItemExporter
class PerFilenameExportPipeline:
"""Distribute items across multiple CSV files according to their 'page_no' field"""
def open_spider(self, spider):
self.filename_to_exporter = {}
def spider_closed(self, spider):
for exporter in self.filename_to_exporter.values():
exporter.finish_exporting()
def _exporter_for_item(self, item):
filename = 'page-' + str(item['page_no'])
del item['page_no']
if filename not in self.filename_to_exporter:
f = open(f'{filename}.csv', 'wb')
exporter = CsvItemExporter(f, export_empty_fields=True)
exporter.start_exporting()
self.filename_to_exporter[filename] = exporter
return self.filename_to_exporter[filename]
def process_item(self, item, spider):
exporter = self._exporter_for_item(item)
exporter.export_item(item)
return item
spider
import scrapy
from ..pipelines import PerFilenameExportPipeline
class spidey(scrapy.Spider):
name = "idk"
custom_settings = {
'ITEM_PIPELINES': {
PerFilenameExportPipeline: 100
}
}
def start_requests(self):
yield scrapy.Request("http://quotes.toscrape.com/", cb_kwargs={'page_no': 1})
def parse(self, response, page_no):
for qts in response.xpath("//*[#class=\"quote\"]"):
yield {
'page_no': page_no,
'author' : qts.xpath("./span[2]/small/text()").get(),
'quote' : qts.xpath("./*[#class=\"text\"]/text()").get()
}
next_pg = response.xpath('//li[#class="next"]/a/#href').get()
if next_pg is not None:
yield response.follow(next_pg, cb_kwargs={'page_no': page_no + 1})
I know, 2 years later, but still - it might turn out helpful for someone.
It looks like you're never closing the file you're writing to (as you're using inline open). Please compare your code to the one in Scrapy's docs (the "Using Item Exporters" section): https://docs.scrapy.org/en/latest/topics/exporters.html
Besides, the method should now be called "close_spider", not "spider_closed"
Changing your code to the following should help:
from scrapy.exporters import CsvItemExporter
class PerFilenameExportPipeline:
def open_spider(self, spider):
self.filename_to_exporter = {}
def close_spider(self, spider):
#iterating over exporter-file tuples instead of only exporters
for exporter, csv_file in self.filename_to_exporter.values():
exporter.finish_exporting()
#closing the file
csv_file.close()
def _exporter_for_item(self, item):
filename = 'page-' + str(item['page_no'])
del item['page_no']
if filename not in self.filename_to_exporter:
csv_file = open(f'{filename}.csv', 'wb')
exporter = CsvItemExporter(f, export_empty_fields=True)
exporter.start_exporting()
#adding both exporter & file to later be closed as the dict's value
self.filename_to_exporter[filename] = (exporter, csv_file)
#picking only the exporter via [0]
return self.filename_to_exporter[filename][0]
def process_item(self, item, spider):
exporter = self._exporter_for_item(item)
exporter.export_item(item)
return item

is there a way to get FactoryBoy to pass a parameter to the save() method of my Django model?

I have a Django model like this:
class NicePerson(models.Model):
last_name = models.CharField(max_length=100)
def save(self, make_irish=False, *args, **kwargs):
"""if make_irish is True, prepend last_name with O'"
if make_irish:
self.last_name = "O'" + self.last_name
super(MyModel, self).save(*args, **kwargs)
And I have a FactoryBoy class to build NicePerson instances
class NicePersonFactory(factory.django.DjangoModelFactory):
class Meta:
model = NicePerson
I know I can use these together like so:
nice_person = NicePersonFactory(last_name='Shea')
But how can I pass the "make_irish" parameter to my save() method?
factory_boy uses the default manager for creation, i.e it calls NicePerson.objects.create() or NicePerson.objects.get_or_create().
For your example, you could override those managers (in your Model definition):
class NicePersonManager(models.Manager):
def create(self, *args, **kwargs):
if kwargs.pop('make_irish', False):
kwargs.update(...)
return super().create(*args, **kwargs)
Another option would be to override your factory's _create and _get_or_create methods. For instance:
class NicePersonFactory(factory.django.DjangoModelFactory):
#classmethod
def _create(cls, model_class, *args, **kwargs):
make_irish = kwargs.pop('make_irish', False)
instance = model_class(**kwargs)
instance.save(make_irish=make_irish)
return instance