Scrapy calling spider other than the one specified on the command line - scrapy

(P6Svenv)malikarumi#Tetuoan2:~/Projects/P6/P6Svenv/test2/test2/spiders$ scrapy crawl zomd
Traceback (most recent call last):
File "/usr/bin/scrapy", line 9, in <module>
load_entry_point('Scrapy==1.0.3.post6-g2d688cd', 'console_scripts', 'scrapy')()
File "/usr/lib/pymodules/python2.7/scrapy/cmdline.py", line 142, in execute
cmd.crawler_process = CrawlerProcess(settings)
File "/usr/lib/pymodules/python2.7/scrapy/crawler.py", line 209, in __init__
super(CrawlerProcess, self).__init__(settings)
File "/usr/lib/pymodules/python2.7/scrapy/crawler.py", line 115, in __init__
self.spider_loader = _get_spider_loader(settings)
File "/usr/lib/pymodules/python2.7/scrapy/crawler.py", line 296, in _get_spider_loader
return loader_cls.from_settings(settings.frozencopy())
File "/usr/lib/pymodules/python2.7/scrapy/spiderloader.py", line 30, in from_settings
return cls(settings)
File "/usr/lib/pymodules/python2.7/scrapy/spiderloader.py", line 21, in __init__
for module in walk_modules(name):
File "/usr/lib/pymodules/python2.7/scrapy/utils/misc.py", line 71, in walk_modules
submod = import_module(fullpath)
File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
__import__(name)
File "/home/malikarumi/Projects/P6/P6Svenv/test2/test2/spiders/t350_crawl.py", line 36
def parse_item(self, response):
^
IndentationError: unindent does not match any outer indentation level
Do you see it? Scrapy isn't even calling the spider I specified on the command line!
I see that super in the traceback, but all my t350's are derived from CrawlSpider. zomd is subclassed from scrapy.Spider. Why is this happening and what do I do about it?

A spider's name is not the same as its file name. It is defined within the spider file by the second line below:
class CAPjobSpider(Spider):
name = "spider_name"
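The above spider's name is "spider_name", even if the file is named "New_York.py". To find the spider with the requested name, Scrapy imports every module in the spiders package (the walk_modules call in the traceback), so a syntax error in any spider file, here t350_crawl.py, aborts the command before your spider is ever run. Fix the indentation at line 36 of t350_crawl.py (or move that file out of the spiders directory) and scrapy crawl zomd will work.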

Related

pyshark "TypeError: sequence item 6: expected str instance, _io.TextIOWrapper found"

I am using pyshark for live packet capture. When I pass the parameter output_file=myFileObject to save captures to a file,
I get the following error on the sniffing line. If the output_file parameter is removed, it works absolutely fine. Please suggest a fix.
MySampleCode:
import pyshark
def capturePacket():
outputF = open('capturepcap.pcap', 'w')
cap = pyshark.LiveCapture(interface='Ethernet 8', output_file=outputF)
cap.sniff(timeout=60)
outputF.close()
Error:
Traceback (most recent call last):
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "c:\Users\wxyz\.vscode\extensions\ms-python.python-2022.6.2\pythonFiles\lib\python\debugpy\__main__.py", line 45, in <module>
cli.main()
File "c:\Users\wxyz\.vscode\extensions\ms-python.python-2022.6.2\pythonFiles\lib\python\debugpy/..\debugpy\server\cli.py", line 444, in main
run()
File "c:\Users\wxyz\.vscode\extensions\ms-python.python-2022.6.2\pythonFiles\lib\python\debugpy/..\debugpy\server\cli.py", line 285, in run_file
runpy.run_path(target_as_str, run_name=compat.force_str("__main__"))
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 269, in run_path
return _run_module_code(code, init_globals, run_name,
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 96, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "c:\Users\wxyz\Documents\automation\practice_set_script\paket_capture\basic_packetCapture.py", line 29, in <module>
capturePacket()
File "c:\Users\wxyz\Documents\automation\practice_set_script\paket_capture\basic_packetCapture.py", line 22, in capturePacket
cap.sniff(timeout=60)
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\site-packages\pyshark\capture\capture.py", line 137, in load_packets
self.apply_on_packets(keep_packet, timeout=timeout, packet_count=packet_count)
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\site-packages\pyshark\capture\capture.py", line 274, in apply_on_packets
return self.eventloop.run_until_complete(coro)
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 641, in run_until_complete
return future.result()
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\asyncio\tasks.py", line 445, in wait_for
return fut.result()
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\site-packages\pyshark\capture\capture.py", line 283, in packets_from_tshark
tshark_process = await self._get_tshark_process(packet_count=packet_count)
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\site-packages\pyshark\capture\live_capture.py", line 94, in _get_tshark_process
tshark = await super(LiveCapture, self)._get_tshark_process(packet_count=packet_count, stdin=read)
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\site-packages\pyshark\capture\capture.py", line 399, in _get_tshark_process
self._log.debug("Creating TShark subprocess with parameters: " + " ".join(parameters))
TypeError: sequence item 6: expected str instance, _io.TextIOWrapper found
Error on reading from the event loop self pipe
loop: <ProactorEventLoop running=True closed=False debug=False>
Traceback (most recent call last):
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\asyncio\proactor_events.py", line 779, in _loop_self_reading
f = self._proactor.recv(self._ssock, 4096)
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\asyncio\windows_events.py", line 450, in recv
self._register_with_iocp(conn)
File "C:\Users\wxyz\AppData\Local\Programs\Python\Python310\lib\asyncio\windows_events.py", line 723, in _register_with_iocp
_overlapped.CreateIoCompletionPort(obj.fileno(), self._iocp, 0, 0)
OSError: [WinError 87] The parameter is incorrect
PS C:\Users\wxyz\Documents\automation\practice_set_script\paket_capture>
The issue in your code is these lines:
outputF = open('capturepcap.pcap', 'w')
cap = pyshark.LiveCapture(interface='Ethernet 8', output_file=outputF)
The output_file parameter expects a string (the path of the file to write), not an io.TextIOWrapper file object:
:param output_file: A string of a file to write every read packet into (useful when filtering).
So this works:
import pyshark
def capturePacket():
cap = pyshark.LiveCapture(interface='en0', output_file='capturepcap.pcap')
cap.sniff(timeout=60)
capturePacket()
Here is a reference that I put together on using PyShark

indeed scrapy can't retrieve data

I run the command scrapy crawl indeed --logfile=crawl.log, but no log file is generated and I receive the following error. I tried to debug the code, but I can't see anything wrong.
File "C:\Anaconda3\Scripts\scrapy-script.py", line 10, in <module>
sys.exit(execute())
File "C:\Anaconda3\lib\site-packages\scrapy\cmdline.py", line 110, in execute
settings = get_project_settings()
File "C:\Anaconda3\lib\site-packages\scrapy\utils\project.py", line 68, in get_project_settings
settings.setmodule(settings_module_path, priority='project')
File "C:\Anaconda3\lib\site-packages\scrapy\settings\__init__.py", line 295, in setmodule
self.set(key, getattr(module, key), priority)
File "C:\Anaconda3\lib\site-packages\scrapy\settings\__init__.py", line 270, in set
self.attributes[name].set(value, priority)
File "C:\Anaconda3\lib\site-packages\scrapy\settings\__init__.py", line 55, in set
value = BaseSettings(value, priority=priority)
File "C:\Anaconda3\lib\site-packages\scrapy\settings\__init__.py", line 91, in __init__
self.update(values, priority)
File "C:\Anaconda3\lib\site-packages\scrapy\settings\__init__.py", line 327, in update
for name, value in six.iteritems(values):
File "C:\Anaconda3\lib\site-packages\six.py", line 587, in iteritems
return iter(d.items(**kw))
AttributeError: 'list' object has no attribute 'items'

unorderable types: str() < tuple() when train pet detector by google object detection api

I am training the pet detector with the Google Object Detection API and get the error below. Does it mean that the sorted function does not support dicts whose keys mix str and tuple types, and that the Object Detection API still does not support Python 3?
Traceback (most recent call last):
File "D:\Program Files\JetBrains\PyCharm 2017.1.1\helpers\pydev\pydevd.py", line 1578, in <module>
globals = debugger.run(setup['file'], None, None, is_module)
File "D:\Program Files\JetBrains\PyCharm 2017.1.1\helpers\pydev\pydevd.py", line 1015, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "D:\Program Files\JetBrains\PyCharm 2017.1.1\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "E:/Work/Lib/tensorflow/models/object_detection/train.py", line 198, in <module>
tf.app.run()
File "D:\Program Files\Python\Python35\lib\site-packages\tensorflow\python\platform\app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "E:/Work/Lib/tensorflow/models/object_detection/train.py", line 194, in main
worker_job_name, is_chief, FLAGS.train_dir)
File "E:\Work\Lib\tensorflow\models\object_detection\trainer.py", line 184, in train
data_augmentation_options)
File "E:\Work\Lib\tensorflow\models\object_detection\trainer.py", line 77, in _create_input_queue
prefetch_queue_capacity=prefetch_queue_capacity)
File "E:\Work\Lib\tensorflow\models\object_detection\core\batcher.py", line 93, in __init__
num_threads=num_batch_queue_threads)
File "D:\Program Files\Python\Python35\lib\site-packages\tensorflow\python\training\input.py", line 919, in batch
name=name)
File "D:\Program Files\Python\Python35\lib\site-packages\tensorflow\python\training\input.py", line 697, in _batch
tensor_list = _as_tensor_list(tensors)
File "D:\Program Files\Python\Python35\lib\site-packages\tensorflow\python\training\input.py", line 385, in _as_tensor_list
return [tensors[k] for k in sorted(tensors)]
TypeError: unorderable types: str() < tuple()
I ran into the same problem. I traced the issue down to a python 3 compat issue in TensorFlow. I have submitted a fix for it here: https://github.com/tensorflow/tensorflow/pull/11039

IndentationError: expected an indented block, Scrapy

career#careercrawler:~/stack/stack$ scrapy crawl stack
Traceback (most recent call last): File
"/home/career/.local/bin/scrapy", line 11, in
sys.exit(execute())
File
"/home/career/.local/lib/python2.7/site-packages/scrapy/cmdline.py",
line 141, in execute
cmd.crawler_process = CrawlerProcess(settings)
File
"/home/career/.local/lib/python2.7/site-packages/scrapy/crawler.py",
line 238, in __init__
super(CrawlerProcess, self).__init__(settings)
File
"/home/career/.local/lib/python2.7/site-packages/scrapy/crawler.py",
line 129, in __init__
self.spider_loader = _get_spider_loader(settings)
File
"/home/career/.local/lib/python2.7/site-packages/scrapy/crawler.py",
line 325, in _get_spider_loader
return loader_cls.from_settings(settings.frozencopy())
File
"/home/career/.local/lib/python2.7/site-packages/scrapy/spiderloader.py",
line 33, in from_settings
return cls(settings)
File
"/home/career/.local/lib/python2.7/site-packages/scrapy/spiderloader.py",
line 20, in __init__
self._load_all_spiders()
File
"/home/career/.local/lib/python2.7/site-packages/scrapy/spiderloader.py",
line 28, in _load_all_spiders
for module in walk_modules(name):
File
"/home/career/.local/lib/python2.7/site-packages/scrapy/utils/misc.py",
line 71, in walk_modules
submod = import_module(fullpath)
File "/usr/lib/python2.7/importlib/__init__.py", line 37, in
import_module
__import__(name)
File "/home/career/stack/stack/spiders/stack_spider.py", line 4, in
from stack.items import StackItem
File "/home/career/stack/stack/items.py", line 13
title = scrapy.Field()
^
IndentationError: expected an indented block
This is my error, I don't know what is happening there. Someone help me, please.
This error is caused by incorrect indentation.
As mentioned in the traceback:
/home/career/stack/stack/items.py", line 13 title = scrapy.Field()
go to ~/stack/stack/items.py and check indentation at line 13.

KeyError: 'active_id' openerp

I changed the record rules for a project; after that, clicking on "My Current Timesheet" throws the error below:
Client Traceback (most recent call last):
File "F:\OpenERP 7.0-20140118-002423\Server\server\openerp\addons\web\http.py", line 204, in dispatch
File "F:\OpenERP 7.0-20140118- 002423\Server\server\openerp\addons\web\controllers\main.py", line 1432, in run
File "F:\OpenERP 7.0-20140118-002423\Server\server\openerp\addons\web\session.py", line 42, in proxy
File "F:\OpenERP 7.0-20140118-002423\Server\server\openerp\addons\web\session.py", line 30, in proxy_method
File "F:\OpenERP 7.0-20140118-002423\Server\server\openerp\addons\web\session.py", line 103, in send
Server Traceback (most recent call last):
File "F:\OpenERP 7.0-20140118-002423\Server\server\openerp\addons\web\session.py", line 89, in send
File "F:\OpenERP 7.0-20140118-002423\Server\server.\openerp\netsvc.py", line 292, in dispatch_rpc
File "F:\OpenERP 7.0-20140118-002423\Server\server.\openerp\service\web_services.py", line 626, in dispatch
File "F:\OpenERP 7.0-20140118-002423\Server\server.\openerp\osv\osv.py", line 190, in execute_kw
File "F:\OpenERP 7.0-20140118-002423\Server\server.\openerp\osv\osv.py", line 132, in wrapper
File "F:\OpenERP 7.0-20140118-002423\Server\server.\openerp\osv\osv.py", line 199, in execute
File "F:\OpenERP 7.0-20140118-002423\Server\server.\openerp\osv\osv.py", line 187, in execute_cr
File "F:\OpenERP 7.0-20140118- 002423\Server\server\openerp\addons\smsclient\serveraction.py", line 47, in run
KeyError: 'active_id'
Once you define a record rule with a domain_force condition, it is stored in your database (ir.rule). Even if you later comment out or remove that code, the stored rule still applies.
You have to set that condition to an empty domain, [], and then upgrade the respective module. After that it will work.
Example:
[('employee_id.user_id', '=', user.id)]
if you want to remove the domain_force give like this:
[]
Then upgrade, and comment/remove that respective line