I am trying to deploy to ScrapingHub and here is the error I am getting...
Deploy log last 30 lines:
File "/app/python/lib/python3.8/site-packages/scrapy/cmdline.py", line 142, in execute
cmd.crawler_process = CrawlerProcess(settings)
File "/app/python/lib/python3.8/site-packages/scrapy/crawler.py", line 280, in __init__
super(CrawlerProcess, self).__init__(settings)
File "/app/python/lib/python3.8/site-packages/scrapy/crawler.py", line 152, in __init__
self.spider_loader = self._get_spider_loader(settings)
File "/app/python/lib/python3.8/site-packages/scrapy/crawler.py", line 146, in _get_spider_loader
return loader_cls.from_settings(settings.frozencopy())
File "/app/python/lib/python3.8/site-packages/scrapy/spiderloader.py", line 60, in from_settings
return cls(settings)
File "/app/python/lib/python3.8/site-packages/scrapy/spiderloader.py", line 24, in __init__
self._load_all_spiders()
File "/app/python/lib/python3.8/site-packages/scrapy/spiderloader.py", line 46, in _load_all_spiders
for module in walk_modules(name):
File "/app/python/lib/python3.8/site-packages/scrapy/utils/misc.py", line 77, in walk_modules
submod = import_module(fullpath)
File "/usr/local/lib/python3.8/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1014, in _gcd_import
File "<frozen importlib._bootstrap>", line 991, in _find_and_load
File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 655, in _load_unlocked
File "<frozen importlib._bootstrap>", line 618, in _load_backward_compatible
File "<frozen zipimport>", line 259, in load_module
File "/app/__main__.egg/sstkscrape/spiders/sstkscrape_spider.py", line 4, in <module>
File "/app/__main__.egg/sstkscrape/spiders/sstkscrape_spider.py", line 7, in QuoteSpider
FileNotFoundError: [Errno 2] No such file or directory: 'urls-test.csv'
{"message": "shub-image-info exit code: 1", "details": null, "error": "image_info_error"}
{"status": "error", "message": "Internal error"}
Deploy log location: /var/folders/p7/nwmq6_4138n6t3w2spdnpzfm0000gn/T/shub_deploy_n9zmxfto.log
Error: Deploy failed: b'{"status": "error", "message": "Internal error"}'
I have a local CSV file (urls-test.csv) that I am trying to include in the deploy, and my spider opens that file. Could that be what is causing the failure?
You need to list the file in the package_data section of your setup.py file so that it is packaged with the project; see "Deploying non-code files".
Related
[I 2020-12-09 09:15:49,277] Trial 2 finished with value: 0.678097665309906 and parameters: {'num_filters1': 16, 'num_filters2': 16, 'kernel_size': 6, 'dropout_rate': 0.5876482728988799, 'optimizer': 'RMSprop', 'lr': 9.014890468942645e-05, 'Weight_decay': 2.6662100371168874e-09}. Best is trial 1 with value: 0.6492913365364075.
/home/shar/anaconda3/lib/python3.7/site-packages/optuna/structs.py:27: FutureWarning: `structs` is deprecated. Classes have moved to the following modules. `structs.StudyDirection`->`study.StudyDirection`, `structs.StudySummary`->`study.StudySummary`, `structs.FrozenTrial`->`trial.FrozenTrial`, `structs.TrialState`->`trial.TrialState`, `structs.TrialPruned`->`exceptions.TrialPruned`.
warnings.warn(_message, FutureWarning)
Traceback (most recent call last):
File "OptunaTest", line 573, in <module>
n_instances, n_features, scores = run_analysis()
File "OptunaTest", line 354, in run_analysis
pruned_trials = [t for t in study.trials if t.state == optuna.structs.TrialState.PRUNED]
File "OptunaTest", line 354, in <listcomp>
pruned_trials = [t for t in study.trials if t.state == optuna.structs.TrialState.PRUNED]
File "/home/shar/anaconda3/lib/python3.7/site-packages/optuna/__init__.py", line 54, in __getattr__
return getattr(self._load(), item)
File "/home/shar/anaconda3/lib/python3.7/site-packages/optuna/__init__.py", line 49, in _load
module = importlib.import_module(self._name)
File "/home/shar/anaconda3/lib/python3.7/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/home/shar/anaconda3/lib/python3.7/site-packages/optuna/structs.py", line 41, in <module>
"This class was moved to :mod:`~optuna.trial`. Please use "
File "/home/shar/anaconda3/lib/python3.7/site-packages/optuna/_deprecated.py", line 77, in deprecated
removed_version = _get_removed_version_from_deprecated_version(deprecated_version)
File "/home/shar/anaconda3/lib/python3.7/site-packages/optuna/_deprecated.py", line 39, in _get_removed_version_from_deprecated_version
return "{}.0.0".format(parsed_deprecated_version.major + 2)
AttributeError: 'Version' object has no attribute 'major'
I somehow broke the PyTorch environment I use over SSH, and Optuna is now unable to find the attribute `major` on the `Version` object.
The code runs, but towards the end the error above appears. I reinstalled the torch package and the issue is still not resolved.
Can anyone advise how to fix it?
I'm trying to install the plugin DEDRM 6.7.0 in Calibre 5.7.2 (operating system: Windows 10), but I got the following error:
calibre, version 5.7.2
ERRORE: Eccezione non gestita: SyntaxError:invalid syntax (calibre_plugins.dedrm.init, line 168)
calibre 5.7.2 [64bit] embedded-python: True is64bit: True
Windows-10-10.0.19041 Windows ('64bit', 'WindowsPE')
('Windows', '10', '10.0.19041')
Python 3.8.5
Windows: ('10', '10.0.19041', '', 'Multiprocessor Free')
Interface language: it
Traceback (most recent call last):
File "calibre\gui2\preferences\plugins.py", line 317, in add_plugin
File "calibre\customize\ui.py", line 472, in add_plugin
File "calibre\customize\ui.py", line 61, in load_plugin
File "calibre\customize\zipplugin.py", line 293, in load
File "importlib\__init__.py", line 127, in import_module
File "<frozen importlib._bootstrap>", line 1014, in _gcd_import
File "<frozen importlib._bootstrap>", line 991, in _find_and_load
File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 671, in _load_unlocked
File "calibre\customize\zipplugin.py", line 185, in exec_module
File "calibre\customize\zipplugin.py", line 181, in get_code
File "calibre_plugins.dedrm.__init__", line 168
print u"{0} v{1}: Copying needed library files from plugin's zip".format(PLUGIN_NAME, PLUGIN_VERSION)
^
SyntaxError: invalid syntax
How can I overcome this problem?
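The DEDRM 6.7.0 plugin is not compatible with Calibre 5.7.2: the plugin uses Python 2 syntax (note the `print u"..."` statement in the traceback), while this Calibre build runs Python 3.8.5. The plugin works with Calibre 4.23.0 (see here).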
Error
Exception in thread django-main-thread:
Traceback (most recent call last):
File "e:\python\installation files\lib\threading.py", line 926, in _bootstrap_inner
self.run()
File "e:\python\installation files\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\utils\autoreload.py", line 53, in wrapper
fn(*args, **kwargs)
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\core\management\commands\runserver.py", line 109, in inner_run
autoreload.raise_last_exception()
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\utils\autoreload.py", line 76, in raise_last_exception
raise _exception[1]
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\core\management\__init__.py", line 357, in execute
autoreload.check_errors(django.setup)()
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\utils\autoreload.py", line 53, in wrapper
fn(*args, **kwargs)
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\__init__.py", line 24, in setup
apps.populate(settings.INSTALLED_APPS)
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\apps\registry.py", line 122, in populate
app_config.ready()
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\contrib\admin\apps.py", line 24, in ready
self.module.autodiscover()
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\contrib\admin\__init__.py", line 26, in autodiscover
autodiscover_modules('admin', register_to=site)
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\utils\module_loading.py", line 47, in autodiscover_modules
import_module('%s.%s' % (app_config.name, module_to_search))
File "e:\python\installation files\lib\importlib\__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\contrib\gis\admin\__init__.py", line 5, in <module>
from django.contrib.gis.admin.options import GeoModelAdmin, OSMGeoAdmin
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\contrib\gis\admin\options.py", line 2, in <module>
from django.contrib.gis.admin.widgets import OpenLayersWidget
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\contrib\gis\admin\widgets.py", line 3, in <module>
from django.contrib.gis.gdal import GDALException
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\contrib\gis\gdal\__init__.py", line 28, in <module>
from django.contrib.gis.gdal.datasource import DataSource
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\contrib\gis\gdal\datasource.py", line 39, in <module>
from django.contrib.gis.gdal.driver import Driver
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\contrib\gis\gdal\driver.py", line 5, in <module>
from django.contrib.gis.gdal.prototypes import ds as vcapi, raster as rcapi
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\contrib\gis\gdal\prototypes\ds.py", line 9, in <module>
from django.contrib.gis.gdal.libgdal import GDAL_VERSION, lgdal
File "C:\Users\WIN8\Envs\my_django\lib\site-packages\django\contrib\gis\gdal\libgdal.py", line 52, in <module>
lgdal = CDLL(lib_path)
File "e:\python\installation files\lib\ctypes\__init__.py", line 364, in __init__
self._handle = _dlopen(self._name, mode)
OSError: [WinError 127] The specified procedure could not be found
I have followed the GeoDjango documentation for Windows. I installed OSGeo4W with the 64-bit installer and set the environment variables as described in the documentation.
settings.py file
# Apps enabled for this project. 'django.contrib.gis' (GeoDjango) is the entry
# whose admin module imports the GDAL bindings that fail in the traceback above.
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'accounts.apps.AccountsConfig',
'django.contrib.gis',
]
and
# Single default database using the PostGIS backend from django.contrib.gis,
# reached on localhost:5432. The password is presumably masked for this post.
DATABASES = {
'default': {
'ENGINE': 'django.contrib.gis.db.backends.postgis',
'NAME': 'postgres',
'USER': 'postgres',
'PASSWORD': '********',
'HOST': 'localhost',
'PORT': '5432'
}
}
OSGeo4W is successfully installed and path variables are set correctly as I can access gdal from cmd. Screenshot included.
Screenshot
If I do
scrapy shell https://en.wikipedia.org/wiki/Electric_battery
and then
response.css('h1#firstHeading::text').extract()
I get the correct output:
['Electric battery']
But if I put the same call into this Python spider:
# Spider from the question, reproduced as posted (indentation appears to have
# been lost when the code was pasted).
import scrapy
class WikiSpider(scrapy.Spider):
name = 'wiki'
allowed_domains = ['wikipedia.com']
start_urls = ['https://en.wikipedia.org/wiki/Electric_battery']
def parse(self, response):
# Python 2 print statement: this is the line the SyntaxError in the traceback
# below points at when the spider is run under Python 3.8.
print response.css('h1#firstHeading::text').extract()
and run it, I get the following error:
C:\python\wikipedia>scrapy runspider wiki
Traceback (most recent call last):
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\runpy.py", line 193, in _run_module_as_main
return _run_code(code, main_globals, None,
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "C:\Users\thega\AppData\Local\Programs\Python\Python38-32\Scripts\scrapy.exe\__main__.py", line 7, in <module>
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\site-packages\scrapy\cmdline.py", line 145, in execute
cmd.crawler_process = CrawlerProcess(settings)
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\site-packages\scrapy\crawler.py", line 267, in __init__
super(CrawlerProcess, self).__init__(settings)
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\site-packages\scrapy\crawler.py", line 145, in __init__
self.spider_loader = _get_spider_loader(settings)
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\site-packages\scrapy\crawler.py", line 347, in _get_spider_loader
return loader_cls.from_settings(settings.frozencopy())
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\site-packages\scrapy\spiderloader.py", line 61, in from_settings
return cls(settings)
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\site-packages\scrapy\spiderloader.py", line 25, in __init__
self._load_all_spiders()
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\site-packages\scrapy\spiderloader.py", line 47, in _load_all_spiders
for module in walk_modules(name):
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\site-packages\scrapy\utils\misc.py", line 73, in walk_modules
submod = import_module(fullpath)
File "c:\users\thega\appdata\local\programs\python\python38-32\lib\importlib\__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1014, in _gcd_import
File "<frozen importlib._bootstrap>", line 991, in _find_and_load
File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 671, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 779, in exec_module
File "<frozen importlib._bootstrap_external>", line 916, in get_code
File "<frozen importlib._bootstrap_external>", line 846, in source_to_code
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "C:\python\wikipedia\wikipedia\spiders\wiki.py", line 11
print response.css('h1#firstHeading::text').extract()
^
SyntaxError: invalid syntax
I am taking the code from a tutorial at https://www.makeuseof.com/tag/build-basic-web-crawler-pull-information-website-2/
I'm confused about how the syntax can be wrong when the same expression works in the Scrapy shell.
You are writing Python 2 code but running it under Python 3, where print is a function, so you are missing the parentheses. Here is the way to do it:
import scrapy


class WikiSpider(scrapy.Spider):
    """Fetch the Wikipedia "Electric battery" article and print its heading."""

    name = 'wiki'
    # Must match the host used in start_urls (en.wikipedia.org). With
    # 'wikipedia.com', Scrapy's offsite filtering would treat links followed
    # from the article as off-site.
    allowed_domains = ['wikipedia.org']
    start_urls = ['https://en.wikipedia.org/wiki/Electric_battery']

    def parse(self, response):
        """Print the text of the page's ``h1#firstHeading`` element.

        Args:
            response: The downloaded page for one of ``start_urls``.
        """
        # print is a function in Python 3, so the parentheses are required.
        print(response.css('h1#firstHeading::text').extract())
I am trying to deploy my Scrapy project, which is connected to a Django project, to scrapyd, but when I ran `scrapyd-deploy JD -p JDSpider` it failed with "No module named 'GraduationProject'". It seems scrapyd cannot find "GraduationProject.settings", which is referenced in settings.py. I have tried every combination of paths I could think of, but all of them failed. Could you please suggest possible solutions? Thanks in advance.
2019-03-24T01:02:02+0800 [_GenericHTTPChannelProtocol,12,127.0.0.1] Unhandled Error
Traceback (most recent call last):
File "d:\anacaonda\lib\site-packages\twisted\web\http.py", line 2190, in allContentReceived
req.requestReceived(command, path, version)
File "d:\anacaonda\lib\site-packages\twisted\web\http.py", line 917, in requestReceived
self.process()
File "d:\anacaonda\lib\site-packages\twisted\web\server.py", line 199, in process
self.render(resrc)
File "d:\anacaonda\lib\site-packages\twisted\web\server.py", line 259, in render
body = resrc.render(self)
--- <exception caught here> ---
File "d:\anacaonda\lib\site-packages\scrapyd\webservice.py", line 21, in render
return JsonResource.render(self, txrequest).encode('utf-8')
File "d:\anacaonda\lib\site-packages\scrapyd\utils.py", line 20, in render
r = resource.Resource.render(self, txrequest)
File "d:\anacaonda\lib\site-packages\twisted\web\resource.py", line 250, in render
return m(request)
File "d:\anacaonda\lib\site-packages\scrapyd\webservice.py", line 86, in render_POST
spiders = get_spider_list(project, version=version)
File "d:\anacaonda\lib\site-packages\scrapyd\utils.py", line 137, in get_spider_list
raise RuntimeError(msg.encode('unicode_escape') if six.PY2 else msg)
builtins.RuntimeError: Traceback (most recent call last):
File "d:\anacaonda\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "d:\anacaonda\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "d:\anacaonda\lib\site-packages\scrapyd\runner.py", line 40, in <module>
main()
File "d:\anacaonda\lib\site-packages\scrapyd\runner.py", line 37, in main
execute()
File "d:\anacaonda\lib\site-packages\scrapy\cmdline.py", line 108, in execute
settings = get_project_settings()
File "d:\anacaonda\lib\site-packages\scrapy\utils\project.py", line 68, in get_project_settings
settings.setmodule(settings_module_path, priority='project')
File "d:\anacaonda\lib\site-packages\scrapy\settings\__init__.py", line 292, in setmodule
module = import_module(module)
File "d:\anacaonda\lib\importlib\__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 978, in _gcd_import
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "c:\users\柘宇\appdata\local\temp_~1\jdspider-1553360516-l5i44at6.egg\JDSpider\settings.py", line 24, in <module>
File "d:\anacaonda\lib\site-packages\django\__init__.py", line 19, in setup
configure_logging(settings.LOGGING_CONFIG, settings.LOGGING)
File "d:\anacaonda\lib\site-packages\django\conf\__init__.py", line 56, in __getattr__
self._setup(name)
File "d:\anacaonda\lib\site-packages\django\conf\__init__.py", line 43, in _setup
self._wrapped = Settings(settings_module)
File "d:\anacaonda\lib\site-packages\django\conf\__init__.py", line 106, in __init__
mod = importlib.import_module(self.SETTINGS_MODULE)
File "d:\anacaonda\lib\importlib\__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 978, in _gcd_import
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 936, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 205, in _call_with_frames_removed
File "<frozen importlib._bootstrap>", line 978, in _gcd_import
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 948, in _find_and_load_unlocked
ModuleNotFoundError: No module named 'GraduationProject'
scrapy.cfg
# Settings module Scrapy loads for this project.
[settings]
default = JDSpider.settings
# Deploy target "JD", the target named in the `scrapyd-deploy JD -p JDSpider` command.
[deploy:JD]
url = http://localhost:6800/
project = JDSpider
settings.py
import os
import sys
# Make the parent of the current working directory importable.
# NOTE(review): os.path.abspath('.') is resolved against the process's working
# directory, so this entry depends on where the process is started from — confirm
# it actually points at the folder containing the Django project.
sys.path.append(os.path.dirname(os.path.abspath('.')))
# Do not forget to change the "GraduationProject" part below to your own Django project name
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "GraduationProject.settings")
import django
# Initialise Django; this imports the module named by DJANGO_SETTINGS_MODULE,
# which is the import that fails in the traceback above.
django.setup()
Project hierarchy
I ran into this problem today and found that you need to cd to the root directory of your Scrapy project inside the Django project, and then start scrapyd from there.
The README of this repo was helpful: Scrapyd-Django-Template
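You should add GraduationProject (that is, the directory containing the GraduationProject package) to PYTHONPATH so that `GraduationProject.settings` can be imported.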