Django & Celery & Rabbit getting Not Registered error - rabbitmq

I am trying to set up Django, Celery, and RabbitMQ for the first time, following this tutorial. I am using Django 2.0, Celery 4.2.0, and RabbitMQ on Windows.
I am getting the error: celery.exceptions.NotRegistered: 'GeneratePDF'
I have set up as follows:
in my __init__.py:
from __future__ import absolute_import, unicode_literals
import celery
from .celery import app as celery_app
__all__ = ['celery_app']
in my celery.py:
from __future__ import absolute_import, unicode_literals
import os
from celery import Celery
from django.conf import settings
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'abc.settings')
app = Celery('abc')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)
@app.task(bind=True)
def debug_task(self):
    print('Request: {0!r}'.format(self.request))
in my tasks.py:
from celery import shared_task
from abc.celery import app
@shared_task(name='GeneratePDF')
class GeneratePDF(View):
    def get(self, request, *args, **kwargs):
        ....
in my views.py:
from abc.tasks import GeneratePDF
@method_decorator(login_required, name='dispatch')
class ClientProfilePDF(RedirectView):
    def get(self, request, *args, **kwargs):
        GeneratePDF.delay(request)
        return HttpResponseRedirect('/home/')
in my settings.py:
CELERY_BROKER_URL = 'amqp://localhost'
CELERY_ACCEPT_CONTENT = ['json']
CELERY_RESULT_BACKEND = 'django-db'
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = 'Australia/Sydney'
CELERY_IMPORTS = ('abc.tasks',)
Can anyone point me in the right direction as to where I am going wrong and why I am getting this error? Any help is much appreciated!

Two quick things:
No need for any parameters to app.autodiscover_tasks(); Celery already knows how to use settings.INSTALLED_APPS.
The @shared_task decorator is for tasks that live in apps that do not have their own celery.py file instantiating an app. From the looks of it, your tasks.py lives in the same Django app as the celery.py file, so you should use @app.task rather than @shared_task.
Before you start, you can get a list of registered tasks with celery -A myapp inspect registered. That will let you see whether your GeneratePDF task is registered or not.
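A minimal sketch of both changes, assuming the project is named abc as in the question and that the PDF generation is moved into a plain function task (the generate_pdf function and its client_id argument below are hypothetical stand-ins, since a request object is not JSON-serializable):
# celery.py - autodiscover with no arguments
from celery import Celery

app = Celery('abc')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks()

# tasks.py - register a plain function task on the app instance
from abc.celery import app

@app.task(name='GeneratePDF')
def generate_pdf(client_id):
    # generate and save the PDF for this client here;
    # client_id is a hypothetical JSON-serializable argument
    ...

# views.py - enqueue the task
from abc.tasks import generate_pdf
generate_pdf.delay(client_id)
With the task registered under the name 'GeneratePDF', celery -A abc inspect registered should list it.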

Related

Trying to deploy an ML model as an API through BentoML

import bentoml
import numpy as np
from bentoml.io import NumpyNdarray
# Get the runner
xgb_runner = bentoml.models.get("xgb_booster:latest").to_runner()
# Create a Service object
svc = bentoml.Service("xgb_classifier", runners=[xgb_runner])
# Create an endpoint named classify
@svc.api(input=NumpyNdarray(), output=NumpyNdarray())
def classify(input_series) -> np.ndarray:
    # Convert the input string to numpy array
    label = xgb_runner.predict.run(input_series)
    return label
bentoml serve service.py:svc --reload
So this is my code, and the error I am getting is:
Error: [bentoml-cli] serve failed: Failed to load bento or import service 'Service.py:svc'.
If you are attempting to import bento in local store: 'Failed to import module "Service.py": No module named 'Service.py''.
If you are importing by python module path: 'Bento 'Service.py:svc' is not found in BentoML store <osfs '/root/bentoml/bentos'>'.
So I tried pip install service.
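A hedged reading of the error: bentoml serve expects a Python import target (module:attribute), not a pip package, so pip install service will not help. Assuming the file shown above is saved as service.py (lowercase) in the working directory, the invocation would normally reference the module without the .py suffix:
bentoml serve service:svc --reload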

How to get messages from a Telegram channel with the python-telegram-bot library

I was wondering if there is a way to get messages from a Telegram channel, given that I am logged in to the account and am an admin of that channel, so I just want to get the messages.
import feedparser
from telegram import Update, ForceReply, InlineKeyboardButton, InlineKeyboardMarkup
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackContext, CallbackQueryHandler
from bs4 import BeautifulSoup
from datetime import datetime
import json
import telegram
from time import sleep
from telegram.ext import MessageHandler, Filters
class Config:
    def __init__(self):
        with open("config.json", "r") as config:
            self.config = json.load(config)

class TelegramBotChannel:
    def __init__(self, token, start_channel_id):
        self.updater = Updater(token=token, use_context=True)
        self.dispatcher = self.updater.dispatcher
        self.start_channel_id = start_channel_id

if __name__ == '__main__':
    telegram_bot = TelegramBotChannel(Config().config["token"], Config().config["start"])
    pass
This is the minimal code to fetch messages from a channel using a Telegram bot that is a member of the channel (only admin membership is possible for bots). Provide the correct bot API token as KEY:
from api_keys import bot_api_key as KEY
from telegram.ext import Updater, Filters, MessageHandler
updater = Updater(token=KEY, use_context=True)
dispatcher = updater.dispatcher
def forwarder(update, context):
    msg = update.channel_post
    if msg:
        print(msg)
forwardHandler = MessageHandler(Filters.text & (~Filters.command), forwarder)
dispatcher.add_handler(forwardHandler)
updater.start_polling()
updater.idle()
Bots can only get updates about channel posts if they are a member of that channel (and bots can only be added to channels as admins). If they are admins in the channel, they will receive updates just like from every other chat.
Requirements:
Your bot should be in the channel, as an admin.
First, make a handler function:
def forwader(update, context):
    context.bot.copy_message("@temporary2for", "@tempmain", update.channel_post.message_id)
After that, make a handler:
forwadHandler = MessageHandler(Filters.text & (~Filters.command), forwader)
Then register your handler:
dispatcher.add_handler(forwadHandler)
Then don't forget to start polling the bot:
updater.start_polling()
updater.idle()
Full code:
from telegram import bot
from telegram.ext import Updater, CommandHandler, Filters, MessageHandler
from config import useless
import logging

updater = Updater(token=useless, use_context=True)
dispatcher = updater.dispatcher

logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

def forwader(update, context):
    context.bot.copy_message("@temporary2for", "@tempmain", update.channel_post.message_id)

forwadHandler = MessageHandler(Filters.text & (~Filters.command), forwader)
dispatcher.add_handler(forwadHandler)
updater.start_polling()
updater.idle()
Some of the imports are unused.
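For reference, a trimmed set of the imports the full example above actually uses:
import logging
from telegram.ext import Updater, Filters, MessageHandler
from config import useless  # the bot token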

Why does Scrapy log differently to the console and the external log file

I'm new to Scrapy, and I once managed to run my script fine on Scrapy 0.24. But when I switched to the newly released 1.0 I ran into a logging problem: I want to set both the file and the console log level to INFO, but no matter how I set LOG_LEVEL or call configure_logging() (using the standard Python logging package instead of scrapy.log), Scrapy keeps logging DEBUG-level information to the console, which prints the whole item object as a dict. In fact, the LOG_LEVEL option only takes effect for the external file. I suspect it has something to do with Python logging, but I have no idea how to set it up. Could anyone help me out?
This is how I configure my logging in run_my_spider.py:
from crawler.settings import LOG_FILE, LOG_FORMAT
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging
from crawler.spiders.MySpiders import MySpider
import logging
def run_spider(spider):
    settings = get_project_settings()
    # configure file logging
    # It ONLY works for the file
    configure_logging({'LOG_FORMAT': LOG_FORMAT,
                       'LOG_ENABLED': True,
                       'LOG_FILE': LOG_FILE,
                       'LOG_LEVEL': 'INFO',
                       'LOG_STDOUT': True})
    # instantiate spider
    process = CrawlerProcess(settings)
    process.crawl(MySpider)
    logging.info('Running Crawler: ' + spider.name)
    process.start()  # the script will block here until the spider_closed signal was sent
    logging.info('Crawler ' + spider.name + ' stopped.\n')
......
This is the console output:
DEBUG:scrapy.core.engine:Crawled (200) <GET http://mil.news.sina.com.cn/2014-10-09/0450804543.html>(referer: http://rss.sina.com.cn/rollnews/jczs/20141009.js)
{'item_name': 'item_sina_news_reply',
'news_id': u'jc:27-1-804530',
'reply_id': u'jc:27-1-804530:1',
'reply_lastcrawl': '1438605374.41',
'reply_table': 'news_reply_20141009'}
Many Thanks!
It may be that what you are seeing in the console is the Twisted logs.
They print DEBUG-level messages to the console.
You can redirect them to your log files using:
from twisted.python import log
observer = log.PythonLoggingObserver(loggerName='logname')
observer.start()
(As given in How to make Twisted use Python logging?)
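As a rough sketch (not part of the original answer), the observer could be wired into the run_spider function from the question, assuming the same settings and imports as above; the loggerName value is arbitrary:
from twisted.python import log as twisted_log

def run_spider(spider):
    settings = get_project_settings()
    configure_logging({'LOG_FORMAT': LOG_FORMAT,
                       'LOG_FILE': LOG_FILE,
                       'LOG_LEVEL': 'INFO'})
    # route Twisted's own messages through the stdlib logging module so they
    # honour the level and file handler configured above
    observer = twisted_log.PythonLoggingObserver(loggerName='twisted')
    observer.start()
    process = CrawlerProcess(settings)
    process.crawl(MySpider)
    process.start()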

Scrapy: How to redownload already cached URL if certain conditions apply

I have a large amount of HTTP data stored in my Scrapy cache backend. Certain pages contain false data, and these URLs need to be re-scheduled for download on the next run of Scrapy.
I came up with the idea to modify the dummy cache policy that comes with scrapy. Unfortunately this doesn't seem to work.
Can anyone see what is wrong in the is_cached_response_fresh method?
import os
import cPickle as pickle
from time import time
from weakref import WeakKeyDictionary
from email.utils import mktime_tz, parsedate_tz
from w3lib.http import headers_raw_to_dict, headers_dict_to_raw
from scrapy.http import Headers
from scrapy.responsetypes import responsetypes
from scrapy.utils.request import request_fingerprint
from scrapy.utils.project import data_path
from scrapy.utils.httpobj import urlparse_cached
class DummyPolicy(object):
    def __init__(self, settings):
        self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES')
        self.ignore_http_codes = [int(x) for x in settings.getlist('HTTPCACHE_IGNORE_HTTP_CODES')]

    def should_cache_request(self, request):
        return urlparse_cached(request).scheme not in self.ignore_schemes

    def should_cache_response(self, response, request):
        return response.status not in self.ignore_http_codes

    def is_cached_response_fresh(self, response, request):
        if "thisstring" in response.body.lower():
            print "got mobile page. redownload"
            return False
        else:
            return True

    def is_cached_response_valid(self, cachedresponse, response, request):
        return True
I think the answer here is that your content is most likely gzipped or deflated.
Try
from scrapy.utils.gz import gunzip
if "thisstring" in gunzip(response.body).lower()
I can't say this solution is universal, but it will most likely work in your case.
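A short sketch of how is_cached_response_fresh might look with that change, assuming the cached bodies really are gzip-compressed as suggested:
from scrapy.utils.gz import gunzip

def is_cached_response_fresh(self, response, request):
    # decompress the cached body before searching for the marker string
    body = gunzip(response.body)
    if "thisstring" in body.lower():
        print "got mobile page. redownload"
        return False
    return True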

ScrapyDeprecationWarning: Command's default `crawler` is deprecated and will be removed. Use `create_crawler` method to instantiate crawlers

Scrapy version 0.19
I am using the code from this page (Run multiple scrapy spiders at once using scrapyd). When I run scrapy allcrawl, I get:
ScrapyDeprecationWarning: Command's default `crawler` is deprecated and will be removed. Use `create_crawler` method to instantiate crawlers
Here is the code:
from scrapy.command import ScrapyCommand
import urllib
import urllib2
from scrapy import log
class AllCrawlCommand(ScrapyCommand):
    requires_project = True
    default_settings = {'LOG_ENABLED': False}

    def short_desc(self):
        return "Schedule a run for all available spiders"

    def run(self, args, opts):
        url = 'http://localhost:6800/schedule.json'
        for s in self.crawler.spiders.list():  # this line raises the warning
            values = {'project': 'YOUR_PROJECT_NAME', 'spider': s}
            data = urllib.urlencode(values)
            req = urllib2.Request(url, data)
            response = urllib2.urlopen(req)
            log.msg(response)
How do I fix the DeprecationWarning?
Thanks
Use:
crawler = self.crawler_process.create_crawler()
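For context, a sketch of the run method rewritten around that call, assuming the rest of the command stays as in the question:
def run(self, args, opts):
    url = 'http://localhost:6800/schedule.json'
    # create_crawler() replaces the deprecated default `crawler` attribute
    crawler = self.crawler_process.create_crawler()
    for s in crawler.spiders.list():
        values = {'project': 'YOUR_PROJECT_NAME', 'spider': s}
        data = urllib.urlencode(values)
        req = urllib2.Request(url, data)
        response = urllib2.urlopen(req)
        log.msg(response)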