I'd like to make an XML-RPC proxy with a load balancer using Twisted.

[XMLRPC Server 1_1 8.8.8.8:8000] <--> [------------------------]
                                      [ Proxy example.com:8000  ] <--- Clients
[XMLRPC Server 1_2 9.9.9.9:8000] <--> [------------------------]

So there are two XML-RPC instances that expose the same methods. I need an XML-RPC proxy between these instances and the clients. One more thing: the proxy should also accept JSON calls (something like http://example.com:8000/RPC2 and http://example.com:8000/JSON).
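Roughly, the web-facing side I have in mind would look something like this (only a sketch of the intended layout; BalancedXMLRPC and JSONRPCResource are placeholders I would still have to fill in):

from twisted.web import server, resource, xmlrpc
from twisted.internet import reactor

class BalancedXMLRPC(xmlrpc.XMLRPC):
    """Placeholder: would forward each call to one of the backend servers."""

class JSONRPCResource(resource.Resource):
    """Placeholder: would accept the same calls encoded as JSON."""
    isLeaf = True

root = resource.Resource()
root.putChild("RPC2", BalancedXMLRPC())   # XML-RPC entry point
root.putChild("JSON", JSONRPCResource())  # JSON entry point
reactor.listenTCP(8000, server.Site(root))
reactor.run()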
Right now I am trying to implement the XML-RPC proxy calls. The client never receives an answer, although sendLine() is being called.
import argparse
from twisted.internet import protocol, reactor, defer, threads
from twisted.web import xmlrpc
from twisted.internet.task import LoopingCall
from twisted.internet.defer import DeferredQueue, Deferred, inlineCallbacks
from twisted.protocols.basic import LineReceiver
import configfile
from bcsxmlrpc import xmlrpc_request_parser, xmlrpc_marshal
from customlogging import logging
logging.getLogger().setLevel(logging.DEBUG)
class ProxyClient(xmlrpc.Proxy):
    def __init__(self, proxy_uri, user, timeout=30.0):
        self.proxy_uri = proxy_uri
        xmlrpc.Proxy.__init__(self, url=proxy_uri, user=user, connectTimeout=timeout)

    @inlineCallbacks
    def call_api(self, name, *args):
        logging.debug(u"Calling API: %s" % name)
        result = yield self.callRemote(name, *args)
        proxy_pool.add(self.proxy_uri)
        defer.returnValue(result)
class Request(object):
    def __init__(self, method, params, deferred):
        self.method = method
        self.params = params
        self.deferred = deferred
class ProxyServer(LineReceiver):
    def dataReceived(self, data):
        logging.pr(data)
        params, method = xmlrpc_request_parser(data)  # got method name and arguments
        d = Deferred()
        d.addCallbacks(self._send_reply, self._log_error)
        logging.debug(u"%s%s added to queue" % (method, params))
        queue.put(Request(method, params, d))

    def _send_reply(self, result):
        logging.ps(result)
        self.sendLine(str(result))

    def _log_error(self, error):
        logging.error(error)

    def connectionMade(self):
        logging.info(u"New client connected")

    def connectionLost(self, reason):
        logging.info(u"Client connection lost: %s" % reason.getErrorMessage())


class ProxyServerFactory(protocol.Factory):
    protocol = ProxyServer

    def buildProtocol(self, addr):
        return ProxyServer()
@inlineCallbacks
def _queue_execute_job():
    if queue.pending and proxy_pool:
        proxy = proxy_pool.pop()
        request = yield queue.get()
        result = yield ProxyClient(proxy, "").call_api(request.method, *list(request.params))
        request.deferred.callback(result)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run configuration")
    parser.add_argument('--config', help=u"Configuration file name/path")
    config = configfile.ProxyConfig(parser.parse_args().config)

    global proxy_pool
    proxy_pool = set()
    for proxy_server in config.servers:
        proxy_pool.add(proxy_server)

    global queue
    queue = DeferredQueue()
    lc2 = LoopingCall(_queue_execute_job)
    lc2.start(1)

    logging.info(u"Starting Proxy at port %s" % config.port)
    reactor.listenTCP(config.port, ProxyServerFactory())
    reactor.run()
How can I mock the following function for connecting to Redis?
import redis

class RedisCache:
    redis_instance = None

    @classmethod
    def set_connect(cls):
        redis_instance = redis.StrictRedis(host='0.0.0.0', port=6379, password='xyz',
                                           charset='utf-8', decode_responses=True, socket_timeout=30)
        return redis_instance

    @classmethod
    def get_conn(cls):
        cls.redis_instance = cls.set_connect()
        return cls.redis_instance
I looked for some solutions, but they basically relied on the fakeredis module. I wanted a simpler way to mock these functions.
Note: the data returned by the function is Redis<ConnectionPool<Connection<host=127.0.0.1,port=6379,db=0>>>
You can use the patch() function to mock out the redis.StrictRedis class. See where to patch.
E.g.
redis_cache.py:
import redis


class RedisCache:
    redis_instance = None

    @classmethod
    def set_connect(cls):
        redis_instance = redis.StrictRedis(host='0.0.0.0', port=6379, password='xyz',
                                           charset='utf-8', decode_responses=True, socket_timeout=30)
        return redis_instance

    @classmethod
    def get_conn(cls):
        cls.redis_instance = cls.set_connect()
        return cls.redis_instance
test_redis_cache.py:
from unittest import TestCase
import unittest
from unittest.mock import patch, Mock

from redis_cache import RedisCache


class TestRedisCache(TestCase):
    def test_set_connect(self):
        with patch('redis.StrictRedis') as mock_StrictRedis:
            mock_redis_instance = mock_StrictRedis.return_value
            actual = RedisCache.set_connect()
            self.assertEqual(actual, mock_redis_instance)
            mock_StrictRedis.assert_called_once_with(host='0.0.0.0', port=6379, password='xyz',
                                                     charset='utf-8', decode_responses=True, socket_timeout=30)

    @patch('redis.StrictRedis')
    def test_get_conn(self, mock_StrictRedis):
        mock_redis_instance = mock_StrictRedis.return_value
        RedisCache.get_conn()
        self.assertEqual(RedisCache.redis_instance, mock_redis_instance)


if __name__ == '__main__':
    unittest.main()
test result:
..
----------------------------------------------------------------------
Ran 2 tests in 0.004s
OK
Name Stmts Miss Cover Missing
------------------------------------------------------------------------------
src/stackoverflow/70016401/redis_cache.py 11 0 100%
src/stackoverflow/70016401/test_redis_cache.py 18 0 100%
------------------------------------------------------------------------------
TOTAL 29 0 100%
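On the where-to-patch point: redis_cache.py does import redis and looks StrictRedis up as an attribute of that module at call time, so patching 'redis_cache.redis.StrictRedis' targets the same object and works just as well. A small sketch of that variant:

from unittest import TestCase
from unittest.mock import patch

from redis_cache import RedisCache


class TestRedisCachePatchVariant(TestCase):
    @patch('redis_cache.redis.StrictRedis')
    def test_set_connect(self, mock_StrictRedis):
        # The patched class is the one redis_cache sees, so set_connect()
        # returns the mock's instance.
        self.assertEqual(RedisCache.set_connect(), mock_StrictRedis.return_value)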
I would like to repeatedly scrape the same URLs with different delays. After researching the issue it seemed that the appropriate solution was to use something like
nextreq = scrapy.Request(url, dont_filter=True)
d = defer.Deferred()
delay = 1
reactor.callLater(delay, d.callback, nextreq)
yield d
in parse.
However, I have been unable to make this work. I am getting the error message
ERROR: Spider must return Request, BaseItem, dict or None, got 'Deferred'
I am not familiar with Twisted, so I hope I am just missing something obvious.
Is there a better way of achieving my goal that doesn't fight the framework so much?
I finally found an answer in an old PR.
def parse(self, response):
    req = scrapy.Request(...)
    delay = 0
    reactor.callLater(delay, self.crawler.engine.schedule, request=req, spider=self)
However, the spider can exit too early because it goes idle. Based on the outdated middleware https://github.com/ArturGaspar/scrapy-delayed-requests, this can be remedied with
from scrapy import signals
from scrapy.exceptions import DontCloseSpider


class ImmortalSpiderMiddleware(object):

    @classmethod
    def from_crawler(cls, crawler):
        s = cls()
        crawler.signals.connect(s.spider_idle, signal=signals.spider_idle)
        return s

    @classmethod
    def spider_idle(cls, spider):
        raise DontCloseSpider()
The final option, updating the middleware by ArturGaspar, led to:
from weakref import WeakKeyDictionary

from scrapy import signals
from scrapy.exceptions import DontCloseSpider, IgnoreRequest
from twisted.internet import reactor


class DelayedRequestsMiddleware(object):
    requests = WeakKeyDictionary()

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls()
        crawler.signals.connect(ext.spider_idle, signal=signals.spider_idle)
        return ext

    @classmethod
    def spider_idle(cls, spider):
        if cls.requests.get(spider):
            spider.log("delayed requests pending, not closing spider")
            raise DontCloseSpider()

    def process_request(self, request, spider):
        delay = request.meta.pop('delay_request', None)
        if delay:
            self.requests.setdefault(spider, 0)
            self.requests[spider] += 1
            reactor.callLater(delay, self.schedule_request, request.copy(), spider)
            raise IgnoreRequest()

    def schedule_request(self, request, spider):
        spider.crawler.engine.schedule(request, spider)
        self.requests[spider] -= 1
And it can be used in parse like:
yield Request(..., meta={'delay_request': 5})
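For this to take effect, the middleware also has to be enabled in the project settings, along these lines (the dotted path myproject.middlewares is an assumption about where the class lives, and the priority 610 is arbitrary):

# settings.py -- enable the delayed-requests downloader middleware
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.DelayedRequestsMiddleware': 610,
}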
After struggling with inlineCallbacks and yield in twisted/txredisapi, I can now save my data into Redis. Thanks to the author of txredisapi. But I have hit a new issue: the socket server does not send anything back to the client before/after saving into the DB.
Twisted offers a simple socket server like the following:
from twisted.internet import protocol, reactor


class Echo(protocol.Protocol):
    def dataReceived(self, data):
        self.transport.write(data)  # write back


class EchoFactory(protocol.Factory):
    def buildProtocol(self, addr):
        return Echo()


reactor.listenTCP(8000, EchoFactory())
reactor.run()
My code is similar, only with additional DB operations.
#!/usr/bin/env python
import sys
import time
import binascii

import txredisapi

from twisted.internet import defer
from twisted.internet import protocol, reactor
from twisted.internet.protocol import Factory
from twisted.enterprise import adbapi
from twisted.python import log

from dmpack import Dmpack
from dmdb import Dmdb
from dmconfig import DmConf

dm = Dmpack()
conf = DmConf().loadConf()
rcs = txredisapi.lazyConnection(password=conf['RedisPassword'])
dbpool = adbapi.ConnectionPool("MySQLdb", db=conf['DbName'], user=conf['DbAccount'],
                               passwd=conf['DbPassword'], host=conf['DbHost'],
                               use_unicode=True, charset=conf['DbCharset'])


def getDataParsed(data):
    snrCode = dm.snrToAscii(data[2:7])
    realtime = data[7:167]   # save it into redis
    period = data[167:-2]    # save it into SQL
    return (snrCode, realtime, period)


class PlainTCP(protocol.Protocol):
    def __init__(self, factory):
        self.factory = factory
        self.factory.numConnections = 0
        self.snrCode = None
        self.rData = None
        self.pData = None
        self.err = None

    def connectionMade(self):
        self.factory.numConnections += 1
        print "Nr. of connections: %d\n" % (self.factory.numConnections)
        self.transport.write("Hello remote\r\n")  # it only prints every 5 connections.

    def connectionLost(self, reason):
        self.factory.numConnections -= 1
        print "Nr. of connections: %d\n" % (self.factory.numConnections)

    @defer.inlineCallbacks
    def dataReceived(self, data):
        global dbpool, rcs
        (self.snrCode, rDat, pDat) = getDataParsed(data)
        if self.snrCode == None or rDat == None or pDat == None:
            err = "Bad format"
        else:
            err = "OK"
        print "err:%s" % (err)  # debug print to show flow control
        self.err = err
        self.transport.write(self.snrCode)
        self.transport.write(self.err)
        self.transport.write(rDat)
        self.transport.write(pDat)
        self.transport.loseConnection()
        if self.snrCode != None and rDat != None and pDat != None:
            res = yield self.saveRealTimeData(rcs, rDat)
            res = yield self.savePeriodData(dbpool, pDat, conf)
        print "err2:%s" % (err)  # debug print to show flow control

    @defer.inlineCallbacks
    def saveRealTimeData(self, rc, dat):
        key = "somekey"
        val = "somedata"
        yield rc.set(key, val)
        yield rc.expire(key, 30)

    @defer.inlineCallbacks
    def savePeriodData(self, rc, dat, conf):
        query = "some SQL statement"
        yield rc.runQuery(query)


class PlainTCPFactory(protocol.Factory):
    def buildProtocol(self, addr):
        return PlainTCP(self)


def main():
    dmdb = Dmdb()
    if not dmdb.detectDb():
        print "Please run MySQL RDBS first."
        sys.exit()
    log.startLogging(sys.stdout)
    reactor.listenTCP(8080, PlainTCPFactory())
    reactor.run()


if __name__ == "__main__":
    main()
And a clip of my client, which is a simple socket client:
def connectSend(host="127.0.0.1", port=8080):
    global packet
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((host, port))
        s.sendall(''.join(packet))
        data = s.recv(1024)
        s.close()
        print 'Received', repr(data)
    except socket.error, err:
        print "Remote socket is not available: %s" % str(err)
        sys.exit(1)
The current status is:
If I disable @defer.inlineCallbacks and the yield operations in dataReceived(), both the self.transport.write() inside connectionMade() and the writes in dataReceived() send data to the clients.
If I enable @defer.inlineCallbacks and the two yielded DB operations (SQL/Redis), then the self.transport.write() inside connectionMade() only prints every 5 connections, and dataReceived() does not output any data to the clients.
The debug print statements appear in the log regardless of @defer.inlineCallbacks.
I was told that dataReceived() should not be decorated with @defer.inlineCallbacks, but removing the decorator did not change anything.
I am wondering whether gevent could help me out of this unpredictable behavior. I am twisted into an endless tornado, cyclone...
If anyone has had a similar experience, please help me. Thanks.
By changing the function as follows, the code works.
# The @defer.inlineCallbacks decorator is commented out here
def dataReceived(self, data):
    global dbpool, rcs
    (self.snrCode, rDat, pDat) = getDataParsed(data)
    if self.snrCode == None or rDat == None or pDat == None:
        err = "Bad format"
    else:
        err = "OK"
    print "err:%s" % (err)  # debug print to show flow control
    self.err = err
    self.transport.write(self.snrCode)
    self.transport.write(self.err)
    self.transport.write(rDat)
    self.transport.write(pDat)
    self.transport.loseConnection()
    if self.snrCode != None and rDat != None and pDat != None:
        self.saveRealTimeData(rcs, rDat)
        self.savePeriodData(dbpool, pDat, conf)
        # removed the yield before the DB ops
    print "err2:%s" % (err)  # debug print to show flow control

@defer.inlineCallbacks
def saveRealTimeData(self, rc, dat):
    print "saveRedis"
    key = "somekey"
    val = "somedata"
    yield rc.set(key, val)
    yield rc.expire(key, 30)

@defer.inlineCallbacks
def savePeriodData(self, rc, dat, conf):
    print "save SQL"
    query = "some SQL statement"
    yield rc.runQuery(query)
If we keep @defer.inlineCallbacks and the yields in dataReceived(), the connection is closed before the second DB operation, and therefore no data is written to the connection. Maybe this is caused by the inlineCallbacks decorator.
By removing it, the flow control is simple and straightforward.
However, I still can't see why I cannot use inlineCallbacks when there are two deferred DB operations. Do they not need to be yielded as Deferreds this time?
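A variant I have not tried yet: keep @defer.inlineCallbacks, but write the whole reply before yielding and only call loseConnection() after the DB deferreds have fired. A rough, untested sketch of that ordering, reusing the names from the code above:

@defer.inlineCallbacks
def dataReceived(self, data):
    (self.snrCode, rDat, pDat) = getDataParsed(data)
    err = "OK" if None not in (self.snrCode, rDat, pDat) else "Bad format"
    # write the whole reply first ...
    self.transport.write(self.snrCode or "")
    self.transport.write(err)
    if err == "OK":
        self.transport.write(rDat)
        self.transport.write(pDat)
        # ... then wait for the deferred DB operations ...
        yield self.saveRealTimeData(rcs, rDat)
        yield self.savePeriodData(dbpool, pDat, conf)
    # ... and only close the connection once everything has finished
    self.transport.loseConnection()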
I have a file, mc_send.py, that sends multicast messages and should receive a unicast message back from the program that receives the multicast, mc_recv.py. The multicast works, but when the unicast message comes back a strange error appears: ValueError: maximum length of data to be read cannot be negative. The error is at line 14 of mc_send.py:
I have struggled with this for many hours on Windows 7 with Python 2.7.2 and PyQt4 4.9 but can't find what I'm doing wrong. The programs are based on the broadcast examples from PyQt4.
""" to see all ports on windows: netstat -ap udp | find "4545" """
from PyQt4 import QtCore, QtGui, QtNetwork
unicast_addr = "127.0.0.1"
unicast_port = 45455
mcast_addr = "239.255.43.21"
mcast_port = 45454
class Sender(QtGui.QDialog):
def processPendingDatagrams(self):
while self.udpServer.hasPendingDatagrams():
datagram, host, port = self.udpServer.readDatagram(self.udpSocket.pendingDatagramSize())
print "got msg:", datagram
def __init__(self, parent=None):
super(Sender, self).__init__(parent)
self.groupAddress = QtNetwork.QHostAddress(mcast_addr)
self.unicastAddress = QtNetwork.QHostAddress(unicast_addr)
self.statusLabel = QtGui.QLabel("Ready to multicast datagrams to group %s on port 45454" %
self.groupAddress.toString())
# setup socket for listening on incomming datagrams
self.udpServer = QtNetwork.QUdpSocket(self)
self.udpServer.bind(unicast_port)
self.udpServer.readyRead.connect(self.processPendingDatagrams)
self.startButton = QtGui.QPushButton("&Start")
self.quitButton = QtGui.QPushButton("&Quit")
buttonBox = QtGui.QDialogButtonBox()
buttonBox.addButton(self.startButton, QtGui.QDialogButtonBox.ActionRole)
buttonBox.addButton(self.quitButton, QtGui.QDialogButtonBox.RejectRole)
self.timer = QtCore.QTimer(self)
self.udpSocket = QtNetwork.QUdpSocket(self)
self.messageNo = 1
self.startButton.clicked.connect(self.startSending)
self.quitButton.clicked.connect(self.close)
self.timer.timeout.connect(self.send_mc_msg)
mainLayout = QtGui.QVBoxLayout()
mainLayout.addWidget(self.statusLabel)
mainLayout.addWidget(buttonBox)
self.setLayout(mainLayout)
self.setWindowTitle("WSim")
def startSending(self):
self.startButton.setEnabled(False)
self.timer.start(1000)
def send_mc_msg(self):
self.udpSocket.writeDatagram("hello %d" %(self.messageNo), self.groupAddress, mcast_port)
self.messageNo += 1
if __name__ == '__main__':
import sys
app = QtGui.QApplication(sys.argv)
sender = Sender()
sender.show()
sys.exit(sender.exec_())
The multicast receiver, which also sends the unicast response back, looks like this (mc_recv.py):
from PyQt4 import QtGui, QtNetwork

mcast_addr = "239.255.43.21"
mcast_port = 45454
answer_addr = "127.0.0.1"
answer_port = 45455


class Receiver(QtGui.QDialog):
    def __init__(self, parent=None):
        super(Receiver, self).__init__(parent)
        self.groupAddress = QtNetwork.QHostAddress(mcast_addr)
        self.udpSocket = QtNetwork.QUdpSocket(self)
        self.udpSocket.bind(mcast_port, QtNetwork.QUdpSocket.ReuseAddressHint)
        self.udpSocket.joinMulticastGroup(self.groupAddress)
        self.udpSocket.readyRead.connect(self.processPendingDatagrams)

        # Use this socket to send unicast messages back
        self.answerSocket = QtNetwork.QUdpSocket(self)
        self.answerAddress = QtNetwork.QHostAddress(answer_addr)

        quitButton = QtGui.QPushButton("&Quit")
        quitButton.clicked.connect(self.close)

        buttonLayout = QtGui.QHBoxLayout()
        buttonLayout.addStretch(1)
        buttonLayout.addWidget(quitButton)
        buttonLayout.addStretch(1)

        self.statusLabel = QtGui.QLabel("Listening for multicasted messages on %s" % mcast_addr)

        mainLayout = QtGui.QVBoxLayout()
        mainLayout.addWidget(self.statusLabel)
        mainLayout.addLayout(buttonLayout)
        self.setLayout(mainLayout)
        self.setWindowTitle("mrecv")

    def processPendingDatagrams(self):
        """Receive and decode multicast messages and send a response message on the return address."""
        while self.udpSocket.hasPendingDatagrams():
            datagram, host, port = self.udpSocket.readDatagram(self.udpSocket.pendingDatagramSize())
            self.statusLabel.setText("received mcast msg '%s'" % datagram)
            # send a response back to msend
            self.answerSocket.writeDatagram("hi back", self.answerAddress, answer_port)


if __name__ == '__main__':
    import sys

    app = QtGui.QApplication(sys.argv)
    receiver = Receiver()
    receiver.show()
    sys.exit(receiver.exec_())
Found the cause, an embarrassingly simple error: I had written self.udpSocket.pendingDatagramSize() instead of self.udpServer.pendingDatagramSize() when reading the data...
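In other words, the line in Sender.processPendingDatagrams should read:

datagram, host, port = self.udpServer.readDatagram(self.udpServer.pendingDatagramSize())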
I have to implement a Task subclass that fails gracefully if the broker is not running (currently I'm using RabbitMQ).
I could probably just use a try statement to catch the exception:
try:
    MyTask.delay(arg1, arg2)
except socket.error:
    # Send a notice to an admin
    pass
but I'd like to create a subclass of Task that can handle that.
I've tried something like this:
class MyTask(Task):
    ignore_result = True

    def __call__(self, *args, **kwargs):
        try:
            return self.run(*args, **kwargs)
        except socket.error:
            # Send a notice to an admin
            return None
but the workflow is clearly wrong. I think I need to inject a backend subclass or some kind of failure policy.
Do you have any suggestions?
A possible solution I came up with:
import socket

from celery.decorators import task
from celery.task import Task
from celery.backends.base import BaseBackend

UNDELIVERED = 'UNDELIVERED'


class DummyBackend(BaseBackend):
    """
    Dummy queue backend for undelivered messages (due to the broker being down).
    """
    def store_result(self, *args, **kwargs):
        pass

    def get_status(self, *args, **kwargs):
        return UNDELIVERED

    def _dummy(self, *args, **kwargs):
        return None

    wait_for = get_result = get_traceback = _dummy


class SafeTask(Task):
    """
    A task not raising socket errors if the broker is down.
    """
    abstract = True
    on_broker_error = None
    errbackend = DummyBackend

    @classmethod
    def apply_async(cls, *args, **kwargs):
        try:
            return super(SafeTask, cls).apply_async(*args, **kwargs)
        except socket.error, err:
            if cls.on_broker_error is not None:
                cls.on_broker_error(err, cls, *args, **kwargs)
            return cls.app.AsyncResult(None, backend=cls.errbackend(),
                                       task_name=cls.name)


def safetask(*args, **kwargs):
    """
    Task factory returning safe tasks handling socket errors.

    When a socket error occurs, the given callable *on_broker_error*
    is called passing the exception object, the class of the task
    and the original args and kwargs.
    """
    if 'base' not in kwargs:
        on_broker_error = kwargs.pop('on_broker_error', SafeTask.on_broker_error)
        errbackend = kwargs.pop('errbackend', SafeTask.errbackend)
        kwargs['base'] = type('SafeTask', (SafeTask,), {
            'on_broker_error': staticmethod(on_broker_error),
            'errbackend': errbackend,
            'abstract': True,
        })
    return task(*args, **kwargs)
You can either subclass SafeTask or use the @safetask decorator.
If you can think of an improvement, don't hesitate to contribute.
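For example, a task defined with the decorator could look roughly like this (the task body, the notify_admin callback, and the example arguments are placeholders, not part of the original code):

def notify_admin(err, task_cls, *args, **kwargs):
    # Placeholder: mail or log the undelivered task here.
    print "Broker down, %s not delivered: %s" % (task_cls.__name__, err)

@safetask(on_broker_error=notify_admin)
def my_task(arg1, arg2):
    # Placeholder task body; only the delivery is made "safe".
    return arg1 + arg2

# .delay() no longer raises socket.error while RabbitMQ is down; it returns
# an AsyncResult whose status should come back as UNDELIVERED.
result = my_task.delay(1, 2)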