import time

def create_logs():
    print("Creating log file...")
    print("Process [", end="")
    for i in range(0, 5):
        time.sleep(1)
        print("#", end="")
    logs = open('/home/motya/Desktop/logs_py/logs.txt', 'w')
    logs.write("LOGS_ENABLE=1\nUSERNAME=GUEST")
    logs.close()
    for i in range(0, 5):
        time.sleep(1)
        print("#", end="")
    print("]\nComplete!\n")

def start_welcome():
    create_logs()
    print("Welcome!")

start_welcome()
When I use the end parameter in print(), the output appears only after 10 seconds (the sum of all the time.sleep calls). When the end parameter is removed, the output appears every second, as it should, but with a new line after each #.
How can I fix it?
upd: sorry for my English, it's not my main language and this is difficult for me
Try adding sys.stdout.flush() after your print calls, like this:
import time
import sys

def create_logs():
    print("Creating log file...")
    print("Process [", end="")
    for i in range(0, 5):
        time.sleep(1)
        print("#", end="")
        sys.stdout.flush()
    logs = open('/home/motya/Desktop/logs_py/logs.txt', 'w')
    logs.write("LOGS_ENABLE=1\nUSERNAME=GUEST")
    logs.close()
    for i in range(0, 5):
        time.sleep(1)
        print("#", end="")
        sys.stdout.flush()
    print("]\nComplete!\n")
    sys.stdout.flush()

def start_welcome():
    create_logs()
    print("Welcome!")
    sys.stdout.flush()

start_welcome()
That will cause your print to do its job and then flush the buffered output to the screen. Without a trailing newline, stdout is typically line-buffered, so the # characters sit in the buffer until the closing "]" and newline finally arrive.
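Since your code uses the print() function, you appear to be on Python 3. If you are on Python 3.3 or later, print() also accepts a flush keyword argument, which saves the separate sys.stdout.flush() call:

import time

# flush=True writes the buffered output immediately,
# so each '#' shows up as soon as it is printed.
for i in range(0, 5):
    time.sleep(1)
    print("#", end="", flush=True)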
I am trying to make this code output to a csv file when calling the spider with -o output.csv
# -*- coding: utf-8 -*-
import scrapy

from scrapy.spiders import SitemapSpider
from scrapy.spiders import Spider
from scrapy.http import Request, XmlResponse
from scrapy.utils.sitemap import Sitemap, sitemap_urls_from_robots
from scrapy.utils.gz import gunzip, is_gzipped
import re
import requests

class GetpagesfromsitemapSpider(SitemapSpider):

    name = "test"
    handle_httpstatus_list = [404]

    def parse(self, response):
        print response.url

    def _parse_sitemap(self, response):
        if response.url.endswith('/robots.txt'):
            for url in sitemap_urls_from_robots(response.body):
                yield Request(url, callback=self._parse_sitemap)
        else:
            body = self._get_sitemap_body(response)
            if body is None:
                self.logger.info('Ignoring invalid sitemap: %s', response.url)
                return

            s = Sitemap(body)
            sites = []
            if s.type == 'sitemapindex':
                for loc in iterloc(s, self.sitemap_alternate_links):
                    if any(x.search(loc) for x in self._follow):
                        yield Request(loc, callback=self._parse_sitemap)
            elif s.type == 'urlset':
                for loc in iterloc(s):
                    for r, c in self._cbs:
                        if r.search(loc):
                            sites.append(loc)
                            break
            print sites

    def __init__(self, spider=None, *a, **kw):
        super(GetpagesfromsitemapSpider, self).__init__(*a, **kw)
        self.spider = spider
        l = []
        url = "https://channelstore.roku.com"
        resp = requests.head(url + "/sitemap.xml")
        if (resp.status_code != 404):
            l.append(resp.url)
        else:
            resp = requests.head(url + "/robots.txt")
            if (resp.status_code == 200):
                l.append(resp.url)
        self.sitemap_urls = l
        print self.sitemap_urls

def iterloc(it, alt=False):
    for d in it:
        yield d['loc']

        # Also consider alternate URLs (xhtml:link rel="alternate")
        if alt and 'alternate' in d:
            for l in d['alternate']:
                yield l
I have tried changing the print response.url on line 18 to a few things, but I can't seem to make this script output to a CSV; all I can manage is seeing the exact information I want, but on the terminal screen.
This code is from here, but I am not doing well with the supposedly easy part of completing the code.
Any help is greatly appreciated!
Not clear from your example, but it looks like you are not passing the command line arguments (-o) to your SitemapSpider.

A simpler solution, instead of passing the -o argument, is to just redirect your output to a file:

my_script.py > output.csv

OR

my_script.py | tee output.csv <-- this will write to the file and also print to your terminal
EDIT:
Not the most efficient way, but without seeing a full script:
def parse(self, response):
    with open('output.csv', 'a') as fh:
        fh.write('{}\n'.format(response.url))
This will append each response.url to a new line in the output.csv file.
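That said, if you want the -o output.csv flag itself to work, the usual Scrapy approach is to yield items from parse instead of printing them; the feed exporter then writes them out. A minimal sketch, assuming the rest of the spider stays the same:

def parse(self, response):
    # Yielding a dict hands the item to Scrapy's feed exports, so
    # running `scrapy crawl test -o output.csv` writes one row per URL.
    yield {'url': response.url}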
Here is a simple Python file, test.py.
import math

class myClass():
    def myFun(self, x):
        return math.sqrt(x)

if __name__ == "__main__":
    myInstance = myClass()
    print(myInstance.myFun(9))
It prints 3.0 when run with python test.py. Let's analyse the running process:

1. Instantiate myClass and assign the instance to myInstance.
2. Call the myFun method and print the result.

Now it is Scrapy's turn.
In the Scrapy 1.4 manual, quotes_spider.py is as below.
import scrapy

class QuotesSpider(scrapy.Spider):
    name = "quotes"

    def start_requests(self):
        urls = [
            'http://quotes.toscrape.com/page/1/',
            'http://quotes.toscrape.com/page/2/',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        page = response.url.split("/")[-2]
        filename = 'quotes-%s.html' % page
        with open(filename, 'wb') as f:
            f.write(response.body)
        self.log('Saved file %s' % filename)
Running the spider with scrapy crawl quotes, I am puzzled:

1. Where is the main function or main body of the spider?
2. Which class is instantiated?
3. Which method is called?

mySpider = QuotesSpider(scrapy.Spider)
mySpider.parse(response)

How does scrapy crawl work, exactly?
So let's start. Assuming you use Linux/Mac, let's check where scrapy is:
$ which scrapy
/Users/tarun.lalwani/.virtualenvs/myproject/bin/scrapy
Let's look at the content of this file
$ cat /Users/tarun.lalwani/.virtualenvs/myproject/bin/scrapy
#!/Users/tarun.lalwani/.virtualenvs/myproject/bin/python3.6
# -*- coding: utf-8 -*-
import re
import sys
from scrapy.cmdline import execute
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
    sys.exit(execute())
So this executes the execute method from cmdline.py, and here is your main method:
cmdline.py
from __future__ import print_function
....
....

def execute(argv=None, settings=None):
    if argv is None:
        argv = sys.argv

    # --- backwards compatibility for scrapy.conf.settings singleton ---
    if settings is None and 'scrapy.conf' in sys.modules:
        from scrapy import conf
        if hasattr(conf, 'settings'):
            settings = conf.settings
    # ------------------------------------------------------------------

    if settings is None:
        settings = get_project_settings()
        # set EDITOR from environment if available
        try:
            editor = os.environ['EDITOR']
        except KeyError:
            pass
        else:
            settings['EDITOR'] = editor
    check_deprecated_settings(settings)

    # --- backwards compatibility for scrapy.conf.settings singleton ---
    import warnings
    from scrapy.exceptions import ScrapyDeprecationWarning
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ScrapyDeprecationWarning)
        from scrapy import conf
        conf.settings = settings
    # ------------------------------------------------------------------

    inproject = inside_project()
    cmds = _get_commands_dict(settings, inproject)
    cmdname = _pop_command_name(argv)
    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
                                   conflict_handler='resolve')
    if not cmdname:
        _print_commands(settings, inproject)
        sys.exit(0)
    elif cmdname not in cmds:
        _print_unknown_command(settings, cmdname, inproject)
        sys.exit(2)

    cmd = cmds[cmdname]
    parser.usage = "scrapy %s %s" % (cmdname, cmd.syntax())
    parser.description = cmd.long_desc()
    settings.setdict(cmd.default_settings, priority='command')
    cmd.settings = settings
    cmd.add_options(parser)
    opts, args = parser.parse_args(args=argv[1:])
    _run_print_help(parser, cmd.process_options, args, opts)

    cmd.crawler_process = CrawlerProcess(settings)
    _run_print_help(parser, _run_command, cmd, args, opts)
    sys.exit(cmd.exitcode)

if __name__ == '__main__':
    execute()
Now if you look at the execute method, you see it processes the arguments you passed, which is crawl quotes in your case. The execute method scans the project for spider classes and checks which one has name defined as "quotes". It then creates the CrawlerProcess class, and that runs the whole show.
Scrapy is based on the Twisted Python framework, which is a scheduler-based framework.
Consider the below part of the code
for url in urls:
    yield scrapy.Request(url=url, callback=self.parse)
When the engine executes this function, the first yield executes and the value is returned to the engine. The engine now looks at the other tasks that are pending and executes them (when they yield, some other pending task in the queue gets a chance). So yield is what allows a function's execution to be broken into parts, and that is what lets Scrapy/Twisted work.
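You can watch the same suspend/resume behaviour with a plain Python generator, independent of Scrapy (a minimal illustration, not Scrapy code):

def tasks():
    print("part 1")   # runs when the caller first asks for a value
    yield 1           # execution pauses here; 1 goes back to the caller
    print("part 2")   # resumes here on the next request
    yield 2

g = tasks()     # nothing runs yet; we only get a generator object
print(next(g))  # prints "part 1", then 1
print(next(g))  # prints "part 2", then 2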
You can get a detailed overview from the link below:
https://doc.scrapy.org/en/latest/topics/architecture.html
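To tie this back to your three questions: there is no main body in the spider itself; CrawlerProcess instantiates your QuotesSpider (looked up by its name attribute), calls start_requests(), and then invokes callbacks such as parse() as responses arrive. You can mimic what scrapy crawl quotes does from a plain script; a rough sketch using Scrapy's documented API (the import path for QuotesSpider is hypothetical):

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from myproject.spiders.quotes_spider import QuotesSpider  # hypothetical import path

# CrawlerProcess is the same machinery `scrapy crawl` sets up: it
# instantiates the spider, schedules start_requests(), and runs the
# Twisted reactor until the crawl finishes.
process = CrawlerProcess(get_project_settings())
process.crawl(QuotesSpider)
process.start()  # blocks until crawling is done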
Here is some code that I copied from another .py file, and I got a TypeError:
#coding:utf-8
import serial
import sys
import time
import logging

class TestRemoteControl(object):
    def __init__(self, com):
        self.ser = serial.Serial(com, 115200)
        self.ser.bytesize = 8
        self.ser.stopbits = 1
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.INFO)
        self.formatter = logging.Formatter('%(asctime)-25s - %(name)s - %(levelname)s - %(message)s')
        self.ch = logging.StreamHandler()
        self.ch.setLevel(logging.INFO)
        self.fh = logging.FileHandler('Test.txt')
        self.fh.setLevel(logging.INFO)
        self.fh.setFormatter(self.formatter)
        self.ch.setFormatter(self.formatter)
        self.logger.addHandler(self.ch)
        self.logger.addHandler(self.fh)  # was addFilter(self.fh); a handler must be added with addHandler

    def start_esc(self):
        self.logger.info("Start the motor")
        self.ser.write("####1")

    def stop_esc(self):
        self.logger.info("Stop the motor")
        self.ser.write("####1")
        time.sleep(0.4)
        self.ser.write("####1")
        time.sleep(0.4)
        self.ser.write("####1")
        time.sleep(0.4)

    def speed_up(self):
        self.logger.info("Speed the motor up")
        self.ser.write("####3")

    def speed_down(self):
        self.logger.info("Slow the motor down")
        self.ser.write("####2")

def main():
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename='myapp.log',
                        filemode='w')
    print("please enter com num:")
    a = raw_input()
    temp_com = "com" + a
    test_RC = TestRemoteControl(temp_com)
    count = 1
    max_count = int(raw_input('Please enter on-off counts'))
    while count < max_count:
        test_RC.start_esc()
        # time.sleep(10)
        # test_RC.speed_up()
        time.sleep(2)
        test_RC.stop_esc()
        print "complete ", count, " times "
        time.sleep(1)
        count += 1

if __name__ == "__main__":
    main()
Here is the error; I don't know why. Help me please.

TypeError:
unbound method __init__() must be called with TestRemoteControl instance as first argument (got nothing instead)
The code itself is OK. The whole problem was the IDE, PyCharm: because I used the name "TestRemoteControl", it would only run the file as a unittest.
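For reference, that TypeError is what Python 2 raises when an instance method is looked up on the class and called without an instance, which is presumably what happened when the test runner tried to drive the class. A minimal, hypothetical reproduction of just the error message:

class TestRemoteControl(object):
    def __init__(self, com):
        self.com = com

# Python 2: calling the unbound method without an instance
TestRemoteControl.__init__()
# TypeError: unbound method __init__() must be called with
# TestRemoteControl instance as first argument (got nothing instead)

Renaming the class to something that doesn't start with "Test" (say, RemoteControl), or creating a plain "run" configuration, keeps PyCharm from auto-detecting it as a test.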
After struggling with inlineCallbacks and yield in twisted/txredisapi, I can now save my data into Redis, thanks to the author of txredisapi. Now I have hit a new issue: the socket server does not send anything back to the client before/after saving into the DB.
Twisted offers a simple echo socket server, as follows:
from twisted.internet import protocol, reactor

class Echo(protocol.Protocol):
    def dataReceived(self, data):
        self.transport.write(data)  # write back

class EchoFactory(protocol.Factory):
    def buildProtocol(self, addr):
        return Echo()

reactor.listenTCP(8000, EchoFactory())
reactor.run()
My code is similar, only with additional DB operations.
#!/usr/bin/env python
import sys
import time
import binascii
import txredisapi

from twisted.internet import defer
from twisted.internet import protocol, reactor
from twisted.internet.protocol import Factory
from twisted.enterprise import adbapi
from twisted.python import log

from dmpack import Dmpack
from dmdb import Dmdb
from dmconfig import DmConf

dm = Dmpack()
conf = DmConf().loadConf()
rcs = txredisapi.lazyConnection(password=conf['RedisPassword'])
dbpool = adbapi.ConnectionPool("MySQLdb", db=conf['DbName'], user=conf['DbAccount'],
                               passwd=conf['DbPassword'], host=conf['DbHost'],
                               use_unicode=True, charset=conf['DbCharset'])

def getDataParsed(data):
    realtime = None
    period = None
    snrCode = dm.snrToAscii(data[2:7])  # was self.snrCode, which fails inside a module-level function
    realtime = data[7:167]  # save it into redis
    period = data[167:-2]   # save it into SQL
    return (snrCode, realtime, period)

class PlainTCP(protocol.Protocol):
    def __init__(self, factory):
        self.factory = factory
        self.factory.numConnections = 0
        self.snrCode = None
        self.rData = None
        self.pData = None
        self.err = None

    def connectionMade(self):
        self.factory.numConnections += 1
        print "Nr. of connections: %d\n" % (self.factory.numConnections)
        self.transport.write("Hello remote\r\n")  # it only prints every 5 connections.

    def connectionLost(self, reason):
        self.factory.numConnections -= 1
        print "Nr. of connections: %d\n" % (self.factory.numConnections)

    @defer.inlineCallbacks
    def dataReceived(self, data):
        global dbpool, rcs
        (self.snrCode, rDat, pDat) = getDataParsed(data)
        if self.snrCode == None or rDat == None or pDat == None:
            err = "Bad format"
        else:
            err = "OK"
        print "err:%s" % (err)  # debug print to show flow control
        self.err = err
        self.transport.write(self.snrCode)
        self.transport.write(self.err)
        self.transport.write(rDat)
        self.transport.write(pDat)
        self.transport.loseConnection()
        if self.snrCode != None and rDat != None and pDat != None:
            res = yield self.saveRealTimeData(rcs, rDat)
            res = yield self.savePeriodData(dbpool, pDat, conf)
        print "err2:%s" % (err)  # debug print to show flow control

    @defer.inlineCallbacks
    def saveRealTimeData(self, rc, dat):
        key = "somekey"
        val = "somedata"
        yield rc.set(key, val)
        yield rc.expire(key, 30)

    @defer.inlineCallbacks
    def savePeriodData(self, rc, dat, conf):
        query = "some SQL statement"
        yield rc.runQuery(query)

class PlainTCPFactory(protocol.Factory):
    def buildProtocol(self, addr):
        return PlainTCP(self)

def main():
    dmdb = Dmdb()
    if not dmdb.detectDb():
        print "Please run MySQL RDBS first."
        sys.exit()
    log.startLogging(sys.stdout)
    reactor.listenTCP(8080, PlainTCPFactory())
    reactor.run()

if __name__ == "__main__":
    main()
And here is a clip of my client, which is a simple client:
def connectSend(host="127.0.0.1", port=8080):
    global packet
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((host, port))
        s.sendall(''.join(packet))
        data = s.recv(1024)
        s.close()
        print 'Received', repr(data)
    except socket.error, err:
        print "Remote socket is not available: %s" % str(err)
        sys.exit(1)
The current status is:

If the @defer.inlineCallbacks decorator and the yield operations of dataReceived() are disabled, the self.transport.write() calls inside both connectionMade() and dataReceived() output data to the clients.

If @defer.inlineCallbacks and the two yield DB ops (SQL/Redis) are enabled, then self.transport.write() inside connectionMade() prints only every 5 connections, and dataReceived() does not output any data to the clients.

The debug print statements show up in the log regardless of @defer.inlineCallbacks.

I was told that dataReceived() should not be decorated with @defer.inlineCallbacks, but removing the decoration doesn't change anything.

I am wondering if gevent can help me out of this unpredicted behavior. I am twisted into an endless tornado, cyclone .....

Anyone who has similar experience, please help me. Thanks.
By changing the function as follows, the code works.
# COMMENT OUT the @defer.inlineCallbacks decorator on dataReceived
def dataReceived(self, data):
    global dbpool, rcs
    (self.snrCode, rDat, pDat) = getDataParsed(data)
    if self.snrCode == None or rDat == None or pDat == None:
        err = "Bad format"
    else:
        err = "OK"
    print "err:%s" % (err)  # debug print to show flow control
    self.err = err
    self.transport.write(self.snrCode)
    self.transport.write(self.err)
    self.transport.write(rDat)
    self.transport.write(pDat)
    self.transport.loseConnection()
    if self.snrCode != None and rDat != None and pDat != None:
        self.saveRealTimeData(rcs, rDat)
        self.savePeriodData(dbpool, pDat, conf)
        # Removed the yields before the DB ops
    print "err2:%s" % (err)  # debug print to show flow control

@defer.inlineCallbacks
def saveRealTimeData(self, rc, dat):
    print "saveRedis"
    key = "somekey"
    val = "somedata"
    yield rc.set(key, val)
    yield rc.expire(key, 30)

@defer.inlineCallbacks
def savePeriodData(self, rc, dat, conf):
    print "save SQL"
    query = "some SQL statement"
    yield rc.runQuery(query)
If we keep @defer.inlineCallbacks and the yields in dataReceived, the connection is closed before the second DB op, so no data is output on the connection; this appears to be caused by the inlineCallbacks decorator. With it removed, the flow control is simple and straightforward.
However, I still can't see why I cannot use inlineCallbacks when there are two deferred DB ops. Why don't they need to be yielded this time?
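One pattern that seems to fit here is to keep dataReceived plain and move the yields into a small helper decorated with @defer.inlineCallbacks; Twisted ignores dataReceived's return value, so a Deferred created there needs its own errback or failures vanish silently, and the connection should only be closed once the saves have finished. A rough sketch under those assumptions (method bodies for the PlainTCP class above; _saveAll is a hypothetical helper name):

@defer.inlineCallbacks
def _saveAll(self, rDat, pDat):
    # Runs both saves in sequence; errors flow into the returned Deferred.
    yield self.saveRealTimeData(rcs, rDat)
    yield self.savePeriodData(dbpool, pDat, conf)

def dataReceived(self, data):
    (self.snrCode, rDat, pDat) = getDataParsed(data)
    self.transport.write(self.snrCode)
    self.transport.write(rDat)
    self.transport.write(pDat)
    d = self._saveAll(rDat, pDat)  # inlineCallbacks returns a Deferred
    d.addErrback(log.err)          # log failures instead of losing them
    # Close the connection only after both saves have completed.
    d.addBoth(lambda _: self.transport.loseConnection())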
I am trying to write a basic program using Leap Motion. I just want to run the on_frame method constantly, but the method only runs once, and disconnecting the device does not call the on_disconnect method. Also, the program will not run until I hit Enter. What am I doing wrong? Thanks for the help:
import Leap, sys, thread
from Leap import CircleGesture, KeyTapGesture, ScreenTapGesture, SwipeGesture

class LeapMotionListener(Leap.Listener):

    state_names = ["STATE_INVALID", "STATE_START", "STATE_UPDATE", "STATE_END"]

    def on_init(self, controller):
        print "Initialized"

    def on_connect(self, controller):
        print "Connected"
        controller.enable_gesture(Leap.Gesture.TYPE_CIRCLE)
        controller.enable_gesture(Leap.Gesture.TYPE_KEY_TAP)
        controller.enable_gesture(Leap.Gesture.TYPE_SCREEN_TAP)
        controller.enable_gesture(Leap.Gesture.TYPE_SWIPE)
        print "All gestures enabled"

    def on_disconnect(self, controller):
        print "Disconnected"

    def on_exit(self, controller):
        print "Exit"

    def on_frame(self, controller):
        frame = controller.frame()
        print "\n Frame ID: " + str(frame.id)
        print "num of hands: " + str(len(frame.hands))
        print "num of gestures: " + str(len(frame.gestures()))
        for gesture in frame.gestures():
            if gesture.type is Leap.Gesture.TYPE_CIRCLE:
                circle = Leap.CircleGesture(gesture)
            elif gesture.type is Leap.Gesture.TYPE_SWIPE:
                swipe = Leap.SwipeGesture(gesture)
            elif gesture.type is Leap.Gesture.TYPE_KEY_TAP:
                key_tap = Leap.KeyTapGesture(gesture)
            elif gesture.type is Leap.Gesture.TYPE_SCREEN_TAP:
                screen_tap = Leap.ScreenTapGesture(gesture)

def main():
    listener = LeapMotionListener()
    controller = Leap.Controller()
    controller.add_listener(listener)

    print "Press enter to quit: "
    try:
        sys.stdin.readline()
    except KeyboardInterrupt:
        pass
    finally:
        controller.remove_listener(listener)

if __name__ == "__main__":
    main()
Your code runs fine from the command line, i.e.:
> python scriptname.py
Are you running from Idle? If so, this part:
try:
    sys.stdin.readline()
except KeyboardInterrupt:
    pass
doesn't work (see Python 3.2 Idle vs terminal). A similar issue might exist in other IDEs.
The following should work in Idle, but you have to use Ctrl-C to exit:
import time  # the loop below needs time.sleep

# Keep this process running until a Ctrl-C is pressed
print "Press Ctrl-C to quit..."
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    print "Quitting"
finally:
    # Remove the sample listener when done
    controller.remove_listener(listener)
Quitting on any keystroke, in both Idle and the command line, seemed surprisingly difficult and complex when I tried it some time ago -- but then, I'm a Python duffer.
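For what it's worth, a quit-on-any-keypress loop is doable on a real terminal with only the standard library; a rough sketch (it will not work in Idle, which has no real console):

import sys
import time

try:
    import msvcrt  # Windows console

    def wait_for_key():
        while not msvcrt.kbhit():  # poll the keyboard without blocking
            time.sleep(0.1)
        msvcrt.getch()             # consume the key
except ImportError:
    import select
    import termios
    import tty  # Unix terminal

    def wait_for_key():
        fd = sys.stdin.fileno()
        old = termios.tcgetattr(fd)
        try:
            tty.setcbreak(fd)  # deliver keystrokes immediately, no Enter needed
            while not select.select([sys.stdin], [], [], 0.1)[0]:
                pass           # the 0.1 s timeout keeps the loop responsive
            sys.stdin.read(1)  # consume the key
        finally:
            termios.tcsetattr(fd, termios.TCSADRAIN, old)

print("Press any key to quit...")
wait_for_key()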