Pool apply function hangs and never executes - python-multiprocessing

I am trying to fetch Rally data by using its python library pyral. Sequentially the same code works, but its slow.
I thought of using python multiprocess package, however my pool.apply method gets stuck and never executes. I tried running it in Pycharm IDE as well as the windows cmd prompt.
import pandas as pd
from pyral import Rally
from multiprocessing import Pool, Manager
from pyral.entity import Project
def process_row(sheetHeaders: list, item: Project, L: list):
print('processing row : ' + item.Name) ## this print never gets called
row = ()
for header in sheetHeaders:
row.append(process_cell(header, item))
L.append(row)
def process_cell(attr, item: Project):
param = getattr(item, attr)
if param is None:
return None
try:
if attr == 'Owner':
return param.__getattr__('Name')
elif attr == 'Parent':
return param.__getattr__('ObjectID')
else:
return param
except KeyError as e:
print(e)
# Projects
# PortfolioItem
# User Story
# Hierarchical Req
# tasks
# defects
# -------------MAIN-----------------
def main():
# Rally connection
rally = Rally('rally1.rallydev.com', apikey='<my_key>')
file = 'rally_data.xlsx'
headers = {
'Project': ['Name', 'Description', 'CreationDate', 'ObjectID', 'Parent', 'Owner', 'State'],
}
sheetName = 'Project'
sheetHeaders = headers.get(sheetName)
p = Pool(1)
result = rally.get(sheetName, fetch=True, pagesize=10)
with Manager() as manager:
L = manager.list()
for item in result:
print('adding row for : ' + item.Name)
p.apply_async(func=process_row, args=(sheetHeaders, item, L)) ## gets stuck here
p.close()
p.join()
pd.DataFrame(L).to_excel(file, sheet_name=sheetName)
if __name__ == '__main__':
main()
Also tried without Manager list without any difference in the outcome
def main():
# Rally connection
rally = Rally('rally1.rallydev.com', apikey='<key>')
file = 'rally_data.xlsx'
headers = {
'Project': ['Name', 'Description', 'CreationDate', 'ObjectID', 'Parent', 'Owner', 'State'],
}
sheetName = 'Project'
sheetHeaders = headers.get(sheetName)
result = rally.get(sheetName, fetch=True, pagesize=10)
async_results = []
with Pool(50) as p:
for item in result:
print('adding row for : ' + item.Name)
async_results.append(p.apply_async(func=process_row, args=(sheetHeaders, item)))
res = [r.get() for r in async_results]
pd.DataFrame(res).to_excel(file, sheet_name=sheetName)

I dont know why, but replacing multiprocessing
with multiprocessing.dummy in the import statement worked.

Related

java.lang.NullPointerException in test.py file of Qpython 3L

Several java.lang.NullPointerException in test.py file of Qpython 3L errors I got when run test.py from standard package of Qpython scripts.
File test.py and screens of errors attached.
When started to post obtained such error:"It looks like your post is mostly code; please add some more details."
Cannot add any other comments.
Any idea ?
Thank you.
import sys
import time
import types
import androidhelper
try:
import gdata.docs.service
except ImportError:
gdata = None
droid = androidhelper.Android()
def event_loop():
for i in range(10):
time.sleep(1)
droid.eventClearBuffer()
time.sleep(1)
e = droid.eventPoll(1)
if e.result is not None:
return True
return False
def test_imports():
try:
import termios
import bs4 as BeautifulSoup
import pyxmpp2 as xmpp
from xml.dom import minidom
except ImportError:
return False
return True
def test_clipboard():
previous = droid.getClipboard().result
msg = 'Hello, world!'
droid.setClipboard(msg)
echo = droid.getClipboard().result
droid.setClipboard(previous)
return echo == msg
def test_gdata():
if gdata is None:
return False
# Create a client class which will make HTTP requests with Google Docs server.
client = gdata.docs.service.DocsService()
# Authenticate using your Google Docs email address and password.
username = droid.dialogGetInput('Username').result
password = droid.dialogGetPassword('Password', 'For ' + username).result
try:
client.ClientLogin(username, password)
except:
return False
# Query the server for an Atom feed containing a list of your documents.
documents_feed = client.GetDocumentListFeed()
# Loop through the feed and extract each document entry.
return bool(list(documents_feed.entry))
def test_gps():
droid.startLocating()
try:
return event_loop()
finally:
droid.stopLocating()
def test_battery():
droid.batteryStartMonitoring()
time.sleep(1)
try:
return bool(droid.batteryGetStatus())
finally:
droid.batteryStopMonitoring()
def test_sensors():
# Accelerometer, once per second.
droid.startSensingTimed(2, 1000)
try:
return event_loop()
finally:
droid.stopSensing()
def test_speak():
result = droid.ttsSpeak('Hello, world!')
return result.error is None
def test_phone_state():
droid.startTrackingPhoneState()
try:
return event_loop()
finally:
droid.stopTrackingPhoneState()
def test_ringer_silent():
result1 = droid.toggleRingerSilentMode()
result2 = droid.toggleRingerSilentMode()
return result1.error is None and result2.error is None
def test_ringer_volume():
get_result = droid.getRingerVolume()
if get_result.error is not None:
return False
droid.setRingerVolume(0)
set_result = droid.setRingerVolume(get_result.result)
if set_result.error is not None:
return False
return True
def test_get_last_known_location():
result = droid.getLastKnownLocation()
return result.error is None
def test_geocode():
result = droid.geocode(0.0, 0.0, 1)
return result.error is None
def test_make_toast():
result = droid.makeToast('Hello, world!')
return result.error is None
def test_vibrate():
result = droid.vibrate()
return result.error is None
def test_notify():
result = droid.notify('Test Title', 'Hello, world!')
return result.error is None
def test_get_running_packages():
result = droid.getRunningPackages()
return result.error is None
def test_alert_dialog():
title = 'User Interface'
message = 'Welcome to the SL4A integration test.'
droid.dialogCreateAlert(title, message)
droid.dialogSetPositiveButtonText('Continue')
droid.dialogShow()
response = droid.dialogGetResponse().result
return response['which'] == 'positive'
def test_alert_dialog_with_buttons():
title = 'Alert'
message = ('This alert box has 3 buttons and '
'will wait for you to press one.')
droid.dialogCreateAlert(title, message)
droid.dialogSetPositiveButtonText('Yes')
droid.dialogSetNegativeButtonText('No')
droid.dialogSetNeutralButtonText('Cancel')
droid.dialogShow()
response = droid.dialogGetResponse().result
return response['which'] in ('positive', 'negative', 'neutral')
def test_spinner_progress():
title = 'Spinner'
message = 'This is simple spinner progress.'
droid.dialogCreateSpinnerProgress(title, message)
droid.dialogShow()
time.sleep(2)
droid.dialogDismiss()
return True
def test_horizontal_progress():
title = 'Horizontal'
message = 'This is simple horizontal progress.'
droid.dialogCreateHorizontalProgress(title, message, 50)
droid.dialogShow()
for x in range(0, 50):
time.sleep(0.1)
droid.dialogSetCurrentProgress(x)
droid.dialogDismiss()
return True
def test_alert_dialog_with_list():
title = 'Alert'
droid.dialogCreateAlert(title)
droid.dialogSetItems(['foo', 'bar', 'baz'])
droid.dialogShow()
response = droid.dialogGetResponse().result
return True
def test_alert_dialog_with_single_choice_list():
title = 'Alert'
droid.dialogCreateAlert(title)
droid.dialogSetSingleChoiceItems(['foo', 'bar', 'baz'])
droid.dialogSetPositiveButtonText('Yay!')
droid.dialogShow()
response = droid.dialogGetResponse().result
return True
def test_alert_dialog_with_multi_choice_list():
title = 'Alert'
droid.dialogCreateAlert(title)
droid.dialogSetMultiChoiceItems(['foo', 'bar', 'baz'], [])
droid.dialogSetPositiveButtonText('Yay!')
droid.dialogShow()
response = droid.dialogGetResponse().result
return True
def test_wifi():
result1 = droid.toggleWifiState()
result2 = droid.toggleWifiState()
droid.toggleWifiState(True) # Make sure wifi ends up ON, as it interferes with other tests
return result1.error is None and result2.error is None
if __name__ == '__main__':
for name, value in list(globals().items()):
if name.startswith('test_') and isinstance(value, types.FunctionType):
print('Running %s...' % name, end=' ')
sys.stdout.flush()
if value():
print(' PASS')
else:
print(' FAIL')
Screenshot with errors

Handling errors within loops through exceptions

Tried my first python program to read temp sensor and output to influxdb
Occasionally temp sensor gives error "IndexError: list index out of range" and loop ends
I want loop to wait 15 seconds on this error and then continue the loop (sensor usually corrects itself by then on the next read)
My code:
import os
import glob
import time
import urllib
import urllib2
import httplib
import json
from influxdb import InfluxDBClient
client = InfluxDBClient(host='192.168.1.7', port=8086)
#client.get_list_database()
client.switch_database('influxdb1')
os.system('modprobe w1-gpio')
os.system('modprobe w1-therm')
base_dir = '/sys/devices/w1_bus_master1/'
device_folder = glob.glob(base_dir + '28*')[0]
while True:
device_file = device_folder + '/w1_slave'
def read_temp_raw():
f = open(device_file, 'r')
lines = f.readlines()
f.close()
return lines
def read_temp():
lines = read_temp_raw()
while lines[0].strip()[-3:] != 'YES':
time.sleep(0.2)
lines = read_temp_raw()
equals_pos = lines[1].find('t=')
if equals_pos != -1:
temp_string = lines[1][equals_pos+2:]
temp_c = float(temp_string) / 1000.0
return temp_c
temp = float(read_temp())
json_body = [
{
"measurement": "YOUR_MEASUREMENT",
"tags": {
"Device": "YOUR_DEVICE",
"ID": "YOUR_ID"
},
"fields": {
"outside_temp": temp,
}
}
]
client.write_points(json_body)
time.sleep(60)
******************************************************
which works ok :)
When I edit the code to catch the exception.....
******************************************************
while True:
except IndexError:
time.sleep(15)
continue
device_file = device_folder + '/w1_slave' # store the details
def read_temp_raw():
f = open(device_file, 'r')
lines = f.readlines() # read the device details
f.close()
return lines
def read_temp():
lines = read_temp_raw()
while lines[0].strip()[-3:] != 'YES':
time.sleep(0.2)
lines = read_temp_raw()
equals_pos = lines[1].find('t=')
if equals_pos != -1:
temp_string = lines[1][equals_pos+2:]
temp_c = float(temp_string) / 1000.0
return temp_c
temp = float(read_temp())
json_body = [
{
"measurement": "YOUR_MEASUREMENT",
"tags": {
"Device": "YOUR_DEVICE",
"ID": "YOUR_ID"
},
"fields": {
"outside_temp": temp,
}
}
]
client.write_points(json_body)
time.sleep(60)
************************************************************
I get following error...
File "temptoinfluxdb2.py", line 22
except IndexError:
^
SyntaxError: invalid syntax
Where am i going wrong please?
You will always need to use the except block in combination with a try block.
So the code in the try block is executed until an exception (in that case IndexError) occurs.
try:
# Execution block
except IndexError:
# Error handling
You could also use a more general approach with except Exception as e, which catches not just the IndexError but any exception.
Check the official documentation for further information.

Scrapy yield from a non call back method

I am trying to scrape a html file that has a json object with all the required test case data , but processing of the json happens in "find" and "parseTestCaseDetails" method where iteratively i get the test case details , which i finally parse in "findInterestedFields" , so my requirement is to yield the details of test cases to a json file from the the last method called in the hierarchy i.e findInterestedFields , is it possible to achieve this??
Thanks in advance!
import scrapy
import datetime
import json
import re
import collections
import time
import os
import js2xml
from scrapy.selector import HtmlXPathSelector
class AltiplanoAVInprogressTCSpiderTest(scrapy.Spider):
name = "AltiplanoAVInprogressTCSpiderTest"
buildNumber = []
FormatedBuildTime = []
keys = []
testcaseName=''
testcaseSuit=''
testcaseDoc=''
def start_requests(self):
print os.environ["jenkinsdomain"]
urls = [
os.environ["jenkinsdomain"] + "/job/InprogressFlakyAndBlockedTestCaseDetails/lastSuccessfulBuild/"
]
for url in urls:
print url
yield scrapy.Request(url=url, callback=self.parse, errback=self.parseerror)
def parseerror(self, failure):
print failure
def parse(self, response):
hxs = HtmlXPathSelector(response)
buildNumberString = hxs.select('normalize-space(//*[#id="main-panel"]/h1/text())').extract_first()
self.buildNumber = buildNumberString.split("#")[-1].split("(")[0].strip()
buildTimeUnformatd = buildNumberString.split("(")[-1].split(")")[0].strip().replace("PM", "").replace("AM", "")
buildTimeUnformatd = buildTimeUnformatd.strip()
t = time.strptime(buildTimeUnformatd, "%b %d, %Y %I:%M:%S")
self.FormatedBuildTime = time.strftime('%d %b %y %H:%M IST', t)
static_testDetailsUrl = os.environ[
"jenkinsdomain"] + "XX/Inprogress-ANV.html"
yield scrapy.Request(url=static_testDetailsUrl, callback=self.parseTestDetails, errback=self.parseerror)
def find(self, key, dictionary):
for k, v in dictionary.iteritems():
if k == key:
yield v
self.parseTestCaseDetails(v)
elif isinstance(v, dict):
for result in self.find(key, v):
yield result
elif isinstance(v, list):
for d in v:
for result in self.find(key, d):
yield result
def parseTestCaseDetails(self, testcases):
# get the list of test cases and again parse them one by one to get the name and other fields
for testcase in testcases:
self.findInterestedFields(testcase)
def findInterestedFields(self, dictionary):
jsonLoad = json.dumps(dictionary, indent=4)
loaded_json = json.loads(jsonLoad)
readabledatetime = datetime.datetime.now().strftime("%d %b %y %H:%M IST")
for k, v in loaded_json.iteritems():
if k == "name":
testcaseName = v
if k == "fullName":
testcaseSuit = v
if k == "doc":
testcaseDoc = v
yield {"name":"avInprogresstestcases",'avInprogresstestcases':{
'testcaseName':testcaseName
}}
def parseTestDetails(self, response):
data = response.xpath('//script[4]/text()').extract_first().strip()
jstree = js2xml.parse(data)
testDetailsInJson = js2xml.jsonlike.getall(jstree)
jsonLoad = json.dumps(testDetailsInJson[0], indent=4)
loaded_json = json.loads(jsonLoad)
list(self.find('tests', loaded_json))

python TypeError: expected string or buffer when parsing JSON from a file

I realize this problem has been answered for other folks but none of the threads are helping me solve it. I'm trying to parse a JSON structure and add all values in the sent_file when the keys match with the tweet_file. The error I'm getting
import sys
import json
def main():
sent_file = open(sys.argv[1])
tweet_file = open(sys.argv[2])
scores = {}
#tweet = {}
#tweet_text = {}
#hw()
#lines(sent_file)
#lines(tweet_file)
for line in sent_file:
term,score = line.split("\t")
scores[term] = int(score)
#print scores.items()
for tweets in tweet_file:
current_sent_value = 0
tweet = {} #this is a dict
#print type(tweets) str
tweet = json.loads(tweets)#[0] #this assignment changes tweet to a list. Why?
if 'text' in tweet:
tweet_text = {}
unicode_string = tweet['text']
encoded_string = unicode_string.encode('utf-8')
tweet_text = encoded_string.split()
for key in tweet_text:
for key in scores:
#print type(tweet_text) -- list
#print type(scores) --dict
if tweet_text.get(key) == scores.get(key): # get() does not work on a list. tweet_text is a list.
current_sent_value += scores(value)
print current_sent_value
if name == 'main':
main()
The error is here \assignment1\tweet_sentiment2.py", line 42, in main
if tweet_text.get(key) == scores.get(key): # get() does not work on a list. tweet_text is a list.
AttributeError: 'list' object has no attribute 'get'

APScheduler - job not executed

I'm new to APScheduler and testing it before implementing it in a larger scope.
I have created the code below, but somehow my functions are never called when I use add_job with trigger='date'. If I use trigger='interval', then everything works fine.
I also tried to play with run_date param, with no luck.
Any idea about what could be wrong ?
Apscheduler is version 3.0.3
Many thanks in advance :)
def my_listener(event):
if event.exception:
print('The job crashed :(')
else:
print('The job worked :)')
def test():
print("{} ok ".format(datetime.now()))
def myFunc(content, img):
print("{} - content={}|image{}".format(datetime.now(), content, img))
myfile = open("scheduler.log", "a")
myfile.write("{} - content={}|image{}".format(datetime.now(), content, img))
myfile.close()
def main():
jobstores = \
{
'default': SQLAlchemyJobStore(url="postgresql+psycopg2://{}:{}#{}:{}/{}".format(db_user, db_password, db_host, db_port, db_database))
}
executors = \
{
'default': ThreadPoolExecutor(20),
'processpool': ProcessPoolExecutor(5)
}
job_defaults = \
{
'coalesce': False,
'max_instances': 3
}
scheduler = BackgroundScheduler(jobstores=jobstores, executors=executors, job_defaults=job_defaults, timezone=utc)
scheduler.start()
CURR_DATE = datetime.strptime(datetime.strftime(datetime.now(), '%Y%m%d%H%M'), '%Y%m%d%H%M')
JOB_DATE = CURR_DATE + timedelta(minutes=1)
uid=uuid.uuid4()
newjob = scheduler.add_job(myFunc,
trigger='date',
args=['content data', 'image data'],
kwargs=None,
id=str(uid),
name='test' + str(uid),
misfire_grace_time=5,
coalesce=False,
max_instances=1,
next_run_time= JOB_DATE,
jobstore='default',
executor='default',
replace_existing=True)
print("Added - {}".format(newjob))
scheduler.add_listener(my_listener, events.EVENT_JOB_EXECUTED | events.EVENT_JOB_ERROR)
scheduler.print_jobs()
while True:
sys.stdout.write('{}\n'.format(datetime.now())); sys.stdout.flush()
sleep(1)
if __name__ == "__main__":
main()
Problem is that you are using
next_run_time= JOB_DATE,
instead of run_date=JOB_DATE,
another option is declaring your trigger and passing it as a parameter to add job like this:
trigger = DateTrigger(run_date=start_date)
newjob = scheduler.add_job(myFunc,
trigger=trigger,
args=['content data', 'image data'],
kwargs=None,
id=str(uid),
name='test' + str(uid),
misfire_grace_time=5,
coalesce=False,
max_instances=1,
jobstore='default',
executor='default',
replace_existing=True)
another problem with your code is that while you made your scheduler timeaware you are using an unaware datetime
CURR_DATE = datetime.strptime(datetime.strftime(datetime.now(), '%Y%m%d%H%M'), '%Y%m%d%H%M')
JOB_DATE = CURR_DATE + timedelta(minutes=1) #this is unaware
try declaring your date this way:
import pytz
import datetime
job_date = datetime.datetime.now(pytz.UTC) + datetime.timedelta(minutes=1)