Create a record using the Aerospike operate command

I am trying the following code and getting a "key not found" error. I want to know whether it is possible to create a record using Aerospike multi-ops (operate()).
public long incrementSingle(String counterName, long by) {
    // Create a key
    Key recordKey = new Key(Constants.NAMESPACE, Constants.SINGLE_SET, counterName);
    // Increment operation
    Bin incrementCounter = new Bin(Constants.SINGLE_COUNTER_BIN, by);
    // https://www.aerospike.com/docs/client/java/usage/kvs/multiops.html#operation-specification
    Record record = asClient.operate(null, recordKey,
            Operation.add(incrementCounter),
            Operation.get(Constants.SINGLE_COUNTER_BIN));
    return record.getLong(Constants.SINGLE_COUNTER_BIN);
}

I don't see any problem with this. Here's some Python code that does the same:
# -*- coding: utf-8 -*-
from __future__ import print_function
import aerospike
from aerospike import exception as e
from aerospike_helpers.operations import operations as oh
import sys

config = {"hosts": [("192.168.243.133", 3000)]}
try:
    client = aerospike.client(config).connect()
except e.ClientError as e:
    print("Error: {0} [{1}]".format(e.msg, e.code))
    sys.exit(2)

key = ("test", "demo", "count-this")
print("Nuke the record")
try:
    client.remove(key)
except:
    pass

ops = [oh.increment("counter", 2), oh.read("counter")]
meta = {"ttl": aerospike.TTL_NEVER_EXPIRE}
for i in range(4):
    try:
        k, m, b = client.operate_ordered(key, ops, meta)
        print("Iteration {0}: bin value is {1}".format(i, b))
    except e.RecordError as e:
        print("Error: {0} [{1}]".format(e.msg, e.code))
        sys.exit(4)
client.close()
Resulting in the expected output:
Nuke the record
Iteration 0: bin value is [('counter', 2)]
Iteration 1: bin value is [('counter', 4)]
Iteration 2: bin value is [('counter', 6)]
Iteration 3: bin value is [('counter', 8)]

I think that for add() to work, the record must already exist. Test by creating the record referred to by recordKey with some initial value (say, the counter bin initialized to 0) and then incrementing it. The Operation API documentation for add() does not specifically point this out, but my guess is that add() first reads the existing record on disk and is not finding it. (Edit: see the definitive test/clarification, depending on the Write Policy used, in the comments below.)
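A minimal sketch of that test, reusing the client, key, ops and meta from the Python example in the earlier answer (the explicit client.put() is my addition for testing, not part of the original post):

client.put(key, {"counter": 0})                    # create the record explicitly first
k, m, b = client.operate_ordered(key, ops, meta)   # add() + read() now hit an existing record
print("bin value after explicit create:", b)       # expect [('counter', 2)]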


ReactiveX: how to make a BehaviorSubject emit from an Observable

I'm going to be using RxAndroid in an Android app. I'm trying to model the behavior right now in RxPY because it was the easiest for me to set up and play with. In the example below, source3 is emitting the correct data, which is a concatenation of an initialization that takes some time and a permanent subscription which I have faked out here. I want the BehaviorSubject because I need the last value immediately for field initialization.
I cannot figure out how to chain the BehaviorSubject on top of source3 so that it emits what source3 emits while remembering the last value. I have searched the internet for two days and not found a clear direction for this use case. Here is my code, and the question is why I don't get any emissions from the observer.
from rx import Observable, Observer
from rx.subjects import BehaviorSubject
import time, random

def fake_initialization(observer):
    time.sleep(5)  # It takes some time
    observer.on_next("Alpha")
    observer.on_completed()

def fake_subscription(observer):
    iter = 0  # Subscription emits forever
    while True:
        observer.on_next("message %02d" % (iter))
        time.sleep(random.randrange(2, 5))
        iter += 1

class PrintObserver(Observer):
    def on_next(self, value):
        print("Received {0}".format(value))
        #bsubject.on_next(value)

    def on_completed(self):
        print("Done!")

    def on_error(self, error):
        print("Error Occurred: {0}".format(error))

source1 = Observable.create(fake_initialization)
source2 = Observable.create(fake_subscription)
source3 = source1 + source2

bsubject = BehaviorSubject(False)
source4 = source3.multicast(bsubject)
source4.connect()
source4.subscribe(PrintObserver())
This was actually a fairly easy answer for someone. I'm posting this in case anyone else ends up in this situation. Admittedly, I didn't read the RxPY page closely enough: you need to add the concurrency on your own, presumably because there are so many concurrency options in Python. Here is the final working code:
import random
import time
import multiprocessing

from rx import Observable, Observer
from rx.concurrency import ThreadPoolScheduler
from rx.subjects import Subject

class PrintObserver1(Observer):
    def on_next(self, value):
        print("Received 1 {0}".format(value))
        #bsubject.on_next(value)

    def on_completed(self):
        print("Done 1!")

    def on_error(self, error):
        print("Error Occurred: 1 {0}".format(error))

class PrintObserver2(Observer):
    def on_next(self, value):
        print("Received 2 {0}".format(value))
        #bsubject.on_next(value)

    def on_completed(self):
        print("Done 2!")

    def on_error(self, error):
        print("Error Occurred: 2 {0}".format(error))

def fake_initialization(observer):
    time.sleep(5)  # It takes some time
    observer.on_next("Alpha")
    observer.on_completed()

def fake_subscription(observer):
    iter = 0  # Subscription emits forever
    while True:
        observer.on_next("message %02d" % (iter))
        time.sleep(random.randrange(2, 5))
        iter += 1

optimal_thread_count = multiprocessing.cpu_count()
pool_scheduler = ThreadPoolScheduler(optimal_thread_count)

source1 = Observable.create(fake_initialization).subscribe_on(pool_scheduler)
source2 = Observable.create(fake_subscription).subscribe_on(pool_scheduler)
catted_source = source1 + source2
native_source = Observable.interval(1000)
print native_source, catted_source
#source = source3

subject = Subject()
# native_source = works
# catted_source = not works
subSource = catted_source.subscribe(subject)
#####
subSubject1 = subject.subscribe(PrintObserver1())
subSubject2 = subject.subscribe(PrintObserver2())

time.sleep(30)
subject.on_completed()
subSubject1.dispose()
subSubject2.dispose()
Also note that on Python 2.7 you have to install the 'futures' backport package (pip install futures) for the concurrency to work.
If you get this error:
from concurrent.futures import ThreadPoolExecutor
ImportError: No module named concurrent.futures
read this (the link is for a slightly different error, but the solution works): ImportError: No module named concurrent.futures.process
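To get back to the original question's BehaviorSubject (so a late subscriber immediately receives the last value), here is a minimal sketch under the same assumptions as the working code above (RxPY 1.x API, with fake_initialization, fake_subscription, pool_scheduler and PrintObserver1 already defined); this adaptation is mine, not part of the original answer:

from rx import Observable
from rx.subjects import BehaviorSubject

# Same concatenated, scheduler-backed source as in the working code above.
catted_source = (Observable.create(fake_initialization).subscribe_on(pool_scheduler)
                 + Observable.create(fake_subscription).subscribe_on(pool_scheduler))

# Seed the BehaviorSubject with a placeholder value; it replays the latest value
# to anyone who subscribes later (e.g. for field initialization).
bsubject = BehaviorSubject("not ready yet")
catted_source.subscribe(bsubject)     # push the stream into the subject

bsubject.subscribe(PrintObserver1())  # gets the latest value immediately, then live updates
time.sleep(30)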

What is the effect of calling TensorArray.close()?

(tensorflow version: '0.12.head')
The documentation of TensorArray.close says that it closes the current TensorArray. What does that mean for the status of the TensorArray? I tried the following code:
import tensorflow as tf
sess = tf.InteractiveSession()
a1 = tf.TensorArray(tf.int32, 2)
a1.close().run()
a2 = a1.write(0, 0)
a2.close().run()
print(a2.read(0).eval())
And there are no errors. What is the usage of close?
Learning-to-learn includes TensorArray.close in the reset operations of the network. I can't figure out what the comment "Empty array as part of the reset process" means.
Update
For examples,
import tensorflow as tf

sess = tf.InteractiveSession()

N = 3

def cond(i, arr):
    return i < N

def body(i, arr):
    arr = arr.write(i, i)
    i += 1
    return i, arr

arr = tf.TensorArray(tf.int32, N)
_, result_arr = tf.while_loop(cond, body, [0, arr])
reset = arr.close()  # corresponds to https://github.com/deepmind/learning-to-learn/blob/6ee52539e83d0452051fe08699b5d8436442f803/meta.py#L370

NUM_EPOCHS = 3
for _ in range(NUM_EPOCHS):
    reset.run()  # corresponds to https://github.com/deepmind/learning-to-learn/blob/6ee52539e83d0452051fe08699b5d8436442f803/util.py#L32
    print(result_arr.stack().eval())
Why doesn't arr.close() make the while loop fail? What is the advantage of calling arr.close() at the beginning of each epoch?
This is a Python op which wraps a native op and both have help strings, but the native op help string is more informative. If you look at inspect.getsourcefile(fx_array.close) it'll point you to tensorflow/python/ops/tensor_array_ops.py. Inside the implementation you see that it defers to _tensor_array_close_v2. So you can do this
> from tensorflow.python.ops import gen_data_flow_ops
> help(gen_data_flow_ops._tensor_array_close_v2)
Delete the TensorArray from its resource container. This enables
the user to close and release the resource in the middle of a step/run.
That same doc string is also in tensorflow/core/ops/ops.pbtxt under TensorArrayCloseV2
Looking at tensorflow/core/kernels/tensor_array_ops.cc you see that TensorArrayCloseOp is the implementation registered for TensorArrayCloseV2, and it has more info:
// Delete the TensorArray from its resource container. This enables
// the user to close and release the resource in the middle of a step/run.
// TODO(ebrevdo): decide whether closing the grad op should happen
// here or on the python side.
class TensorArrayCloseOp : public OpKernel {
 public:
  explicit TensorArrayCloseOp(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* ctx) override {
    TensorArray* tensor_array;
    OP_REQUIRES_OK(ctx, GetTensorArray(ctx, &tensor_array));
    core::ScopedUnref unref(tensor_array);
    // Instead of deleting this TA from the ResourceManager, we just
    // clear it away and mark it as closed. The remaining memory
    // consumed store its mutex and handle Tensor. This will be
    // cleared out at the end of the step anyway, so it's fine to keep
    // it around until the end of the step. Further calls to the
    // TensorArray will fail because TensorArray checks internally to
    // see if it is closed or not.
The description seems inconsistent with the behavior you are seeing; it could be a bug.
The TensorArray that is being closed in the Learning-to-learn example is not the original TensorArray that's being passed to the while-loop.
# original array (fx_array) declared here
fx_array = tf.TensorArray(tf.float32, size=len_unroll + 1,
                          clear_after_read=False)

# new array (fx_array) returned here
_, fx_array, x_final, s_final = tf.while_loop(
    cond=lambda t, *_: t < len_unroll,
    body=time_step,
    loop_vars=(0, fx_array, x, state),
    parallel_iterations=1,
    swap_memory=True,
    name="unroll")
Any subsequent calls to fx_array.close() from here close the new array returned by the while-loop and not the original array passed to the loop in the first iteration.
If you want to see how close behaves as expected then run:
session.run([reset, loss])
This will fail with "TensorArray has already been closed", since the loss op tries to run pack() on the closed array.
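Expressed with the Learning-to-learn names quoted above, the suggested check looks roughly like this (a hedged sketch, not from the original post; loss is assumed to be built from the new fx_array via pack()/stack()):

# Hedged sketch: fx_array here is the NEW TensorArray returned by tf.while_loop.
reset = fx_array.close()
session.run([reset, loss])   # expected, per the answer: "TensorArray has already been closed"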

Redis not returning result after upgrading Celery from 3.1 to 4.0

I recently upgraded my Celery installation to 4.0. After a few days of wrestling with the upgrade process, I finally got it to work... sort of. Some tasks will return, but the final task will not.
I have a class, SFF, that takes in and parses a file:
# Constructor with I/O file
def __init__(self, file):
    # File data that's gonna get used a lot
    sffDescriptor = file.fileno()
    fileName = abspath(file.name)

    # Get the pointer to the file
    filePtr = mmap.mmap(sffDescriptor, 0, flags=mmap.MAP_SHARED, prot=mmap.PROT_READ)

    # Get the header info
    hdr = filePtr.read(HEADER_SIZE)
    self.header = SFFHeader._make(unpack(HEADER_FMT, hdr))

    # Read in the palette maps
    print self.header.onDemandDataSize
    print self.header.onLoadDataSize
    palMapsResult = getPalettes.delay(fileName, self.header.palBankOff - HEADER_SIZE,
                                      self.header.onDemandDataSize, self.header.numPals)

    # Read the sprite list nodes
    nodesStart = self.header.sprListOff
    nodesEnd = self.header.palBankOff
    print nodesEnd - nodesStart
    sprNodesResult = getSprNodes.delay(fileName, nodesStart, nodesEnd, self.header.numSprites)

    # Get palette data
    self.palettes = palMapsResult.get()
    # Get sprite data
    spriteNodes = sprNodesResult.get()

    # TESTING
    spritesResultSet = ResultSet([])
    numSpriteNodes = len(spriteNodes)
    # Split the nodes into chunks of size 32 elements
    for x in xrange(0, numSpriteNodes, 32):
        spritesResult = getSprites.delay(spriteNodes, x, x + 32, fileName, self.palettes,
                                         self.header.palBankOff, self.header.onDemandDataSizeTotal)
        spritesResultSet.add(spritesResult)
        break  # REMEMBER TO REMOVE FOR ENTIRE SFF
    self.sprites = spritesResultSet.join_native()
It doesn't matter if it's a single task that returns the entire spritesResult, or if I split it using a ResultSet, the outcome is always the same: the Python console I'm using just hangs at either spritesResultSet.join_native() or spritesResult.get() (depending on how I format it).
Here is the task in question:
@task
def getSprites(nodes, start, end, fileName, palettes, palBankOff, onDemandDataSizeTotal):
    sprites = []
    with open(fileName, "rb") as file:
        sffDescriptor = file.fileno()
        sffData = mmap.mmap(sffDescriptor, 0, flags=mmap.MAP_SHARED, prot=mmap.PROT_READ)
        for node in nodes[start:end]:
            sprListNode = dict(SprListNode._make(node)._asdict())  # Need to convert it to a dict since values may change.
            #print node
            #print sprListNode
            # If it's a linked sprite, the data length is 0, so get the linked index.
            if sprListNode['dataLen'] == 0:
                sprListNodeTemp = SprListNode._make(nodes[sprListNode['index']])
                sprListNode['dataLen'] = sprListNodeTemp.dataLen
                sprListNode['dataOffset'] = sprListNodeTemp.dataOffset
                sprListNode['compression'] = sprListNodeTemp.compression

            # What does the offset need to be?
            dataOffset = sprListNode['dataOffset']
            if sprListNode['loadMode'] == 0:
                dataOffset += palBankOff  #- HEADER_SIZE
            elif sprListNode['loadMode'] == 1:
                dataOffset += onDemandDataSizeTotal  #- HEADER_SIZE
            #print sprListNode

            # Seek to the data location and "read" it in. First 4 bytes are just the image length
            start = dataOffset + 4
            end = dataOffset + sprListNode['dataLen']
            #sffData.seek(start)
            compressedSprite = sffData[start:end]

            # Create the sprite
            sprite = Sprite(sprListNode, palettes[sprListNode['palNo']],
                            np.fromstring(compressedSprite, dtype=np.uint8))
            sprites.append(sprite)
    return json.dumps(sprites, cls=SpriteJSONEncoder)
I know it reaches the return statement, because if I put a print right above it, it will print in the Celery window. I also know that the task is running to completion because I get the following message from the worker:
[2016-11-16 00:03:33,639: INFO/PoolWorker-4] Task framedatabase.tasks.getSprites[285ac9b1-09b4-4cf1-a251-da6212863832] succeeded in 0.137236133218s: '[{"width": 120, "palNo": 30, "group": 9000, "xAxis": 0, "yAxis": 0, "data":...'
Here are my celery settings in settings.py:
# Celery settings
BROKER_URL='redis://localhost:1717/1'
CELERY_RESULT_BACKEND='redis://localhost:1717/0'
CELERY_IGNORE_RESULT=False
CELERY_IMPORTS = ("framedatabase.tasks", )
... and my celery.py:
from __future__ import absolute_import
import os
from celery import Celery

# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'framedatabase.settings')

from django.conf import settings  # noqa

app = Celery('framedatabase', backend='redis://localhost:1717/1', broker="redis://localhost:1717/0",
             include=['framedatabase.tasks'])

# Using a string here means the worker will not have to
# pickle the object when using Windows.
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks()

@app.task(bind=True)
def debug_task(self):
    print('Request: {0!r}'.format(self.request))
Found the problem. Apparently it was leading to deadlock as mentioned in the section "Avoid launching synchronous subtasks" in the Celery documentation here: http://docs.celeryproject.org/en/latest/userguide/tasks.html#tips-and-best-practices
So I got rid of the line:
sprNodesResult.get()
And changed the final result to a chain:
self.sprites = chain(getSprNodes.s(fileName, nodesStart, nodesEnd, self.header.numSprites),
                     getSprites.s(0, 32, fileName, self.palettes, self.header.palBankOff,
                                  self.header.onDemandDataSizeTotal))().get()
And it works! Now I just have to find a way to split this the way I want!
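For the splitting part, one hedged option (my sketch, not from the original post) is a chord: fan getSprites out over 32-node chunks and let an assumed aggregation task, here called collect_sprites, concatenate the JSON results, so nothing ever blocks on another task's result. spriteNodes, fileName, palettes and header stand in for the values computed in the constructor above:

from celery import chord

# Hedged sketch: collect_sprites is an assumed callback task (not in the original code)
# that concatenates the JSON chunk results returned by getSprites.
workflow = chord(
    (getSprites.s(spriteNodes, x, x + 32, fileName, palettes,
                  header.palBankOff, header.onDemandDataSizeTotal)
     for x in xrange(0, len(spriteNodes), 32)),
    collect_sprites.s())
async_result = workflow.apply_async()   # call async_result.get() outside any task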

return a list from class object

I am using the multiprocessing module to generate 35 dataframes, hoping this will save time. But the problem is that the class does not return anything. I expect the list of dataframes to be returned via self.dflist.
Here is how the dfnames list is created:
urls = []
fnames = []
dfnames = []
for x in xrange(100, 3600, 100):
    y = str(x)
    i = y.zfill(4)
    filename = 'DCHB_Town_Release_' + i + '.xlsx'
    url = "http://www.censusindia.gov.in/2011census/dchb/" + filename
    urls.append(url)
    fnames.append(filename)
    dfnames.append((filename, 'DCHB_Town_Release_' + i))
This is the class that uses the dfnames generated by the code above:
import pandas as pd
import multiprocessing

class mydf1():
    def __init__(self, dflist, jobs, dfnames):
        self.dflist = list()
        self.jobs = list()
        self.dfnames = dfnames

    def dframe_create(self, filename, dfname):
        print 'abc', filename, dfname
        dfname = pd.read_excel(filename)
        self.dflist.append(dfname)
        print self.dflist
        return self.dflist

    def mp(self):
        for f, d in self.dfnames:
            p = multiprocessing.Process(target=self.dframe_create, args=(f, d))
            self.jobs.append(p)
            p.start()
            #return self.dflist
        for j in self.jobs:
            j.join()
            print '%s.exitcode = %s' % (j.name, j.exitcode)
This class, when called like this:
dflist=[]
jobs=[]
x=mydf1(dflist, jobs, dfnames)
y=x.mp()
prints self.dflist correctly but does not return anything.
I can collect all dataframes sequentially, but to save time I need to use multiple processes simultaneously to generate the dataframes and add them to a list.
In your case I would prefer to write as little code as possible and use a Pool:
import pandas as pd
import logging
import multiprocessing

def dframe_create(filename):
    try:
        return pd.read_excel(filename)
    except Exception as e:
        logging.error("Something went wrong: %s", e, exc_info=1)
        return None

p = multiprocessing.Pool()
# Map over the plain filenames (fnames); dfnames holds (filename, dfname) tuples.
excel_files = p.map(dframe_create, fnames)

for f in excel_files:
    if f is not None:
        print 'Ready to work'
    else:
        print ':('
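If the (filename, dfname) pairing is still needed, the results can be zipped back onto the names afterwards (a small hedged addition of mine; Pool.map preserves input order, and fnames and dfnames were built in the same loop):

# Hedged addition: pair each DataFrame with its dfname; map() preserves input order.
frames_by_name = {name: df
                  for (fname, name), df in zip(dfnames, excel_files)
                  if df is not None}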
"Prints self.dflist correctly but does not return anything." That's because you don't have a return statement in the mp method, e.g.:
def mp(self):
    ...
    return self.dflist
It's not entirely clear what your issue is; however, you have to take some care here in that you can't just pass objects/lists across processes. That's why you have special objects (which lock while they make modifications to a list), so that you don't get tripped up when two processes try to make a change at the same time (and you only get one update).
That is, you have to use a list that comes from a multiprocessing Manager.
class mydf1():
    def __init__(self, dflist, jobs, dfnames):
        # multiprocessing has no plain .list(); a Manager-backed list is the shared,
        # process-safe version (perhaps seeded as Manager().list(dflist or ())).
        self.dflist = multiprocessing.Manager().list()
        self.jobs = list()
        self.dfnames = dfnames
However, you have a bigger problem: the whole point of multiprocessing is that processes may run/finish out of order, so keeping two parallel lists like this is doomed to fail. You should use a Manager-backed dict so that each DataFrame is saved unambiguously under its filename.
class mydf1():
    def __init__(self, dflist, jobs, dfnames):
        self.dfdict = multiprocessing.Manager().dict()
        ...

    def dframe_create(self, filename, dfname):
        print 'abc', filename, dfname
        df = pd.read_excel(filename)
        self.dfdict[dfname] = df
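A minimal end-to-end sketch of that approach (hedged; the glue code is mine, not from the original answer), using a Manager dict and the dfnames list built in the question:

import multiprocessing
import pandas as pd

def dframe_create(shared_dict, filename, dfname):
    # Each worker writes its DataFrame into the shared dict under its name.
    shared_dict[dfname] = pd.read_excel(filename)

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    frames = manager.dict()
    jobs = []
    for filename, dfname in dfnames:  # dfnames as built in the question
        p = multiprocessing.Process(target=dframe_create, args=(frames, filename, dfname))
        jobs.append(p)
        p.start()
    for p in jobs:
        p.join()
    dflist = [frames[name] for _, name in dfnames]  # ordered by dfnames, not by finish order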

reindexObject fails during FileField to BlobField migration in Plone 4.0.7

I'm trying to migrate from plone 3.3.5 to plone 4.0.7 and I'm stuck on a step that converts all the FileFields to BlobFields.
Plone's upgrade script successfully converts all native FileFields, but I have several custom AT-based classes which have to be converted manually. I've tried two ways of doing the conversion, both of which lead to the same error:
1. Using schemaextender as outlined in the Plone migration guide and a source code example.
2. Renaming all FileFields to blob fields and then running this script:
from AccessControl.SecurityManagement import newSecurityManager
from AccessControl import getSecurityManager
from Products.CMFCore.utils import getToolByName
from zope.app.component.hooks import setSite
from Products.contentmigration.migrator import BaseInlineMigrator
from Products.contentmigration.walker import CustomQueryWalker
from plone.app.blob.field import BlobField
import transaction  # needed for the savepoint below

admin = app.acl_users.getUserById("admin")
newSecurityManager(None, admin)
portal = app.plone
setSite(portal)

def find_all_types_fields(portal_catalog, type_instance_to_search):
    output = {}
    searched = []
    for k in portal_catalog():
        kobj = k.getObject()
        if kobj.__class__.__name__ in searched:
            continue
        searched.append(kobj.__class__.__name__)
        for field in kobj.schema.fields():
            if isinstance(field, type_instance_to_search):
                if kobj.__class__.__name__ in output:
                    output[kobj.__class__.__name__].append(field.__name__)
                else:
                    output[kobj.__class__.__name__] = [field.__name__]
    return output

def produce_migrator(field_map):
    source_class = field_map.keys()[0]
    fields = {}
    for x in field_map.values()[0]:
        fields[x] = None

    class FileBlobMigrator(BaseInlineMigrator):
        '''Migrating ExtensionBlobField (which is still a FileField) to BlobField'''
        src_portal_type = source_class
        src_meta_type = source_class
        fields_map = fields

        def migrate_data(self):
            '''Unfinished'''
            for k in self.fields_map.keys():
                #print "examining attributes"
                #import pdb; pdb.set_trace()
                #if hasattr(self.obj, k):
                if k in self.obj.schema.keys():
                    print("***converting attribute:", k)
                    field = self.obj.getField(k).get(self.obj)
                    mutator = self.obj.getField(k).getMutator(self.obj)
                    mutator(field)

        def last_migrate_reindex(self):
            '''Unfinished'''
            self.obj.reindexObject()

    return FileBlobMigrator

def consume_migrator(portal_catalog, migrator):
    walker = CustomQueryWalker(portal_catalog, migrator, full_transaction=True)
    transaction.savepoint(optimistic=True)
    walker_status = walker.go()
    return walker.getOutput()

def migrate_blobs(catalog, migrate_type):
    all_fields = find_all_types_fields(catalog, migrate_type)
    import pdb; pdb.set_trace()
    for k in [{k: all_fields[k]} for k in all_fields]:
        migrator = produce_migrator(k)
        print consume_migrator(catalog, migrator)

catalog = getToolByName(portal, 'portal_catalog')
migrate_blobs(catalog, BlobField)
The problem occurs on the self.obj.reindexObject() line, where I receive the following traceback:
2011-08-09 17:21:12 ERROR Zope.UnIndex KeywordIndex: unindex_object could not remove documentId -1945041983 from index object_provides. This should not happen.
Traceback (most recent call last):
  File "/home/alex/projects/plone4/eggs/Zope2-2.12.18-py2.6-linux-x86_64.egg/Products/PluginIndexes/common/UnIndex.py", line 166, in removeForwardIndexEntry
    indexRow.remove(documentId)
KeyError: -1945041983
> /home/alex/projects/plone4/eggs/Zope2-2.12.18-py2.6-linux-x86_64.egg/Products/PluginIndexes/common/UnIndex.py(192)removeForwardIndexEntry()
    191                           str(documentId), str(self.id)),
--> 192                           exc_info=sys.exc_info())
    193     else:
If I remove the line that triggers reindexing, the conversion completes successfully, but if I try to manually reindex the catalog later, every object that has been converted can no longer be found, and I'm a bit at a loss as to what to do now.
The site has LinguaPlone installed; maybe it has something to do with this?
One option would be to run the migration without the reindexObject() call and do a "Clear and Rebuild" in the catalog ZMI Advanced tab after migrating.
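If you'd rather do the rebuild from the migration script itself instead of clicking through the ZMI, here is a hedged sketch (assuming Plone 4's portal_catalog, whose CatalogTool exposes clearFindAndRebuild()):

# Hedged sketch, not from the original answer: the programmatic equivalent of the
# ZMI "Clear and Rebuild" button.
import transaction
catalog = getToolByName(portal, 'portal_catalog')
catalog.clearFindAndRebuild()   # clears the catalog and reindexes all content it finds
transaction.commit()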