I have a conceptually simple piece of code I want to parallelize, but the other threads I have found on this are too complicated, and I do not understand how to apply them to my case, or even whether they are applicable.
In my code, a function with multiple arguments is called inside a while loop and returns both an output and the exit condition for the loop. I want to parallelize that while loop. I am using Python 3.7.3.
Here is a simplified example:
import multiprocessing as mp
import numpy as np
import time
def foo(i, arg1, arg2):
n = np.random.rand()
n = arg1*n + arg2
if n > 0.9:
stop = True
else:
stop = False
return [i, n], stop
if __name__ == '__main__':
i = 0
stop = False
output = list()
while not stop:
out, stop = foo(i, 1, 0)
i = i + 1
if not stop:
output.append(out)
print(np.asarray(output))
Output:
[[ 0. 0.25295033]
[ 1. 0.53795096]
[ 2. 0.48774803]
[ 3. 0.09281972]
[ 4. 0.75053227]
[ 5. 0.30367072]
[ 6. 0.57043762]
[ 7. 0.4589554 ]
[ 8. 0.33231446]
[ 9. 0.76805717]
[10. 0.22486246]
[11. 0.69499273]
[12. 0.67616563]]
EDIT. I would like to "bump" this thread, as this is something I really need help with and cannot do by myself. Meta-etiquette says I should edit by adding value to the question, but I do not think I can add anything else: I just need to parallelize the code presented. I would really appreciate any (practical) feedback.
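One way this could be parallelized (a minimal sketch, assuming the iterations are independent and it is acceptable to evaluate a few extra iterations past the stop condition) is to run fixed-size chunks of calls through a multiprocessing.Pool and stop as soon as any result in a chunk reports the exit condition; the chunk size of 8 is an arbitrary choice:
import multiprocessing as mp
import numpy as np

def foo(i, arg1, arg2):
    # same function as in the question
    n = np.random.rand()
    n = arg1 * n + arg2
    return [i, n], n > 0.9

if __name__ == '__main__':
    output = []
    stop = False
    i = 0
    chunk = 8  # how many iterations to evaluate in parallel per round (arbitrary choice)
    with mp.Pool() as pool:
        while not stop:
            results = pool.starmap(foo, [(j, 1, 0) for j in range(i, i + chunk)])
            for out, hit in results:
                if hit:
                    stop = True
                    break
                output.append(out)
            i += chunk
    print(np.asarray(output))
Note that a whole chunk is evaluated before the stop condition is checked, so this may call foo a few more times than the serial loop; whether that is acceptable depends on the actual problem.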
I am using a version of the distributed TensorFlow example at https://www.tensorflow.org/deploy/distributed
Here is my code in "mnist_trainer.py".
import math
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
tf.logging.set_verbosity(tf.logging.INFO)
# Flags for defining the tf.train.ClusterSpec
tf.app.flags.DEFINE_string("ps_hosts", "",
"Comma-separated list of hostname:port pairs")
tf.app.flags.DEFINE_string("worker_hosts", "",
"Comma-separated list of hostname:port pairs")
# Flags for defining the tf.train.Server
tf.app.flags.DEFINE_string("job_name", "", "One of 'ps', 'worker'")
tf.app.flags.DEFINE_integer("task_index", 0, "Index of task within the job")
tf.app.flags.DEFINE_integer("hidden_units", 100,
"Number of units in the hidden layer of the NN")
tf.app.flags.DEFINE_string("data_dir", "/home/anijsure/mnist_data",
"Directory for storing mnist data")
tf.app.flags.DEFINE_integer("batch_size", 100, "Training batch size")
FLAGS = tf.app.flags.FLAGS
IMAGE_PIXELS = 28
def main(_):
print "Starting"
ps_hosts = FLAGS.ps_hosts.split(",")
worker_hosts = FLAGS.worker_hosts.split(",")
# Create a cluster from the parameter server and worker hosts.
print "Cluster starting"
cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
# Create and start a server for the local task.
print "Server starting"
server = tf.train.Server(cluster,
job_name=FLAGS.job_name,
task_index=FLAGS.task_index)
if FLAGS.job_name == "ps":
server.join()
elif FLAGS.job_name == "worker":
print "Job : WORKER"
# Assigns ops to the local worker by default.
with tf.device(tf.train.replica_device_setter(
worker_device="/job:worker/task:%d" % FLAGS.task_index,
cluster=cluster)):
mytask = tf.constant(FLAGS.task_index, name="mytask")
mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
dataset = tf.data.Dataset.from_tensor_slices((mnist.train.images, mnist.train.labels))
# Create batches of data
dataset = dataset.batch(FLAGS.batch_size)
# Create an iterator, to go over the dataset
iterator = dataset.make_initializable_iterator()
X,Y = iterator.get_next()
# Variables of the hidden layer
hid_w = tf.Variable(
tf.truncated_normal([IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units],
stddev=1.0 / IMAGE_PIXELS), name="hid_w")
hid_b = tf.Variable(tf.zeros([FLAGS.hidden_units]), name="hid_b")
# Variables of the softmax layer
sm_w = tf.Variable(
tf.truncated_normal([FLAGS.hidden_units, 10],
stddev=1.0 / math.sqrt(FLAGS.hidden_units)),
name="sm_w")
sm_b = tf.Variable(tf.zeros([10]), name="sm_b")
hid_lin = tf.nn.xw_plus_b(X, hid_w, hid_b)
hid = tf.nn.relu(hid_lin)
y = tf.nn.xw_plus_b(hid, sm_w, sm_b)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=y), name="loss")
global_step = tf.train.get_or_create_global_step()
train_op = tf.train.AdagradOptimizer(0.01).minimize(
loss, global_step=global_step)
# The StopAtStepHook handles stopping after running given steps.
chiefhooks=[tf.train.StopAtStepHook(num_steps=25)]
allhooks=[tf.train.LoggingTensorHook(tensors={"Task": "mytask","loss":"loss", "Step":"global_step"}, every_n_iter=1)]
# The MonitoredTrainingSession takes care of session initialization,
# restoring from a checkpoint, saving to a checkpoint, and closing when done
# or an error occurs.
with tf.train.MonitoredTrainingSession(master=server.target,
is_chief=(FLAGS.task_index == 0),
checkpoint_dir="/tmp/train_logs_%d" % FLAGS.task_index,
hooks=allhooks, chief_only_hooks=chiefhooks) as mon_sess:
mon_sess.run(iterator.initializer)
while not mon_sess.should_stop():
# Run a training step asynchronously.
# See `tf.train.SyncReplicasOptimizer` for additional details on how to
# perform *synchronous* training.
# mon_sess.run handles AbortedError in case of preempted PS.
_ = mon_sess.run([train_op])
if __name__ == "__main__":
tf.app.run()
I run it like so:
HOSTS=<node0>:2222
WORKERS=<node1>:2222,<node1>:2223,<node1>:2224
python mnist_trainer.py --ps_hosts=$HOSTS --worker_hosts=$WORKERS --job_name=ps --task_index=0 &
python mnist_trainer.py --data_dir mnist_data --ps_hosts=$HOSTS --worker_hosts=$WORKERS --job_name=worker --task_index=0 2>&1 | tee worker0.log &
python mnist_trainer.py --data_dir mnist_data_1 --ps_hosts=$HOSTS --worker_hosts=$WORKERS --job_name=worker --task_index=1 2>&1 | tee worker1.log &
python mnist_trainer.py --data_dir mnist_data_2 --ps_hosts=$HOSTS --worker_hosts=$WORKERS --job_name=worker --task_index=2 2>&1 | tee worker2.log &
I have tried this with 1 PS and 2 or 3 workers; both nodes are CPU machines. The PS is on node0 and the workers are on different ports of node1. In both the 2-worker and 3-worker cases, the chief worker (the task-0 worker) does not seem to be making any updates at all. I have set the StopAtStepHook to 25 on the chief worker only, yet training seems to stop at global_step=549 in the 2-worker case and global_step=1098 in the 3-worker case. I am printing the worker task number with the LoggingTensorHook, and it only shows tasks 1 and 2 logging anything; only on the last iteration does task 0 log the tensors.
Is this expected behaviour? Is the chief worker only supposed to keep track of the monitored session, checkpointing, etc.?
Considering that training does stop at this magic number of ~550 iterations, something on the chief worker is indeed triggering the stop.
What is the chief worker doing and how is it keeping track of the stopping step?
Usually the chief worker is responsible for initializing the graph and saving model checkpoints for the training cluster.
According to the TensorFlow documentation for tf.estimator.train_and_evaluate:
…[T]he chief worker also does the model training job, similar to other non-chief training workers (see next paragraph). In addition to the model training, it manages some extra work, e.g., checkpoint saving and restoring, writing summaries, etc.
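To make that concrete with the question's code: the chief runs the same train_op as the other workers, and the only difference is the StopAtStepHook passed via chief_only_hooks. If the goal is for every worker to stop at the same shared global step, one option (a sketch reusing the names from the question's script, not a confirmed explanation of the ~550-step behaviour) is to pass the stop hook to all workers via hooks and use last_step so it refers to an absolute global step:
# Sketch: every worker stops once the shared global_step (stored on the PS) passes 25.
stop_hook = tf.train.StopAtStepHook(last_step=25)
log_hook = tf.train.LoggingTensorHook(
    tensors={"Task": "mytask", "loss": "loss", "Step": "global_step"}, every_n_iter=1)

with tf.train.MonitoredTrainingSession(master=server.target,
                                       is_chief=(FLAGS.task_index == 0),
                                       # a single shared checkpoint dir is the usual convention;
                                       # only the chief writes to it
                                       checkpoint_dir="/tmp/train_logs",
                                       hooks=[stop_hook, log_hook]) as mon_sess:
    mon_sess.run(iterator.initializer)
    while not mon_sess.should_stop():
        mon_sess.run(train_op)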
I am currently working on a project using Distributed TensorFlow. My goal is to run several independent graphs across several different machines.
As an example, I want to do something like this (assume that a server is running on each machine):
import tensorflow as tf
a = tf.constant(3)
b = tf.constant(2)
x = tf.mul(a,b) # To be run on "grpc://www.example0.com:2222"
y = tf.mul(a,b) # To be run on "grpc://www.example1.com:2222"
z = tf.mul(a,b) # To be run on "grpc://www.example2.com:2222"
with tf.Session() as sess:
sess.run([x,y,z]) # Ops x,y,z are run on different machines in parallel
My current attempt at this is shown in the following code. However, this code runs the sessions serially, whereas I want them to be executed in a parallel, distributed manner.
import tensorflow as tf
a = tf.constant(3)
b = tf.constant(2)
x = tf.mul(a,b) # To be run on "grpc://www.example0.com:2222"
y = tf.mul(a,b) # To be run on "grpc://www.example1.com:2222"
z = tf.mul(a,b) # To be run on "grpc://www.example2.com:2222"
with tf.Session("grpc://www.example0.com:2222") as sess:
sess.run(x)
with tf.Session("grpc://www.example1.com:2222") as sess:
sess.run(y)
with tf.Session("grpc://www.example2.com:2222") as sess:
sess.run(z)
While reading the documentation about Distributed TensorFlow, I found that tf.device allows me to set which CPU or GPU a TensorFlow op runs on. Is there something similar that allows me to set the session target, to specify which machine will run which op? Or is there another way of distributing TensorFlow ops?
I'm currently struggling with this myself. The following is mostly cribbed from the tensorflow distributed how-to guide.
You can pin ops to a job/task using tf.device:
clusterspec = \
{ "worker":
[ "www.example0.com:2222"
, "www.example1.com:2222"
, "www.example2.com:2222"
]
, "master":
[ "localhost:2222" ]
}
cluster = tf.ClusterSpec(clusterspec)
a = tf.constant(3)
b = tf.constant(2)
# pin 'x' to www.example0.com
with tf.device("/job:worker/task:0"):
x = tf.mul(a, b)
# pin 'y' to www.example1.com
with tf.device("/job:worker/task:1"):
y = tf.mul(a, b)
server = tf.train.Server(cluster, job_name="master", task_index=0)
with tf.Session(server.target) as sess:
# run the ops
print(sess.run([x, y]))
However, at least for me, this only works if all the worker processes are on the same machine as the master. Otherwise, it hangs at sess.run.
This turned out to be a problem with the use of localhost in the cluster specification. If you share the same cluster specification between servers, don't use localhost; instead, use the IP address or hostname of the computer that you think localhost refers to. In the case of the above example, suppose that you're running the master script on www.master.com. You have two options:
1. One clusterspec per server using localhost
On each server, localhost refers to the machine running the server.
# on www.example0.com
clusterspec = \
{ "worker":
[ "localhost:2222"
, "www.example1.com:2222"
, "www.example2.com:2222"
]
, "master":
[ "www.master.com:2222" ]
}
cluster = tf.ClusterSpec(clusterspec)
server = tf.train.Server(cluster, job_name="worker", task_index=0)
server.join()
# on www.example1.com
clusterspec = \
{ "worker":
[ "www.example0.com:2222"
, "localhost:2222"
, "www.example2.com:2222"
]
, "master":
[ "www.master.com:2222" ]
}
cluster = tf.ClusterSpec(clusterspec)
server = tf.train.Server(cluster, job_name="worker", task_index=1)
server.join()
# on www.example2.com
clusterspec = \
{ "worker":
[ "www.example0.com:2222"
, "www.example1.com:2222"
, "localhost:2222"
]
, "master":
[ "www.master.com:2222" ]
}
cluster = tf.ClusterSpec(clusterspec)
server = tf.train.Server(cluster, job_name="worker", task_index=2)
server.join()
# on www.master.com
clusterspec = \
{ "worker":
[ "www.example0.com:2222"
, "www.example1.com:2222"
, "www.example2.com:2222"
]
, "master":
[ "localhost:2222" ]
}
cluster = tf.ClusterSpec(clusterspec)
a = tf.constant(3)
b = tf.constant(2)
with tf.device("/job:worker/task:0"):
x = tf.mul(a, b)
with tf.device("/job:worker/task:1"):
y = tf.mul(a, b)
server = tf.train.Server(cluster, job_name="master", task_index=0)
with tf.Session(server.target) as sess:
print(sess.run([x, y]))
2. Shared clusterspec
One cluster specification, using IP addresses / domain names that can all be seen from every node.
Saved in clusterspec.json:
{ "worker":
[ "www.example0.com:2222"
, "www.example1.com:2222"
, "www.example2.com:2222"
]
, "master":
[ "www.master.com:2222" ]
}
Then on each worker:
import json
with open('clusterspec.json', 'r') as f:
clusterspec = json.load(f)
cluster = tf.ClusterSpec(clusterspec)
server = tf.train.Server(cluster, job_name="worker", task_index=<INDEX OF TASK>)
Then on the master:
import json
with open('clusterspec.json', 'r') as f:
clusterspec = json.load(f)
cluster = tf.ClusterSpec(clusterspec)
a = tf.constant(3)
b = tf.constant(2)
with tf.device("/job:worker/task:0"):
x = tf.mul(a, b)
with tf.device("/job:worker/task:1"):
y = tf.mul(a, b)
server = tf.train.Server(cluster, job_name="master", task_index=0)
with tf.Session(server.target) as sess:
print(sess.run([x, y]))
This is a how-to which I believe is missing from the TF examples.
Task:
1. samples for each class are given in a separate directory, so the labels are indirect (i.e. given by the directory)
2. decouple loading and computation in TF
Each separate bit can be found elsewhere, but I think having them all together in one place will save a lot of time for TF beginners (like myself).
Let's tackle 1. In my case it is two sets of images:
# all filenames for .jpg in dir
# - list of fnames
# - list of labels
def path_fnames(f_path, label, ext = ['.jpg', '.jpeg']):
f_n = [f_path+'/'+f for f in sorted(os.listdir(f_path)) if os.path.splitext(f)[1].lower() in ext]
f_l = [label] * len(f_n)
return f_n, f_l
#
def dense_to_one_hot(labels_dense, num_classes=10, dtype=np.float32):
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = np.arange(num_labels) * num_classes
labels_one_hot = np.zeros((num_labels, num_classes),dtype=dtype)
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
data_dir = '/mnt/dataset/'
dir_1 = '/class_1'
dir_2 = '/class_2'
# --- get filenames for data ---
dpath = [data_dir+dir_1, data_dir+dir_2]
f_n1, f_l1 = path_fnames(dpath[0], 0)
f_n2, f_l2 = path_fnames(dpath[1], 1)
# --- create one-hot labels ---
ohl = dense_to_one_hot(np.asarray(f_l1+f_l2), num_classes=2, dtype = np.float32)
fnames = f_n1+f_n2; # one-hot labels created in this sequence
Now we have all file-names and one-hot labels preloaded.
Let's move on to 2.
It is based on "How to prefetch data using a custom python function in tensorflow". In short, it has:
a custom image reader (replace with yours)
a queue fnl_q with [filename, label] pairs, which the reader feeds from
a queue proc_q with [sample, label] pairs, which feeds the processing op some_op
a thread which performs read_op to get a [sample, label] pair and enqueue_op to put the pair into proc_q; the thread is controlled by a tf.Coordinator
some_op, which first gets data from proc_q via dequeue_many() and then does the rest of the computation (this could also be put in a separate thread).
Notes:
feature_read_op and label_read_op are two separate ops.
I use sleep() to slow down and control the op; this is only for test purposes.
I have separated the "feeding" and "calculation" parts; in a real case just run them in parallel.
print 'TF version:', tf.__version__
# --- params ----
im_s = [30, 30, 1] # target image size
BATCH_SIZE = 16
# image reader
# - fnl_queue: queue with [fn l] pairs
# Notes
# - to resize: image_tensor = tf.image.resize_image_with_crop_or_pad(image_tensor, HEIGHT, WIDTH)
# - how about image preprocessing?
def img_reader_jpg(fnl_queue, ch = 3, keep = False):
fn, label = fnl_queue.dequeue()
if keep:
fnl_queue.enqueue([fn, label])
img_bytes = tf.read_file(fn)
img_u8 = tf.image.decode_jpeg(img_bytes, channels=ch)
img_f32 = tf.cast(img_u8, tf.float32)/256.0
#img_4 = tf.expand_dims(img_f32,0)
return img_f32, label
# load [feature, label] and enqueue to processing queue
# - sess: tf session
# - coord: tf Coordinator
# - [fr_op, lr_op ]: feature_read_op label_read_op
# - enqueue_op: [f l] pairs enqueue op
def load_and_enqueue(sess, coord, feature_read_op, label_read_op , enqueue_op):
i = 0
while not coord.should_stop():
# for testing purpose
time.sleep(0.1)
#print 'load_and_enqueue i=',i
#i = i +1
feature, label = sess.run([feature_read_op, label_read_op ])
feed_dict = {feature_input: feature,
label_input : label}
sess.run(enqueue_op, feed_dict=feed_dict)
# --- TF part ---
# filenames and labels are pre-loaded
fv = tf.constant(fnames)
lv = tf.constant(ohl)
#fnl_q = tf.FIFOQueue(len(fnames), [tf.string, tf.float32])
fnl_q = tf.RandomShuffleQueue(len(fnames), 0, [tf.string, tf.float32])
do_enq = fnl_q.enqueue_many([fv, lv])
# reading_op: feature_read_op label_read_op
feature_read_op, label_read_op = img_reader_jpg(fnl_q, ch = im_s[2])
# samples queue
f_s = im_s
l_s = 2
feature_input = tf.placeholder(tf.float32, shape=f_s, name='feature_input')
label_input = tf.placeholder(tf.float32, shape=l_s, name='label_input')
#proc_q = tf.RandomShuffleQueue(len(fnames), 0, [tf.float32, tf.float32], shapes=[f_s, l_s])
proc_q = tf.FIFOQueue(len(fnames), [tf.float32, tf.float32], shapes=[f_s, l_s])
enqueue_op = proc_q.enqueue([feature_input, label_input])
# test:
# - some op
img_batch, lab_batch = proc_q.dequeue_many(BATCH_SIZE)
some_op = [img_batch, lab_batch]
# service ops
init_op = tf.initialize_all_variables()
# let run stuff
with tf.Session() as sess:
sess.run(init_op)
sess.run(do_enq)
print "fnl_q.size:", fnl_q.size().eval()
print "proc_q.size:", proc_q.size().eval()
# --- test thread stuff ---
# - fill proc_q
coord = tf.train.Coordinator()
t = threading.Thread(target=load_and_enqueue, args = (sess, coord, feature_read_op, label_read_op , enqueue_op))
t.start()
time.sleep(2.1)
coord.request_stop()
coord.join([t])
print "fnl_q.size:", fnl_q.size().eval()
print "proc_q.size:", proc_q.size().eval()
# - process a bit
ss = sess.run(some_op)
print 'ss[0].shape', ss[0].shape
print ' ss[1]:\n', ss[1]
print "fnl_q.size:", fnl_q.size().eval()
print "proc_q.size:", proc_q.size().eval()
print 'ok'
Typical output:
TF version: 0.6.0
fnl_q.size: 1225
proc_q.size: 0
fnl_q.size: 1204
proc_q.size: 21
ss[0].shape (16, 30, 30, 1)
ss[1]:
[[ 0. 1.]
[ 1. 0.]
[ 1. 0.]
[ 0. 1.]
[ 0. 1.]
[ 1. 0.]
[ 1. 0.]
[ 0. 1.]
[ 1. 0.]
[ 0. 1.]
[ 0. 1.]
[ 1. 0.]
[ 1. 0.]
[ 0. 1.]
[ 1. 0.]
[ 0. 1.]]
fnl_q.size: 1204
proc_q.size: 5
ok
All as expected:
a batch of [sample, label] pairs is created
the pairs are shuffled
The only thing left is to apply TF as it is intended to be used, by replacing some_op :)
And a question:
One observed problem: if I use tf.FIFOQueue for the file names and tf.RandomShuffleQueue for the samples, shuffling doesn't happen. However, the other way around (as in the code above) it shuffles perfectly.
Is there any problem with shuffling for
tf.RandomShuffleQueue(len(fnames), 0, [tf.float32, tf.float32], shapes=[f_s, l_s]) ?
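One possible factor (an assumption, not something verified against this exact code): a RandomShuffleQueue only shuffles among the elements currently buffered in it, and its min_after_dequeue argument controls how large that buffer must be before dequeues are allowed. The sample queue is filled one element at a time and drained in batches, so with min_after_dequeue=0 there may be almost nothing to shuffle over. A sketch with a nonzero minimum (the values are illustrative):
# Require roughly 10 batches to be buffered before dequeue_many() may proceed,
# so each batch is drawn from a reasonably large pool of samples.
min_after_dequeue = 10 * BATCH_SIZE
capacity = min_after_dequeue + 20 * BATCH_SIZE
proc_q = tf.RandomShuffleQueue(capacity, min_after_dequeue,
                               [tf.float32, tf.float32], shapes=[f_s, l_s])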
ADD:
A version with two threads:
one to refill/update/change the file-name queue,
a second to fill the samples into the processing queue.
I also added the correct way to stop the threads.
def load_and_enqueue(sess, coord, feature_read_op, label_read_op , enqueue_op):
try:
while not coord.should_stop():
feature, label = sess.run([feature_read_op, label_read_op ])
feed_dict = {feature_input: feature,
label_input : label}
sess.run(enqueue_op, feed_dict=feed_dict)
except Exception as e:
return
# periodically check the state of fnl queue and if needed refill it
# - enqueue_op: 'refill' file-name_label queue
def enqueue_fnl(sess, coord, fnl_q, enqueue_op):
try:
while not coord.should_stop():
time.sleep(0.5)
s = sess.run(fnl_q.size())
if s < (9*BATCH_SIZE) :
sess.run(enqueue_op)
except Exception as e:
return
# -- ops for feed part --
# filenames and labels are pre-loaded
fv = tf.constant(fnames)
lv = tf.constant(ohl)
# read op
fnl_q = tf.RandomShuffleQueue(len(fnames)*2, 0, [tf.string, tf.float32], name = 'fnl_q') # add some margin for re-fill to fit
do_fnl_enq = fnl_q.enqueue_many([fv, lv])
feature_read_op, label_read_op = img_reader_jpg(fnl_q, ch = IMG_SIZE[2])
# samples queue
feature_input = tf.placeholder(tf.float32, shape=IMG_SIZE, name='feature_input')
label_input = tf.placeholder(tf.float32, shape=LAB_SIZE, name='label_input')
proc_q = tf.FIFOQueue(len(fnames)*3, [tf.float32, tf.float32], shapes=[IMG_SIZE, LAB_SIZE], name = 'fe_la_q')
enqueue_op = proc_q.enqueue([feature_input, label_input])
# -- ops for training and eval
img_batch, lab_batch = proc_q.dequeue_many(BATCH_SIZE)
# ... here is your model (elided in the original)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, lab_ph))
optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)
with tf.Session() as sess:
coord = tf.train.Coordinator()
t_le = threading.Thread(target=load_and_enqueue, args = (sess, coord, feature_read_op, label_read_op , enqueue_op) , name = 'load_and_enqueue')
t_re = threading.Thread(target=enqueue_fnl, args = (sess, coord, fnl_q, do_fnl_enq), name = 'enqueue_fnl') # re-enq thread i.e. refiling filename queue
t_le.start()
t_re.start()
try:
# training
for step in xrange(823):
# some proc
img_v, lab_v = sess.run([img_batch, lab_batch])
feed_dict = { img_ph : img_v,
lab_ph : lab_v,
keep_prob: 0.7}
_, loss_v = sess.run([optimizer, loss], feed_dict = feed_dict)
except Exception as e:
print 'Training: Exception:', e
# stop threads
coord.request_stop() # ask to stop
sess.run(fnl_q.close(cancel_pending_enqueues=True)) # tell proc_q don't wait for enque anymore
sess.run(proc_q.close(cancel_pending_enqueues=True)) # tell proc_q don't wait for enque anymore
coord.join([t_le, t_re], stop_grace_period_secs=8)
I am trying to figure out how to get information on unacknowledged messages. Where are these stored? Playing with celery inspect, it seems that once a message gets acknowledged it is processed and you can follow its state. Assuming you have a results backend, you can then see its result. But from the time you call delay until the message gets acknowledged, it's in a black hole.
Where are noAcks stored?
How do I find out how "deep" the noAcks list is? In other words, how many are there, and where is my task in the list?
While not exactly germane to the problem, here is what I'm working with.
import pprint

from celery.app import app_or_default
app = app_or_default()
inspect = app.control.inspect()
# Now if I want "RECEIVED" jobs..
data = inspect.reserved()
# or "ACTIVE" jobs..
data = inspect.active()
# or "REVOKED" jobs..
data = inspect.revoked()
# or scheduled jobs.. (Assuming these are time based??)
data = inspect.scheduled()
# FILL ME IN FOR UNACK JOBS!!
# data = inspect.??
# This will never work for tasks that aren't in one of the above buckets..
pprint.pprint(inspect.query_task([tasks]))
I really appreciate your advice and help on this.
They are the tasks in inspect.reserved() that have 'acknowledged': False.
from celery.app import app_or_default
app = app_or_default()
inspect = app.control.inspect()
# those that have been sent to a worker and are thus reserved
# from being sent to another worker, but may or may not be acknowledged as received by that worker
data = inspect.reserved()
{'celery.tasks': [{'acknowledged': False,
'args': '[]',
'delivery_info': {'exchange': 'tasks',
'priority': None,
'routing_key': 'celery'},
'hostname': 'celery.tasks',
'id': '527961d4-639f-4002-9dc6-7488dd8c8ad8',
'kwargs': '{}',
'name': 'globalapp.tasks.task_loop_tick',
'time_start': None,
'worker_pid': None},
{'acknowledged': False,
'args': '[]',
'delivery_info': {'exchange': 'tasks',
'priority': None,
'routing_key': 'celery'},
'hostname': 'celery.tasks',
'id': '09d5b726-269e-48d0-8b0e-86472d795906',
'kwargs': '{}',
'name': 'globalapp.tasks.task_loop_tick',
'time_start': None,
'worker_pid': None},
{'acknowledged': False,
'args': '[]',
'delivery_info': {'exchange': 'tasks',
'priority': None,
'routing_key': 'celery'},
'hostname': 'celery.tasks',
'id': 'de6d399e-1b37-455c-af63-a68078a9cf7c',
'kwargs': '{}',
'name': 'globalapp.tasks.task_loop_tick',
'time_start': None,
'worker_pid': None}],
'fastlane.tasks': [],
'images.tasks': [],
'mailer.tasks': []}
After hours of reviewing Celery, I've come to the conclusion that it's just not possible using pure Celery. However, it is possible to loosely track the entire process. Here is the code I used to look up the unacknowledged count; most of this can be done using the utilities in Celery.
I am still unable to query the underlying unacknowledged tasks by id, but...
If you have the RabbitMQ management plug-in installed, you can query its HTTP API:
# Imports added for completeness; `vhost`, `settings`, and `log` are assumed to come
# from the surrounding application code in the original. The wrapping function name is
# only illustrative, since the original fragment clearly lives inside a function.
import requests

def get_celery_queue_counts(vhost):
    data = {}
    base_url = "http://localhost:55672"
    url = base_url + "/api/queues/{}/".format(vhost)
    req = requests.get(url, auth=(settings.RABBITMQ_USER, settings.RABBITMQ_PASSWORD))
    if req.status_code != 200:
        log.error(req.text)
    else:
        request_data = req.json()
        for queue in request_data:
            # TODO if we know what queue the task is in then we can nail this.
            if queue.get('name') == "celery":
                data['state'] = "Unknown"
                if queue.get('messages'):
                    data['messages'] = queue.get('messages')
                    data['messages_ready'] = queue.get('messages_ready')
                    data['messages_unacknowledged'] = queue.get('messages_unacknowledged')
                break
    return data
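For completeness, a minimal usage sketch, assuming the fragment is wrapped in a helper as above (the name get_celery_queue_counts is illustrative, and "%2F" is the URL-encoded default RabbitMQ vhost "/"):
counts = get_celery_queue_counts(vhost="%2F")
print(counts.get('messages_unacknowledged'))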