1 | # -*- coding: UTF-8 -*- |
---|
2 | """Easy to use object-oriented thread pool framework. |
---|
3 | |
---|
4 | A thread pool is an object that maintains a pool of worker threads to perform |
---|
5 | time consuming operations in parallel. It assigns jobs to the threads |
---|
6 | by putting them in a work request queue, where they are picked up by the |
---|
7 | next available thread. This then performs the requested operation in the |
---|
8 | background and puts the results in another queue. |
---|
9 | |
---|
10 | The thread pool object can then collect the results from all threads from |
---|
11 | this queue as soon as they become available or after all threads have |
---|
12 | finished their work. It's also possible to define callbacks to handle |
---|
13 | each result as it comes in. |
---|
14 | |
---|
15 | The basic concept and some code was taken from the book "Python in a Nutshell, |
---|
16 | 2nd edition" by Alex Martelli, O'Reilly 2006, ISBN 0-596-10046-9, from section |
---|
17 | 14.5 "Threaded Program Architecture". I wrapped the main program logic in the |
---|
18 | ThreadPool class, added the WorkRequest class and the callback system and |
---|
19 | tweaked the code here and there. Kudos also to Florent Aide for the exception |
---|
20 | handling mechanism. |
---|
21 | |
---|
22 | Basic usage:: |
---|
23 | |
---|
24 | >>> pool = ThreadPool(poolsize) |
---|
25 | >>> requests = makeRequests(some_callable, list_of_args, callback) |
---|
26 | >>> [pool.putRequest(req) for req in requests] |
---|
27 | >>> pool.wait() |
---|
28 | |
---|
29 | See the end of the module code for a brief, annotated usage example. |
---|
30 | |
---|
31 | Website : http://chrisarndt.de/projects/threadpool/ |
---|
32 | |
---|
33 | """ |
---|
# Markup format used by the docstrings in this module.
__docformat__ = "restructuredtext en"

# Public names of this module, i.e. what ``from <module> import *`` exports.
__all__ = [
    'makeRequests',
    'NoResultsPending',
    'NoWorkersAvailable',
    'ThreadPool',
    'WorkRequest',
    'WorkerThread'
]

# Module metadata. NOTE(review): the ``$Revision$`` / ``$Date$`` values look
# like version-control keyword expansions -- confirm before editing by hand.
__author__ = "Christopher Arndt"
__version__ = "1.2.5"
__revision__ = "$Revision: 354 $"
__date__ = "$Date: 2008-11-19 18:34:46 +0100 (Wed, 19 Nov 2008) $"
__license__ = 'MIT license'
---|
50 | |
---|
51 | |
---|
52 | # standard library modules |
---|
53 | import sys |
---|
54 | import threading |
---|
55 | import Queue |
---|
56 | import traceback |
---|
57 | |
---|
58 | |
---|
59 | # exceptions |
---|
class NoResultsPending(Exception):
    """Raised by ``ThreadPool.poll`` when no work requests are pending."""
---|
63 | |
---|
class NoWorkersAvailable(Exception):
    """Raised by a blocking ``ThreadPool.poll`` when the pool has no workers."""
---|
67 | |
---|
68 | |
---|
69 | # internal module helper functions |
---|
70 | def _handle_thread_exception(request, exc_info): |
---|
71 | """Default exception handler callback function. |
---|
72 | |
---|
73 | This just prints the exception info via ``traceback.print_exception``. |
---|
74 | |
---|
75 | """ |
---|
76 | traceback.print_exception(*exc_info) |
---|
77 | |
---|
78 | |
---|
79 | # utility functions |
---|
def makeRequests(callable_, args_list, callback=None,
        exc_callback=_handle_thread_exception):
    """Build one ``WorkRequest`` per entry of ``args_list`` for ``callable_``.

    This is a shortcut for the common case where the same callable has to be
    invoked several times, each time with different arguments.

    Every item of ``args_list`` describes the arguments of one invocation:

    * a tuple is taken as ``(positional_args, keyword_args)``, i.e. its first
      item is used as the list of positional arguments and its second item as
      the dictionary of keyword arguments;
    * any other value is passed as the single positional argument.

    ``callback`` and ``exc_callback`` are attached to every request that is
    created; see the docstring of ``WorkRequest`` for their meaning.

    Returns the list of created requests, in the order of ``args_list``.

    """
    def build_request(item):
        # Only tuples are unpacked; every other value is one plain argument.
        if isinstance(item, tuple):
            positional, keywords = item[0], item[1]
        else:
            positional, keywords = [item], None
        return WorkRequest(callable_, positional, keywords,
            callback=callback, exc_callback=exc_callback)

    return [build_request(item) for item in args_list]
---|
110 | |
---|
111 | |
---|
112 | # classes |
---|
class WorkerThread(threading.Thread):
    """Daemon thread that executes work requests taken from a queue.

    A worker repeatedly takes a request off the requests queue, calls it and
    puts a ``(request, outcome)`` tuple on the results queue. It keeps doing
    so until ``dismiss()`` has been called.

    """

    def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
        """Initialise the thread in daemonic mode and start it right away.

        ``requests_queue`` is the queue work requests are read from and
        ``results_queue`` the queue outcomes are written to (``ThreadPool``
        passes ``Queue.Queue`` instances for both).

        ``poll_timeout`` is the number of seconds a single read on the
        requests queue may block before the dismissal flag is checked again.

        Any further keyword arguments are handed to
        ``threading.Thread.__init__``.

        """
        threading.Thread.__init__(self, **kwds)
        self.setDaemon(1)
        self._dismissed = threading.Event()
        self._poll_timeout = poll_timeout
        self._requests_queue = requests_queue
        self._results_queue = results_queue
        # All state is in place, so the thread may start running now.
        self.start()

    def run(self):
        """Process requests from the queue until the worker is dismissed."""
        while not self._dismissed.isSet():
            # Wait at most self._poll_timeout seconds for a request, so the
            # dismissal flag is re-checked regularly even on an idle queue.
            try:
                request = self._requests_queue.get(True, self._poll_timeout)
            except Queue.Empty:
                continue
            if self._dismissed.isSet():
                # Dismissed while waiting: hand the request back so another
                # worker can pick it up, then stop.
                self._requests_queue.put(request)
                break
            try:
                result = request.callable(*request.args, **request.kwds)
                self._results_queue.put((request, result))
            except:
                # Deliberately catches everything: any failure is reported
                # through the results queue instead of killing the worker.
                request.exception = True
                self._results_queue.put((request, sys.exc_info()))

    def dismiss(self):
        """Flag the thread to exit once it is done with its current job."""
        self._dismissed.set()
---|
165 | |
---|
166 | |
---|
class WorkRequest:
    """A callable plus its arguments, ready to be put in the request queue.

    For the common case of many requests for the same callable with
    different arguments, see the module function ``makeRequests``.

    """

    def __init__(self, callable_, args=None, kwds=None, requestID=None,
            callback=None, exc_callback=_handle_thread_exception):
        """Set up a work request for ``callable_`` and attach its callbacks.

        ``args`` is the list of positional arguments and ``kwds`` the
        dictionary of keyword arguments the callable is invoked with by a
        worker thread. Both default to empty.

        ``callback``, if given, is called when the result of the request is
        picked up from the result queue. It receives two positional
        arguments: the ``WorkRequest`` object and the result of the callable,
        in that order. Additional information for the callback can be stored
        as attributes on the request object.

        ``exc_callback`` is called instead when the callable raised an
        exception. It also receives two positional arguments: the
        ``WorkRequest`` and the exception details as returned by
        ``sys.exc_info()``. The default just prints the exception via
        ``traceback.print_exception``; pass ``None`` to have no exception
        handler callback.

        ``requestID``, if given, must be hashable: its hash is stored as
        ``self.requestID`` and used by ``ThreadPool`` as dictionary key for
        this request. Without it, ``id(self)`` is used. A ``TypeError`` is
        raised for an unhashable ``requestID``.

        """
        if requestID is not None:
            try:
                request_key = hash(requestID)
            except TypeError:
                raise TypeError("requestID must be hashable.")
        else:
            request_key = id(self)
        self.requestID = request_key
        self.callable = callable_
        self.args = args or []
        self.kwds = kwds or {}
        self.callback = callback
        self.exc_callback = exc_callback
        # Set to True by the worker thread when the callable raises.
        self.exception = False

    def __str__(self):
        details = (self.requestID, self.args, self.kwds, self.exception)
        return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % details
---|
220 | |
---|
class ThreadPool:
    """A thread pool, distributing work requests and collecting results.

    Work requests are handed to the worker threads through a requests queue.
    Their outcomes come back through a results queue and are dispatched to
    the callbacks of the requests by ``poll()`` and ``wait()``.

    See the module docstring for more information.

    """

    def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
        """Set up the thread pool and start num_workers worker threads.

        ``num_workers`` is the number of worker threads to start initially.

        If ``q_size > 0`` the size of the work *request queue* is limited and
        the thread pool blocks when the queue is full and it tries to put
        more work requests in it (see ``putRequest`` method), unless you also
        use a positive ``timeout`` value for ``putRequest``.

        If ``resq_size > 0`` the size of the *results queue* is limited and the
        worker threads will block when the queue is full and they try to put
        new results in it.

        ``poll_timeout`` is passed on to ``createWorkers``.

        .. warning::
            If you set both ``q_size`` and ``resq_size`` to ``!= 0`` there is
            the possibility of a deadlock, when the results queue is not
            pulled regularly and too many jobs are put in the work requests
            queue. To prevent this, always set ``timeout > 0`` when calling
            ``ThreadPool.putRequest()`` and catch ``Queue.Full`` exceptions.

        """
        self._requests_queue = Queue.Queue(q_size)
        self._results_queue = Queue.Queue(resq_size)
        # Active workers, and dismissed workers that were not joined yet.
        self.workers = []
        self.dismissedWorkers = []
        # Requests whose result has not been processed yet, by requestID.
        self.workRequests = {}
        self.createWorkers(num_workers, poll_timeout)

    def createWorkers(self, num_workers, poll_timeout=5):
        """Add num_workers worker threads to the pool.

        ``poll_timeout`` sets the interval in seconds (int or float) for how
        often threads should check whether they are dismissed, while waiting
        for requests.

        """
        for i in range(num_workers):
            self.workers.append(WorkerThread(self._requests_queue,
                self._results_queue, poll_timeout=poll_timeout))

    def dismissWorkers(self, num_workers, do_join=False):
        """Tell num_workers worker threads to quit after their current task.

        At most ``len(self.workers)`` threads are dismissed. With ``do_join``
        set, the dismissed threads are joined right away; otherwise they are
        kept in ``self.dismissedWorkers`` for ``joinAllDismissedWorkers``.

        """
        dismiss_list = []
        for i in range(min(num_workers, len(self.workers))):
            worker = self.workers.pop()
            worker.dismiss()
            dismiss_list.append(worker)

        if do_join:
            for worker in dismiss_list:
                worker.join()
        else:
            self.dismissedWorkers.extend(dismiss_list)

    def joinAllDismissedWorkers(self):
        """Perform Thread.join() on all worker threads that have been dismissed.
        """
        for worker in self.dismissedWorkers:
            worker.join()
        self.dismissedWorkers = []

    def putRequest(self, request, block=True, timeout=None):
        """Put work request into work queue and save its id for later.

        ``block`` and ``timeout`` are passed on to ``Queue.put``. With the
        defaults the call waits until there is room in the request queue.
        With a positive ``timeout`` it waits at most that many seconds and
        then raises ``Queue.Full``. With ``block=False`` it raises
        ``Queue.Full`` at once when the queue is full.

        ``timeout`` defaults to ``None`` rather than ``0``, because a
        blocking put with a zero timeout fails immediately on a full queue
        instead of waiting.

        The request is only registered in ``self.workRequests`` after it has
        been queued successfully.

        """
        assert isinstance(request, WorkRequest)
        # don't reuse old work requests
        assert not getattr(request, 'exception', None)
        self._requests_queue.put(request, block, timeout)
        self.workRequests[request.requestID] = request

    def poll(self, block=False):
        """Process any new results in the queue.

        For every available result the matching callback of its request is
        called. ``exc_callback`` is used when the request raised an exception
        and has such a callback, ``callback`` (if any) otherwise. The request
        is then removed from ``self.workRequests``.

        With ``block`` false, the method returns as soon as the results queue
        is empty. With ``block`` true, it waits for further results.

        Raises ``NoResultsPending`` when no requests are pending and, if
        ``block`` is true, ``NoWorkersAvailable`` when requests are pending
        but the pool has no workers left to process them.

        """
        while True:
            # still results pending?
            if not self.workRequests:
                raise NoResultsPending
            # are there still workers to process remaining requests?
            elif block and not self.workers:
                raise NoWorkersAvailable
            try:
                # get back next results
                request, result = self._results_queue.get(block=block)
            except Queue.Empty:
                break
            try:
                use_exc_callback = request.exception and request.exc_callback
                if use_exc_callback:
                    # an exception occurred and there is a handler for it
                    request.exc_callback(request, result)
                elif request.callback:
                    # hand results to callback, if any
                    request.callback(request, result)
            finally:
                # The result has been consumed, so the request is no longer
                # pending -- even if its callback raised. Leaving it in
                # place would make a later wait() block forever.
                del self.workRequests[request.requestID]

    def wait(self):
        """Wait for results, blocking until all have arrived."""
        while True:
            try:
                self.poll(True)
            except NoResultsPending:
                break
---|
328 | |
---|
329 | |
---|
330 | ################ |
---|
331 | # USAGE EXAMPLE |
---|
332 | ################ |
---|
333 | |
---|
if __name__ == '__main__':
    import random
    import time

    # The job each work request performs (rather trivial in this example):
    # sleep for a while, then return a random value derived from the input.
    def do_something(data):
        time.sleep(random.randint(1,5))
        result = round(random.random() * data, 5)
        # just to show off, an exception is raised once in a while
        if result > 5:
            raise RuntimeError("Something extraordinary happened!")
        return result

    # Result callback: called for every result that becomes available.
    def print_result(request, result):
        print("**** Result from request #%s: %r" % (request.requestID, result))

    # Exception callback: called when a request raised an exception.
    # It does little more than the default handler.
    def handle_exception(request, exc_info):
        if not isinstance(exc_info, tuple):
            # Something is seriously wrong...
            print(request)
            print(exc_info)
            raise SystemExit
        print("**** Exception occured in request #%s: %s" %
            (request.requestID, exc_info))

    # First form of args_list accepted by makeRequests: one plain argument
    # per job.
    data = [random.randint(1,10) for i in range(20)]
    requests = makeRequests(do_something, data, print_result, handle_exception)
    # to use the default exception handler, uncomment next line and comment out
    # the preceding one.
    #requests = makeRequests(do_something, data, print_result)

    # Second form of args_list accepted by makeRequests: ((,), {})
    data = [((random.randint(1,10),), {}) for i in range(20)]
    requests.extend(
        makeRequests(do_something, data, print_result, handle_exception)
        # to use the default exception handler, uncomment next line and
        # comment out the preceding one.
        #makeRequests(do_something, data, print_result)
    )

    # create a pool of 3 worker threads
    print("Creating thread pool with 3 worker threads.")
    main = ThreadPool(3)

    # put the work requests in the queue...
    for req in requests:
        main.putRequest(req)
        print("Work request #%s added." % req.requestID)
    # or shorter:
    # [main.putRequest(req) for req in requests]

    # ...and wait for the results to arrive in the result queue.
    # ThreadPool.wait() would block until the results for all work requests
    # have arrived:
    # main.wait()

    # Instead, poll for results while doing something else:
    ticks = 0
    while True:
        try:
            time.sleep(0.5)
            main.poll()
            print("Main thread working... (active worker threads: %i)" %
                (threading.activeCount()-1, ))
            if ticks == 10:
                print("**** Adding 3 more worker threads...")
                main.createWorkers(3)
            if ticks == 20:
                print("**** Dismissing 2 worker threads...")
                main.dismissWorkers(2)
            ticks += 1
        except KeyboardInterrupt:
            print("**** Interrupted!")
            break
        except NoResultsPending:
            print("**** No pending results.")
            break
    if main.dismissedWorkers:
        print("Joining all dismissed worker threads...")
        main.joinAllDismissedWorkers()
---|