[517] | 1 | # -*- coding: UTF-8 -*- |
---|
| 2 | """Easy to use object-oriented thread pool framework. |
---|
[232] | 3 | |
---|
[517] | 4 | A thread pool is an object that maintains a pool of worker threads to perform |
---|
[232] | 5 | time consuming operations in parallel. It assigns jobs to the threads |
---|
| 6 | by putting them in a work request queue, where they are picked up by the |
---|
| 7 | next available thread. This then performs the requested operation in the |
---|
[517] | 8 | background and puts the results in another queue. |
---|
[232] | 9 | |
---|
[517] | 10 | The thread pool object can then collect the results from all threads from |
---|
[232] | 11 | this queue as soon as they become available or after all threads have |
---|
| 12 | finished their work. It is also possible to define callbacks to handle |
---|
| 13 | each result as it comes in. |
---|
| 14 | |
---|
[517] | 15 | The basic concept and some code was taken from the book "Python in a Nutshell, |
---|
| 16 | 2nd edition" by Alex Martelli, O'Reilly 2006, ISBN 0-596-10046-9, from section |
---|
| 17 | 14.5 "Threaded Program Architecture". I wrapped the main program logic in the |
---|
[232] | 18 | ThreadPool class, added the WorkRequest class and the callback system and |
---|
[517] | 19 | tweaked the code here and there. Kudos also to Florent Aide for the exception |
---|
| 20 | handling mechanism. |
---|
[232] | 21 | |
---|
[517] | 22 | Basic usage:: |
---|
[232] | 23 | |
---|
[517] | 24 | >>> pool = ThreadPool(poolsize) |
---|
| 25 | >>> requests = makeRequests(some_callable, list_of_args, callback) |
---|
| 26 | >>> [pool.putRequest(req) for req in requests] |
---|
| 27 | >>> pool.wait() |
---|
[232] | 28 | |
---|
| 29 | See the end of the module code for a brief, annotated usage example. |
---|
[517] | 30 | |
---|
| 31 | Website : http://chrisarndt.de/projects/threadpool/ |
---|
| 32 | |
---|
[232] | 33 | """ |
---|
# Markup language used by the docstrings in this module.
__docformat__ = "restructuredtext en"

# Public names exported by a star-import of this module.
__all__ = [
    'makeRequests',
    'NoResultsPending',
    'NoWorkersAvailable',
    'ThreadPool',
    'WorkRequest',
    'WorkerThread',
]

# Module metadata.
__author__ = "Christopher Arndt"
__version__ = "1.2.5"
__revision__ = "$Revision: 354 $"
__date__ = "$Date: 2008-11-19 18:34:46 +0100 (Wed, 19 Nov 2008) $"
__license__ = 'MIT license'
---|
[232] | 50 | |
---|
| 51 | |
---|
[517] | 52 | # standard library modules |
---|
| 53 | import sys |
---|
| 54 | import threading |
---|
| 55 | import Queue |
---|
| 56 | import traceback |
---|
| 57 | |
---|
| 58 | |
---|
| 59 | # exceptions |
---|
class NoResultsPending(Exception):
    """Signals that every submitted work request has been processed."""
---|
[517] | 63 | |
---|
class NoWorkersAvailable(Exception):
    """Signals that no worker thread is left to process pending requests."""
---|
| 67 | |
---|
[517] | 68 | |
---|
| 69 | # internal module helper functions |
---|
def _handle_thread_exception(request, exc_info):
    """Report an exception raised by a work request (default handler).

    ``request`` is accepted so that the function matches the two-argument
    exception callback signature, but it is not used here.

    ``exc_info`` is unpacked into the arguments of
    ``traceback.print_exception``, which prints the exception details.

    """
    traceback.print_exception(*exc_info)
---|
| 77 | |
---|
| 78 | |
---|
| 79 | # utility functions |
---|
def makeRequests(callable_, args_list, callback=None,
        exc_callback=_handle_thread_exception):
    """Build one ``WorkRequest`` per entry of ``args_list`` for ``callable_``.

    This is a shortcut for the common case of running the same callable
    several times, each time with different arguments.

    Every entry of ``args_list`` describes one invocation and is either

    * a tuple whose first item holds the positional arguments and whose
      second item holds the dictionary of keyword arguments, or
    * any non-tuple value, which is passed as the single positional argument.

    ``callback`` and ``exc_callback`` are attached unchanged to every request;
    see the docstring of ``WorkRequest`` for their meaning.

    Returns the list of created ``WorkRequest`` objects, in the order of
    ``args_list``.

    """
    def build(item):
        # a tuple carries (positional args, keyword args); anything else is
        # treated as the one and only positional argument
        if isinstance(item, tuple):
            positional, keywords = item[0], item[1]
        else:
            positional, keywords = [item], None
        return WorkRequest(callable_, positional, keywords,
            callback=callback, exc_callback=exc_callback)

    return [build(item) for item in args_list]
---|
| 110 | |
---|
| 111 | |
---|
| 112 | # classes |
---|
class WorkerThread(threading.Thread):
    """Daemon thread that serves a requests queue and feeds a results queue.

    The thread keeps taking work requests from one queue, executes them and
    puts the outcome into the other queue, until ``dismiss`` is called.

    """

    def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
        """Configure the thread as a daemon and start it right away.

        ``requests_queue`` and ``results_queue`` are the ``Queue.Queue``
        instances the thread reads work requests from and writes results to.

        ``poll_timeout`` is the number of seconds the thread waits for a new
        request before it re-checks whether it has been dismissed.

        Any further keyword arguments are handed to ``threading.Thread``.

        """
        threading.Thread.__init__(self, **kwds)
        self._requests_queue = requests_queue
        self._results_queue = results_queue
        self._poll_timeout = poll_timeout
        self._dismissed = threading.Event()
        self.setDaemon(1)
        # everything is set up; start only now, since run() uses the above
        self.start()

    def run(self):
        """Serve work requests until the thread is dismissed."""
        while not self._dismissed.isSet():
            # Wait at most self._poll_timeout seconds for a request, so that
            # an idle thread still notices a dismissal in reasonable time.
            try:
                request = self._requests_queue.get(True, self._poll_timeout)
            except Queue.Empty:
                continue
            if self._dismissed.isSet():
                # dismissed while waiting: hand the request back, then exit
                self._requests_queue.put(request)
                break
            try:
                result = request.callable(*request.args, **request.kwds)
                self._results_queue.put((request, result))
            except:
                # Deliberately catches everything: each failure is reported
                # through the results queue as a sys.exc_info() tuple.
                request.exception = True
                self._results_queue.put((request, sys.exc_info()))

    def dismiss(self):
        """Ask the thread to exit once it is done with its current job."""
        self._dismissed.set()
---|
| 165 | |
---|
| 166 | |
---|
class WorkRequest:
    """Description of one call to be executed by a worker thread.

    Use the module function ``makeRequests`` when you need many
    ``WorkRequest`` objects for the same callable, differing only in the
    arguments of each call.

    """

    def __init__(self, callable_, args=None, kwds=None, requestID=None,
            callback=None, exc_callback=_handle_thread_exception):
        """Bundle a callable with its arguments and optional callbacks.

        ``callable_`` is what a worker thread will execute, with ``args`` as
        the list of positional arguments and ``kwds`` as the dictionary of
        keyword arguments. Both default to empty.

        ``callback``, if given, is called when the result of the request is
        picked up from the results queue. It receives two positional
        arguments: the ``WorkRequest`` object and the result of the callable.
        Extra information for the callback can be attached to the request
        object as attributes.

        ``exc_callback`` is called instead when the callable raised an
        exception. It receives the ``WorkRequest`` object and the tuple
        returned by ``sys.exc_info()``. The default handler prints the
        exception via ``traceback.print_exception``; pass ``None`` to
        disable exception callbacks.

        ``requestID``, if given, must be hashable, because ``ThreadPool``
        uses the ID as a dictionary key. What is stored in
        ``self.requestID`` is ``hash(requestID)``; without a ``requestID``
        it is ``id(self)``.

        Raises ``TypeError`` if ``requestID`` is not hashable.

        """
        if requestID is not None:
            try:
                self.requestID = hash(requestID)
            except TypeError:
                raise TypeError("requestID must be hashable.")
        else:
            self.requestID = id(self)
        self.callable = callable_
        self.args = args or []
        self.kwds = kwds or {}
        self.callback = callback
        self.exc_callback = exc_callback
        # set to True by the worker thread if the callable raises
        self.exception = False

    def __str__(self):
        """Return a short description with ID, arguments and exception flag."""
        details = (self.requestID, self.args, self.kwds, self.exception)
        return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % details
---|
[232] | 220 | |
---|
class ThreadPool:
    """A thread pool, distributing work requests and collecting results.

    See the module docstring for more information.

    """

    def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
        """Set up the thread pool and start num_workers worker threads.

        ``num_workers`` is the number of worker threads to start initially.

        If ``q_size > 0`` the size of the work *request queue* is limited and
        the thread pool blocks when the queue is full and it tries to put
        more work requests in it (see ``putRequest`` method), unless you also
        use a positive ``timeout`` value for ``putRequest``.

        If ``resq_size > 0`` the size of the *results queue* is limited and the
        worker threads will block when the queue is full and they try to put
        new results in it.

        ``poll_timeout`` is passed on to ``createWorkers``.

        .. warning::
            If you set both ``q_size`` and ``resq_size`` to ``!= 0`` there is
            the possibility of a deadlock, when the results queue is not
            pulled regularly and too many jobs are put in the work requests
            queue. To prevent this, always set ``timeout > 0`` when calling
            ``ThreadPool.putRequest()`` and catch ``Queue.Full`` exceptions.

        """
        self._requests_queue = Queue.Queue(q_size)
        self._results_queue = Queue.Queue(resq_size)
        # worker threads currently serving the pool
        self.workers = []
        # dismissed workers that have not been joined yet
        self.dismissedWorkers = []
        # requests whose results are still pending, keyed by requestID
        self.workRequests = {}
        self.createWorkers(num_workers, poll_timeout)

    def createWorkers(self, num_workers, poll_timeout=5):
        """Add num_workers worker threads to the pool.

        ``poll_timeout`` sets the interval in seconds (int or float) for how
        often threads should check whether they are dismissed, while waiting
        for requests.

        """
        for i in range(num_workers):
            self.workers.append(WorkerThread(self._requests_queue,
                self._results_queue, poll_timeout=poll_timeout))

    def dismissWorkers(self, num_workers, do_join=False):
        """Tell num_workers worker threads to quit after their current task.

        At most ``len(self.workers)`` threads are dismissed. If ``do_join``
        is true, this call waits for each dismissed thread to finish.
        Otherwise the threads are remembered in ``self.dismissedWorkers`` so
        that ``joinAllDismissedWorkers`` can join them later.

        """
        dismiss_list = []
        for i in range(min(num_workers, len(self.workers))):
            worker = self.workers.pop()
            worker.dismiss()
            dismiss_list.append(worker)

        if do_join:
            for worker in dismiss_list:
                worker.join()
        else:
            self.dismissedWorkers.extend(dismiss_list)

    def joinAllDismissedWorkers(self):
        """Perform Thread.join() on all worker threads that have been dismissed.
        """
        for worker in self.dismissedWorkers:
            worker.join()
        self.dismissedWorkers = []

    def putRequest(self, request, block=True, timeout=None):
        """Put work request into work queue and save its id for later.

        ``block`` and ``timeout`` are handed to ``Queue.Queue.put`` and only
        matter when the request queue has a size limit:

        * with the defaults the call blocks until a slot becomes free,
        * with ``timeout > 0`` it waits at most that many seconds and then
          raises ``Queue.Full``,
        * with ``block=False`` it raises ``Queue.Full`` immediately.

        ``timeout`` defaults to ``None`` rather than ``0``: a timeout of
        ``0`` makes ``Queue.put`` raise ``Queue.Full`` at once on a full
        queue instead of blocking.

        """
        assert isinstance(request, WorkRequest)
        # don't reuse old work requests
        assert not getattr(request, 'exception', None)
        self._requests_queue.put(request, block, timeout)
        self.workRequests[request.requestID] = request

    def poll(self, block=False):
        """Process any new results in the queue.

        Results are taken from the results queue one by one and handed to the
        callbacks of their request. With ``block`` false the method returns
        as soon as the results queue is empty; with ``block`` true it waits
        for each next result.

        Raises ``NoResultsPending`` when no submitted request is waiting for
        its result, and ``NoWorkersAvailable`` when ``block`` is true and the
        pool has no worker threads left.

        """
        while True:
            # still results pending?
            if not self.workRequests:
                raise NoResultsPending
            # are there still workers to process remaining requests?
            elif block and not self.workers:
                raise NoWorkersAvailable
            try:
                # get back next results
                request, result = self._results_queue.get(block=block)
                # has an exception occurred?
                if request.exception and request.exc_callback:
                    request.exc_callback(request, result)
                # hand results to callback, if any, unless the exception
                # callback has already taken care of this request
                if request.callback and not \
                       (request.exception and request.exc_callback):
                    request.callback(request, result)
                del self.workRequests[request.requestID]
            except Queue.Empty:
                break

    def wait(self):
        """Wait for results, blocking until all have arrived."""
        while 1:
            try:
                self.poll(True)
            except NoResultsPending:
                break
---|
[232] | 328 | |
---|
[508] | 329 | |
---|
[517] | 330 | ################ |
---|
| 331 | # USAGE EXAMPLE |
---|
| 332 | ################ |
---|
[232] | 333 | |
---|
if __name__ == '__main__':
    import random
    import time

    # NOTE: all output below uses the single-argument ``print(...)`` form,
    # which prints exactly what the equivalent print statement would.

    def do_something(data):
        """The (rather trivial) job the worker threads have to do."""
        time.sleep(random.randint(1, 5))
        outcome = round(random.random() * data, 5)
        # just to show off, we fail once in a while
        if outcome > 5:
            raise RuntimeError("Something extraordinary happened!")
        return outcome

    def print_result(request, result):
        """Result callback: called each time a result is available."""
        print("**** Result from request #%s: %r" % (request.requestID, result))

    def handle_exception(request, exc_info):
        """Exception callback: does little more than the default handler."""
        if isinstance(exc_info, tuple):
            print("**** Exception occured in request #%s: %s" %
                (request.requestID, exc_info))
            return
        # Something is seriously wrong...
        print(request)
        print(exc_info)
        raise SystemExit

    # First form of args_list accepted by makeRequests: one plain argument
    # per job. To use the default exception handler instead, leave out the
    # handle_exception argument.
    plain_args = [random.randint(1, 10) for _ in range(20)]
    requests = makeRequests(do_something, plain_args, print_result,
        handle_exception)

    # Second form: ((positional args,), {keyword args}) per job. Again, drop
    # the handle_exception argument to get the default exception handler.
    tuple_args = [((random.randint(1, 10),), {}) for _ in range(20)]
    requests.extend(makeRequests(do_something, tuple_args, print_result,
        handle_exception))

    # we create a pool of 3 worker threads
    print("Creating thread pool with 3 worker threads.")
    pool = ThreadPool(3)

    # then we put the work requests in the queue...
    for request in requests:
        pool.putRequest(request)
        print("Work request #%s added." % request.requestID)

    # ...and could now simply call pool.wait(), which would block until the
    # results for all work requests have arrived. Instead we poll for
    # results while doing something else:
    tick = 0
    while True:
        try:
            time.sleep(0.5)
            pool.poll()
            print("Main thread working..." + " " +
                "(active worker threads: %i)" % (threading.activeCount()-1, ))
            if tick == 10:
                print("**** Adding 3 more worker threads...")
                pool.createWorkers(3)
            elif tick == 20:
                print("**** Dismissing 2 worker threads...")
                pool.dismissWorkers(2)
            tick += 1
        except KeyboardInterrupt:
            print("**** Interrupted!")
            break
        except NoResultsPending:
            print("**** No pending results.")
            break

    if pool.dismissedWorkers:
        print("Joining all dismissed worker threads...")
        pool.joinAllDismissedWorkers()
---|