| 1 | # -*- coding: UTF-8 -*-
|
|---|
| 2 | """Easy to use object-oriented thread pool framework.
|
|---|
| 3 |
|
|---|
| 4 | A thread pool is an object that maintains a pool of worker threads to perform
|
|---|
| 5 | time consuming operations in parallel. It assigns jobs to the threads
|
|---|
| 6 | by putting them in a work request queue, where they are picked up by the
|
|---|
| 7 | next available thread. This then performs the requested operation in the
|
|---|
| 8 | background and puts the results in another queue.
|
|---|
| 9 |
|
|---|
| 10 | The thread pool object can then collect the results from all threads from
|
|---|
| 11 | this queue as soon as they become available or after all threads have
|
|---|
| 12 | finished their work. It's also possible, to define callbacks to handle
|
|---|
| 13 | each result as it comes in.
|
|---|
| 14 |
|
|---|
| 15 | The basic concept and some code was taken from the book "Python in a Nutshell,
|
|---|
| 16 | 2nd edition" by Alex Martelli, O'Reilly 2006, ISBN 0-596-10046-9, from section
|
|---|
| 17 | 14.5 "Threaded Program Architecture". I wrapped the main program logic in the
|
|---|
| 18 | ThreadPool class, added the WorkRequest class and the callback system and
|
|---|
| 19 | tweaked the code here and there. Kudos also to Florent Aide for the exception
|
|---|
| 20 | handling mechanism.
|
|---|
| 21 |
|
|---|
| 22 | Basic usage::
|
|---|
| 23 |
|
|---|
| 24 | >>> pool = ThreadPool(poolsize)
|
|---|
| 25 | >>> requests = makeRequests(some_callable, list_of_args, callback)
|
|---|
| 26 | >>> [pool.putRequest(req) for req in requests]
|
|---|
| 27 | >>> pool.wait()
|
|---|
| 28 |
|
|---|
| 29 | See the end of the module code for a brief, annotated usage example.
|
|---|
| 30 |
|
|---|
| 31 | Website : http://chrisarndt.de/projects/threadpool/
|
|---|
| 32 |
|
|---|
| 33 | """
|
|---|
| 34 | __docformat__ = "restructuredtext en"
|
|---|
| 35 |
|
|---|
| 36 | __all__ = [
|
|---|
| 37 | 'makeRequests',
|
|---|
| 38 | 'NoResultsPending',
|
|---|
| 39 | 'NoWorkersAvailable',
|
|---|
| 40 | 'ThreadPool',
|
|---|
| 41 | 'WorkRequest',
|
|---|
| 42 | 'WorkerThread'
|
|---|
| 43 | ]
|
|---|
| 44 |
|
|---|
| 45 | __author__ = "Christopher Arndt"
|
|---|
| 46 | __version__ = "1.2.5"
|
|---|
| 47 | __revision__ = "$Revision: 354 $"
|
|---|
| 48 | __date__ = "$Date: 2008-11-19 18:34:46 +0100 (Wed, 19 Nov 2008) $"
|
|---|
| 49 | __license__ = 'MIT license'
|
|---|
| 50 |
|
|---|
| 51 |
|
|---|
| 52 | # standard library modules
|
|---|
| 53 | import sys
|
|---|
| 54 | import threading
|
|---|
| 55 | import Queue
|
|---|
| 56 | import traceback
|
|---|
| 57 |
|
|---|
| 58 |
|
|---|
| 59 | # exceptions
|
|---|
| 60 | class NoResultsPending(Exception):
|
|---|
| 61 | """All work requests have been processed."""
|
|---|
| 62 | pass
|
|---|
| 63 |
|
|---|
| 64 | class NoWorkersAvailable(Exception):
|
|---|
| 65 | """No worker threads available to process remaining requests."""
|
|---|
| 66 | pass
|
|---|
| 67 |
|
|---|
| 68 |
|
|---|
| 69 | # internal module helper functions
|
|---|
| 70 | def _handle_thread_exception(request, exc_info):
|
|---|
| 71 | """Default exception handler callback function.
|
|---|
| 72 |
|
|---|
| 73 | This just prints the exception info via ``traceback.print_exception``.
|
|---|
| 74 |
|
|---|
| 75 | """
|
|---|
| 76 | traceback.print_exception(*exc_info)
|
|---|
| 77 |
|
|---|
| 78 |
|
|---|
| 79 | # utility functions
|
|---|
| 80 | def makeRequests(callable_, args_list, callback=None,
|
|---|
| 81 | exc_callback=_handle_thread_exception):
|
|---|
| 82 | """Create several work requests for same callable with different arguments.
|
|---|
| 83 |
|
|---|
| 84 | Convenience function for creating several work requests for the same
|
|---|
| 85 | callable where each invocation of the callable receives different values
|
|---|
| 86 | for its arguments.
|
|---|
| 87 |
|
|---|
| 88 | ``args_list`` contains the parameters for each invocation of callable.
|
|---|
| 89 | Each item in ``args_list`` should be either a 2-item tuple of the list of
|
|---|
| 90 | positional arguments and a dictionary of keyword arguments or a single,
|
|---|
| 91 | non-tuple argument.
|
|---|
| 92 |
|
|---|
| 93 | See docstring for ``WorkRequest`` for info on ``callback`` and
|
|---|
| 94 | ``exc_callback``.
|
|---|
| 95 |
|
|---|
| 96 | """
|
|---|
| 97 | requests = []
|
|---|
| 98 | for item in args_list:
|
|---|
| 99 | if isinstance(item, tuple):
|
|---|
| 100 | requests.append(
|
|---|
| 101 | WorkRequest(callable_, item[0], item[1], callback=callback,
|
|---|
| 102 | exc_callback=exc_callback)
|
|---|
| 103 | )
|
|---|
| 104 | else:
|
|---|
| 105 | requests.append(
|
|---|
| 106 | WorkRequest(callable_, [item], None, callback=callback,
|
|---|
| 107 | exc_callback=exc_callback)
|
|---|
| 108 | )
|
|---|
| 109 | return requests
|
|---|
| 110 |
|
|---|
| 111 |
|
|---|
| 112 | # classes
|
|---|
| 113 | class WorkerThread(threading.Thread):
|
|---|
| 114 | """Background thread connected to the requests/results queues.
|
|---|
| 115 |
|
|---|
| 116 | A worker thread sits in the background and picks up work requests from
|
|---|
| 117 | one queue and puts the results in another until it is dismissed.
|
|---|
| 118 |
|
|---|
| 119 | """
|
|---|
| 120 |
|
|---|
| 121 | def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
|
|---|
| 122 | """Set up thread in daemonic mode and start it immediatedly.
|
|---|
| 123 |
|
|---|
| 124 | ``requests_queue`` and ``results_queue`` are instances of
|
|---|
| 125 | ``Queue.Queue`` passed by the ``ThreadPool`` class when it creates a new
|
|---|
| 126 | worker thread.
|
|---|
| 127 |
|
|---|
| 128 | """
|
|---|
| 129 | threading.Thread.__init__(self, **kwds)
|
|---|
| 130 | self.setDaemon(1)
|
|---|
| 131 | self._requests_queue = requests_queue
|
|---|
| 132 | self._results_queue = results_queue
|
|---|
| 133 | self._poll_timeout = poll_timeout
|
|---|
| 134 | self._dismissed = threading.Event()
|
|---|
| 135 | self.start()
|
|---|
| 136 |
|
|---|
| 137 | def run(self):
|
|---|
| 138 | """Repeatedly process the job queue until told to exit."""
|
|---|
| 139 | while True:
|
|---|
| 140 | if self._dismissed.isSet():
|
|---|
| 141 | # we are dismissed, break out of loop
|
|---|
| 142 | break
|
|---|
| 143 | # get next work request. If we don't get a new request from the
|
|---|
| 144 | # queue after self._poll_timout seconds, we jump to the start of
|
|---|
| 145 | # the while loop again, to give the thread a chance to exit.
|
|---|
| 146 | try:
|
|---|
| 147 | request = self._requests_queue.get(True, self._poll_timeout)
|
|---|
| 148 | except Queue.Empty:
|
|---|
| 149 | continue
|
|---|
| 150 | else:
|
|---|
| 151 | if self._dismissed.isSet():
|
|---|
| 152 | # we are dismissed, put back request in queue and exit loop
|
|---|
| 153 | self._requests_queue.put(request)
|
|---|
| 154 | break
|
|---|
| 155 | try:
|
|---|
| 156 | result = request.callable(*request.args, **request.kwds)
|
|---|
| 157 | self._results_queue.put((request, result))
|
|---|
| 158 | except:
|
|---|
| 159 | request.exception = True
|
|---|
| 160 | self._results_queue.put((request, sys.exc_info()))
|
|---|
| 161 |
|
|---|
| 162 | def dismiss(self):
|
|---|
| 163 | """Sets a flag to tell the thread to exit when done with current job."""
|
|---|
| 164 | self._dismissed.set()
|
|---|
| 165 |
|
|---|
| 166 |
|
|---|
| 167 | class WorkRequest:
|
|---|
| 168 | """A request to execute a callable for putting in the request queue later.
|
|---|
| 169 |
|
|---|
| 170 | See the module function ``makeRequests`` for the common case
|
|---|
| 171 | where you want to build several ``WorkRequest`` objects for the same
|
|---|
| 172 | callable but with different arguments for each call.
|
|---|
| 173 |
|
|---|
| 174 | """
|
|---|
| 175 |
|
|---|
| 176 | def __init__(self, callable_, args=None, kwds=None, requestID=None,
|
|---|
| 177 | callback=None, exc_callback=_handle_thread_exception):
|
|---|
| 178 | """Create a work request for a callable and attach callbacks.
|
|---|
| 179 |
|
|---|
| 180 | A work request consists of the a callable to be executed by a
|
|---|
| 181 | worker thread, a list of positional arguments, a dictionary
|
|---|
| 182 | of keyword arguments.
|
|---|
| 183 |
|
|---|
| 184 | A ``callback`` function can be specified, that is called when the
|
|---|
| 185 | results of the request are picked up from the result queue. It must
|
|---|
| 186 | accept two anonymous arguments, the ``WorkRequest`` object and the
|
|---|
| 187 | results of the callable, in that order. If you want to pass additional
|
|---|
| 188 | information to the callback, just stick it on the request object.
|
|---|
| 189 |
|
|---|
| 190 | You can also give custom callback for when an exception occurs with
|
|---|
| 191 | the ``exc_callback`` keyword parameter. It should also accept two
|
|---|
| 192 | anonymous arguments, the ``WorkRequest`` and a tuple with the exception
|
|---|
| 193 | details as returned by ``sys.exc_info()``. The default implementation
|
|---|
| 194 | of this callback just prints the exception info via
|
|---|
| 195 | ``traceback.print_exception``. If you want no exception handler
|
|---|
| 196 | callback, just pass in ``None``.
|
|---|
| 197 |
|
|---|
| 198 | ``requestID``, if given, must be hashable since it is used by
|
|---|
| 199 | ``ThreadPool`` object to store the results of that work request in a
|
|---|
| 200 | dictionary. It defaults to the return value of ``id(self)``.
|
|---|
| 201 |
|
|---|
| 202 | """
|
|---|
| 203 | if requestID is None:
|
|---|
| 204 | self.requestID = id(self)
|
|---|
| 205 | else:
|
|---|
| 206 | try:
|
|---|
| 207 | self.requestID = hash(requestID)
|
|---|
| 208 | except TypeError:
|
|---|
| 209 | raise TypeError("requestID must be hashable.")
|
|---|
| 210 | self.exception = False
|
|---|
| 211 | self.callback = callback
|
|---|
| 212 | self.exc_callback = exc_callback
|
|---|
| 213 | self.callable = callable_
|
|---|
| 214 | self.args = args or []
|
|---|
| 215 | self.kwds = kwds or {}
|
|---|
| 216 |
|
|---|
| 217 | def __str__(self):
|
|---|
| 218 | return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % \
|
|---|
| 219 | (self.requestID, self.args, self.kwds, self.exception)
|
|---|
| 220 |
|
|---|
| 221 | class ThreadPool:
|
|---|
| 222 | """A thread pool, distributing work requests and collecting results.
|
|---|
| 223 |
|
|---|
| 224 | See the module docstring for more information.
|
|---|
| 225 |
|
|---|
| 226 | """
|
|---|
| 227 |
|
|---|
| 228 | def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
|
|---|
| 229 | """Set up the thread pool and start num_workers worker threads.
|
|---|
| 230 |
|
|---|
| 231 | ``num_workers`` is the number of worker threads to start initially.
|
|---|
| 232 |
|
|---|
| 233 | If ``q_size > 0`` the size of the work *request queue* is limited and
|
|---|
| 234 | the thread pool blocks when the queue is full and it tries to put
|
|---|
| 235 | more work requests in it (see ``putRequest`` method), unless you also
|
|---|
| 236 | use a positive ``timeout`` value for ``putRequest``.
|
|---|
| 237 |
|
|---|
| 238 | If ``resq_size > 0`` the size of the *results queue* is limited and the
|
|---|
| 239 | worker threads will block when the queue is full and they try to put
|
|---|
| 240 | new results in it.
|
|---|
| 241 |
|
|---|
| 242 | .. warning:
|
|---|
| 243 | If you set both ``q_size`` and ``resq_size`` to ``!= 0`` there is
|
|---|
| 244 | the possibilty of a deadlock, when the results queue is not pulled
|
|---|
| 245 | regularly and too many jobs are put in the work requests queue.
|
|---|
| 246 | To prevent this, always set ``timeout > 0`` when calling
|
|---|
| 247 | ``ThreadPool.putRequest()`` and catch ``Queue.Full`` exceptions.
|
|---|
| 248 |
|
|---|
| 249 | """
|
|---|
| 250 | self._requests_queue = Queue.Queue(q_size)
|
|---|
| 251 | self._results_queue = Queue.Queue(resq_size)
|
|---|
| 252 | self.workers = []
|
|---|
| 253 | self.dismissedWorkers = []
|
|---|
| 254 | self.workRequests = {}
|
|---|
| 255 | self.createWorkers(num_workers, poll_timeout)
|
|---|
| 256 |
|
|---|
| 257 | def createWorkers(self, num_workers, poll_timeout=5):
|
|---|
| 258 | """Add num_workers worker threads to the pool.
|
|---|
| 259 |
|
|---|
| 260 | ``poll_timout`` sets the interval in seconds (int or float) for how
|
|---|
| 261 | ofte threads should check whether they are dismissed, while waiting for
|
|---|
| 262 | requests.
|
|---|
| 263 |
|
|---|
| 264 | """
|
|---|
| 265 | for i in range(num_workers):
|
|---|
| 266 | self.workers.append(WorkerThread(self._requests_queue,
|
|---|
| 267 | self._results_queue, poll_timeout=poll_timeout))
|
|---|
| 268 |
|
|---|
| 269 | def dismissWorkers(self, num_workers, do_join=False):
|
|---|
| 270 | """Tell num_workers worker threads to quit after their current task."""
|
|---|
| 271 | dismiss_list = []
|
|---|
| 272 | for i in range(min(num_workers, len(self.workers))):
|
|---|
| 273 | worker = self.workers.pop()
|
|---|
| 274 | worker.dismiss()
|
|---|
| 275 | dismiss_list.append(worker)
|
|---|
| 276 |
|
|---|
| 277 | if do_join:
|
|---|
| 278 | for worker in dismiss_list:
|
|---|
| 279 | worker.join()
|
|---|
| 280 | else:
|
|---|
| 281 | self.dismissedWorkers.extend(dismiss_list)
|
|---|
| 282 |
|
|---|
| 283 | def joinAllDismissedWorkers(self):
|
|---|
| 284 | """Perform Thread.join() on all worker threads that have been dismissed.
|
|---|
| 285 | """
|
|---|
| 286 | for worker in self.dismissedWorkers:
|
|---|
| 287 | worker.join()
|
|---|
| 288 | self.dismissedWorkers = []
|
|---|
| 289 |
|
|---|
| 290 | def putRequest(self, request, block=True, timeout=0):
|
|---|
| 291 | """Put work request into work queue and save its id for later."""
|
|---|
| 292 | assert isinstance(request, WorkRequest)
|
|---|
| 293 | # don't reuse old work requests
|
|---|
| 294 | assert not getattr(request, 'exception', None)
|
|---|
| 295 | self._requests_queue.put(request, block, timeout)
|
|---|
| 296 | self.workRequests[request.requestID] = request
|
|---|
| 297 |
|
|---|
| 298 | def poll(self, block=False):
|
|---|
| 299 | """Process any new results in the queue."""
|
|---|
| 300 | while True:
|
|---|
| 301 | # still results pending?
|
|---|
| 302 | if not self.workRequests:
|
|---|
| 303 | raise NoResultsPending
|
|---|
| 304 | # are there still workers to process remaining requests?
|
|---|
| 305 | elif block and not self.workers:
|
|---|
| 306 | raise NoWorkersAvailable
|
|---|
| 307 | try:
|
|---|
| 308 | # get back next results
|
|---|
| 309 | request, result = self._results_queue.get(block=block)
|
|---|
| 310 | # has an exception occured?
|
|---|
| 311 | if request.exception and request.exc_callback:
|
|---|
| 312 | request.exc_callback(request, result)
|
|---|
| 313 | # hand results to callback, if any
|
|---|
| 314 | if request.callback and not \
|
|---|
| 315 | (request.exception and request.exc_callback):
|
|---|
| 316 | request.callback(request, result)
|
|---|
| 317 | del self.workRequests[request.requestID]
|
|---|
| 318 | except Queue.Empty:
|
|---|
| 319 | break
|
|---|
| 320 |
|
|---|
| 321 | def wait(self):
|
|---|
| 322 | """Wait for results, blocking until all have arrived."""
|
|---|
| 323 | while 1:
|
|---|
| 324 | try:
|
|---|
| 325 | self.poll(True)
|
|---|
| 326 | except NoResultsPending:
|
|---|
| 327 | break
|
|---|
| 328 |
|
|---|
| 329 |
|
|---|
| 330 | ################
|
|---|
| 331 | # USAGE EXAMPLE
|
|---|
| 332 | ################
|
|---|
| 333 |
|
|---|
| 334 | if __name__ == '__main__':
|
|---|
| 335 | import random
|
|---|
| 336 | import time
|
|---|
| 337 |
|
|---|
| 338 | # the work the threads will have to do (rather trivial in our example)
|
|---|
| 339 | def do_something(data):
|
|---|
| 340 | time.sleep(random.randint(1,5))
|
|---|
| 341 | result = round(random.random() * data, 5)
|
|---|
| 342 | # just to show off, we throw an exception once in a while
|
|---|
| 343 | if result > 5:
|
|---|
| 344 | raise RuntimeError("Something extraordinary happened!")
|
|---|
| 345 | return result
|
|---|
| 346 |
|
|---|
| 347 | # this will be called each time a result is available
|
|---|
| 348 | def print_result(request, result):
|
|---|
| 349 | print "**** Result from request #%s: %r" % (request.requestID, result)
|
|---|
| 350 |
|
|---|
| 351 | # this will be called when an exception occurs within a thread
|
|---|
| 352 | # this example exception handler does little more than the default handler
|
|---|
| 353 | def handle_exception(request, exc_info):
|
|---|
| 354 | if not isinstance(exc_info, tuple):
|
|---|
| 355 | # Something is seriously wrong...
|
|---|
| 356 | print request
|
|---|
| 357 | print exc_info
|
|---|
| 358 | raise SystemExit
|
|---|
| 359 | print "**** Exception occured in request #%s: %s" % \
|
|---|
| 360 | (request.requestID, exc_info)
|
|---|
| 361 |
|
|---|
| 362 | # assemble the arguments for each job to a list...
|
|---|
| 363 | data = [random.randint(1,10) for i in range(20)]
|
|---|
| 364 | # ... and build a WorkRequest object for each item in data
|
|---|
| 365 | requests = makeRequests(do_something, data, print_result, handle_exception)
|
|---|
| 366 | # to use the default exception handler, uncomment next line and comment out
|
|---|
| 367 | # the preceding one.
|
|---|
| 368 | #requests = makeRequests(do_something, data, print_result)
|
|---|
| 369 |
|
|---|
| 370 | # or the other form of args_lists accepted by makeRequests: ((,), {})
|
|---|
| 371 | data = [((random.randint(1,10),), {}) for i in range(20)]
|
|---|
| 372 | requests.extend(
|
|---|
| 373 | makeRequests(do_something, data, print_result, handle_exception)
|
|---|
| 374 | #makeRequests(do_something, data, print_result)
|
|---|
| 375 | # to use the default exception handler, uncomment next line and comment
|
|---|
| 376 | # out the preceding one.
|
|---|
| 377 | )
|
|---|
| 378 |
|
|---|
| 379 | # we create a pool of 3 worker threads
|
|---|
| 380 | print "Creating thread pool with 3 worker threads."
|
|---|
| 381 | main = ThreadPool(3)
|
|---|
| 382 |
|
|---|
| 383 | # then we put the work requests in the queue...
|
|---|
| 384 | for req in requests:
|
|---|
| 385 | main.putRequest(req)
|
|---|
| 386 | print "Work request #%s added." % req.requestID
|
|---|
| 387 | # or shorter:
|
|---|
| 388 | # [main.putRequest(req) for req in requests]
|
|---|
| 389 |
|
|---|
| 390 | # ...and wait for the results to arrive in the result queue
|
|---|
| 391 | # by using ThreadPool.wait(). This would block until results for
|
|---|
| 392 | # all work requests have arrived:
|
|---|
| 393 | # main.wait()
|
|---|
| 394 |
|
|---|
| 395 | # instead we can poll for results while doing something else:
|
|---|
| 396 | i = 0
|
|---|
| 397 | while True:
|
|---|
| 398 | try:
|
|---|
| 399 | time.sleep(0.5)
|
|---|
| 400 | main.poll()
|
|---|
| 401 | print "Main thread working...",
|
|---|
| 402 | print "(active worker threads: %i)" % (threading.activeCount()-1, )
|
|---|
| 403 | if i == 10:
|
|---|
| 404 | print "**** Adding 3 more worker threads..."
|
|---|
| 405 | main.createWorkers(3)
|
|---|
| 406 | if i == 20:
|
|---|
| 407 | print "**** Dismissing 2 worker threads..."
|
|---|
| 408 | main.dismissWorkers(2)
|
|---|
| 409 | i += 1
|
|---|
| 410 | except KeyboardInterrupt:
|
|---|
| 411 | print "**** Interrupted!"
|
|---|
| 412 | break
|
|---|
| 413 | except NoResultsPending:
|
|---|
| 414 | print "**** No pending results."
|
|---|
| 415 | break
|
|---|
| 416 | if main.dismissedWorkers:
|
|---|
| 417 | print "Joining all dismissed worker threads..."
|
|---|
| 418 | main.joinAllDismissedWorkers()
|
|---|