[517] | 1 | # -*- coding: UTF-8 -*-
|
---|
| 2 | """Easy to use object-oriented thread pool framework.
|
---|
[232] | 3 |
|
---|
[517] | 4 | A thread pool is an object that maintains a pool of worker threads to perform
|
---|
[232] | 5 | time consuming operations in parallel. It assigns jobs to the threads
|
---|
| 6 | by putting them in a work request queue, where they are picked up by the
|
---|
| 7 | next available thread. This then performs the requested operation in the
|
---|
[517] | 8 | background and puts the results in another queue.
|
---|
[232] | 9 |
|
---|
[517] | 10 | The thread pool object can then collect the results from all threads from
|
---|
[232] | 11 | this queue as soon as they become available or after all threads have
|
---|
| 12 | finished their work. It's also possible to define callbacks to handle
|
---|
| 13 | each result as it comes in.
|
---|
| 14 |
|
---|
[517] | 15 | The basic concept and some code were taken from the book "Python in a Nutshell,
|
---|
| 16 | 2nd edition" by Alex Martelli, O'Reilly 2006, ISBN 0-596-10046-9, from section
|
---|
| 17 | 14.5 "Threaded Program Architecture". I wrapped the main program logic in the
|
---|
[232] | 18 | ThreadPool class, added the WorkRequest class and the callback system and
|
---|
[517] | 19 | tweaked the code here and there. Kudos also to Florent Aide for the exception
|
---|
| 20 | handling mechanism.
|
---|
[232] | 21 |
|
---|
[517] | 22 | Basic usage::
|
---|
[232] | 23 |
|
---|
[517] | 24 | >>> pool = ThreadPool(poolsize)
|
---|
| 25 | >>> requests = makeRequests(some_callable, list_of_args, callback)
|
---|
| 26 | >>> [pool.putRequest(req) for req in requests]
|
---|
| 27 | >>> pool.wait()
|
---|
[232] | 28 |
|
---|
| 29 | See the end of the module code for a brief, annotated usage example.
|
---|
[517] | 30 |
|
---|
| 31 | Website : http://chrisarndt.de/projects/threadpool/
|
---|
| 32 |
|
---|
[232] | 33 | """
|
---|
# Markup format used by the docstrings in this module.
__docformat__ = "restructuredtext en"

# Public names exported by a star-import of this module.
__all__ = [
    'makeRequests',
    'NoResultsPending',
    'NoWorkersAvailable',
    'ThreadPool',
    'WorkRequest',
    'WorkerThread'
]

# Authorship, version and licensing metadata.
__author__ = "Christopher Arndt"
__version__ = "1.2.5"
__revision__ = "$Revision: 354 $"
__date__ = "$Date: 2008-11-19 18:34:46 +0100 (Wed, 19 Nov 2008) $"
__license__ = 'MIT license'
|
---|
[232] | 50 |
|
---|
| 51 |
|
---|
[517] | 52 | # standard library modules
|
---|
| 53 | import sys
|
---|
| 54 | import threading
|
---|
| 55 | import Queue
|
---|
| 56 | import traceback
|
---|
| 57 |
|
---|
| 58 |
|
---|
| 59 | # exceptions
|
---|
class NoResultsPending(Exception):
    """Raised when there are no outstanding work requests left to collect.

    ``ThreadPool.poll`` raises this once every submitted request has been
    processed; ``ThreadPool.wait`` uses it as its termination signal.

    """
|
---|
[517] | 63 |
|
---|
class NoWorkersAvailable(Exception):
    """Raised when requests are pending but no worker thread can run them.

    ``ThreadPool.poll`` raises this when asked to block while the pool has
    no active workers.

    """
|
---|
| 67 |
|
---|
[517] | 68 |
|
---|
| 69 | # internal module helper functions
|
---|
def _handle_thread_exception(request, exc_info):
    """Print the traceback of an exception raised by a work request.

    This is the default ``exc_callback``. ``request`` is accepted only to
    satisfy the callback signature and is not used. ``exc_info`` is the
    tuple produced by ``sys.exc_info()``; it is handed unchanged to
    ``traceback.print_exception``.

    """
    traceback.print_exception(*exc_info)
|
---|
| 77 |
|
---|
| 78 |
|
---|
| 79 | # utility functions
|
---|
def makeRequests(callable_, args_list, callback=None,
                 exc_callback=_handle_thread_exception):
    """Build one ``WorkRequest`` per entry of ``args_list`` for ``callable_``.

    Each entry of ``args_list`` describes the arguments of one invocation:

    * a tuple is read as ``(positional_args, keyword_args)``; its first two
      items are passed on as the ``args`` and ``kwds`` of the request,
    * anything else is treated as a single positional argument.

    ``callback`` and ``exc_callback`` are attached unchanged to every
    request; see ``WorkRequest`` for their meaning.

    Returns the list of created requests, in the order of ``args_list``.

    """
    def build(entry):
        # A tuple carries explicit (args, kwds); any other value is wrapped
        # as the sole positional argument.
        if isinstance(entry, tuple):
            positional, keywords = entry[0], entry[1]
        else:
            positional, keywords = [entry], None
        return WorkRequest(callable_, positional, keywords,
                           callback=callback, exc_callback=exc_callback)

    return [build(entry) for entry in args_list]
|
---|
| 110 |
|
---|
| 111 |
|
---|
| 112 | # classes
|
---|
class WorkerThread(threading.Thread):
    """Daemon thread that executes work requests taken from a queue.

    The thread repeatedly fetches a request from the requests queue, runs
    its callable and puts a ``(request, result)`` pair on the results queue.
    It keeps doing so until ``dismiss`` has been called.

    """

    def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
        """Configure the thread as a daemon and start it immediately.

        ``requests_queue`` and ``results_queue`` are the ``Queue.Queue``
        instances to read requests from and write results to.

        ``poll_timeout`` is the number of seconds a blocking read on the
        requests queue may take before the dismissal flag is re-checked.

        Remaining keyword arguments are forwarded to
        ``threading.Thread.__init__``.

        """
        threading.Thread.__init__(self, **kwds)
        # All state used by run() must exist before start() is called.
        self._dismissed = threading.Event()
        self._poll_timeout = poll_timeout
        self._requests_queue = requests_queue
        self._results_queue = results_queue
        self.setDaemon(1)
        self.start()

    def run(self):
        """Process requests from the queue until the thread is dismissed."""
        while not self._dismissed.isSet():
            # Wait at most self._poll_timeout seconds for a request, so that
            # an idle thread regularly gets the chance to notice dismissal.
            try:
                request = self._requests_queue.get(True, self._poll_timeout)
            except Queue.Empty:
                continue
            if self._dismissed.isSet():
                # Dismissed while waiting: hand the request back so another
                # worker can pick it up, then exit.
                self._requests_queue.put(request)
                break
            try:
                outcome = request.callable(*request.args, **request.kwds)
                self._results_queue.put((request, outcome))
            except:
                # Deliberately catch everything: each request must yield
                # exactly one entry on the results queue, otherwise the
                # pool would wait for it forever.
                request.exception = True
                self._results_queue.put((request, sys.exc_info()))

    def dismiss(self):
        """Ask the thread to exit once it is done with its current job."""
        self._dismissed.set()
|
---|
| 165 |
|
---|
| 166 |
|
---|
class WorkRequest:
    """A callable plus its arguments, ready to be queued on a thread pool.

    Use the module function ``makeRequests`` to create several requests for
    the same callable with different arguments in one go.

    """

    def __init__(self, callable_, args=None, kwds=None, requestID=None,
                 callback=None, exc_callback=_handle_thread_exception):
        """Store the callable, its arguments and the result callbacks.

        ``callable_`` is what a worker thread will execute, with ``args`` as
        positional arguments and ``kwds`` as keyword arguments. Both default
        to empty.

        ``callback``, if given, is called when the result of the request is
        picked up from the results queue. It receives two positional
        arguments: the ``WorkRequest`` and the return value of the callable.
        To pass extra information to it, attach it to the request object.

        ``exc_callback`` is called instead when the callable raised. It
        receives the ``WorkRequest`` and the ``sys.exc_info()`` tuple. The
        default prints the traceback via ``traceback.print_exception``; pass
        ``None`` to have no exception callback.

        ``requestID``, if given, must be hashable. Note that the value kept
        in ``self.requestID`` is ``hash(requestID)``, not the object itself.
        When omitted, ``id(self)`` is used. A ``TypeError`` is raised for an
        unhashable ``requestID``.

        """
        if requestID is not None:
            try:
                requestID = hash(requestID)
            except TypeError:
                raise TypeError("requestID must be hashable.")
        else:
            requestID = id(self)
        self.requestID = requestID
        # Set to True by the worker thread when the callable raises.
        self.exception = False
        self.callable = callable_
        self.args = args or []
        self.kwds = kwds or {}
        self.callback = callback
        self.exc_callback = exc_callback

    def __str__(self):
        """Return a short description with ID, arguments and error flag."""
        details = (self.requestID, self.args, self.kwds, self.exception)
        return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % details
|
---|
[232] | 220 |
|
---|
class ThreadPool:
    """A thread pool, distributing work requests and collecting results.

    Requests are put on an internal requests queue, executed by
    ``WorkerThread`` instances and their results collected from an internal
    results queue by ``poll`` or ``wait``.

    See the module docstring for more information.

    """

    def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
        """Set up the thread pool and start num_workers worker threads.

        ``num_workers`` is the number of worker threads to start initially.

        If ``q_size > 0`` the size of the work *request queue* is limited and
        the thread pool blocks when the queue is full and it tries to put
        more work requests in it (see ``putRequest`` method), unless you also
        use a positive ``timeout`` value for ``putRequest``.

        If ``resq_size > 0`` the size of the *results queue* is limited and the
        worker threads will block when the queue is full and they try to put
        new results in it.

        ``poll_timeout`` is passed on to the worker threads, see
        ``createWorkers``.

        .. warning::
            If you set both ``q_size`` and ``resq_size`` to ``!= 0`` there is
            the possibility of a deadlock, when the results queue is not
            pulled regularly and too many jobs are put in the work requests
            queue. To prevent this, always set ``timeout > 0`` when calling
            ``ThreadPool.putRequest()`` and catch ``Queue.Full`` exceptions.

        """
        self._requests_queue = Queue.Queue(q_size)
        self._results_queue = Queue.Queue(resq_size)
        # Active workers, workers told to quit but not yet joined, and the
        # requests whose results have not been collected (by requestID).
        self.workers = []
        self.dismissedWorkers = []
        self.workRequests = {}
        self.createWorkers(num_workers, poll_timeout)

    def createWorkers(self, num_workers, poll_timeout=5):
        """Add num_workers worker threads to the pool.

        ``poll_timeout`` sets the interval in seconds (int or float) for how
        often threads should check whether they are dismissed, while waiting
        for requests.

        """
        for i in range(num_workers):
            self.workers.append(WorkerThread(self._requests_queue,
                self._results_queue, poll_timeout=poll_timeout))

    def dismissWorkers(self, num_workers, do_join=False):
        """Tell num_workers worker threads to quit after their current task.

        At most as many workers as the pool currently has are dismissed.
        With ``do_join`` true, the call blocks until those threads have
        exited; otherwise they are remembered in ``dismissedWorkers`` so
        that ``joinAllDismissedWorkers`` can join them later.

        """
        dismiss_list = []
        for i in range(min(num_workers, len(self.workers))):
            worker = self.workers.pop()
            worker.dismiss()
            dismiss_list.append(worker)

        if do_join:
            for worker in dismiss_list:
                worker.join()
        else:
            self.dismissedWorkers.extend(dismiss_list)

    def joinAllDismissedWorkers(self):
        """Perform Thread.join() on all worker threads that have been
        dismissed, then forget about them.
        """
        for worker in self.dismissedWorkers:
            worker.join()
        self.dismissedWorkers = []

    def putRequest(self, request, block=True, timeout=0):
        """Put work request into work queue and save its id for later.

        ``request`` must be a ``WorkRequest`` that has not already failed.

        With ``block`` true and no positive ``timeout`` (the default), the
        call waits until there is room in the requests queue. With a
        positive ``timeout`` it waits at most that many seconds, and with
        ``block`` false it does not wait at all; in both of these cases
        ``Queue.Full`` is raised if the queue has no free slot.

        """
        assert isinstance(request, WorkRequest)
        # don't reuse old work requests
        assert not getattr(request, 'exception', None)
        if not timeout:
            # ``Queue.put`` treats a timeout of 0 as "fail immediately when
            # full"; only ``None`` means "wait as long as necessary", which
            # is the documented behaviour when no positive timeout is given.
            timeout = None
        self._requests_queue.put(request, block, timeout)
        self.workRequests[request.requestID] = request

    def poll(self, block=False):
        """Process any new results in the queue.

        For every available result the matching callback of its request is
        called in the calling thread: ``exc_callback`` if the request failed
        and has one, otherwise ``callback`` if set. The request is then
        removed from ``workRequests``, even if the callback raised.

        With ``block`` false, returns as soon as the results queue is empty.
        With ``block`` true, keeps waiting for further results.

        Raises ``NoResultsPending`` when no requests are outstanding and
        ``NoWorkersAvailable`` when ``block`` is true while the pool has no
        workers. Exceptions raised by callbacks propagate to the caller.

        """
        while True:
            # still results pending?
            if not self.workRequests:
                raise NoResultsPending
            # are there still workers to process remaining requests?
            elif block and not self.workers:
                raise NoWorkersAvailable
            # Only the queue read is guarded, so that a ``Queue.Empty``
            # escaping from a callback is not mistaken for an empty queue.
            try:
                request, result = self._results_queue.get(block=block)
            except Queue.Empty:
                break
            try:
                if request.exception and request.exc_callback:
                    # an exception occurred: hand it to the error callback
                    request.exc_callback(request, result)
                elif request.callback:
                    # hand results to callback, if any
                    request.callback(request, result)
            finally:
                # The result has been consumed; keeping the request
                # registered after a failing callback would make ``wait``
                # block forever.
                del self.workRequests[request.requestID]

    def wait(self):
        """Wait for results, blocking until all have arrived."""
        while 1:
            try:
                self.poll(True)
            except NoResultsPending:
                break
|
---|
[232] | 328 |
|
---|
[508] | 329 |
|
---|
[517] | 330 | ################
|
---|
| 331 | # USAGE EXAMPLE
|
---|
| 332 | ################
|
---|
[232] | 333 |
|
---|
# Demonstration script: queues 40 randomised jobs on a pool of 3 workers and
# polls for their results while resizing the pool along the way.
if __name__ == '__main__':
    import random
    import time

    # the work the threads will have to do (rather trivial in our example):
    # sleep 1-5 seconds, then return a random fraction of ``data``
    def do_something(data):
        time.sleep(random.randint(1,5))
        result = round(random.random() * data, 5)
        # just to show off, we throw an exception once in a while
        if result > 5:
            raise RuntimeError("Something extraordinary happened!")
        return result

    # this will be called each time a result is available
    def print_result(request, result):
        print "**** Result from request #%s: %r" % (request.requestID, result)

    # this will be called when an exception occurs within a thread
    # this example exception handler does little more than the default handler
    def handle_exception(request, exc_info):
        if not isinstance(exc_info, tuple):
            # Something is seriously wrong: the pool always hands over the
            # tuple returned by sys.exc_info(), so dump what we got and quit.
            print request
            print exc_info
            raise SystemExit
        print "**** Exception occured in request #%s: %s" % \
          (request.requestID, exc_info)

    # assemble the arguments for each job to a list...
    data = [random.randint(1,10) for i in range(20)]
    # ... and build a WorkRequest object for each item in data
    requests = makeRequests(do_something, data, print_result, handle_exception)
    # to use the default exception handler, uncomment next line and comment out
    # the preceding one.
    #requests = makeRequests(do_something, data, print_result)

    # or the other form of args_lists accepted by makeRequests: ((,), {})
    data = [((random.randint(1,10),), {}) for i in range(20)]
    requests.extend(
        makeRequests(do_something, data, print_result, handle_exception)
        # to use the default exception handler, uncomment next line and comment
        # out the preceding one.
        #makeRequests(do_something, data, print_result)
    )

    # we create a pool of 3 worker threads
    print "Creating thread pool with 3 worker threads."
    main = ThreadPool(3)

    # then we put the work requests in the queue...
    for req in requests:
        main.putRequest(req)
        print "Work request #%s added." % req.requestID
    # or shorter:
    # [main.putRequest(req) for req in requests]

    # ...and wait for the results to arrive in the result queue
    # by using ThreadPool.wait(). This would block until results for
    # all work requests have arrived:
    # main.wait()

    # instead we can poll for results while doing something else:
    i = 0  # number of completed polling rounds
    while True:
        try:
            time.sleep(0.5)
            # non-blocking: runs the callbacks of all results available now
            main.poll()
            print "Main thread working...",
            # subtract one so that the main thread itself is not counted
            print "(active worker threads: %i)" % (threading.activeCount()-1, )
            if i == 10:
                print "**** Adding 3 more worker threads..."
                main.createWorkers(3)
            if i == 20:
                print "**** Dismissing 2 worker threads..."
                main.dismissWorkers(2)
            i += 1
        except KeyboardInterrupt:
            print "**** Interrupted!"
            break
        except NoResultsPending:
            # raised by poll() once every request has been processed
            print "**** No pending results."
            break
    # dismissWorkers() above did not join the threads, so do it before exiting
    if main.dismissedWorkers:
        print "Joining all dismissed worker threads..."
        main.joinAllDismissedWorkers()
|
---|