Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 1f0b658

Browse files
committed
Merged revisions 77794 via svnmerge from
svn+ssh://[email protected]/python/trunk ........ r77794 | jesse.noller | 2010-01-26 22:05:57 -0500 (Tue, 26 Jan 2010) | 1 line Issue #6963: Added maxtasksperchild argument to multiprocessing.Pool ........
1 parent c3511a4 commit 1f0b658

6 files changed

Lines changed: 130 additions & 19 deletions

File tree

Doc/library/multiprocessing.rst

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1535,7 +1535,7 @@ Process Pools
15351535
One can create a pool of processes which will carry out tasks submitted to it
15361536
with the :class:`Pool` class.
15371537

1538-
.. class:: multiprocessing.Pool([processes[, initializer[, initargs]]])
1538+
.. class:: multiprocessing.Pool([processes[, initializer[, initargs[, maxtasksperchild]]]])
15391539

15401540
A process pool object which controls a pool of worker processes to which jobs
15411541
can be submitted. It supports asynchronous results with timeouts and
@@ -1546,6 +1546,21 @@ with the :class:`Pool` class.
15461546
*initializer* is not ``None`` then each worker process will call
15471547
``initializer(*initargs)`` when it starts.
15481548

1549+
*maxtasksperchild* is the number of tasks a worker process can complete
1550+
before it will exit and be replaced with a fresh worker process, to enable
1551+
unused resources to be freed. The default *maxtasksperchild* is ``None``, which
1552+
means worker processes will live as long as the pool.
1553+
1554+
.. note::
1555+
1556+
Worker processes within a :class:`Pool` typically live for the complete
1557+
duration of the Pool's work queue. A frequent pattern found in other
1558+
systems (such as Apache, mod_wsgi, etc) to free resources held by
1559+
workers is to allow a worker within a pool to complete only a set
1560+
amount of work before exiting, being cleaned up and a new
1561+
process spawned to replace the old one. The *maxtasksperchild*
1562+
argument to the :class:`Pool` exposes this ability to the end user.
1563+
15491564
.. method:: apply(func[, args[, kwds]])
15501565

15511566
Call *func* with arguments *args* and keyword arguments *kwds*. It blocks

Lib/multiprocessing/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,12 +218,12 @@ def JoinableQueue(maxsize=0):
218218
from multiprocessing.queues import JoinableQueue
219219
return JoinableQueue(maxsize)
220220

221-
def Pool(processes=None, initializer=None, initargs=()):
221+
def Pool(processes=None, initializer=None, initargs=(), maxtasksperchild=None):
222222
'''
223223
Returns a process pool object
224224
'''
225225
from multiprocessing.pool import Pool
226-
return Pool(processes, initializer, initargs)
226+
return Pool(processes, initializer, initargs, maxtasksperchild)
227227

228228
def RawValue(typecode_or_type, *args):
229229
'''

Lib/multiprocessing/pool.py

Lines changed: 81 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ def mapstar(args):
4242
# Code run by worker processes
4343
#
4444

45-
def worker(inqueue, outqueue, initializer=None, initargs=()):
45+
def worker(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
46+
assert maxtasks is None or (type(maxtasks) == int and maxtasks > 0)
4647
put = outqueue.put
4748
get = inqueue.get
4849
if hasattr(inqueue, '_writer'):
@@ -52,7 +53,8 @@ def worker(inqueue, outqueue, initializer=None, initargs=()):
5253
if initializer is not None:
5354
initializer(*initargs)
5455

55-
while 1:
56+
completed = 0
57+
while maxtasks is None or (maxtasks and completed < maxtasks):
5658
try:
5759
task = get()
5860
except (EOFError, IOError):
@@ -69,6 +71,8 @@ def worker(inqueue, outqueue, initializer=None, initargs=()):
6971
except Exception as e:
7072
result = (False, e)
7173
put((job, i, result))
74+
completed += 1
75+
debug('worker exiting after %d tasks' % completed)
7276

7377
#
7478
# Class representing a process pool
@@ -80,11 +84,15 @@ class Pool(object):
8084
'''
8185
Process = Process
8286

83-
def __init__(self, processes=None, initializer=None, initargs=()):
87+
def __init__(self, processes=None, initializer=None, initargs=(),
88+
maxtasksperchild=None):
8489
self._setup_queues()
8590
self._taskqueue = queue.Queue()
8691
self._cache = {}
8792
self._state = RUN
93+
self._maxtasksperchild = maxtasksperchild
94+
self._initializer = initializer
95+
self._initargs = initargs
8896

8997
if processes is None:
9098
try:
@@ -95,16 +103,18 @@ def __init__(self, processes=None, initializer=None, initargs=()):
95103
if initializer is not None and not hasattr(initializer, '__call__'):
96104
raise TypeError('initializer must be a callable')
97105

106+
self._processes = processes
98107
self._pool = []
99-
for i in range(processes):
100-
w = self.Process(
101-
target=worker,
102-
args=(self._inqueue, self._outqueue, initializer, initargs)
103-
)
104-
self._pool.append(w)
105-
w.name = w.name.replace('Process', 'PoolWorker')
106-
w.daemon = True
107-
w.start()
108+
self._repopulate_pool()
109+
110+
self._worker_handler = threading.Thread(
111+
target=Pool._handle_workers,
112+
args=(self, )
113+
)
114+
self._worker_handler.daemon = True
115+
self._worker_handler._state = RUN
116+
self._worker_handler.start()
117+
108118

109119
self._task_handler = threading.Thread(
110120
target=Pool._handle_tasks,
@@ -125,10 +135,48 @@ def __init__(self, processes=None, initializer=None, initargs=()):
125135
self._terminate = Finalize(
126136
self, self._terminate_pool,
127137
args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
128-
self._task_handler, self._result_handler, self._cache),
138+
self._worker_handler, self._task_handler,
139+
self._result_handler, self._cache),
129140
exitpriority=15
130141
)
131142

143+
def _join_exited_workers(self):
144+
"""Cleanup after any worker processes which have exited due to reaching
145+
their specified lifetime. Returns True if any workers were cleaned up.
146+
"""
147+
cleaned = False
148+
for i in reversed(range(len(self._pool))):
149+
worker = self._pool[i]
150+
if worker.exitcode is not None:
151+
# worker exited
152+
debug('cleaning up worker %d' % i)
153+
worker.join()
154+
cleaned = True
155+
del self._pool[i]
156+
return cleaned
157+
158+
def _repopulate_pool(self):
159+
"""Bring the number of pool processes up to the specified number,
160+
for use after reaping workers which have exited.
161+
"""
162+
for i in range(self._processes - len(self._pool)):
163+
w = self.Process(target=worker,
164+
args=(self._inqueue, self._outqueue,
165+
self._initializer,
166+
self._initargs, self._maxtasksperchild)
167+
)
168+
self._pool.append(w)
169+
w.name = w.name.replace('Process', 'PoolWorker')
170+
w.daemon = True
171+
w.start()
172+
debug('added worker')
173+
174+
def _maintain_pool(self):
175+
"""Clean up any exited workers and start replacements for them.
176+
"""
177+
if self._join_exited_workers():
178+
self._repopulate_pool()
179+
132180
def _setup_queues(self):
133181
from .queues import SimpleQueue
134182
self._inqueue = SimpleQueue()
@@ -217,6 +265,13 @@ def map_async(self, func, iterable, chunksize=None, callback=None):
217265
for i, x in enumerate(task_batches)), None))
218266
return result
219267

268+
@staticmethod
def _handle_workers(pool):
    """Background-thread loop that keeps the pool staffed.

    Polls roughly every 0.1s, reaping workers that exited (e.g. after
    completing ``maxtasksperchild`` tasks) and starting replacements.
    Runs until either this handler thread or the pool itself leaves
    the RUN state.
    """
    while True:
        if pool._worker_handler._state != RUN or pool._state != RUN:
            break
        pool._maintain_pool()
        time.sleep(0.1)
    debug('worker handler exiting')
274+
220275
@staticmethod
221276
def _handle_tasks(taskqueue, put, outqueue, pool):
222277
thread = threading.current_thread()
@@ -332,16 +387,19 @@ def close(self):
332387
debug('closing pool')
333388
if self._state == RUN:
334389
self._state = CLOSE
390+
self._worker_handler._state = CLOSE
335391
self._taskqueue.put(None)
336392

337393
def terminate(self):
338394
debug('terminating pool')
339395
self._state = TERMINATE
396+
self._worker_handler._state = TERMINATE
340397
self._terminate()
341398

342399
def join(self):
343400
debug('joining pool')
344401
assert self._state in (CLOSE, TERMINATE)
402+
self._worker_handler.join()
345403
self._task_handler.join()
346404
self._result_handler.join()
347405
for p in self._pool:
@@ -358,10 +416,11 @@ def _help_stuff_finish(inqueue, task_handler, size):
358416

359417
@classmethod
360418
def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
361-
task_handler, result_handler, cache):
419+
worker_handler, task_handler, result_handler, cache):
362420
# this is guaranteed to only be called once
363421
debug('finalizing pool')
364422

423+
worker_handler._state = TERMINATE
365424
task_handler._state = TERMINATE
366425
taskqueue.put(None) # sentinel
367426

@@ -373,10 +432,12 @@ def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
373432
result_handler._state = TERMINATE
374433
outqueue.put(None) # sentinel
375434

435+
# Terminate workers which haven't already finished.
376436
if pool and hasattr(pool[0], 'terminate'):
377437
debug('terminating workers')
378438
for p in pool:
379-
p.terminate()
439+
if p.exitcode is None:
440+
p.terminate()
380441

381442
debug('joining task handler')
382443
task_handler.join(1e100)
@@ -388,6 +449,11 @@ def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
388449
debug('joining pool workers')
389450
for p in pool:
390451
p.join()
452+
for w in pool:
453+
if w.exitcode is None:
454+
# worker has not yet exited
455+
debug('cleaning up worker %d' % w.pid)
456+
w.join()
391457

392458
#
393459
# Class whose instances are returned by `Pool.apply_async()`

Lib/test/test_multiprocessing.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def latin(s):
4646
#
4747

4848
LOG_LEVEL = util.SUBWARNING
49-
#LOG_LEVEL = logging.WARNING
49+
#LOG_LEVEL = logging.DEBUG
5050

5151
DELTA = 0.1
5252
CHECK_TIMINGS = False # making true makes tests take a lot longer
@@ -1053,6 +1053,30 @@ def test_terminate(self):
10531053
join = TimingWrapper(self.pool.join)
10541054
join()
10551055
self.assertTrue(join.elapsed < 0.2)
1056+
1057+
class _TestPoolWorkerLifetime(BaseTestCase):
    # Exercises the maxtasksperchild feature (issue #6963): workers should
    # exit after completing the configured number of tasks and be replaced
    # by fresh processes, observable as a change in worker pids.

    ALLOWED_TYPES = ('processes', )
    def test_pool_worker_lifetime(self):
        # 3 workers, each retired after 10 tasks; 100 tasks total should
        # force several generations of replacement workers.
        p = multiprocessing.Pool(3, maxtasksperchild=10)
        self.assertEqual(3, len(p._pool))
        origworkerpids = [w.pid for w in p._pool]
        # Run many tasks so each worker gets replaced (hopefully)
        results = []
        for i in range(100):
            results.append(p.apply_async(sqr, (i, )))
        # Fetch the results and verify we got the right answers,
        # also ensuring all the tasks have completed.
        for (j, res) in enumerate(results):
            self.assertEqual(res.get(), sqr(j))
        # Refill the pool
        p._repopulate_pool()
        # Finally, check that the worker pids have changed
        # NOTE(review): assumes at least one original worker was retired by
        # the time all results were fetched — could in principle be flaky.
        finalworkerpids = [w.pid for w in p._pool]
        self.assertNotEqual(sorted(origworkerpids), sorted(finalworkerpids))
        p.close()
        p.join()
1079+
10561080
#
10571081
# Test that manager has expected number of shared objects left
10581082
#

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ Brett Cannon
120120
Mike Carlton
121121
Terry Carroll
122122
Donn Cave
123+
Charles Cazabon
123124
Per Cederqvist
124125
Octavian Cerna
125126
Hye-Shik Chang

Misc/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,11 @@ C-API
234234
Library
235235
-------
236236

237+
- Issue #6963: Added "maxtasksperchild" argument to multiprocessing.Pool,
238+
allowing for a maximum number of tasks within the pool to be completed by
239+
the worker before that worker is terminated, and a new one created to
240+
replace it.
241+
237242
- Issue #7792: Registering non-classes to ABCs raised an obscure error.
238243

239244
- Issue #7785: Don't accept bytes in FileIO.write().

0 commit comments

Comments
 (0)