4646__author__ = 'Brian Quinlan ([email protected] )' 4747
4848import atexit
49+ import os
4950from concurrent .futures import _base
5051import queue
5152import multiprocessing
52- from multiprocessing .queues import SimpleQueue
53+ from multiprocessing .queues import SimpleQueue , SentinelReady
5354import threading
5455import weakref
5556
@@ -122,7 +123,7 @@ def _process_worker(call_queue, result_queue):
122123 call_item = call_queue .get (block = True )
123124 if call_item is None :
124125 # Wake up queue management thread
125- result_queue .put (None )
126+ result_queue .put (os . getpid () )
126127 return
127128 try :
128129 r = call_item .fn (* call_item .args , ** call_item .kwargs )
@@ -194,29 +195,63 @@ def _queue_management_worker(executor_reference,
194195 result_queue: A multiprocessing.Queue of _ResultItems generated by the
195196 process workers.
196197 """
197- nb_shutdown_processes = 0
198- def shutdown_one_process ():
199- """Tell a worker to terminate, which will in turn wake us again"""
200- nonlocal nb_shutdown_processes
201- call_queue .put (None )
202- nb_shutdown_processes += 1
198+
199+ def shutdown_worker ():
200+ # This is an upper bound
201+ nb_children_alive = sum (p .is_alive () for p in processes .values ())
202+ for i in range (0 , nb_children_alive ):
203+ call_queue .put (None )
204+ # If .join() is not called on the created processes then
205+ # some multiprocessing.Queue methods may deadlock on Mac OS
206+ # X.
207+ for p in processes .values ():
208+ p .join ()
209+
203210 while True :
204211 _add_call_item_to_queue (pending_work_items ,
205212 work_ids_queue ,
206213 call_queue )
207214
208- result_item = result_queue .get ()
209- if result_item is not None :
210- work_item = pending_work_items [result_item .work_id ]
211- del pending_work_items [result_item .work_id ]
212-
213- if result_item .exception :
214- work_item .future .set_exception (result_item .exception )
215- else :
216- work_item .future .set_result (result_item .result )
217- continue
218- # If we come here, we either got a timeout or were explicitly woken up.
219- # In either case, check whether we should start shutting down.
215+ sentinels = [p .sentinel for p in processes .values ()]
216+ assert sentinels
217+ try :
218+ result_item = result_queue .get (sentinels = sentinels )
219+ except SentinelReady as e :
220+ # Mark the process pool broken so that submits fail right now.
221+ executor = executor_reference ()
222+ if executor is not None :
223+ executor ._broken = True
224+ executor ._shutdown_thread = True
225+ del executor
226+ # All futures in flight must be marked failed
227+ for work_id , work_item in pending_work_items .items ():
228+ work_item .future .set_exception (
229+ BrokenProcessPool (
230+ "A process in the process pool was "
231+ "terminated abruptly while the future was "
232+ "running or pending."
233+ ))
234+ pending_work_items .clear ()
235+ # Terminate remaining workers forcibly: the queues or their
236+ # locks may be in a dirty state and block forever.
237+ for p in processes .values ():
238+ p .terminate ()
239+ for p in processes .values ():
240+ p .join ()
241+ return
242+ if isinstance (result_item , int ):
243+ # Clean shutdown of a worker using its PID
244+ # (avoids marking the executor broken)
245+ del processes [result_item ]
246+ elif result_item is not None :
247+ work_item = pending_work_items .pop (result_item .work_id , None )
248+ # work_item can be None if another process terminated (see above)
249+ if work_item is not None :
250+ if result_item .exception :
251+ work_item .future .set_exception (result_item .exception )
252+ else :
253+ work_item .future .set_result (result_item .result )
254+ # Check whether we should start shutting down.
220255 executor = executor_reference ()
221256 # No more work items can be added if:
222257 # - The interpreter is shutting down OR
@@ -226,17 +261,11 @@ def shutdown_one_process():
226261 # Since no new work items can be added, it is safe to shutdown
227262 # this thread if there are no pending work items.
228263 if not pending_work_items :
229- while nb_shutdown_processes < len (processes ):
230- shutdown_one_process ()
231- # If .join() is not called on the created processes then
232- # some multiprocessing.Queue methods may deadlock on Mac OS
233- # X.
234- for p in processes :
235- p .join ()
264+ shutdown_worker ()
236265 return
237266 else :
238267 # Start shutting down by telling a process it can exit.
239- shutdown_one_process ( )
268+ call_queue . put ( None )
240269 del executor
241270
242271_system_limits_checked = False
@@ -264,6 +293,14 @@ def _check_system_limits():
264293 _system_limited = "system provides too few semaphores (%d available, 256 necessary)" % nsems_max
265294 raise NotImplementedError (_system_limited )
266295
296+
297+ class BrokenProcessPool (RuntimeError ):
298+ """
299+ Raised when a process in a ProcessPoolExecutor terminated abruptly
300+ while a future was in the running state.
301+ """
302+
303+
267304class ProcessPoolExecutor (_base .Executor ):
268305 def __init__ (self , max_workers = None ):
269306 """Initializes a new ProcessPoolExecutor instance.
@@ -288,11 +325,13 @@ def __init__(self, max_workers=None):
288325 self ._result_queue = SimpleQueue ()
289326 self ._work_ids = queue .Queue ()
290327 self ._queue_management_thread = None
291- self ._processes = set ()
328+ # Map of pids to processes
329+ self ._processes = {}
292330
293331 # Shutdown is a two-step process.
294332 self ._shutdown_thread = False
295333 self ._shutdown_lock = threading .Lock ()
334+ self ._broken = False
296335 self ._queue_count = 0
297336 self ._pending_work_items = {}
298337
@@ -302,6 +341,8 @@ def _start_queue_management_thread(self):
302341 def weakref_cb (_ , q = self ._result_queue ):
303342 q .put (None )
304343 if self ._queue_management_thread is None :
344+ # Start the processes so that their sentinels are known.
345+ self ._adjust_process_count ()
305346 self ._queue_management_thread = threading .Thread (
306347 target = _queue_management_worker ,
307348 args = (weakref .ref (self , weakref_cb ),
@@ -321,10 +362,13 @@ def _adjust_process_count(self):
321362 args = (self ._call_queue ,
322363 self ._result_queue ))
323364 p .start ()
324- self ._processes . add ( p )
365+ self ._processes [ p . pid ] = p
325366
326367 def submit (self , fn , * args , ** kwargs ):
327368 with self ._shutdown_lock :
369+ if self ._broken :
370+ raise BrokenProcessPool ('A child process terminated '
371+ 'abruptly, the process pool is not usable anymore' )
328372 if self ._shutdown_thread :
329373 raise RuntimeError ('cannot schedule new futures after shutdown' )
330374
@@ -338,7 +382,6 @@ def submit(self, fn, *args, **kwargs):
338382 self ._result_queue .put (None )
339383
340384 self ._start_queue_management_thread ()
341- self ._adjust_process_count ()
342385 return f
343386 submit .__doc__ = _base .Executor .submit .__doc__
344387
0 commit comments