2121# Display the running tests if nothing happened last N seconds
2222PROGRESS_UPDATE = 30.0 # seconds
2323
24+ # Time to wait until a worker completes: should be immediate
25+ JOIN_TIMEOUT = 30.0 # seconds
26+
2427
2528def must_stop (result , ns ):
2629 if result .result == INTERRUPTED :
@@ -91,6 +94,10 @@ def stop(self):
9194MultiprocessResult = collections .namedtuple ('MultiprocessResult' ,
9295 'result stdout stderr error_msg' )
9396
97+ class ExitThread (Exception ):
98+ pass
99+
100+
94101class MultiprocessThread (threading .Thread ):
95102 def __init__ (self , pending , output , ns ):
96103 super ().__init__ ()
@@ -100,13 +107,31 @@ def __init__(self, pending, output, ns):
100107 self .current_test_name = None
101108 self .start_time = None
102109 self ._popen = None
110+ self ._killed = False
111+
112+ def __repr__ (self ):
113+ info = ['MultiprocessThread' ]
114+ test = self .current_test_name
115+ if self .is_alive ():
116+ info .append ('alive' )
117+ if test :
118+ info .append (f'test={ test } ' )
119+ popen = self ._popen
120+ if popen :
121+ info .append (f'pid={ popen .pid } ' )
122+ return '<%s>' % ' ' .join (info )
103123
104124 def kill (self ):
125+ self ._killed = True
126+
105127 popen = self ._popen
106128 if popen is None :
107129 return
108- print ("Kill regrtest worker process %s" % popen .pid )
109130 popen .kill ()
131+ # stdout and stderr must be closed to ensure that communicate()
132+ # does not hang
133+ popen .stdout .close ()
134+ popen .stderr .close ()
110135
111136 def _runtest (self , test_name ):
112137 try :
@@ -117,7 +142,21 @@ def _runtest(self, test_name):
117142 popen = self ._popen
118143 with popen :
119144 try :
120- stdout , stderr = popen .communicate ()
145+ if self ._killed :
146+ # If kill() has been called before self._popen is set,
147+ # self._popen is still running. Call again kill()
148+ # to ensure that the process is killed.
149+ self .kill ()
150+ raise ExitThread
151+
152+ try :
153+ stdout , stderr = popen .communicate ()
154+ except OSError :
155+ if self ._killed :
156+ # kill() has been called: communicate() fails
157+ # on reading closed stdout/stderr
158+ raise ExitThread
159+ raise
121160 except :
122161 self .kill ()
123162 popen .wait ()
@@ -154,7 +193,7 @@ def _runtest(self, test_name):
154193 return MultiprocessResult (result , stdout , stderr , err_msg )
155194
156195 def run (self ):
157- while True :
196+ while not self . _killed :
158197 try :
159198 try :
160199 test_name = next (self .pending )
@@ -166,6 +205,8 @@ def run(self):
166205
167206 if must_stop (mp_result .result , self .ns ):
168207 break
208+ except ExitThread :
209+ break
169210 except BaseException :
170211 self .output .put ((True , traceback .format_exc ()))
171212 break
@@ -205,10 +246,20 @@ def start_workers(self):
205246 worker .start ()
206247
207248 def wait_workers (self ):
249+ start_time = time .monotonic ()
208250 for worker in self .workers :
209251 worker .kill ()
210252 for worker in self .workers :
211- worker .join ()
253+ while True :
254+ worker .join (1.0 )
255+ if not worker .is_alive ():
256+ break
257+ dt = time .monotonic () - start_time
258+ print ("Wait for regrtest worker %r for %.1f sec" % (worker , dt ))
259+ if dt > JOIN_TIMEOUT :
260+ print ("Warning -- failed to join a regrtest worker %s"
261+ % worker )
262+ break
212263
213264 def _get_result (self ):
214265 if not any (worker .is_alive () for worker in self .workers ):
0 commit comments