Dixing (Dex) Xu committed on
Commit
c92f194
·
unverified ·
1 Parent(s): 8a75fe3

:zap: update execution timeout logic more aggressively (#35) (#37)

Browse files
Files changed (1) hide show
  1. aide/interpreter.py +39 -22
aide/interpreter.py CHANGED
@@ -74,11 +74,15 @@ def exception_summary(e, working_dir, exec_file_name, format_tb_ipython):
74
 
75
 
76
  class RedirectQueue:
77
- def __init__(self, queue):
78
  self.queue = queue
 
79
 
80
  def write(self, msg):
81
- self.queue.put(msg)
 
 
 
82
 
83
  def flush(self):
84
  pass
@@ -178,17 +182,25 @@ class Interpreter:
178
  def cleanup_session(self):
179
  if self.process is None:
180
  return
181
- # give the child process a chance to terminate gracefully
182
- self.process.terminate()
183
- self.process.join(timeout=2)
184
- # kill the child process if it's still alive
185
- if self.process.exitcode is None:
186
- logger.warning("Child process failed to terminate gracefully, killing it..")
187
- self.process.kill()
188
- self.process.join()
189
- # don't wait for gc, clean up immediately
190
- self.process.close()
191
- self.process = None # type: ignore
 
 
 
 
 
 
 
 
192
 
193
  def run(self, code: str, reset_session=True) -> ExecutionResult:
194
  """
@@ -257,15 +269,12 @@ class Interpreter:
257
  continue
258
  running_time = time.time() - start_time
259
  if running_time > self.timeout:
260
-
261
- # [TODO] handle this in a better way
262
- assert reset_session, "Timeout ocurred in interactive session"
263
-
264
- # send interrupt to child
265
- os.kill(self.process.pid, signal.SIGINT) # type: ignore
266
  child_in_overtime = True
267
- # terminate if we're overtime by more than a minute
268
- if running_time > self.timeout + 60:
 
269
  logger.warning("Child failed to terminate, killing it..")
270
  self.cleanup_session()
271
 
@@ -277,8 +286,16 @@ class Interpreter:
277
  # read all stdout/stderr from child up to the EOF marker
278
  # waiting until the queue is empty is not enough since
279
  # the feeder thread in child might still be adding to the queue
 
280
  while not self.result_outq.empty() or not output or output[-1] != "<|EOF|>":
281
- output.append(self.result_outq.get())
 
 
 
 
 
 
 
282
  output.pop() # remove the EOF marker
283
 
284
  e_cls_name, exc_info, exc_stack = state[1:]
 
74
 
75
 
76
  class RedirectQueue:
77
+ def __init__(self, queue, timeout=5):
78
  self.queue = queue
79
+ self.timeout = timeout
80
 
81
  def write(self, msg):
82
+ try:
83
+ self.queue.put(msg, timeout=self.timeout)
84
+ except queue.Full:
85
+ logger.warning("Queue write timed out")
86
 
87
  def flush(self):
88
  pass
 
182
  def cleanup_session(self):
183
  if self.process is None:
184
  return
185
+ try:
186
+ # Reduce grace period from 2 seconds to 0.5
187
+ self.process.terminate()
188
+ self.process.join(timeout=0.5)
189
+
190
+ if self.process.exitcode is None:
191
+ logger.warning("Process failed to terminate, killing immediately")
192
+ self.process.kill()
193
+ self.process.join(timeout=0.5)
194
+
195
+ if self.process.exitcode is None:
196
+ logger.error("Process refuses to die, using SIGKILL")
197
+ os.kill(self.process.pid, signal.SIGKILL)
198
+ except Exception as e:
199
+ logger.error(f"Error during process cleanup: {e}")
200
+ finally:
201
+ if self.process is not None:
202
+ self.process.close()
203
+ self.process = None
204
 
205
  def run(self, code: str, reset_session=True) -> ExecutionResult:
206
  """
 
269
  continue
270
  running_time = time.time() - start_time
271
  if running_time > self.timeout:
272
+ logger.warning(f"Execution exceeded timeout of {self.timeout}s")
273
+ os.kill(self.process.pid, signal.SIGINT)
 
 
 
 
274
  child_in_overtime = True
275
+
276
+ # terminate if we're overtime by more than 5 seconds
277
+ if running_time > self.timeout + 5:
278
  logger.warning("Child failed to terminate, killing it..")
279
  self.cleanup_session()
280
 
 
286
  # read all stdout/stderr from child up to the EOF marker
287
  # waiting until the queue is empty is not enough since
288
  # the feeder thread in child might still be adding to the queue
289
+ start_collect = time.time()
290
  while not self.result_outq.empty() or not output or output[-1] != "<|EOF|>":
291
+ try:
292
+ # Add 5-second timeout for output collection
293
+ if time.time() - start_collect > 5:
294
+ logger.warning("Output collection timed out")
295
+ break
296
+ output.append(self.result_outq.get(timeout=1))
297
+ except queue.Empty:
298
+ continue
299
  output.pop() # remove the EOF marker
300
 
301
  e_cls_name, exc_info, exc_stack = state[1:]