pytorch/caffe2/python/timeout_guard.py
Bugra Akyildiz 27c7158166 Remove __future__ imports for legacy Python2 supports (#45033)
Summary:
There is a module called `2to3` which you can target for future specifically to remove these, the directory of `caffe2` has the most redundant imports:

```2to3 -f future -w caffe2```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/45033

Reviewed By: seemethere

Differential Revision: D23808648

Pulled By: bugra

fbshipit-source-id: 38971900f0fe43ab44a9168e57f2307580d36a38
2020-09-23 17:57:02 -07:00

110 lines
3.7 KiB
Python

## @package timeout_guard
# Module caffe2.python.timeout_guard
import contextlib
import threading
import os
import time
import signal
import logging
from future.utils import viewitems
'''
Sometimes CUDA devices can get stuck, 'deadlock'. In this case it is often
better just the kill the process automatically. Use this guard to set a
maximum timespan for a python call, such as RunNet(). If it does not complete
in time, process is killed.
Example usage:
with timeout_guard.CompleteInTimeOrDie(10.0):
core.RunNet(...)
'''
class WatcherThread(threading.Thread):
def __init__(self, timeout_secs):
threading.Thread.__init__(self)
self.timeout_secs = timeout_secs
self.completed = False
self.condition = threading.Condition()
self.daemon = True
self.caller_thread = threading.current_thread()
def run(self):
started = time.time()
self.condition.acquire()
while time.time() - started < self.timeout_secs and not self.completed:
self.condition.wait(self.timeout_secs - (time.time() - started))
self.condition.release()
if not self.completed:
log = logging.getLogger("timeout_guard")
log.error("Call did not finish in time. Timeout:{}s PID: {}".format(
self.timeout_secs,
os.getpid(),
))
# First try dying cleanly, but in 10 secs, exit properly
def forcequit():
time.sleep(10.0)
log.info("Prepared output, dumping threads. ")
print("Caller thread was: {}".format(self.caller_thread))
print("-----After force------")
import sys
import traceback
code = []
for threadId, stack in viewitems(sys._current_frames()):
if threadId == self.caller_thread.ident:
code.append("\n# ThreadID: %s" % threadId)
for filename, lineno, name, line in traceback.extract_stack(stack):
code.append('File: "%s", line %d, in %s' % (filename, lineno, name))
if line:
code.append(" %s" % (line.strip()))
print("\n".join(code))
log.error("Process did not terminate cleanly in 10 s, forcing")
os.abort()
forcet = threading.Thread(target=forcequit, args=())
forcet.daemon = True
forcet.start()
print("Caller thread was: {}".format(self.caller_thread))
print("-----Before forcing------")
import sys
import traceback
code = []
for threadId, stack in viewitems(sys._current_frames()):
code.append("\n# ThreadID: %s" % threadId)
for filename, lineno, name, line in traceback.extract_stack(stack):
code.append('File: "%s", line %d, in %s' % (filename, lineno, name))
if line:
code.append(" %s" % (line.strip()))
print("\n".join(code))
os.kill(os.getpid(), signal.SIGINT)
@contextlib.contextmanager
def CompleteInTimeOrDie(timeout_secs):
watcher = WatcherThread(timeout_secs)
watcher.start()
yield
watcher.completed = True
watcher.condition.acquire()
watcher.condition.notify()
watcher.condition.release()
def EuthanizeIfNecessary(timeout_secs=120):
'''
Call this if you have problem with process getting stuck at shutdown.
It will kill the process if it does not terminate in timeout_secs.
'''
watcher = WatcherThread(timeout_secs)
watcher.start()