pytorch/torch/csrc/autograd/python_anomaly_mode.cpp
Edward Z. Yang 9465c0e0b5 Add a lint rule for torch/csrc/util/pybind.h include (#82552)
We define specializations for pybind11 defined templates
(in particular, PYBIND11_DECLARE_HOLDER_TYPE) and consequently
it is important that these specializations *always* be #include'd
when making use of pybind11 templates whose behavior depends on
these specializations, otherwise we can cause an ODR violation.

The easiest way to ensure that all the specializations are always
loaded is to designate a header (in this case, torch/csrc/util/pybind.h)
that ensures the specializations are defined, and then add a lint
to ensure this header is included whenever pybind11 headers are
included.

The existing grep linter didn't have enough knobs to do this
conveniently, so I added some features.  I'm open to suggestions
for how to structure the features better.  The main changes:

- Added an --allowlist-pattern flag, which turns off the grep lint
  if some other line exists.  This is used to stop the grep
  lint from complaining about pybind11 includes if the util
  include already exists.

- Added --match-first-only flag, which lets grep only match against
  the first matching line.  This is because, even if there are multiple
  includes that are problematic, I only need to fix one of them.
  We don't /really/ need this, but when I was running lintrunner -a
  to fixup the preexisting codebase it was annoying without this,
  as the lintrunner overall driver fails if there are multiple edits
  on the same file.

I excluded any files that didn't otherwise have a dependency on
torch/ATen, this was mostly caffe2 and the valgrind wrapper compat
bindings.

Note the grep replacement is kind of crappy, but clang-tidy lint
cleaned it up in most cases.

See also https://github.com/pybind/pybind11/issues/4099

Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/82552
Approved by: https://github.com/albanD
2022-08-01 17:16:58 +00:00

133 lines
3.9 KiB
C++

#include <c10/util/Exception.h>
#include <pybind11/pybind11.h>
#include <torch/csrc/Exceptions.h>
#include <torch/csrc/autograd/python_anomaly_mode.h>
#include <torch/csrc/autograd/python_cpp_function.h>
#include <torch/csrc/python_headers.h>
#include <torch/csrc/utils/auto_gil.h>
#include <torch/csrc/utils/object_ptr.h>
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/utils/python_strings.h>
#include <iostream>
namespace torch {
namespace autograd {
void PyAnomalyMetadata::store_stack() {
pybind11::gil_scoped_acquire gil;
THPObjectPtr mod(PyImport_ImportModule("traceback"));
if (!mod) {
throw python_error();
}
THPObjectPtr list(PyObject_CallMethod(mod.get(), "format_stack", ""));
if (!list) {
throw python_error();
}
if (PyDict_SetItemString(dict(), ANOMALY_TRACE_KEY, list.get())) {
throw python_error();
}
}
void PyAnomalyMetadata::print_stack(const std::string& current_node_name) {
pybind11::gil_scoped_acquire gil;
if (!PyDict_Check(dict())) {
throw std::runtime_error("Anomaly metadata is not a python dictionary.");
}
PyObject* trace_stack = PyDict_GetItemString(dict(), ANOMALY_TRACE_KEY);
_print_stack(trace_stack, current_node_name, false);
PyObject* pyparent(PyDict_GetItemString(dict(), ANOMALY_PARENT_KEY));
// if there is no "parent_" in metadata, then it means this metadata's node
// is the root and stop printing the traceback
while (pyparent) {
THPObjectPtr parent_metadata(PyObject_GetAttrString(pyparent, "metadata"));
if (!parent_metadata) {
throw python_error();
}
THPObjectPtr parent_name_pyobj(PyObject_CallMethod(pyparent, "name", ""));
if (!parent_name_pyobj) {
throw python_error();
}
const char* parent_name_char = PyUnicode_AsUTF8(parent_name_pyobj.get());
if (!parent_name_char) {
throw python_error();
}
const std::string parent_name(parent_name_char);
PyObject* parent_stack =
PyDict_GetItemString(parent_metadata.get(), ANOMALY_TRACE_KEY);
_print_stack(parent_stack, parent_name, true);
// get the parent of this node, if this node is a root, pyparent is simply
// null
pyparent = PyDict_GetItemString(parent_metadata.get(), ANOMALY_PARENT_KEY);
}
}
void PyAnomalyMetadata::assign_parent(
const std::shared_ptr<Node>& parent_node) {
// assign the python object of parent_node in metadata["parent_"]
// if parent_node is nullptr, then do nothing (it can mean that "parent_" key
// is not in metadata)
pybind11::gil_scoped_acquire gil;
if (!parent_node)
return;
THPObjectPtr parent_node_(functionToPyObject(parent_node));
if (!parent_node_) {
throw python_error();
}
if (PyDict_SetItemString(dict(), ANOMALY_PARENT_KEY, parent_node_.get())) {
throw python_error();
}
}
void _print_stack(
PyObject* stack,
const std::string& current_node_name,
bool is_parent) {
if (!stack) {
TORCH_WARN(
"Error detected in ",
current_node_name,
". ",
"No forward pass information available. Enable detect anomaly "
"during forward pass for more information.");
return;
}
THPObjectPtr empty_string(PyUnicode_FromString(""));
if (!empty_string) {
throw python_error();
}
// stack is a list of Python strings ending with newlines. Use join to convert
// to a single string.
THPObjectPtr msg(PyUnicode_Join(empty_string, stack));
if (!msg) {
throw python_error();
}
if (!is_parent) {
TORCH_WARN(
"Error detected in ",
current_node_name,
". ",
"Traceback of forward call that caused the error:\n",
THPUtils_unpackString(msg.get()));
} else {
TORCH_WARN(
"\n\n",
"Previous calculation was induced by ",
current_node_name,
". "
"Traceback of forward call that induced the previous calculation:\n",
THPUtils_unpackString(msg.get()));
}
}
} // namespace autograd
} // namespace torch