More nogil-unsafe API fixes (#137142)

Covers the PyDict APIs and confirms that no update is needed for the PyModule ones.
The rest was already covered in https://github.com/pytorch/pytorch/pull/136899
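
A minimal sketch (not part of this diff; the function name is illustrative) of the lookup pattern the PR switches to: PyDict_GetItem returns a borrowed reference that another thread may invalidate on free-threaded builds, while PyDict_GetItemRef (CPython 3.13+, or via pythoncapi_compat) returns a new, owned reference plus a tri-state result code, so a missing key and a lookup error are distinguishable.

#include <Python.h>

static PyObject* lookup_owned(PyObject* dict, PyObject* key) {
    PyObject* value = NULL;
    int rc = PyDict_GetItemRef(dict, key, &value);
    if (rc < 0) {
        return NULL;      /* the lookup itself failed; a Python error is set */
    }
    if (rc == 0) {
        Py_RETURN_NONE;   /* key not present; value was set to NULL */
    }
    return value;         /* rc == 1: the caller owns this reference */
}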

Pull Request resolved: https://github.com/pytorch/pytorch/pull/137142
Approved by: https://github.com/eqy, https://github.com/Skylion007
Author: albanD
Date: 2024-10-04 21:56:32 +00:00
Committed by: PyTorch MergeBot
Commit: 88e54de219
Parent: e27c0048db
18 changed files with 1018 additions and 136 deletions

View File

@ -26,7 +26,7 @@ fi
export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
set +ex
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=pythoncapi_compat.h --exclude=eval_frame.c torch/
PYLONG_API_CHECK=$?
if [[ $PYLONG_API_CHECK == 0 ]]; then
echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"

View File

@ -2380,7 +2380,14 @@ class CppPythonBindingsCodeCache(CppCodeCache):
iss >> addr;
_torchinductor_pyobject_tensor_data_ptr =
reinterpret_cast<decltype(_torchinductor_pyobject_tensor_data_ptr)>(addr);
return PyModule_Create(&py_module);
PyObject* module = PyModule_Create(&py_module);
if (module == NULL) {
return NULL;
}
#ifdef Py_GIL_DISABLED
PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED);
#endif
return module;
}
"""
)
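
A self-contained sketch, not taken from the diff, of the module-init pattern applied throughout this PR ("example" and the empty method table are placeholders): create the module, then declare to free-threaded CPython that it does not rely on the GIL. PyUnstable_Module_SetGIL and Py_MOD_GIL_NOT_USED are only defined when Py_GIL_DISABLED is.

#include <Python.h>

static struct PyModuleDef example_module = {
    PyModuleDef_HEAD_INIT, "example", NULL, -1, NULL};

PyMODINIT_FUNC PyInit_example(void) {
    PyObject* module = PyModule_Create(&example_module);
    if (module == NULL) {
        return NULL;
    }
#ifdef Py_GIL_DISABLED
    /* Tell the free-threaded runtime this module is safe without the GIL. */
    PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED);
#endif
    return module;
}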

View File

@ -1678,6 +1678,10 @@ PyObject* initModule() {
PyModuleDef_HEAD_INIT, "torch._C", nullptr, -1, methods.data()};
module = PyModule_Create(&torchmodule);
ASSERT_TRUE(module);
#ifdef Py_GIL_DISABLED
PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED);
#endif
ASSERT_TRUE(THPGenerator_init(module));
ASSERT_TRUE(THPException_init(module));
THPSize_init(module);

View File

@ -367,9 +367,12 @@ void ConcretePyInterpreterVTable::python_dispatcher(
}
c10::DispatchKey k = ks.highestPriorityTypeId();
// TODO: allow this to be non-owning
auto handler = py::reinterpret_borrow<py::object>(
PyDict_GetItem(cache.ptr(), py::cast(k).ptr()));
PyObject* raw_handler = nullptr;
if (PyDict_GetItemRef(cache.ptr(), py::cast(k).ptr(), &raw_handler) < 0) {
// A real error occurred, not just a missing key
throw python_error();
}
auto handler = py::reinterpret_steal<py::object>(raw_handler);
if (handler.ptr() == nullptr) {
// Slow path
handler = torch_api_function_overload.attr("_get_dispatch")(k);

View File

@ -32,9 +32,15 @@ void PyAnomalyMetadata::print_stack(const std::string& current_node_name) {
if (!PyDict_Check(dict())) {
throw std::runtime_error("Anomaly metadata is not a python dictionary.");
}
PyObject* trace_stack = PyDict_GetItemString(dict(), ANOMALY_TRACE_KEY);
PyObject* trace_stack = nullptr;
if (PyDict_GetItemStringRef(dict(), ANOMALY_TRACE_KEY, &trace_stack) < 0) {
throw python_error();
}
_print_stack(trace_stack, current_node_name, false);
PyObject* pyparent(PyDict_GetItemString(dict(), ANOMALY_PARENT_KEY));
PyObject* pyparent = nullptr;
if (PyDict_GetItemStringRef(dict(), ANOMALY_PARENT_KEY, &pyparent) < 0) {
throw python_error();
}
// if there is no "parent_" in the metadata, this node is the root, so stop
// printing the traceback
@ -52,12 +58,18 @@ void PyAnomalyMetadata::print_stack(const std::string& current_node_name) {
throw python_error();
}
const std::string parent_name(parent_name_char);
PyObject* parent_stack =
PyDict_GetItemString(parent_metadata.get(), ANOMALY_TRACE_KEY);
PyObject* parent_stack = nullptr;
if (PyDict_GetItemStringRef(
parent_metadata.get(), ANOMALY_TRACE_KEY, &parent_stack) < 0) {
throw python_error();
}
_print_stack(parent_stack, parent_name, true);
// get the parent of this node, if this node is a root, pyparent is simply
// null
pyparent = PyDict_GetItemString(parent_metadata.get(), ANOMALY_PARENT_KEY);
if (PyDict_GetItemStringRef(
parent_metadata.get(), ANOMALY_PARENT_KEY, &pyparent) < 0) {
throw python_error();
}
}
}
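
An illustrative sketch (not part of the diff; fetch_field is a made-up helper) of the string-keyed variant used above: PyDict_GetItemStringRef keeps the old "NULL means the key is absent" convention for the output pointer, but reports lookup errors separately through its return value and hands back an owned reference that the caller must eventually release.

static int fetch_field(PyObject* dict, const char* name, PyObject** out) {
    *out = NULL;
    if (PyDict_GetItemStringRef(dict, name, out) < 0) {
        return -1;    /* a real error, not just a missing key */
    }
    /* *out is either NULL (key absent) or a new reference owned by the caller */
    return 0;
}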

View File

@ -62,10 +62,13 @@ bool _call_hooks(PyObject* dict, PyObject* args) {
// So, we use `PyDict_Values` which returns a new reference to the values
// i.e. we hold the reference to the hooks till we have iterated over them.
// Reference: https://github.com/pytorch/pytorch/issues/58354
auto hooks = THPObjectPtr{PyDict_Values(dict)};
bool is_modified = false;
const auto len = PyList_Size(hooks);
for (Py_ssize_t idx = 0; idx < len; ++idx) {
// Note that this call is NoGil safe as the list is created just above and
// not accessible by any other thread
const auto hook = PyList_GetItem(hooks, idx);
THPObjectPtr res(PyObject_CallObject(hook, args));
@ -176,30 +179,36 @@ auto PyFunctionPostHook::operator()(
void PyFunctionTensorPreHook::compiled_args(CompiledNodeArgs& args) {
PyObject *key = nullptr, *value = nullptr;
Py_ssize_t pos = 0;
Py_BEGIN_CRITICAL_SECTION(dict);
while (PyDict_Next(dict, &pos, &key, &value)) {
Py_INCREF(value);
args.add_tensor_pre_hook(
c10::SafePyObject(value, getPyInterpreter()),
static_cast<int>(value_idx));
}
Py_END_CRITICAL_SECTION();
}
void PyFunctionPreHook::compiled_args(CompiledNodeArgs& args) {
PyObject *key = nullptr, *value = nullptr;
Py_ssize_t pos = 0;
Py_BEGIN_CRITICAL_SECTION(dict);
while (PyDict_Next(dict, &pos, &key, &value)) {
Py_INCREF(value);
args.add_pre_hook(c10::SafePyObject(value, getPyInterpreter()));
}
Py_END_CRITICAL_SECTION();
}
void PyFunctionPostHook::compiled_args(CompiledNodeArgs& args) {
PyObject *key = nullptr, *value = nullptr;
Py_ssize_t pos = 0;
Py_BEGIN_CRITICAL_SECTION(dict);
while (PyDict_Next(dict, &pos, &key, &value)) {
Py_INCREF(value);
args.add_post_hook(c10::SafePyObject(value, getPyInterpreter()));
}
Py_END_CRITICAL_SECTION();
}
PyFunctionTensorPostAccGradHooks::PyFunctionTensorPostAccGradHooks(
@ -231,11 +240,13 @@ void PyFunctionTensorPostAccGradHooks::compiled_args(
torch::dynamo::autograd::CompiledNodeArgs& args) {
PyObject *key = nullptr, *value = nullptr;
Py_ssize_t pos = 0;
Py_BEGIN_CRITICAL_SECTION(dict);
while (PyDict_Next(dict, &pos, &key, &value)) {
Py_INCREF(value);
c10::SafePyObject hook_obj(value, getPyInterpreter());
args.add_post_acc_grad_hook(std::move(hook_obj));
}
Py_END_CRITICAL_SECTION();
}
void PyFunctionTensorPostAccGradHooks::apply_with_saved(
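
A hedged sketch (visit_value is a hypothetical callback, not from the diff) of the traversal pattern added above: PyDict_Next yields borrowed references and is not safe against concurrent mutation on free-threaded builds, so the loop runs inside a per-object critical section (a no-op on GIL builds, available on CPython 3.13+ or via pythoncapi_compat) and each value is INCREF'd before ownership is handed off, mirroring how the hooks above wrap values in c10::SafePyObject.

static void visit_dict_values(PyObject* dict, void (*visit_value)(PyObject*)) {
    PyObject *key = NULL, *value = NULL;
    Py_ssize_t pos = 0;
    Py_BEGIN_CRITICAL_SECTION(dict);
    while (PyDict_Next(dict, &pos, &key, &value)) {
        Py_INCREF(value);       /* take ownership before the value escapes the loop */
        visit_value(value);     /* callback is assumed to steal the reference */
    }
    Py_END_CRITICAL_SECTION();
}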

View File

@ -390,9 +390,11 @@ PyObject* THCPModule_cudaJiteratorCompileAndLaunchKernel(
PyObject* key = nullptr;
PyObject* value = nullptr;
Py_ssize_t pos = 0;
Py_BEGIN_CRITICAL_SECTION(kwargs_o);
while (PyDict_Next(kwargs_o, &pos, &key, &value)) {
extra_args.emplace_back(as_scalar(value));
}
Py_END_CRITICAL_SECTION();
c10::SmallVector<at::Tensor> outputs = at::cuda::CompileAndLaunchKernel(
code_string,

View File

@ -899,6 +899,10 @@ PyObject* torch_c_dynamo_eval_frame_init(void) {
return NULL;
}
#ifdef Py_GIL_DISABLED
PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED);
#endif
#if IS_PYTHON_3_11_PLUS
if (PyType_Ready(&THPPyInterpreterFrameType) < 0) {
return NULL;

View File

@ -3702,6 +3702,10 @@ PyObject* torch_c_dynamo_guards_init() {
if (m == nullptr)
return nullptr;
#ifdef Py_GIL_DISABLED
PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED);
#endif
Py_INCREF(&TensorGuardsType);
if (PyModule_AddObject(m, "TensorGuards", (PyObject*)&TensorGuardsType) < 0) {
Py_DECREF(&TensorGuardsType);

View File

@ -38,6 +38,9 @@ void initDynamoBindings(PyObject* torch) {
if (dynamo == nullptr || PyModule_AddObject(torch, "_dynamo", dynamo) != 0) {
throw python_error();
}
#ifdef Py_GIL_DISABLED
PyUnstable_Module_SetGIL(dynamo, Py_MOD_GIL_NOT_USED);
#endif
PyObject* eval_frame = torch_c_dynamo_eval_frame_init();
if (eval_frame == nullptr ||

View File

@ -820,7 +820,15 @@ static PyObject* set_autograd_compiler(PyObject* dummy, PyObject* args) {
}
PyObject* torch_c_dynamo_compiled_autograd_init() {
return PyModule_Create(&_module);
PyObject* mod = PyModule_Create(&_module);
if (mod == nullptr) {
return nullptr;
}
#ifdef Py_GIL_DISABLED
PyUnstable_Module_SetGIL(mod, Py_MOD_GIL_NOT_USED);
#endif
return mod;
}
} // namespace torch::dynamo::autograd

View File

@ -25,6 +25,10 @@ PyObject* torch_c_dynamo_utils_init() {
if (m == nullptr)
return nullptr;
#ifdef Py_GIL_DISABLED
PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED);
#endif
auto py_m = py::handle(m).cast<py::module>();
py_m.def("is_instancemethod", is_instancemethod);
return m;

View File

@ -379,9 +379,11 @@ std::string format_invalid_args(
PyObject *key = nullptr, *value = nullptr;
Py_ssize_t pos = 0;
Py_BEGIN_CRITICAL_SECTION(given_kwargs);
while (PyDict_Next(given_kwargs, &pos, &key, &value)) {
kwargs.emplace(THPUtils_unpackString(key), value);
}
Py_END_CRITICAL_SECTION();
}
if (options.size() == 1) {

View File

@ -48,9 +48,9 @@ at::Tensor nested_tensor_ctor(
// Check whether we are dealing with lists of tensors or not
std::vector<at::Tensor> new_list(PyList_Size(data));
for (const auto i : c10::irange(PyList_Size(data))) {
PyObject* elem = PyList_GetItem(data, i);
if (THPVariable_Check(elem)) {
new_list[i] = THPVariable_Unpack(PyList_GetItem(data, i)).detach();
THPObjectPtr elem = THPObjectPtr(PyList_GetItemRef(data, i));
if (THPVariable_Check(elem.get())) {
new_list[i] = THPVariable_Unpack(elem.get()).detach();
TORCH_CHECK(
!new_list[i].is_nested(),
"We do not accept nested tensors as input to nested tensors");
@ -60,7 +60,7 @@ at::Tensor nested_tensor_ctor(
} else {
PythonArgs elem_r(r);
std::array<PyObject*, 6> elem_args = {
elem, // data
elem.get(), // data
r.args[1], // dtype
nullptr, // device (cpu)
nullptr, // no pinned memory

View File

@ -1447,6 +1447,8 @@ static Py_ssize_t find_param(FunctionSignature& signature, PyObject* name) {
PyObject* value = nullptr;
Py_ssize_t pos = 0;
// Note that this dict traversal is NoGil safe as the kwargs dict is only
// accessible within this thread.
while (PyDict_Next(kwargs, &pos, &key, &value)) {
if (!THPUtils_checkString(key)) {
throw TypeError("keywords must be strings");
@ -1518,6 +1520,8 @@ bool FunctionSignature::parse(
}
obj = PyTuple_GET_ITEM(args, arg_pos);
} else if (kwargs) {
// Note that this call is NoGil safe as it works on kwargs which are local
// to the current function call.
obj = PyDict_GetItem(kwargs, param.python_name);
for (PyObject* numpy_name : param.numpy_python_names) {
if (obj) {

View File

@ -14,8 +14,7 @@ extern "C" {
#define IS_PYTHON_3_13_PLUS PY_VERSION_HEX >= 0x030D0000
#define IS_PYTHON_3_14_PLUS PY_VERSION_HEX >= 0x030E0000
PYCAPI_COMPAT_STATIC_INLINE(int)
PyCode_GetNCellvars(PyCodeObject* code) {
static inline int PyCode_GetNCellvars(PyCodeObject* code) {
// gh-26364 added co_ncellvars to Python 3.11.0rc1
#if IS_PYTHON_3_11_PLUS
return code->co_ncellvars;
@ -24,8 +23,7 @@ PyCode_GetNCellvars(PyCodeObject* code) {
#endif
}
PYCAPI_COMPAT_STATIC_INLINE(int)
PyCode_GetNFreevars(PyCodeObject* code) {
static inline int PyCode_GetNFreevars(PyCodeObject* code) {
// gh-26364 added co_nfreevars to Python 3.11.0rc1
#if IS_PYTHON_3_11_PLUS
return code->co_nfreevars;

File diff suppressed because it is too large

View File

@ -392,7 +392,10 @@ at::Tensor tensor_from_cuda_array_interface(PyObject* obj) {
// Extract the `obj.__cuda_array_interface__['shape']` attribute
std::vector<int64_t> sizes;
{
PyObject* py_shape = PyDict_GetItemString(cuda_dict, "shape");
PyObject* py_shape = nullptr;
if (PyDict_GetItemStringRef(cuda_dict, "shape", &py_shape) < 0) {
throw python_error();
}
if (py_shape == nullptr) {
throw TypeError("attribute `shape` must exist");
}
@ -405,7 +408,10 @@ at::Tensor tensor_from_cuda_array_interface(PyObject* obj) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int dtype_size_in_bytes;
{
PyObject* py_typestr = PyDict_GetItemString(cuda_dict, "typestr");
PyObject* py_typestr = nullptr;
if (PyDict_GetItemStringRef(cuda_dict, "typestr", &py_typestr) < 0) {
throw python_error();
}
if (py_typestr == nullptr) {
throw TypeError("attribute `typestr` must exist");
}
@ -426,7 +432,10 @@ at::Tensor tensor_from_cuda_array_interface(PyObject* obj) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
void* data_ptr;
{
PyObject* py_data = PyDict_GetItemString(cuda_dict, "data");
PyObject* py_data = nullptr;
if (PyDict_GetItemStringRef(cuda_dict, "data", &py_data) < 0) {
throw python_error();
}
if (py_data == nullptr) {
throw TypeError("attribute `shape` data exist");
}
@ -450,7 +459,10 @@ at::Tensor tensor_from_cuda_array_interface(PyObject* obj) {
// Extract the `obj.__cuda_array_interface__['strides']` attribute
std::vector<int64_t> strides;
{
PyObject* py_strides = PyDict_GetItemString(cuda_dict, "strides");
PyObject* py_strides = nullptr;
if (PyDict_GetItemStringRef(cuda_dict, "strides", &py_strides) < 0) {
throw python_error();
}
if (py_strides != nullptr && py_strides != Py_None) {
if (PySequence_Length(py_strides) == -1 ||
static_cast<size_t>(PySequence_Length(py_strides)) != sizes.size()) {