#include <torch/csrc/dynamo/guards.h>

#include <torch/csrc/utils/python_compat.h>

#include <ATen/EmptyTensor.h>
#include <ATen/SparseCsrTensorUtils.h>

#define PY_SSIZE_T_CLEAN
#include <ATen/autocast_mode.h>
#include <c10/util/flat_hash_map.h>
#include <torch/csrc/autograd/grad_mode.h>
#include <torch/csrc/autograd/utils/wrap_outputs.h>
#include <torch/csrc/dynamo/framelocals_mapping.h>
#include <torch/csrc/inductor/inductor_ops.h>
#include <torch/csrc/utils/disable_torch_function.h>
#include <torch/csrc/utils/python_arg_parser.h>
#include <torch/csrc/utils/python_numbers.h>
#include <torch/csrc/utils/python_symnode.h>
#include <torch/csrc/utils/pythoncapi_compat.h>
#include <torch/extension.h>

#ifdef USE_CUDA
#include <ATen/cuda/EmptyTensor.h>
#endif

#ifdef USE_XPU
#include <ATen/xpu/EmptyTensor.h>
#endif

#include <algorithm>
#include <sstream>
#include <tuple>
#include <utility>

// Certain CPython data structures are defined in `.c` files in earlier Python
// versions, e.g., for TupleIteratorGetItemAccessor, we need a fast way to
// retrieve the underlying tuple and access the item. Before Python 3.12, the
// data structure is in the tupleobject.c file -
// https://github.com/python/cpython/blob/9afc6d102d16080535325f645849cd84eb04d57d/Objects/tupleobject.c#L1058-L1062
//
// To handle the older Python versions, we manually copy the struct here and
// manually cast to this new struct. For newer versions, the struct is
// included in the header file.
#if IS_PYTHON_3_12_PLUS

#define Py_BUILD_CORE
#include <internal/pycore_range.h> // _PyRangeIterObject
#include <internal/pycore_tuple.h> // _PyTupleIterObject
#undef Py_BUILD_CORE

#else

// Manually create _PyTupleIterObject struct
typedef struct {
  PyObject_HEAD
  Py_ssize_t it_index;
  PyTupleObject* it_seq; /* Set to NULL when iterator is exhausted */
} _PyTupleIterObject;

// Copied from CPython, and given a unified name for different Python versions.
// https://github.com/python/cpython/blob/7f71003b222ad398713514c2b55d34dc05dba6bc/Objects/rangeobject.c#L765-L771
typedef struct {
  PyObject_HEAD
  // NOTE for Python 3.12+, `index` is removed, and `start` is updated in place
  // instead, upon each `next(...)` call. See
  // https://github.com/python/cpython/pull/27986
  long index;
  long start;
  long step;
  long len;
} _PyRangeIterObject;

#endif // IS_PYTHON_3_12_PLUS

namespace torch::dynamo {

// Macro to skip addition of duplicate guards like EQUALS_MATCH
#define SKIP_IF_GUARD_ALREADY_PRESENT(name) \
  if (self.is_leaf_guard_present(name)) {   \
    return;                                 \
  }                                         \
  self.insert_leaf_guard(name);

TensorCheck::TensorCheck(
    const LocalState& state,
    PyTypeObject* pt,
    const at::Tensor& v,
    std::vector<std::optional<c10::SymInt>> dynamic_dims_sizes,
    std::vector<std::optional<c10::SymInt>> dynamic_dims_strides)
    : pytype(pt),
      dispatch_key_(state.apply(v.key_set()).raw_repr()),
      dtype_(v.dtype().toScalarType()),
      device_index_(v.device().index()),
      requires_grad_(v.requires_grad()),
      sizes_(std::move(dynamic_dims_sizes)),
      strides_(std::move(dynamic_dims_strides)),
      dim_(static_cast<int64_t>(sizes_.size())) {
  // TODO(voz): In cases where sizes_ and strides_ are fully dynamic, should
  // we just treat this as optional?
}

TensorCheck::TensorCheck(
    const LocalState& state,
    PyTypeObject* pt,
    c10::DispatchKeySet dispatch_key_set,
    at::ScalarType dtype,
    at::DeviceIndex device_index,
    bool requires_grad,
    std::vector<std::optional<c10::SymInt>> dynamic_dims_sizes,
    std::vector<std::optional<c10::SymInt>> dynamic_dims_strides)
    : pytype(pt),
      dispatch_key_(state.apply(dispatch_key_set).raw_repr()),
      dtype_(dtype),
      device_index_(device_index),
      requires_grad_(requires_grad),
      sizes_(std::move(dynamic_dims_sizes)),
      strides_(std::move(dynamic_dims_strides)),
      dim_(static_cast<int64_t>(sizes_.size())) {}

// See note in guards.py [Note - On Export Tensor Guards]
// Logic parallel to here must be maintained in python
bool TensorCheck::check(const LocalState& state, const at::Tensor& v) {
  // Sparse CSR/CSC/BSR/BSC tensors do not support stride information, so use
  // a placeholder of -1s instead.
  c10::SymIntArrayRef sym_strides(std::vector<c10::SymInt>(v.ndimension(), -1));
  bool does_not_support_stride = v.layout() == c10::kSparseCsr ||
      v.layout() == c10::kSparseCsc || v.layout() == c10::kSparseBsc ||
      v.layout() == c10::kSparseBsr;
  if (!does_not_support_stride) {
    sym_strides = v.sym_strides();
  }
  return check(
      state,
      v.key_set(),
      v.dtype().toScalarType(),
      v.device(),
      v.sym_sizes(),
      sym_strides,
      v.requires_grad());
}

bool TensorCheck::check(
    const LocalState& state,
    const c10::DispatchKeySet& dispatch_key_set,
    const at::ScalarType& dtype,
    const c10::Device& device,
    const c10::SymIntArrayRef& sym_sizes,
    const c10::SymIntArrayRef& sym_strides,
    const bool& requires_grad) {
  if (dispatch_key_ != state.apply(dispatch_key_set).raw_repr() ||
      dtype_ != dtype || device_index_ != device.index() ||
      requires_grad_ != requires_grad) {
    return false;
  }
  auto ndim = sym_sizes.size();
  if (ndim != static_cast<size_t>(dim_)) {
    return false;
  }
  const auto& sizes = sym_sizes;
  const auto& strides = sym_strides;
  for (auto i : c10::irange(ndim)) {
    auto known_size = sizes_[i];
    auto known_stride = strides_[i];
    if (known_size.has_value()) {
      if (known_size.value() != sizes[i]) {
        return false;
      }
    }
    if (known_stride.has_value()) {
      if (known_stride.value() != strides[i]) {
        return false;
      }
    }
  }
  return true;
}

std::string TensorCheck::check_verbose(
    const LocalState& state,
    const at::Tensor& v,
    const std::string& tensor_name) {
  std::stringstream fail_reason;
  fail_reason << "tensor '" << tensor_name << "' ";
  if (dispatch_key_ != state.apply(v.key_set()).raw_repr()) {
    // return fmt::format("tensor dispatch key mismatch. expected {}, actual
    // {}", dispatch_key_, state.apply(v.key_set()).raw_repr());
    fail_reason << "dispatch key set mismatch. expected "
                << c10::DispatchKeySet(c10::DispatchKeySet::RAW, dispatch_key_)
                << ", actual " << state.apply(v.key_set());
    return fail_reason.str();
  } else if (dtype_ != v.dtype().toScalarType()) {
    // return fmt::format("tensor dtype mismatch. expected {}, actual {}",
    // dtype_, v.dtype().toScalarType());
    fail_reason << "dtype mismatch. expected " << dtype_ << ", actual "
                << v.dtype().toScalarType();
    return fail_reason.str();
  } else if (device_index_ != v.device().index()) {
    fail_reason << "Tensor device index mismatch. Expected device index to be "
                << device_index_ << ", actual " << v.device().index();
    return fail_reason.str();
  } else if (requires_grad_ != v.requires_grad()) {
    // return fmt::format("tensor requires_grad mismatch. expected {}",
    // requires_grad_);
    fail_reason << "requires_grad mismatch. expected requires_grad="
                << requires_grad_;
    return fail_reason.str();
  }
  auto ndim = v.ndimension();
  if (ndim != dim_) {
    // return fmt::format("tensor rank mismatch. expected {}, actual {}",
    // sizes_.size(), ndim);
expected " << sizes_.size() << ", actual " << ndim; return fail_reason.str(); } const auto& sizes = v.sym_sizes(); for (auto i : c10::irange(ndim)) { auto known_size = sizes_[i]; if (known_size.has_value() && (known_size.value() != sizes[i])) { fail_reason << "size mismatch at index " << i << ". expected " << known_size.value() << ", actual " << sizes[i]; return fail_reason.str(); } } const bool supports_stride = !v.is_sparse() && !at::sparse_csr::is_sparse_compressed(v); if (supports_stride) { const auto& strides = v.sym_strides(); for (auto i : c10::irange(ndim)) { auto known_stride = strides_[i]; if (known_stride.has_value() && known_stride.value() != strides[i]) { fail_reason << "stride mismatch at index " << i << ". expected " << known_stride.value() << ", actual " << strides[i]; return fail_reason.str(); } } } return ""; } namespace { typedef std::vector ChecksList; typedef struct { PyObject_HEAD ChecksList* checks; } TensorGuards; static void TensorGuards_dealloc(TensorGuards* self) { if (self->checks != nullptr) { delete self->checks; self->checks = nullptr; } Py_TYPE(self)->tp_free((PyObject*)self); } static PyObject* TensorGuards_new( PyTypeObject* type, PyObject* args, PyObject* kwds) { TensorGuards* self = (TensorGuards*)type->tp_alloc(type, 0); if (self != nullptr) { self->checks = new ChecksList(); } return (PyObject*)self; } static std::vector> wrapIntegersInOptional( const c10::SymIntArrayRef& intArray) { std::vector> optVec(intArray.size()); std::transform( intArray.begin(), intArray.end(), optVec.begin(), [](const c10::SymInt& value) { return std::make_optional(value); }); return optVec; } static std::vector> pyListToVecOptInt( PyObject* pyList) { std::vector> vec; Py_ssize_t size = PyList_Size(pyList); for (Py_ssize_t i = 0; i < size; i++) { PyObject* item = PyList_GetItem(pyList, i); auto handle = py::handle(item); if (item == Py_None) { vec.emplace_back(std::nullopt); } else if (torch::is_symint(handle)) { vec.emplace_back(py::cast(handle)); } else { int64_t value = PyLong_AsLongLong(item); if (value == -1 && PyErr_Occurred()) { PyErr_SetString( PyExc_TypeError, "Size or stride list item is not a valid integer."); TORCH_CHECK(false, "Size or stride list item is not a valid integer."); } vec.emplace_back(c10::SymInt(value)); } } return vec; } static std::vector>> get_dynamic_dims( PyObject* dynamic_dims_py) { std::vector>> per_tensor_dynamic_dims; if (dynamic_dims_py != Py_None) { Py_ssize_t size = PyList_Size(dynamic_dims_py); for (Py_ssize_t i = 0; i < size; i++) { PyObject* py_list = PyList_GetItem(dynamic_dims_py, i); std::vector> vec = pyListToVecOptInt(py_list); per_tensor_dynamic_dims.push_back(std::move(vec)); } } return per_tensor_dynamic_dims; } static int TensorGuards_init( TensorGuards* self, PyObject* args, PyObject* kwds) { if (!PyTuple_CheckExact(args)) { PyErr_SetString(PyExc_TypeError, "expected tuple()"); return -1; } // Top level structure is List[List[Union[int, None]]] PyObject* dynamic_dims_sizes_py = PyDict_GetItemString(kwds, "dynamic_dims_sizes"); if (dynamic_dims_sizes_py == nullptr) { PyErr_SetString(PyExc_TypeError, "missing dynamic_dims_sizes=..."); return -1; } PyObject* dynamic_dims_strides_py = PyDict_GetItemString(kwds, "dynamic_dims_strides"); if (dynamic_dims_strides_py == nullptr) { PyErr_SetString(PyExc_TypeError, "missing dynamic_dims_strides=..."); return -1; } // dynamic_dims_strides/sizes_py is None when dynamic_shapes=False - this is // an optimization to avoid invoking .size()/.stride() in python needlessly std::vector>> 
  std::vector<std::vector<std::optional<c10::SymInt>>>
      per_tensor_dynamic_dims_sizes = get_dynamic_dims(dynamic_dims_sizes_py);
  std::vector<std::vector<std::optional<c10::SymInt>>>
      per_tensor_dynamic_dims_strides =
          get_dynamic_dims(dynamic_dims_strides_py);

  auto& checks = *self->checks;
  auto len = PyTuple_GET_SIZE(args);
  checks.reserve(len);
  LocalState state;

  for (auto i : c10::irange(len)) {
    PyObject* item = PyTuple_GET_ITEM(args, i);
    if (!THPVariable_CheckExact(item) && !THPVariable_Check(item)) {
      PyErr_SetString(PyExc_TypeError, "expected Tensor()");
      return -1;
    }
    auto tensor = THPVariable_Unpack(item);
    std::vector<std::optional<c10::SymInt>> tensor_dims_size =
        per_tensor_dynamic_dims_sizes.empty()
        ? wrapIntegersInOptional(tensor.sym_sizes())
        : per_tensor_dynamic_dims_sizes[i];
    std::vector<std::optional<c10::SymInt>> tensor_dims_stride =
        per_tensor_dynamic_dims_strides.empty()
        ? wrapIntegersInOptional(tensor.sym_strides())
        : per_tensor_dynamic_dims_strides[i];

    checks.emplace_back(
        state,
        Py_TYPE(item),
        std::move(tensor),
        std::move(tensor_dims_size),
        std::move(tensor_dims_stride));
  }
  return 0;
}

PyObject* TensorGuards_check(
    TensorGuards* self,
    PyObject* args,
    PyObject* kwargs) {
  if (!PyTuple_CheckExact(args)) {
    PyErr_SetString(PyExc_TypeError, "expected tuple()");
    return nullptr;
  }
  auto& checks = *self->checks;
  auto len = PyTuple_GET_SIZE(args);

  // kwargs is just ignored here
  if (static_cast<decltype(len)>(checks.size()) != len) {
    PyErr_SetString(PyExc_TypeError, "wrong length");
    return nullptr;
  }

  LocalState state;
  // Note - all the tensors that make it to guards must be unique. Dynamo
  // builder handles guarding for positive aliases (X is Y). However, we do not
  // create guards for negative alias (X is not Y) as that is an N^2
  // relationship. Instead, we rely on the uniqueness upstream to verify, at
  // check_fn time (this function).
  ska::flat_hash_map<PyObject*, std::nullptr_t> unique_tensors;
  for (auto i : c10::irange(len)) {
    PyObject* item = PyTuple_GET_ITEM(args, i);
    if (Py_TYPE(item) != checks[i].pytype) {
      Py_RETURN_FALSE;
    }
    auto insertion = unique_tensors.insert({item, nullptr});
    if (!insertion.second) {
      // Violates uniqueness
      Py_RETURN_FALSE;
    }
    if (!checks[i].check(state, THPVariable_Unpack(item))) {
      Py_RETURN_FALSE;
    }
  }
  Py_RETURN_TRUE;
}

PyObject* TensorGuards_check_verbose(
    TensorGuards* self,
    PyObject* args,
    PyObject* kwargs) {
  if (!PyTuple_CheckExact(args)) {
    PyErr_SetString(PyExc_TypeError, "expected tuple()");
    return nullptr;
  }
  auto& checks = *self->checks;
  auto len = PyTuple_GET_SIZE(args);
  if (static_cast<decltype(len)>(checks.size()) != len) {
    PyErr_SetString(PyExc_TypeError, "wrong length");
    return nullptr;
  }

  PyObject* tensor_check_names_py =
      PyDict_GetItemString(kwargs, "tensor_check_names");
  if (tensor_check_names_py == nullptr) {
    PyErr_SetString(PyExc_TypeError, "missing tensor_check_names kwarg");
    return nullptr;
  }

  if (!PyList_Check(tensor_check_names_py)) {
    PyErr_SetString(PyExc_TypeError, "tensor_check_names kwarg must be a list");
    return nullptr;
  }

  auto names_size = PyList_Size(tensor_check_names_py);
  if (names_size != static_cast<decltype(names_size)>(checks.size())) {
    PyErr_SetString(
        PyExc_TypeError,
        "tensor_check_names should be the same size as # tensors");
    return nullptr;
  }

  std::vector<std::string> tensor_check_names;
  tensor_check_names.reserve(names_size);
  for (auto i : c10::irange(names_size)) {
    PyObject* value = PyList_GetItem(tensor_check_names_py, i);
    if (!PyUnicode_Check(value)) {
      PyErr_SetString(
          PyExc_TypeError, "tensor_check_names must only contain strings");
      return nullptr;
    }
    tensor_check_names.emplace_back(PyUnicode_AsUTF8(value));
  }

  LocalState state;
  ska::flat_hash_map<PyObject*, std::nullptr_t> unique_tensors;
  for (auto i : c10::irange(len)) {
    PyObject* item = PyTuple_GET_ITEM(args, i);
    if (Py_TYPE(item) != checks[i].pytype) {
      std::stringstream fail_reason;
      PyObject* type_str = PyObject_Str(PyObject_Type(item));
      fail_reason << "expected type of '" << tensor_check_names[i]
                  << "' to be a tensor type, ";
      if (!type_str) {
        fail_reason << "but found a different type";
      } else {
        fail_reason << "but found " << PyUnicode_AsUTF8(type_str);
      }
      return Py_BuildValue("s", fail_reason.str().c_str());
    }

    auto insertion = unique_tensors.insert({item, nullptr});
    if (!insertion.second) {
      std::stringstream fail_reason;
      fail_reason << "Duplicate tensor found where not expected! ";
      fail_reason << tensor_check_names[i]
                  << " should not alias to anything, but is aliased";
      return Py_BuildValue("s", fail_reason.str().c_str());
    }
    std::string fail_reason = checks[i].check_verbose(
        state, THPVariable_Unpack(item), tensor_check_names[i]);
    if (!fail_reason.empty()) {
      return Py_BuildValue("s", fail_reason.c_str());
    }
  }
  Py_RETURN_TRUE;
}

// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
static PyMethodDef TensorGuards_methods[] = {
    {"check",
     (PyCFunction)(void*)TensorGuards_check,
     METH_VARARGS | METH_KEYWORDS,
     ""},
    {"check_verbose",
     (PyCFunction)(void*)TensorGuards_check_verbose,
     METH_VARARGS | METH_KEYWORDS,
     "verbose fail reasons for failed checks"},
    {nullptr} /* Sentinel */
};

static PyTypeObject TensorGuardsType = {PyVarObject_HEAD_INIT(nullptr, 0)};

struct AutocastState {
  static constexpr auto& DEVICES = at::autocast::_AUTOCAST_SUPPORTED_DEVICES;
  std::array<bool, DEVICES.size()> enabled;
  std::array<at::ScalarType, DEVICES.size()> dtype;
  bool cache_enabled;

  AutocastState() : enabled{}, dtype{} {
    for (size_t i = 0; i < DEVICES.size(); i++) {
      enabled[i] = at::autocast::is_autocast_enabled(DEVICES[i]);
      dtype[i] = at::autocast::get_autocast_dtype(DEVICES[i]);
    }
    cache_enabled = at::autocast::is_autocast_cache_enabled();
  }

  bool operator==(const AutocastState& o) const {
    for (size_t i = 0; i < DEVICES.size(); i++) {
      if (enabled[i] != o.enabled[i] || dtype[i] != o.dtype[i]) {
        return false;
      }
    }
    if (cache_enabled != o.cache_enabled) {
      return false;
    }
    return true;
  }
};

// TODO (janimesh) - Remove the PyObject_HEAD part when C++ guard manager is
// merged.
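// GlobalStateGuard snapshots process-global PyTorch state (grad mode,
// autocast, torch function, determinism flags, matmul reduction flags,
// num_threads, default dtype) at compile time; check() compares that snapshot
// against the current global state at guard-evaluation time.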
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
struct GlobalStateGuard {
  PyObject_HEAD

  inline void init() {
    auto& ctx = at::globalContext();
    _grad_mode = at::GradMode::is_enabled();
    _autocast_state = AutocastState();
    // The below two flags disambiguate
    // if torch function disabled state is
    // 1) enabled, 2) all disabled, 3) subclasses disabled
    // we guard on the stack separately
    _torch_function = torch::torch_function_enabled();
    _torch_function_all_disabled = at::impl::torch_function_all_disabled();
    _deterministic_algorithms = ctx.deterministicAlgorithms();
    _deterministic_algorithms_warn_only = ctx.deterministicAlgorithmsWarnOnly();
    _allow_tf32 = ctx.allowTF32CuBLAS();
    _allow_fp16_reduce = ctx.allowFP16ReductionCuBLAS();
    _allow_bf16_reduce = ctx.allowBF16ReductionCuBLAS();
    _num_threads = at::get_num_threads();
    _default_dtype = at::get_default_dtype();
  }

  inline bool check() const {
    auto& ctx = at::globalContext();
    return (_grad_mode == at::GradMode::is_enabled() &&
            _autocast_state == AutocastState() &&
            _torch_function == torch::torch_function_enabled() &&
            _torch_function_all_disabled ==
                at::impl::torch_function_all_disabled() &&
            _deterministic_algorithms == ctx.deterministicAlgorithms() &&
            _deterministic_algorithms_warn_only ==
                ctx.deterministicAlgorithmsWarnOnly() &&
            _allow_tf32 == ctx.allowTF32CuBLAS() &&
            _allow_fp16_reduce == ctx.allowFP16ReductionCuBLAS() &&
            _allow_bf16_reduce == ctx.allowBF16ReductionCuBLAS() &&
            _num_threads == at::get_num_threads()) &&
        _default_dtype == at::get_default_dtype();
  }

  inline std::string reason() const {
    std::ostringstream os;
    auto& ctx = at::globalContext();
    if (_grad_mode != at::GradMode::is_enabled())
      os << "grad_mode ";
    if (!(_autocast_state == AutocastState()))
      os << "autocast ";
    if (_torch_function != torch::torch_function_enabled())
      os << "torch_function ";
    if (_deterministic_algorithms != ctx.deterministicAlgorithms())
      os << "deterministic_algorithms ";
    if (_deterministic_algorithms_warn_only !=
        ctx.deterministicAlgorithmsWarnOnly())
      os << "deterministic_algorithms_warn_only ";
    if (_allow_tf32 != ctx.allowTF32CuBLAS())
      os << "allow_tf32 ";
    if (_allow_fp16_reduce != ctx.allowFP16ReductionCuBLAS())
      os << "allow_fp16_reduce ";
    if (_allow_bf16_reduce != ctx.allowBF16ReductionCuBLAS())
      os << "allow_bf16_reduce ";
    if (_num_threads != at::get_num_threads())
      os << "num_threads ";
    if (_default_dtype != at::get_default_dtype())
      os << "default_dtype ";
    return os.str();
  }

  bool _grad_mode;
  AutocastState _autocast_state;
  bool _torch_function;
  bool _torch_function_all_disabled;
  bool _deterministic_algorithms;
  bool _deterministic_algorithms_warn_only;
  bool _allow_tf32;
  bool _allow_fp16_reduce;
  bool _allow_bf16_reduce;
  int _num_threads;
  caffe2::TypeMeta _default_dtype;
  // TODO(jansel): we should guard on more state as inductor starts using it
};

int GlobalStateGuard_init(
    GlobalStateGuard* self,
    PyObject* args,
    PyObject* kwargs) {
  self->init();
  return 0;
}

PyObject* GlobalStateGuard_check(
    GlobalStateGuard* self,
    PyObject* args,
    PyObject* kwargs) {
  if (self->check()) {
    Py_RETURN_TRUE;
  } else {
    Py_RETURN_FALSE;
  }
}

PyObject* GlobalStateGuard_reason(
    GlobalStateGuard* self,
    PyObject* args,
    PyObject* kwargs) {
  return PyUnicode_FromString(self->reason().c_str());
}

// NOLINTNEXTLINE(*array*)
static PyMethodDef GlobalStateGuard_methods[] = {
    {"check",
     (PyCFunction)(void*)GlobalStateGuard_check,
     METH_NOARGS,
     "Return true if global state was the same as at creation time"},
    {"reason",
     (PyCFunction)(void*)GlobalStateGuard_reason,
     METH_NOARGS,
reason for guard check failing"}, {nullptr}}; static PyTypeObject GlobalStateGuardType = { PyVarObject_HEAD_INIT(nullptr, 0) }; static PyObject* check_type_id(PyObject* dummy, PyObject* args) { // faster `lambda obj, expected: id(type(obj)) == expected` PyObject* obj = nullptr; unsigned long long expected = 0; if (!PyArg_ParseTuple(args, "OK", &obj, &expected)) { return nullptr; } // NOLINTNEXTLINE(performance-no-int-to-ptr) if (Py_TYPE(obj) == (void*)expected) { Py_RETURN_TRUE; } else { Py_RETURN_FALSE; } } static PyObject* check_obj_id(PyObject* dummy, PyObject* args) { // faster `lambda obj, expected: id(obj) == expected` PyObject* obj = nullptr; unsigned long long expected = 0; if (!PyArg_ParseTuple(args, "OK", &obj, &expected)) { return nullptr; } // NOLINTNEXTLINE(performance-no-int-to-ptr) if (obj == (void*)expected) { Py_RETURN_TRUE; } else { Py_RETURN_FALSE; } } #if IS_PYTHON_3_12_PLUS static std::unordered_map dict_version_map; static int dict_version_watcher_id; static uint64_t global_dict_version_id = 1; static int dict_version_watch_callback( PyDict_WatchEvent event, PyObject* dict, PyObject* key, PyObject* new_value) noexcept { if (event == PyDict_EVENT_DEALLOCATED) { dict_version_map.erase(dict); } else if (event != PyDict_EVENT_CLONED) { dict_version_map[dict] = global_dict_version_id++; } return 0; } #endif static uint64_t get_dict_version_unchecked(PyObject* dict) { #if IS_PYTHON_3_12_PLUS if (PyDict_Watch(dict_version_watcher_id, dict)) { throw std::runtime_error("failed to add version watcher to dict!"); } if (!dict_version_map.count(dict)) { dict_version_map[dict] = global_dict_version_id++; } return dict_version_map[dict]; #else return ((PyDictObject*)dict)->ma_version_tag; #endif } static PyObject* dict_version(PyObject* dummy, PyObject* args) { // Retrieves the version of a dictionary. PyObject* obj = nullptr; if (!PyArg_ParseTuple(args, "O", &obj)) { return nullptr; } if (!PyDict_Check(obj)) { return nullptr; } return THPUtils_packUInt64(get_dict_version_unchecked(obj)); } static PyObject* assert_size_stride(PyObject* dummy, PyObject* args) { /* Assert that a given tensor has a given size/stride, but ignore strides of size==1 dimensions. Implemented in C++ as this is on the hot path. 
  */
  PyObject* item = nullptr;
  PyObject* size = nullptr;
  PyObject* stride = nullptr;
  if (!PyArg_ParseTuple(args, "OOO", &item, &size, &stride)) {
    return nullptr;
  }
  if (!THPVariable_CheckExact(item) && !THPVariable_Check(item)) {
    PyErr_SetString(PyExc_TypeError, "expected Tensor()");
    return nullptr;
  }
  if (!PyTuple_CheckExact(size) || !PyTuple_CheckExact(stride)) {
    PyErr_SetString(PyExc_TypeError, "expected tuple()");
    return nullptr;
  }
  at::Tensor tensor = THPVariable_Unpack(item);
  int64_t ndim = tensor.ndimension();
  if (PyTuple_GET_SIZE(size) != ndim || PyTuple_GET_SIZE(stride) != ndim) {
    PyErr_SetString(PyExc_AssertionError, "wrong number of dimensions");
    return nullptr;
  }

  std::stringstream msg;
  int num_errors = 0;
  for (auto i : c10::irange(ndim)) {
    int64_t want_size = THPUtils_unpackLong(PyTuple_GET_ITEM(size, i));
    int64_t want_stride = THPUtils_unpackLong(PyTuple_GET_ITEM(stride, i));
    int64_t actual_size = tensor.size(i);
    int64_t actual_stride = tensor.stride(i);
    if (want_size != actual_size ||
        // ignore stride differences when size is 1
        (want_stride != actual_stride && actual_size > 1)) {
      if (num_errors > 0)
        msg << "; ";
      msg << "expected size " << actual_size << "==" << want_size
          << ", stride " << actual_stride << "==" << want_stride
          << " at dim=" << i;
      num_errors++;
    }
  }

  if (num_errors) {
    PyErr_SetString(PyExc_AssertionError, msg.str().c_str());
    return nullptr;
  }

  Py_RETURN_TRUE;
}

template <typename T>
static void unwrap_size_tuple(PyObject* obj, T& output) {
  TORCH_CHECK(PyTuple_CheckExact(obj));
  size_t len = PyTuple_GET_SIZE(obj);
  output.reserve(len);
  for (size_t i = 0; i < len; ++i) {
    auto result = PyLong_AsSsize_t(PyTuple_GET_ITEM(obj, i));
    TORCH_CHECK(result >= 0);
    output.emplace_back(result);
  }
}

template <typename T>
static void _parse_empty_strided_args(
    PyObject* args,
    T& sizes,
    T& strides,
    at::ScalarType& dtype) {
  TORCH_CHECK(PyTuple_CheckExact(args));
  TORCH_CHECK(PyTuple_GET_SIZE(args) == 3);
  // note PyTuple_GET_ITEM returns a borrowed ref, so no need for refcounts
  unwrap_size_tuple(PyTuple_GET_ITEM(args, 0), sizes);
  unwrap_size_tuple(PyTuple_GET_ITEM(args, 1), strides);
  PyObject* py_dtype = PyTuple_GET_ITEM(args, 2);
  TORCH_CHECK(THPDtype_Check(py_dtype));
  dtype = reinterpret_cast<THPDtype*>(py_dtype)->scalar_type;
}

static PyObject* _empty_strided_device(
    PyObject* dummy,
    PyObject* args,
    c10::DeviceType device_type) {
  HANDLE_TH_ERRORS;
  at::SmallVector<int64_t, 8> sizes;
  at::SmallVector<int64_t, 8> strides;
  at::ScalarType dtype{at::ScalarType::Undefined};
  _parse_empty_strided_args(args, sizes, strides, dtype);
  if (device_type == c10::DeviceType::CPU) {
    return THPVariable_Wrap(
        at::detail::empty_strided_cpu(sizes, strides, dtype));
  }
#ifdef USE_CUDA
  else if (device_type == c10::DeviceType::CUDA) {
    return THPVariable_Wrap(at::detail::empty_strided_cuda(
        sizes, strides, dtype, c10::DeviceType::CUDA));
  }
#endif
#ifdef USE_XPU
  else if (device_type == c10::DeviceType::XPU) {
    return THPVariable_Wrap(at::detail::empty_strided_xpu(
        sizes, strides, dtype, c10::DeviceType::XPU));
  }
#endif
  else {
    TORCH_CHECK(
        false, "PyTorch compiled without support for the specified device.");
  }
  END_HANDLE_TH_ERRORS;
}

static PyObject* _empty_strided_cpu(PyObject* dummy, PyObject* args) {
  // at::empty_strided is surprisingly slow. This is a lower-overhead
  // version that saves ~2us on every allocation.
  return _empty_strided_device(dummy, args, c10::DeviceType::CPU);
}

static PyObject* _empty_strided_cuda(PyObject* dummy, PyObject* args) {
  // at::empty_strided is surprisingly slow. This is lower-overhead.
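  // Expects args == (sizes_tuple, strides_tuple, dtype); see
  // _parse_empty_strided_args above.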
  return _empty_strided_device(dummy, args, c10::DeviceType::CUDA);
}

static PyObject* _empty_strided_xpu(PyObject* dummy, PyObject* args) {
  // at::empty_strided is surprisingly slow. This is lower-overhead.
  return _empty_strided_device(dummy, args, c10::DeviceType::XPU);
}

static PyObject* _reinterpret_tensor(PyObject* dummy, PyObject* args) {
  HANDLE_TH_ERRORS;
  static PythonArgParser parser(
      {"_reinterpret_tensor(Tensor base, IntArrayRef sizes, IntArrayRef strides, int64_t offset_increment=0)"},
      /*traceable=*/true);

  ParsedArgs<4> parsed_args;
  auto r = parser.parse(args, /*kwargs=*/nullptr, parsed_args);

  Tensor self = r.tensor(0);
  auto sizes = r.intlist(1);
  auto strides = r.intlist(2);
  auto offset_increment = r.toInt64(3);

  auto res = torch::inductor::_reinterpret_tensor(
      self, sizes, strides, offset_increment);
  return torch::autograd::utils::wrap(res);

  END_HANDLE_TH_ERRORS;
}

// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
static PyMethodDef _methods[] = {
    {"check_type_id", check_type_id, METH_VARARGS, nullptr},
    {"check_obj_id", check_obj_id, METH_VARARGS, nullptr},
    {"assert_size_stride", assert_size_stride, METH_VARARGS, nullptr},
    {"dict_version", dict_version, METH_VARARGS, nullptr},
    {"_empty_strided_cpu", _empty_strided_cpu, METH_VARARGS, nullptr},
    {"_empty_strided_cuda", _empty_strided_cuda, METH_VARARGS, nullptr},
    {"_empty_strided_xpu", _empty_strided_xpu, METH_VARARGS, nullptr},
    {"_reinterpret_tensor", _reinterpret_tensor, METH_VARARGS, nullptr},
    {nullptr, nullptr, 0, nullptr}};

static struct PyModuleDef _module = {
    PyModuleDef_HEAD_INIT,
    "torch._C._dynamo.guards",
    "Module containing checks on tensors",
    -1,
    _methods};

std::string get_exception_message() {
  PyObject *ptype = nullptr, *pvalue = nullptr, *ptraceback = nullptr;
  PyErr_Fetch(&ptype, &pvalue, &ptraceback);

  PyObject* exc_message_pyobj = PyObject_Str(pvalue);
  const char* exc_message = PyUnicode_AsUTF8(exc_message_pyobj);

  Py_DECREF(exc_message_pyobj);
  Py_XDECREF(ptype);
  Py_XDECREF(pvalue);
  Py_XDECREF(ptraceback);

  return std::string(exc_message);
}

bool is_immutable_object(py::handle example_value) {
  py::object config_module = py::module_::import("torch._dynamo.config");
  bool is_tensor_immutable =
      config_module.attr("skip_tensor_guards_with_matching_dict_tags")
          .cast<bool>();

  if (PyTuple_Check(example_value.ptr())) {
    // Check that each element is immutable
    for (Py_ssize_t i = 0; i < PyTuple_Size(example_value.ptr()); ++i) {
      if (!is_immutable_object(
              py::handle(PyTuple_GetItem(example_value.ptr(), i)))) {
        return false;
      }
    }
    return true;
  }

  return PyLong_Check(example_value.ptr()) ||
      PyFloat_Check(example_value.ptr()) || PyBool_Check(example_value.ptr()) ||
      PyUnicode_Check(example_value.ptr()) ||
      (is_tensor_immutable && THPVariable_Check(example_value.ptr()));
}

bool is_parameter(py::handle tensor) {
  py::object parameter = py::module::import("torch.nn").attr("Parameter");
  return py::isinstance(tensor, parameter);
}

/**
 * Dispatches metadata functions to the methods that return integer values,
 * i.e. used whenever static shapes are being used.
 *
 * These are used by the tensor storage overlapping check. Even though their
 * symbolic counterpart does work whenever static shapes are being used, the
 * introduced overhead might significantly worsen the performance.
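 *
 * For example, StaticMeta::size(t, 0) forwards to t.size(0) and returns a
 * plain int64_t, while DynamicMeta::size(t, 0) forwards to t.sym_size(0) and
 * returns a c10::SymInt.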
 */
struct StaticMeta {
  static int64_t numel(const Tensor& t) {
    return t.numel();
  }

  static int64_t storage_offset(const Tensor& t) {
    return t.storage_offset();
  }

  static int64_t size(const Tensor& t, int64_t i) {
    return t.size(i);
  }

  static int64_t stride(const Tensor& t, int64_t i) {
    return t.stride(i);
  }
};

/**
 * Dispatches metadata functions to the methods that return c10::SymInt
 * values, i.e. used whenever dynamic shapes are being used.
 */
struct DynamicMeta {
  static SymInt numel(const Tensor& t) {
    return t.sym_numel();
  }

  static SymInt storage_offset(const Tensor& t) {
    return t.sym_storage_offset();
  }

  static SymInt size(const Tensor& t, int64_t i) {
    return t.sym_size(i);
  }

  static SymInt stride(const Tensor& t, int64_t i) {
    return t.sym_stride(i);
  }
};

/**
 * Assumption: x and y are known to share a storage, and we are trying to
 * determine if their memory is actually completely disjoint, based on
 * sizes/strides/storage_offset
 *
 * "Meta" should be one of the "*Meta" classes above. They dictate which
 * version of the metadata functions we should be using (symbolic vs.
 * concrete). Even though they have the same apparent behavior, the symbolic
 * version introduces a bit of overhead. Such an overhead might end up
 * becoming relevant if it's run enough times.
 */
template <typename Meta>
bool tensors_definitely_do_not_overlap(const Tensor& x, const Tensor& y) {
  if (x.is_same(y)) {
    return false;
  }
  if (Meta::numel(x) == 0 || Meta::numel(y) == 0) {
    return true;
  }

  // Make x always on the left
  if (Meta::storage_offset(x) > Meta::storage_offset(y)) {
    return tensors_definitely_do_not_overlap<Meta>(y, x);
  }

  // Short-circuit in the "obvious" overlapping case: both tensors are
  // contiguous
  if (x.is_contiguous() && y.is_contiguous()) {
    if (Meta::storage_offset(x) + Meta::numel(x) > Meta::storage_offset(y)) {
      // definitely overlap
      return false;
    } else {
      // definitely no overlap
      return true;
    }
  }

  // Short-circuit: if last memory address of x is < start of y, then not
  // overlapping.
  auto x_last = Meta::storage_offset(x);
  for (int64_t i = 0; i < x.dim(); i++) {
    x_last += (Meta::size(x, i) - 1) * Meta::stride(x, i);
  }
  if (x_last < Meta::storage_offset(y)) {
    return true;
  }

  if (x.dim() == 2 && y.dim() == 2 && Meta::stride(x, 1) == 1 &&
      Meta::stride(y, 1) == 1) {
    // This case is needed for the shampoo optimizer.
    // All tensors are 2d (non-contiguous), have the same outer stride, and
    // have an inner stride of 1 (so rows are contiguous)
    if (Meta::stride(x, 0) == Meta::stride(y, 0)) {
      auto offset_delta = Meta::storage_offset(y) - Meta::storage_offset(x);
      if (offset_delta < Meta::size(x, 1)) {
        // definitely overlaps (row 0 of y overlaps with row 0 of x)
        // Example:
        //   base = torch.arange(32).reshape(4, 8)
        //   x = base.narrow(1, 0, 4)
        //     x: size=(4, 4), stride=(8, 1), offset=0
        //   y = base.narrow(1, 3, 4)
        //     y: size=(4, 4), stride=(8, 1), offset=3
        return false;
      }
      auto x_total_elems_covered =
          Meta::stride(x, 0) * (Meta::size(x, 0) - 1) + Meta::size(x, 1);
      if (x_total_elems_covered <= offset_delta) {
        // definitely does not overlap (last byte of x is before start of y)
        // Example:
        //   x: size=(4, 4), stride=(8, 1), offset=0 (last byte is 27)
        //   y: size=(4, 4), stride=(8, 1), offset=28 (start byte is 28)
        return true;
      }
      // At this point, we want to check if the 0th row of y
      // overlaps with **some** row of x.
      // We can check this by shifting y backward by the shared stride,
      // repeatedly, until the first row of y is before the first row of x.
      // Then we can check if these rows overlap.
      // We can accomplish this by modding our offset by the stride.
      auto offset_delta_mod = offset_delta % Meta::stride(x, 0);
      // Example:
      //   0  1  2  3
      //   9 10 11 12
      //  18 19 20 21
      //  27 28 29 30
      //   x: size=(4, 4), stride=(9, 1), offset=0
      //   y: size=(4, 4), stride=(9, 1), offset=22 (this would not overlap)
      //   y: size=(4, 4), stride=(9, 1), offset=23 (this would not overlap)
      //   y: size=(4, 4), stride=(9, 1), offset=24 (this would overlap)
      //   y: size=(4, 4), stride=(9, 1), offset=25 (this would overlap)
      // If the interval [modded_offset, modded_offset + y_size(1)] ends
      // before the next row of x begins, the rows do not overlap.
      if (offset_delta_mod + Meta::size(y, 1) <= Meta::stride(x, 0)) {
        return true;
      }
    }
  }
  return false;
}

/**
 * Computes the indices of the tensors that might overlap.
 *
 * Checks which of the given tensors have overlapping storages with ANY of
 * the other tensors.
 *
 * So, for example, if tensor 1 overlaps with tensor 2, and tensor 3 with
 * tensor 4, all of them will be in the output of this function. Even if
 * tensor 1 and 4 don't overlap.
 */
template <typename Meta = DynamicMeta>
std::unordered_set<int64_t> compute_overlapping_tensors(
    const std::vector<Tensor>& tensors) {
  std::unordered_set<int64_t> aliased_tensor_indices;
  for (int64_t i = 0; i < static_cast<int64_t>(tensors.size()); i++) {
    const auto& tensor_i = tensors[i];
    for (int64_t j = 0; j < i; j++) {
      if (!tensors_definitely_do_not_overlap<Meta>(tensor_i, tensors[j])) {
        aliased_tensor_indices.insert(i);
        aliased_tensor_indices.insert(j);
      }
    }
  }
  return aliased_tensor_indices;
}

/**
 * Checks whether the storage overlapping relation is preserved.
 *
 * At this point, `non_overlapping` represents the tensors that should not
 * have overlapping storages. Similarly, `overlapping` represents the tensors
 * that should have overlapping storage in some way (or that we can't be
 * sure).
 *
 * This function checks whether the assumption above is true or not.
 */
bool check_overlapping(
    const std::vector<Tensor>& overlapping,
    const std::vector<Tensor>& non_overlapping) {
  // Merge the tensor lists.
  std::vector<Tensor> tensors;
  tensors.reserve(overlapping.size() + non_overlapping.size());
  tensors.insert(tensors.end(), overlapping.begin(), overlapping.end());
  tensors.insert(tensors.end(), non_overlapping.begin(), non_overlapping.end());

  // Check what is the current storage overlapping relation.
  auto indices = compute_overlapping_tensors(tensors);

  // Check that the set of indices of tensors that might overlap is equal to
  // the indices of the first `overlapping.size()` tensors. That's because
  // `overlapping` tensors were in the beginning of `tensors` list.
  auto range = c10::irange(overlapping.size());
  return indices.size() == overlapping.size() &&
      std::all_of(range.begin(), range.end(), [&](int64_t i) {
        return indices.count(i) == 1;
      });
}

/**
 * Class responsible for collecting and checking the storage overlap
 * relations.
 *
 * The way GuardManager is implemented, when STORAGE_OVERLAPPING guard check
 * is run on a given tensor, we don't know if it is an overlapping or
 * non-overlapping tensor. There's no order to which GuardManager runs the
 * guard check so that we can split it in 2.
 *
 * Since we are only interested in the classification of each tensor (not
 * necessarily the order), we can just issue 2 STORAGE_OVERLAPPING guards
 * representing the overlapping tensors and the non-overlapping ones.
 *
 * In order to collect the information from both guards (so that we can call
 * `check_overlapping` function correctly), we need this class which stores
 * both kinds of tensors, and knows when it has collected each one of them.
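 *
 * For example, if 2 possibly-overlapping and 3 non-overlapping tensors are
 * expected, `maybe_check` trivially succeeds until all 5 tensors have been
 * `add`-ed, and only then runs the actual `check_overlapping` call.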
 */
class StorageOverlapChecker {
 public:
  StorageOverlapChecker(
      size_t expected_overlapping,
      size_t expected_non_overlapping)
      : _expected_overlapping(expected_overlapping),
        _expected_non_overlapping(expected_non_overlapping) {}

  /**
   * Adds a tensor to the corresponding storage, based on whether it should
   * be an `overlapping` tensor or not.
   */
  void add(PyObject* obj, bool overlapping) {
    // Just check that `obj` is actually a tensor, so that we can keep it
    // alive by incrementing its ref-count.
    TORCH_CHECK(THPVariable_CheckExact(obj) || THPVariable_Check(obj));
    Py_INCREF(obj);
    _get(overlapping).push_back(obj);
  }

  void reset(bool overlapping) {
    auto& vec = _get(overlapping);
    for (auto item : vec) {
      Py_DECREF(item);
    }
    vec.clear();
  }

  /**
   * Maybe checks the storage overlapping relation.
   *
   * Before actually calling `check_overlapping` function, this function
   * makes sure it has collected all expected tensors.
   */
  bool maybe_check() {
    TORCH_CHECK(_expected_overlapping >= _overlapping.size());
    TORCH_CHECK(_expected_non_overlapping >= _non_overlapping.size());
    if (_expected_overlapping == _overlapping.size() &&
        _expected_non_overlapping == _non_overlapping.size()) {
      // Transform each list of PyObject* into an actual list of Tensors.
      auto overlapping_tensors =
          _tensors_from(_overlapping, _expected_overlapping);
      auto non_overlapping_tensors =
          _tensors_from(_non_overlapping, _expected_non_overlapping);
      return check_overlapping(overlapping_tensors, non_overlapping_tensors);
    } else {
      // If we haven't collected them all yet, keep on running.
      return true;
    }
  }

 private:
  /**
   * Returns a reference to the container that corresponds to the given
   * overlapping relation.
   */
  std::vector<PyObject*>& _get(bool overlapping) {
    return overlapping ? _overlapping : _non_overlapping;
  }

  /**
   * Transforms a given list of PyObject* into a list of Tensor.
   */
  std::vector<Tensor> _tensors_from(
      const std::vector<PyObject*>& objects,
      int64_t size) {
    std::vector<Tensor> tensors;
    tensors.reserve(size);
    std::transform(
        objects.begin(),
        objects.end(),
        std::back_inserter(tensors),
        [=](PyObject* obj) { return THPVariable_Unpack(obj); });
    return tensors;
  }

  // Expected number of possibly overlapping tensors.
  size_t _expected_overlapping;
  // Expected number of non-overlapping tensors.
  size_t _expected_non_overlapping;
  // Collected possibly overlapping tensors.
  std::vector<PyObject*> _overlapping;
  // Collected non-overlapping tensors.
  std::vector<PyObject*> _non_overlapping;
};

/**
 * Stores relevant guard debug information, e.g., failure str for a LeafGuard
 * failure. The data structure is also accessible in Python.
 */
class GuardDebugInfo {
 public:
  GuardDebugInfo(
      bool result,
      py::list verbose_code_parts,
      int num_guards_executed)
      : result(result),
        verbose_code_parts(std::move(verbose_code_parts)),
        num_guards_executed(num_guards_executed) {}

  // This constructor is used when guard succeeds.
  GuardDebugInfo(bool result, int num_guards_executed)
      : result(result), num_guards_executed(num_guards_executed) {}

  GuardDebugInfo(
      bool result,
      const std::string& failed_reason,
      int num_guards_executed)
      : GuardDebugInfo(result, num_guards_executed) {
    verbose_code_parts.append(failed_reason);
  }

  std::string to_string() {
    std::stringstream ss;
    ss << "GuardDebugInfo(\n"
       << "result=" << result << ",\n"
       << "verbose_code_parts=" << verbose_code_parts << ",\n"
       << "num_guards_executed=" << num_guards_executed << ")\n";
    return ss.str();
  }

  // Whether the guard passed or failed.
  bool result;

  // This is a list of verbose_code_parts for the failed guard. When there
  // are more than one verbose_code_parts, the recompilation reasoning infra
  // on the Python side can iterate over this list and eval each string to
  // pinpoint the exact code part that failed.
  py::list verbose_code_parts;

  // Total number of executed guards so far. This is helpful in debugging if
  // shuffling is working.
  int num_guards_executed;
};

class GuardManager;
class RootGuardManager;
class DictGuardManager;

/**
 * Base class for the leaf guard in the GuardManager hierarchy.
 */
class LeafGuard {
 public:
  // Most guards do not need root guard manager.
  LeafGuard(py::object verbose_code_parts)
      : _verbose_code_parts(std::move(verbose_code_parts)) {}

  // Guards like TENSOR_MATCH require root_guard_manager to access local_state
  // shared across all leaf guards.
  LeafGuard(RootGuardManager* root_guard_manager, py::object verbose_code_parts)
      : _root_guard_manager(root_guard_manager),
        _verbose_code_parts(std::move(verbose_code_parts)) {}

  // check function could be called from python. This is useful for debugging
  // purposes.
  bool check(py::handle value) {
    return check_nopybind(value.ptr());
  }

  GuardDebugInfo check_verbose(py::handle value) {
    return check_verbose_nopybind(value.ptr());
  }

  virtual GuardDebugInfo check_verbose_nopybind(
      PyObject* value) { // borrowed ref
    bool result = check_nopybind(value);
    if (!result) {
      return GuardDebugInfo(result, _verbose_code_parts, 0);
    }
    return GuardDebugInfo(true, 0);
  }

  py::list verbose_code_parts() {
    return _verbose_code_parts;
  }

  // This is on the hot path and avoids any refcounting code from pybind. This
  // is not exposed to Python and can only be called from C++.
  virtual bool check_nopybind(PyObject* value) = 0;

  virtual bool check_nopybind(FrameLocalsMapping* map) {
    // throw std::runtime_error("fallback to python");
    // Could fallback to running check on the Python dict (lazily constructed)
    return check_nopybind((PyObject*)map->to_dict());
  }

  virtual ~LeafGuard() = default;

 protected:
  // RootGuardManager has state that is common across all guards like
  // LocalState.
  RootGuardManager* _root_guard_manager{nullptr};

 private:
  // This is set while constructing the leaf guard. This is used for
  // identifying the cause of recompilation.
  py::list _verbose_code_parts;
};

/**
 * Represents a leaf guard that accepts the python guard check function. We
 * would like to have most of the guards in C++ (to avoid a Python function
 * call). But, it will take some time to reach that goal. Also, there might
 * be cases where it is too tedious to write an equivalent C++ guard.
 *
 * LAMBDA_GUARD allows us to gradually move to C++. We can start from all
 * guards of type PythonLambdaGuard and incrementally move expensive guards
 * to C++.
 */
class LAMBDA_GUARD : public LeafGuard {
 public:
  LAMBDA_GUARD(py::object guard_check_fn, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {
    if (py::isinstance<py::function>(guard_check_fn)) {
      _guard_check_fn = py::cast<py::function>(std::move(guard_check_fn));
    } else {
      throw py::type_error("LAMBDA_GUARD expects (callable, str)");
    }
  }

  // Runs the lambda function with the current f_locals value.
  bool check_nopybind(PyObject* value) override { // borrowed ref
    PyObject* x = PyObject_CallOneArg(_guard_check_fn.ptr(), value); // new ref
    if (x == nullptr) {
      // An exception is caught in the lambda function.
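      // Clear the error state and treat the guard as failed; the verbose
      // variant below reports the exception message instead.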
      PyErr_Clear();
      return false;
    }
    bool result = PyObject_IsTrue(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(PyObject* value) override {
    PyObject* x = PyObject_CallOneArg(_guard_check_fn.ptr(), value); // new ref
    if (x == nullptr) {
      // An exception is caught in the lambda function.
      std::string exc_message = get_exception_message();
      PyErr_Clear();
      return GuardDebugInfo(false, exc_message, 0);
    }
    bool result = PyObject_IsTrue(x);
    Py_DECREF(x);
    if (result) {
      return GuardDebugInfo(true, 0);
    }
    return GuardDebugInfo(false, verbose_code_parts(), 0);
  }

 private:
  // The user provided lambda function for check_fn.
  py::function _guard_check_fn;
};

class TYPE_MATCH : public LeafGuard {
 public:
  // type_id = id(type(obj))
  TYPE_MATCH(py::object type_id, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _expected(py::cast<intptr_t>(std::move(type_id))) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    return Py_TYPE(value) == (void*)_expected;
  }

 private:
  // id of the type of the original object.
  intptr_t _expected;
};

class ID_MATCH : public LeafGuard {
 public:
  // obj_id = id(obj)
  ID_MATCH(py::object obj_id, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _expected(py::cast<intptr_t>(std::move(obj_id))) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    return value == (void*)_expected;
  }

 private:
  // id of the original object.
  intptr_t _expected;
};

class EQUALS_MATCH : public LeafGuard {
 public:
  EQUALS_MATCH(py::object value, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _value(value),
        _value_type(Py_TYPE(value.ptr())) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Fast path - pointer equality check. Pointer equality checks are ok
    // because objects guarded with EQUALS_MATCH are immutable.
    if (value != _value.ptr()) {
      // Check type
      if (Py_TYPE(value) != _value_type) {
        return false;
      }
      int result = PyObject_RichCompareBool(value, _value.ptr(), Py_EQ);
      // Check for exception
      if (result == -1) {
        PyErr_Clear();
        return false;
      }
      return result;
    }
    return true;
  }

 private:
  // value to compare against. This is py::object so that we hold on to the
  // original value and prevent garbage collection. We run EQUALS_MATCH only
  // on selected objects which do not have high memory footprint, so holding
  // on to these objects is ok.
  py::object _value;

  // Type of the value
  PyTypeObject* _value_type;
};

class RANGE_ITERATOR_MATCH : public LeafGuard {
 public:
  RANGE_ITERATOR_MATCH(
      py::object start,
      py::object stop,
      py::object step,
      py::object type_id,
      py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _type_id(py::cast<intptr_t>(std::move(type_id))) {
    PyObject* start_obj = start.ptr();
    PyObject* stop_obj = stop.ptr();
    PyObject* step_obj = step.ptr();
    _start = THPUtils_unpackLong(start_obj);
    _stop = THPUtils_unpackLong(stop_obj);
    _step = THPUtils_unpackLong(step_obj);
    TORCH_CHECK(
        !PyErr_Occurred(),
        "values of start/stop/step must fit in a long type");
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Do a type match first.
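    // The cast below to _PyRangeIterObject is only safe once we know the
    // value really is a range iterator.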
    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    if (Py_TYPE(value) != (void*)_type_id) {
      return false;
    }
    _PyRangeIterObject* iter = (_PyRangeIterObject*)value;

#if IS_PYTHON_3_12_PLUS
    long start = iter->start;
#else
    long start = iter->start + iter->index * iter->step;
#endif // IS_PYTHON_3_12_PLUS

    long stop = iter->start + iter->len * iter->step;
    return start == _start && stop == _stop && iter->step == _step;
  }

 private:
  intptr_t _type_id;
  // Normalized representation of a range iterator.
  long _start;
  long _stop;
  long _step;
};

class TUPLE_ITERATOR_LEN : public LeafGuard {
 public:
  TUPLE_ITERATOR_LEN(
      py::object length,
      py::object type_id,
      py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _length(py::cast<Py_ssize_t>(std::move(length))),
        _type_id(py::cast<intptr_t>(std::move(type_id))) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Do a type match first.
    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    if (Py_TYPE(value) != (void*)_type_id) {
      return false;
    }
    _PyTupleIterObject* it = (_PyTupleIterObject*)value;
    Py_ssize_t length = 0;
    if (it->it_seq)
      length = PyTuple_GET_SIZE(it->it_seq) - it->it_index;
    return length == _length;
  }

 private:
  // Length of the guarded list
  Py_ssize_t _length;
  intptr_t _type_id;
};

class LENGTH_CHECK : public LeafGuard {
 public:
  LENGTH_CHECK(py::object value, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _length(py::cast<Py_ssize_t>(std::move(value))) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // PySequence_Length returns -1 if the object is not a sequence. So, we
    // don't have to test for PySequence_Check.
    return PySequence_Length(value) == _length;
  }

 private:
  // Length of the guarded list
  Py_ssize_t _length;
};

class DICT_LENGTH : public LeafGuard {
 public:
  DICT_LENGTH(py::object value, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _length(py::cast<Py_ssize_t>(std::move(value))) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return PyDict_Check(value) && PyDict_Size(value) == _length;
  }

 private:
  // Length of the guarded dict
  Py_ssize_t _length;
};

class NOT_NONE : public LeafGuard {
 public:
  NOT_NONE(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return value != Py_None;
  }
};

class DEFAULT_DEVICE : public LeafGuard {
 public:
  DEFAULT_DEVICE(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {
    py::handle device_module = py::module::import("torch.utils._device");
    // Save the dict using py::object
    _utils_device_dict = device_module.attr("__dict__");
    _device = _utils_device_dict["CURRENT_DEVICE"];
  }

  template <typename T>
  bool check_nopybind_template(T* value) { // borrowed ref
    // Create a static interned string. An interned string is faster than
    // creating a new string every time. Even though it is a new reference, we
    // don't dec-ref it. Interned strings are used for things like variable
    // names and are leaked by design.
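    // Roughly the C++ analogue of the Python check (illustrative):
    //   torch.utils._device.CURRENT_DEVICE == <device saved at guard time>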
    static PyObject* current_device_str =
        PyUnicode_InternFromString("CURRENT_DEVICE");
    PyObject* device = PyDict_GetItem(
        _utils_device_dict.ptr(), current_device_str); // borrowed ref
    if (device != _device.ptr()) {
      int result = PyObject_RichCompareBool(device, _device.ptr(), Py_EQ);
      if (result == -1) {
        PyErr_Clear();
        return false;
      }
      return result;
    }
    return true;
  }

  bool check_nopybind(PyObject* value) override {
    return check_nopybind_template(value);
  }

  bool check_nopybind(FrameLocalsMapping* value) override {
    return check_nopybind_template(value);
  }

 private:
  // Save the current device and the module dict during the guard
  // construction.
  py::object _utils_device_dict;
  py::object _device;
};

class GLOBAL_STATE : public LeafGuard {
 public:
  GLOBAL_STATE(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {
    _guard = std::make_unique<GlobalStateGuard>();
    _guard->init();
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Ignore value arg, this is just to satisfy the interface.
    return _guard->check();
  }

  bool check_nopybind(FrameLocalsMapping* value) override {
    // Ignore value arg, this is just to satisfy the interface.
    return _guard->check();
  }

  GuardDebugInfo check_verbose_nopybind(PyObject* value) override {
    if (!_guard->check()) {
      return GuardDebugInfo(
          false, "GLOBAL_STATE changed: " + _guard->reason(), 0);
    }
    return GuardDebugInfo(true, 1);
  }

 private:
  std::unique_ptr<GlobalStateGuard> _guard;
};

class DATA_PTR_MATCH : public LeafGuard {
 public:
  DATA_PTR_MATCH(py::object tensor, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {
    PyObject* value = tensor.ptr();
    if (!THPVariable_CheckExact(value) && !THPVariable_Check(value)) {
      throw std::runtime_error("DATA_PTR_MATCH guard requires a tensor");
    }
    _data_ptr = THPVariable_Unpack(value).data_ptr();
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    if (!THPVariable_CheckExact(value) && !THPVariable_Check(value)) {
      return false;
    }
    void* data_ptr = THPVariable_Unpack(value).data_ptr();
    return data_ptr == _data_ptr;
  }

 private:
  // Original tensor data pointer.
  void* _data_ptr;
};

// Checks that an attr is absent in the object. We don't need the opposite
// HASATTR guard because we can just rely on GetAttrGuardAccessor to act as
// HASATTR guard.
class NO_HASATTR : public LeafGuard {
 public:
  NO_HASATTR(py::object attr_name, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _attr_name(std::move(attr_name)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return PyObject_HasAttr(value, _attr_name.ptr()) == 0;
  }

 private:
  py::object _attr_name;
};

// Checks that dict contains or does not contain a key. This happens for
// PythonSysModulesVariable tracker.
// TODO(janimesh) - Check if we can use DictGuardManager. The downside could
// be large number of keys for sys module, so DICT_CONTAINS might still end
// up being faster.
class DICT_CONTAINS : public LeafGuard {
 public:
  DICT_CONTAINS(bool contains, py::object key, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _contains(contains ? 1 : 0),
        _key(std::move(key)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    int result = PyDict_Contains(value, _key.ptr());
    if (result == -1) {
      PyErr_Clear();
      return false;
    }
    return result == _contains;
  }

 private:
  int _contains;
  py::object _key;
};

/**
 * Relational guards compare more than one value. We implement Relational
 * guards by capturing some state in the guard object.
 * For example, for tensor aliasing guards - tensor X is not tensor Y - we
 * construct one leaf guard and install it as a leaf of two guard managers
 * (one for X and another for Y). Therefore, this guard is run twice. In the
 * first invocation, it saves the first value (state) and returns True. In
 * the second invocation, it compares the saved value with the new value and
 * returns True if they do not alias.
 *
 * We have to be careful about resetting in case the other guards fail and we
 * have some state in the relational guard. This is done by the virtual
 * method reset_state(). This is called by the RootGuardManager before it
 * exits.
 */
class RelationalGuard : public LeafGuard {
 public:
  RelationalGuard(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {}

  // reset the relational guard state on guard failure. This is called by the
  // guard manager.
  virtual void reset_state() = 0;
};

/**
 * Checks that object x is object y.
 */
class OBJECT_ALIASING : public RelationalGuard {
 public:
  OBJECT_ALIASING(py::object verbose_code_parts)
      : RelationalGuard(std::move(verbose_code_parts)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    if (_is_first_call) {
      _first_tensor = value;
      _is_first_call = false;
      return true;
    }
    return _first_tensor == value;
  }

  void reset_state() final {
    _is_first_call = true;
  }

 private:
  bool _is_first_call{true};
  PyObject* _first_tensor{nullptr};
};

/**
 * Checks that none of the tensors alias.
 */
class NO_TENSOR_ALIASING : public RelationalGuard {
 public:
  NO_TENSOR_ALIASING(
      const py::list& tensor_names,
      py::object verbose_code_parts)
      : RelationalGuard(std::move(verbose_code_parts)),
        _tensor_names(tensor_names) {
    _unique_tensors.reserve(tensor_names.size());
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Typically we don't have to increment the ref count here because the
    // tensors are held in f_locals. But there is a special case for
    // `from_numpy` source. `from_numpy` converts integers and such into
    // tensors and these tensors are ephemeral. If we don't incref, those
    // tensors can be garbage collected, and the next time from_numpy can
    // reuse the memory address. Therefore, we incref here. They are decref'd
    // in reset_state.
    Py_INCREF(value);
    auto insertion = _unique_tensors.insert({value, nullptr});
    if (!insertion.second) {
      // No need to clear _unique_tensors, reset_state will do it.
      return false;
    }
    return true;
  }

  GuardDebugInfo check_verbose_nopybind(PyObject* value) override {
    bool result = check_nopybind(value);
    if (!result) {
      return GuardDebugInfo(
          false, "Duplicate tensor found where not expected!", 0);
    }
    return GuardDebugInfo(true, 1);
  }

  void reset_state() final {
    for (auto item : _unique_tensors) {
      Py_DECREF(item.first);
    }
    _unique_tensors.clear();
  }

 private:
  py::list _tensor_names;
  ska::flat_hash_map<PyObject*, std::nullptr_t> _unique_tensors;
};

/**
 * Checks the storage overlapping relation of input tensors.
 *
 * This guard is always installed in pairs: one for the possibly overlapping
 * tensors, and another one for the non-overlapping tensors. This is so we
 * can correctly identify the given tensor in the check method as one of the
 * 2 classes mentioned above.
 *
 * In the end, the one responsible for storing and checking is the
 * `StorageOverlapChecker` class.
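 *
 * Both guards of a pair hold a shared_ptr to the same StorageOverlapChecker,
 * which is how the tensors collected by the two guards end up in one place.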
 */
class STORAGE_OVERLAPPING : public RelationalGuard {
 public:
  STORAGE_OVERLAPPING(
      bool overlapping,
      std::shared_ptr<StorageOverlapChecker> checker,
      py::object verbose_code_parts)
      : RelationalGuard(std::move(verbose_code_parts)),
        _overlapping(overlapping),
        _checker(std::move(checker)) {}

  bool check_nopybind(PyObject* value) override {
    _checker->add(value, _overlapping);
    return _checker->maybe_check();
  }

  void reset_state() final {
    _checker->reset(_overlapping);
  }

 private:
  // Flag that indicates which kind of tensor this guard is collecting:
  //   1. Possibly overlapping tensors; or
  //   2. Non-overlapping tensors.
  bool _overlapping;
  // Actual checker for this guard.
  std::shared_ptr<StorageOverlapChecker> _checker;
};

class DYNAMIC_INDICES : public LeafGuard {
  // C++ equivalent of
  //  code.append(
  //      f"(({tensor_name}._dynamo_dynamic_indices.issubset({value._dynamo_dynamic_indices}))
  //      if hasattr({tensor_name}, '_dynamo_dynamic_indices') else True)"  #
  //      noqa: B950
  //  )
 public:
  DYNAMIC_INDICES(py::set dynamic_indices, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _dynamic_indices(std::move(dynamic_indices)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Make an interned string
    static PyObject* dynamic_indices_str =
        PyUnicode_InternFromString("_dynamo_dynamic_indices");
    PyObject* indices = PyObject_GetAttr(value, dynamic_indices_str); // new ref
    if (indices == nullptr) {
      // Attr absent. Clear exception.
      PyErr_Clear();
      // This is true deliberately. If hasattr fails, we return true.
      return true;
    }

    static PyObject* issubset_str = PyUnicode_InternFromString("issubset");
    PyObject* call_result = PyObject_CallMethodObjArgs(
        indices, issubset_str, _dynamic_indices.ptr(), nullptr); // new ref
    bool result = PyObject_IsTrue(call_result);
    Py_DECREF(call_result);
    Py_DECREF(indices);
    return result;
  }

 private:
  py::set _dynamic_indices;
};

class DICT_VERSION : public LeafGuard {
 public:
  DICT_VERSION(py::object value, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {
    if (!PyDict_Check(value.ptr())) {
      throw py::type_error("DICT_VERSION expects a dict");
    }
    _tag = get_dict_version_unchecked(value.ptr());
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return PyDict_Check(value) && get_dict_version_unchecked(value) == _tag;
  }

  // Saved dict version.
  uint64_t _tag;
};

// GuardManager can be a pointer to DictGuardManager, but at this point the
// compiler does not know that DictGuardManager is a derived class of
// GuardManager (no way to define inheritance relationships in forward
// declarations), so we forward declare a factory function and define it when
// both DictGuardManager and GuardManager are fully defined.
std::unique_ptr<GuardManager> make_guard_manager(
    RootGuardManager* root,
    std::string source,
    py::handle example_value,
    py::handle guard_manager_enum);

GuardManager* clone_guard_manager(
    GuardManager* from,
    RootGuardManager* root,
    const py::function& clone_filter_fn);
void add_relational_guard_resetter_to_cloned_root(
    RootGuardManager* root,
    std::shared_ptr<RelationalGuard> guard);

/**
 * Base class representing a pair of accessor and the associated guard
 * manager. The accessor defines how to access the child value from the
 * py::object given to the parent check function.
 *
 * GuardAccessors can be considered equivalent to name() method of Source
 * objects in guards.py. In python, name() method returns a str which we can
 * then eval in f_locals and f_globals to retrieve the actual py object.
 * GuardAccessor serves the same purpose. The minor difference is that
The minor difference is that * GuardManager is a tree structure, so a GuardAccessor just has to retrieve * the value in the next level in this tree and pass it to the child * GuardAccessor. * * GuardAccessor also owns the GuardManager associated with the retrieved * value from the GuardAccessor. */ class GuardAccessor { public: GuardAccessor( RootGuardManager* root, py::object accessor_key, std::string source, py::handle example_value, py::handle guard_manager_enum) : _guard_manager(make_guard_manager( root, source, example_value, guard_manager_enum)), _accessor_key(std::move(accessor_key)), _source(std::move(source)) {} // Return by reference as GuardAccessor owns the GuardManager. std::unique_ptr<GuardManager>& get_guard_manager() { return _guard_manager; } bool matches_key(const py::handle& key) const { return _accessor_key.equal(key); } std::string get_source() { return _source; } // matches_dict_tag is used by the DictGetItemGuardAccessor to skip the guard // subtree on immutable dict getitems. virtual bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) = 0; virtual bool check_nopybind(FrameLocalsMapping* map, bool matches_dict_tag) { // throw std::runtime_error("fallback to python"); // Could fall back to running the check on the Python dict (lazily // constructed) return check_nopybind((PyObject*)map->to_dict(), matches_dict_tag); } virtual GuardDebugInfo check_verbose_nopybind(PyObject* obj) = 0; virtual std::string repr() const = 0; virtual ~GuardAccessor() = default; public: // Cloning related functions GuardAccessor(GuardManager* guard_manager, GuardAccessor* from) : _guard_manager(std::unique_ptr<GuardManager>(guard_manager)) { from->clone_visitor(this); } virtual GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) = 0; void clone_visitor(GuardAccessor* to) { to->_source = this->_source; to->_accessor_key = this->_accessor_key; } template <typename DerivedGuardAccessor> GuardAccessor* clone_common( RootGuardManager* cloned_root, const py::function& clone_filter_fn) { GuardManager* cloned_mgr = clone_guard_manager( get_guard_manager().get(), cloned_root, clone_filter_fn); if (cloned_mgr == nullptr) { return nullptr; } DerivedGuardAccessor* cloned_accessor = new DerivedGuardAccessor(cloned_mgr, (DerivedGuardAccessor*)this); return cloned_accessor; } protected: // Guard manager corresponding to the retrieved value from the // GuardAccessor. std::unique_ptr<GuardManager> _guard_manager; // accessor key could be py::str for getattr, getitem or py::function for // lambda accessor. It is a py::object because we need to keep these accessor // keys alive. py::object _accessor_key; // A string that can be eval'd on f_locals or f_globals to access the variable // value. Only used for debugging. std::string _source; }; /** * GuardManager encapsulates all the guards related to a particular * py::object. It is a tree structure and consists of 1) Leaf guards - Guards * that are run on the user given object 2) Accessors - Guard accessors (like * getattr, getitem) to access the next value in the tree hierarchy. Accessor * object also holds the child GuardManager. * * Let's look at an example to understand how it works.
* class Pair: * int x = 1; * int y = 2; * * At compile time * >> guard_manager = GuardManager() * >> guard_manager.x.add_lambda_guard( * lambda x: isinstance(x, Pair), * lambda x: f"expected Pair, found {type(x)}" * ) * >> guard_manager.x.add_lambda_guard(lambda x: x == 1, lambda x: f"found * {x}, expected 1") * >> guard_manager.y.add_lambda_guard(lambda x: x == 2, lambda x: f"found * {x}, expected 2") * * At runtime * >> guard_manager.check(Pair()) * * At compile time we build the tree structure. When we do `guard_manager.x`, * it creates an AttrGuardAccessorNode, initializes a child guard manager with * this accessor node, and adds it as a child. When we do * `guard_manager.x.add_lambda_guard`, we call add_lambda_guard on the newly * created guard manager and register a new leaf guard on it. * * At runtime, the accessor node has an important function of providing a way * to access the value for the child guard. In the above example, * guard_manager.x adds an AttrGuardAccessorNode with attr_name x. When the * check function is called, the parent GuardManager calls getattr(value, "x") * on the value passed to it and runs the child guard manager's check on the * result. * * Performance optimization for fail fast - A runtime optimization here is * to sort the execution of child guards by failure count. This * ensures that we statistically run the more failure-prone guards * first. This can improve the cache lookup time when we have multiple cache * entries. */ // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class GuardManager { public: GuardManager() = delete; GuardManager(RootGuardManager* root, std::string source) : _root(root), _source(std::move(source)), _is_dict(false) {} GuardManager( RootGuardManager* root, std::string source, py::handle example_value) : _root(root), _source(std::move(source)), _is_dict(py::isinstance<py::dict>(example_value)) { if (_is_dict) { _dict_tag = get_dict_version_unchecked(example_value.ptr()); } } GuardManager(const GuardManager& m) = delete; GuardManager& operator=(const GuardManager&) = delete; virtual ~GuardManager() = default; RootGuardManager* get_root() { return _root; } std::string get_source() { return _source; } virtual void add_leaf_guard(std::shared_ptr<LeafGuard> leaf_guard) { _leaf_guards.emplace_back(std::move(leaf_guard)); } public: // For cloning GuardManager(RootGuardManager* root, std::string source, bool is_dict) : _root(root), _source(std::move(source)), _is_dict(is_dict) {} void clone_common( RootGuardManager* cloned_root, GuardManager* cloned_mgr, const py::function& clone_filter_fn) { for (const auto& guard : _leaf_guards) { cloned_mgr->_leaf_guards.emplace_back(guard); if (std::shared_ptr<RelationalGuard> relational_guard = std::dynamic_pointer_cast<RelationalGuard>(guard)) { add_relational_guard_resetter_to_cloned_root( cloned_root, relational_guard); } } for (const auto& accessor : _accessors) { GuardAccessor* cloned_accessor = accessor->clone(cloned_root, clone_filter_fn); if (cloned_accessor != nullptr) { cloned_mgr->_accessors.emplace_back( std::unique_ptr<GuardAccessor>(cloned_accessor)); } } } virtual GuardManager* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) { if (!py::cast<bool>(clone_filter_fn(this))) { return nullptr; } GuardManager* cloned_mgr = new GuardManager(cloned_root, _source, _is_dict); clone_common(cloned_root, cloned_mgr, clone_filter_fn); return cloned_mgr; } /** * Adds a new guard manager with an appropriate Accessor. If the accessor is * already present, we just return the guard manager.
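* * A hypothetical call site, sketched for illustration only - the attribute * accessor for `x` above would be obtained via something like * >> mgr->get_child_manager<GetAttrGuardAccessor>( * py::str("x"), "L['pair'].x", example_value, guard_manager_enum) * where repeated calls with the same accessor key return the same child * manager.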
*/ template <typename GuardAccessorT> GuardManager* get_child_manager( const py::object& accessor_key, const std::string& source, py::handle example_value, py::handle guard_manager_enum) { // accessor_key type depends on the GuardAccessorT // for example for GetAttrGuardAccessor - py::str name // Return the manager if the guard accessor exists for (const auto& accessor : _accessors) { if (accessor->matches_key(accessor_key)) { return accessor->get_guard_manager().get(); } } // Construct a new guard accessor _accessors.emplace_back(std::make_unique<GuardAccessorT>( _root, std::move(accessor_key), source, example_value, guard_manager_enum)); return _accessors.back()->get_guard_manager().get(); } // Runs the leaf guards check and then the child managers' check functions. // // NB: There is some code DUPLICATION between this and the check_verbose // function. This is intentional. The check function is in the hot path and is // kept very simple. The purpose of the check_verbose function is to get guard // failure reasoning to understand recompilations. The check_verbose function // does not change the state of the guard, e.g., it does not shuffle the // guards and does not change the fail count. For simplicity, we duplicate // the code here. template <typename T> bool check_nopybind_template(T* value) { // borrowed ref if (!this->check_leaf_guards_nopybind(value)) { return false; } return this->check_accessors_nopybind(value); } virtual bool check_nopybind(PyObject* value) { return check_nopybind_template(value); } virtual bool check_nopybind(FrameLocalsMapping* value) { return check_nopybind_template(value); } template <typename T> bool check_leaf_guards_nopybind(T* value) { // Iterate over leaf guards for (const auto& guard : _leaf_guards) { if (!guard->check_nopybind(value)) { // early exit _fail_count += 1; // no need of sorting, just return. return false; } } return true; } template <typename T> bool check_accessors_nopybind(T* value) { bool matches_dict_tag = false; uint64_t new_tag = 0; if constexpr (std::is_same<T, PyObject>::value) { if (_is_dict) { // Check if the dict tag matches. If it does, propagate to the child // accessors. This will pass to the child manager via // DictGetItemGuardAccessor. new_tag = get_dict_version_unchecked(value); matches_dict_tag = new_tag == _dict_tag; } } // Iterate over accessors. bool result = true; bool failed_on_first = true; for (const auto& accessor : _accessors) { if (!accessor->check_nopybind(value, matches_dict_tag)) { // early exit _fail_count += 1; result = false; // need to sort, so break the loop. break; } failed_on_first = false; } // failed_on_first is just an optimization to avoid sorting if we are // failing on the first accessor itself. This is helpful when we have // already sorted the guards once, and don't need to sort again. if (!result && !failed_on_first) { // Inplace sort the child guards by fail count. This moves the guards // with higher fail counts earlier in the queue, and enables fail fast // for the next check. // An alternate implementation was to use a priority queue directly on // _accessors, but it was rejected because of the complexity of // popping and creating a new pq on each run_guards. Moreover, this sort // happens on the unhappy path when a guard // fails. So, it's probably ok. std::sort( _accessors.begin(), _accessors.end(), [](const std::unique_ptr<GuardAccessor>& a, const std::unique_ptr<GuardAccessor>& b) { return a->get_guard_manager()->fail_count() > b->get_guard_manager()->fail_count(); }); } if (_is_dict && result) { // If result is true, reset the _dict_tag.
This is useful if there is a // mutation on the dict but it does not change the attr values (like // swapping). _dict_tag = new_tag; } return result; } // This function has some code duplication with the check function. This is // deliberate to keep the check function simple and fast. virtual GuardDebugInfo check_verbose_nopybind( PyObject* value) { // borrowed ref int num_guards_executed = 0; const GuardDebugInfo& debug_info = check_leaf_guards_verbose_nopybind(value, num_guards_executed); if (!debug_info.result) { return debug_info; } return check_accessors_verbose_nopybind(value, num_guards_executed); } GuardDebugInfo check_leaf_guards_verbose_nopybind( PyObject* value, int& num_guards_executed) { // Iterate over leaf guards for (const auto& guard : _leaf_guards) { const GuardDebugInfo& debug_info = guard->check_verbose_nopybind(value); num_guards_executed++; if (!debug_info.result) { return GuardDebugInfo( false, debug_info.verbose_code_parts, num_guards_executed); } } return GuardDebugInfo(true, num_guards_executed); } GuardDebugInfo check_accessors_verbose_nopybind( PyObject* value, int& num_guards_executed) { // Iterate over accessors for (const auto& accessor : _accessors) { const GuardDebugInfo& debug_info = accessor->check_verbose_nopybind(value); num_guards_executed += debug_info.num_guards_executed; if (!debug_info.result) { return GuardDebugInfo( false, debug_info.verbose_code_parts, num_guards_executed); } } return GuardDebugInfo(true, num_guards_executed); } int64_t fail_count() const { return _fail_count; } // DEBUG function - Returning raw pointers because we can't return unique_ptr // and pybind does not accept a unique_ptr reference return type. virtual std::vector<GuardAccessor*> get_accessors() const { std::vector<GuardAccessor*> ret; ret.reserve(_accessors.size()); for (const auto& accessor : _accessors) { ret.emplace_back(accessor.get()); } return ret; } // DEBUG function - Returning raw pointers because we can't return unique_ptr // and pybind does not accept a unique_ptr reference return type. virtual std::vector<GuardManager*> get_child_managers() { std::vector<GuardManager*> ret; ret.reserve(_accessors.size()); for (const auto& accessor : _accessors) { ret.emplace_back(accessor->get_guard_manager().get()); } return ret; } // DEBUG function - Returning raw pointers because we can't return unique_ptr // and pybind does not accept a unique_ptr reference return type. std::vector<LeafGuard*> get_leaf_guards() const { std::vector<LeafGuard*> ret; ret.reserve(_leaf_guards.size()); for (const auto& guard : _leaf_guards) { ret.push_back(guard.get()); } return ret; } bool is_leaf_guard_present(const std::string& guard_name) { return _inserted_leaf_guards.find(guard_name) != _inserted_leaf_guards.end(); } void insert_leaf_guard(const std::string& guard_name) { _inserted_leaf_guards.insert(guard_name); } void add_permitted_leaf_guard(std::shared_ptr<LeafGuard> leaf_guard) { // Selectively called for permitted guards. This is used by DictGuardManager, // which overrides add_leaf_guard to throw a runtime error. GuardManager::add_leaf_guard(std::move(leaf_guard)); } protected: // Keeps a count of how many times this guard manager's check function returns // False. This is used for the sorting optimization. int64_t _fail_count{0}; private: // Root of the guard manager; this is used to install the relational // guard resetters. RootGuardManager* _root; // A string that can be used to eval on f_locals or f_globals to get the // value. This is used only to pass on debugging information. std::string _source; // A map of which leaf guards are inserted.
This is to prevent duplicate // guards like TYPE_MATCH. std::unordered_set<std::string> _inserted_leaf_guards; // Leaf guards are the terminal guards on this object, e.g., type check on a // list. These guards have to be run before any children are run. // // These leaf guards are not shufflable. In almost all cases, these guards // will have an order, e.g., a type(x) is int guard and an x == 5 guard. We also // expect very few leaf guards per GuardManager node. // // NB: Why are leaf guards shared ptr? This is primarily to enable relational // guards like `tensor X is not tensor Y`. These guards require multiple // values. We handle it by creating one guard object that holds state and this // guard is installed in many guard managers, hence a shared ptr. std::vector<std::shared_ptr<LeafGuard>> _leaf_guards; // GuardAccessor nodes to access the child guards. These guards are // shufflable. On a guard failure, they are sorted based on their fail count // to enable fail fast for the next check. std::vector<std::unique_ptr<GuardAccessor>> _accessors; bool _is_dict; uint64_t _dict_tag{0}; }; /** Note on [Ownership with cloning] - GuardManagers have the facility to clone themselves. This is useful for cloning a subset of the guard manager tree in the diff guard manager. As far as ownership goes, the model is exactly the same as before. We have unique_ptr for GuardAccessor and GuardManagers. So, any state required for the accessors and managers is copied over using constructors and clone_visitor functions. The main thing to notice is leaf guards. The leaf guards are represented using shared_ptr, and they are shared (not cloned) with the cloned managers. So for leaf guard state to be released, both the original and cloned managers have to be destructed. */ /** * RootGuardManager is the root of the guard tree. This is primarily * constructed to hold the relational guard pointers so that we can reset the * state of those guards on guard failure. All the other important * implementation is in the GuardManager class. */ class RootGuardManager : public GuardManager { public: // This is the root node; its _root member points to itself RootGuardManager() : GuardManager(this, "L") {} // Adds the relational guard resetter void add_relational_guard_resetter( std::shared_ptr<RelationalGuard> relational_guard) { _relational_guard_resetters.emplace_back(std::move(relational_guard)); } // Python visible API to check guard function. bool check(py::handle value) { return check_nopybind(value.ptr()); } // Python visible API to check_verbose guard function. GuardDebugInfo check_verbose(py::handle value) { return check_verbose_nopybind(value.ptr()); } // Fast check function. template <typename T> bool check_nopybind_template(T* value) { // borrowed ref // Check [Note on GIL interaction with mutex lock] for details on why we // need the mutex and its interactions with the GIL. PyThreadState* _save = nullptr; Py_UNBLOCK_THREADS; // ; is added to avoid clang-formatting std::lock_guard<std::mutex> lock_guard(_lock); Py_BLOCK_THREADS; // ; is added to avoid clang-formatting // Get the local state. This will be used for TENSOR_MATCH guards.
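// (LocalState takes a snapshot of thread-local dispatch state, e.g. the // grad-mode dependent dispatch keys, so that every TENSOR_MATCH executed in // this run compares against one consistent view.)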
if (_init_local_state) { LocalState state; _local_state = state; } if (!GuardManager::check_leaf_guards_nopybind(value)) { _reset_relational_guard_state(); return false; } // Run accessor guards without TorchFunction enabled // Dynamo should only be adding guards on values without // torch function at this point, because if there // was a torch function, we should've traced through it const at::impl::TorchFunctionDisabledState old_state = at::impl::PythonTorchFunctionTLS::get_disabled_state(); at::impl::PythonTorchFunctionTLS::set_disabled_state( at::impl::TorchFunctionDisabledState::ALL_DISABLED); if (!GuardManager::check_accessors_nopybind(value)) { at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state); _reset_relational_guard_state(); return false; } // Iterate over epilogue leaf guards. for (const auto& guard : _epilogue_lambda_guards) { if (!guard->check_nopybind(value)) { // early exit at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state); _reset_relational_guard_state(); return false; } } at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state); _reset_relational_guard_state(); return true; } bool check_nopybind(PyObject* value) override { return check_nopybind_template(value); } bool check_nopybind(FrameLocalsMapping* value) override { return check_nopybind_template(value); } // Fast check_verbose function. GuardDebugInfo check_verbose_nopybind( PyObject* value) override { // borrowed ref // Check [Note on GIL interaction with mutex lock] for details on why we // need the mutex and its interactions with the GIL. PyThreadState* _save = nullptr; Py_UNBLOCK_THREADS; // ; is added to avoid clang-formatting std::lock_guard<std::mutex> lock_guard(_lock); Py_BLOCK_THREADS; // ; is added to avoid clang-formatting // Get the local state. This will be used for TENSOR_MATCH guards.
if (_init_local_state) { LocalState state; _local_state = state; } int num_guards_executed = 0; // Run leaf guards // This includes the GlobalStateGuard and the Torch Function Mode stack // guard, which require Torch Function to be in its unmodified state const GuardDebugInfo& debug_info_leaf = GuardManager::check_leaf_guards_verbose_nopybind( value, num_guards_executed); if (!debug_info_leaf.result) { _reset_relational_guard_state(); return debug_info_leaf; } const at::impl::TorchFunctionDisabledState old_state = at::impl::PythonTorchFunctionTLS::get_disabled_state(); at::impl::PythonTorchFunctionTLS::set_disabled_state( at::impl::TorchFunctionDisabledState::ALL_DISABLED); const GuardDebugInfo& debug_info_accessors = GuardManager::check_accessors_verbose_nopybind( value, num_guards_executed); if (!debug_info_accessors.result) { at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state); _reset_relational_guard_state(); return debug_info_accessors; } // Iterate over epilogue leaf guards for (const auto& guard : _epilogue_lambda_guards) { const GuardDebugInfo& tmp_debug_info = guard->check_verbose_nopybind(value); num_guards_executed++; if (!tmp_debug_info.result) { at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state); _reset_relational_guard_state(); return GuardDebugInfo( false, tmp_debug_info.verbose_code_parts, num_guards_executed); } } at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state); _reset_relational_guard_state(); return GuardDebugInfo(true, num_guards_executed); } void add_epilogue_lambda_guard(std::unique_ptr<LeafGuard> leaf_guard) { _epilogue_lambda_guards.emplace_back(std::move(leaf_guard)); } void set_init_local_state_flag() { _init_local_state = true; } // See note on [Ownership with cloning] RootGuardManager* clone_manager(const py::function& clone_filter_fn) { // Use clone_filter_fn if (!py::cast<bool>(clone_filter_fn(this))) { return nullptr; } RootGuardManager* cloned_root = new RootGuardManager(); clone_common(cloned_root, cloned_root, clone_filter_fn); for (const auto& guard : _epilogue_lambda_guards) { cloned_root->_epilogue_lambda_guards.emplace_back(guard); } return cloned_root; } // DEBUG function - Returning raw pointers because we can't return unique_ptr // and pybind does not accept a unique_ptr reference return type. std::vector<LeafGuard*> get_epilogue_lambda_guards() const { std::vector<LeafGuard*> ret; ret.reserve(_epilogue_lambda_guards.size()); for (const auto& guard : _epilogue_lambda_guards) { ret.push_back(guard.get()); } return ret; } private: // Reset the state of all the relational guards on failure. void _reset_relational_guard_state() { for (auto& guard : _relational_guard_resetters) { guard->reset_state(); } } public: // Local state for TENSOR_MATCH guards. LocalState _local_state; private: // All the relational guards under this guard manager. We only use these // when the guard evaluates to False. This ensures that guard state is reset // on guard failure so that the next invocation is clean. std::vector<std::shared_ptr<RelationalGuard>> _relational_guard_resetters; // These guards are lambda guards, i.e., guards that lack a C++ // implementation. For simplicity, we add these guards at the root. They // MUST be run after all other guard managers have finished to ensure that // the epilogue guards do not step on some nonexistent getattr or getitem. // NB - shared_ptr is used to share the epilogue guards with the cloned guard // manager.
std::vector<std::shared_ptr<LeafGuard>> _epilogue_lambda_guards; // [Note on GIL interaction with mutex lock] // We use std::mutex to prevent multiple threads from running // check/check_verbose simultaneously. This is to prevent a race condition due // to state changes in RelationalGuard. // // However, we also need to be careful about GIL interaction with the mutex. // There is a chance of deadlock // // Thread 1: has GIL, waiting for lock // Thread 2: has lock, waiting for GIL // // This can happen when Thread 2 earlier acquired the mutex lock, started // running the critical section of the check function and then called some // python function (like LAMBDA_GUARD) and reached CPython code that checks // whether it should release the GIL (typically happens after every few // bytecode instructions). Thread 2 here can decide to release the GIL. // Thread 1 can acquire the GIL and reach the mutex, where it will wait // forever. // // To avoid this, each thread releases the GIL before acquiring the mutex and // then acquires the GIL again after acquiring the mutex lock, using // Py_BLOCK_THREADS and Py_UNBLOCK_THREADS. This avoids the deadlock. std::mutex _lock; // We init LocalState only when this flag is set. This flag is set during // TENSOR_MATCH guard init. bool _init_local_state = false; }; /* * Dicts are common in python code. Therefore, we handle guards for dicts * differently and use PyDict_* APIs which are faster than PyObject_* APIs * because of no ref count increments/decrements. * * DictGuardManager relies on the order of dict.keys(). It keeps track of the * indices of dict.keys() to access the key, value pair. */ typedef std::pair<std::unique_ptr<GuardManager>, std::unique_ptr<GuardManager>> KeyValueManager; class DictGuardManager : public GuardManager { public: DictGuardManager( RootGuardManager* root, std::string source, py::handle example_value) : GuardManager(root, std::move(source)), _size(PyDict_Size(example_value.ptr())), _expected_type(Py_TYPE(example_value.ptr())), _is_exact_dict_type(PyDict_CheckExact(example_value.ptr())) {} GuardManager* get_key_manager( py::object key_index, std::string source, py::handle example_value, py::handle guard_manager_enum) { KeyValueManager& key_value_manager = _get_index_manager(std::move(key_index)); if (!key_value_manager.first) { key_value_manager.first = make_guard_manager( this->get_root(), std::move(source), example_value, guard_manager_enum); }; return key_value_manager.first.get(); } GuardManager* get_value_manager( py::object key_index, std::string source, py::handle example_value, py::handle guard_manager_enum) { KeyValueManager& key_value_manager = _get_index_manager(std::move(key_index)); if (!key_value_manager.second) { key_value_manager.second = make_guard_manager( this->get_root(), std::move(source), example_value, guard_manager_enum); }; return key_value_manager.second.get(); } bool check_nopybind(PyObject* obj) override { // borrowed ref // TODO(janimesh) - Implement a fast-path using dict versions. if (Py_TYPE(obj) != _expected_type) { _fail_count += 1; return false; } if (PyDict_Size(obj) != _size) { _fail_count += 1; return false; } // Early return if (_size == 0) { return true; } // Invokes the base class's check_nopybind method. We permit a limited set // of leaf guards and accessors within the DictGuardManager framework. // Integrating certain guards or accessors directly within the // DictGuardManager can be challenging. For instance, `type(dict_object)` as // an accessor is permissible, which otherwise would be hard to integrate // directly into DictGuardManager.
Similarly, incorporating guards such as // DICT_CONTAINS and DICT_VERSION as leaf guards offers a simpler solution // than embedding these functionalities within the DictGuardManager itself. if (!GuardManager::check_nopybind(obj)) { _fail_count += 1; // No need to shuffle the child guards, just return. return false; } PyObject *key = nullptr, *value = nullptr; Py_ssize_t pos = 0; // Points to an element in the _indices vector. size_t index_pointer = 0; // Points to the key index in the dict Py_ssize_t dict_pointer = 0; while (index_pointer < _indices.size() && PyDict_Next(obj, &pos, &key, &value)) { // Skip if dict_pointer is not a saved index. if (dict_pointer == _indices[index_pointer]) { index_pointer += 1; KeyValueManager& key_value_manager = _key_value_managers[dict_pointer]; std::unique_ptr& key_manager = key_value_manager.first; if (key_manager && !key_manager->check_nopybind(key)) { return false; } std::unique_ptr& value_manager = key_value_manager.second; if (value_manager && !value_manager->check_nopybind(value)) { return false; } } dict_pointer += 1; } return true; } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref if (Py_TYPE(obj) != _expected_type) { return GuardDebugInfo(false, "TYPE_MISMATCH(" + get_source() + ")", 0); } if (PyDict_Size(obj) != _size) { return GuardDebugInfo( false, "len(" + get_source() + ") != " + std::to_string(_size), 0); } // Early return if (_size == 0) { return GuardDebugInfo(true, 0); } // Invokes the base class's check_nopybind method. We permit a limited set // of leaf guards and accessors within the DictGuardManager framework. // Integrating certain guards or accessors directly within the // DictGuardManager can be challenging. For instance, `type(dict_object)` as // an accessor is permissible, which otherwise would be hard to integrate // directly into DictGuardManager. Similarly, incorporating guards such as // DICT_CONTAINS and DICT_VERSION as leaf guards offers a simpler solution // than embedding these functionalities within the DictGuardManager itself. GuardDebugInfo debug_info = GuardManager::check_verbose_nopybind(obj); if (!debug_info.result) { return debug_info; } PyObject *key = nullptr, *value = nullptr; Py_ssize_t pos = 0; // Points to an element in the _indices vector. size_t index_pointer = 0; Py_ssize_t dict_pointer = 0; int num_guards_executed = 0; while (index_pointer < _indices.size() && PyDict_Next(obj, &pos, &key, &value)) { // Skip if pos is not a saved index. if (dict_pointer == _indices[index_pointer]) { index_pointer += 1; KeyValueManager& key_value_manager = _key_value_managers[dict_pointer]; std::unique_ptr& key_manager = key_value_manager.first; if (key_manager) { GuardDebugInfo debug_info = key_manager->check_verbose_nopybind(key); num_guards_executed += debug_info.num_guards_executed; if (!debug_info.result) { return GuardDebugInfo( false, debug_info.verbose_code_parts, num_guards_executed); } } std::unique_ptr& value_manager = key_value_manager.second; if (value_manager) { GuardDebugInfo debug_info = value_manager->check_verbose_nopybind(value); num_guards_executed += debug_info.num_guards_executed; if (!debug_info.result) { return GuardDebugInfo( false, debug_info.verbose_code_parts, num_guards_executed); } } } dict_pointer += 1; } return GuardDebugInfo(true, num_guards_executed); } void skip_adding_guard(const py::object& a, const py::object& b) { // The `add_leaf_guard` method in `DictGuardManager` is overridden to block // the addition of leaf guards. However, this is too strict. 
Python side of // guard management frequently adds TYPE_MATCH and DICT_LENGTH on // DictGuardManager. We could refactor Python side to never call these // guards on dict objects, but that results in messy code. Instead, we just // override these two guards to not go through add_leaf_guard code path and // skip adding guards. This makes the python side easy. } void fail_on_get_child_manager( const py::object& a, const std::string& source, const py::object& b) { throw std::runtime_error("Can not add an accessor to DictGuardManager"); } void add_leaf_guard(std::shared_ptr leaf_guard) override { // If you are calling this, you probably want to go through a key, value // child manager and then add a leaf guard on them. DictGuardManager already // has TYPE_MATCH and LENGTH_CHECK built in. throw std::runtime_error("DictGuardManager does not support a leaf_guard"); } // Debug helper - Returning raw pointers because we can't return unique_ptr // and pybind does not accept a unique_ptr reference return type. std::unordered_map> get_key_value_managers() { std::unordered_map> ret; for (auto index : _indices) { ret[index] = std::make_pair( _key_value_managers[index].first.get(), _key_value_managers[index].second.get()); } return ret; } bool is_exact_dict_type() { return _is_exact_dict_type; } public: // cloning functions DictGuardManager( RootGuardManager* cloned_root, std::string source, Py_ssize_t size, PyTypeObject* expected_type, bool is_exact_dict_type, std::vector indices) : GuardManager(cloned_root, std::move(source), true), _size(size), _expected_type(expected_type), _is_exact_dict_type(is_exact_dict_type), _indices(std::move(indices)) {} template GuardManager* clone_dict_guard_manager( RootGuardManager* cloned_root, const py::function& clone_filter_fn) { if (!py::cast(clone_filter_fn(this))) { return nullptr; } T* cloned_mgr = new T( cloned_root, get_source(), _size, _expected_type, _is_exact_dict_type, _indices); clone_common(cloned_root, cloned_mgr, clone_filter_fn); for (auto index : _indices) { KeyValueManager& key_value_manager = _key_value_managers[index]; std::unique_ptr& key_manager = key_value_manager.first; std::unique_ptr& value_manager = key_value_manager.second; cloned_mgr->_key_value_managers[index] = std::make_pair(nullptr, nullptr); if (key_manager) { GuardManager* cloned_key_manager = key_manager->clone(cloned_root, clone_filter_fn); if (cloned_key_manager) { cloned_mgr->_key_value_managers[index].first = std::unique_ptr(cloned_key_manager); } } if (value_manager) { GuardManager* cloned_value_manager = value_manager->clone(cloned_root, clone_filter_fn); if (cloned_value_manager) { cloned_mgr->_key_value_managers[index].second = std::unique_ptr(cloned_value_manager); } } } return cloned_mgr; } GuardManager* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_dict_guard_manager( cloned_root, clone_filter_fn); } private: /** * Adds a new KeyDictGuardAccessor. If the accessor is already present, we * just return the guard manager. */ KeyValueManager& _get_index_manager(py::object key_index) { // Check if the accessor is already present. 
Py_ssize_t index = py::cast(std::move(key_index)); auto it = _key_value_managers.find(index); if (it != _key_value_managers.end()) { return it->second; } _indices.push_back(index); // Always keep the _indices array sorted std::sort(_indices.begin(), _indices.end()); _key_value_managers[index] = std::make_pair(nullptr, nullptr); return _key_value_managers[index]; } protected: Py_ssize_t _size; // DictGuardManager supports both exact dict type and non-exact dict type. // Therefore, we have to compare the type to early exit. PyTypeObject* _expected_type; bool _is_exact_dict_type; // Useful to check getattr_manager validity. std::vector _indices; std::unordered_map _key_value_managers; }; GuardManager* clone_guard_manager( GuardManager* from, RootGuardManager* cloned_root, const py::function& clone_filter_fn) { return from->clone(cloned_root, clone_filter_fn); } void add_relational_guard_resetter_to_cloned_root( RootGuardManager* root, std::shared_ptr guard) { root->add_relational_guard_resetter(std::move(guard)); } std::unique_ptr make_guard_manager( RootGuardManager* root, std::string source, py::handle example_value, py::handle guard_manager_enum) { #if IS_PYBIND_2_13_PLUS using threeobjects = std::tuple; PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; auto& [guard_manager_enum_class, base_guard_manager_enum, dict_guard_manager_enum] = storage .call_once_and_store_result([]() -> threeobjects { py::object guard_manager_enum_class = py::module_::import("torch._dynamo.guards") .attr("GuardManagerType"); return { guard_manager_enum_class, guard_manager_enum_class.attr("GUARD_MANAGER"), guard_manager_enum_class.attr("DICT_GUARD_MANAGER")}; }) .get_stored(); #else static py::object guard_manager_enum_class = py::module_::import("torch._dynamo.guards").attr("GuardManagerType"); static py::object base_guard_manager_enum = guard_manager_enum_class.attr("GUARD_MANAGER"); static py::object dict_guard_manager_enum = guard_manager_enum_class.attr("DICT_GUARD_MANAGER"); #endif if (py::isinstance(example_value)) { if (guard_manager_enum.is(base_guard_manager_enum)) { // For dicts that don't need to guard on keys, we can just rely on the // base GuardManager. 
return std::make_unique( root, std::move(source), example_value); } else if (guard_manager_enum.is(dict_guard_manager_enum)) { return std::make_unique( root, std::move(source), example_value); } else { throw py::type_error("Invalid guard manager enum"); } } return std::make_unique(root, std::move(source)); } class TORCH_FUNCTION_MODE_STACK : public LeafGuard { public: TORCH_FUNCTION_MODE_STACK( const py::list& initial_stack, py::object verbose_code_parts) : LeafGuard(std::move(verbose_code_parts)), _ref_stack() { Py_ssize_t len = PyList_Size(initial_stack.ptr()); for (Py_ssize_t idx = 0; idx < len; idx++) { PyObject* mode = PyList_GetItem(initial_stack.ptr(), idx); // borrowed ref auto type = Py_TYPE(mode); this->_ref_stack.push_back(type); } } template bool check_nopybind_template(T* value) { // Ignore value arg, only used to satisfy the interface const size_t len = (size_t)at::impl::PythonTorchFunctionTLS::stack_len(); const size_t ref_stack_size = this->_ref_stack.size(); if (len != ref_stack_size) { return false; } for (int64_t idx = 0; (size_t)idx < len; idx++) { std::shared_ptr mode = at::impl::PythonTorchFunctionTLS::get_stack_at(idx); PyTypeObject* mode_type = Py_TYPE(mode->ptr(getPyInterpreter())); if (mode_type != _ref_stack.at(idx)) { return false; } } return true; } bool check_nopybind(PyObject* value) override { return check_nopybind_template(value); } bool check_nopybind(FrameLocalsMapping* value) override { return check_nopybind_template(value); } private: std::vector _ref_stack; }; class TENSOR_MATCH : public LeafGuard { public: TENSOR_MATCH( RootGuardManager* root_guard_manager, py::object value, py::object dynamic_dims_sizes_py, py::object dynamic_dims_strides_py, py::object tensor_name, py::object verbose_code_parts) : LeafGuard(root_guard_manager, std::move(verbose_code_parts)), _tensor_name(py::cast(std::move(tensor_name))) { root_guard_manager->set_init_local_state_flag(); PyObject* item = value.ptr(); if (!THPVariable_CheckExact(item) && !THPVariable_Check(item)) { PyErr_SetString(PyExc_TypeError, "expected Tensor()"); return; } auto tensor = THPVariable_Unpack(item); std::vector> tensor_dims_size = pyListToVecOptInt(dynamic_dims_sizes_py.ptr()); std::vector> tensor_dims_stride = pyListToVecOptInt(dynamic_dims_strides_py.ptr()); tensor_dims_size = tensor_dims_size.empty() ? wrapIntegersInOptional(tensor.sym_sizes()) : tensor_dims_size; tensor_dims_stride = tensor_dims_stride.empty() ? 
wrapIntegersInOptional(tensor.sym_strides()) : tensor_dims_stride; LocalState state; _tensor_check = std::make_unique<TensorCheck>( state, Py_TYPE(item), std::move(tensor), std::move(tensor_dims_size), std::move(tensor_dims_stride)); } bool check_nopybind(PyObject* value) override { // borrowed ref if (Py_TYPE(value) != _tensor_check->pytype) { return false; } return _tensor_check->check( _root_guard_manager->_local_state, THPVariable_Unpack(value)); } GuardDebugInfo check_verbose_nopybind( PyObject* value) override { // borrowed ref if (Py_TYPE(value) != _tensor_check->pytype) { std::stringstream fail_reason; PyObject* type_str = PyObject_Str(PyObject_Type(value)); fail_reason << "expected type of '" << _tensor_name << "' to be a tensor type, "; if (!type_str) { fail_reason << "but found a different type"; } else { fail_reason << "but found " << PyUnicode_AsUTF8(type_str); } return GuardDebugInfo(false, fail_reason.str(), 0); } std::string fail_reason = _tensor_check->check_verbose( _root_guard_manager->_local_state, THPVariable_Unpack(value), _tensor_name); if (!fail_reason.empty()) { if (is_parameter(py::handle(value))) { fail_reason += ". Guard failed on a parameter, consider using "; fail_reason += "torch._dynamo.config.force_parameter_static_shapes = False "; fail_reason += "to allow dynamism on parameters."; } return GuardDebugInfo(false, fail_reason, 0); } return GuardDebugInfo(true, 1); } private: std::string _tensor_name; std::unique_ptr<TensorCheck> _tensor_check; }; /** * Represents __getattr__ accessor. */ class GetAttrGuardAccessor : public GuardAccessor { public: GetAttrGuardAccessor( RootGuardManager* root, py::str name, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, name, std::move(source), example_value, guard_manager_enum), _attr_name(name.ptr()) {} // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { // borrowed ref PyObject* x = PyObject_GetAttr(obj, _attr_name); // new ref if (x == nullptr) { // Attribute absent, clear the exception and return false. PyErr_Clear(); return false; } bool result = _guard_manager->check_nopybind(x); Py_DECREF(x); return result; } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref PyObject* x = PyObject_GetAttr(obj, _attr_name); // new ref if (x == nullptr) { // Attribute absent, clear the exception and return false. PyErr_Clear(); return GuardDebugInfo( false, "getattr failed on source " + get_source(), 0); } GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x); Py_DECREF(x); return result; } std::string repr() const override { // Helpful when printing the GuardManager tree structure. return "GetAttrGuardAccessor(" + py::str(_attr_name).cast<std::string>() + ")"; } public: // cloning functions // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) GetAttrGuardAccessor(GuardManager* guard_manager, GetAttrGuardAccessor* from) : GuardAccessor(guard_manager, from) { from->clone_visitor(this); } GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_common<GetAttrGuardAccessor>(cloned_root, clone_filter_fn); } void clone_visitor(GetAttrGuardAccessor* to) { to->_attr_name = _attr_name; } private: // no need of py::object here because the attr_name is already passed on to // the base class as accessor_key which is a py::object. PyObject* _attr_name; }; /** * Represents x.__dict__ accessor.
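* PyObject_GenericGetDict retrieves the instance __dict__ through the generic * mechanism (the same dict that object.__getattribute__ would find), without * triggering user-defined __getattr__ or __getattribute__ overrides.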
*/ class GetGenericDictGuardAccessor : public GuardAccessor { public: GetGenericDictGuardAccessor( RootGuardManager* root, py::str name, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, std::move(name), std::move(source), example_value, guard_manager_enum) {} // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { // borrowed ref PyObject* x = PyObject_GenericGetDict(obj, nullptr); // new ref if (x == nullptr) { // Attribute absent, clear the exception and return false. PyErr_Clear(); return false; } bool result = _guard_manager->check_nopybind(x); Py_DECREF(x); return result; } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref PyObject* x = PyObject_GenericGetDict(obj, nullptr); // new ref if (x == nullptr) { // Attribute absent, clear the exception and return false. PyErr_Clear(); return GuardDebugInfo( false, "getattr failed on source " + get_source(), 0); } GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x); Py_DECREF(x); return result; } std::string repr() const override { // Helpful when printing the GuardManager tree structure. return "GetGenericDictGuardAccessor"; } public: // cloning functions // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) GetGenericDictGuardAccessor( GuardManager* guard_manager, GetGenericDictGuardAccessor* from) : GuardAccessor(guard_manager, from) { from->clone_visitor(this); } GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_common<GetGenericDictGuardAccessor>( cloned_root, clone_filter_fn); } }; /** * Represents __getitem__ accessor. */ class GetItemGuardAccessor : public GuardAccessor { public: GetItemGuardAccessor( RootGuardManager* root, py::object name, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, name, std::move(source), example_value, guard_manager_enum), _attr_name(name.ptr()) {} // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { // borrowed ref PyObject* x = PyObject_GetItem(obj, _attr_name); // new ref if (x == nullptr) { PyErr_Clear(); return false; } bool result = _guard_manager->check_nopybind(x); Py_DECREF(x); return result; } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref PyObject* x = PyObject_GetItem(obj, _attr_name); // new ref if (x == nullptr) { PyErr_Clear(); return GuardDebugInfo( false, std::string("KeyError on ") + get_source(), 0); } GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x); Py_DECREF(x); return result; } std::string repr() const override { return "GetItemGuardAccessor(" + py::str(_attr_name).cast<std::string>() + ")"; } public: // cloning functions // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) GetItemGuardAccessor(GuardManager* guard_manager, GetItemGuardAccessor* from) : GuardAccessor(guard_manager, from) { from->clone_visitor(this); } GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_common<GetItemGuardAccessor>(cloned_root, clone_filter_fn); } void clone_visitor(GetItemGuardAccessor* to) { to->_attr_name = _attr_name; } private: // no need of py::object here because the attr_name is already passed on to // the base class as accessor_key which is a py::object. PyObject* _attr_name; }; /** * Represents f_locals[name] accessor.
Special handling for frame locals since * we avoid converting it to Python as much as possible. * NB: We don't check for name order in frame locals since it is constant * across frames corresponding to the same code object. */ class FrameLocalsGuardAccessor : public GuardAccessor { public: FrameLocalsGuardAccessor( RootGuardManager* root, const py::tuple& key, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, key[0], std::move(source), example_value, guard_manager_enum), _key(key[0].ptr()), _framelocals_idx(key[1].cast<int>()), _is_immutable_object(is_immutable_object(example_value)) {} // Run as a result of calling run_root_guard_manager/check_nopybind // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. bool check_nopybind( FrameLocalsMapping* obj, bool matches_dict_tag = false) override { // borrowed ref if (matches_dict_tag && _is_immutable_object) { // immutable object and dict tag matches, we can skip the guard subtree. return true; } PyObject* x = obj->get(_framelocals_idx); if (x == nullptr) { PyErr_Clear(); return false; } return _guard_manager->check_nopybind(x); } // Run as a result of calling check(), e.g. from Python // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { if (!PyDict_Check(obj)) { // This should not cause a guard failure. // If this error is encountered, it probably means // we did not convert FrameLocalsMapping to a dict (using to_dict()). throw std::runtime_error( "FrameLocalsGuardAccessor check expected dict() input"); } if (matches_dict_tag && _is_immutable_object) { // immutable object and dict tag matches, we can skip the guard subtree. return true; } PyObject* x = PyDict_GetItem(obj, _key); // borrowed ref if (x == nullptr) { PyErr_Clear(); return false; } bool result = _guard_manager->check_nopybind(x); return result; } // If we've reached here, it means the guard failed - `obj` should be the // FrameLocalsMapping converted into a Python dict and we should // behave like DictGetItemGuardAccessor. GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref if (!PyDict_Check(obj)) { PyErr_Clear(); return GuardDebugInfo( false, "FrameLocalsGuardAccessor check expected dict() input", 0); } PyObject* x = PyDict_GetItem(obj, _key); // borrowed ref if (x == nullptr) { PyErr_Clear(); return GuardDebugInfo( false, std::string("KeyError on ") + get_source(), 0); } GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x); return result; } std::string repr() const override { return "FrameLocalsGuardAccessor(key=" + py::repr(_key).cast<std::string>() + ", framelocals_idx=" + std::to_string(_framelocals_idx) + ")"; } public: // cloning functions // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) FrameLocalsGuardAccessor( GuardManager* guard_manager, FrameLocalsGuardAccessor* from) : GuardAccessor(guard_manager, from) { from->clone_visitor(this); } GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_common<FrameLocalsGuardAccessor>(cloned_root, clone_filter_fn); } void clone_visitor(FrameLocalsGuardAccessor* to) { to->_key = _key; to->_framelocals_idx = _framelocals_idx; to->_is_immutable_object = _is_immutable_object; } private: PyObject* _key; int _framelocals_idx; // If immutable object and dict tag matches, we can skip the guard subtree and // return true. bool _is_immutable_object; }; /** * Represents dict[name] accessor.
Needed since DictGuardManager does not * support sorting. We differentiate it from GetItemGuardAccessor because * PyDict_GetItem should be faster than PyObject_GetItem. */ class DictGetItemGuardAccessor : public GuardAccessor { public: DictGetItemGuardAccessor( RootGuardManager* root, py::object key, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, key, std::move(source), example_value, guard_manager_enum), _key(key.ptr()), _is_immutable_object(is_immutable_object(example_value)) {} // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { if (matches_dict_tag && _is_immutable_object) { // immutable object and dict tag matches, we can skip the guard subtree. return true; } PyObject* x = PyDict_GetItem(obj, _key); // borrowed ref if (x == nullptr) { PyErr_Clear(); return false; } bool result = _guard_manager->check_nopybind(x); return result; } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref PyObject* x = PyDict_GetItem(obj, _key); // borrowed ref if (x == nullptr) { PyErr_Clear(); return GuardDebugInfo( false, std::string("KeyError on ") + get_source(), 0); } GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x); return result; } std::string repr() const override { return "DictGetItemGuardAccessor(" + py::repr(_key).cast() + ")"; } public: // cloning functions // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) DictGetItemGuardAccessor( GuardManager* guard_manager, DictGetItemGuardAccessor* from) : GuardAccessor(guard_manager, from) { from->clone_visitor(this); } GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_common(cloned_root, clone_filter_fn); } void clone_visitor(DictGetItemGuardAccessor* to) { to->_key = _key; to->_is_immutable_object = _is_immutable_object; } private: PyObject* _key; // If immutable object and dict tag matches, we can skip the guard subtree and // return true. bool _is_immutable_object; }; /** * Represents list[index] accessor. It is faster than generic * GetItemGuardAccessor. */ class ListGetItemGuardAccessor : public GuardAccessor { public: ListGetItemGuardAccessor( RootGuardManager* root, const py::object& index, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, index, std::move(source), example_value, guard_manager_enum), _index(py::cast(index)) {} // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. 
bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { // borrowed ref PyObject* x = PyList_GetItem(obj, _index); // borrowed ref if (x == nullptr) { PyErr_Clear(); return false; } bool result = _guard_manager->check_nopybind(x); return result; } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref PyObject* x = PyList_GetItem(obj, _index); // borrowed ref if (x == nullptr) { PyErr_Clear(); return GuardDebugInfo( false, std::string("IndexError on ") + get_source(), 0); } GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x); return result; } std::string repr() const override { return "ListGetItemGuardAccessor(" + std::to_string(_index) + ")"; } public: // cloning functions // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) ListGetItemGuardAccessor( GuardManager* guard_manager, ListGetItemGuardAccessor* from) : GuardAccessor(guard_manager, from) { from->clone_visitor(this); } GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_common<ListGetItemGuardAccessor>(cloned_root, clone_filter_fn); } void clone_visitor(ListGetItemGuardAccessor* to) { to->_index = _index; } private: Py_ssize_t _index; }; /** * Represents tuple[index] accessor. It is faster than the generic * GetItemGuardAccessor. */ class TupleGetItemGuardAccessor : public GuardAccessor { public: TupleGetItemGuardAccessor( RootGuardManager* root, const py::object& index, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, index, std::move(source), example_value, guard_manager_enum), _index(py::cast<Py_ssize_t>(index)) {} // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { // borrowed ref PyObject* x = PyTuple_GetItem(obj, _index); // borrowed ref if (x == nullptr) { PyErr_Clear(); return false; } bool result = _guard_manager->check_nopybind(x); return result; } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref PyObject* x = PyTuple_GetItem(obj, _index); // borrowed ref if (x == nullptr) { PyErr_Clear(); return GuardDebugInfo( false, std::string("IndexError on ") + get_source(), 0); } GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x); return result; } std::string repr() const override { return "TupleGetItemGuardAccessor(" + std::to_string(_index) + ")"; } public: // cloning functions // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) TupleGetItemGuardAccessor( GuardManager* guard_manager, TupleGetItemGuardAccessor* from) : GuardAccessor(guard_manager, from) { from->clone_visitor(this); } GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_common<TupleGetItemGuardAccessor>( cloned_root, clone_filter_fn); } void clone_visitor(TupleGetItemGuardAccessor* to) { to->_index = _index; } private: Py_ssize_t _index; }; /** * Represents tensor.grad accessor. */ class GradGuardAccessor : public GuardAccessor { public: GradGuardAccessor( RootGuardManager* root, py::str name, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, std::move(name), std::move(source), example_value, guard_manager_enum) {} // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind.
bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { // borrowed ref // check that it's a tensor if (!THPVariable_CheckExact(obj) && !THPVariable_Check(obj)) { return false; } PyObject* grad = THPVariable_Wrap(THPVariable_Unpack(obj).grad()); // New reference bool result = _guard_manager->check_nopybind(grad); // For an undefined tensor, THPVariable_Wrap returns Py_RETURN_NONE. So, no // need of Py_XDECREF. Py_DECREF(grad); return result; } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref // check that it's a tensor if (!THPVariable_CheckExact(obj) && !THPVariable_Check(obj)) { return GuardDebugInfo( false, "not a tensor - grad field is accessed " + get_source(), 0); } PyObject* grad = THPVariable_Wrap(THPVariable_Unpack(obj).grad()); // New reference GuardDebugInfo result = _guard_manager->check_verbose_nopybind(grad); // For an undefined tensor, THPVariable_Wrap returns Py_RETURN_NONE. So, no // need of Py_XDECREF. Py_DECREF(grad); return result; } std::string repr() const override { // Helpful when printing the GuardManager tree structure. return "GradGuardAccessor(grad)"; } public: // cloning functions // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) GradGuardAccessor(GuardManager* guard_manager, GradGuardAccessor* from) : GuardAccessor(guard_manager, from) { from->clone_visitor(this); } GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_common<GradGuardAccessor>(cloned_root, clone_filter_fn); } }; /** * Represents func.__defaults__ accessor. */ class FuncDefaultsGuardAccessor : public GuardAccessor { public: FuncDefaultsGuardAccessor( RootGuardManager* root, py::object name, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, std::move(name), std::move(source), example_value, guard_manager_enum) {} // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { // borrowed ref PyObject* func = obj; if (PyMethod_Check(obj)) { func = PyMethod_GET_FUNCTION(obj); // borrowed ref } else if (PyInstanceMethod_Check(obj)) { func = PyInstanceMethod_GET_FUNCTION(obj); // borrowed ref } PyObject* x = PyFunction_GetDefaults(func); // borrowed ref if (x == nullptr) { PyErr_Clear(); return false; } return _guard_manager->check_nopybind(x); } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref PyObject* func = obj; if (PyMethod_Check(obj)) { func = PyMethod_GET_FUNCTION(obj); // borrowed ref } else if (PyInstanceMethod_Check(obj)) { func = PyInstanceMethod_GET_FUNCTION(obj); // borrowed ref } PyObject* x = PyFunction_GetDefaults(func); if (x == nullptr) { PyErr_Clear(); return GuardDebugInfo( false, std::string(repr() + ": Not a function on ") + get_source(), 0); } return _guard_manager->check_verbose_nopybind(x); } std::string repr() const override { return "FuncDefaultsGuardAccessor"; } public: // cloning functions // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) FuncDefaultsGuardAccessor( GuardManager* guard_manager, FuncDefaultsGuardAccessor* from) : GuardAccessor(guard_manager, from) { from->clone_visitor(this); } GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_common<FuncDefaultsGuardAccessor>( cloned_root, clone_filter_fn); } }; /** * Represents func.__kwdefaults__ accessor.
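* For example, for `def f(x, *, flag=True)` Python stores {'flag': True} in * f.__kwdefaults__, while positional defaults live in f.__defaults__; this * accessor routes that kwdefaults dict to its child manager.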
*/ class FuncKwDefaultsGuardAccessor : public GuardAccessor { public: FuncKwDefaultsGuardAccessor( RootGuardManager* root, py::object name, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, std::move(name), std::move(source), example_value, guard_manager_enum) {} // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { // borrowed ref PyObject* func = obj; if (PyMethod_Check(obj)) { func = PyMethod_GET_FUNCTION(obj); // borrowed ref } else if (PyInstanceMethod_Check(obj)) { func = PyInstanceMethod_GET_FUNCTION(obj); // borrowed ref } PyObject* x = PyFunction_GetKwDefaults(func); // borrowed ref if (x == nullptr) { PyErr_Clear(); return false; } return _guard_manager->check_nopybind(x); } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref PyObject* func = obj; if (PyMethod_Check(obj)) { func = PyMethod_GET_FUNCTION(obj); // borrowed ref } else if (PyInstanceMethod_Check(obj)) { func = PyInstanceMethod_GET_FUNCTION(obj); // borrowed ref } PyObject* x = PyFunction_GetKwDefaults(func); if (x == nullptr) { PyErr_Clear(); return GuardDebugInfo( false, std::string(repr() + ": Not a function on ") + get_source(), 0); } return _guard_manager->check_verbose_nopybind(x); } std::string repr() const override { return "FuncKwDefaultsGuardAccessor"; } public: // cloning functions // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) FuncKwDefaultsGuardAccessor( GuardManager* guard_manager, FuncKwDefaultsGuardAccessor* from) : GuardAccessor(guard_manager, from) { from->clone_visitor(this); } GuardAccessor* clone( RootGuardManager* cloned_root, const py::function& clone_filter_fn) override { return clone_common<FuncKwDefaultsGuardAccessor>( cloned_root, clone_filter_fn); } }; /** * Represents f_globals accessor. This sits as a child accessor of the * RootGuardManager. */ class GlobalsGuardAccessor : public GuardAccessor { public: GlobalsGuardAccessor( RootGuardManager* root, py::dict globals_dict, std::string source, py::handle example_value, py::handle guard_manager_enum) : GuardAccessor( root, globals_dict, std::move(source), example_value, guard_manager_enum), _globals_dict(globals_dict.ptr()) {} // NB: Intentional duplication between check_nopybind and // check_verbose_nopybind. bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override { // borrowed ref // Ignore the obj arg. This is required to satisfy the function signature. // Just pass on the globals dict to the child manager. return _guard_manager->check_nopybind(_globals_dict); } GuardDebugInfo check_verbose_nopybind( PyObject* obj) override { // borrowed ref // Ignore the obj arg. This is required to satisfy the function signature. // Just pass on the globals dict to the child manager.
    return _guard_manager->check_verbose_nopybind(_globals_dict);
  }

  std::string repr() const override {
    return "GlobalsGuardAccessor";
  }

 public: // cloning functions
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
  GlobalsGuardAccessor(GuardManager* guard_manager, GlobalsGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<GlobalsGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(GlobalsGuardAccessor* to) {
    to->_globals_dict = _globals_dict;
  }

 private:
  // No need for a py::object here because globals_dict is already passed to
  // the base class as accessor_key, which is a py::object.
  PyObject* _globals_dict;
};

/**
 * Represents a type(...) accessor.
 */
class TypeGuardAccessor : public GuardAccessor {
 public:
  // name = __type_accessor__, a unique string used as attribute name.
  TypeGuardAccessor(
      RootGuardManager* root,
      py::str name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            std::move(name),
            std::move(source),
            example_value,
            guard_manager_enum) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override {
    // borrowed ref
    PyObject* x = (PyObject*)Py_TYPE(obj); // borrowed ref
    return _guard_manager->check_nopybind(x);
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = (PyObject*)Py_TYPE(obj); // borrowed ref
    return _guard_manager->check_verbose_nopybind(x);
  }

  std::string repr() const override {
    return "TypeGuardAccessor";
  }

 public: // cloning functions
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
  TypeGuardAccessor(GuardManager* guard_manager, TypeGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<TypeGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(TypeGuardAccessor* to) {}
};

/**
 * Getitem tuple_iterator accessor.
 */
class TupleIteratorGetItemAccessor : public GuardAccessor {
 public:
  TupleIteratorGetItemAccessor(
      RootGuardManager* root,
      py::object index,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            index,
            std::move(source),
            example_value,
            guard_manager_enum),
        _index(py::cast<Py_ssize_t>(std::move(index))) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override {
    // borrowed ref
    _PyTupleIterObject* it = (_PyTupleIterObject*)obj;
    PyObject* x =
        PyTuple_GET_ITEM(it->it_seq, it->it_index + _index); // borrowed ref
    if (x == nullptr) {
      // Out of range.
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    _PyTupleIterObject* it = (_PyTupleIterObject*)obj;
    PyObject* x =
        PyTuple_GET_ITEM(it->it_seq, it->it_index + _index); // borrowed ref
    if (x == nullptr) {
      // Out of range.
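      // NOTE: PyTuple_GET_ITEM performs no bounds checking and does not set
      // an exception, so this branch is purely defensive; the index is
      // expected to stay in range thanks to a separate length guard (e.g.
      // TUPLE_ITERATOR_LEN).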
      PyErr_Clear();
      return GuardDebugInfo(false, std::string("IndexError ") + repr(), 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    return result;
  }

  std::string repr() const override {
    return "TupleIteratorGetItemAccessor(" + std::to_string(_index) + ")";
  }

 public: // cloning functions
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
  TupleIteratorGetItemAccessor(
      GuardManager* guard_manager,
      TupleIteratorGetItemAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<TupleIteratorGetItemAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(TupleIteratorGetItemAccessor* to) {
    to->_index = _index;
  }

 private:
  Py_ssize_t _index;
};

/**
 * GlobalWeakRef accessor. Dynamo can insert a weakref object into the frame
 * globals. This accessor reads the globals and then calls the weakref object
 * to get the underlying object. This is a child of GlobalsGuardAccessor, so
 * check_nopybind receives the globals dict as its argument.
 */
class GlobalWeakRefGuardAccessor : public GuardAccessor {
 public:
  GlobalWeakRefGuardAccessor(
      RootGuardManager* root,
      py::object global_name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            global_name,
            std::move(source),
            example_value,
            guard_manager_enum),
        _global_name(global_name.ptr()) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override {
    // borrowed ref
    // obj is the globals dict because GlobalWeakRefGuardAccessor has to be a
    // child of GlobalsGuardAccessor.
    PyObject* weakref = PyDict_GetItem(obj, _global_name); // borrowed ref
    if (weakref == nullptr) {
      // The weakref is not in the globals dict.
      PyErr_Clear();
      return false;
    }

    if (!PyWeakref_Check(weakref)) {
      return false;
    }

    PyObject* x = nullptr;
    if (PyWeakref_GetRef(weakref, &x) == -1) { // strong reference
      // error when dereferencing the weakref
      PyErr_Clear();
      return false;
    }
    if (x == nullptr) {
      // weakref is dead
      x = Py_NewRef(Py_None);
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    // obj is the globals dict because GlobalWeakRefGuardAccessor has to be a
    // child of GlobalsGuardAccessor.
    PyObject* weakref = PyDict_GetItem(obj, _global_name); // borrowed ref
    if (weakref == nullptr) {
      // The weakref is not in the globals dict.
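      // PyDict_GetItem suppresses errors internally and returns nullptr for a
      // missing key, so the PyErr_Clear() below is just defensive.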
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("KeyError on ") + get_source(), 0);
    }

    if (!PyWeakref_Check(weakref)) {
      return GuardDebugInfo(
          false, std::string("Not a weakref ") + get_source(), 0);
    }

    PyObject* x = nullptr;
    if (PyWeakref_GetRef(weakref, &x) == -1) { // strong reference
      // error when dereferencing the weakref
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("Weakref_GetRef failed ") + get_source(), 0);
    }
    if (x == nullptr) {
      // weakref is dead
      x = Py_NewRef(Py_None);
    }
    auto result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    return "GlobalWeakRefGuardAccessor(" +
        py::str(_global_name).cast<std::string>() + ")";
  }

 public: // cloning functions
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
  GlobalWeakRefGuardAccessor(
      GuardManager* guard_manager,
      GlobalWeakRefGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<GlobalWeakRefGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(GlobalWeakRefGuardAccessor* to) {
    to->_global_name = _global_name;
  }

 private:
  PyObject* _global_name;
};

/**
 * Implements weakref call - x_weak()
 */
class WeakRefCallGuardAccessor : public GuardAccessor {
 public:
  WeakRefCallGuardAccessor(
      RootGuardManager* root,
      py::str name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            std::move(name),
            std::move(source),
            example_value,
            guard_manager_enum) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override {
    // borrowed ref
    if (!PyWeakref_Check(obj)) {
      return false;
    }

    PyObject* x = nullptr;
    if (PyWeakref_GetRef(obj, &x) == -1) { // strong reference
      // error when dereferencing the weakref
      PyErr_Clear();
      return false;
    }
    if (x == nullptr) {
      // weakref is dead
      x = Py_NewRef(Py_None);
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    if (!PyWeakref_Check(obj)) {
      return GuardDebugInfo(
          false, std::string("Not a weakref obj ") + get_source(), 0);
    }

    PyObject* x = nullptr;
    if (PyWeakref_GetRef(obj, &x) == -1) { // strong reference
      // error when dereferencing the weakref
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("Weakref_GetRef failed ") + get_source(), 0);
    }
    if (x == nullptr) {
      // weakref is dead
      x = Py_NewRef(Py_None);
    }
    auto result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    return "WeakRefCallGuardAccessor()";
  }

 public: // cloning functions
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
  WeakRefCallGuardAccessor(
      GuardManager* guard_manager,
      WeakRefCallGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<WeakRefCallGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(WeakRefCallGuardAccessor* to) {}
};

/**
 * Implements function call with no args - e.g., torch.cuda.current_device()
 */
class CallFunctionNoArgsGuardAccessor : public GuardAccessor {
 public:
  CallFunctionNoArgsGuardAccessor(
      RootGuardManager* root,
      py::str name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            std::move(name),
            std::move(source),
            example_value,
            guard_manager_enum) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override {
    // borrowed ref
    if (!PyCallable_Check(obj)) {
      return false;
    }

    PyObject* x = PyObject_CallNoArgs(obj);
    if (x == nullptr) {
      // Call failed; clear the exception and return false.
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    if (!PyCallable_Check(obj)) {
      return GuardDebugInfo(
          false, std::string("Not a callable obj ") + get_source(), 0);
    }

    PyObject* x = PyObject_CallNoArgs(obj);
    if (x == nullptr) {
      // Call failed; clear the exception and return debug info.
      std::string exc_message = get_exception_message();
      PyErr_Clear();
      return GuardDebugInfo(false, exc_message, 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    return "CallFunctionNoArgsGuardAccessor()";
  }

 public: // cloning functions
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
  CallFunctionNoArgsGuardAccessor(
      GuardManager* guard_manager,
      CallFunctionNoArgsGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<CallFunctionNoArgsGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(CallFunctionNoArgsGuardAccessor* to) {}
};

/**
 * Similar to PythonLambdaLeafGuard, this class allows developers to supply
 * the accessor as a Python function. This is useful for the from_numpy
 * source.
 */
class PythonLambdaGuardAccessor : public GuardAccessor {
 public:
  PythonLambdaGuardAccessor(
      RootGuardManager* root,
      py::function accessor_fn,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            accessor_fn,
            std::move(source),
            example_value,
            guard_manager_enum),
        _accessor_fn(std::move(accessor_fn)) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override {
    // borrowed ref
    PyObject* x = PyObject_CallOneArg(_accessor_fn.ptr(), obj); // new ref
    if (x == nullptr) {
      // The accessor function failed.
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = PyObject_CallOneArg(_accessor_fn.ptr(), obj); // new ref
    if (x == nullptr) {
      // The accessor function failed.
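      // Unlike check_nopybind above, capture the exception text before
      // clearing it so the verbose report can explain why the accessor
      // raised.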
      std::string exc_message = get_exception_message();
      PyErr_Clear();
      return GuardDebugInfo(false, exc_message, 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    return "PythonLambdaGuardAccessor";
  }

 public: // cloning functions
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
  PythonLambdaGuardAccessor(
      GuardManager* guard_manager,
      PythonLambdaGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<PythonLambdaGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(PythonLambdaGuardAccessor* to) {
    to->_accessor_fn = _accessor_fn;
  }

 private:
  py::object _accessor_fn;
};

void install_object_aliasing_guard(
    GuardManager* x,
    GuardManager* y,
    py::object verbose_code_parts) {
  // Adds a "tensor X is tensor Y" guard. This is an example of a relational
  // guard: one guard object is shared between two guard managers.
  std::shared_ptr<RelationalGuard> guard =
      std::make_shared<OBJECT_ALIASING>(std::move(verbose_code_parts));

  // Register the resetter on the root guard manager, so that it can reset
  // the newly added relational guard when the guard eval fails.
  x->get_root()->add_relational_guard_resetter(guard);

  // In case the guard is a DictGuardManager, OBJECT_ALIASING guard is a
  // permitted guard.
  x->add_permitted_leaf_guard(guard);
  y->add_permitted_leaf_guard(guard);
}

void install_no_tensor_aliasing_guard(
    const py::list& guard_managers,
    const py::list& tensor_names,
    py::object verbose_code_parts) {
  // Adds a guard that checks that none of the tensors alias. This is an
  // example of a relational guard: one guard object is shared between
  // multiple guard managers.
  std::shared_ptr<RelationalGuard> guard =
      std::make_shared<NO_TENSOR_ALIASING>(
          tensor_names, std::move(verbose_code_parts));

  // Register the resetter on the root guard manager, so that it can reset
  // the newly added relational guard when the guard eval fails.
  py::cast<GuardManager*>(guard_managers[0])
      ->get_root()
      ->add_relational_guard_resetter(guard);
  for (const auto& guard_manager : guard_managers) {
    py::cast<GuardManager*>(guard_manager)->add_leaf_guard(guard);
  }
}

void install_storage_overlapping_guard_with_checker(
    const std::shared_ptr<StorageOverlapChecker>& checker,
    const py::list& guard_managers,
    const py::object& verbose_code_parts,
    bool overlapping) {
  if (guard_managers.empty()) {
    // If there are no GuardManagers, there's no need to create a
    // STORAGE_OVERLAPPING guard.
    return;
  }
  std::shared_ptr<RelationalGuard> guard =
      std::make_shared<STORAGE_OVERLAPPING>(
          overlapping, checker, verbose_code_parts);
  py::cast<GuardManager*>(guard_managers[0])
      ->get_root()
      ->add_relational_guard_resetter(guard);
  for (const auto& guard_manager : guard_managers) {
    py::cast<GuardManager*>(guard_manager)->add_leaf_guard(guard);
  }
}

void install_storage_overlapping_guard(
    const py::list& overlapping_guard_managers,
    const py::list& non_overlapping_guard_managers,
    const py::object& verbose_code_parts) {
  // Create a single StorageOverlapChecker that will be shared amongst
  // the 2 STORAGE_OVERLAPPING guards below.
  std::shared_ptr<StorageOverlapChecker> checker =
      std::make_shared<StorageOverlapChecker>(
          overlapping_guard_managers.size(),
          non_overlapping_guard_managers.size());
  // Create the possibly overlapping storage guard.
  install_storage_overlapping_guard_with_checker(
      checker,
      overlapping_guard_managers,
      verbose_code_parts,
      /* overlapping= */ true);
  // Create the non-overlapping storage guard.
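  // (This call receives the same shared checker as the call above, so both
  // guards contribute their tensor groups to a single overlap computation.)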
  install_storage_overlapping_guard_with_checker(
      checker,
      non_overlapping_guard_managers,
      verbose_code_parts,
      /* overlapping= */ false);
}

double profile_guard_manager(RootGuardManager* root, py::object f_locals) {
  PyObject* locals = f_locals.ptr();

  // Warmup
  for (int i = 0; i < 10; i++) {
    root->check_nopybind(locals);
  }

  int count = 0;
  auto start = std::chrono::high_resolution_clock::now();
  float profile_duration = 1.0;

  // Run the loop for profile_duration seconds
  while (true) {
    root->check_nopybind(locals);
    count++;
    auto end = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = end - start;

    // Break the loop once profile_duration seconds have passed
    if (elapsed.count() >= profile_duration) {
      break;
    }
  }

  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> total_elapsed = end - start;
  // Calculate the average time per iteration in microseconds
  return (total_elapsed.count() * 1e6) / count;
}

} // namespace

static void* _torchinductor_pyobject_tensor_data_ptr(PyObject* obj) {
  if (C10_UNLIKELY(
          obj == nullptr ||
          (!THPVariable_CheckExact(obj) && !THPVariable_Check(obj)))) {
    throw std::runtime_error(
        "_torchinductor_pyobject_tensor_data_ptr: non-tensor input");
  }
  return THPVariable_Unpack(obj).data_ptr();
}

void* convert_to_root_guard_manager(py::object root) {
  // For invalidated guards, return nullptr
  if (root.is(py::none())) {
    return nullptr;
  }
  RootGuardManager* root_mgr = std::move(root).cast<RootGuardManager*>();
  return (void*)root_mgr;
}

bool run_root_guard_manager(void* root, FrameLocalsMapping* f_locals) {
  // For invalidated guards, return false
  if (root == nullptr) {
    return false;
  }
  py::object config_module = py::module_::import("torch._dynamo.config");
  bool enable_cpp_framelocals_guard_eval =
      config_module.attr("enable_cpp_framelocals_guard_eval").cast<bool>();
  if (enable_cpp_framelocals_guard_eval) {
    return ((RootGuardManager*)root)->check_nopybind(f_locals);
  } else {
    return ((RootGuardManager*)root)
        ->check_nopybind((PyObject*)f_locals->to_dict());
  }
}

PyObject* torch_c_dynamo_guards_init() {
  // initialize TensorGuardsType
  TensorGuardsType.tp_name = "torch._C._dynamo.guards.TensorGuards";
  TensorGuardsType.tp_basicsize = sizeof(TensorGuards);
  TensorGuardsType.tp_itemsize = 0;
  TensorGuardsType.tp_dealloc = (destructor)TensorGuards_dealloc;
  TensorGuardsType.tp_flags = Py_TPFLAGS_DEFAULT;
  TensorGuardsType.tp_doc = "Check properties of a torch.Tensor";
  TensorGuardsType.tp_methods = TensorGuards_methods;
  TensorGuardsType.tp_init = (initproc)TensorGuards_init;
  TensorGuardsType.tp_new = TensorGuards_new;

  if (PyType_Ready(&TensorGuardsType) < 0)
    return nullptr;

  GlobalStateGuardType.tp_name = "torch._C._dynamo.guards.GlobalStateGuard";
  GlobalStateGuardType.tp_basicsize = sizeof(GlobalStateGuard);
  GlobalStateGuardType.tp_itemsize = 0;
  GlobalStateGuardType.tp_flags = Py_TPFLAGS_DEFAULT;
  GlobalStateGuardType.tp_doc = "Guard on PyTorch global flags such as no_grad";
  GlobalStateGuardType.tp_methods = GlobalStateGuard_methods;
  GlobalStateGuardType.tp_init = (initproc)GlobalStateGuard_init;
  GlobalStateGuardType.tp_new = PyType_GenericNew;

  if (PyType_Ready(&GlobalStateGuardType) < 0)
    return nullptr;

  auto m = PyModule_Create(&_module);
  if (m == nullptr)
    return nullptr;

#ifdef Py_GIL_DISABLED
  PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED);
#endif

  Py_INCREF(&TensorGuardsType);
  if (PyModule_AddObject(m, "TensorGuards", (PyObject*)&TensorGuardsType) <
      0) {
    Py_DECREF(&TensorGuardsType);
    Py_DECREF(m);
    return nullptr;
  }

  Py_INCREF(&GlobalStateGuardType);
  if (PyModule_AddObject(
          m,
          "GlobalStateGuard",
          (PyObject*)&GlobalStateGuardType) < 0) {
    Py_DECREF(&GlobalStateGuardType);
    Py_DECREF(m);
    return nullptr;
  }

  // We expose the address of _torchinductor_pyobject_tensor_data_ptr in order
  // to allow manual linking in our generated TorchInductor Python bindings.
  // While regular linking works in most cases, it does not work properly in
  // fbcode due to janky build setup there.
  if (PyModule_AddObject(
          m,
          "_torchinductor_pyobject_tensor_data_ptr",
          PyLong_FromVoidPtr(reinterpret_cast<void*>(
              &_torchinductor_pyobject_tensor_data_ptr))) < 0) {
    return nullptr;
  }

  auto py_m = py::handle(m).cast<py::module>();
  py::class_<GuardDebugInfo, std::unique_ptr<GuardDebugInfo>>(
      py_m, "GuardDebugInfo")
      .def(py::init<bool, py::list, int>())
      .def("__str__", &GuardDebugInfo::to_string)
      .def_readonly("result", &GuardDebugInfo::result)
      .def_readonly("verbose_code_parts", &GuardDebugInfo::verbose_code_parts)
      .def_readonly(
          "num_guards_executed", &GuardDebugInfo::num_guards_executed);

  // Leaf Guards
  py::class_<LeafGuard, std::shared_ptr<LeafGuard>>(py_m, "LeafGuard")
      .def("verbose_code_parts", &LeafGuard::verbose_code_parts);
  py::class_<LAMBDA_GUARD, LeafGuard, std::shared_ptr<LAMBDA_GUARD>>(
      py_m, "LAMBDA_GUARD")
      .def(py::init<py::function, py::list>())
      .def("__call__", &LAMBDA_GUARD::check);
  py::class_<TYPE_MATCH, LeafGuard, std::shared_ptr<TYPE_MATCH>>(
      py_m, "TYPE_MATCH")
      .def(py::init<py::object, py::list>())
      .def("__call__", &TYPE_MATCH::check);
  py::class_<ID_MATCH, LeafGuard, std::shared_ptr<ID_MATCH>>(py_m, "ID_MATCH")
      .def(py::init<py::object, py::list>())
      .def("__call__", &ID_MATCH::check);
  py::class_<EQUALS_MATCH, LeafGuard, std::shared_ptr<EQUALS_MATCH>>(
      py_m, "EQUALS_MATCH")
      .def(py::init<py::object, py::list>())
      .def("__call__", &EQUALS_MATCH::check);
  py::class_<LENGTH_CHECK, LeafGuard, std::shared_ptr<LENGTH_CHECK>>(
      py_m, "LENGTH_CHECK")
      .def(py::init<py::object, py::list>())
      .def("__call__", &LENGTH_CHECK::check);
  py::class_<DICT_LENGTH, LeafGuard, std::shared_ptr<DICT_LENGTH>>(
      py_m, "DICT_LENGTH")
      .def(py::init<py::object, py::list>())
      .def("__call__", &DICT_LENGTH::check);
  py::class_<DEFAULT_DEVICE, LeafGuard, std::shared_ptr<DEFAULT_DEVICE>>(
      py_m, "DEFAULT_DEVICE")
      .def(py::init<py::list>())
      .def("__call__", &DEFAULT_DEVICE::check);
  py::class_<NOT_NONE, LeafGuard, std::shared_ptr<NOT_NONE>>(py_m, "NOT_NONE")
      .def(py::init<py::list>())
      .def("__call__", &NOT_NONE::check);
  py::class_<
      TUPLE_ITERATOR_LEN,
      LeafGuard,
      std::shared_ptr<TUPLE_ITERATOR_LEN>>(py_m, "TUPLE_ITERATOR_LEN")
      .def(py::init<py::object, py::object, py::list>())
      .def("__call__", &TUPLE_ITERATOR_LEN::check);
  py::class_<
      RANGE_ITERATOR_MATCH,
      LeafGuard,
      std::shared_ptr<RANGE_ITERATOR_MATCH>>(py_m, "RANGE_ITERATOR_MATCH")
      .def(py::init<py::object, py::object, py::object, py::object, py::list>())
      .def("__call__", &RANGE_ITERATOR_MATCH::check);
  py::class_<GLOBAL_STATE, LeafGuard, std::shared_ptr<GLOBAL_STATE>>(
      py_m, "GLOBAL_STATE")
      .def(py::init<py::list>())
      .def("check_verbose", &GLOBAL_STATE::check_verbose)
      .def("__call__", &GLOBAL_STATE::check);
  py::class_<
      TORCH_FUNCTION_MODE_STACK,
      LeafGuard,
      std::shared_ptr<TORCH_FUNCTION_MODE_STACK>>(
      py_m, "TORCH_FUNCTION_MODE_STACK")
      .def(py::init<py::list, py::list>())
      .def("__call__", &TORCH_FUNCTION_MODE_STACK::check);
  py::class_<DATA_PTR_MATCH, LeafGuard, std::shared_ptr<DATA_PTR_MATCH>>(
      py_m, "DATA_PTR_MATCH")
      .def(py::init<py::object, py::list>())
      .def("__call__", &DATA_PTR_MATCH::check);
  py::class_<NO_HASATTR, LeafGuard, std::shared_ptr<NO_HASATTR>>(
      py_m, "NO_HASATTR")
      .def(py::init<py::object, py::list>())
      .def("__call__", &NO_HASATTR::check);
  py::class_<DICT_CONTAINS, LeafGuard, std::shared_ptr<DICT_CONTAINS>>(
      py_m, "DICT_CONTAINS")
      .def(py::init<bool, py::object, py::list>())
      .def("__call__", &DICT_CONTAINS::check);
  py::class_<DYNAMIC_INDICES, LeafGuard, std::shared_ptr<DYNAMIC_INDICES>>(
      py_m, "DYNAMIC_INDICES")
      .def(py::init<py::set, py::list>())
      .def("__call__", &DYNAMIC_INDICES::check);
  py::class_<DICT_VERSION, LeafGuard, std::shared_ptr<DICT_VERSION>>(
      py_m, "DICT_VERSION")
      .def(py::init<py::object, py::list>())
      .def("__call__", &DICT_VERSION::check);
  py::class_<TENSOR_MATCH, LeafGuard, std::shared_ptr<TENSOR_MATCH>>(
      py_m, "TENSOR_MATCH")
      .def(py::init<
           RootGuardManager*,
           py::object,
           py::object,
           py::object,
           py::str,
           py::list>())
      .def("__call__", &TENSOR_MATCH::check);
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<OBJECT_ALIASING, LeafGuard, std::shared_ptr<OBJECT_ALIASING>>(
      py_m, "OBJECT_ALIASING");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      NO_TENSOR_ALIASING,
      LeafGuard,
      std::shared_ptr<NO_TENSOR_ALIASING>>(py_m, "NO_TENSOR_ALIASING");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      STORAGE_OVERLAPPING,
      LeafGuard,
      std::shared_ptr<STORAGE_OVERLAPPING>>(py_m, "STORAGE_OVERLAPPING");

  // Guard Accessors - These are present so that we can iterate over the
  // GuardManager hierarchy. We intentionally do not provide even an init
  // function on these, because these should be constructed from within C++.
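  // As a hypothetical Python-side sketch (only repr() plus the get_accessors
  // and get_child_managers getters bound on GuardManager below are assumed;
  // the tree shape depends on what Dynamo compiled), the hierarchy can be
  // walked like this:
  //
  //   def dump(mgr, indent=0):
  //       for accessor, child in zip(mgr.get_accessors(),
  //                                  mgr.get_child_managers()):
  //           print(" " * indent + accessor.repr())
  //           dump(child, indent + 2)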
  py::class_<GuardAccessor, std::unique_ptr<GuardAccessor>>(
      py_m, "GuardAccessor")
      .def("repr", &GuardAccessor::repr);
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GetAttrGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GetAttrGuardAccessor>>(py_m, "GetAttrGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GetGenericDictGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GetGenericDictGuardAccessor>>(
      py_m, "GetGenericDictGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GetItemGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GetItemGuardAccessor>>(py_m, "GetItemGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      FrameLocalsGuardAccessor,
      GuardAccessor,
      std::unique_ptr<FrameLocalsGuardAccessor>>(
      py_m, "FrameLocalsGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      DictGetItemGuardAccessor,
      GuardAccessor,
      std::unique_ptr<DictGetItemGuardAccessor>>(
      py_m, "DictGetItemGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      ListGetItemGuardAccessor,
      GuardAccessor,
      std::unique_ptr<ListGetItemGuardAccessor>>(
      py_m, "ListGetItemGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      TupleGetItemGuardAccessor,
      GuardAccessor,
      std::unique_ptr<TupleGetItemGuardAccessor>>(
      py_m, "TupleGetItemGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      FuncDefaultsGuardAccessor,
      GuardAccessor,
      std::unique_ptr<FuncDefaultsGuardAccessor>>(
      py_m, "FuncDefaultsGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      FuncKwDefaultsGuardAccessor,
      GuardAccessor,
      std::unique_ptr<FuncKwDefaultsGuardAccessor>>(
      py_m, "FuncKwDefaultsGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GlobalsGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GlobalsGuardAccessor>>(py_m, "GlobalsGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      TypeGuardAccessor,
      GuardAccessor,
      std::unique_ptr<TypeGuardAccessor>>(py_m, "TypeGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      WeakRefCallGuardAccessor,
      GuardAccessor,
      std::unique_ptr<WeakRefCallGuardAccessor>>(
      py_m, "WeakRefCallGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      CallFunctionNoArgsGuardAccessor,
      GuardAccessor,
      std::unique_ptr<CallFunctionNoArgsGuardAccessor>>(
      py_m, "CallFunctionNoArgsGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      TupleIteratorGetItemAccessor,
      GuardAccessor,
      std::unique_ptr<TupleIteratorGetItemAccessor>>(
      py_m, "TupleIteratorGetItemAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GlobalWeakRefGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GlobalWeakRefGuardAccessor>>(
      py_m, "GlobalWeakRefGuardAccessor");

  // Guard Manager - No constructor in python, python should use
  // RootGuardManager.
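  // A minimal, hypothetical usage sketch from Python (the source strings and
  // the guard_manager_enum value are placeholders normally supplied by
  // Dynamo):
  //
  //   root = torch._C._dynamo.guards.RootGuardManager()
  //   x_mgr = root.getitem_manager("x", "L['x']", 5, guard_manager_enum)
  //   x_mgr.add_equals_match_guard(5, ["L['x'] == 5"])
  //   assert root.check({"x": 5})
  //   assert not root.check({"x": 6})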
  py::class_<GuardManager, std::unique_ptr<GuardManager>>(py_m, "GuardManager")
      // return by reference because GuardManager has the ownership of accessors
      .def("get_source", &GuardManager::get_source)
      .def("fail_count", &GuardManager::fail_count)
      .def(
          "get_accessors",
          &GuardManager::get_accessors,
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of child
      // managers
      .def(
          "get_child_managers",
          &GuardManager::get_child_managers,
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of leaf
      // guards
      .def(
          "get_leaf_guards",
          &GuardManager::get_leaf_guards,
          py::return_value_policy::reference)
      .def(
          "add_lambda_guard",
          [](GuardManager& self,
             py::object lambda,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<LAMBDA_GUARD>(
                std::move(lambda), std::move(verbose_code_parts)));
          })
      .def(
          "add_type_match_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("TYPE_MATCH");
            self.add_leaf_guard(std::make_shared<TYPE_MATCH>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_id_match_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("ID_MATCH");
            self.add_leaf_guard(std::make_shared<ID_MATCH>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_equals_match_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("EQUALS_MATCH");
            self.add_leaf_guard(std::make_shared<EQUALS_MATCH>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_length_check_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("LENGTH_CHECK");
            self.add_leaf_guard(std::make_shared<LENGTH_CHECK>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_dict_length_check_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("DICT_LENGTH");
            self.add_leaf_guard(std::make_shared<DICT_LENGTH>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_tuple_iterator_length_guard",
          [](GuardManager& self,
             py::object length,
             py::object type_id,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("TUPLE_ITERATOR_LEN");
            self.add_leaf_guard(std::make_shared<TUPLE_ITERATOR_LEN>(
                std::move(length),
                std::move(type_id),
                std::move(verbose_code_parts)));
          })
      .def(
          "add_range_iterator_match_guard",
          [](GuardManager& self,
             py::object start,
             py::object stop,
             py::object step,
             py::object type_id,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("RANGE_ITERATOR_MATCH");
            self.add_leaf_guard(std::make_shared<RANGE_ITERATOR_MATCH>(
                std::move(start),
                std::move(stop),
                std::move(step),
                std::move(type_id),
                std::move(verbose_code_parts)));
          })
      .def(
          "add_default_device_guard",
          [](GuardManager& self, py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<DEFAULT_DEVICE>(
                std::move(verbose_code_parts)));
          })
      .def(
          "add_not_none_guard",
          [](GuardManager& self, py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("NOT_NONE");
            self.add_leaf_guard(
                std::make_shared<NOT_NONE>(std::move(verbose_code_parts)));
          })
      .def(
          "add_global_state_guard",
          [](GuardManager& self, py::object verbose_code_parts) -> void {
            self.add_leaf_guard(
                std::make_shared<GLOBAL_STATE>(std::move(verbose_code_parts)));
          })
      .def(
          "add_torch_function_mode_stack_guard",
          [](GuardManager& self,
             const py::list& initial_stack,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<TORCH_FUNCTION_MODE_STACK>(
                initial_stack,
                std::move(verbose_code_parts)));
          })
      .def(
          "add_data_ptr_guard",
          [](GuardManager& self,
             py::object data_ptr,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("DATA_PTR_MATCH");
            self.add_leaf_guard(std::make_shared<DATA_PTR_MATCH>(
                std::move(data_ptr), std::move(verbose_code_parts)));
          })
      .def(
          "add_no_hasattr_guard",
          [](GuardManager& self,
             py::object attr_name,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<NO_HASATTR>(
                std::move(attr_name), std::move(verbose_code_parts)));
          })
      .def(
          "add_dict_contains_guard",
          [](GuardManager& self,
             bool contains,
             py::object key,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<DICT_CONTAINS>(
                contains, std::move(key), std::move(verbose_code_parts)));
          })
      .def(
          "add_dynamic_indices_guard",
          [](GuardManager& self,
             py::set value,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<DYNAMIC_INDICES>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_dict_version_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<DICT_VERSION>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_tensor_match_guard",
          [](GuardManager& self,
             py::object value,
             py::object sizes,
             py::object strides,
             py::object tensor_name,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("TENSOR_MATCH");
            self.add_leaf_guard(std::make_shared<TENSOR_MATCH>(
                self.get_root(),
                std::move(value),
                std::move(sizes),
                std::move(strides),
                std::move(tensor_name),
                std::move(verbose_code_parts)));
          })
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "getitem_manager",
          &GuardManager::get_child_manager<GetItemGuardAccessor>,
          py::arg("key"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "framelocals_manager",
          &GuardManager::get_child_manager<FrameLocalsGuardAccessor>,
          py::arg("key"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "dict_getitem_manager",
          &GuardManager::get_child_manager<DictGetItemGuardAccessor>,
          py::arg("key"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "list_getitem_manager",
          &GuardManager::get_child_manager<ListGetItemGuardAccessor>,
          py::arg("key"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "tuple_getitem_manager",
          &GuardManager::get_child_manager<TupleGetItemGuardAccessor>,
          py::arg("key"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "func_defaults_manager",
          [](GuardManager& self,
             std::string source,
             py::object example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            // A unique key is used as the accessor key.
py::str unique_key("__defaults_accessor__"); return self.get_child_manager( std::move(unique_key), std::move(source), std::move(example_value), guard_manager_enum); }, py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "func_kwdefaults_manager", [](GuardManager& self, std::string source, py::object example_value, py::handle guard_manager_enum) -> GuardManager* { // A unique key is used to save as the accessor key. py::str unique_key("__kwdefaults_accessor__"); return self.get_child_manager( std::move(unique_key), std::move(source), std::move(example_value), guard_manager_enum); }, py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "globals_dict_manager", &GuardManager::get_child_manager, py::arg("f_globals"), py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "type_manager", [](GuardManager& self, std::string source, py::handle example_value, py::handle guard_manager_enum) -> GuardManager* { // A unique key is used to save as the accessor key. py::str unique_key("__type_accessor__"); return self.get_child_manager( std::move(unique_key), std::move(source), example_value, guard_manager_enum); }, py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "weakref_call_manager", [](GuardManager& self, std::string source, py::handle example_value, py::handle guard_manager_enum) -> GuardManager* { // A unique key is used to save as the accessor key. py::str unique_key("__weakref_call_accessor__"); return self.get_child_manager( std::move(unique_key), std::move(source), example_value, guard_manager_enum); }, py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "call_function_no_args_manager", [](GuardManager& self, std::string source, py::handle example_value, py::handle guard_manager_enum) -> GuardManager* { // A unique key is used to save as the accessor key. 
py::str unique_key("__call_function_no_args_accessor__"); return self.get_child_manager( std::move(unique_key), std::move(source), example_value, guard_manager_enum); }, py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "tuple_iterator_getitem_manager", &GuardManager::get_child_manager, py::arg("index"), py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "global_weakref_manager", &GuardManager::get_child_manager, py::arg("global_name"), py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "lambda_manager", &GuardManager::get_child_manager, py::arg("python_lambda"), py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "grad_manager", [](GuardManager& self, std::string source, py::handle example_value, py::handle guard_manager_enum) -> GuardManager* { // A unique key is used to save as the accessor key. py::str unique_key("__grad_accessor__"); return self.get_child_manager( std::move(unique_key), std::move(source), example_value, guard_manager_enum); }, py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "get_generic_dict_manager", [](GuardManager& self, std::string source, py::handle example_value, py::handle guard_manager_enum) -> GuardManager* { // A unique key is used to save as the accessor key. 
py::str unique_key("__generic_dict_accessor__"); return self.get_child_manager( std::move(unique_key), std::move(source), example_value, guard_manager_enum); }, py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because C++ GuardManager has the ownership of // accessors and guard managers .def( "getattr_manager", &GuardManager::get_child_manager, py::arg("attr"), py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference); // Root Guard Manager py::class_>( py_m, "RootGuardManager") .def(py::init<>()) .def("check", &RootGuardManager::check) .def("check_verbose", &RootGuardManager::check_verbose) .def( "clone_manager", &RootGuardManager::clone_manager, py::return_value_policy::reference) // return by reference because GuardManager has the ownership of leaf // guards .def( "get_epilogue_lambda_guards", &RootGuardManager::get_epilogue_lambda_guards, py::return_value_policy::reference) .def( "add_epilogue_lambda_guard", [](RootGuardManager& self, py::object lambda, py::object verbose_code_parts) -> void { self.add_epilogue_lambda_guard(std::make_unique( std::move(lambda), std::move(verbose_code_parts))); }); // Dict Guard Manager py::class_>( py_m, "DictGuardManager") // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "get_key_manager", [](DictGuardManager& self, py::object index, std::string source, py::handle example_value, py::handle guard_manager_enum) -> GuardManager* { return self.get_key_manager( std::move(index), std::move(source), example_value, guard_manager_enum); }, py::arg("index"), py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of accessors // and guard managers .def( "get_value_manager", [](DictGuardManager& self, py::object index, std::string source, py::handle example_value, py::handle guard_manager_enum) -> GuardManager* { return self.get_value_manager( std::move(index), std::move(source), example_value, guard_manager_enum); }, py::arg("index"), py::arg("source"), py::arg("example_value"), py::arg("guard_manager_enum"), py::return_value_policy::reference) // return by reference because GuardManager has the ownership of leaf // guards .def( "get_key_value_managers", &DictGuardManager::get_key_value_managers, py::return_value_policy::reference) // Skipped leaf guards .def("add_type_match_guard", &DictGuardManager::skip_adding_guard) .def("add_dict_length_check_guard", &DictGuardManager::skip_adding_guard) // Permitted leaf guards .def( "add_dict_contains_guard", [](DictGuardManager& self, bool contains, py::object key, py::object verbose_code_parts) -> void { self.add_permitted_leaf_guard(std::make_shared( contains, std::move(key), std::move(verbose_code_parts))); }) .def( "add_dict_version_guard", [](DictGuardManager& self, py::object value, py::object verbose_code_parts) -> void { // DICT_VERSION is used in a very narrow context today to guard on // pytree SUPPPORTED_NODES. We can remove this once we have tags in // DictGuardManager. 
            self.add_permitted_leaf_guard(std::make_shared<DICT_VERSION>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_no_hasattr_guard",
          [](DictGuardManager& self,
             py::object attr_name,
             py::object verbose_code_parts) -> void {
            self.add_permitted_leaf_guard(std::make_shared<NO_HASATTR>(
                std::move(attr_name), std::move(verbose_code_parts)));
          })
      // Not permitted accessors
      .def("lambda_manager", &DictGuardManager::fail_on_get_child_manager)
      .def("getitem_manager", &DictGuardManager::fail_on_get_child_manager)
      .def(
          "dict_getitem_manager", &DictGuardManager::fail_on_get_child_manager)
      .def(
          "globals_dict_manager", &DictGuardManager::fail_on_get_child_manager)
      .def(
          "tuple_iterator_getitem_manager",
          &DictGuardManager::fail_on_get_child_manager)
      .def(
          "global_weakref_manager",
          &DictGuardManager::fail_on_get_child_manager)
      // Permitted accessors (and also type_manager)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "getattr_manager",
          [](DictGuardManager& self,
             py::object attr_name,
             std::string source,
             py::handle example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            if (self.is_exact_dict_type()) {
              throw std::runtime_error(
                  "getattr_manager on a DictGuardManager is supported only for dict subclasses");
            }
            return self.get_child_manager<GetAttrGuardAccessor>(
                std::move(attr_name),
                std::move(source),
                example_value,
                guard_manager_enum);
          },
          py::arg("attr"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference);

  py_m.def("install_object_aliasing_guard", install_object_aliasing_guard);
  py_m.def(
      "install_no_tensor_aliasing_guard", install_no_tensor_aliasing_guard);
  py_m.def(
      "install_storage_overlapping_guard", install_storage_overlapping_guard);
  py_m.def(
      "compute_overlapping_tensors",
      [](const std::vector<at::Tensor> tensors, bool symbolic) {
        // Pick the correct Meta class, depending on whether we are
        // dealing with symbolic values or not.
        if (symbolic) {
          return compute_overlapping_tensors(tensors);
        } else {
          return compute_overlapping_tensors(tensors);
        }
      },
      py::arg("tensors"),
      py::arg("symbolic") = true);
  py_m.def("profile_guard_manager", profile_guard_manager);

  // initialize dict_version_map watcher for 3.12
#if IS_PYTHON_3_12_PLUS
  dict_version_watcher_id = PyDict_AddWatcher(dict_version_watch_callback);
  if (dict_version_watcher_id == -1) {
    throw std::runtime_error("Failed to install dict_version_watch_callback");
  }
#endif

  return m;
}

} // namespace torch::dynamo