mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Implements https://github.com/pytorch/pytorch/issues/93753 - move frame local guard accessors to C++.

Before, we used dict accessors on a Python dict representing the frame's fastlocals that we manually build. We move this accessor to C++ and additionally use the fastlocal index whenever possible.

Some implementation notes:
- `FrameLocalsMapping` is now initialized as a C++ vector of `PyObject`s. We do not just use the frame's localsplus/fastlocals buffer because we also unbox cells.
- `FrameLocalsMapping` can still be converted into a Python dict representing the frame's fastlocals, but it is done lazily.
- We update `LeafGuard`, `GuardAccessor`, and `GuardManager`'s `check_nopybind` methods to accept `FrameLocalsMapping`. By default, we convert the `FrameLocalsMapping` to a Python dict and run the original `check_nopybind` on it, but in some cases, conversion is not needed.
- We add a new guard accessor `FrameLocalsGuardAccessor`, which is similar to `DictGetItemGuardAccessor` but has special handling for `FrameLocalsMapping`. We create a separate class to emphasize different use cases, but we could probably combine these two (this can be done in a follow-up).

dynamo_guard_eval.py microbenchmark update:
- 713.2us -> 630.0us (3.10)
- 598.8us -> 530.7us (3.12)

Other followups:
- Add a `FrameLocalsMapping` version for `check_verbose_nopybind` in order to match behavior between `check_nopybind` and `check_verbose_nopybind`. This can prevent difficult debugging situations where guards fail (`check_nopybind` returns false) but no guard error message is generated (`check_verbose_nopybind` succeeds).
- Rewrite the `SHAPE_ENV` guard into C++ - it is a fairly common guard that results in `FrameLocalsMapping` needing to convert to a dict.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140063
Approved by: https://github.com/jansel
ghstack dependencies: #142117, #142430
91 lines
2.9 KiB
C++
91 lines
2.9 KiB
C++
#pragma once
|
|
#include <c10/core/GradMode.h>
|
|
#include <torch/csrc/dynamo/framelocals_mapping.h>
|
|
#include <torch/csrc/python_headers.h>
|
|
#include <torch/csrc/utils/pybind.h>
|
|
|
|
namespace torch::dynamo {
|
|
|
|
PyObject* torch_c_dynamo_guards_init();
|
|
|
|
// interfaces for extra_state and eval_frame.c because RootGuardManager class is
|
|
// not visible there.
|
|
void* convert_to_root_guard_manager(py::object root);
|
|
bool run_root_guard_manager(void* root, FrameLocalsMapping* f_locals);
|
|
|
|
struct LocalState {
|
|
// TLS state that changes operators
|
|
c10::impl::LocalDispatchKeySet dispatch_modifier;
|
|
c10::DispatchKeySet override_dispatch_key_set;
|
|
bool grad_mode_enabled;
|
|
|
|
at::DispatchKeySet apply(at::DispatchKeySet ks) const {
|
|
if (override_dispatch_key_set.empty()) {
|
|
return (ks | dispatch_modifier.included_) - dispatch_modifier.excluded_;
|
|
} else {
|
|
return override_dispatch_key_set;
|
|
}
|
|
}
|
|
|
|
LocalState()
|
|
: dispatch_modifier(c10::impl::tls_local_dispatch_key_set()),
|
|
override_dispatch_key_set(c10::BackendComponent::InvalidBit),
|
|
grad_mode_enabled(at::GradMode::is_enabled()) {}
|
|
|
|
void overrideDispatchKeySet(c10::DispatchKeySet ks) {
|
|
override_dispatch_key_set = ks;
|
|
}
|
|
};
|
|
|
|
class TensorCheck {
|
|
public:
|
|
TensorCheck(
|
|
const LocalState& state,
|
|
PyTypeObject* pt,
|
|
const at::Tensor& v,
|
|
std::vector<std::optional<c10::SymInt>> dynamic_dims_sizes,
|
|
std::vector<std::optional<c10::SymInt>> dynamic_dims_strides);
|
|
|
|
TensorCheck(
|
|
const LocalState& state,
|
|
PyTypeObject* pt,
|
|
c10::DispatchKeySet dispatch_key_set,
|
|
at::ScalarType dtype,
|
|
at::DeviceIndex device_index,
|
|
bool requires_grad,
|
|
std::vector<std::optional<c10::SymInt>> dynamic_dims_sizes,
|
|
std::vector<std::optional<c10::SymInt>> dynamic_dims_strides);
|
|
|
|
bool check(const LocalState& state, const at::Tensor& v);
|
|
bool check(
|
|
const LocalState& state,
|
|
const c10::DispatchKeySet& dispatch_key_set,
|
|
const at::ScalarType& dtype,
|
|
const c10::Device& device,
|
|
const c10::SymIntArrayRef& dynamic_dims_sizes,
|
|
const c10::SymIntArrayRef& dynamic_dims_strides,
|
|
const bool& requires_grad);
|
|
std::string check_verbose(
|
|
const LocalState& state,
|
|
const at::Tensor& v,
|
|
const std::string& tensor_name);
|
|
|
|
PyTypeObject* pytype;
|
|
|
|
private:
|
|
uint64_t dispatch_key_; // DispatchKeySet includes device/layout
|
|
at::ScalarType dtype_;
|
|
// Note(voz): While dispatch_key_ is sufficiently representative of a device
|
|
// In that keys are more granular AND device specific - they do not
|
|
// necessarily capture device indices correctly.
|
|
at::DeviceIndex device_index_;
|
|
bool requires_grad_;
|
|
// NB: These are unset if dynamic shapes is enabled.
|
|
std::vector<std::optional<c10::SymInt>> sizes_;
|
|
std::vector<std::optional<c10::SymInt>> strides_;
|
|
// Not strictly required for dense tensors, but nested tensors need it.
|
|
int64_t dim_;
|
|
};
|
|
|
|
} // namespace torch::dynamo
|