pytorch/torch/csrc/dynamo/eval_frame_cpp.cpp
cyy 8fa81a6066 Enable misc-use-internal-linkage check and apply fixes (#148948)
Enables clang-tidy rule [`misc-use-internal-linkage`](https://clang.llvm.org/extra/clang-tidy/checks/misc/use-internal-linkage.html). This new check was introduced in Clang-Tidy 18 and is available due to recent update of Clang-Tidy 19.

The check marks functions and variables used only in the translation unit as static. Therefore undesired symbols are not leaked into other units, more link time optimisations are possible and the resulting binaries may be smaller.

The detected violations were mostly fixed by using static. In other cases, the symbols were indeed consumed by others files, then their declaring headers were included. Still some declarations were wrong and have been fixed.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/148948
Approved by: https://github.com/Skylion007
2025-03-12 14:22:56 +00:00

318 lines
11 KiB
C++

#include <torch/csrc/dynamo/cache_entry.h>
#include <torch/csrc/dynamo/cpp_shim.h>
#include <torch/csrc/dynamo/cpython_includes.h>
#include <torch/csrc/dynamo/debug_macros.h>
#include <torch/csrc/dynamo/eval_frame.h>
#include <torch/csrc/dynamo/eval_frame_cpp.h>
#include <torch/csrc/dynamo/framelocals_mapping.h>
#include <torch/csrc/utils/python_compat.h>
// NOLINTNEXTLINE(misc-use-internal-linkage)
const char* cache_lookup_profiler_str = "TorchDynamo Cache Lookup";
// Remember to update the type signature for DynamoCallbackFn.__call__ in
// torch/_dynamo/types.py if this function's signature changes.
static py::object dynamo_call_callback(
py::handle callback,
THP_EVAL_API_FRAME_OBJECT* _frame,
FrameLocalsMapping* locals,
CacheEntry* cache_entry,
FrameState* frame_state) {
THPPyInterpreterFrame* frame = THPPyInterpreterFrame_New(_frame);
if (frame == nullptr) {
throw std::runtime_error(
"Dynamo failed to initialize CPython interpreter frame wrapper");
}
frame->locals = (PyObject*)framelocals_mapping_to_dict(locals);
py::object cache_entry_obj = py::none();
if (cache_entry) {
cache_entry_obj = py::cast(cache_entry, py::return_value_policy::reference);
}
py::object result = callback(
py::handle((PyObject*)frame), cache_entry_obj, py::handle(frame_state));
Py_DECREF(frame);
return result;
}
static py::handle _callback_from_action(
py::handle callback,
FrameAction action) {
if (action == SKIP) {
return Py_None;
} else if (action == RUN_ONLY) {
return Py_False;
}
return callback;
}
// frame and callback are borrowed references.
// Returns new reference.
PyObject* dynamo__custom_eval_frame(
PyThreadState* tstate,
THP_EVAL_API_FRAME_OBJECT* frame,
int throw_flag,
PyObject* callback_py) {
#if IS_PYTHON_3_11_PLUS
DEBUG_TRACE(
"begin %s %s %i %i",
get_frame_name(frame),
PyUnicode_AsUTF8(F_CODE(frame)->co_filename),
F_CODE(frame)->co_firstlineno,
_PyInterpreterFrame_LASTI(frame));
#else
DEBUG_TRACE(
"begin %s %s %i %i %i",
get_frame_name(frame),
PyUnicode_AsUTF8(F_CODE(frame)->co_filename),
frame->f_lineno,
frame->f_lasti,
frame->f_iblock);
#endif
if (throw_flag) {
// When unwinding generators, eval frame is called with throw_flag ==
// true. Frame evaluation is supposed to continue unwinding by propagating
// the exception. Dynamo doesn't really know how to do this, nor does it
// really want to do this, because there's unlikely any code to capture
// (you're going to immediately quit out of the frame, perhaps running
// some unwinding logic along the way). So we just run the default
// handler in this case.
//
// NB: A previous version of this patch returned NULL. This is wrong,
// because returning NULL is *different* from unwinding an exception.
// In particular, you will not execute things like context manager
// __exit__ if you just return NULL.
//
// NB: It's /conceivable/ that you might want to actually still call the
// Dynamo callback when throw_flag == TRUE, to give Dynamo a chance to
// do any stack unwinding code. But this is not really useful because
// (1) Dynamo doesn't actually know how to do stack unwinding, so it would
// immediately skip the frame, and (2) even if it did, this would only
// be profitable if there was tensor code in the unwinding code. Seems
// unlikely.
DEBUG_TRACE("throw %s", get_frame_name(frame));
return dynamo_eval_frame_default(tstate, frame, throw_flag);
}
py::handle callback(callback_py);
// callback to run on recursively invoked frames
py::handle recursive_callback = callback; // borrowed
PyCodeObject* cached_code = nullptr; // borrowed
const char* trace_annotation = "";
PyObject* eval_result = nullptr; // strong reference
// exit functions
auto eval_default = [&]() {
eval_frame_callback_set(recursive_callback.ptr());
eval_result = dynamo_eval_frame_default(tstate, frame, throw_flag);
if (!callback.is(recursive_callback)) {
// NB: Only set the callback if it's different than the recursive
// callback! Setting the callback is dangerous in the case that `frame`
// also sets the eval frame callback. This happens in some functions in
// eval_frame.py. These functions should be skipped with DEFAULT recursive
// action, so we won't accidentally overwrite the callback.
eval_frame_callback_set(callback.ptr());
}
};
// NOTE: In 3.12+, the frame evaluation function (callee) is responsible for
// clearing/popping the frame, meaning that unless we default evaluate the
// original frame, we are responsible for clearing it - via
// clear_old_frame_if_python_312_plus.
auto eval_custom = [&]() {
eval_frame_callback_set(recursive_callback.ptr());
DEBUG_NULL_CHECK(cached_code);
eval_result = dynamo_eval_custom_code(
tstate, frame, cached_code, trace_annotation, throw_flag);
if (!callback.is(recursive_callback)) {
eval_frame_callback_set(callback.ptr());
}
clear_old_frame_if_python_312_plus(tstate, frame);
};
auto fail = [&]() { clear_old_frame_if_python_312_plus(tstate, frame); };
ExtraState* extra = get_extra_state(F_CODE(frame));
if (callback.is(py::bool_(false)) && extra == nullptr) {
DEBUG_TRACE("skip (run only with empty cache) %s", get_frame_name(frame));
eval_default();
return eval_result;
}
// create cache
if (extra == nullptr) {
extra = init_and_set_extra_state(F_CODE(frame));
}
// Get recursive action
FrameExecStrategy strategy = extra_state_get_exec_strategy(extra);
recursive_callback =
_callback_from_action(recursive_callback, strategy.recursive_action);
// Skip this frame
if (strategy.cur_action == SKIP) {
DEBUG_TRACE("skip %s", get_frame_name(frame));
eval_default();
return eval_result;
}
// default and run-only mode require guard eval
std::unique_ptr<FrameLocalsMapping> locals =
std::make_unique<FrameLocalsMapping>(frame);
PyObject* backend = get_backend(callback.ptr()); // borrowed
// We don't run the current custom_eval_frame behavior for guards.
// So we temporarily set the callback to Py_None to drive the correct behavior
// in the shim.
eval_frame_callback_set(Py_None);
DEBUG_CHECK(PyDict_CheckExact(frame->f_globals));
DEBUG_CHECK(PyDict_CheckExact(frame->f_builtins));
_PytorchRecordFunctionState* rf =
_pytorch_record_function_enter(cache_lookup_profiler_str);
PyObject* maybe_cached_code = nullptr;
lookup(
extra,
locals.get(),
backend,
&maybe_cached_code,
&trace_annotation,
is_skip_guard_eval_unsafe);
_pytorch_record_function_exit(rf);
// A callback of Py_False indicates "run only" mode, the cache is checked,
// but we never compile.
bool run_only =
strategy.cur_action == RUN_ONLY || callback.is(py::bool_(false));
if (run_only) {
DEBUG_TRACE("In run only mode %s", get_frame_name(frame));
}
if (maybe_cached_code == nullptr) {
// guard eval failed, keep propagating
fail();
return eval_result;
} else if (maybe_cached_code != Py_None) {
cached_code = (PyCodeObject*)maybe_cached_code;
// used cached version
DEBUG_TRACE("cache hit %s", get_frame_name(frame));
eval_custom();
return eval_result;
}
// cache miss
DEBUG_TRACE("cache miss %s", get_frame_name(frame));
if (is_skip_guard_eval_unsafe) {
PyErr_SetString(
PyExc_RuntimeError,
"Recompilation triggered with skip_guard_eval_unsafe stance. "
"This usually means that you have not warmed up your model "
"with enough inputs such that you can guarantee no more recompilations.");
fail();
return eval_result;
}
if (run_only) {
eval_default();
return eval_result;
}
// call callback
CacheEntry* cache_entry = extract_cache_entry(extra);
FrameState* frame_state = extract_frame_state(extra);
py::object callback_result;
FrameExecStrategy new_strategy;
bool apply_to_code = false;
PyObject* guarded_code = nullptr;
try {
callback_result = dynamo_call_callback(
callback, frame, locals.get(), cache_entry, frame_state);
new_strategy =
callback_result.attr("frame_exec_strategy").cast<FrameExecStrategy>();
apply_to_code = callback_result.attr("apply_to_code").cast<bool>();
guarded_code = callback_result.attr("guarded_code").ptr();
} catch (py::error_already_set& e) {
// internal exception, returning here will leak the exception into user
// code this is useful for debugging -- but we dont want it to happen
// outside of testing NB: we intentionally DO NOT re-enable custom
// behavior to prevent cascading failure from internal exceptions. The
// upshot is if Dynamo barfs, that's it for Dynamo, even if you catch the
// exception inside the torch.compile block we won't try to Dynamo
// anything else.
fail();
e.restore();
return eval_result;
}
// recursive frame action
if (strategy.recursive_action == DEFAULT) {
// old recursive action overrides new recursive action
recursive_callback = _callback_from_action(
recursive_callback, new_strategy.recursive_action);
}
// possibly apply frame strategy to future frames with same code object
if (apply_to_code) {
if (new_strategy.cur_action != DEFAULT) {
DEBUG_TRACE("create action: %d\n", new_strategy.cur_action);
}
if (new_strategy.recursive_action != DEFAULT) {
DEBUG_TRACE(
"create recursive action: %d\n", new_strategy.recursive_action);
}
extra_state_set_exec_strategy(extra, new_strategy);
}
if (guarded_code != Py_None) {
DEBUG_TRACE("create cache %s", get_frame_name(frame));
// NB: We could use extract_cache_entry to get the cache_entry, but
// extract_cache_entry returns a borrowed reference. Modifying a borrowed
// reference seems wrong. Therefore, we directly access the
// extra->cache_entry. extra wont be NULL here.
CacheEntry* new_cache_entry =
create_cache_entry(extra, guarded_code, backend);
// Update the existing cache_entry on the extra object. This extra object
// is sitting on the extra scratch space, we are just changing the
// cache_entry ptr. As a result, extra now becomes the owner of CacheEntry
// object. This will be cleaned up when set_extra_state is called.
// Re-enable custom behavior
cached_code = CacheEntry_get_code(new_cache_entry),
trace_annotation = CacheEntry_get_trace_annotation(new_cache_entry);
eval_custom();
} else {
eval_default();
}
return eval_result;
}
PyObject* set_code_exec_strategy(PyObject* dummy, PyObject* args) {
PyObject* code_obj = nullptr;
PyObject* strategy_obj = nullptr;
if (!PyArg_ParseTuple(args, "OO", &code_obj, &strategy_obj)) {
return nullptr;
}
if (!PyCode_Check(code_obj)) {
PyErr_SetString(PyExc_TypeError, "expected a code object");
return nullptr;
}
PyCodeObject* code = (PyCodeObject*)code_obj;
ExtraState* extra = get_extra_state(code);
if (extra == nullptr) {
extra = init_and_set_extra_state(code);
}
FrameExecStrategy strategy =
py::handle(strategy_obj).cast<FrameExecStrategy>();
extra_state_set_exec_strategy(extra, strategy);
Py_RETURN_NONE;
}