dynamo tracing perf: don't unnecessarily call getframeinfo on the hot path: 47.26 -> 37.66 (#143066)

See #143056 for overall docs.

This PR: Stop calling `getframeinfo()` when we only need the caller's function
name and discard the rest of the result; read `caller.f_code.co_name` directly instead.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/143066
Approved by: https://github.com/jansel
This commit is contained in:
Aaron Orenstein 2024-12-12 16:28:30 -08:00 committed by PyTorch MergeBot
parent e0c8abda76
commit 63e1f97f4b
2 changed files with 10 additions and 11 deletions

View File

@ -18,7 +18,7 @@ add_loop_inductor_gpu,compile_time_instruction_count,27320000000,0.015
basic_modules_ListOfLinears_eager,compile_time_instruction_count,1033000000,0.015
basic_modules_ListOfLinears_eager,compile_time_instruction_count,1018000000,0.015
@ -38,7 +38,7 @@ update_hint_regression,compile_time_instruction_count,1669000000,0.02
sum_floordiv_regression,compile_time_instruction_count,1113000000,0.015
sum_floordiv_regression,compile_time_instruction_count,1033000000,0.015
@ -50,7 +50,7 @@ aotdispatcher_inference_nosubclass_cpu,compile_time_instruction_count,2018000000
aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5843000000,0.015
aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5796000000,0.015
@ -62,4 +62,4 @@ aotdispatcher_training_nosubclass_cpu,compile_time_instruction_count,3863000000,
aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10410000000,0.015
aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10330000000,0.015

1 add_loop_eager compile_time_instruction_count 3066000000 0.015
18
19
20
21
22
23
24
38
39
40
41
42
43
44
50
51
52
53
54
55
56
62
63
64
65

View File

@ -21,7 +21,7 @@ import warnings
import weakref
from contextlib import contextmanager
from copy import deepcopy
from inspect import currentframe, getframeinfo
from inspect import currentframe
from typing import (
Any,
Callable,
@ -2057,18 +2057,17 @@ class GuardBuilder(GuardBuilderBase):
caller = cur_frame.f_back
del cur_frame
assert caller is not None
func_name = getframeinfo(caller)[2]
func_name = caller.f_code.co_name
del caller
# We use func_name for export, so might as well get a nice defensive check out of it
assert func_name in dir(
self.__class__
assert (
func_name in self.__class__.__dict__
), f"_produce_guard_code must be called from inside GuardedCode. Called from {func_name}"
# Not all guards have names, some can be installed globally (see asserts on HAS_GRAD)
if provided_guarded_object is None:
name_valid = guard.name is not None and guard.name != ""
guarded_object = self.get(guard.name) if name_valid else None
name = guard.name
guarded_object = None if not name else self.get(name)
else:
guarded_object = provided_guarded_object