dynamo tracing perf: don't unnecessarily call getframeinfo on the hot path: 47.26 -> 37.66 (#143066)

See #143056 for overall docs.

This PR: Stop calling `getframeinfo()` when we only need the caller's function
name and discard the rest of the result; read `caller.f_code.co_name` instead.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/143066
Approved by: https://github.com/jansel
This commit is contained in:
Aaron Orenstein 2024-12-12 16:28:30 -08:00 committed by PyTorch MergeBot
parent e0c8abda76
commit 63e1f97f4b
2 changed files with 10 additions and 11 deletions

View File

@ -18,7 +18,7 @@ add_loop_inductor_gpu,compile_time_instruction_count,27320000000,0.015
basic_modules_ListOfLinears_eager,compile_time_instruction_count,1033000000,0.015 basic_modules_ListOfLinears_eager,compile_time_instruction_count,1018000000,0.015
@ -38,7 +38,7 @@ update_hint_regression,compile_time_instruction_count,1669000000,0.02
sum_floordiv_regression,compile_time_instruction_count,1113000000,0.015 sum_floordiv_regression,compile_time_instruction_count,1033000000,0.015
@ -50,7 +50,7 @@ aotdispatcher_inference_nosubclass_cpu,compile_time_instruction_count,2018000000
aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5843000000,0.015 aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5796000000,0.015
@ -62,4 +62,4 @@ aotdispatcher_training_nosubclass_cpu,compile_time_instruction_count,3863000000,
aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10410000000,0.015 aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10330000000,0.015

1 add_loop_eager compile_time_instruction_count 3066000000 0.015
18
19
20
21
22
23
24
38
39
40
41
42
43
44
50
51
52
53
54
55
56
62
63
64
65

View File

@ -21,7 +21,7 @@ import warnings
import weakref import weakref
from contextlib import contextmanager from contextlib import contextmanager
from copy import deepcopy from copy import deepcopy
from inspect import currentframe, getframeinfo from inspect import currentframe
from typing import ( from typing import (
Any, Any,
Callable, Callable,
@ -2057,18 +2057,17 @@ class GuardBuilder(GuardBuilderBase):
caller = cur_frame.f_back caller = cur_frame.f_back
del cur_frame del cur_frame
assert caller is not None assert caller is not None
func_name = getframeinfo(caller)[2] func_name = caller.f_code.co_name
del caller del caller
# We use func_name for export, so might as well get a nice defensive check out of it # We use func_name for export, so might as well get a nice defensive check out of it
assert func_name in dir( assert (
self.__class__ func_name in self.__class__.__dict__
), f"_produce_guard_code must be called from inside GuardedCode. Called from {func_name}" ), f"_produce_guard_code must be called from inside GuardedCode. Called from {func_name}"
# Not all guards have names, some can be installed globally (see asserts on HAS_GRAD) # Not all guards have names, some can be installed globally (see asserts on HAS_GRAD)
if provided_guarded_object is None: if provided_guarded_object is None:
name_valid = guard.name is not None and guard.name != "" name = guard.name
guarded_object = None if not name else self.get(name)
guarded_object = self.get(guard.name) if name_valid else None
else: else:
guarded_object = provided_guarded_object guarded_object = provided_guarded_object