dynamo tracing perf: don't unnecessarily call getframeinfo on the hot path: 47.26 -> 37.66 (#143066)

See #143056 for overall docs. This PR: Stop using `getframeinfo()` when we only care about the function name and throw the rest away. Pull Request resolved: https://github.com/pytorch/pytorch/pull/143066 Approved by: https://github.com/jansel
2025-12-06 12:20:52 +01:00 · 2024-12-12 16:28:30 -08:00 · 2024-12-12 16:28:30 -08:00 · 63e1f97f4b
commit 63e1f97f4b
parent e0c8abda76
2 changed files with 10 additions and 11 deletions
--- a/benchmarks/dynamo/pr_time_benchmarks/expected_results.csv
+++ b/benchmarks/dynamo/pr_time_benchmarks/expected_results.csv
@ -18,7 +18,7 @@ add_loop_inductor_gpu,compile_time_instruction_count,27320000000,0.015
-basic_modules_ListOfLinears_eager,compile_time_instruction_count,1033000000,0.015
+basic_modules_ListOfLinears_eager,compile_time_instruction_count,1018000000,0.015
@ -38,7 +38,7 @@ update_hint_regression,compile_time_instruction_count,1669000000,0.02
-sum_floordiv_regression,compile_time_instruction_count,1113000000,0.015
+sum_floordiv_regression,compile_time_instruction_count,1033000000,0.015
@ -50,7 +50,7 @@ aotdispatcher_inference_nosubclass_cpu,compile_time_instruction_count,2018000000
-aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5843000000,0.015
+aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5796000000,0.015
@ -62,4 +62,4 @@ aotdispatcher_training_nosubclass_cpu,compile_time_instruction_count,3863000000,
-aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10410000000,0.015
+aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10330000000,0.015
--- a/torch/_dynamo/guards.py
+++ b/torch/_dynamo/guards.py
@ -21,7 +21,7 @@ import warnings
 import weakref
 from contextlib import contextmanager
 from copy import deepcopy
-from inspect import currentframe, getframeinfo
+from inspect import currentframe
 from typing import (
    Any,
    Callable,
@ -2057,18 +2057,17 @@ class GuardBuilder(GuardBuilderBase):
        caller = cur_frame.f_back
        del cur_frame
        assert caller is not None
-        func_name = getframeinfo(caller)[2]
+        func_name = caller.f_code.co_name
        del caller
        # We use func_name for export, so might as well get a nice defensive check out of it
-        assert func_name in dir(
+        assert (
-            self.__class__
+            func_name in self.__class__.__dict__
        ), f"_produce_guard_code must be called from inside GuardedCode. Called from {func_name}"
        # Not all guards have names, some can be installed globally (see asserts on HAS_GRAD)
        if provided_guarded_object is None:
-            name_valid = guard.name is not None and guard.name != ""
+            name = guard.name
-
+            guarded_object = None if not name else self.get(name)
-            guarded_object = self.get(guard.name) if name_valid else None
        else:
            guarded_object = provided_guarded_object