mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
dynamo tracing perf: don't unnecessarily call getframeinfo on the hot path: 47.26 -> 37.66 (#143066)
See #143056 for overall docs.

This PR: Stop using `getframeinfo()` when we only care about the function name and throw the rest away.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/143066
Approved by: https://github.com/jansel
This commit is contained in:
parent
e0c8abda76
commit
63e1f97f4b
|
|
@ -18,7 +18,7 @@ add_loop_inductor_gpu,compile_time_instruction_count,27320000000,0.015
|
|||
|
||||
|
||||
|
||||
basic_modules_ListOfLinears_eager,compile_time_instruction_count,1033000000,0.015
|
||||
basic_modules_ListOfLinears_eager,compile_time_instruction_count,1018000000,0.015
|
||||
|
||||
|
||||
|
||||
|
|
@ -38,7 +38,7 @@ update_hint_regression,compile_time_instruction_count,1669000000,0.02
|
|||
|
||||
|
||||
|
||||
sum_floordiv_regression,compile_time_instruction_count,1113000000,0.015
|
||||
sum_floordiv_regression,compile_time_instruction_count,1033000000,0.015
|
||||
|
||||
|
||||
|
||||
|
|
@ -50,7 +50,7 @@ aotdispatcher_inference_nosubclass_cpu,compile_time_instruction_count,2018000000
|
|||
|
||||
|
||||
|
||||
aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5843000000,0.015
|
||||
aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5796000000,0.015
|
||||
|
||||
|
||||
|
||||
|
|
@ -62,4 +62,4 @@ aotdispatcher_training_nosubclass_cpu,compile_time_instruction_count,3863000000,
|
|||
|
||||
|
||||
|
||||
aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10410000000,0.015
|
||||
aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10330000000,0.015
|
||||
|
|
|
|||
|
|
|
@ -21,7 +21,7 @@ import warnings
|
|||
import weakref
|
||||
from contextlib import contextmanager
|
||||
from copy import deepcopy
|
||||
from inspect import currentframe, getframeinfo
|
||||
from inspect import currentframe
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
|
|
@ -2057,18 +2057,17 @@ class GuardBuilder(GuardBuilderBase):
|
|||
caller = cur_frame.f_back
|
||||
del cur_frame
|
||||
assert caller is not None
|
||||
func_name = getframeinfo(caller)[2]
|
||||
func_name = caller.f_code.co_name
|
||||
del caller
|
||||
# We use func_name for export, so might as well get a nice defensive check out of it
|
||||
assert func_name in dir(
|
||||
self.__class__
|
||||
assert (
|
||||
func_name in self.__class__.__dict__
|
||||
), f"_produce_guard_code must be called from inside GuardedCode. Called from {func_name}"
|
||||
|
||||
# Not all guards have names, some can be installed globally (see asserts on HAS_GRAD)
|
||||
if provided_guarded_object is None:
|
||||
name_valid = guard.name is not None and guard.name != ""
|
||||
|
||||
guarded_object = self.get(guard.name) if name_valid else None
|
||||
name = guard.name
|
||||
guarded_object = None if not name else self.get(name)
|
||||
else:
|
||||
guarded_object = provided_guarded_object
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user