mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
dynamo tracing perf: don't unnecessarily call getframeinfo on the hot path: 47.26 -> 37.66 (#143066)
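See #143056 for overall docs. This PR stops calling `getframeinfo()` in cases where only the caller's function name is needed and the rest of the returned frame info is thrown away; the name is now read directly from the frame's code object (`caller.f_code.co_name`). Pull Request resolved: https://github.com/pytorch/pytorch/pull/143066 Approved by: https://github.com/jansel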
This commit is contained in:
parent
e0c8abda76
commit
63e1f97f4b
|
|
@ -18,7 +18,7 @@ add_loop_inductor_gpu,compile_time_instruction_count,27320000000,0.015
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
basic_modules_ListOfLinears_eager,compile_time_instruction_count,1033000000,0.015
|
basic_modules_ListOfLinears_eager,compile_time_instruction_count,1018000000,0.015
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -38,7 +38,7 @@ update_hint_regression,compile_time_instruction_count,1669000000,0.02
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
sum_floordiv_regression,compile_time_instruction_count,1113000000,0.015
|
sum_floordiv_regression,compile_time_instruction_count,1033000000,0.015
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -50,7 +50,7 @@ aotdispatcher_inference_nosubclass_cpu,compile_time_instruction_count,2018000000
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5843000000,0.015
|
aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5796000000,0.015
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -62,4 +62,4 @@ aotdispatcher_training_nosubclass_cpu,compile_time_instruction_count,3863000000,
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10410000000,0.015
|
aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10330000000,0.015
|
||||||
|
|
|
||||||
|
|
|
@ -21,7 +21,7 @@ import warnings
|
||||||
import weakref
|
import weakref
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from inspect import currentframe, getframeinfo
|
from inspect import currentframe
|
||||||
from typing import (
|
from typing import (
|
||||||
Any,
|
Any,
|
||||||
Callable,
|
Callable,
|
||||||
|
|
@ -2057,18 +2057,17 @@ class GuardBuilder(GuardBuilderBase):
|
||||||
caller = cur_frame.f_back
|
caller = cur_frame.f_back
|
||||||
del cur_frame
|
del cur_frame
|
||||||
assert caller is not None
|
assert caller is not None
|
||||||
func_name = getframeinfo(caller)[2]
|
func_name = caller.f_code.co_name
|
||||||
del caller
|
del caller
|
||||||
# We use func_name for export, so might as well get a nice defensive check out of it
|
# We use func_name for export, so might as well get a nice defensive check out of it
|
||||||
assert func_name in dir(
|
assert (
|
||||||
self.__class__
|
func_name in self.__class__.__dict__
|
||||||
), f"_produce_guard_code must be called from inside GuardedCode. Called from {func_name}"
|
), f"_produce_guard_code must be called from inside GuardedCode. Called from {func_name}"
|
||||||
|
|
||||||
# Not all guards have names, some can be installed globally (see asserts on HAS_GRAD)
|
# Not all guards have names, some can be installed globally (see asserts on HAS_GRAD)
|
||||||
if provided_guarded_object is None:
|
if provided_guarded_object is None:
|
||||||
name_valid = guard.name is not None and guard.name != ""
|
name = guard.name
|
||||||
|
guarded_object = None if not name else self.get(name)
|
||||||
guarded_object = self.get(guard.name) if name_valid else None
|
|
||||||
else:
|
else:
|
||||||
guarded_object = provided_guarded_object
|
guarded_object = provided_guarded_object
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user