[export] Log evaluate_expr (#146939)

We want to log each symnode created so that we can do provenance tracking in the tlparse report generated for draft export. To do this, we want to assign a unique id to every symnode, which Python's `id` function already does, and then for every expression created, we can find the provenance by tracing back through its arguments' IDs. This logging only happens when dtrace_structured is enabled, which is only when running draft export.

An example output is as follows:

<img width="799" alt="image" src="https://github.com/user-attachments/assets/88bb31b4-8c31-43fb-aa88-08b573b9f71d" />

For the increase in the compile_time_instruction_count benchmark, this seems unavoidable because I need to call `id` to get the unique identifier for each symnode. However, `id` is an inexpensive operation, so the overhead should be acceptable. I tried the following:
* Originally I was passing around `self`, which is a SymNode, which caused the compile time to be ~6.36M
* I changed it to pass around `id(self)` instead, which reduced the compile time to ~6.33M
* Then I changed it to be passed as a positional arg instead of a kwarg, which reduced the compile time to ~6.22M, but that saving doesn't seem large enough to justify the change.

#suppress-bc-linter

Pull Request resolved: https://github.com/pytorch/pytorch/pull/146939
Approved by: https://github.com/oulgen
This commit is contained in:
angelayi 2025-02-14 16:17:40 -08:00 committed by PyTorch MergeBot
parent c6b331f7d9
commit 84abeaad5c
4 changed files with 23 additions and 16 deletions

View File

@ -2,7 +2,7 @@ add_loop_eager,compile_time_instruction_count,3121000000,0.015
add_loop_eager_dynamic,compile_time_instruction_count,5813000000,0.025
add_loop_eager_dynamic,compile_time_instruction_count,6446000000,0.025
@ -10,7 +10,7 @@ add_loop_inductor,compile_time_instruction_count,30000000000,0.015
add_loop_inductor_dynamic_gpu,compile_time_instruction_count,44000000000,0.025
add_loop_inductor_dynamic_gpu,compile_time_instruction_count,46470000000,0.025
@ -18,7 +18,7 @@ add_loop_inductor_gpu,compile_time_instruction_count,26040000000,0.015
basic_modules_ListOfLinears_eager,compile_time_instruction_count,977200000,0.015
basic_modules_ListOfLinears_eager,compile_time_instruction_count,976400000,0.015
@ -34,7 +34,7 @@ basic_modules_ListOfLinears_inductor_gpu,compile_time_instruction_count,11000000
update_hint_regression,compile_time_instruction_count,1708000000,0.02
update_hint_regression,compile_time_instruction_count,1787000000,0.02
@ -42,7 +42,7 @@ sum_floordiv_regression,compile_time_instruction_count,1055000000,0.015
symint_sum,compile_time_instruction_count,3360000000,0.015
symint_sum,compile_time_instruction_count,3345000000,0.015

1 add_loop_eager compile_time_instruction_count 3121000000 0.015
2 add_loop_eager_dynamic compile_time_instruction_count 5813000000 6446000000 0.025
3 add_loop_inductor compile_time_instruction_count 30000000000 0.015
4 add_loop_inductor_dynamic_gpu compile_time_instruction_count 44000000000 46470000000 0.025
5 add_loop_inductor_gpu compile_time_instruction_count 26040000000 0.015
6 basic_modules_ListOfLinears_eager compile_time_instruction_count 977200000 976400000 0.015
7 basic_modules_ListOfLinears_inductor compile_time_instruction_count 19360000000 0.015
8 basic_modules_ListOfLinears_inductor_gpu_force_shape_pad compile_time_instruction_count 17240000000 0.015
10 update_hint_regression compile_time_instruction_count 1708000000 1787000000 0.02
11 sum_floordiv_regression compile_time_instruction_count 1055000000 0.015
12 symint_sum compile_time_instruction_count 3360000000 3345000000 0.015
13 aotdispatcher_inference_nosubclass_cpu compile_time_instruction_count 2045000000 0.015
14 aotdispatcher_inference_subclass_cpu compile_time_instruction_count 5836000000 0.015
15 aotdispatcher_partitioner_cpu compile_time_instruction_count 9298000000 0.015
16 aotdispatcher_training_nosubclass_cpu compile_time_instruction_count 3924000000 0.015
18
19
20
21
22
23
24
34
35
36
37
38
39
40
42
43
44
45
46
47
48

View File

@ -287,6 +287,7 @@ class TestDraftExport(TestCase):
self.assertEqual(
report.failures[0].failure_type, FailureType.DATA_DEPENDENT_ERROR
)
self.assertTrue(len(report.expressions_created) > 4)
def test_offsets(self):
class M(torch.nn.Module):

View File

@ -494,7 +494,9 @@ class SymNode:
def guard_int(self, file, line):
# TODO: use the file/line for some useful diagnostic on why a
# guard occurred
r = self.shape_env.evaluate_expr(self.expr, self.hint, fx_node=self.fx_node)
r = self.shape_env.evaluate_expr(
self.expr, self.hint, fx_node=self.fx_node, expr_sym_node_id=id(self)
)
try:
return int(r)
except Exception:
@ -504,7 +506,9 @@ class SymNode:
def guard_float(self, file, line):
# TODO: use the file/line for some useful diagnostic on why a
# guard occurred
r = self.shape_env.evaluate_expr(self.expr, self.hint, fx_node=self.fx_node)
r = self.shape_env.evaluate_expr(
self.expr, self.hint, fx_node=self.fx_node, expr_sym_node_id=id(self)
)
try:
return float(r)
except Exception:
@ -514,7 +518,9 @@ class SymNode:
def guard_bool(self, file, line):
# TODO: use the file/line for some useful diagnostic on why a
# guard occurred
r = self.shape_env.evaluate_expr(self.expr, self.hint, fx_node=self.fx_node)
r = self.shape_env.evaluate_expr(
self.expr, self.hint, fx_node=self.fx_node, expr_sym_node_id=id(self)
)
try:
return bool(r)
except Exception:
@ -571,6 +577,7 @@ class SymNode:
self.hint,
fx_node=self.fx_node,
size_oblivious=True,
expr_sym_node_id=id(self),
)
try:
return bool(r)

View File

@ -5903,7 +5903,7 @@ class ShapeEnv:
unhinted_expr: sympy.Basic,
*,
size_oblivious_result: Optional[sympy.Basic] = None,
expr_sym_node: Optional[SymNode] = None,
expr_sym_node_id: Optional[int] = None,
) -> GuardOnDataDependentSymNode:
# TODO: in a Dynamo context, having user code, and having the
# name of the local, will be much better
@ -5949,7 +5949,7 @@ class ShapeEnv:
metadata_fn=lambda: {
"expr": repr(expr),
"unhinted_expr": repr(unhinted_expr),
"expr_id": id(expr_sym_node),
"expr_id": expr_sym_node_id,
"stack": structured.from_traceback(
CapturedTraceback.extract(skip=1).summary()
),
@ -6572,9 +6572,9 @@ class ShapeEnv:
hint: Optional[Union[int, bool, float]] = None,
fx_node: Optional[torch.fx.Node] = None,
size_oblivious: bool = False,
expr_sym_node_id: Optional[int] = None,
*,
forcing_spec: bool = False,
expr_sym_node: Optional[SymNode] = None,
) -> sympy.Basic:
try:
return self._evaluate_expr(
@ -6582,8 +6582,8 @@ class ShapeEnv:
hint,
fx_node,
size_oblivious,
expr_sym_node_id,
forcing_spec=forcing_spec,
expr_sym_node=expr_sym_node,
)
except Exception:
self.log.warning(
@ -6601,9 +6601,9 @@ class ShapeEnv:
hint: Optional[Union[bool, int, float]] = None,
fx_node: Optional[torch.fx.Node] = None,
size_oblivious: bool = False,
expr_sym_node_id: Optional[int] = None,
*,
forcing_spec: bool = False,
expr_sym_node: Optional[SymNode] = None,
) -> sympy.Basic:
"""
Given an expression, evaluates it, adding guards if necessary
@ -6773,7 +6773,6 @@ class ShapeEnv:
"stack": structured.from_traceback(
CapturedTraceback.extract(skip=1).summary()
),
"expr_node_id": id(expr_sym_node),
},
)
dtrace_structured(
@ -6781,7 +6780,7 @@ class ShapeEnv:
metadata_fn=lambda: {
"expr": repr(orig_expr),
"result": repr(unsound_result),
"expr_node_id": id(expr_sym_node),
"expr_node_id": expr_sym_node_id,
"user_stack": structured.get_user_stack(3),
"stack": structured.get_framework_stack(3),
"symbol_to_sources": {
@ -6801,7 +6800,7 @@ class ShapeEnv:
expr.xreplace(self.var_to_val),
expr,
size_oblivious_result=size_oblivious_result,
expr_sym_node=expr_sym_node,
expr_sym_node_id=expr_sym_node_id,
)
else:
expr = new_expr