[dynamo] Recursively realize the stack_values (#152853)

Might also fix https://github.com/pytorch/pytorch/issues/135696

Pull Request resolved: https://github.com/pytorch/pytorch/pull/152853
Approved by: https://github.com/Lucaskabela, https://github.com/mlazos, https://github.com/jansel
2025-12-06 12:20:52 +01:00 · 2025-05-06 11:15:33 -07:00 · 2025-05-06 11:15:33 -07:00 · ecd74c953f
commit ecd74c953f
parent 1965a2ca1e
15 changed files with 24 additions and 23 deletions
--- a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv
@@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0



-detectron2_fcos_r_50_fpn,pass,20
+detectron2_fcos_r_50_fpn,pass,22



--- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv
@@ -74,7 +74,7 @@ detectron2_fasterrcnn_r_50_fpn,pass,46



-detectron2_fcos_r_50_fpn,pass,22
+detectron2_fcos_r_50_fpn,pass,24



--- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv
@@ -74,7 +74,7 @@ detectron2_fasterrcnn_r_50_fpn,pass,46



-detectron2_fcos_r_50_fpn,pass,22
+detectron2_fcos_r_50_fpn,pass,24



--- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_inference.csv
@@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0



-detectron2_fcos_r_50_fpn,pass,20
+detectron2_fcos_r_50_fpn,pass,22



--- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv
@@ -74,7 +74,7 @@ detectron2_fasterrcnn_r_50_fpn,pass,46



-detectron2_fcos_r_50_fpn,pass,22
+detectron2_fcos_r_50_fpn,pass,24



--- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_inference.csv
@@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0



-detectron2_fcos_r_50_fpn,pass,20
+detectron2_fcos_r_50_fpn,pass,22



--- a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv
@@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0



-detectron2_fcos_r_50_fpn,pass,20
+detectron2_fcos_r_50_fpn,pass,22



--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv
@@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0



-detectron2_fcos_r_50_fpn,pass,20
+detectron2_fcos_r_50_fpn,pass,22



--- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv
@@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0



-detectron2_fcos_r_50_fpn,pass,20
+detectron2_fcos_r_50_fpn,pass,22



--- a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv
@@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0



-detectron2_fcos_r_50_fpn,pass,20
+detectron2_fcos_r_50_fpn,pass,22



--- a/test/dynamo/test_repros.py
+++ b/test/dynamo/test_repros.py
@@ -3947,7 +3947,7 @@ class ReproTests(torch._dynamo.test_case.TestCase):
        root = []
        root[:] = [root, root, None, None]

-        @torch.compile(fullgraph=True, backend="eager")
+        @torch.compile(fullgraph=False, backend="eager")
        def test_bug():
            return root[0]

--- a/test/dynamo/test_structured_trace.py
+++ b/test/dynamo/test_structured_trace.py
@@ -621,6 +621,9 @@ class StructuredTraceTest(TestCase):
                """\
 {"dynamo_start": {"stack": "STACK"}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
 {"artifact": {"name": "dynamo_graph_break_reason", "encoding": "string"}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "HASH"}
+{"describe_storage": {"id": 0, "describer_id": "ID", "size": 4194304}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
+{"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 1024], "is_leaf": true, "stride": [1024, 1], "storage": 0, "view_func": "VIEW_FUNC", "describer_id": "ID"}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
+{"describe_source": {"describer_id": "ID", "id": 0, "source": "L['args'][0]"}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
 {"dynamo_cpp_guards_str": {}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "HASH"}
 {"compilation_metrics": "METRICS", "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
 {"dynamo_start": {"stack": "STACK"}, "rank": 0, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
--- a/test/export/test_export_with_inline_and_install.py
+++ b/test/export/test_export_with_inline_and_install.py
@@ -72,17 +72,6 @@ unittest.expectedFailure(
    InlineAndInstallStrictExportTestExport.test_buffer_util_inline_and_install_strict  # noqa: F821
 )

-# NOTE: For this test, when we call `LOAD_ATTR`, we fail to realizing the LazyVariableTracker
-# This is because the variable is popped off stack, pushed into TupleVariable (then ConstDictVariable)
-# So, in the first case (not nested return), the LazyVariable is realized at the RETURN_VALUE call;
-# for the second case (nested return), the LazyVariable is not realized until we begin COMPILING_GRAPH
-# As a result, we don't install the variable, so crash when we expect the variable to be installed later
-# Potential fix: We can force the lazy variable tracker to realize; just need to see how this is done for the non
-# nested case
-unittest.expectedFailure(
-    InlineAndInstallStrictExportTestExport.test_constant_output_inline_and_install_strict  # noqa: F821
-)
-

 if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
--- a/torch/_dynamo/output_graph.py
+++ b/torch/_dynamo/output_graph.py
@@ -1122,7 +1122,7 @@ class OutputGraph(OutputGraphGuardsState):
        # realize any unrealized tensor VTs in case they
        # need to be added to self.nn_modules as attributes
        for value in stack_values:
-            value.realize()
+            variables.LazyVariableTracker.realize_all(value)

        # Use nn.Module "proxies" in the constructed GraphModule so that
        # the resulting GM does not hold additional strong references to the original modules.
--- a/torch/_dynamo/variables/builder.py
+++ b/torch/_dynamo/variables/builder.py
@@ -1456,6 +1456,15 @@ class VariableBuilder:
        return self.tx.output.side_effects.track_object_existing(value, result)

    def wrap_listlike(self, value: Union[tuple, list, odict_values, NamedTuple]):
+        for item in value:
+            if item is value:
+                unimplemented_v2(
+                    gb_type="list elements are pointing to the list itself",
+                    context="",
+                    explanation="Dynamo does not support lists whose items reference to itself",
+                    hints=["Avoid using self referential list"],
+                )
+
        if config.specialize_int and type(value) is torch.Size:
            self.install_guards(GuardBuilder.CONSTANT_MATCH)
            return ConstantVariable.create(value=value)