[dynamo] Recursively realize the stack_values (#152853)

Might also fix - https://github.com/pytorch/pytorch/issues/135696

Pull Request resolved: https://github.com/pytorch/pytorch/pull/152853
Approved by: https://github.com/Lucaskabela, https://github.com/mlazos, https://github.com/jansel
This commit is contained in:
Animesh Jain 2025-05-06 11:15:33 -07:00 committed by PyTorch MergeBot
parent 1965a2ca1e
commit ecd74c953f
15 changed files with 24 additions and 23 deletions

View File

@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0
detectron2_fcos_r_50_fpn,pass,20
detectron2_fcos_r_50_fpn,pass,22

1 name accuracy graph_breaks
82 soft_actor_critic pass 0
83 speech_transformer pass 10
84 squeezenet1_1 pass 0
85 stable_diffusion_text_encoder pass 0
86 stable_diffusion_unet pass_due_to_skip 0
87 timm_efficientnet pass 0
88 timm_regnet pass 0

View File

@ -74,7 +74,7 @@ detectron2_fasterrcnn_r_50_fpn,pass,46
detectron2_fcos_r_50_fpn,pass,22
detectron2_fcos_r_50_fpn,pass,24

1 name accuracy graph_breaks
74 speech_transformer pass 10
75 squeezenet1_1 pass 0
76 stable_diffusion_unet pass_due_to_skip 0
77 timm_efficientdet model_fail_to_load 0
78 timm_efficientnet pass 0
79 timm_nfnet pass 0
80 timm_regnet pass 0

View File

@ -74,7 +74,7 @@ detectron2_fasterrcnn_r_50_fpn,pass,46
detectron2_fcos_r_50_fpn,pass,22
detectron2_fcos_r_50_fpn,pass,24

1 name accuracy graph_breaks
74 speech_transformer pass 10
75 squeezenet1_1 pass 0
76 stable_diffusion_unet pass_due_to_skip 0
77 timm_efficientdet model_fail_to_load 0
78 timm_efficientnet pass 0
79 timm_nfnet pass 0
80 timm_regnet pass 0

View File

@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0
detectron2_fcos_r_50_fpn,pass,20
detectron2_fcos_r_50_fpn,pass,22

1 name accuracy graph_breaks
82 soft_actor_critic pass 0
83 speech_transformer pass 10
84 squeezenet1_1 pass 0
85 stable_diffusion_text_encoder pass 0
86 stable_diffusion_unet pass_due_to_skip 0
87 timm_efficientnet pass 0
88 timm_regnet pass 0

View File

@ -74,7 +74,7 @@ detectron2_fasterrcnn_r_50_fpn,pass,46
detectron2_fcos_r_50_fpn,pass,22
detectron2_fcos_r_50_fpn,pass,24

1 name accuracy graph_breaks
74 timm_efficientnet pass 0
75 timm_nfnet pass 0
76 timm_regnet pass 0
77 timm_resnest pass 0
78 timm_vision_transformer pass 0
79 timm_vision_transformer_large pass_due_to_skip 0
80 timm_vovnet pass 0

View File

@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0
detectron2_fcos_r_50_fpn,pass,20
detectron2_fcos_r_50_fpn,pass,22

1 name accuracy graph_breaks
82 soft_actor_critic pass 0
83 speech_transformer pass 10
84 squeezenet1_1 pass 0
85 stable_diffusion_text_encoder pass 0
86 stable_diffusion_unet pass_due_to_skip 0
87 timm_efficientnet pass 0
88 timm_regnet pass 0

View File

@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0
detectron2_fcos_r_50_fpn,pass,20
detectron2_fcos_r_50_fpn,pass,22

1 name accuracy graph_breaks
82 soft_actor_critic pass 0
83 speech_transformer pass 10
84 squeezenet1_1 pass 0
85 stable_diffusion_text_encoder pass 0
86 stable_diffusion_unet pass_due_to_skip 0
87 timm_efficientnet pass 0
88 timm_regnet pass 0

View File

@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0
detectron2_fcos_r_50_fpn,pass,20
detectron2_fcos_r_50_fpn,pass,22

1 name accuracy graph_breaks
82 soft_actor_critic pass 0
83 speech_transformer pass 10
84 squeezenet1_1 pass 0
85 stable_diffusion_text_encoder pass 0
86 stable_diffusion_unet pass_due_to_skip 0
87 timm_efficientnet pass 0
88 timm_regnet pass 0

View File

@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0
detectron2_fcos_r_50_fpn,pass,20
detectron2_fcos_r_50_fpn,pass,22

1 name accuracy graph_breaks
82 stable_diffusion_text_encoder pass 0
83 stable_diffusion_unet pass_due_to_skip 0
84 timm_efficientnet pass 0
85 timm_regnet pass 0
86 timm_resnest pass 0
87 timm_vision_transformer pass 0
88 timm_vision_transformer_large pass_due_to_skip 0

View File

@ -82,7 +82,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0
detectron2_fcos_r_50_fpn,pass,20
detectron2_fcos_r_50_fpn,pass,22

1 name accuracy graph_breaks
82 squeezenet1_1 pass 0
83 stable_diffusion_text_encoder pass 0
84 stable_diffusion_unet pass_due_to_skip 0
85 timm_efficientnet pass 0
86 timm_regnet pass 0
87 timm_resnest pass 0
88 timm_vision_transformer pass 0

View File

@ -3947,7 +3947,7 @@ class ReproTests(torch._dynamo.test_case.TestCase):
root = []
root[:] = [root, root, None, None]
@torch.compile(fullgraph=True, backend="eager")
@torch.compile(fullgraph=False, backend="eager")
def test_bug():
return root[0]

View File

@ -621,6 +621,9 @@ class StructuredTraceTest(TestCase):
"""\
{"dynamo_start": {"stack": "STACK"}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
{"artifact": {"name": "dynamo_graph_break_reason", "encoding": "string"}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "HASH"}
{"describe_storage": {"id": 0, "describer_id": "ID", "size": 4194304}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
{"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1024, 1024], "is_leaf": true, "stride": [1024, 1], "storage": 0, "view_func": "VIEW_FUNC", "describer_id": "ID"}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
{"describe_source": {"describer_id": "ID", "id": 0, "source": "L['args'][0]"}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
{"dynamo_cpp_guards_str": {}, "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "HASH"}
{"compilation_metrics": "METRICS", "rank": 0, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
{"dynamo_start": {"stack": "STACK"}, "rank": 0, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}

View File

@ -72,17 +72,6 @@ unittest.expectedFailure(
InlineAndInstallStrictExportTestExport.test_buffer_util_inline_and_install_strict # noqa: F821
)
# NOTE: For this test, when we call `LOAD_ATTR`, we fail to realize the LazyVariableTracker.
# This is because the variable is popped off the stack and pushed into a TupleVariable (then a ConstDictVariable).
# So, in the first case (not nested return), the LazyVariable is realized at the RETURN_VALUE call;
# in the second case (nested return), the LazyVariable is not realized until we begin COMPILING_GRAPH.
# As a result, we don't install the variable, and so we crash when we expect the variable to be installed later.
# Potential fix: We can force the lazy variable tracker to realize; we just need to see how this is done for the
# non-nested case.
unittest.expectedFailure(
InlineAndInstallStrictExportTestExport.test_constant_output_inline_and_install_strict # noqa: F821
)
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests

View File

@ -1122,7 +1122,7 @@ class OutputGraph(OutputGraphGuardsState):
# realize any unrealized tensor VTs in case they
# need to be added to self.nn_modules as attributes
for value in stack_values:
value.realize()
variables.LazyVariableTracker.realize_all(value)
# Use nn.Module "proxies" in the constructed GraphModule so that
# the resulting GM does not hold additional strong references to the original modules.

View File

@ -1456,6 +1456,15 @@ class VariableBuilder:
return self.tx.output.side_effects.track_object_existing(value, result)
def wrap_listlike(self, value: Union[tuple, list, odict_values, NamedTuple]):
for item in value:
if item is value:
unimplemented_v2(
gb_type="list elements are pointing to the list itself",
context="",
explanation="Dynamo does not support lists whose items reference to itself",
hints=["Avoid using self referential list"],
)
if config.specialize_int and type(value) is torch.Size:
self.install_guards(GuardBuilder.CONSTANT_MATCH)
return ConstantVariable.create(value=value)