[inductor] pre grad graph bisecting (#166344)

A few things to note:
1. Customers like vllm use a custom backend (e.g. VllmBackend), split the graph, and call standalone_compile for each split. If we let the bisector override the backend, we won't bisect through the custom backend. `test_configs.bisect_keep_custom_backend_for_inductor` keeps the custom backend in place while bisecting for inductor.
2. pre_grad_graph bisecting and lowering bisecting do not compose well with each other yet, since an issue may simply be captured by whichever one we try first. `test_configs.bisect_pre_grad_graph` opts in to 'pre_grad_graph' bisecting (a usage sketch follows this list).
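For illustration, a minimal sketch of opting in to both configs and driving the bisector. The repro function and tensor shape are illustrative; `CompilerBisector.do_bisect` and the two `test_configs` entries come from this PR and the existing bisector:

    import torch
    from torch._inductor import config
    from torch._inductor.compiler_bisector import CompilerBisector

    def f(x):
        return (x + 1).relu()

    def repro():
        # do_bisect expects a check that returns True on success, False on failure
        torch._dynamo.reset()
        x = torch.randn(1024)
        return torch.allclose(torch.compile(f)(x), f(x))

    with config.patch(
        {
            "test_configs.bisect_pre_grad_graph": True,
            "test_configs.bisect_keep_custom_backend_for_inductor": True,
        }
    ):
        out = CompilerBisector.do_bisect(repro)
        # out.backend / out.subsystem / out.bisect_number describe the culprit,
        # e.g. subsystem "pre_grad_graph" when the pre-grad graph bisect isolates it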

Pull Request resolved: https://github.com/pytorch/pytorch/pull/166344
Approved by: https://github.com/eellison
Shunting Zhang 2025-10-31 17:43:55 -07:00, committed by PyTorch MergeBot
parent 1aef88c72d
commit 4cc64d6234
5 changed files with 81 additions and 1 deletion


@@ -275,6 +275,59 @@ class TestCompilerBisector(TestCase):
         self.assertEqual(out.backend, "eager")
         self.assertEqual(out.subsystem, None)
 
+    @config.patch(
+        {
+            "test_configs.bisect_pre_grad_graph": True,
+            "test_configs.bisect_keep_custom_backend_for_inductor": True,
+        }
+    )
+    def test_bisect_pre_grad_graph(self):
+        def f(x):
+            for i in range(5):
+                x = x + 1
+            return x.relu()
+
+        class MyBackend:
+            def __call__(self, gm, example_inputs):
+                node_idx = 0
+
+                def node_to_graph_id(node):
+                    nonlocal node_idx
+                    out = 0 if node_idx < 3 else 1
+                    node_idx += 1
+                    return out
+
+                split_gm = torch.fx.passes.split_module.split_module(
+                    gm, None, node_to_graph_id, keep_original_order=True
+                )
+
+                for name, submod in split_gm.named_modules():
+                    if "submod_" in name:
+                        # the test case is simple enough that using
+                        # the original example_inputs works for the
+                        # submodule
+                        submod.forward = torch._inductor.standalone_compile(
+                            submod,
+                            example_inputs,
+                            dynamic_shapes="from_example_inputs",
+                            options={},
+                        )
+                return split_gm
+
+        def test_fn():
+            torch._dynamo.reset()
+
+            x = torch.randn(1024, device="cuda")
+            with config.patch("triton.inject_relu_bug_TESTING_ONLY", "accuracy"):
+                opt_f = torch.compile(f, backend=MyBackend())
+                return torch.allclose(opt_f(x), f(x))
+
+        out = CompilerBisector.do_bisect(test_fn)
+        self.assertEqual(out.backend, "inductor")
+        self.assertEqual(out.subsystem, "pre_grad_graph")
+        self.assertEqual(out.bisect_number, 1)
+
 
 if __name__ == "__main__":
     from torch._dynamo.test_case import run_tests
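For orientation: MyBackend above deliberately mimics the vllm-style flow from note 1. It splits the captured graph into two submodules with torch.fx.passes.split_module and compiles each split separately via torch._inductor.standalone_compile, so the bisector must work through a custom backend rather than plain inductor.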


@@ -2644,7 +2644,16 @@ def compile(
     from torch._inductor.compiler_bisector import CompilerBisector
 
     if bisect_backend := CompilerBisector.get_backend():
-        backend = bisect_backend
+        import torch._inductor.config as inductor_config
+
+        # don't override the backend for use cases like vllm,
+        # which leverage their own custom backend.
+        if not (
+            inductor_config.test_configs.bisect_keep_custom_backend_for_inductor
+            and bisect_backend == "inductor"
+            and not isinstance(backend, str)
+        ):
+            backend = bisect_backend
 
     guard_filter_fn = None
     if options and isinstance(options, dict):

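As a usage note, the isinstance check above is what distinguishes the two kinds of backends; MyVllmStyleBackend is a hypothetical callable backend in the spirit of the test above:

    # a callable backend is kept when bisect_keep_custom_backend_for_inductor is set
    torch.compile(f, backend=MyVllmStyleBackend())

    # a plain string backend can still be overridden by the bisector
    torch.compile(f, backend="inductor")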

@@ -2448,6 +2448,11 @@ def compile_fx(
     # Some arguments trigger a recursive call to compile_fx. Handle these
     # short circuits first, before anything else
+    from torch._inductor.compiler_bisector import CompilerBisector
+
+    if CompilerBisector.disable_subsystem("inductor", "pre_grad_graph"):
+        return model_
+
     if config_patches:
         with config.patch(config_patches):
             return compile_fx(

@@ -491,6 +491,13 @@ class CompilerBisector:
         Run fn repeatedly attempting to bisect torch.compile. fn should return True on success and False on failure.
         """
+        # TODO: graph bisecting does not compose well with the lowering
+        # bisector so far. Use a config to opt in.
+        import torch._inductor.config as inductor_config
+
+        if inductor_config.test_configs.bisect_pre_grad_graph:
+            BACKENDS["inductor"].insert(0, BisectSubsystem("pre_grad_graph"))
+
         if not cli_interface:
             bisection_enabled_orig = cls.bisection_enabled
             cls.delete_bisect_status()
@@ -502,6 +509,9 @@
                 cls.delete_bisect_status()
                 cls.in_process_cache = None
 
+                if BACKENDS["inductor"][0].name == "pre_grad_graph":
+                    del BACKENDS["inductor"][0]
+
             cleanup_handler = atexit.register(cleanup)
 
         class DisableBisect:
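Design note: inserting at index 0 makes 'pre_grad_graph' the first inductor subsystem the bisector tries, and the cleanup hook removes the entry again so that repeated do_bisect calls do not accumulate duplicates in BACKENDS["inductor"].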


@@ -2150,6 +2150,9 @@ class test_configs:
         "TORCHINDUCTOR_DISTORT_BENCHMARKING_RESULT", ""
     )
 
+    bisect_pre_grad_graph = False
+    bisect_keep_custom_backend_for_inductor = False
+
 if TYPE_CHECKING:
     from torch.utils._config_typing import *  # noqa: F401, F403
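Both flags default to False, so pre-grad-graph bisecting and custom-backend preservation remain opt-in, matching the notes at the top; tests enable them via config.patch as shown above.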