[inductor] Fix logging for run_and_get_cpp_code (#128794)

Summary: Found during testing with remote caching: Use the same output logger object between graph.py and codecache.py since it's patched in `run_and_get_cpp_code`. That allows us to capture any logging produced from the codecache path when using `run_and_get_cpp_code`. I'm also fixing a few tests that were passing mistakenly because logging was missing.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128794
Approved by: https://github.com/oulgen, https://github.com/leslie-fang-intel
This commit is contained in:
Sam Larsen 2024-06-18 21:24:59 -07:00 committed by PyTorch MergeBot
parent 277f2914a5
commit 571a0db132
5 changed files with 19 additions and 7 deletions

View File

@@ -256,7 +256,8 @@ if RUN_CPU:
BaseTest("test_multihead_attention", "cpu", test_cpu_repro.CPUReproTests()),
BaseTest(
"test_multi_threading",
code_string_count={"py::gil_scoped_release release;": 1},
# Two threads compile, so we expect the output code to be printed twice.
code_string_count={"py::gil_scoped_release release;": 2},
),
BaseTest("test_profiler_mark_wrapper_call"),
BaseTest(

View File

@@ -1920,6 +1920,8 @@ class CPUReproTests(TestCase):
FileCheck().check(_target_code_check).run(code)
if _target_code_check_not:
FileCheck().check_not(_target_code_check_not).run(code)
# Verify that the output isn't empty
FileCheck().check("Output code:").run(code)
self.assertEqual(
_fn(*_inps),
@@ -1933,10 +1935,16 @@ class CPUReproTests(TestCase):
_internal_check(fn, inps, "aten.scatter_reduce_")
if "ATen parallel backend: OpenMP" in torch.__config__.parallel_info():
# Fix https://github.com/pytorch/pytorch/issues/118518
# which fails to change thread number with native thread pool
with set_num_threads(1):
_internal_check(fn, inps, _target_code_check_not="aten.scatter_reduce_")
# When running with a single thread, we expect the aten.scatter will go
# into the cpp backend codegen instead of a fallback to aten.scatter_reduce_.
# Avoid the inductor cache so we don't serve an entry compiled above.
with config.patch(
{"fx_graph_cache": False, "fx_graph_remote_cache": False}
):
_internal_check(
fn, inps, _target_code_check_not="aten.scatter_reduce_"
)
with config.patch({"cpp.dynamic_threads": True}), set_num_threads(1):
_internal_check(fn, inps, "aten.scatter_reduce_")

View File

@@ -10177,7 +10177,8 @@ class CommonTemplate:
self.assertEqual(rot.grad, rot_e.grad)
self.assertEqual(trans.grad, trans_e.grad)
@config.patch({"fx_graph_cache": False})
# If we serve from the cache, the init hook isn't called
@config.patch({"fx_graph_cache": False, "fx_graph_remote_cache": False})
def test_inner_fn_str_and_stride(self):
def f(x):
x = x + 1

View File

@@ -97,9 +97,11 @@ if TYPE_CHECKING:
from torch._higher_order_ops.effects import _EffectType
from .codegen.wrapper import WrapperCodeGen
from torch._inductor.codecache import output_code_log
log = logging.getLogger(__name__)
perf_hint_log = torch._logging.getArtifactLogger(__name__, "perf_hints")
output_code_log = torch._logging.getArtifactLogger(__name__, "output_code")
aten = torch.ops.aten
_post_grad_graph_counter = itertools.count()

View File

@@ -1624,7 +1624,7 @@ def run_and_get_cpp_code(fn, *args, **kwargs):
log_capture_string = io.StringIO()
ch = logging.StreamHandler(log_capture_string)
from torch._inductor.graph import output_code_log
from torch._inductor.codecache import output_code_log
output_code_log.addHandler(ch)
prev_level = output_code_log.level