[inductor] Fix logging for run_and_get_cpp_code (#128794)

Summary: Found during testing with remote caching: Use the same output logger object between graph.py and codecache.py since it's patched in `run_and_get_cpp_code`. That allows us to capture any logging produced from the codecache path when using `run_and_get_cpp_code`. I'm also fixing a few tests that were passing mistakenly because logging was missing.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128794
Approved by: https://github.com/oulgen, https://github.com/leslie-fang-intel
This commit is contained in:
Sam Larsen 2024-06-18 21:24:59 -07:00 committed by PyTorch MergeBot
parent 277f2914a5
commit 571a0db132
5 changed files with 19 additions and 7 deletions

View File

@@ -256,7 +256,8 @@ if RUN_CPU:
BaseTest("test_multihead_attention", "cpu", test_cpu_repro.CPUReproTests()),
BaseTest(
"test_multi_threading",
code_string_count={"py::gil_scoped_release release;": 1},
# Two threads compile, so we expect the output code to be printed twice.
code_string_count={"py::gil_scoped_release release;": 2},
),
BaseTest("test_profiler_mark_wrapper_call"),
BaseTest(

View File

@@ -1920,6 +1920,8 @@ class CPUReproTests(TestCase):
FileCheck().check(_target_code_check).run(code)
if _target_code_check_not:
FileCheck().check_not(_target_code_check_not).run(code)
# Verify that the output isn't empty
FileCheck().check("Output code:").run(code)
self.assertEqual(
_fn(*_inps),
@@ -1933,10 +1935,16 @@ class CPUReproTests(TestCase):
_internal_check(fn, inps, "aten.scatter_reduce_")
if "ATen parallel backend: OpenMP" in torch.__config__.parallel_info():
# Fix https://github.com/pytorch/pytorch/issues/118518
# which fails to change thread number with native thread pool
with set_num_threads(1):
_internal_check(fn, inps, _target_code_check_not="aten.scatter_reduce_")
# When running with a single thread, we expect the aten.scatter will go
# into the cpp backend codegen instead of a fallback to aten.scatter_reduce_.
# Avoid the inductor cache so we don't serve an entry compiled above.
with config.patch(
{"fx_graph_cache": False, "fx_graph_remote_cache": False}
):
_internal_check(
fn, inps, _target_code_check_not="aten.scatter_reduce_"
)
with config.patch({"cpp.dynamic_threads": True}), set_num_threads(1):
_internal_check(fn, inps, "aten.scatter_reduce_")

View File

@@ -10177,7 +10177,8 @@ class CommonTemplate:
self.assertEqual(rot.grad, rot_e.grad)
self.assertEqual(trans.grad, trans_e.grad)
@config.patch({"fx_graph_cache": False})
# If we serve from the cache, the init hook isn't called
@config.patch({"fx_graph_cache": False, "fx_graph_remote_cache": False})
def test_inner_fn_str_and_stride(self):
def f(x):
x = x + 1

View File

@@ -97,9 +97,11 @@ if TYPE_CHECKING:
from torch._higher_order_ops.effects import _EffectType
from .codegen.wrapper import WrapperCodeGen
from torch._inductor.codecache import output_code_log
log = logging.getLogger(__name__)
perf_hint_log = torch._logging.getArtifactLogger(__name__, "perf_hints")
output_code_log = torch._logging.getArtifactLogger(__name__, "output_code")
aten = torch.ops.aten
_post_grad_graph_counter = itertools.count()

View File

@@ -1624,7 +1624,7 @@ def run_and_get_cpp_code(fn, *args, **kwargs):
log_capture_string = io.StringIO()
ch = logging.StreamHandler(log_capture_string)
from torch._inductor.graph import output_code_log
from torch._inductor.codecache import output_code_log
output_code_log.addHandler(ch)
prev_level = output_code_log.level