From c934ed65673428c35f70cbce6ebd730ad22f058d Mon Sep 17 00:00:00 2001
From: "augusto.yjh"
Date: Thu, 31 Oct 2024 13:24:20 +0000
Subject: [PATCH] init kineto after torch module initialized (#131448)

Fixes #131020

As discussed in the issue thread, the environment variable
`KINETO_DAEMON_INIT_DELAY_S` can be used to delay the initialization of
`kineto` when `kineto` would otherwise be initialized before
`libtorch_cuda.so`. However, there is no obvious way to pick a proper value
for `KINETO_DAEMON_INIT_DELAY_S`, so this PR uses a different trick: it
defers the initialization of `kineto` until after the `torch` module has
been initialized.

I'm not sure whether this is an acceptable trick; please take a look at
this PR. Thanks.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/131448
Approved by: https://github.com/sraikund16, https://github.com/briancoutinho
---
 test/profiler/test_kineto.py                  | 51 +++++++++++++++++++
 torch/csrc/Module.cpp                         |  5 ++
 .../csrc/profiler/kineto_client_interface.cpp | 45 +++++-----------
 torch/csrc/profiler/kineto_client_interface.h | 11 ++++
 4 files changed, 79 insertions(+), 33 deletions(-)
 create mode 100644 test/profiler/test_kineto.py
 create mode 100644 torch/csrc/profiler/kineto_client_interface.h

diff --git a/test/profiler/test_kineto.py b/test/profiler/test_kineto.py
new file mode 100644
index 00000000000..a122170e5ac
--- /dev/null
+++ b/test/profiler/test_kineto.py
@@ -0,0 +1,51 @@
+# Owner(s): ["oncall: profiler"]
+import os
+import subprocess
+import sys
+from unittest.mock import patch
+
+import torch
+from torch.testing._internal.common_utils import run_tests, TestCase
+
+
+class SimpleKinetoInitializationTest(TestCase):
+    @patch.dict(os.environ, {"KINETO_USE_DAEMON": "1"})
+    def test_kineto_profiler_with_environment_variable(self):
+        """
+        Check that kineto works with torch in daemon mode; see issues #112389 and #131020.
+        Also check that kineto is not initialized when the user loads the shared library
+        directly.
+        """
+        script = """
+import torch
+if torch.cuda.is_available():
+    torch.cuda.init()
+"""
+        try:
+            subprocess.check_output(
+                [sys.executable, "-W", "always", "-c", script],
+                cwd=os.path.dirname(os.path.realpath(__file__)),
+            )
+        except subprocess.CalledProcessError as e:
+            if e.returncode != 0:
+                self.fail(
+                    "Kineto is not working properly with the Dynolog "
+                    "environment variable"
+                )
+        # import the shared library directly - it triggers static init but doesn't call kineto_init
+        env = os.environ.copy()
+        env["KINETO_USE_DAEMON"] = "1"
+        if "KINETO_DAEMON_INIT_DELAY_S" in env:
+            env.pop("KINETO_DAEMON_INIT_DELAY_S")
+        _, stderr = TestCase.run_process_no_exception(
+            f"from ctypes import CDLL; CDLL('{torch._C.__file__}')", env=env
+        )
+        self.assertNotRegex(
+            stderr.decode("ascii"),
+            "Registering daemon config loader",
+            "kineto should not be initialized when the shared library is imported directly",
+        )
+
+
+if __name__ == "__main__":
+    run_tests()
diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp
index 383603bd3aa..03f30797f18 100644
--- a/torch/csrc/Module.cpp
+++ b/torch/csrc/Module.cpp
@@ -102,6 +102,7 @@
 #include
 #include
+#include <torch/csrc/profiler/kineto_client_interface.h>
 #include
 
 #ifdef USE_CUDA
@@ -2443,6 +2444,10 @@ Call this whenever a new thread is created in order to propagate values from
   torch::set_disabled_torch_dispatch_impl(
       PyObject_GetAttrString(module, "_disabled_torch_dispatch_impl"));
   ASSERT_TRUE(torch::disabled_torch_dispatch_impl() != nullptr);
+  // initialize kineto only after the torch module itself is initialized (#131020)
+#ifdef USE_KINETO
+  torch::global_kineto_init();
+#endif
   return module;
   END_HANDLE_TH_ERRORS
 }
diff --git a/torch/csrc/profiler/kineto_client_interface.cpp b/torch/csrc/profiler/kineto_client_interface.cpp
index f8929b74c75..fd145f4c4fa 100644
--- a/torch/csrc/profiler/kineto_client_interface.cpp
+++ b/torch/csrc/profiler/kineto_client_interface.cpp
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include <torch/csrc/profiler/kineto_client_interface.h>
 #include
 #include
 
@@ -71,46 +72,24 @@ class LibKinetoClient : public libkineto::ClientInterface {
 
 } // namespace profiler::impl
 
+void global_kineto_init() {
+#if ENABLE_GLOBAL_OBSERVER
+  if (c10::utils::get_env("KINETO_USE_DAEMON").has_value()) {
+    libkineto_init(
+        /*cpuOnly=*/!(at::hasCUDA() || at::hasXPU() || at::hasMTIA()),
+        /*logOnError=*/true);
+    libkineto::api().suppressLogMessages();
+  }
+#endif
+}
+
 #if ENABLE_GLOBAL_OBSERVER
 namespace {
 
-int get_init_delay() {
-  const char* delay_c = std::getenv("KINETO_DAEMON_INIT_DELAY_S");
-  if (!delay_c) {
-    return -1;
-  }
-  std::string delay_s{delay_c};
-  try {
-    return std::stoi(delay_s);
-  } catch (const std::invalid_argument& _) {
-    return -1;
-  }
-}
-
 struct RegisterLibKinetoClient {
   RegisterLibKinetoClient() {
     static profiler::impl::LibKinetoClient client;
     libkineto::api().registerClient(&client);
-
-    auto kineto_init = []() {
-      libkineto_init(
-          /*cpuOnly=*/!(at::hasCUDA() || at::hasXPU() || at::hasMTIA()),
-          /*logOnError=*/true);
-      libkineto::api().suppressLogMessages();
-    };
-
-    if (std::getenv("KINETO_USE_DAEMON") != nullptr) {
-      int init_delay_s = get_init_delay();
-      if (init_delay_s > 0) {
-        std::thread t([init_delay_s, kineto_init]() {
-          std::this_thread::sleep_for(std::chrono::seconds(init_delay_s));
-          kineto_init();
-        });
-        t.detach();
-      } else {
-        kineto_init();
-      }
-    }
   }
 } register_libkineto_client;
diff --git a/torch/csrc/profiler/kineto_client_interface.h b/torch/csrc/profiler/kineto_client_interface.h
new file mode 100644
index 00000000000..6d328256085
--- /dev/null
+++ b/torch/csrc/profiler/kineto_client_interface.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <libkineto.h>
+#include <torch/csrc/Export.h> // for TORCH_API
+
+namespace torch {
+
+// declare global_kineto_init for libtorch_cpu.so to call
+TORCH_API void global_kineto_init(void);
+
+} // namespace torch
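
---

Reviewer note (not part of the patch): a quick way to sanity-check the new
init ordering outside the test suite is the minimal sketch below. It assumes
a torch build with USE_KINETO and a libkineto built with daemon support, and
it reuses the "Registering daemon config loader" stderr marker that the test
above greps for; the exact log line may vary across libkineto versions.

import os
import subprocess
import sys

# With KINETO_USE_DAEMON=1 and no init delay, importing torch should now
# initialize kineto at the end of torch module init (global_kineto_init),
# so the daemon config loader registration should appear on stderr.
env = os.environ.copy()
env["KINETO_USE_DAEMON"] = "1"
env.pop("KINETO_DAEMON_INIT_DELAY_S", None)  # delay workaround no longer needed

result = subprocess.run(
    [sys.executable, "-c", "import torch"],
    env=env,
    capture_output=True,
    text=True,
)
print(
    "daemon config loader registered:",
    "Registering daemon config loader" in result.stderr,
)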