Expand Kineto platform support (ci-all) (#56323)
Summary: Expanding support to all builds.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/56323
Test Plan: CI
Reviewed By: malfet
Differential Revision: D28171478
Pulled By: ilia-cher
fbshipit-source-id: 16bc752d1be3cbaeda5316f5d8a687ae05a83d22
Parent: 30c96c9419
Commit: 65fad0ebd2
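Since this commit enables Kineto in Windows, macOS, and non-CUDA builds (keeping only the mobile exclusion, see the CMake hunk below), the Kineto-backed profiler becomes available on those platforms. A minimal runtime check, sketched under the assumption that kineto_available() is importable from torch.autograd.profiler as in the test change further down:

import torch
from torch.autograd.profiler import kineto_available, profile

if kineto_available():
    # use_kineto=True routes the legacy profiler through Kineto,
    # matching the _profile(use_kineto=True) call in the test below.
    with profile(use_kineto=True) as prof:
        torch.mm(torch.rand(32, 32), torch.rand(32, 32))
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=5))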
@@ -1824,14 +1824,6 @@ if(USE_KINETO AND INTERN_BUILD_MOBILE)
   message(STATUS "Not using libkineto in a mobile build.")
   set(USE_KINETO OFF)
 endif()
-if(USE_KINETO AND MSVC)
-  message(STATUS "Not using libkineto in a Windows build.")
-  set(USE_KINETO OFF)
-endif()
-if(USE_KINETO AND APPLE)
-  message(STATUS "Not using libkineto in a Mac build.")
-  set(USE_KINETO OFF)
-endif()
 if(USE_KINETO AND (NOT USE_CUDA))
   set(LIBKINETO_NOCUPTI ON CACHE STRING "")
   message(STATUS "Using CPU-only Kineto in the non-CUDA build.")
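The non-CUDA branch above keeps Kineto but builds it without CUPTI (LIBKINETO_NOCUPTI), so only CPU-side activity is recorded. A sketch of what profiling looks like in such a build, assuming the public torch.profiler API of this release line:

import torch
from torch.profiler import profile, ProfilerActivity

# In a LIBKINETO_NOCUPTI build only CPU activities are collectable;
# ProfilerActivity.CUDA would have nothing to record.
with profile(activities=[ProfilerActivity.CPU]) as prof:
    torch.mm(torch.rand(128, 128), torch.rand(128, 128))

print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=5))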
@@ -123,6 +123,10 @@ else()
   append_torchlib_if_found(sleef asmjit)
 endif()
 
+if(@USE_KINETO@)
+  append_torchlib_if_found(kineto)
+endif()
+
 if(@USE_CUDA@)
   if(MSVC)
     if(NOT NVTOOLEXT_HOME)
@@ -51,7 +51,7 @@ end
 
   # link static libraries
   target.frameworks_build_phases.clear
-  libs = ['libc10.a', 'libclog.a', 'libpthreadpool.a', 'libXNNPACK.a', 'libeigen_blas.a', 'libcpuinfo.a', 'libpytorch_qnnpack.a', 'libtorch_cpu.a', 'libtorch.a']
+  libs = ['libc10.a', 'libclog.a', 'libpthreadpool.a', 'libXNNPACK.a', 'libeigen_blas.a', 'libcpuinfo.a', 'libpytorch_qnnpack.a', 'libtorch_cpu.a', 'libtorch.a', 'libkineto.a']
   for lib in libs do
     path = "#{install_path}/lib/#{lib}"
     if File.exist?(path)
@@ -482,6 +482,7 @@ class TestProfiler(TestCase):
         assert is_int, "Invalid stacks record"
 
     @unittest.skipIf(not kineto_available(), "Kineto is required")
+    @unittest.skipIf(IS_WINDOWS, "Test is flaky on Windows")
     def test_tensorboard_trace_handler(self):
         use_cuda = torch.cuda.is_available()
         with _profile(use_cuda=use_cuda, use_kineto=True):
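The decorated test exercises torch.profiler.tensorboard_trace_handler, which this commit is expected to make runnable beyond Linux (Windows stays skipped as flaky). A usage sketch; the ./log directory name is illustrative:

import torch
from torch.profiler import profile, schedule, tensorboard_trace_handler

# Emits one trace file per active window into ./log, viewable in
# TensorBoard's profiler plugin.
with profile(
    schedule=schedule(wait=1, warmup=1, active=2),
    on_trace_ready=tensorboard_trace_handler("./log"),
) as prof:
    for _ in range(4):
        torch.mm(torch.rand(64, 64), torch.rand(64, 64))
        prof.step()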
third_party/kineto (vendored submodule):
@@ -1 +1 @@
-Subproject commit 5bc9386b6d60c3b34b77961ea2900947103304b9
+Subproject commit 07344e6d29fb17283fbc909d7631b5256b98537d
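With the submodule advanced past Kineto's GenericTraceActivity/systemThreadId API change, the #ifdef USE_KINETO_UPDATED compatibility shims in the profiler sources are no longer needed; the hunks below delete them and keep the updated code path unconditionally.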
@@ -9,12 +9,6 @@
 #ifdef USE_KINETO
 #include <libkineto.h>
 
-#ifndef USE_KINETO_UPDATED
-#include <pthread.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#endif
-
 #ifndef _MSC_VER
 // TODO: TO be removed, once this properly works from libkineto
 // Literal copy-n-paste from third_party/kineto/libkineto/src/WeakSymbols.cpp
@@ -41,14 +35,6 @@ inline int64_t getTimeUs() {
   return duration_cast<microseconds>(high_resolution_clock::now().time_since_epoch()).count();
 }
 
-#ifndef USE_KINETO_UPDATED
-// Getting the linux tid is expensive, so cache it.
-// Caching linux pids and tids is not advisable in the general case,
-// but this is only for profiling purposes and we don't need to handle
-// special cases during fork, clone etc.
-static thread_local pid_t cachedTid;
-#endif
-
 std::string shapesToStr(const std::vector<std::vector<int64_t>>& shapes);
 std::string stacksToStr(const std::vector<std::string>& stacks);
 std::string dtypesToStr(const std::vector<std::string>& types);
@@ -64,14 +50,11 @@ struct KinetoThreadLocalState : public ProfilerThreadLocalState {
     if (!ctx) {
       return;
     }
-#ifdef USE_KINETO_UPDATED
     libkineto::GenericTraceActivity op;
     op.activityType = libkineto::ActivityType::CPU_OP;
     op.activityName = std::string(fn.name().str());
-#else
-    libkineto::ClientTraceActivity op;
-    op.opType = std::string(fn.name().str());
-#endif
     op.startTime = ctx->startUs;
     op.endTime = getTimeUs();
     op.device = 0;
@@ -82,16 +65,8 @@ struct KinetoThreadLocalState : public ProfilerThreadLocalState {
     // op.inputDims = shapesToStr(*ctx->shapes);
     // }
 
-#ifdef USE_KINETO_UPDATED
     libkineto::api().activityProfiler().recordThreadInfo();
     op.sysThreadId = libkineto::systemThreadId();
-#else
-    if (!cachedTid) {
-      cachedTid = (pid_t)syscall(SYS_gettid);
-      libkineto::api().activityProfiler().recordThreadInfo(cachedTid, pthread_self());
-    }
-    op.sysThreadId = cachedTid;
-#endif
 
     {
       std::lock_guard<std::mutex> guard(state_mutex_);
@@ -2,9 +2,9 @@
 
 #include <torch/csrc/autograd/profiler_legacy.h>
 
-// Kineto is currently available on Linux server-side
 #ifdef USE_KINETO
-#if !defined(__linux__) || defined(_WIN32) || defined(C10_MOBILE) || defined(__APPLE__) || defined(DISABLE_KINETO)
+// skip Kineto dependency on mobile
+#ifdef C10_MOBILE
 #undef USE_KINETO
 #endif
 #endif