Add toggle functionality for XPU profiler (#155135)

Fixes #154898 by adding the ability to toggle the XPU profiler on and off (the corresponding support was already added on the Kineto side in pytorch/kineto#1088).

Pull Request resolved: https://github.com/pytorch/pytorch/pull/155135
Approved by: https://github.com/guangyey, https://github.com/sraikund16
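
For reference, a minimal sketch of how the new toggle can be exercised from Python on an XPU device (assumes an XPU-enabled PyTorch build with matching Kineto support; illustrative only, not part of the commit):

import torch
from torch.profiler import profile, ProfilerActivity

# Sketch only: mirrors the pattern used by the updated test below.
x = torch.rand(4, 4, device="xpu")
y = torch.rand(4, 4, device="xpu")

with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.XPU]) as p:
    torch.add(x, y)  # recorded: CPU ops plus XPU kernel events
    # Disable XPU collection mid-profile; CPU events continue to be recorded.
    p.toggle_collection_dynamic(False, [ProfilerActivity.XPU])
    torch.add(x, y)  # recorded: CPU ops only

print([e.name for e in p.events()])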
Frost Mitchell 2025-06-20 17:27:44 +00:00 committed by PyTorch MergeBot
parent 1b50c12584
commit d83ff89d3b
2 changed files with 29 additions and 20 deletions

View File

@@ -65,8 +65,10 @@ from torch.testing._internal.common_utils import (
     skipIfTorchDynamo,
     TemporaryDirectoryName,
     TemporaryFileName,
+    TEST_CUDA,
     TEST_WITH_CROSSREF,
     TEST_WITH_ROCM,
+    TEST_XPU,
     TestCase,
 )
@@ -2031,7 +2033,7 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
             else:
                 self.assertFalse(evt.is_user_annotation)
-    @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
+    @unittest.skipUnless(TEST_CUDA or TEST_XPU, "requires gpu")
     @skipIfTorchDynamo("profiler gets ignored if dynamo activated")
     def test_basic_profile(self):
         # test a really basic profile to make sure no erroneous aten ops are run
@@ -2047,35 +2049,39 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
     @skipIfTorchDynamo("profiler gets ignored if dynamo activated")
     def test_dynamic_toggle(self):
-        with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as p:
+        acc = torch.accelerator.current_accelerator()
+        self.assertIsNotNone(acc)
+        device = acc.type
+        gpu_activity = getattr(ProfilerActivity, device.upper(), None)
+        self.assertIsNotNone(gpu_activity)
+        activities = [ProfilerActivity.CPU, gpu_activity]
+        with profile(activities=activities) as p:
             with torch.profiler.record_function("test_user_annotation"):
-                x, y = (torch.rand(4, 4).to("cuda") for _ in range(2))
+                x, y = (torch.rand(4, 4).to(device) for _ in range(2))
                 torch.add(x, y)
         self.assertTrue(any("aten" in e.name for e in p.events()))
-        self.assertTrue(any("cuda" in e.name for e in p.events()))
+        self.assertTrue(any(device in e.name for e in p.events()))
-        self.assertTrue(any("kernel" in e.name for e in p.events()))
+        self.assertTrue(any("kernel" in e.name.lower() for e in p.events()))
-        with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as p1:
-            p1.toggle_collection_dynamic(False, [ProfilerActivity.CUDA])
+        with profile(activities=activities) as p1:
+            p1.toggle_collection_dynamic(False, [gpu_activity])
             with torch.profiler.record_function("test_user_annotation"):
-                x, y = (torch.rand(4, 4).to("cuda") for _ in range(2))
+                x, y = (torch.rand(4, 4).to(device) for _ in range(2))
                 torch.add(x, y)
         self.assertTrue(any("aten" in e.name for e in p1.events()))
-        self.assertTrue(all("cuda" not in e.name for e in p1.events()))
+        self.assertTrue(all(device not in e.name for e in p1.events()))
-        self.assertTrue(all("kernel" not in e.name for e in p1.events()))
+        self.assertTrue(all("kernel" not in e.name.lower() for e in p1.events()))
-        with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as p2:
-            p2.toggle_collection_dynamic(
-                False, [ProfilerActivity.CUDA, ProfilerActivity.CPU]
-            )
+        with profile(activities=activities) as p2:
+            p2.toggle_collection_dynamic(False, activities)
             with torch.profiler.record_function("test_user_annotation"):
-                x, y = (torch.rand(4, 4).to("cuda") for _ in range(2))
+                x, y = (torch.rand(4, 4).to(device) for _ in range(2))
                 torch.add(x, y)
         self.assertTrue(len(p2.events()) == 0)
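
The p2 case above shows that disabling every requested activity leaves the trace empty. A device-agnostic sketch of that behavior, following the accelerator-selection logic from the test (assumes a CUDA or XPU build; nothing beyond what the test asserts is claimed):

import torch
from torch.profiler import profile, ProfilerActivity

# Resolve the GPU activity from whichever accelerator is present (CUDA or XPU).
acc = torch.accelerator.current_accelerator()
assert acc is not None, "sketch assumes a GPU/accelerator build"
device = acc.type
activities = [ProfilerActivity.CPU, getattr(ProfilerActivity, device.upper())]

with profile(activities=activities) as p:
    # Turning off both CPU and GPU collection should produce no events at all.
    p.toggle_collection_dynamic(False, activities)
    torch.add(*(torch.rand(4, 4).to(device) for _ in range(2)))

assert len(p.events()) == 0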

View File

@@ -690,17 +690,20 @@ void toggleCollectionDynamic(
     const bool enable,
     const std::set<torch::profiler::impl::ActivityType>& activities) {
   if (activities.count(torch::autograd::profiler::ActivityType::CPU) > 0 &&
-      activities.count(torch::autograd::profiler::ActivityType::CUDA) == 0) {
+      (activities.count(torch::autograd::profiler::ActivityType::CUDA) == 0 ||
+       activities.count(torch::autograd::profiler::ActivityType::XPU) == 0)) {
     LOG(WARNING)
-        << "Toggling CPU activity with CUDA activity on may result in traces with CUDA events on artibrary tracks";
+        << "Toggling CPU activity with GPU activity on may result in traces with GPU events on artibrary tracks";
   } else if (
-      activities.count(torch::autograd::profiler::ActivityType::CUDA) > 0 &&
+      (activities.count(torch::autograd::profiler::ActivityType::CUDA) > 0 ||
+       activities.count(torch::autograd::profiler::ActivityType::XPU) > 0) &&
       activities.count(torch::autograd::profiler::ActivityType::CPU) == 0) {
     LOG(WARNING)
-        << "Toggling CUDA activity with CPU activity on may result in traces with incorrect correlation between CPU and CUDA events";
+        << "Toggling GPU activity with CPU activity on may result in traces with incorrect correlation between CPU and GPU events";
   }
   for (auto act : activities) {
-    if (act == torch::autograd::profiler::ActivityType::CUDA) {
+    if (act == torch::autograd::profiler::ActivityType::CUDA ||
+        act == torch::autograd::profiler::ActivityType::XPU) {
       torch::profiler::impl::kineto::toggleCollectionDynamic(enable);
     } else if (act == torch::autograd::profiler::ActivityType::CPU) {
       toggleCPUCollectionDynamic(enable);