Add toggle functionality for XPU profiler (#155135)

Fixes #154898 by adding the ability to toggle the XPU profiler on and off (the corresponding support was already added on the Kineto side in pytorch/kineto#1088).

Pull Request resolved: https://github.com/pytorch/pytorch/pull/155135
Approved by: https://github.com/guangyey, https://github.com/sraikund16
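
For reference, a minimal sketch of how the new toggle can be exercised from Python on an XPU device (assumes an XPU-enabled PyTorch build with matching Kineto support; illustrative only, not part of the commit):

import torch
from torch.profiler import profile, ProfilerActivity

# Sketch only: mirrors the pattern used by the updated test below.
x = torch.rand(4, 4, device="xpu")
y = torch.rand(4, 4, device="xpu")

with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.XPU]) as p:
    torch.add(x, y)  # recorded: CPU ops plus XPU kernel events
    # Disable XPU collection mid-profile; CPU events continue to be recorded.
    p.toggle_collection_dynamic(False, [ProfilerActivity.XPU])
    torch.add(x, y)  # recorded: CPU ops only

print([e.name for e in p.events()])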
Frost Mitchell 2025-06-20 17:27:44 +00:00 committed by PyTorch MergeBot
parent 1b50c12584
commit d83ff89d3b
2 changed files with 29 additions and 20 deletions

View File

@@ -65,8 +65,10 @@ from torch.testing._internal.common_utils import (
     skipIfTorchDynamo,
     TemporaryDirectoryName,
     TemporaryFileName,
+    TEST_CUDA,
     TEST_WITH_CROSSREF,
     TEST_WITH_ROCM,
+    TEST_XPU,
     TestCase,
 )
@@ -2031,7 +2033,7 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
             else:
                 self.assertFalse(evt.is_user_annotation)
-    @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
+    @unittest.skipUnless(TEST_CUDA or TEST_XPU, "requires gpu")
     @skipIfTorchDynamo("profiler gets ignored if dynamo activated")
     def test_basic_profile(self):
         # test a really basic profile to make sure no erroneous aten ops are run
@@ -2047,35 +2049,39 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
     @skipIfTorchDynamo("profiler gets ignored if dynamo activated")
     def test_dynamic_toggle(self):
-        with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as p:
+        acc = torch.accelerator.current_accelerator()
+        self.assertIsNotNone(acc)
+        device = acc.type
+        gpu_activity = getattr(ProfilerActivity, device.upper(), None)
+        self.assertIsNotNone(gpu_activity)
+        activities = [ProfilerActivity.CPU, gpu_activity]
+        with profile(activities=activities) as p:
             with torch.profiler.record_function("test_user_annotation"):
-                x, y = (torch.rand(4, 4).to("cuda") for _ in range(2))
+                x, y = (torch.rand(4, 4).to(device) for _ in range(2))
                 torch.add(x, y)
         self.assertTrue(any("aten" in e.name for e in p.events()))
-        self.assertTrue(any("cuda" in e.name for e in p.events()))
+        self.assertTrue(any(device in e.name for e in p.events()))
-        self.assertTrue(any("kernel" in e.name for e in p.events()))
+        self.assertTrue(any("kernel" in e.name.lower() for e in p.events()))
-        with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as p1:
-            p1.toggle_collection_dynamic(False, [ProfilerActivity.CUDA])
+        with profile(activities=activities) as p1:
+            p1.toggle_collection_dynamic(False, [gpu_activity])
             with torch.profiler.record_function("test_user_annotation"):
-                x, y = (torch.rand(4, 4).to("cuda") for _ in range(2))
+                x, y = (torch.rand(4, 4).to(device) for _ in range(2))
                 torch.add(x, y)
         self.assertTrue(any("aten" in e.name for e in p1.events()))
-        self.assertTrue(all("cuda" not in e.name for e in p1.events()))
+        self.assertTrue(all(device not in e.name for e in p1.events()))
-        self.assertTrue(all("kernel" not in e.name for e in p1.events()))
+        self.assertTrue(all("kernel" not in e.name.lower() for e in p1.events()))
-        with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as p2:
-            p2.toggle_collection_dynamic(
-                False, [ProfilerActivity.CUDA, ProfilerActivity.CPU]
-            )
+        with profile(activities=activities) as p2:
+            p2.toggle_collection_dynamic(False, activities)
             with torch.profiler.record_function("test_user_annotation"):
-                x, y = (torch.rand(4, 4).to("cuda") for _ in range(2))
+                x, y = (torch.rand(4, 4).to(device) for _ in range(2))
                 torch.add(x, y)
         self.assertTrue(len(p2.events()) == 0)
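
The p2 case above shows that disabling every requested activity leaves the trace empty. A device-agnostic sketch of that behavior, following the accelerator-selection logic from the test (assumes a CUDA or XPU build; nothing beyond what the test asserts is claimed):

import torch
from torch.profiler import profile, ProfilerActivity

# Resolve the GPU activity from whichever accelerator is present (CUDA or XPU).
acc = torch.accelerator.current_accelerator()
assert acc is not None, "sketch assumes a GPU/accelerator build"
device = acc.type
activities = [ProfilerActivity.CPU, getattr(ProfilerActivity, device.upper())]

with profile(activities=activities) as p:
    # Turning off both CPU and GPU collection should produce no events at all.
    p.toggle_collection_dynamic(False, activities)
    torch.add(*(torch.rand(4, 4).to(device) for _ in range(2)))

assert len(p.events()) == 0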

View File

@@ -690,17 +690,20 @@ void toggleCollectionDynamic(
     const bool enable,
     const std::set<torch::profiler::impl::ActivityType>& activities) {
   if (activities.count(torch::autograd::profiler::ActivityType::CPU) > 0 &&
-      activities.count(torch::autograd::profiler::ActivityType::CUDA) == 0) {
+      (activities.count(torch::autograd::profiler::ActivityType::CUDA) == 0 ||
+       activities.count(torch::autograd::profiler::ActivityType::XPU) == 0)) {
     LOG(WARNING)
-        << "Toggling CPU activity with CUDA activity on may result in traces with CUDA events on artibrary tracks";
+        << "Toggling CPU activity with GPU activity on may result in traces with GPU events on artibrary tracks";
   } else if (
-      activities.count(torch::autograd::profiler::ActivityType::CUDA) > 0 &&
+      (activities.count(torch::autograd::profiler::ActivityType::CUDA) > 0 ||
+       activities.count(torch::autograd::profiler::ActivityType::XPU) > 0) &&
       activities.count(torch::autograd::profiler::ActivityType::CPU) == 0) {
     LOG(WARNING)
-        << "Toggling CUDA activity with CPU activity on may result in traces with incorrect correlation between CPU and CUDA events";
+        << "Toggling GPU activity with CPU activity on may result in traces with incorrect correlation between CPU and GPU events";
   }
   for (auto act : activities) {
-    if (act == torch::autograd::profiler::ActivityType::CUDA) {
+    if (act == torch::autograd::profiler::ActivityType::CUDA ||
+        act == torch::autograd::profiler::ActivityType::XPU) {
       torch::profiler::impl::kineto::toggleCollectionDynamic(enable);
     } else if (act == torch::autograd::profiler::ActivityType::CPU) {
       toggleCPUCollectionDynamic(enable);