[Profiler] Add profiler activity for HPU devices (#148182)

Fixes #148181

Pull Request resolved: https://github.com/pytorch/pytorch/pull/148182
Approved by: https://github.com/sraikund16
This commit is contained in:
wdziurdz 2025-03-05 01:37:45 +00:00 committed by PyTorch MergeBot
parent 3985ce0b88
commit edc3ca577e
7 changed files with 28 additions and 1 deletion

View File

@@ -41,6 +41,7 @@ class ProfilerActivity(Enum):
CUDA = ...
XPU = ...
MTIA = ...
HPU = ...
PrivateUse1 = ...
class _EventType(Enum):

View File

@@ -256,7 +256,7 @@ class profile:
), "Device-only events supported only with Kineto (use_kineto=True)"
if self.use_device is not None:
VALID_DEVICE_OPTIONS = ["cuda", "xpu", "mtia"]
VALID_DEVICE_OPTIONS = ["cuda", "xpu", "mtia", "hpu"]
if _get_privateuse1_backend_name() != "privateuseone":
VALID_DEVICE_OPTIONS.append(_get_privateuse1_backend_name())
if self.use_device not in VALID_DEVICE_OPTIONS:
@@ -272,6 +272,12 @@ class profile:
warn("XPU is not available, disabling XPU profiling")
self.use_device = None
if self.use_device == "hpu" and not (
hasattr(torch, "hpu") and torch.hpu.is_available()
):
warn("HPU is not available, disabling HPU profiling")
self.use_device = None
self.kineto_activities = set()
if self.use_cpu:
self.kineto_activities.add(ProfilerActivity.CPU)
@@ -293,6 +299,11 @@ class profile:
use_kineto and ProfilerActivity.MTIA in _supported_activities()
), "Legacy MTIA profiling is not supported. Requires use_kineto=True on MTIA devices."
self.kineto_activities.add(ProfilerActivity.MTIA)
elif self.use_device == "hpu":
assert (
use_kineto and ProfilerActivity.HPU in _supported_activities()
), "Legacy HPU profiling is not supported. Requires use_kineto=True on HPU devices."
self.kineto_activities.add(ProfilerActivity.HPU)
elif self.use_device is not None and self.use_device != "privateuseone":
if (
not use_kineto

View File

@@ -386,6 +386,9 @@ PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject* unused) {
if (at::hasMTIA()) {
activities.insert(torch::profiler::impl::ActivityType::MTIA);
}
if (at::hasHPU()) {
activities.insert(torch::profiler::impl::ActivityType::HPU);
}
if (at::getNumGPUs() > 0) {
activities.insert(torch::profiler::impl::ActivityType::CUDA);
}

View File

@@ -50,6 +50,9 @@ const std::set<libkineto::ActivityType> kMtiaTypes = {
libkineto::ActivityType::MTIA_RUNTIME,
libkineto::ActivityType::MTIA_WORKLOADD,
};
const std::set<libkineto::ActivityType> hpuTypes = {
libkineto::ActivityType::HPU_OP,
};
const std::set<libkineto::ActivityType> kPrivateUse1Types = {
libkineto::ActivityType::GPU_MEMCPY,
libkineto::ActivityType::GPU_MEMSET,
@@ -265,6 +268,9 @@ void prepareTrace(
if (activities.count(torch::autograd::profiler::ActivityType::MTIA)) {
k_activities.insert(kMtiaTypes.begin(), kMtiaTypes.end());
}
if (activities.count(torch::autograd::profiler::ActivityType::HPU)) {
k_activities.insert(hpuTypes.begin(), hpuTypes.end());
}
if (activities.count(torch::autograd::profiler::ActivityType::CUDA)) {
k_activities.insert(kCudaTypes.begin(), kCudaTypes.end());
if (config.enable_cuda_sync_events || get_cuda_sync_enabled()) {
@@ -399,6 +405,8 @@ c10::DeviceType deviceTypeFromActivity(libkineto::ActivityType activity_type) {
}();
return device_type;
}
case libkineto::ActivityType::HPU_OP:
return c10::DeviceType::HPU;
case libkineto::ActivityType::CPU_OP:
case libkineto::ActivityType::USER_ANNOTATION:
case libkineto::ActivityType::EXTERNAL_CORRELATION:

View File

@@ -14,6 +14,7 @@ enum class C10_API_ENUM ActivityType {
CPU = 0,
XPU, // XPU kernels, runtime
CUDA, // CUDA kernels, runtime
HPU, // HPU kernels, runtime
MTIA, // MTIA kernels, runtime
PrivateUse1, // PrivateUse1 kernels, runtime
NUM_KINETO_ACTIVITIES, // must be the last one

View File

@@ -326,6 +326,7 @@ void initPythonBindings(PyObject* module) {
.value("XPU", ActivityType::XPU)
.value("MTIA", ActivityType::MTIA)
.value("CUDA", ActivityType::CUDA)
.value("HPU", ActivityType::HPU)
.value("PrivateUse1", ActivityType::PrivateUse1);
py::class_<ExperimentalConfig>(m, "_ExperimentalConfig")

View File

@@ -164,6 +164,8 @@ class _KinetoProfile:
self.use_device = "xpu"
elif ProfilerActivity.MTIA in self.activities:
self.use_device = "mtia"
elif ProfilerActivity.HPU in self.activities:
self.use_device = "hpu"
elif ProfilerActivity.PrivateUse1 in self.activities:
self.use_device = _get_privateuse1_backend_name()