[profiler] Add more CUDA launch APIs to kernel-launch detection (#156016)

Recognize additional CUDA kernel-launch APIs when detecting launch events, resolving the existing TODO.
- Reference: [NVIDIA CUDA Runtime API, Execution Control](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXECUTION.html)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/156016
Approved by: https://github.com/albanD

Co-authored-by: albanD <desmaison.alban@gmail.com>
This commit is contained in:
namgyu-youn 2025-07-03 15:26:39 +00:00 committed by PyTorch MergeBot
parent c9174a20f7
commit f17f658125

View File

@ -142,8 +142,16 @@ class BasicEvaluation:
cuda_event_list = self.profile.kineto_results.events()
def is_cuda_launch_kernel(e):
    """Return True if the event names a CUDA kernel-launch API call.

    Args:
        e: An event object exposing a ``name`` attribute. When the
            attribute is absent, ``str(e)`` is used as the name instead.

    Returns:
        bool: True when the name starts with one of the known launch
        API names listed below, False otherwise.
    """
    # Matching is by prefix, so any suffixed variant of a listed name is
    # accepted as well. The longer entries are already covered by their
    # shorter prefixes; they are kept to spell out the intended APIs.
    launch_prefixes = (
        "cudaLaunchKernel",  # standard launch
        "cudaLaunchKernelExC",  # extended launch variant
        "__cudaLaunchKernel",  # internal entry point
        "cudaLaunchCooperativeKernel",  # cooperative launch (single device)
        "cudaLaunchCooperativeKernelMultiDevice",  # cooperative launch (multi-device)
    )
    # Fall back to the object itself so plain strings can be passed too.
    name = str(getattr(e, "name", e))
    # str.startswith accepts a tuple and tests every prefix in one call.
    return name.startswith(launch_prefixes)
def is_cuda_kernel(e):
# TODO: find a better way to identify CUDA Kernel