allocate cuMem memory with rdma flag (#153261)

Set the gpuDirectRDMACapable flag on cuMem (expandable segment) allocations, when the device supports it, so that the memory can be registered with ibverbs.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/153261
Approved by: https://github.com/kwen2501, https://github.com/eqy, https://github.com/Skylion007
Natalia Gimelshein 2025-05-09 21:48:44 +00:00 committed by PyTorch MergeBot
parent f11d7a5978
commit 9ae722cdb4
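
For context, the sketch below walks through the cuMem (CUDA virtual memory management) allocation path this change touches: query the device for GPUDirect-RDMA-with-VMM support, set gpuDirectRDMACapable on the allocation properties when it is available, then create and map the physical allocation. This is a minimal standalone illustration, not PyTorch's ExpandableSegment code; the CHECK macro, the 1 MiB size, and the use of device 0 are placeholders.

// Minimal sketch (not PyTorch code): create a cuMem allocation that is
// marked GPUDirect-RDMA capable when the device supports it.
#include <cuda.h>
#include <cstdio>
#include <cstdlib>

#define CHECK(call)                                        \
  do {                                                     \
    CUresult rc_ = (call);                                 \
    if (rc_ != CUDA_SUCCESS) {                             \
      std::fprintf(stderr, "CUDA driver error %d\n", rc_); \
      std::exit(1);                                        \
    }                                                      \
  } while (0)

int main() {
  CHECK(cuInit(0));
  CUdevice dev;
  CHECK(cuDeviceGet(&dev, 0));
  CUcontext ctx;
  CHECK(cuDevicePrimaryCtxRetain(&ctx, dev));
  CHECK(cuCtxSetCurrent(ctx));

  CUmemAllocationProp prop = {};
  prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
  prop.requestedHandleTypes = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
  prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
  prop.location.id = static_cast<int>(dev);

  // The check this PR adds: only request the RDMA flag when the device
  // supports GPUDirect RDMA on VMM (cuMem) allocations.
  int rdma_supported = 0;
  CHECK(cuDeviceGetAttribute(
      &rdma_supported,
      CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED,
      dev));
  if (rdma_supported)
    prop.allocFlags.gpuDirectRDMACapable = 1;

  // Round the requested size up to the allocation granularity.
  size_t granularity = 0;
  CHECK(cuMemGetAllocationGranularity(
      &granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM));
  size_t size = ((1u << 20) + granularity - 1) / granularity * granularity;

  // Create the physical allocation, reserve a VA range, map it, and
  // grant the device read/write access.
  CUmemGenericAllocationHandle handle;
  CHECK(cuMemCreate(&handle, size, &prop, 0));
  CUdeviceptr ptr;
  CHECK(cuMemAddressReserve(&ptr, size, 0, 0, 0));
  CHECK(cuMemMap(ptr, size, 0, handle, 0));
  CUmemAccessDesc access = {};
  access.location = prop.location;
  access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
  CHECK(cuMemSetAccess(ptr, size, &access, 1));

  // ptr now backs device memory that an RDMA NIC can register (see the
  // ibverbs sketch after the diff below).
  CHECK(cuMemUnmap(ptr, size));
  CHECK(cuMemAddressFree(ptr, size));
  CHECK(cuMemRelease(handle));
  CHECK(cuDevicePrimaryCtxRelease(dev));
  return 0;
}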

@@ -403,6 +403,13 @@ struct ExpandableSegment {
 #ifndef FBCODE_CAFFE2
     prop.requestedHandleTypes = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
 #endif
+    int flag = 0;
+    C10_CUDA_DRIVER_CHECK(DriverAPI::get()->cuDeviceGetAttribute_(
+        &flag,
+        CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED,
+        device_));
+    if (flag)
+      prop.allocFlags.gpuDirectRDMACapable = 1;
     prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
     // NOLINTNEXTLINE(bugprone-signed-char-misuse)
     prop.location.id = static_cast<int>(device_);
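
The motivation in the commit message is ibverbs registration, which happens outside this diff. As a rough sketch of that follow-on step: given a protection domain that has already been opened (pd) and a pointer/size pair mapped as above, the buffer would be registered roughly like the hypothetical helper below. Registering GPU memory this way also depends on GPUDirect RDMA support in the NIC stack (e.g. a peer-memory kernel module); none of this code is part of the PR.

// Hypothetical helper, not from this PR: register a cuMem-mapped GPU
// buffer with an RDMA NIC via ibverbs.
#include <infiniband/verbs.h>
#include <cuda.h>
#include <cstddef>
#include <cstdint>
#include <stdexcept>

ibv_mr* register_gpu_buffer(ibv_pd* pd, CUdeviceptr ptr, size_t size) {
  // ibv_reg_mr takes a void*; with GPUDirect RDMA the verbs provider
  // resolves the GPU virtual address through the peer-memory interface.
  ibv_mr* mr = ibv_reg_mr(
      pd,
      reinterpret_cast<void*>(static_cast<uintptr_t>(ptr)),
      size,
      IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ |
          IBV_ACCESS_REMOTE_WRITE);
  if (mr == nullptr)
    throw std::runtime_error("ibv_reg_mr failed for GPU buffer");
  return mr;  // caller eventually releases it with ibv_dereg_mr(mr)
}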