[ROCm] use correct workspace for hipblaslt, silence warning (#150227)

Follow-up to #145130. That PR caused a spurious warning on ROCm the first time hipblaslt was invoked, regardless of workload: the workspace lookup always missed on the first call.
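
For context, PyTorch caches one blas workspace per (handle, stream) pair in a map, and the warning fired because on ROCm the hipblaslt handle never matches the hipblas handle that populated that map, so the first lookup always missed. Below is a minimal standalone sketch of that pattern, not the PyTorch internals: Key, workspace_cache, and get_workspace are hypothetical stand-ins, and the miss-then-allocate branch mirrors the ROCm behavior this PR adds.

#include <cstdio>
#include <map>
#include <memory>
#include <tuple>
#include <vector>

using Key = std::tuple<void*, void*>;  // (blas handle, raw stream), mirroring the real key
static std::map<Key, std::unique_ptr<std::vector<char>>> workspace_cache;

// Hypothetical helper: look up the workspace for (handle, stream), allocating
// on a miss -- the ROCm-side behavior this PR adds.
static std::vector<char>* get_workspace(void* handle, void* stream, size_t bytes) {
  Key key{handle, stream};
  auto it = workspace_cache.find(key);
  if (it == workspace_cache.end()) {
    // Pre-fix behavior on this path was warn/assert: the hipblaslt handle
    // never matched the hipblas handle that populated the map, so the entry
    // could not exist yet. Post-fix, just allocate and store it.
    it = workspace_cache.emplace_hint(it, key, std::make_unique<std::vector<char>>(bytes));
  }
  return it->second.get();
}

int main() {
  int blas = 0, blaslt = 0, stream = 0;      // dummies; only their addresses matter
  get_workspace(&blas, &stream, 1 << 20);    // entry created for the hipblas handle
  get_workspace(&blaslt, &stream, 1 << 20);  // distinct hipblaslt handle: miss -> allocate
  std::printf("cached workspaces: %zu\n", workspace_cache.size());  // prints 2
  return 0;
}

Run standalone, this prints "cached workspaces: 2": two distinct handles produce two cache entries, which is exactly why a map populated under the hipblas handle can never satisfy a lookup keyed by the hipblaslt handle.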


Pull Request resolved: https://github.com/pytorch/pytorch/pull/150227
Approved by: https://github.com/jeffdaily

Co-authored-by: Jeff Daily <jeff.daily@amd.com>
Author: Ethan Wee
Date: 2025-03-31 09:49:40 +00:00
Committed-by: PyTorch MergeBot
Commit: c158eac0de (parent: 51f0403f46)

@@ -222,19 +222,35 @@ static size_t _getWorkspaceSize() {
   return workspace_size;
 }
 
+static at::DataPtr _getNewWorkspace() {
+  return c10::cuda::CUDACachingAllocator::get()->allocate(_getWorkspaceSize());
+}
+
+// See Note [hipblaslt handles].
+// ROCm's hipblas and hipblaslt do not share handles, unlike with CUDA.
+// Using getCurrentCUDABlasLtHandle is on purpose. For CUDA it's the same as
+// getCurrentCUDABlasHandle, but for ROCm it's a unique handle.
 void* _getWorkspaceWithoutHandle() {
-  cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
+  cublasLtHandle_t handle = at::cuda::getCurrentCUDABlasLtHandle();
   auto stream = c10::cuda::getCurrentCUDAStream();
   cudaStream_t _stream = stream;
   auto key = std::make_tuple(static_cast<void *>(handle), static_cast<void *>(_stream));
   auto workspace_it = at::cuda::cublas_handle_stream_to_workspace().find(key);
+#ifdef USE_ROCM
+  // The first call to _getWorkspaceWithoutHandle could be empty, so allocate and store.
+  if (workspace_it == at::cuda::cublas_handle_stream_to_workspace().end()) {
+    workspace_it = at::cuda::cublas_handle_stream_to_workspace().insert(workspace_it, {key, _getNewWorkspace()});
+  }
+#else
   TORCH_INTERNAL_ASSERT(workspace_it != at::cuda::cublas_handle_stream_to_workspace().end());
+#endif
   return workspace_it->second.mutable_get();
 }
 
 void* _getWorkspace(size_t& workspaceSize) {
-// #ifdef (defined(USE_ROCM) || defined(FBCODE_CAFFE2))
   workspaceSize = _getWorkspaceSize();
+#ifndef USE_ROCM
+  // See Note [hipblaslt handles].
   auto cublasWorkspaceSize = at::cuda::getChosenWorkspaceSize();
   if (cublasWorkspaceSize < workspaceSize) {
     TORCH_WARN_ONCE("Requested CUBLASLT workspace size of ", workspaceSize,
@@ -245,9 +261,7 @@ void* _getWorkspace(size_t& workspaceSize) {
     " size will be limited to the CUBLAS workspace size.");
     workspaceSize = cublasWorkspaceSize;
   }
-// #else
-//   workspaceSize = at::cuda::getChosenWorkspaceSize();
-// #endif
+#endif
   auto workspace_ptr = _getWorkspaceWithoutHandle();
   return workspace_ptr;
 }
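
For reference, a repro sketch against the libtorch C++ API (an assumption on my part: this exercises the GEMM path, which dispatches to hipblaslt only on ROCm builds where that backend is selected, something that depends on build flags and GPU architecture). Before this change, the first such call on an affected ROCm build printed the workspace warning once per process; after it, the workspace is allocated silently.

#include <torch/torch.h>
#include <iostream>

int main() {
  if (!torch::cuda::is_available()) {  // on ROCm builds this reports HIP devices
    std::cout << "no GPU device available\n";
    return 0;
  }
  auto opts = torch::device(torch::kCUDA).dtype(torch::kBFloat16);
  auto a = torch::randn({512, 512}, opts);
  auto b = torch::randn({512, 512}, opts);
  auto c = torch::matmul(a, b);  // first GEMM: the blaslt workspace lookup happens here
  std::cout << "result sum: " << c.sum().item<float>() << "\n";
  return 0;
}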