Enable RCCL in ROCm build (#27383)
Summary: continues https://github.com/pytorch/pytorch/pull/23884
Pull Request resolved: https://github.com/pytorch/pytorch/pull/27383
Differential Revision: D17767248
Pulled By: bddppq
fbshipit-source-id: 3a506844ca6f01d7bbe8be5bde0976999e3a2b90
This commit is contained in:
parent 7b3881f68c
commit f4d0d0a811
@@ -140,7 +140,7 @@ option(USE_METAL "Use Metal for iOS build" ON)
 option(USE_NATIVE_ARCH "Use -march=native" OFF)
 cmake_dependent_option(
     USE_NCCL "Use NCCL" ON
-    "USE_CUDA;UNIX;NOT APPLE" OFF)
+    "USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
 cmake_dependent_option(
     USE_STATIC_NCCL "Use static NCCL" OFF
     "USE_NCCL" OFF)
@@ -948,6 +948,11 @@ if(USE_ROCM)
   message(INFO "Compiling with HIP for AMD.")
   caffe2_update_option(USE_ROCM ON)

+  if (USE_NCCL AND NOT USE_SYSTEM_NCCL)
+    message(INFO "Forcing USE_SYSTEM_NCCL to ON since it's required by using RCCL")
+    caffe2_update_option(USE_SYSTEM_NCCL ON)
+  endif()
+
   list(APPEND HIP_CXX_FLAGS -fPIC)
   list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1)
   list(APPEND HIP_CXX_FLAGS -DCUDA_HAS_FP16=1)
@@ -983,7 +988,7 @@ if(USE_ROCM)
   hip_include_directories(${Caffe2_HIP_INCLUDE})

   set(Caffe2_HIP_DEPENDENCY_LIBS
-    ${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES} ${hipcub_LIBRARIES} ${ROCM_HIPRTC_LIB})
+    ${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES} ${PYTORCH_RCCL_LIBRARIES} ${hipcub_LIBRARIES} ${ROCM_HIPRTC_LIB})

   # Note [rocblas & rocfft cmake bug]
   # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1006,17 +1011,20 @@ endif()

 # ---[ NCCL
 if(USE_NCCL)
-  if(NOT USE_CUDA)
+  if(NOT (USE_CUDA OR USE_ROCM))
     message(WARNING
-        "Not using CUDA, so disabling NCCL. Suppress this warning with "
+        "Not using CUDA/ROCM, so disabling USE_NCCL. Suppress this warning with "
         "-DUSE_NCCL=OFF.")
     caffe2_update_option(USE_NCCL OFF)
   elseif(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
     message(WARNING "NCCL is currently only supported under Linux.")
     caffe2_update_option(USE_NCCL OFF)
-  else()
+  elseif(USE_CUDA)
     include(${CMAKE_CURRENT_LIST_DIR}/External/nccl.cmake)
     list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS __caffe2_nccl)
+  elseif(USE_ROCM)
+    include(${CMAKE_CURRENT_LIST_DIR}/External/rccl.cmake)
+    list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS __caffe2_nccl)
   endif()
 endif()

@@ -1058,7 +1066,7 @@ if(USE_GLOO)
     # Add explicit dependency since NCCL is built from third_party.
     # Without dependency, make -jN with N>1 can fail if the NCCL build
     # hasn't finished when CUDA targets are linked.
-    if(USE_NCCL)
+    if(USE_NCCL AND NOT USE_ROCM)
       add_dependencies(gloo_cuda nccl_external)
     endif()
     # Pick the right dependency depending on USE_CUDA
cmake/External/rccl.cmake (vendored, new file, 18 lines)
@@ -0,0 +1,18 @@
+if (NOT __NCCL_INCLUDED)
+  set(__NCCL_INCLUDED TRUE)
+
+  if (USE_SYSTEM_NCCL)
+    # NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the following line.
+    find_package(RCCL REQUIRED)
+    if (RCCL_FOUND)
+      message (STATUS "RCCL Found!")
+      add_library(__caffe2_nccl INTERFACE)
+      target_link_libraries(__caffe2_nccl INTERFACE ${PYTORCH_RCCL_LIBRARIES})
+      target_include_directories(__caffe2_nccl INTERFACE ${RCCL_INCLUDE_DIRS})
+    else()
+      message (STATUS "RCCL NOT Found!")
+    endif()
+  else()
+    message (STATUS "USE_SYSTEM_NCCL=OFF is not supported yet when using RCCL")
+  endif()
+endif()
@@ -80,6 +80,13 @@ ELSE()
   SET(MIOPEN_PATH $ENV{MIOPEN_PATH})
 ENDIF()

+# RCCL_PATH
+IF(NOT DEFINED ENV{RCCL_PATH})
+  SET(RCCL_PATH ${ROCM_PATH}/rccl)
+ELSE()
+  SET(RCCL_PATH $ENV{RCCL_PATH})
+ENDIF()
+
 # ROCPRIM_PATH
 IF(NOT DEFINED ENV{ROCPRIM_PATH})
   SET(ROCPRIM_PATH ${ROCM_PATH}/rocprim)
@@ -145,6 +152,7 @@ IF(HIP_FOUND)
   set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen)
   set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft)
   set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse)
+  set(rccl_DIR ${RCCL_PATH}/lib/cmake/rccl)
   set(rocprim_DIR ${ROCPRIM_PATH}/lib/cmake/rocprim)
   set(hipcub_DIR ${HIPCUB_PATH}/lib/cmake/hipcub)
   set(rocthrust_DIR ${ROCTHRUST_PATH}/lib/cmake/rocthrust)
@@ -155,6 +163,7 @@ IF(HIP_FOUND)
   find_package_and_print_version(miopen REQUIRED)
   find_package_and_print_version(rocfft REQUIRED)
   find_package_and_print_version(hipsparse REQUIRED)
+  find_package_and_print_version(rccl)
   find_package_and_print_version(rocprim REQUIRED)
   find_package_and_print_version(hipcub REQUIRED)
   find_package_and_print_version(rocthrust REQUIRED)
@@ -167,6 +176,9 @@ IF(HIP_FOUND)
   # TODO: miopen_LIBRARIES should return fullpath to the library file,
   # however currently it's just the lib name
   FIND_LIBRARY(PYTORCH_MIOPEN_LIBRARIES ${miopen_LIBRARIES} HINTS ${MIOPEN_PATH}/lib)
+  # TODO: rccl_LIBRARIES should return fullpath to the library file,
+  # however currently it's just the lib name
+  FIND_LIBRARY(PYTORCH_RCCL_LIBRARIES ${rccl_LIBRARIES} HINTS ${RCCL_PATH}/lib)
   # hiprtc is part of HIP
   FIND_LIBRARY(ROCM_HIPRTC_LIB hiprtc HINTS ${HIP_PATH}/lib)

@@ -278,8 +278,9 @@ CUDA_INCLUDE_MAP = collections.OrderedDict([
     ("cusparse.h", ("hipsparse.h", CONV_INCLUDE, API_RAND)),
     ("cufft.h", ("hipfft.h", CONV_INCLUDE, API_BLAS)),
     ("cufftXt.h", ("hipfft.h", CONV_INCLUDE, API_BLAS)),
+    ("<nccl.h>", ("<rccl.h>", CONV_INCLUDE, API_RUNTIME)),  # PyTorch also has a source file named "nccl.h", so we need "<" ">" to differentiate
     ("nvrtc.h", ("hip/hiprtc.h", CONV_INCLUDE, API_RTC)),
-    ("thrust/system/cuda/", ("thrust/system/hip/", CONV_INCLUDE, API_BLAS)),
+    ("thrust/system/cuda", ("thrust/system/hip", CONV_INCLUDE, API_BLAS)),
     ("cub/util_allocator.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
     ("cub/block/block_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
     ("cub/cub.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
@@ -2205,7 +2206,7 @@ CUDA_IDENTIFIER_MAP = collections.OrderedDict([
     ("nvrtcGetProgramLogSize", ("hiprtcGetProgramLogSize", CONV_JIT, API_RTC)),
     ("nvrtcGetPTX", ("hiprtcGetCode", CONV_JIT, API_RTC)),
     ("nvrtcGetPTXSize", ("hiprtcGetCodeSize", CONV_JIT, API_RTC)),
-    ("thrust::cuda::", ("thrust::hip::", CONV_MATH_FUNC, API_BLAS)),
+    ("thrust::cuda", ("thrust::hip", CONV_MATH_FUNC, API_BLAS)),
     ("cub::", ("hipcub::", CONV_MATH_FUNC, API_BLAS)),
 ])

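The new "<nccl.h>" key is deliberately written with angle brackets: PyTorch ships its own source file named "nccl.h", and only the system-style include should be rewritten to RCCL. A minimal sketch of the intended effect, using a plain dict and a hypothetical rewrite_includes helper rather than hipify's real trie-driven rewriting:

# Illustrative stand-in for the CUDA_INCLUDE_MAP entry added above;
# the real entries map to (replacement, conversion type, API) tuples.
INCLUDE_MAP = {"<nccl.h>": "<rccl.h>"}

def rewrite_includes(line):
    # Only the bracketed form is a key, so quoted includes stay untouched.
    for src, dst in INCLUDE_MAP.items():
        line = line.replace(src, dst)
    return line

print(rewrite_includes('#include <nccl.h>'))   # -> #include <rccl.h>
print(rewrite_includes('#include "nccl.h"'))   # unchanged: PyTorch's own nccl.h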
@@ -594,7 +594,7 @@ for mapping in CUDA_TO_HIP_MAPPINGS:
         CAFFE2_TRIE.add(src)
         CAFFE2_MAP[src] = dst
 RE_CAFFE2_PREPROCESSOR = re.compile(CAFFE2_TRIE.pattern())
-RE_PYTORCH_PREPROCESSOR = re.compile(r'\b{0}\b'.format(PYTORCH_TRIE.pattern()))
+RE_PYTORCH_PREPROCESSOR = re.compile(r'(?<=\W)({0})(?=\W)'.format(PYTORCH_TRIE.pattern()))

 RE_QUOTE_HEADER = re.compile(r'#include "([^"]+)"')
 RE_ANGLE_HEADER = re.compile(r'#include <([^>]+)>')
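The regex change above goes hand in hand with keys like "<nccl.h>": \b is a word boundary, so it can never match directly before or after a key that starts or ends with a non-word character, whereas the lookaround form only requires a non-word character on each side. A small sketch of the difference, assuming a single escaped key in place of the full PYTORCH_TRIE pattern:

import re

key = re.escape("<nccl.h>")            # keys may begin/end with non-word chars
line = '#include <nccl.h>\n'

old = re.compile(r'\b{0}\b'.format(key))
new = re.compile(r'(?<=\W)({0})(?=\W)'.format(key))

print(old.search(line))   # None: between ' ' and '<' there is no \b boundary
print(new.search(line))   # matches: the space and the newline both count as \W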
@@ -251,8 +251,6 @@ if (USE_NCCL)
         ${TORCH_SRC_DIR}/csrc/cuda/python_nccl.cpp)
     list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_NCCL)
     list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl)
-    if (USE_SYSTEM_NCCL)
-    endif()
 endif()

 # In the most recent CMake versions, a new 'TRANSFORM' subcommand of 'list' allows much of the boilerplate of defining the lists