Enable RCCL in ROCm build (#27383)

Summary:
continues https://github.com/pytorch/pytorch/pull/23884
Pull Request resolved: https://github.com/pytorch/pytorch/pull/27383

Differential Revision: D17767248

Pulled By: bddppq

fbshipit-source-id: 3a506844ca6f01d7bbe8be5bde0976999e3a2b90
This commit is contained in:
Junjie Bai 2019-10-04 17:39:53 -07:00 committed by Facebook Github Bot
parent 7b3881f68c
commit f4d0d0a811
7 changed files with 48 additions and 11 deletions

View File

@ -140,7 +140,7 @@ option(USE_METAL "Use Metal for iOS build" ON)
option(USE_NATIVE_ARCH "Use -march=native" OFF)
cmake_dependent_option(
USE_NCCL "Use NCCL" ON
"USE_CUDA;UNIX;NOT APPLE" OFF)
"USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
cmake_dependent_option(
USE_STATIC_NCCL "Use static NCCL" OFF
"USE_NCCL" OFF)

View File

@ -948,6 +948,11 @@ if(USE_ROCM)
# Announce the HIP build. STATUS is used because message() has no INFO mode:
# an unrecognized first argument is emitted as literal message text.
message(STATUS "Compiling with HIP for AMD.")
caffe2_update_option(USE_ROCM ON)
# The RCCL integration only handles a system-provided library, so switch
# USE_SYSTEM_NCCL on whenever NCCL support is requested for a ROCm build.
if(USE_NCCL AND NOT USE_SYSTEM_NCCL)
  message(STATUS "Forcing USE_SYSTEM_NCCL to ON since it's required by using RCCL")
  caffe2_update_option(USE_SYSTEM_NCCL ON)
endif()
list(APPEND HIP_CXX_FLAGS -fPIC)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1)
list(APPEND HIP_CXX_FLAGS -DCUDA_HAS_FP16=1)
@ -983,7 +988,7 @@ if(USE_ROCM)
hip_include_directories(${Caffe2_HIP_INCLUDE})
set(Caffe2_HIP_DEPENDENCY_LIBS
${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES} ${hipcub_LIBRARIES} ${ROCM_HIPRTC_LIB})
${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES} ${PYTORCH_RCCL_LIBRARIES} ${hipcub_LIBRARIES} ${ROCM_HIPRTC_LIB})
# Note [rocblas & rocfft cmake bug]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -1006,17 +1011,20 @@ endif()
# ---[ NCCL
if(USE_NCCL)
if(NOT USE_CUDA)
if(NOT (USE_CUDA OR USE_ROCM))
message(WARNING
"Not using CUDA, so disabling NCCL. Suppress this warning with "
"Not using CUDA/ROCM, so disabling USE_NCCL. Suppress this warning with "
"-DUSE_NCCL=OFF.")
caffe2_update_option(USE_NCCL OFF)
elseif(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
message(WARNING "NCCL is currently only supported under Linux.")
caffe2_update_option(USE_NCCL OFF)
else()
elseif(USE_CUDA)
include(${CMAKE_CURRENT_LIST_DIR}/External/nccl.cmake)
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS __caffe2_nccl)
elseif(USE_ROCM)
include(${CMAKE_CURRENT_LIST_DIR}/External/rccl.cmake)
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS __caffe2_nccl)
endif()
endif()
@ -1058,7 +1066,7 @@ if(USE_GLOO)
# Add explicit dependency since NCCL is built from third_party.
# Without dependency, make -jN with N>1 can fail if the NCCL build
# hasn't finished when CUDA targets are linked.
if(USE_NCCL)
if(USE_NCCL AND NOT USE_ROCM)
add_dependencies(gloo_cuda nccl_external)
endif()
# Pick the right dependency depending on USE_CUDA

18
cmake/External/rccl.cmake vendored Normal file
View File

@ -0,0 +1,18 @@
# Expose a system-installed RCCL through the __caffe2_nccl INTERFACE target.
#
# Variables read by this file:
#   USE_SYSTEM_NCCL        - must be ON; building RCCL from source is not
#                            supported here.
#   PYTORCH_RCCL_LIBRARIES - library linked by the target; not set in this file,
#                            so it has to be defined before the include.
#   RCCL_INCLUDE_DIRS      - include directories attached to the target
#                            (presumably provided by the RCCL package - TODO confirm).
#
# The __NCCL_INCLUDED guard turns repeated includes into a no-op.
if(NOT __NCCL_INCLUDED)
  set(__NCCL_INCLUDED TRUE)

  # Stop at configure time: continuing would leave __caffe2_nccl undefined and
  # any consumer linking against it would only fail later, at link time.
  if(NOT USE_SYSTEM_NCCL)
    message(FATAL_ERROR "USE_SYSTEM_NCCL=OFF is not supported yet when using RCCL")
  endif()

  # REQUIRED makes find_package() abort the configure step when the package is
  # missing, so no separate "not found" branch is needed afterwards.
  # NOTE(review): other ROCm packages in this build are looked up with
  # lowercase names (e.g. rccl_DIR, rccl_LIBRARIES); confirm that the uppercase
  # package name and RCCL_INCLUDE_DIRS resolve as intended.
  find_package(RCCL REQUIRED)
  message(STATUS "RCCL Found!")

  add_library(__caffe2_nccl INTERFACE)
  target_link_libraries(__caffe2_nccl INTERFACE ${PYTORCH_RCCL_LIBRARIES})
  target_include_directories(__caffe2_nccl INTERFACE ${RCCL_INCLUDE_DIRS})
endif()

View File

@ -80,6 +80,13 @@ ELSE()
SET(MIOPEN_PATH $ENV{MIOPEN_PATH})
ENDIF()
# RCCL_PATH: take the value from the environment when the variable is defined
# there; otherwise fall back to the rccl directory under ROCM_PATH.
IF(DEFINED ENV{RCCL_PATH})
  SET(RCCL_PATH $ENV{RCCL_PATH})
ELSE()
  SET(RCCL_PATH ${ROCM_PATH}/rccl)
ENDIF()
# ROCPRIM_PATH
IF(NOT DEFINED ENV{ROCPRIM_PATH})
SET(ROCPRIM_PATH ${ROCM_PATH}/rocprim)
@ -145,6 +152,7 @@ IF(HIP_FOUND)
set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen)
set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft)
set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse)
set(rccl_DIR ${RCCL_PATH}/lib/cmake/rccl)
set(rocprim_DIR ${ROCPRIM_PATH}/lib/cmake/rocprim)
set(hipcub_DIR ${HIPCUB_PATH}/lib/cmake/hipcub)
set(rocthrust_DIR ${ROCTHRUST_PATH}/lib/cmake/rocthrust)
@ -155,6 +163,7 @@ IF(HIP_FOUND)
find_package_and_print_version(miopen REQUIRED)
find_package_and_print_version(rocfft REQUIRED)
find_package_and_print_version(hipsparse REQUIRED)
find_package_and_print_version(rccl)
find_package_and_print_version(rocprim REQUIRED)
find_package_and_print_version(hipcub REQUIRED)
find_package_and_print_version(rocthrust REQUIRED)
@ -167,6 +176,9 @@ IF(HIP_FOUND)
# TODO: miopen_LIBRARIES should return fullpath to the library file,
# however currently it's just the lib name
FIND_LIBRARY(PYTORCH_MIOPEN_LIBRARIES ${miopen_LIBRARIES} HINTS ${MIOPEN_PATH}/lib)
# TODO: rccl_LIBRARIES should return fullpath to the library file,
# however currently it's just the lib name
FIND_LIBRARY(PYTORCH_RCCL_LIBRARIES ${rccl_LIBRARIES} HINTS ${RCCL_PATH}/lib)
# hiprtc is part of HIP
FIND_LIBRARY(ROCM_HIPRTC_LIB hiprtc HINTS ${HIP_PATH}/lib)

View File

@ -278,8 +278,9 @@ CUDA_INCLUDE_MAP = collections.OrderedDict([
("cusparse.h", ("hipsparse.h", CONV_INCLUDE, API_RAND)),
("cufft.h", ("hipfft.h", CONV_INCLUDE, API_BLAS)),
("cufftXt.h", ("hipfft.h", CONV_INCLUDE, API_BLAS)),
("<nccl.h>", ("<rccl.h>", CONV_INCLUDE, API_RUNTIME)), #PyTorch also has a source file named "nccl.h", so we need to "<"">" to differentiate
("nvrtc.h", ("hip/hiprtc.h", CONV_INCLUDE, API_RTC)),
("thrust/system/cuda/", ("thrust/system/hip/", CONV_INCLUDE, API_BLAS)),
("thrust/system/cuda", ("thrust/system/hip", CONV_INCLUDE, API_BLAS)),
("cub/util_allocator.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("cub/block/block_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("cub/cub.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
@ -2205,7 +2206,7 @@ CUDA_IDENTIFIER_MAP = collections.OrderedDict([
("nvrtcGetProgramLogSize", ("hiprtcGetProgramLogSize", CONV_JIT, API_RTC)),
("nvrtcGetPTX", ("hiprtcGetCode", CONV_JIT, API_RTC)),
("nvrtcGetPTXSize", ("hiprtcGetCodeSize", CONV_JIT, API_RTC)),
("thrust::cuda::", ("thrust::hip::", CONV_MATH_FUNC, API_BLAS)),
("thrust::cuda", ("thrust::hip", CONV_MATH_FUNC, API_BLAS)),
("cub::", ("hipcub::", CONV_MATH_FUNC, API_BLAS)),
])

View File

@ -594,7 +594,7 @@ for mapping in CUDA_TO_HIP_MAPPINGS:
CAFFE2_TRIE.add(src)
CAFFE2_MAP[src] = dst
RE_CAFFE2_PREPROCESSOR = re.compile(CAFFE2_TRIE.pattern())
RE_PYTORCH_PREPROCESSOR = re.compile(r'\b{0}\b'.format(PYTORCH_TRIE.pattern()))
RE_PYTORCH_PREPROCESSOR = re.compile(r'(?<=\W)({0})(?=\W)'.format(PYTORCH_TRIE.pattern()))
RE_QUOTE_HEADER = re.compile(r'#include "([^"]+)"')
RE_ANGLE_HEADER = re.compile(r'#include <([^>]+)>')

View File

@ -251,8 +251,6 @@ if (USE_NCCL)
${TORCH_SRC_DIR}/csrc/cuda/python_nccl.cpp)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_NCCL)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl)
if (USE_SYSTEM_NCCL)
endif()
endif()
# In the most recent CMake versions, a new 'TRANSFORM' subcommand of 'list' allows much of the boilerplate of defining the lists