Update CMake and use native CUDA language support (#62445)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/62445

PyTorch currently compiles CUDA the old-style CMake way, through a collection of scripts in `FindCUDA.cmake`. Newer CMake versions support CUDA natively, as a first-class language just like C or C++.

Test Plan: Imported from OSS

Reviewed By: ejguan

Differential Revision: D31503350

fbshipit-source-id: 2ee817edc9698531ae1b87eda3ad271ee459fd55
This commit is contained in:
parent d3b29afbb6
commit c373387709
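For readers unfamiliar with the two styles, the migration looks roughly like the sketch below. This is a minimal illustration with a hypothetical target name, not code taken from this diff:

# Old style: script-based FindCUDA.cmake. cuda_add_library() drives nvcc
# itself and reads flags from the CUDA_NVCC_FLAGS list.
find_package(CUDA REQUIRED)
list(APPEND CUDA_NVCC_FLAGS "-Xfatbin" "-compress-all")
cuda_add_library(my_kernels kernels.cu)

# New style: CUDA is a first-class CMake language. A plain add_library()
# compiles .cu sources, and flags live in the CMAKE_CUDA_FLAGS string.
enable_language(CUDA)
string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
add_library(my_kernels kernels.cu)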
@@ -46,7 +46,7 @@ steps:
       curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output .\tmp_bin\sccache.exe
       curl -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output .\tmp_bin\sccache-cl.exe
       copy .\tmp_bin\sccache.exe .\tmp_bin\nvcc.exe
-      curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output .\tmp_bin\randomtemp.exe
+      curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output .\tmp_bin\randomtemp.exe
     displayName: Install sccache and randomtemp
     condition: not(eq(variables.CUDA_VERSION, ''))

@@ -120,9 +120,7 @@ steps:
       Write-Host "##vso[task.setvariable variable=CMAKE_LIBRARY_PATH;]$(Build.SourcesDirectory)\mkl\lib;$env:CMAKE_LIBRARY_PATH"
       Write-Host "##vso[task.setvariable variable=ADDITIONAL_PATH;]$(Build.SourcesDirectory)\tmp_bin"
       Write-Host "##vso[task.setvariable variable=SCCACHE_IDLE_TIMEOUT;]1500"
-      Write-Host "##vso[task.setvariable variable=RANDOMTEMP_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\nvcc.exe"
-      Write-Host "##vso[task.setvariable variable=CUDA_NVCC_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\randomtemp.exe"
-      Write-Host "##vso[task.setvariable variable=RANDOMTEMP_BASEDIR;]$(Build.SourcesDirectory)\tmp_bin"
+      Write-Host "##vso[task.setvariable variable=CMAKE_CUDA_COMPILER_LAUNCHER;]$(Build.SourcesDirectory)/tmp_bin/randomtemp.exe;$(Build.SourcesDirectory)/tmp_bin/sccache.exe"
     displayName: Set MKL, sccache and randomtemp environment variables

   # View current environment variables

@@ -75,7 +75,7 @@ RUN rm install_cmake.sh
 ADD ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
 RUN bash ./install_cache.sh && rm install_cache.sh
-ENV CUDA_NVCC_EXECUTABLE=/opt/cache/lib/nvcc
+ENV CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache

 # Add jni.h for java host build
 ADD ./common/install_jni.sh install_jni.sh

@@ -94,6 +94,7 @@ ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
 # AWS specific CUDA build guidance
 ENV TORCH_CUDA_ARCH_LIST Maxwell
 ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
+ENV CUDA_PATH /usr/local/cuda

 # Install LLVM dev version (Defined in the pytorch/builder github repository)
 COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm

.github/templates/windows_ci_workflow.yml.j2 (vendored, 2 changes)
@@ -55,8 +55,8 @@ env:
   CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
 {%- if cuda_version != "cpu" %}
   TORCH_CUDA_ARCH_LIST: "7.0"
+  USE_CUDA: 1
 {%- endif %}
-  USE_CUDA: !{{ 1 if cuda_version != "cpu" else 0 }}

 !{{ common.concurrency(build_environment) }}

.github/workflows/generated-win-vs2019-cpu-py3.yml (generated, vendored, 1 change)
@@ -31,7 +31,6 @@ env:
   AWS_DEFAULT_REGION: us-east-1
   CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
   CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-  USE_CUDA: 0

 concurrency:
   group: win-vs2019-cpu-py3-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}

@@ -29,7 +29,8 @@ if [ -z "${SCCACHE}" ] && which ccache > /dev/null; then
   ln -sf "$(which ccache)" ./ccache/g++
   ln -sf "$(which ccache)" ./ccache/x86_64-linux-gnu-gcc
   if [[ "${BUILD_ENVIRONMENT}" == *-cuda* ]]; then
-    ln -sf "$(which ccache)" ./ccache/nvcc
+    mkdir -p ./ccache/cuda
+    ln -sf "$(which ccache)" ./ccache/cuda/nvcc
   fi
   export CACHE_WRAPPER_DIR="$PWD/ccache"
   export PATH="$CACHE_WRAPPER_DIR:$PATH"

@@ -93,7 +94,8 @@ if [[ $BUILD_ENVIRONMENT == *cuda* ]]; then

   # Explicitly set path to NVCC such that the symlink to ccache or sccache is used
   if [ -n "${CACHE_WRAPPER_DIR}" ]; then
-    build_args+=("CUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/nvcc")
+    build_args+=("CUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/cuda/nvcc")
+    build_args+=("CMAKE_CUDA_COMPILER_LAUNCHER=${CACHE_WRAPPER_DIR}/ccache")
   fi

   # Ensure FindCUDA.cmake can infer the right path to the CUDA toolkit.

@@ -97,23 +97,20 @@ set CXX=sccache-cl
 set CMAKE_GENERATOR=Ninja

 if "%USE_CUDA%"=="1" (
   copy %TMP_DIR_WIN%\bin\sccache.exe %TMP_DIR_WIN%\bin\nvcc.exe

   :: randomtemp is used to resolve the intermittent build error related to CUDA.
   :: code: https://github.com/peterjc123/randomtemp-rust
   :: issue: https://github.com/pytorch/pytorch/issues/25393
   ::
-  :: Previously, CMake uses CUDA_NVCC_EXECUTABLE for finding nvcc and then
-  :: the calls are redirected to sccache. sccache looks for the actual nvcc
-  :: in PATH, and then pass the arguments to it.
-  :: Currently, randomtemp is placed before sccache (%TMP_DIR_WIN%\bin\nvcc)
-  :: so we are actually pretending sccache instead of nvcc itself.
-  curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
+  :: CMake requires a single command as CUDA_NVCC_EXECUTABLE, so we push the wrappers
+  :: randomtemp.exe and sccache.exe into a batch file which CMake invokes.
+  curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
   if errorlevel 1 exit /b
   if not errorlevel 0 exit /b
-  set RANDOMTEMP_EXECUTABLE=%TMP_DIR_WIN%\bin\nvcc.exe
-  set CUDA_NVCC_EXECUTABLE=%TMP_DIR_WIN%\bin\randomtemp.exe
-  set RANDOMTEMP_BASEDIR=%TMP_DIR_WIN%\bin
+  echo @"%TMP_DIR_WIN%\bin\randomtemp.exe" "%TMP_DIR_WIN%\bin\sccache.exe" "%CUDA_PATH%\bin\nvcc.exe" %%* > "%TMP_DIR%/bin/nvcc.bat"
+  cat %TMP_DIR%/bin/nvcc.bat
+  set CUDA_NVCC_EXECUTABLE=%TMP_DIR%/bin/nvcc.bat
+  for /F "usebackq delims=" %%n in (`cygpath -m "%CUDA_PATH%\bin\nvcc.exe"`) do set CMAKE_CUDA_COMPILER=%%n
+  set CMAKE_CUDA_COMPILER_LAUNCHER=%TMP_DIR%/bin/randomtemp.exe;%TMP_DIR%\bin\sccache.exe
 )

 @echo off

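For context, the two variables the batch file feeds into are standard native-CUDA CMake knobs rather than anything PyTorch-specific; a rough sketch of how a configure step consumes them (paths illustrative):

# CMAKE_CUDA_COMPILER must point at the real nvcc, while
# CMAKE_CUDA_COMPILER_LAUNCHER is a ;-separated list of wrapper tools that
# CMake prepends to every nvcc invocation, e.g.:
#   cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
#         -DCMAKE_CUDA_COMPILER_LAUNCHER="randomtemp;sccache" ..
# This keeps sccache (caching) and randomtemp (temp-dir retry) in the loop
# without masquerading a copied sccache.exe as nvcc itself.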
@@ -517,16 +517,14 @@ if(MSVC)
   endforeach(flag_var)

   # Try harder
-  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "/w" "-w")
+  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /w -w")
 endif(MSVC)

-list(APPEND CUDA_NVCC_FLAGS "-Xfatbin" "-compress-all")
-list(APPEND CUDA_NVCC_FLAGS_DEBUG "-Xfatbin" "-compress-all")
-list(APPEND CUDA_NVCC_FLAGS_RELWITHDEBINFO "-Xfatbin" "-compress-all")
+string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")

 if(NOT MSVC)
-  list(APPEND CUDA_NVCC_FLAGS_DEBUG "-g" "-lineinfo" "--source-in-ptx")
-  list(APPEND CUDA_NVCC_FLAGS_RELWITHDEBINFO "-g" "-lineinfo" "--source-in-ptx")
+  string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -g -lineinfo --source-in-ptx")
+  string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -g -lineinfo --source-in-ptx")
 endif(NOT MSVC)

 # Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not

@@ -667,6 +665,16 @@ endif()

 include(cmake/Dependencies.cmake)

+if((CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10.2) AND (CMAKE_HOST_SYSTEM_NAME MATCHES "Windows"))
+  # CUDA < 10.2 doesn't support compiling and extracting header dependencies in
+  # one call, so instead CMake calls nvcc twice with && in between.
+  # However, on Windows cmd.exe has an 8191-character limit for commands, which we
+  # start hitting. This moves most arguments into a file to avoid going over the limit.
+
+  set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_OBJECTS ON)
+  set(CMAKE_NINJA_FORCE_RESPONSE_FILE ON CACHE INTERNAL "")
+endif()
+
 if(USE_FBGEMM)
   string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM")
 endif()

@@ -69,12 +69,6 @@ if(USE_CUDA AND USE_ROCM)
   message(FATAL_ERROR "Both CUDA and ROCm are enabled and found. PyTorch can only be built with either of them. Please turn one off by using either USE_CUDA=OFF or USE_ROCM=OFF.")
 endif()

-if(MSVC)
-  # we want to respect the standard, and we are bored of those **** .
-  add_definitions(-D_CRT_SECURE_NO_DEPRECATE=1)
-  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "/wd4819" "-Xcompiler" "/wd4503" "-Xcompiler" "/wd4190" "-Xcompiler" "/wd4244" "-Xcompiler" "/wd4251" "-Xcompiler" "/wd4275" "-Xcompiler" "/wd4522")
-endif(MSVC)
-
 if(USE_ROCM)
   # TODO: AT_HIP_ENABLED (change this once we represent HIP as HIP in
   # ATen proper)

@@ -49,9 +49,7 @@ if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
 endif()

 # ---[ Dependency of c10_cuda
-target_link_libraries(c10_cuda PUBLIC c10)
-
-target_link_libraries(c10_cuda INTERFACE torch::cudart)
+target_link_libraries(c10_cuda PUBLIC c10 torch::cudart)

 target_include_directories(
     c10_cuda PUBLIC

@@ -895,19 +895,18 @@ elseif(USE_CUDA)
   set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
   if(CUDA_SEPARABLE_COMPILATION)
     # Separate compilation fails when kernels using `thrust::sort_by_key`
-    # are linked with the rest of CUDA code. Workaround by linking them separately
-    set(_generated_name "torch_cuda_w_sort_by_key_intermediate_link${CMAKE_C_OUTPUT_EXTENSION}")
-    set(torch_cuda_w_sort_by_key_link_file "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/torch_cuda.dir/${CMAKE_CFG_INTDIR}/${_generated_name}")
-    cuda_wrap_srcs(torch_cuda OBJ Caffe2_GPU_W_SORT_BY_KEY_OBJ ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
-    CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${torch_cuda_w_sort_by_key_link_file}" torch_cpu "${_options}" "${torch_cuda_SEPARABLE_COMPILATION_OBJECTS}")
-    set( torch_cuda_SEPARABLE_COMPILATION_OBJECTS )
-    # Pass compiled sort-by-key object + device-linked fatbin as extra dependencies of torch_cuda
-    cuda_add_library(torch_cuda ${Caffe2_GPU_SRCS} ${torch_cuda_w_sort_by_key_link_file} ${Caffe2_GPU_W_SORT_BY_KEY_OBJ})
+    # are linked with the rest of CUDA code. Workaround by linking them separately.
+    add_library(torch_cuda ${Caffe2_GPU_SRCS})
+    set_property(TARGET torch_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)
+
+    add_library(torch_cuda_w_sort_by_key OBJECT ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
+    set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
+    target_link_libraries(torch_cuda PRIVATE torch_cuda_w_sort_by_key)
   elseif(BUILD_SPLIT_CUDA)
-    cuda_add_library(torch_cuda_cpp ${Caffe2_GPU_SRCS_CPP} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CPP})
-    cuda_add_library(torch_cuda_cu ${Caffe2_GPU_SRCS_CU} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CU})
+    add_library(torch_cuda_cpp ${Caffe2_GPU_SRCS_CPP} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CPP})
+    add_library(torch_cuda_cu ${Caffe2_GPU_SRCS_CU} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CU})
   else()
-    cuda_add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
+    add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
   endif()
   set(CUDA_LINK_LIBRARIES_KEYWORD)
   if(BUILD_SPLIT_CUDA)

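The replacement above leans on per-target native properties instead of FindCUDA's cuda_wrap_srcs/device-link helpers. A self-contained sketch of the same OBJECT-library pattern, with hypothetical target and file names:

cmake_minimum_required(VERSION 3.18)
project(example LANGUAGES CXX CUDA)

# Main CUDA target, built with relocatable device code.
add_library(main_cuda main_kernels.cu)
set_property(TARGET main_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)

# Kernels that misbehave under separable compilation are isolated in an
# OBJECT library with the property switched off, then linked back in.
add_library(sort_kernels OBJECT sort_kernels.cu)
set_property(TARGET sort_kernels PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
target_link_libraries(main_cuda PRIVATE sort_kernels)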
@@ -1803,7 +1802,7 @@ if(BUILD_TEST)
   if(USE_CUDA)
     foreach(test_src ${Caffe2_GPU_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
-      cuda_add_executable(${test_name} "${test_src}")
+      add_executable(${test_name} "${test_src}")
       target_link_libraries(${test_name} torch_library gtest_main)
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})

@@ -33,6 +33,50 @@ macro(enable_ubsan)
   endif()
 endmacro()

+# ---[ CUDA
+if(USE_CUDA)
+  # public/*.cmake uses CAFFE2_USE_*
+  set(CAFFE2_USE_CUDA ${USE_CUDA})
+  set(CAFFE2_USE_CUDNN ${USE_CUDNN})
+  set(CAFFE2_USE_NVRTC ${USE_NVRTC})
+  set(CAFFE2_USE_TENSORRT ${USE_TENSORRT})
+  include(${CMAKE_CURRENT_LIST_DIR}/public/cuda.cmake)
+  if(CAFFE2_USE_CUDA)
+    # A helper variable recording the list of Caffe2 dependent libraries
+    # torch::cudart is dealt with separately, due to CUDA_ADD_LIBRARY
+    # design reason (it adds CUDA_LIBRARIES itself).
+    set(Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS
+      caffe2::cufft caffe2::curand caffe2::cublas)
+    if(CAFFE2_USE_NVRTC)
+      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cuda caffe2::nvrtc)
+    else()
+      caffe2_update_option(USE_NVRTC OFF)
+    endif()
+    if(CAFFE2_USE_CUDNN)
+      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn-public)
+    else()
+      caffe2_update_option(USE_CUDNN OFF)
+    endif()
+    if(CAFFE2_USE_TENSORRT)
+      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::tensorrt)
+    else()
+      caffe2_update_option(USE_TENSORRT OFF)
+    endif()
+  else()
+    message(WARNING
+      "Not compiling with CUDA. Suppress this warning with "
+      "-DUSE_CUDA=OFF.")
+    caffe2_update_option(USE_CUDA OFF)
+    caffe2_update_option(USE_CUDNN OFF)
+    caffe2_update_option(USE_NVRTC OFF)
+    caffe2_update_option(USE_TENSORRT OFF)
+    set(CAFFE2_USE_CUDA OFF)
+    set(CAFFE2_USE_CUDNN OFF)
+    set(CAFFE2_USE_NVRTC OFF)
+    set(CAFFE2_USE_TENSORRT OFF)
+  endif()
+endif()
+
 # ---[ Custom Protobuf
 if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND (NOT INTERN_BUILD_MOBILE OR BUILD_CAFFE2_MOBILE))
   disable_ubsan()

@@ -77,8 +121,8 @@ endif(MSVC)

 # ---[ Threads
 include(${CMAKE_CURRENT_LIST_DIR}/public/threads.cmake)
-if(TARGET Threads::Threads)
-  list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS Threads::Threads)
+if(TARGET caffe2::Threads)
+  list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS caffe2::Threads)
 else()
   message(FATAL_ERROR
       "Cannot find threading library. Caffe2 requires Threads to compile.")

@@ -661,7 +705,7 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
   # We need to replace googletest cmake scripts too.
   # Otherwise, it will sometimes break the build.
   # To make the git clean after the build, we make a backup first.
-  if(MSVC AND MSVC_Z7_OVERRIDE)
+  if((MSVC AND MSVC_Z7_OVERRIDE) OR USE_CUDA)
     execute_process(
       COMMAND ${CMAKE_COMMAND}
               "-DFILENAME=${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googletest/cmake/internal_utils.cmake"

@@ -1181,50 +1225,6 @@ if(USE_LLVM)
   endif(LLVM_FOUND)
 endif(USE_LLVM)

-# ---[ CUDA
-if(USE_CUDA)
-  # public/*.cmake uses CAFFE2_USE_*
-  set(CAFFE2_USE_CUDA ${USE_CUDA})
-  set(CAFFE2_USE_CUDNN ${USE_CUDNN})
-  set(CAFFE2_USE_NVRTC ${USE_NVRTC})
-  set(CAFFE2_USE_TENSORRT ${USE_TENSORRT})
-  include(${CMAKE_CURRENT_LIST_DIR}/public/cuda.cmake)
-  if(CAFFE2_USE_CUDA)
-    # A helper variable recording the list of Caffe2 dependent libraries
-    # torch::cudart is dealt with separately, due to CUDA_ADD_LIBRARY
-    # design reason (it adds CUDA_LIBRARIES itself).
-    set(Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS
-      caffe2::cufft caffe2::curand caffe2::cublas)
-    if(CAFFE2_USE_NVRTC)
-      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cuda caffe2::nvrtc)
-    else()
-      caffe2_update_option(USE_NVRTC OFF)
-    endif()
-    if(CAFFE2_USE_CUDNN)
-      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn-public)
-    else()
-      caffe2_update_option(USE_CUDNN OFF)
-    endif()
-    if(CAFFE2_USE_TENSORRT)
-      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::tensorrt)
-    else()
-      caffe2_update_option(USE_TENSORRT OFF)
-    endif()
-  else()
-    message(WARNING
-      "Not compiling with CUDA. Suppress this warning with "
-      "-DUSE_CUDA=OFF.")
-    caffe2_update_option(USE_CUDA OFF)
-    caffe2_update_option(USE_CUDNN OFF)
-    caffe2_update_option(USE_NVRTC OFF)
-    caffe2_update_option(USE_TENSORRT OFF)
-    set(CAFFE2_USE_CUDA OFF)
-    set(CAFFE2_USE_CUDNN OFF)
-    set(CAFFE2_USE_NVRTC OFF)
-    set(CAFFE2_USE_TENSORRT OFF)
-  endif()
-endif()
-
 # ---[ cuDNN
 if(USE_CUDNN)
   set(CUDNN_FRONTEND_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/../third_party/cudnn_frontend/include)

@@ -1371,6 +1371,8 @@ if(USE_GLOO)
     set(ENV{GLOO_ROCM_ARCH} "${PYTORCH_ROCM_ARCH}")
   endif()
   if(NOT USE_SYSTEM_GLOO)
+    # gloo uses cuda_add_library
+    torch_update_find_cuda_flags()
     add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/gloo)
   else()
     add_library(gloo SHARED IMPORTED)

@@ -1417,6 +1419,8 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
     set(TP_BUILD_LIBUV ON CACHE BOOL "" FORCE)
     set(TP_STATIC_OR_SHARED STATIC CACHE STRING "" FORCE)

+    # Tensorpipe uses cuda_add_library
+    torch_update_find_cuda_flags()
     add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)

     list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)

@@ -1560,7 +1564,6 @@ function(add_onnx_tensorrt_subdir)
 endfunction()
 if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
   if(USE_TENSORRT)
-    set(CMAKE_CUDA_COMPILER ${CUDA_NVCC_EXECUTABLE})
     add_onnx_tensorrt_subdir()
     include_directories("${CMAKE_CURRENT_LIST_DIR}/../third_party/onnx-tensorrt")
     caffe2_interface_library(nvonnxparser_static onnx_trt_library)

@@ -1579,8 +1582,7 @@ endif()

 if(NOT INTERN_BUILD_MOBILE)
   set(TORCH_CUDA_ARCH_LIST $ENV{TORCH_CUDA_ARCH_LIST})
-  set(TORCH_NVCC_FLAGS $ENV{TORCH_NVCC_FLAGS})
-  separate_arguments(TORCH_NVCC_FLAGS)
+  string(APPEND CMAKE_CUDA_FLAGS " $ENV{TORCH_NVCC_FLAGS}")
   set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)

   # Top-level build config

@@ -1599,7 +1601,7 @@ if(NOT INTERN_BUILD_MOBILE)
   if(MSVC)
     # we want to respect the standard, and we are bored of those **** .
     add_definitions(-D_CRT_SECURE_NO_DEPRECATE=1)
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler=/wd4819,/wd4503,/wd4190,/wd4244,/wd4251,/wd4275,/wd4522")
+    string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=/wd4819,/wd4503,/wd4190,/wd4244,/wd4251,/wd4275,/wd4522")
   endif()

   if(NOT MSVC)

@@ -1610,22 +1612,19 @@ if(NOT INTERN_BUILD_MOBILE)
     endif()
   endif()

-  list(APPEND CUDA_NVCC_FLAGS -Wno-deprecated-gpu-targets)
-  list(APPEND CUDA_NVCC_FLAGS --expt-extended-lambda)
+  string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets --expt-extended-lambda")

   if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     set(CMAKE_CXX_STANDARD 14)
   endif()

-  list(APPEND CUDA_NVCC_FLAGS ${TORCH_NVCC_FLAGS})
-  if(CMAKE_POSITION_INDEPENDENT_CODE AND NOT MSVC)
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-fPIC")
-  endif()
-
   if(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5)
     message(STATUS "Found CUDA with FP16 support, compiling with torch.cuda.HalfTensor")
-    list(APPEND CUDA_NVCC_FLAGS "-DCUDA_HAS_FP16=1" "-D__CUDA_NO_HALF_OPERATORS__" "-D__CUDA_NO_HALF_CONVERSIONS__"
-         "-D__CUDA_NO_BFLOAT16_CONVERSIONS__" "-D__CUDA_NO_HALF2_OPERATORS__")
+    string(APPEND CMAKE_CUDA_FLAGS " -DCUDA_HAS_FP16=1"
+           " -D__CUDA_NO_HALF_OPERATORS__"
+           " -D__CUDA_NO_HALF_CONVERSIONS__"
+           " -D__CUDA_NO_HALF2_OPERATORS__"
+           " -D__CUDA_NO_BFLOAT16_CONVERSIONS__")
     add_compile_options(-DCUDA_HAS_FP16=1)
   else()
     message(STATUS "Could not find CUDA with FP16 support, compiling without torch.CudaHalfTensor")

@@ -20,5 +20,6 @@ else(REVERT)
   file(READ ${FILENAME} content)
   file(WRITE ${BACKUP} "${content}")
   string(REGEX REPLACE "[-/]Z[iI]" "/Z7" content "${content}")
+  string(REGEX REPLACE "Threads::Threads" "caffe2::Threads" content "${content}")
   file(WRITE ${FILENAME} "${content}")
 endif(REVERT)

@@ -3,6 +3,7 @@
 #  CUB_INCLUDE_DIRS - the CUB include directory

 find_path(CUB_INCLUDE_DIR
+  HINTS "${CUDA_TOOLKIT_INCLUDE}"
   NAMES cub/cub.cuh
   DOC "The directory where CUB includes reside"
 )

@@ -103,9 +103,10 @@ function(caffe2_print_configuration_summary)
     message(STATUS "    nvrtc : ${__tmp}")
     message(STATUS "    CUDA include path : ${CUDA_INCLUDE_DIRS}")
-    message(STATUS "    NVCC executable : ${CUDA_NVCC_EXECUTABLE}")
-    message(STATUS "    NVCC flags : ${CUDA_NVCC_FLAGS}")
-    message(STATUS "    CUDA host compiler : ${CUDA_HOST_COMPILER}")
-    message(STATUS "    NVCC --device-c : ${CUDA_SEPARABLE_COMPILATION}")
+    message(STATUS "    CUDA compiler : ${CMAKE_CUDA_COMPILER}")
+    message(STATUS "    CUDA flags : ${CMAKE_CUDA_FLAGS}")
+    message(STATUS "    CUDA host compiler : ${CMAKE_CUDA_HOST_COMPILER}")
+    message(STATUS "    CUDA --device-c : ${CUDA_SEPARABLE_COMPILATION}")
     message(STATUS "    USE_TENSORRT : ${USE_TENSORRT}")
     if(${USE_TENSORRT})
       message(STATUS "      TensorRT runtime library: ${TENSORRT_LIBRARY}")

@@ -35,6 +35,13 @@ if(NOT CUDA_FOUND)
   set(CAFFE2_USE_CUDA OFF)
   return()
 endif()
+
+# Enable CUDA language support
+set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
+enable_language(CUDA)
+set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
+set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+
 message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
 message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
 message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})

@@ -435,6 +442,8 @@ endif()

 # setting nvcc arch flags
 torch_cuda_get_nvcc_gencode_flag(NVCC_FLAGS_EXTRA)
+# CMake 3.18 adds integrated support for architecture selection, but we can't rely on it
+set(CMAKE_CUDA_ARCHITECTURES OFF)
 list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
 message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA}")

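For reference, the native route on CMake 3.18+ would be CMAKE_CUDA_ARCHITECTURES; the diff opts out because the -gencode flags are still computed by torch_cuda_get_nvcc_gencode_flag. A sketch of the native alternative, with illustrative architecture values:

# Native CMake >= 3.18 architecture selection; CMake generates the
# -gencode flags automatically instead of them being hand-built.
set(CMAKE_CUDA_ARCHITECTURES 70 75 80)
add_library(kernels kernels.cu)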
@@ -453,14 +462,10 @@ endforeach()
 string(REPLACE ";" "," SUPPRESS_WARNING_FLAGS "${SUPPRESS_WARNING_FLAGS}")
 list(APPEND CUDA_NVCC_FLAGS -Xcudafe ${SUPPRESS_WARNING_FLAGS})

-# Set C++14 support
 set(CUDA_PROPAGATE_HOST_FLAGS_BLOCKLIST "-Werror")
 if(MSVC)
   list(APPEND CUDA_NVCC_FLAGS "--Werror" "cross-execution-space-call")
   list(APPEND CUDA_NVCC_FLAGS "--no-host-device-move-forward")
-else()
-  list(APPEND CUDA_NVCC_FLAGS "-std=c++14")
-  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-fPIC")
 endif()

 # OpenMP flags for NVCC with Clang-cl

@@ -477,9 +482,15 @@ endif()
 # Debug and Release symbol support
 if(MSVC)
   if(${CAFFE2_USE_MSVC_STATIC_RUNTIME})
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-MT$<$<CONFIG:Debug>:d>")
+    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MTd")
+    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MT")
+    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MT")
+    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MT")
   else()
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-MD$<$<CONFIG:Debug>:d>")
+    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MDd")
+    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MD")
+    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MD")
+    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MD")
   endif()
   if(CUDA_NVCC_FLAGS MATCHES "Zi")
     list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-FS")

@@ -493,3 +504,11 @@ list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")

 # Set expt-extended-lambda to support lambda on device
 list(APPEND CUDA_NVCC_FLAGS "--expt-extended-lambda")
+
+foreach(FLAG ${CUDA_NVCC_FLAGS})
+  string(FIND "${FLAG}" " " flag_space_position)
+  if(NOT flag_space_position EQUAL -1)
+    message(FATAL_ERROR "Found spaces in CUDA_NVCC_FLAGS entry '${FLAG}'")
+  endif()
+  string(APPEND CMAKE_CUDA_FLAGS " ${FLAG}")
+endforeach()

@@ -1,16 +1,29 @@
+if(TARGET caffe2::Threads)
+  return()
+endif()
+
 find_package(Threads REQUIRED)
-# For newer CMake, Threads::Threads is already defined. Otherwise, we will
-# provide a backward compatible wrapper for Threads::Threads.
-if(THREADS_FOUND AND NOT TARGET Threads::Threads)
-  add_library(Threads::Threads INTERFACE IMPORTED)
+
+# Threads::Threads doesn't work if the target has CUDA code
+if(THREADS_FOUND)
+  add_library(caffe2::Threads INTERFACE IMPORTED)

   if(THREADS_HAVE_PTHREAD_ARG)
-    set_property(TARGET Threads::Threads
-                 PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread")
+    set(compile_options
+        $<$<COMPILE_LANGUAGE:C>:-pthread>
+        $<$<COMPILE_LANGUAGE:CXX>:-pthread>)
+    if(USE_CUDA)
+      list(APPEND compile_options
+           $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler -pthread>)
+    endif()
+
+    set_property(TARGET caffe2::Threads
+                 PROPERTY INTERFACE_COMPILE_OPTIONS
+                 ${compile_options})
   endif()

   if(CMAKE_THREAD_LIBS_INIT)
-    set_property(TARGET Threads::Threads
+    set_property(TARGET caffe2::Threads
                  PROPERTY INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
   endif()
 endif()

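The $<COMPILE_LANGUAGE:...> conditions above are the standard trick for keeping host-only flags away from nvcc's own option parser. A minimal consumer-side sketch of the same idea, with a hypothetical target name:

add_library(worker worker.cpp worker_kernels.cu)
# Host compilers accept -pthread directly; for CUDA sources nvcc must be
# told to forward it to the host compiler via -Xcompiler.
target_compile_options(worker PRIVATE
  $<$<COMPILE_LANGUAGE:CXX>:-pthread>
  $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>)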
@@ -348,7 +348,7 @@ macro(torch_cuda_based_add_library cuda_target)
   if(USE_ROCM)
     hip_add_library(${cuda_target} ${ARGN})
   elseif(USE_CUDA)
-    cuda_add_library(${cuda_target} ${ARGN})
+    add_library(${cuda_target} ${ARGN})
   else()
   endif()
 endmacro()

@@ -388,10 +388,11 @@ endmacro()
 # torch_compile_options(lib_name)
 function(torch_compile_options libname)
   set_property(TARGET ${libname} PROPERTY CXX_STANDARD 14)
+  set(private_compile_options "")

   # ---[ Check if warnings should be errors.
   if(WERROR)
-    target_compile_options(${libname} PRIVATE -Werror)
+    list(APPEND private_compile_options -Werror)
   endif()

   if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)

@@ -405,6 +406,7 @@ function(torch_compile_options libname)
   endif()

   target_compile_options(${libname} PUBLIC
+    $<$<COMPILE_LANGUAGE:CXX>:
     ${MSVC_RUNTIME_LIBRARY_OPTION}
     $<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:${MSVC_DEBINFO_OPTION}>
     /EHsc

@@ -420,23 +422,34 @@ function(torch_compile_options libname)
     /wd4101
     /wd4996
     /wd4275
-    /bigobj
+    /bigobj>
     )
   else()
-    target_compile_options(${libname} PRIVATE
+    list(APPEND private_compile_options
       -Wall
       -Wextra
       -Wno-unused-parameter
       -Wno-unused-variable
       -Wno-unused-function
       -Wno-unused-result
       -Wno-unused-local-typedefs
       -Wno-missing-field-initializers
       -Wno-write-strings
       -Wno-unknown-pragmas
       -Wno-type-limits
       -Wno-array-bounds
       -Wno-unknown-pragmas
       -Wno-sign-compare
       -Wno-strict-overflow
       -Wno-strict-aliasing
       -Wno-error=deprecated-declarations
       # Clang has an unfixed bug leading to spurious missing braces
       # warnings, see https://bugs.llvm.org/show_bug.cgi?id=21629
       -Wno-missing-braces
       )

     if(NOT APPLE)
-      target_compile_options(${libname} PRIVATE
+      list(APPEND private_compile_options
         # Considered to be flaky. See the discussion at
         # https://github.com/pytorch/pytorch/pull/9608
         -Wno-maybe-uninitialized)

@@ -446,10 +459,23 @@ function(torch_compile_options libname)

   if(MSVC)
   elseif(WERROR)
-    target_compile_options(${libname} PRIVATE -Wno-strict-overflow)
+    list(APPEND private_compile_options -Wno-strict-overflow)
   endif()
   endif()

+  target_compile_options(${libname} PRIVATE
+      $<$<COMPILE_LANGUAGE:CXX>:${private_compile_options}>)
+  if(USE_CUDA)
+    string(FIND "${private_compile_options}" " " space_position)
+    if(NOT space_position EQUAL -1)
+      message(FATAL_ERROR "Found spaces in private_compile_options='${private_compile_options}'")
+    endif()
+    # Convert CMake list to comma-separated list
+    string(REPLACE ";" "," private_compile_options "${private_compile_options}")
+    target_compile_options(${libname} PRIVATE
+        $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${private_compile_options}>)
+  endif()
+
   if(NOT WIN32 AND NOT USE_ASAN)
     # Enable hidden visibility by default to make it easier to debug issues with
     # TORCH_API annotations. Hidden visibility with selective default visibility

@@ -458,11 +484,13 @@ function(torch_compile_options libname)
     # Unfortunately, hidden visibility messes up some ubsan warnings because
     # templated classes crossing library boundary get duplicated (but identical)
     # definitions. It's easier to just disable it.
-    target_compile_options(${libname} PRIVATE "-fvisibility=hidden")
+    target_compile_options(${libname} PRIVATE
+        $<$<COMPILE_LANGUAGE:CXX>: -fvisibility=hidden>)
   endif()

   # Use -O2 for release builds (-O3 doesn't improve perf, and -Os results in perf regression)
-  target_compile_options(${libname} PRIVATE "$<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>:-O2>")
+  target_compile_options(${libname} PRIVATE
+      $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>>:-O2>)

 endfunction()

@@ -484,3 +512,40 @@ function(torch_set_target_props libname)
     set_target_properties(${libname} PROPERTIES STATIC_LIBRARY_FLAGS_DEBUG "/NODEFAULTLIB:${VCOMP_LIB}d")
   endif()
 endfunction()
+
+
+##############################################################################
+# Set old-style FindCuda.cmake compile flags from modern CMake cuda flags.
+# Usage:
+#   torch_update_find_cuda_flags()
+function(torch_update_find_cuda_flags)
+  # Convert -O2 -Xcompiler="-O2 -Wall" to "-O2;-Xcompiler=-O2,-Wall"
+  if(USE_CUDA)
+    separate_arguments(FLAGS UNIX_COMMAND "${CMAKE_CUDA_FLAGS}")
+    string(REPLACE " " "," FLAGS "${FLAGS}")
+    set(CUDA_NVCC_FLAGS ${FLAGS} PARENT_SCOPE)
+
+    separate_arguments(FLAGS_DEBUG UNIX_COMMAND "${CMAKE_CUDA_FLAGS_DEBUG}")
+    string(REPLACE " " "," FLAGS_DEBUG "${FLAGS_DEBUG}")
+    set(CUDA_NVCC_FLAGS_DEBUG "${FLAGS_DEBUG}" PARENT_SCOPE)
+
+    separate_arguments(FLAGS_RELEASE UNIX_COMMAND "${CMAKE_CUDA_FLAGS_RELEASE}")
+    string(REPLACE " " "," FLAGS_RELEASE "${FLAGS_RELEASE}")
+    set(CUDA_NVCC_FLAGS_RELEASE "${FLAGS_RELEASE}" PARENT_SCOPE)
+
+    separate_arguments(FLAGS_MINSIZEREL UNIX_COMMAND "${CMAKE_CUDA_FLAGS_MINSIZEREL}")
+    string(REPLACE " " "," FLAGS_MINSIZEREL "${FLAGS_MINSIZEREL}")
+    set(CUDA_NVCC_FLAGS_MINSIZEREL "${FLAGS_MINSIZEREL}" PARENT_SCOPE)
+
+    separate_arguments(FLAGS_RELWITHDEBINFO UNIX_COMMAND "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO}")
+    string(REPLACE " " "," FLAGS_RELWITHDEBINFO "${FLAGS_RELWITHDEBINFO}")
+    set(CUDA_NVCC_FLAGS_RELWITHDEBINFO "${FLAGS_RELWITHDEBINFO}" PARENT_SCOPE)
+
+    message(STATUS "Converting CMAKE_CUDA_FLAGS to CUDA_NVCC_FLAGS:\n"
+                   "    CUDA_NVCC_FLAGS                = ${FLAGS}\n"
+                   "    CUDA_NVCC_FLAGS_DEBUG          = ${FLAGS_DEBUG}\n"
+                   "    CUDA_NVCC_FLAGS_RELEASE        = ${FLAGS_RELEASE}\n"
+                   "    CUDA_NVCC_FLAGS_RELWITHDEBINFO = ${FLAGS_RELWITHDEBINFO}\n"
+                   "    CUDA_NVCC_FLAGS_MINSIZEREL     = ${FLAGS_MINSIZEREL}")
+  endif()
+endfunction()

@@ -10,7 +10,7 @@ if(BUILD_CAFFE2_OPS)
   # Note(ilijar): Since Detectron ops currently have no
   # CPU implementation, we only build GPU ops for now.
   if(USE_CUDA)
-    CUDA_ADD_LIBRARY(
+    add_library(
         caffe2_detectron_ops_gpu SHARED
         ${Detectron_CPU_SRCS}
         ${Detectron_GPU_SRCS})

@@ -1,5 +1,5 @@
 if(USE_CUDA)
-  cuda_add_library(c10d_cuda_test CUDATest.cu)
+  add_library(c10d_cuda_test CUDATest.cu)
   target_include_directories(c10d_cuda_test PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
   target_link_libraries(c10d_cuda_test torch_cuda)
   add_dependencies(c10d_cuda_test torch_cuda)

@@ -243,7 +243,7 @@ class CMake:
             var: var for var in
             ('BLAS',
              'BUILDING_WITH_TORCH_LIBS',
-             'CUDA_HOST_COMPILER',
+             'CUDA_HOST_COMILER',
              'CUDA_NVCC_EXECUTABLE',
              'CUDA_SEPARABLE_COMPILATION',
              'CUDNN_LIBRARY',

@@ -267,6 +267,15 @@ class CMake:
              'OPENSSL_ROOT_DIR')
         })

+        # Aliases which are lower priority than their canonical option
+        low_priority_aliases = {
+            'CUDA_HOST_COMPILER': 'CMAKE_CUDA_HOST_COMPILER',
+            'CUDAHOSTCXX': 'CUDA_HOST_COMPILER',
+            'CMAKE_CUDA_HOST_COMPILER': 'CUDA_HOST_COMPILER',
+            'CMAKE_CUDA_COMPILER': 'CUDA_NVCC_EXECUTABLE',
+            'CUDACXX': 'CUDA_NVCC_EXECUTABLE'
+        }
+
         for var, val in my_env.items():
             # We currently pass over all environment variables that start with "BUILD_", "USE_", and "CMAKE_". This is
             # because we currently have no reliable way to get the list of all build options we have specified in

@@ -279,6 +288,11 @@ class CMake:
             elif var.startswith(('BUILD_', 'USE_', 'CMAKE_')) or var.endswith(('EXITCODE', 'EXITCODE__TRYRUN_OUTPUT')):
                 build_options[var] = val

+            if var in low_priority_aliases:
+                key = low_priority_aliases[var]
+                if key not in build_options:
+                    build_options[key] = val
+
         # The default value cannot be easily obtained in CMakeLists.txt. We set it here.
         py_lib_path = sysconfig.get_path('purelib')
         cmake_prefix_path = build_options.get('CMAKE_PREFIX_PATH', None)

@@ -111,6 +111,7 @@ else()
 endif()

 if(USE_CUDA)
+  include(${TORCH_ROOT}/cmake/public/cuda.cmake)
   append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
   list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})

@@ -119,16 +120,7 @@ if(USE_CUDA)
     list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN)
   endif()

-  if(MSVC)
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib)
-    list(APPEND TORCH_PYTHON_INCLUDE_DIRECTORIES "${NVTOOLEXT_HOME}/include")
-  elseif(APPLE)
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvToolsExt.dylib)
-  else()
-    find_library(LIBNVTOOLSEXT libnvToolsExt.so PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/)
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${LIBNVTOOLSEXT})
-  endif()
-
+  list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
 endif()

 if(USE_ROCM)

@@ -67,13 +67,13 @@ if(UNIX AND NOT APPLE)
       # site above though in case there was a reason we were testing
       # against clock_gettime. In principle, the choice of symbol you
      # test for shouldn't matter.
-      set(CMAKE_REQUIRED_LIBRARIES Threads::Threads)
+      set(CMAKE_REQUIRED_LIBRARIES caffe2::Threads)
       check_library_exists(rt shm_open "sys/mman.h" NEED_RT_AND_PTHREAD)
       unset(CMAKE_REQUIRED_LIBRARIES)
       if(NEED_RT_AND_PTHREAD)
         message(STATUS "Needs it, linking against pthread and rt")
-        target_link_libraries(shm rt Threads::Threads)
-        target_link_libraries(torch_shm_manager rt Threads::Threads)
+        target_link_libraries(shm rt caffe2::Threads)
+        target_link_libraries(torch_shm_manager rt caffe2::Threads)
       endif()
     endif()
 endif()