Mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 00:20:18 +01:00)
Update CMake and use native CUDA language support (#62445)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/62445

PyTorch currently compiles CUDA through CMake's old FindCUDA.cmake machinery, which is just a bundle of scripts. Newer CMake versions support CUDA natively, as a first-class language alongside C and C++.

Test Plan: Imported from OSS
Reviewed By: ejguan
Differential Revision: D31503350
fbshipit-source-id: 2ee817edc9698531ae1b87eda3ad271ee459fd55

Parent: d3b29afbb6
Commit: c373387709
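The gist of the migration, as a minimal sketch rather than anything taken verbatim from this diff (the target name mykernels and the source file kernels.cu are invented for illustration):

    # Old style: FindCUDA.cmake script machinery
    find_package(CUDA REQUIRED)
    list(APPEND CUDA_NVCC_FLAGS "-Xfatbin" "-compress-all")   # flags kept as a CMake list
    cuda_add_library(mykernels kernels.cu)                    # wrapper function from FindCUDA

    # New style: CUDA as a first-class language
    enable_language(CUDA)                                     # what this commit does in cmake/public/cuda.cmake
    string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all") # flags kept as one string
    add_library(mykernels kernels.cu)                         # plain add_library handles .cu sources

With the native approach, per-target properties (CUDA_SEPARABLE_COMPILATION, compiler launchers, generator expressions per language) replace most of the FindCUDA-specific variables, which is the pattern the hunks below follow.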
@@ -46,7 +46,7 @@ steps:
     curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output .\tmp_bin\sccache.exe
     curl -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output .\tmp_bin\sccache-cl.exe
     copy .\tmp_bin\sccache.exe .\tmp_bin\nvcc.exe
-    curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output .\tmp_bin\randomtemp.exe
+    curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output .\tmp_bin\randomtemp.exe
   displayName: Install sccache and randomtemp
   condition: not(eq(variables.CUDA_VERSION, ''))

@@ -120,9 +120,7 @@ steps:
     Write-Host "##vso[task.setvariable variable=CMAKE_LIBRARY_PATH;]$(Build.SourcesDirectory)\mkl\lib;$env:CMAKE_LIBRARY_PATH"
     Write-Host "##vso[task.setvariable variable=ADDITIONAL_PATH;]$(Build.SourcesDirectory)\tmp_bin"
     Write-Host "##vso[task.setvariable variable=SCCACHE_IDLE_TIMEOUT;]1500"
-    Write-Host "##vso[task.setvariable variable=RANDOMTEMP_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\nvcc.exe"
-    Write-Host "##vso[task.setvariable variable=CUDA_NVCC_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\randomtemp.exe"
-    Write-Host "##vso[task.setvariable variable=RANDOMTEMP_BASEDIR;]$(Build.SourcesDirectory)\tmp_bin"
+    Write-Host "##vso[task.setvariable variable=CMAKE_CUDA_COMPILER_LAUNCHER;]$(Build.SourcesDirectory)/tmp_bin/randomtemp.exe;$(Build.SourcesDirectory)/tmp_bin/sccache.exe"
   displayName: Set MKL, sccache and randomtemp environment variables

 # View current environment variables

@@ -75,7 +75,7 @@ RUN rm install_cmake.sh
 ADD ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
 RUN bash ./install_cache.sh && rm install_cache.sh
-ENV CUDA_NVCC_EXECUTABLE=/opt/cache/lib/nvcc
+ENV CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache

 # Add jni.h for java host build
 ADD ./common/install_jni.sh install_jni.sh

@@ -94,6 +94,7 @@ ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
 # AWS specific CUDA build guidance
 ENV TORCH_CUDA_ARCH_LIST Maxwell
 ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
+ENV CUDA_PATH /usr/local/cuda

 # Install LLVM dev version (Defined in the pytorch/builder github repository)
 COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm

.github/templates/windows_ci_workflow.yml.j2 (vendored, 2 changes)

@@ -55,8 +55,8 @@ env:
   CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
 {%- if cuda_version != "cpu" %}
   TORCH_CUDA_ARCH_LIST: "7.0"
-  USE_CUDA: 1
 {%- endif %}
+  USE_CUDA: !{{ 1 if cuda_version != "cpu" else 0 }}

 !{{ common.concurrency(build_environment) }}

.github/workflows/generated-win-vs2019-cpu-py3.yml (generated, vendored, 1 change)

@@ -31,6 +31,7 @@ env:
   AWS_DEFAULT_REGION: us-east-1
   CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
   CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+  USE_CUDA: 0

 concurrency:
   group: win-vs2019-cpu-py3-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}

@@ -29,7 +29,8 @@ if [ -z "${SCCACHE}" ] && which ccache > /dev/null; then
   ln -sf "$(which ccache)" ./ccache/g++
   ln -sf "$(which ccache)" ./ccache/x86_64-linux-gnu-gcc
   if [[ "${BUILD_ENVIRONMENT}" == *-cuda* ]]; then
-    ln -sf "$(which ccache)" ./ccache/nvcc
+    mkdir -p ./ccache/cuda
+    ln -sf "$(which ccache)" ./ccache/cuda/nvcc
   fi
   export CACHE_WRAPPER_DIR="$PWD/ccache"
   export PATH="$CACHE_WRAPPER_DIR:$PATH"

@@ -93,7 +94,8 @@ if [[ $BUILD_ENVIRONMENT == *cuda* ]]; then

   # Explicitly set path to NVCC such that the symlink to ccache or sccache is used
   if [ -n "${CACHE_WRAPPER_DIR}" ]; then
-    build_args+=("CUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/nvcc")
+    build_args+=("CUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/cuda/nvcc")
+    build_args+=("CMAKE_CUDA_COMPILER_LAUNCHER=${CACHE_WRAPPER_DIR}/ccache")
   fi

   # Ensure FindCUDA.cmake can infer the right path to the CUDA toolkit.

@@ -97,23 +97,20 @@ set CXX=sccache-cl
 set CMAKE_GENERATOR=Ninja

 if "%USE_CUDA%"=="1" (
-  copy %TMP_DIR_WIN%\bin\sccache.exe %TMP_DIR_WIN%\bin\nvcc.exe

   :: randomtemp is used to resolve the intermittent build error related to CUDA.
   :: code: https://github.com/peterjc123/randomtemp-rust
   :: issue: https://github.com/pytorch/pytorch/issues/25393
   ::
-  :: Previously, CMake uses CUDA_NVCC_EXECUTABLE for finding nvcc and then
-  :: the calls are redirected to sccache. sccache looks for the actual nvcc
-  :: in PATH, and then pass the arguments to it.
-  :: Currently, randomtemp is placed before sccache (%TMP_DIR_WIN%\bin\nvcc)
-  :: so we are actually pretending sccache instead of nvcc itself.
-  curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
+  :: CMake requires a single command as CUDA_NVCC_EXECUTABLE, so we push the wrappers
+  :: randomtemp.exe and sccache.exe into a batch file which CMake invokes.
+  curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
   if errorlevel 1 exit /b
   if not errorlevel 0 exit /b
-  set RANDOMTEMP_EXECUTABLE=%TMP_DIR_WIN%\bin\nvcc.exe
-  set CUDA_NVCC_EXECUTABLE=%TMP_DIR_WIN%\bin\randomtemp.exe
-  set RANDOMTEMP_BASEDIR=%TMP_DIR_WIN%\bin
+  echo @"%TMP_DIR_WIN%\bin\randomtemp.exe" "%TMP_DIR_WIN%\bin\sccache.exe" "%CUDA_PATH%\bin\nvcc.exe" %%* > "%TMP_DIR%/bin/nvcc.bat"
+  cat %TMP_DIR%/bin/nvcc.bat
+  set CUDA_NVCC_EXECUTABLE=%TMP_DIR%/bin/nvcc.bat
+  for /F "usebackq delims=" %%n in (`cygpath -m "%CUDA_PATH%\bin\nvcc.exe"`) do set CMAKE_CUDA_COMPILER=%%n
+  set CMAKE_CUDA_COMPILER_LAUNCHER=%TMP_DIR%/bin/randomtemp.exe;%TMP_DIR%\bin\sccache.exe
 )

 @echo off

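The batch-file comment above states the constraint: FindCUDA's CUDA_NVCC_EXECUTABLE accepts a single command, so the randomtemp and sccache wrappers get folded into one nvcc.bat. On the native-CMake side the same chaining needs no wrapper script, because CMAKE_CUDA_COMPILER_LAUNCHER takes a semicolon-separated launcher list that CMake prepends to every nvcc invocation. A sketch with illustrative paths (not the CI's real ones):

    # Each compile line effectively becomes:
    #   randomtemp.exe sccache.exe nvcc.exe <args...>
    set(CMAKE_CUDA_COMPILER "C:/CUDA/v11.1/bin/nvcc.exe")        # illustrative path
    set(CMAKE_CUDA_COMPILER_LAUNCHER
        "C:/w/tmp_bin/randomtemp.exe;C:/w/tmp_bin/sccache.exe")  # illustrative paths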
@@ -517,16 +517,14 @@ if(MSVC)
   endforeach(flag_var)

   # Try harder
-  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "/w" "-w")
+  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /w -w")
 endif(MSVC)

-list(APPEND CUDA_NVCC_FLAGS "-Xfatbin" "-compress-all")
-list(APPEND CUDA_NVCC_FLAGS_DEBUG "-Xfatbin" "-compress-all")
-list(APPEND CUDA_NVCC_FLAGS_RELWITHDEBINFO "-Xfatbin" "-compress-all")
+string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")

 if(NOT MSVC)
-  list(APPEND CUDA_NVCC_FLAGS_DEBUG "-g" "-lineinfo" "--source-in-ptx")
-  list(APPEND CUDA_NVCC_FLAGS_RELWITHDEBINFO "-g" "-lineinfo" "--source-in-ptx")
+  string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -g -lineinfo --source-in-ptx")
+  string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -g -lineinfo --source-in-ptx")
 endif(NOT MSVC)

 # Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not

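A pattern worth noting, since it repeats throughout the CMake hunks below: FindCUDA's CUDA_NVCC_FLAGS is a CMake list (semicolon-separated elements), while the native CMAKE_CUDA_FLAGS is a single space-separated string. That is why every migrated line swaps list(APPEND ...) for string(APPEND ...) with a leading space:

    list(APPEND CUDA_NVCC_FLAGS "-Xfatbin" "-compress-all")    # old: list -> "-Xfatbin;-compress-all"
    string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")  # new: one string, note the leading space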
@@ -667,6 +665,16 @@ endif()

 include(cmake/Dependencies.cmake)

+if((CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10.2) AND (CMAKE_HOST_SYSTEM_NAME MATCHES "Windows"))
+  # CUDA < 10.2 doesn't support compiling and extracting header dependencies in
+  # one call, so instead CMake calls nvcc twice with && in between.
+  # However, on Windows cmd.exe has an 8191-character limit for commands, which we
+  # start hitting. This moves most arguments into a file to avoid going over the limit.
+
+  set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_OBJECTS ON)
+  set(CMAKE_NINJA_FORCE_RESPONSE_FILE ON CACHE INTERNAL "")
+endif()
+
 if(USE_FBGEMM)
   string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM")
 endif()

@@ -69,12 +69,6 @@ if(USE_CUDA AND USE_ROCM)
   message(FATAL_ERROR "Both CUDA and ROCm are enabled and found. PyTorch can only be built with either of them. Please turn one off by using either USE_CUDA=OFF or USE_ROCM=OFF.")
 endif()

-if(MSVC)
-  # we want to respect the standard, and we are bored of those **** .
-  add_definitions(-D_CRT_SECURE_NO_DEPRECATE=1)
-  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "/wd4819" "-Xcompiler" "/wd4503" "-Xcompiler" "/wd4190" "-Xcompiler" "/wd4244" "-Xcompiler" "/wd4251" "-Xcompiler" "/wd4275" "-Xcompiler" "/wd4522")
-endif(MSVC)
-
 if(USE_ROCM)
   # TODO: AT_HIP_ENABLED (change this once we represent HIP as HIP in
   # ATen proper)

@@ -49,9 +49,7 @@ if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
 endif()

 # ---[ Dependency of c10_cuda
-target_link_libraries(c10_cuda PUBLIC c10)
-
-target_link_libraries(c10_cuda INTERFACE torch::cudart)
+target_link_libraries(c10_cuda PUBLIC c10 torch::cudart)

 target_include_directories(
     c10_cuda PUBLIC

@@ -895,19 +895,18 @@ elseif(USE_CUDA)
   set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
   if(CUDA_SEPARABLE_COMPILATION)
     # Separate compilation fails when kernels using `thrust::sort_by_key`
-    # are linked with the rest of CUDA code. Workaround by linking them separately
-    set(_generated_name "torch_cuda_w_sort_by_key_intermediate_link${CMAKE_C_OUTPUT_EXTENSION}")
-    set(torch_cuda_w_sort_by_key_link_file "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/torch_cuda.dir/${CMAKE_CFG_INTDIR}/${_generated_name}")
-    cuda_wrap_srcs(torch_cuda OBJ Caffe2_GPU_W_SORT_BY_KEY_OBJ ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
-    CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${torch_cuda_w_sort_by_key_link_file}" torch_cpu "${_options}" "${torch_cuda_SEPARABLE_COMPILATION_OBJECTS}")
-    set( torch_cuda_SEPARABLE_COMPILATION_OBJECTS )
-    # Pass compiled sort-by-key object + device-linked fatbin as extra dependencies of torch_cuda
-    cuda_add_library(torch_cuda ${Caffe2_GPU_SRCS} ${torch_cuda_w_sort_by_key_link_file} ${Caffe2_GPU_W_SORT_BY_KEY_OBJ})
+    # are linked with the rest of CUDA code. Workaround by linking them separately.
+    add_library(torch_cuda ${Caffe2_GPU_SRCS})
+    set_property(TARGET torch_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)
+
+    add_library(torch_cuda_w_sort_by_key OBJECT ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
+    set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
+    target_link_libraries(torch_cuda PRIVATE torch_cuda_w_sort_by_key)
   elseif(BUILD_SPLIT_CUDA)
-    cuda_add_library(torch_cuda_cpp ${Caffe2_GPU_SRCS_CPP} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CPP})
-    cuda_add_library(torch_cuda_cu ${Caffe2_GPU_SRCS_CU} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CU})
+    add_library(torch_cuda_cpp ${Caffe2_GPU_SRCS_CPP} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CPP})
+    add_library(torch_cuda_cu ${Caffe2_GPU_SRCS_CU} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CU})
   else()
-    cuda_add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
+    add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
   endif()
   set(CUDA_LINK_LIBRARIES_KEYWORD)
   if(BUILD_SPLIT_CUDA)

@@ -1803,7 +1802,7 @@ if(BUILD_TEST)
   if(USE_CUDA)
     foreach(test_src ${Caffe2_GPU_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
-      cuda_add_executable(${test_name} "${test_src}")
+      add_executable(${test_name} "${test_src}")
       target_link_libraries(${test_name} torch_library gtest_main)
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})

@@ -33,6 +33,50 @@ macro(enable_ubsan)
   endif()
 endmacro()

+# ---[ CUDA
+if(USE_CUDA)
+  # public/*.cmake uses CAFFE2_USE_*
+  set(CAFFE2_USE_CUDA ${USE_CUDA})
+  set(CAFFE2_USE_CUDNN ${USE_CUDNN})
+  set(CAFFE2_USE_NVRTC ${USE_NVRTC})
+  set(CAFFE2_USE_TENSORRT ${USE_TENSORRT})
+  include(${CMAKE_CURRENT_LIST_DIR}/public/cuda.cmake)
+  if(CAFFE2_USE_CUDA)
+    # A helper variable recording the list of Caffe2 dependent libraries
+    # torch::cudart is dealt with separately, due to CUDA_ADD_LIBRARY
+    # design reason (it adds CUDA_LIBRARIES itself).
+    set(Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS
+      caffe2::cufft caffe2::curand caffe2::cublas)
+    if(CAFFE2_USE_NVRTC)
+      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cuda caffe2::nvrtc)
+    else()
+      caffe2_update_option(USE_NVRTC OFF)
+    endif()
+    if(CAFFE2_USE_CUDNN)
+      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn-public)
+    else()
+      caffe2_update_option(USE_CUDNN OFF)
+    endif()
+    if(CAFFE2_USE_TENSORRT)
+      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::tensorrt)
+    else()
+      caffe2_update_option(USE_TENSORRT OFF)
+    endif()
+  else()
+    message(WARNING
+      "Not compiling with CUDA. Suppress this warning with "
+      "-DUSE_CUDA=OFF.")
+    caffe2_update_option(USE_CUDA OFF)
+    caffe2_update_option(USE_CUDNN OFF)
+    caffe2_update_option(USE_NVRTC OFF)
+    caffe2_update_option(USE_TENSORRT OFF)
+    set(CAFFE2_USE_CUDA OFF)
+    set(CAFFE2_USE_CUDNN OFF)
+    set(CAFFE2_USE_NVRTC OFF)
+    set(CAFFE2_USE_TENSORRT OFF)
+  endif()
+endif()
+
 # ---[ Custom Protobuf
 if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND (NOT INTERN_BUILD_MOBILE OR BUILD_CAFFE2_MOBILE))
   disable_ubsan()

@@ -77,8 +121,8 @@ endif(MSVC)

 # ---[ Threads
 include(${CMAKE_CURRENT_LIST_DIR}/public/threads.cmake)
-if(TARGET Threads::Threads)
-  list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS Threads::Threads)
+if(TARGET caffe2::Threads)
+  list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS caffe2::Threads)
 else()
   message(FATAL_ERROR
       "Cannot find threading library. Caffe2 requires Threads to compile.")

@@ -661,7 +705,7 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
   # We need to replace googletest cmake scripts too.
   # Otherwise, it will sometimes break the build.
   # To make the git clean after the build, we make a backup first.
-  if(MSVC AND MSVC_Z7_OVERRIDE)
+  if((MSVC AND MSVC_Z7_OVERRIDE) OR USE_CUDA)
     execute_process(
       COMMAND ${CMAKE_COMMAND}
               "-DFILENAME=${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googletest/cmake/internal_utils.cmake"

@@ -1181,50 +1225,6 @@ if(USE_LLVM)
   endif(LLVM_FOUND)
 endif(USE_LLVM)

-# ---[ CUDA
-if(USE_CUDA)
-  # public/*.cmake uses CAFFE2_USE_*
-  set(CAFFE2_USE_CUDA ${USE_CUDA})
-  set(CAFFE2_USE_CUDNN ${USE_CUDNN})
-  set(CAFFE2_USE_NVRTC ${USE_NVRTC})
-  set(CAFFE2_USE_TENSORRT ${USE_TENSORRT})
-  include(${CMAKE_CURRENT_LIST_DIR}/public/cuda.cmake)
-  if(CAFFE2_USE_CUDA)
-    # A helper variable recording the list of Caffe2 dependent libraries
-    # torch::cudart is dealt with separately, due to CUDA_ADD_LIBRARY
-    # design reason (it adds CUDA_LIBRARIES itself).
-    set(Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS
-      caffe2::cufft caffe2::curand caffe2::cublas)
-    if(CAFFE2_USE_NVRTC)
-      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cuda caffe2::nvrtc)
-    else()
-      caffe2_update_option(USE_NVRTC OFF)
-    endif()
-    if(CAFFE2_USE_CUDNN)
-      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn-public)
-    else()
-      caffe2_update_option(USE_CUDNN OFF)
-    endif()
-    if(CAFFE2_USE_TENSORRT)
-      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::tensorrt)
-    else()
-      caffe2_update_option(USE_TENSORRT OFF)
-    endif()
-  else()
-    message(WARNING
-      "Not compiling with CUDA. Suppress this warning with "
-      "-DUSE_CUDA=OFF.")
-    caffe2_update_option(USE_CUDA OFF)
-    caffe2_update_option(USE_CUDNN OFF)
-    caffe2_update_option(USE_NVRTC OFF)
-    caffe2_update_option(USE_TENSORRT OFF)
-    set(CAFFE2_USE_CUDA OFF)
-    set(CAFFE2_USE_CUDNN OFF)
-    set(CAFFE2_USE_NVRTC OFF)
-    set(CAFFE2_USE_TENSORRT OFF)
-  endif()
-endif()
-
 # ---[ cuDNN
 if(USE_CUDNN)
   set(CUDNN_FRONTEND_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/../third_party/cudnn_frontend/include)

@@ -1371,6 +1371,8 @@ if(USE_GLOO)
     set(ENV{GLOO_ROCM_ARCH} "${PYTORCH_ROCM_ARCH}")
   endif()
   if(NOT USE_SYSTEM_GLOO)
+    # gloo uses cuda_add_library
+    torch_update_find_cuda_flags()
     add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/gloo)
   else()
     add_library(gloo SHARED IMPORTED)

@@ -1417,6 +1419,8 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
     set(TP_BUILD_LIBUV ON CACHE BOOL "" FORCE)
     set(TP_STATIC_OR_SHARED STATIC CACHE STRING "" FORCE)

+    # Tensorpipe uses cuda_add_library
+    torch_update_find_cuda_flags()
     add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)

     list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)

@@ -1560,7 +1564,6 @@ function(add_onnx_tensorrt_subdir)
 endfunction()
 if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
   if(USE_TENSORRT)
-    set(CMAKE_CUDA_COMPILER ${CUDA_NVCC_EXECUTABLE})
     add_onnx_tensorrt_subdir()
     include_directories("${CMAKE_CURRENT_LIST_DIR}/../third_party/onnx-tensorrt")
     caffe2_interface_library(nvonnxparser_static onnx_trt_library)

@@ -1579,8 +1582,7 @@ endif()

 if(NOT INTERN_BUILD_MOBILE)
   set(TORCH_CUDA_ARCH_LIST $ENV{TORCH_CUDA_ARCH_LIST})
-  set(TORCH_NVCC_FLAGS $ENV{TORCH_NVCC_FLAGS})
-  separate_arguments(TORCH_NVCC_FLAGS)
+  string(APPEND CMAKE_CUDA_FLAGS " $ENV{TORCH_NVCC_FLAGS}")
   set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)

   # Top-level build config

@@ -1599,7 +1601,7 @@ if(NOT INTERN_BUILD_MOBILE)
   if(MSVC)
     # we want to respect the standard, and we are bored of those **** .
     add_definitions(-D_CRT_SECURE_NO_DEPRECATE=1)
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler=/wd4819,/wd4503,/wd4190,/wd4244,/wd4251,/wd4275,/wd4522")
+    string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=/wd4819,/wd4503,/wd4190,/wd4244,/wd4251,/wd4275,/wd4522")
   endif()

   if(NOT MSVC)

@@ -1610,22 +1612,19 @@ if(NOT INTERN_BUILD_MOBILE)
     endif()
   endif()

-  list(APPEND CUDA_NVCC_FLAGS -Wno-deprecated-gpu-targets)
-  list(APPEND CUDA_NVCC_FLAGS --expt-extended-lambda)
+  string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets --expt-extended-lambda")

   if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     set(CMAKE_CXX_STANDARD 14)
   endif()

-  list(APPEND CUDA_NVCC_FLAGS ${TORCH_NVCC_FLAGS})
-  if(CMAKE_POSITION_INDEPENDENT_CODE AND NOT MSVC)
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-fPIC")
-  endif()
-
   if(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5)
     message(STATUS "Found CUDA with FP16 support, compiling with torch.cuda.HalfTensor")
-    list(APPEND CUDA_NVCC_FLAGS "-DCUDA_HAS_FP16=1" "-D__CUDA_NO_HALF_OPERATORS__" "-D__CUDA_NO_HALF_CONVERSIONS__"
-      "-D__CUDA_NO_BFLOAT16_CONVERSIONS__" "-D__CUDA_NO_HALF2_OPERATORS__")
+    string(APPEND CMAKE_CUDA_FLAGS " -DCUDA_HAS_FP16=1"
+                                   " -D__CUDA_NO_HALF_OPERATORS__"
+                                   " -D__CUDA_NO_HALF_CONVERSIONS__"
+                                   " -D__CUDA_NO_HALF2_OPERATORS__"
+                                   " -D__CUDA_NO_BFLOAT16_CONVERSIONS__")
     add_compile_options(-DCUDA_HAS_FP16=1)
   else()
     message(STATUS "Could not find CUDA with FP16 support, compiling without torch.CudaHalfTensor")

@@ -20,5 +20,6 @@ else(REVERT)
   file(READ ${FILENAME} content)
   file(WRITE ${BACKUP} "${content}")
   string(REGEX REPLACE "[-/]Z[iI]" "/Z7" content "${content}")
+  string(REGEX REPLACE "Threads::Threads" "caffe2::Threads" content "${content}")
   file(WRITE ${FILENAME} "${content}")
 endif(REVERT)

@@ -3,6 +3,7 @@
 # CUB_INCLUDE_DIRS - the CUB include directory

 find_path(CUB_INCLUDE_DIR
+  HINTS "${CUDA_TOOLKIT_INCLUDE}"
   NAMES cub/cub.cuh
   DOC "The directory where CUB includes reside"
 )

@@ -103,9 +103,10 @@ function(caffe2_print_configuration_summary)
     message(STATUS "    nvrtc : ${__tmp}")
     message(STATUS "    CUDA include path : ${CUDA_INCLUDE_DIRS}")
     message(STATUS "    NVCC executable : ${CUDA_NVCC_EXECUTABLE}")
-    message(STATUS "    NVCC flags : ${CUDA_NVCC_FLAGS}")
-    message(STATUS "    CUDA host compiler : ${CUDA_HOST_COMPILER}")
-    message(STATUS "    NVCC --device-c : ${CUDA_SEPARABLE_COMPILATION}")
+    message(STATUS "    CUDA compiler : ${CMAKE_CUDA_COMPILER}")
+    message(STATUS "    CUDA flags : ${CMAKE_CUDA_FLAGS}")
+    message(STATUS "    CUDA host compiler : ${CMAKE_CUDA_HOST_COMPILER}")
+    message(STATUS "    CUDA --device-c : ${CUDA_SEPARABLE_COMPILATION}")
     message(STATUS "    USE_TENSORRT : ${USE_TENSORRT}")
   if(${USE_TENSORRT})
     message(STATUS "    TensorRT runtime library: ${TENSORRT_LIBRARY}")

@@ -35,6 +35,13 @@ if(NOT CUDA_FOUND)
   set(CAFFE2_USE_CUDA OFF)
   return()
 endif()

+# Enable CUDA language support
+set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
+enable_language(CUDA)
+set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
+set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+
 message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
 message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
 message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})

@@ -435,6 +442,8 @@ endif()

 # setting nvcc arch flags
 torch_cuda_get_nvcc_gencode_flag(NVCC_FLAGS_EXTRA)
+# CMake 3.18 adds integrated support for architecture selection, but we can't rely on it
+set(CMAKE_CUDA_ARCHITECTURES OFF)
 list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
 message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA}")

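Setting CMAKE_CUDA_ARCHITECTURES to a false value (OFF) opts out of the architecture selection that CMake 3.18 itself performs, leaving the hand-built -gencode flags from torch_cuda_get_nvcc_gencode_flag in charge. A hedged sketch of the two approaches (the architecture values are examples only, not what this build computes):

    set(CMAKE_CUDA_ARCHITECTURES OFF)     # disable CMake's own arch handling, as this commit does
    string(APPEND CMAKE_CUDA_FLAGS
        " -gencode arch=compute_70,code=sm_70")  # illustrative hand-built flag

    # The built-in alternative, not used here, would be e.g.:
    # set(CMAKE_CUDA_ARCHITECTURES 70 75)  # CMake >= 3.18 emits the gencode flags itself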
@@ -453,14 +462,10 @@ endforeach()
 string(REPLACE ";" "," SUPPRESS_WARNING_FLAGS "${SUPPRESS_WARNING_FLAGS}")
 list(APPEND CUDA_NVCC_FLAGS -Xcudafe ${SUPPRESS_WARNING_FLAGS})

-# Set C++14 support
 set(CUDA_PROPAGATE_HOST_FLAGS_BLOCKLIST "-Werror")
 if(MSVC)
   list(APPEND CUDA_NVCC_FLAGS "--Werror" "cross-execution-space-call")
   list(APPEND CUDA_NVCC_FLAGS "--no-host-device-move-forward")
-else()
-  list(APPEND CUDA_NVCC_FLAGS "-std=c++14")
-  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-fPIC")
 endif()

 # OpenMP flags for NVCC with Clang-cl

@@ -477,9 +482,15 @@ endif()
 # Debug and Release symbol support
 if(MSVC)
   if(${CAFFE2_USE_MSVC_STATIC_RUNTIME})
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-MT$<$<CONFIG:Debug>:d>")
+    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MTd")
+    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MT")
+    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MT")
+    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MT")
   else()
-    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-MD$<$<CONFIG:Debug>:d>")
+    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MDd")
+    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MD")
+    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MD")
+    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MD")
   endif()
   if(CUDA_NVCC_FLAGS MATCHES "Zi")
     list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-FS")

@@ -493,3 +504,11 @@ list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")

 # Set expt-extended-lambda to support lambda on device
 list(APPEND CUDA_NVCC_FLAGS "--expt-extended-lambda")
+
+foreach(FLAG ${CUDA_NVCC_FLAGS})
+  string(FIND "${FLAG}" " " flag_space_position)
+  if(NOT flag_space_position EQUAL -1)
+    message(FATAL_ERROR "Found spaces in CUDA_NVCC_FLAGS entry '${FLAG}'")
+  endif()
+  string(APPEND CMAKE_CUDA_FLAGS " ${FLAG}")
+endforeach()

@@ -1,16 +1,29 @@
+if(TARGET caffe2::Threads)
+  return()
+endif()
+
 find_package(Threads REQUIRED)
-# For newer CMake, Threads::Threads is already defined. Otherwise, we will
-# provide a backward compatible wrapper for Threads::Threads.
-if(THREADS_FOUND AND NOT TARGET Threads::Threads)
-  add_library(Threads::Threads INTERFACE IMPORTED)
+
+# Threads::Threads doesn't work if the target has CUDA code
+if(THREADS_FOUND)
+  add_library(caffe2::Threads INTERFACE IMPORTED)

   if(THREADS_HAVE_PTHREAD_ARG)
-    set_property(TARGET Threads::Threads
-                 PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread")
+    set(compile_options
+        $<$<COMPILE_LANGUAGE:C>:-pthread>
+        $<$<COMPILE_LANGUAGE:CXX>:-pthread>)
+    if(USE_CUDA)
+      list(APPEND compile_options
+          $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler -pthread>)
+    endif()
+
+    set_property(TARGET caffe2::Threads
+                 PROPERTY INTERFACE_COMPILE_OPTIONS
+                 ${compile_options})
   endif()

   if(CMAKE_THREAD_LIBS_INIT)
-    set_property(TARGET Threads::Threads
+    set_property(TARGET caffe2::Threads
                  PROPERTY INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
   endif()
 endif()

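This hunk is the motivation for the caffe2::Threads wrapper: a bare -pthread from Threads::Threads reaches nvcc unmodified and is rejected, so the flag has to be scoped per compile language and routed through -Xcompiler for CUDA. A minimal stand-alone sketch of the same idea (target and file names are invented):

    add_library(demo demo.cpp demo_kernels.cu)  # hypothetical mixed C++/CUDA target
    target_compile_options(demo PRIVATE
        $<$<COMPILE_LANGUAGE:CXX>:-pthread>                # host compiler takes -pthread directly
        $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler -pthread>)   # nvcc forwards it to the host compiler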
@@ -348,7 +348,7 @@ macro(torch_cuda_based_add_library cuda_target)
   if(USE_ROCM)
     hip_add_library(${cuda_target} ${ARGN})
   elseif(USE_CUDA)
-    cuda_add_library(${cuda_target} ${ARGN})
+    add_library(${cuda_target} ${ARGN})
   else()
   endif()
 endmacro()

@@ -388,10 +388,11 @@ endmacro()
 # torch_compile_options(lib_name)
 function(torch_compile_options libname)
   set_property(TARGET ${libname} PROPERTY CXX_STANDARD 14)
+  set(private_compile_options "")

   # ---[ Check if warnings should be errors.
   if(WERROR)
-    target_compile_options(${libname} PRIVATE -Werror)
+    list(APPEND private_compile_options -Werror)
   endif()

   if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)

@@ -405,38 +406,50 @@ function(torch_compile_options libname)
   endif()

   target_compile_options(${libname} PUBLIC
-    ${MSVC_RUNTIME_LIBRARY_OPTION}
-    $<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:${MSVC_DEBINFO_OPTION}>
-    /EHsc
-    /DNOMINMAX
-    /wd4267
-    /wd4251
-    /wd4522
-    /wd4522
-    /wd4838
-    /wd4305
-    /wd4244
-    /wd4190
-    /wd4101
-    /wd4996
-    /wd4275
-    /bigobj
+    $<$<COMPILE_LANGUAGE:CXX>:
+      ${MSVC_RUNTIME_LIBRARY_OPTION}
+      $<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:${MSVC_DEBINFO_OPTION}>
+      /EHsc
+      /DNOMINMAX
+      /wd4267
+      /wd4251
+      /wd4522
+      /wd4522
+      /wd4838
+      /wd4305
+      /wd4244
+      /wd4190
+      /wd4101
+      /wd4996
+      /wd4275
+      /bigobj>
     )
 else()
-  target_compile_options(${libname} PRIVATE
+  list(APPEND private_compile_options
     -Wall
     -Wextra
     -Wno-unused-parameter
+    -Wno-unused-variable
+    -Wno-unused-function
+    -Wno-unused-result
+    -Wno-unused-local-typedefs
     -Wno-missing-field-initializers
    -Wno-write-strings
     -Wno-unknown-pragmas
+    -Wno-type-limits
+    -Wno-array-bounds
+    -Wno-unknown-pragmas
+    -Wno-sign-compare
+    -Wno-strict-overflow
+    -Wno-strict-aliasing
+    -Wno-error=deprecated-declarations
     # Clang has an unfixed bug leading to spurious missing braces
     # warnings, see https://bugs.llvm.org/show_bug.cgi?id=21629
     -Wno-missing-braces
   )

   if(NOT APPLE)
-    target_compile_options(${libname} PRIVATE
+    list(APPEND private_compile_options
       # Considered to be flaky. See the discussion at
       # https://github.com/pytorch/pytorch/pull/9608
       -Wno-maybe-uninitialized)

@@ -446,10 +459,23 @@ function(torch_compile_options libname)

   if(MSVC)
   elseif(WERROR)
-    target_compile_options(${libname} PRIVATE -Wno-strict-overflow)
+    list(APPEND private_compile_options -Wno-strict-overflow)
   endif()
 endif()

+target_compile_options(${libname} PRIVATE
+    $<$<COMPILE_LANGUAGE:CXX>:${private_compile_options}>)
+if(USE_CUDA)
+  string(FIND "${private_compile_options}" " " space_position)
+  if(NOT space_position EQUAL -1)
+    message(FATAL_ERROR "Found spaces in private_compile_options='${private_compile_options}'")
+  endif()
+  # Convert CMake list to comma-separated list
+  string(REPLACE ";" "," private_compile_options "${private_compile_options}")
+  target_compile_options(${libname} PRIVATE
+      $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${private_compile_options}>)
+endif()
+
 if(NOT WIN32 AND NOT USE_ASAN)
   # Enable hidden visibility by default to make it easier to debug issues with
   # TORCH_API annotations. Hidden visibility with selective default visibility

@@ -458,11 +484,13 @@ function(torch_compile_options libname)
   # Unfortunately, hidden visibility messes up some ubsan warnings because
   # templated classes crossing library boundary get duplicated (but identical)
   # definitions. It's easier to just disable it.
-  target_compile_options(${libname} PRIVATE "-fvisibility=hidden")
+  target_compile_options(${libname} PRIVATE
+      $<$<COMPILE_LANGUAGE:CXX>: -fvisibility=hidden>)
 endif()

 # Use -O2 for release builds (-O3 doesn't improve perf, and -Os results in perf regression)
-target_compile_options(${libname} PRIVATE "$<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>:-O2>")
+target_compile_options(${libname} PRIVATE
+    $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>>:-O2>)

 endfunction()

@@ -484,3 +512,40 @@ function(torch_set_target_props libname)
     set_target_properties(${libname} PROPERTIES STATIC_LIBRARY_FLAGS_DEBUG "/NODEFAULTLIB:${VCOMP_LIB}d")
   endif()
 endfunction()
+
+##############################################################################
+# Set old-style FindCuda.cmake compile flags from modern CMake cuda flags.
+# Usage:
+#   torch_update_find_cuda_flags()
+function(torch_update_find_cuda_flags)
+  # Convert -O2 -Xcompiler="-O2 -Wall" to "-O2;-Xcompiler=-O2,-Wall"
+  if(USE_CUDA)
+    separate_arguments(FLAGS UNIX_COMMAND "${CMAKE_CUDA_FLAGS}")
+    string(REPLACE " " "," FLAGS "${FLAGS}")
+    set(CUDA_NVCC_FLAGS ${FLAGS} PARENT_SCOPE)
+
+    separate_arguments(FLAGS_DEBUG UNIX_COMMAND "${CMAKE_CUDA_FLAGS_DEBUG}")
+    string(REPLACE " " "," FLAGS_DEBUG "${FLAGS_DEBUG}")
+    set(CUDA_NVCC_FLAGS_DEBUG "${FLAGS_DEBUG}" PARENT_SCOPE)
+
+    separate_arguments(FLAGS_RELEASE UNIX_COMMAND "${CMAKE_CUDA_FLAGS_RELEASE}")
+    string(REPLACE " " "," FLAGS_RELEASE "${FLAGS_RELEASE}")
+    set(CUDA_NVCC_FLAGS_RELEASE "${FLAGS_RELEASE}" PARENT_SCOPE)
+
+    separate_arguments(FLAGS_MINSIZEREL UNIX_COMMAND "${CMAKE_CUDA_FLAGS_MINSIZEREL}")
+    string(REPLACE " " "," FLAGS_MINSIZEREL "${FLAGS_MINSIZEREL}")
+    set(CUDA_NVCC_FLAGS_MINSIZEREL "${FLAGS_MINSIZEREL}" PARENT_SCOPE)
+
+    separate_arguments(FLAGS_RELWITHDEBINFO UNIX_COMMAND "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO}")
+    string(REPLACE " " "," FLAGS_RELWITHDEBINFO "${FLAGS_RELWITHDEBINFO}")
+    set(CUDA_NVCC_FLAGS_RELWITHDEBINFO "${FLAGS_RELWITHDEBINFO}" PARENT_SCOPE)
+
+    message(STATUS "Converting CMAKE_CUDA_FLAGS to CUDA_NVCC_FLAGS:\n"
+                   " CUDA_NVCC_FLAGS = ${FLAGS}\n"
+                   " CUDA_NVCC_FLAGS_DEBUG = ${FLAGS_DEBUG}\n"
+                   " CUDA_NVCC_FLAGS_RELEASE = ${FLAGS_RELEASE}\n"
+                   " CUDA_NVCC_FLAGS_RELWITHDEBINFO = ${FLAGS_RELWITHDEBINFO}\n"
+                   " CUDA_NVCC_FLAGS_MINSIZEREL = ${FLAGS_MINSIZEREL}")
+  endif()
+endfunction()

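The shim above exists for third-party subprojects that still build with FindCUDA. As the Dependencies.cmake hunks earlier in this diff show, the intended usage is to call it immediately before adding such a subdirectory, so the legacy CUDA_NVCC_FLAGS* variables are regenerated from the current CMAKE_CUDA_FLAGS*:

    # gloo and tensorpipe still use cuda_add_library, so hand them
    # FindCUDA-style flags derived from the native ones:
    torch_update_find_cuda_flags()
    add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/gloo)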
@@ -10,7 +10,7 @@ if(BUILD_CAFFE2_OPS)
   # Note(ilijar): Since Detectron ops currently have no
   # CPU implementation, we only build GPU ops for now.
   if(USE_CUDA)
-    CUDA_ADD_LIBRARY(
+    add_library(
         caffe2_detectron_ops_gpu SHARED
         ${Detectron_CPU_SRCS}
         ${Detectron_GPU_SRCS})

@@ -1,5 +1,5 @@
 if(USE_CUDA)
-  cuda_add_library(c10d_cuda_test CUDATest.cu)
+  add_library(c10d_cuda_test CUDATest.cu)
   target_include_directories(c10d_cuda_test PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
   target_link_libraries(c10d_cuda_test torch_cuda)
   add_dependencies(c10d_cuda_test torch_cuda)

@@ -243,7 +243,7 @@ class CMake:
             var: var for var in
             ('BLAS',
              'BUILDING_WITH_TORCH_LIBS',
-             'CUDA_HOST_COMPILER',
+             'CUDA_HOST_COMILER',
              'CUDA_NVCC_EXECUTABLE',
              'CUDA_SEPARABLE_COMPILATION',
              'CUDNN_LIBRARY',

@@ -267,6 +267,15 @@ class CMake:
              'OPENSSL_ROOT_DIR')
         })

+        # Aliases which are lower priority than their canonical option
+        low_priority_aliases = {
+            'CUDA_HOST_COMPILER': 'CMAKE_CUDA_HOST_COMPILER',
+            'CUDAHOSTCXX': 'CUDA_HOST_COMPILER',
+            'CMAKE_CUDA_HOST_COMPILER': 'CUDA_HOST_COMPILER',
+            'CMAKE_CUDA_COMPILER': 'CUDA_NVCC_EXECUTABLE',
+            'CUDACXX': 'CUDA_NVCC_EXECUTABLE'
+        }
+
         for var, val in my_env.items():
             # We currently pass over all environment variables that start with "BUILD_", "USE_", and "CMAKE_". This is
             # because we currently have no reliable way to get the list of all build options we have specified in

@@ -279,6 +288,11 @@ class CMake:
             elif var.startswith(('BUILD_', 'USE_', 'CMAKE_')) or var.endswith(('EXITCODE', 'EXITCODE__TRYRUN_OUTPUT')):
                 build_options[var] = val

+            if var in low_priority_aliases:
+                key = low_priority_aliases[var]
+                if key not in build_options:
+                    build_options[key] = val
+
         # The default value cannot be easily obtained in CMakeLists.txt. We set it here.
         py_lib_path = sysconfig.get_path('purelib')
         cmake_prefix_path = build_options.get('CMAKE_PREFIX_PATH', None)

@@ -111,6 +111,7 @@ else()
 endif()

 if(USE_CUDA)
+  include(${TORCH_ROOT}/cmake/public/cuda.cmake)
   append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
   list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})

@@ -119,16 +120,7 @@ if(USE_CUDA)
     list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN)
   endif()

-  if(MSVC)
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib)
-    list(APPEND TORCH_PYTHON_INCLUDE_DIRECTORIES "${NVTOOLEXT_HOME}/include")
-  elseif(APPLE)
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvToolsExt.dylib)
-  else()
-    find_library(LIBNVTOOLSEXT libnvToolsExt.so PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/)
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${LIBNVTOOLSEXT})
-  endif()
-
+  list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
 endif()

 if(USE_ROCM)

|
|
@ -67,13 +67,13 @@ if(UNIX AND NOT APPLE)
|
||||||
# site above though in case there was a reason we were testing
|
# site above though in case there was a reason we were testing
|
||||||
# against clock_gettime. In principle, the choice of symbol you
|
# against clock_gettime. In principle, the choice of symbol you
|
||||||
# test for shouldn't matter.
|
# test for shouldn't matter.
|
||||||
set(CMAKE_REQUIRED_LIBRARIES Threads::Threads)
|
set(CMAKE_REQUIRED_LIBRARIES caffe2::Threads)
|
||||||
check_library_exists(rt shm_open "sys/mman.h" NEED_RT_AND_PTHREAD)
|
check_library_exists(rt shm_open "sys/mman.h" NEED_RT_AND_PTHREAD)
|
||||||
unset(CMAKE_REQUIRED_LIBRARIES)
|
unset(CMAKE_REQUIRED_LIBRARIES)
|
||||||
if(NEED_RT_AND_PTHREAD)
|
if(NEED_RT_AND_PTHREAD)
|
||||||
message(STATUS "Needs it, linking against pthread and rt")
|
message(STATUS "Needs it, linking against pthread and rt")
|
||||||
target_link_libraries(shm rt Threads::Threads)
|
target_link_libraries(shm rt caffe2::Threads)
|
||||||
target_link_libraries(torch_shm_manager rt Threads::Threads)
|
target_link_libraries(torch_shm_manager rt caffe2::Threads)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
|
||||||