mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Split cuda: list cpp files that go in _cu library explicitly (#69082)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/69082
Test Plan: Imported from OSS
Reviewed By: dagitses, mruberry
Differential Revision: D32723669
Pulled By: malfet
fbshipit-source-id: e9a815b882089dcf1ba76194c728cd7c45377deb
(cherry picked from commit 456d1ebeb8)
This commit is contained in:
parent
e784808bc6
commit
bb456d2bf7
|
|
@ -21,7 +21,8 @@ set(ATen_CPU_SRCS)
|
|||
set(ATen_CPU_TEST_SRCS)
|
||||
set(ATen_CPU_INCLUDE)
|
||||
set(ATen_THIRD_PARTY_INCLUDE)
|
||||
set(ATen_CUDA_SRCS)
|
||||
set(ATen_CUDA_CPP_SRCS)
|
||||
set(ATen_CUDA_CU_SRCS)
|
||||
set(ATen_CUDA_SRCS_W_SORT_BY_KEY)
|
||||
set(ATen_CUDA_TEST_SRCS)
|
||||
set(ATen_CUDA_INCLUDE)
|
||||
|
|
@ -95,8 +96,10 @@ add_subdirectory(src/ATen)
|
|||
|
||||
# Pass source, includes, and libs to parent
|
||||
set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_CU_SRCS ${ATen_CUDA_CU_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_CPP_SRCS ${ATen_CUDA_CPP_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
|
||||
set(ATen_CUDA_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
|
||||
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} PARENT_SCOPE)
|
||||
set(ATen_HIP_SRCS_W_SORT_BY_KEY ${ATen_HIP_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
|
||||
set(ATen_NVRTC_STUB_SRCS ${ATen_NVRTC_STUB_SRCS} PARENT_SCOPE)
|
||||
|
|
|
|||
|
|
@ -113,9 +113,7 @@ file(GLOB native_ao_sparse_h
|
|||
file(GLOB native_quantized_h "native/quantized/*.h" "native/quantized/cpu/*.h")
|
||||
file(GLOB native_cpu_h "native/cpu/*.h")
|
||||
|
||||
file(GLOB native_cuda_cu_sp "native/cuda/Unique.cu" "native/cuda/TensorFactories.cu")
|
||||
file(GLOB native_cuda_cu "native/cuda/*.cu")
|
||||
exclude(native_cuda_cu "${native_cuda_cu}" ${native_cuda_cu_sp})
|
||||
file(GLOB native_cuda_cpp "native/cuda/*.cpp")
|
||||
file(GLOB native_cuda_h "native/cuda/*.h" "native/cuda/*.cuh")
|
||||
file(GLOB native_hip_h "native/hip/*.h" "native/hip/*.cuh")
|
||||
|
|
@ -189,13 +187,35 @@ endif()
|
|||
|
||||
if(USE_CUDA)
|
||||
list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/cuda)
|
||||
set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} ${cuda_cu} ${native_cuda_cu} ${native_sparse_cuda_cu} ${native_quantized_cuda_cu})
|
||||
set(ATen_CUDA_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY} ${native_cuda_cu_sp})
|
||||
set(all_cuda_cpp ${native_sparse_cuda_cpp} ${native_quantized_cuda_cpp} ${cuda_cpp} ${native_cuda_cpp} ${cuda_generated_sources} ${ATen_CUDA_SRCS})
|
||||
set(all_cuda_cpp ${native_cudnn_cpp} ${native_miopen_cpp} ${all_cuda_cpp})
|
||||
|
||||
list(APPEND ATen_CUDA_CU_SRCS
|
||||
${cuda_cu}
|
||||
${native_cuda_cu}
|
||||
${native_sparse_cuda_cu}
|
||||
${native_quantized_cuda_cu}
|
||||
${cuda_generated_sources}
|
||||
)
|
||||
list(APPEND ATen_CUDA_CPP_SRCS
|
||||
${cuda_cpp}
|
||||
${native_cuda_cpp}
|
||||
${native_cudnn_cpp}
|
||||
${native_miopen_cpp}
|
||||
${native_quantized_cuda_cpp}
|
||||
${native_sparse_cuda_cpp}
|
||||
)
|
||||
if(CAFFE2_USE_CUDNN)
|
||||
set(all_cuda_cpp ${all_cuda_cpp} ${cudnn_cpp})
|
||||
list(APPEND ATen_CUDA_CPP_SRCS ${cudnn_cpp})
|
||||
endif()
|
||||
|
||||
append_filelist("aten_cuda_cu_source_list" ATen_CUDA_CU_SRCS)
|
||||
append_filelist("aten_cuda_with_sort_by_key_source_list" ATen_CUDA_SRCS_W_SORT_BY_KEY)
|
||||
append_filelist("aten_cuda_cu_with_sort_by_key_source_list" ATen_CUDA_CU_SRCS_W_SORT_BY_KEY)
|
||||
|
||||
exclude(ATen_CUDA_CPP_SRCS "${ATen_CUDA_CPP_SRCS}"
|
||||
${ATen_CUDA_CU_SRCS}
|
||||
${ATen_CUDA_SRCS_W_SORT_BY_KEY} ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY})
|
||||
exclude(ATen_CUDA_CU_SRCS "${ATen_CUDA_CU_SRCS}"
|
||||
${ATen_CUDA_SRCS_W_SORT_BY_KEY} ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY})
|
||||
endif()
|
||||
|
||||
if(USE_ROCM)
|
||||
|
|
@ -428,7 +448,6 @@ set(ATen_CPU_SRCS ${all_cpu_cpp})
|
|||
list(APPEND ATen_CPU_DEPENDENCY_LIBS ATEN_CPU_FILES_GEN_LIB)
|
||||
|
||||
if(USE_CUDA)
|
||||
set(ATen_CUDA_SRCS ${all_cuda_cpp})
|
||||
set(ATen_NVRTC_STUB_SRCS ${cuda_nvrtc_stub_cpp})
|
||||
list(APPEND ATen_CUDA_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB)
|
||||
endif()
|
||||
|
|
@ -512,8 +531,10 @@ list(APPEND ATen_MOBILE_BENCHMARK_SRCS
|
|||
# Pass source, includes, and libs to parent
|
||||
set(ATen_CORE_SRCS ${ATen_CORE_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_CU_SRCS ${ATen_CUDA_CU_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_CPP_SRCS ${ATen_CUDA_CPP_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
|
||||
set(ATen_CUDA_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
|
||||
set(ATen_NVRTC_STUB_SRCS ${ATen_NVRTC_STUB_SRCS} PARENT_SCOPE)
|
||||
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} PARENT_SCOPE)
|
||||
set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE)
|
||||
|
|
|
|||
|
|
@ -84,8 +84,10 @@ if(INTERN_BUILD_ATEN_OPS)
|
|||
|
||||
# Add source, includes, and libs to lists
|
||||
list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS})
|
||||
list(APPEND Caffe2_GPU_SRCS ${ATen_CUDA_SRCS})
|
||||
list(APPEND Caffe2_GPU_SRCS ${ATen_CUDA_CPP_SRCS})
|
||||
list(APPEND Caffe2_GPU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY})
|
||||
list(APPEND Caffe2_GPU_CU_SRCS ${ATen_CUDA_CU_SRCS})
|
||||
list(APPEND Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY})
|
||||
list(APPEND Caffe2_HIP_SRCS ${ATen_HIP_SRCS})
|
||||
list(APPEND Caffe2_HIP_SRCS ${ATen_HIP_SRCS_W_SORT_BY_KEY})
|
||||
list(APPEND Caffe2_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS})
|
||||
|
|
@ -171,68 +173,37 @@ endif()
|
|||
if(CAFFE2_ALLOWLISTED_FILES)
|
||||
caffe2_do_allowlist(Caffe2_CPU_SRCS CAFFE2_ALLOWLISTED_FILES)
|
||||
caffe2_do_allowlist(Caffe2_GPU_SRCS CAFFE2_ALLOWLISTED_FILES)
|
||||
caffe2_do_allowlist(Caffe2_GPU_SRCS_W_SORT_BY_KEY CAFFE2_ALLOWLISTED_FILES)
|
||||
caffe2_do_allowlist(Caffe2_GPU_CU_SRCS CAFFE2_ALLOWLISTED_FILES)
|
||||
caffe2_do_allowlist(Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY CAFFE2_ALLOWLISTED_FILES)
|
||||
caffe2_do_allowlist(Caffe2_HIP_SRCS CAFFE2_ALLOWLISTED_FILES)
|
||||
endif()
|
||||
|
||||
if(BUILD_SPLIT_CUDA)
|
||||
# Splitting the source files that'll be in torch_cuda between torch_cuda_cu and torch_cuda_cpp
|
||||
foreach(tmp ${Caffe2_GPU_SRCS})
|
||||
if("${tmp}" MATCHES "(.*aten.*\\.cu|.*(b|B)las.*|.*((s|S)olver|Register.*CUDA|Legacy|THC|TensorShapeCUDA|BatchLinearAlgebra|ReduceOps|Equal|Activation|ScanKernels|Sort|TensorTopK|TensorModeKernel|IndexKernel|jit_utils).*\\.cpp)" AND NOT "${tmp}" MATCHES ".*(THC((CachingHost)?Allocator|General)).*")
|
||||
# Currently, torch_cuda_cu will have all the .cu files in aten, as well as some others that depend on those files
|
||||
list(APPEND Caffe2_GPU_SRCS_CU ${tmp})
|
||||
else()
|
||||
list(APPEND Caffe2_GPU_SRCS_CPP ${tmp})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
foreach(tmp ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
|
||||
if("${tmp}" MATCHES ".*aten.*\\.cu" AND NOT "${tmp}" MATCHES ".*TensorFactories.*")
|
||||
list(APPEND Caffe2_GPU_SRCS_W_SORT_BY_KEY_CU ${tmp})
|
||||
else()
|
||||
list(APPEND Caffe2_GPU_SRCS_W_SORT_BY_KEY_CPP ${tmp})
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(PRINT_CMAKE_DEBUG_INFO)
|
||||
message(STATUS "CPU sources: ")
|
||||
foreach(tmp ${Caffe2_CPU_SRCS})
|
||||
message(STATUS " " ${tmp})
|
||||
endforeach()
|
||||
|
||||
message(STATUS "GPU sources: ")
|
||||
message(STATUS "GPU sources: (for torch_cuda_cpp)")
|
||||
foreach(tmp ${Caffe2_GPU_SRCS})
|
||||
message(STATUS " " ${tmp})
|
||||
endforeach()
|
||||
|
||||
if(BUILD_SPLIT_CUDA)
|
||||
message(STATUS "GPU sources: (for torch_cuda_cpp)")
|
||||
foreach(tmp ${Caffe2_GPU_SRCS_CPP})
|
||||
message(STATUS " " ${tmp})
|
||||
endforeach()
|
||||
|
||||
message(STATUS "GPU sources: (for torch_cuda_cu)")
|
||||
foreach(tmp ${Caffe2_GPU_SRCS_CU})
|
||||
message(STATUS " " ${tmp})
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
message(STATUS "GPU sources (w/ sort by key): ")
|
||||
foreach(tmp ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
|
||||
foreach(tmp ${Caffe2_GPU_CU_SRCS})
|
||||
message(STATUS " " ${tmp})
|
||||
endforeach()
|
||||
|
||||
if(BUILD_SPLIT_CUDA)
|
||||
message(STATUS "torch_cuda_cu GPU sources (w/ sort by key): ")
|
||||
foreach(tmp ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CU})
|
||||
foreach(tmp ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
|
||||
message(STATUS " " ${tmp})
|
||||
endforeach()
|
||||
|
||||
message(STATUS "torch_cuda_cpp GPU sources (w/ sort by key): ")
|
||||
foreach(tmp ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CPP})
|
||||
foreach(tmp ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
|
||||
message(STATUS " " ${tmp})
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
message(STATUS "CPU include: ")
|
||||
foreach(tmp ${Caffe2_CPU_INCLUDE})
|
||||
|
|
@ -626,11 +597,7 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
|||
endif()
|
||||
|
||||
if(USE_CUDA)
|
||||
if(BUILD_SPLIT_CUDA)
|
||||
list(APPEND Caffe2_GPU_SRCS_CU ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS})
|
||||
else()
|
||||
list(APPEND Caffe2_GPU_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS})
|
||||
endif()
|
||||
list(APPEND Caffe2_GPU_CU_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS})
|
||||
add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS})
|
||||
if(MSVC)
|
||||
# Delay load nvcuda.dll so we can import torch compiled with cuda on a CPU-only machine
|
||||
|
|
@ -641,22 +608,14 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
|||
target_link_libraries(caffe2_nvrtc ${CUDA_NVRTC} ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB} ${DELAY_LOAD_FLAGS})
|
||||
target_include_directories(caffe2_nvrtc PRIVATE ${CUDA_INCLUDE_DIRS})
|
||||
install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
if(USE_NCCL AND BUILD_SPLIT_CUDA)
|
||||
list(APPEND Caffe2_GPU_SRCS_CPP
|
||||
${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
|
||||
elseif(USE_NCCL)
|
||||
if(USE_NCCL)
|
||||
list(APPEND Caffe2_GPU_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
|
||||
endif()
|
||||
if(USE_DISTRIBUTED)
|
||||
if(BUILD_SPLIT_CUDA)
|
||||
set(_target "Caffe2_GPU_SRCS_CPP")
|
||||
else()
|
||||
set(_target "Caffe2_GPU_SRCS")
|
||||
endif()
|
||||
append_filelist("libtorch_cuda_distributed_base_sources" ${_target})
|
||||
append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS)
|
||||
if(NOT WIN32)
|
||||
append_filelist("libtorch_cuda_distributed_extra_sources" ${_target})
|
||||
append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS)
|
||||
endif()
|
||||
endif()
|
||||
set_source_files_properties(
|
||||
|
|
@ -904,17 +863,21 @@ elseif(USE_CUDA)
|
|||
if(CUDA_SEPARABLE_COMPILATION)
|
||||
# Separate compilation fails when kernels using `thrust::sort_by_key`
|
||||
# are linked with the rest of CUDA code. Workaround by linking them separately.
|
||||
add_library(torch_cuda ${Caffe2_GPU_SRCS})
|
||||
add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_CU_SRCS})
|
||||
set_property(TARGET torch_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
add_library(torch_cuda_w_sort_by_key OBJECT ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
|
||||
add_library(torch_cuda_w_sort_by_key OBJECT
|
||||
${Caffe2_GPU_SRCS_W_SORT_BY_KEY}
|
||||
${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
|
||||
set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
|
||||
target_link_libraries(torch_cuda PRIVATE torch_cuda_w_sort_by_key)
|
||||
elseif(BUILD_SPLIT_CUDA)
|
||||
add_library(torch_cuda_cpp ${Caffe2_GPU_SRCS_CPP} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CPP})
|
||||
add_library(torch_cuda_cu ${Caffe2_GPU_SRCS_CU} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY_CU})
|
||||
add_library(torch_cuda_cpp ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
|
||||
add_library(torch_cuda_cu ${Caffe2_GPU_CU_SRCS} ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
|
||||
else()
|
||||
add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
|
||||
add_library(torch_cuda
|
||||
${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY}
|
||||
${Caffe2_GPU_CU_SRCS} ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
|
||||
endif()
|
||||
set(CUDA_LINK_LIBRARIES_KEYWORD)
|
||||
if(BUILD_SPLIT_CUDA)
|
||||
|
|
|
|||
|
|
@ -1314,3 +1314,41 @@ aten_native_source_non_codegen_list = [
|
|||
# TODO: move the exceptions to proper locations
|
||||
# 2. The whole aten native source list includes the list with and without aten codegen process.
|
||||
aten_native_source_list = sorted(aten_native_source_non_codegen_list + aten_native_source_codegen_list)
|
||||
|
||||
# These are cpp files which need to go in the torch_cuda_cu library
|
||||
# .cu files can be found via glob
|
||||
aten_cuda_cu_source_list = [
|
||||
"aten/src/ATen/cuda/CUDABlas.cpp",
|
||||
"aten/src/ATen/cuda/CUDASolver.cpp",
|
||||
"aten/src/ATen/cuda/CUDASparseBlas.cpp",
|
||||
"aten/src/ATen/cuda/CublasHandlePool.cpp",
|
||||
"aten/src/ATen/cuda/CusolverDnHandlePool.cpp",
|
||||
"aten/src/ATen/native/cuda/Activation.cpp",
|
||||
"aten/src/ATen/native/cuda/BatchLinearAlgebra.cpp",
|
||||
"aten/src/ATen/native/cuda/BatchLinearAlgebraLib.cpp",
|
||||
"aten/src/ATen/native/cuda/Blas.cpp",
|
||||
"aten/src/ATen/native/cuda/Equal.cpp",
|
||||
"aten/src/ATen/native/cuda/IndexKernel.cpp",
|
||||
"aten/src/ATen/native/cuda/ReduceOps.cpp",
|
||||
"aten/src/ATen/native/cuda/ScanKernels.cpp",
|
||||
"aten/src/ATen/native/cuda/Sort.cpp",
|
||||
"aten/src/ATen/native/cuda/Sorting.cpp",
|
||||
"aten/src/ATen/native/cuda/TensorModeKernel.cpp",
|
||||
"aten/src/ATen/native/cuda/TensorShapeCUDA.cpp",
|
||||
"aten/src/ATen/native/cuda/TensorTopK.cpp",
|
||||
"aten/src/ATen/native/cuda/jit_utils.cpp",
|
||||
"aten/src/ATen/native/sparse/cuda/SparseBlas.cpp",
|
||||
"aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp",
|
||||
"aten/src/ATen/native/sparse/cuda/SparseBlasLegacy.cpp",
|
||||
"aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cpp",
|
||||
]
|
||||
|
||||
# Files using thrust::sort_by_key need to be linked last
|
||||
aten_cuda_with_sort_by_key_source_list = [
|
||||
# empty_cuda is needed by torch_cuda_cpp
|
||||
"aten/src/ATen/native/cuda/TensorFactories.cu",
|
||||
]
|
||||
|
||||
aten_cuda_cu_with_sort_by_key_source_list = [
|
||||
"aten/src/ATen/native/cuda/Unique.cu",
|
||||
]
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user