Revert D33992795: Add BUILD_LAZY_CUDA_LINALG option

Test Plan: revert-hammer

Differential Revision:
D33992795 (82130758f0)

Original commit changeset: d1fa351a3206

Original Phabricator Diff: D33992795 (82130758f0)

fbshipit-source-id: f0a66d7431aea2c358718eef16fab05712cd6cae
(cherry picked from commit df4900115f712e477ed5cc97510e6515a1ca17a9)
This commit is contained in:
Jane Xu 2022-02-25 10:18:50 -08:00 committed by PyTorch MergeBot
parent dc1bd9711e
commit 31271284bc
11 changed files with 33 additions and 290 deletions

View File

@ -189,8 +189,6 @@ option(USE_CUDA "Use CUDA" ON)
cmake_dependent_option(
BUILD_SPLIT_CUDA "Split torch_cuda library into torch_cuda_cu and torch_cuda_cpp" OFF
"USE_CUDA AND NOT CUDA_SEPARABLE_COMPILATION" OFF)
cmake_dependent_option(
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
option(USE_FAST_NVCC "Use parallel NVCC build" OFF)
option(USE_ROCM "Use ROCm" ON)
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)

View File

@ -23,7 +23,6 @@ set(ATen_CPU_INCLUDE)
set(ATen_THIRD_PARTY_INCLUDE)
set(ATen_CUDA_CPP_SRCS)
set(ATen_CUDA_CU_SRCS)
set(ATen_CUDA_LINALG_SRCS)
set(ATen_CUDA_SRCS_W_SORT_BY_KEY)
set(ATen_CUDA_TEST_SRCS)
set(ATen_CUDA_INCLUDE)
@ -100,7 +99,6 @@ set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE)
set(ATen_CORE_SRCS ${ATen_CORE_SRCS} PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS ${ATen_CUDA_CU_SRCS} PARENT_SCOPE)
set(ATen_CUDA_CPP_SRCS ${ATen_CUDA_CPP_SRCS} PARENT_SCOPE)
set(ATen_CUDA_LINALG_SRCS ${ATen_CUDA_LINALG_SRCS} PARENT_SCOPE)
set(ATen_CUDA_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} PARENT_SCOPE)

View File

@ -194,6 +194,7 @@ if(USE_CUDA)
list(APPEND ATen_CUDA_CU_SRCS
${cuda_cu}
${native_cuda_cu}
${native_cuda_linalg_cpp}
${native_sparse_cuda_cu}
${native_quantized_cuda_cu}
${cuda_generated_sources}
@ -207,10 +208,6 @@ if(USE_CUDA)
${native_quantized_cudnn_cpp}
${native_sparse_cuda_cpp}
)
set(ATen_CUDA_LINALG_SRCS ${native_cuda_linalg_cpp})
if(NOT BUILD_LAZY_CUDA_LINALG)
list(APPEND ATen_CUDA_CU_SRCS ${native_cuda_linalg_cpp})
endif()
if(CAFFE2_USE_CUDNN)
list(APPEND ATen_CUDA_CPP_SRCS ${cudnn_cpp})
endif()
@ -395,25 +392,17 @@ if(USE_CUDA AND NOT USE_ROCM)
${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand_static.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcufft_static_nocallback.a
)
if(NOT BUILD_LAZY_CUDA_LINALG)
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcusolver_static.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/liblapack_static.a # needed for libcusolver_static
)
endif()
else()
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
${CUDA_LIBRARIES}
${CUDA_cusparse_LIBRARY}
${CUDA_curand_LIBRARY}
)
if(NOT BUILD_LAZY_CUDA_LINALG)
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
${CUDA_cusolver_LIBRARY}
)
endif()
endif()
if(CAFFE2_USE_CUDNN)
list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${CUDNN_LIBRARIES})
@ -426,9 +415,9 @@ endif()
if(USE_MAGMA)
if(USE_CUDA AND NOT BUILD_LAZY_CUDA_LINALG)
if(USE_CUDA)
list(APPEND ATen_CUDA_DEPENDENCY_LIBS torch::magma)
endif(USE_CUDA AND NOT BUILD_LAZY_CUDA_LINALG)
endif(USE_CUDA)
if(USE_ROCM)
list(APPEND ATen_HIP_DEPENDENCY_LIBS torch::magma)
endif(USE_ROCM)
@ -547,7 +536,6 @@ set(ATen_CORE_SRCS ${ATen_CORE_SRCS} PARENT_SCOPE)
set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS ${ATen_CUDA_CU_SRCS} PARENT_SCOPE)
set(ATen_CUDA_CPP_SRCS ${ATen_CUDA_CPP_SRCS} PARENT_SCOPE)
set(ATen_CUDA_LINALG_SRCS ${ATen_CUDA_LINALG_SRCS} PARENT_SCOPE)
set(ATen_CUDA_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_NVRTC_STUB_SRCS ${ATen_NVRTC_STUB_SRCS} PARENT_SCOPE)

View File

@ -25,7 +25,7 @@ static void* checkDL(void* x) {
return x;
}
DynamicLibrary::DynamicLibrary(const char* name, const char* alt_name, bool leak_handle_): leak_handle(leak_handle_) {
DynamicLibrary::DynamicLibrary(const char* name, const char* alt_name) {
// NOLINTNEXTLINE(hicpp-signed-bitwise)
handle = dlopen(name, RTLD_LOCAL | RTLD_NOW);
if (!handle) {
@ -46,9 +46,8 @@ void* DynamicLibrary::sym(const char* name) {
}
DynamicLibrary::~DynamicLibrary() {
if (!handle || leak_handle) {
if (!handle)
return;
}
dlclose(handle);
}
@ -56,7 +55,7 @@ DynamicLibrary::~DynamicLibrary() {
// Windows
DynamicLibrary::DynamicLibrary(const char* name, const char* alt_name, bool leak_handle_): leak_handle(leak_handle_) {
DynamicLibrary::DynamicLibrary(const char* name, const char* alt_name) {
// NOLINTNEXTLINE(hicpp-signed-bitwise)
HMODULE theModule;
bool reload = true;
@ -98,7 +97,7 @@ void* DynamicLibrary::sym(const char* name) {
}
DynamicLibrary::~DynamicLibrary() {
if (!handle || leak_handle) {
if (!handle) {
return;
}
FreeLibrary((HMODULE)handle);

View File

@ -8,14 +8,13 @@ namespace at {
struct DynamicLibrary {
AT_DISALLOW_COPY_AND_ASSIGN(DynamicLibrary);
TORCH_API DynamicLibrary(const char* name, const char* alt_name = nullptr, bool leak_handle = false);
TORCH_API DynamicLibrary(const char* name, const char* alt_name = nullptr);
TORCH_API void* sym(const char* name);
TORCH_API ~DynamicLibrary();
private:
bool leak_handle;
void* handle = nullptr;
};

View File

@ -1,203 +0,0 @@
// LinearAlgebraStubs.cpp
// Mostly a no-op unless BUILD_LAZY_CUDA_LINALG is defined
// In that case load library is dynamically loaded when first linalg call is made
// This helps reduce size of GPU memory context if linear algebra functions are not used
#include <ATen/Context.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAConfig.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Dispatch.h>
#include <ATen/DynamicLibrary.h>
#include <ATen/NativeFunctions.h>
#include <ATen/native/cuda/MiscUtils.h>
#include <ATen/native/Resize.h>
#include <ATen/native/LinearAlgebra.h>
#include <ATen/native/BatchLinearAlgebra.h>
#if defined(BUILD_LAZY_CUDA_LINALG)
#include <ATen/native/cuda/linalg/BatchLinearAlgebraLib.h>
#if AT_MAGMA_ENABLED()
#include <ATen/cuda/detail/CUDAHooks.h>
namespace {
struct MagmaInitializer {
MagmaInitializer() {
::at::cuda::detail::set_magma_init_fn([]{ });
};
} initializer;
} // namespace (anonymous)
#endif
#endif
namespace at {
namespace native {
#if defined(BUILD_LAZY_CUDA_LINALG)
namespace {
cuda::detail::LinalgDispatch disp = {nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
at::DynamicLibrary& getTorchLinalgLibrary() {
static at::DynamicLibrary lib("libtorch_cuda_linalg.so", nullptr, true);
return lib;
}
// Lazy dispatches do nothing but load linalg library and call the stub
// Loading the library should override the registration of those with the proper implementation
// getTorchLinalgLibrary() throws an exception if library is not found
// Which makes it unnecessary to have an explicit error checking
void lazy_cholesky_kernel(const Tensor& input, const Tensor& info, bool upper) {
getTorchLinalgLibrary();
cholesky_stub(DeviceType::CUDA, input, info, upper);
}
Tensor& lazy_cholesky_inverse_kernel(Tensor &result, Tensor& infos, bool upper) {
getTorchLinalgLibrary();
return cholesky_inverse_stub(DeviceType::CUDA, result, infos, upper);
}
void lazy_lu_factor(const Tensor& input, const Tensor& pivots, const Tensor& infos, bool compute_pivots) {
getTorchLinalgLibrary();
lu_factor_stub(DeviceType::CUDA, input, pivots, infos, compute_pivots);
}
void lazy_triangular_solve_kernel(const Tensor& A, const Tensor& B, bool left, bool upper, TransposeType transpose, bool unitriangular) {
getTorchLinalgLibrary();
triangular_solve_stub(DeviceType::CUDA, A, B, left, upper, transpose, unitriangular);
}
Tensor& lazy_orgqr_kernel(Tensor& result, const Tensor& tau) {
getTorchLinalgLibrary();
return orgqr_stub(DeviceType::CUDA, result, tau);
}
void lazy_ormqr_kernel(const Tensor& input, const Tensor& tau, const Tensor& other, bool left, bool transpose) {
getTorchLinalgLibrary();
ormqr_stub(DeviceType::CUDA, input, tau, other, left, transpose);
}
void lazy_geqrf_kernel(const Tensor& input, const Tensor& tau) {
getTorchLinalgLibrary();
geqrf_stub(DeviceType::CUDA, input, tau);
}
void lazy_linalg_eigh_kernel(const Tensor& eigenvalues, const Tensor& eigenvectors, const Tensor& infos, bool upper, bool compute_eigenvectors) {
getTorchLinalgLibrary();
linalg_eigh_stub(DeviceType::CUDA, eigenvalues, eigenvectors, infos, upper, compute_eigenvectors);
}
std::tuple<Tensor, Tensor> lazy_eig_kernel(const Tensor& self, bool& eigenvectors) {
getTorchLinalgLibrary();
return eig_stub(DeviceType::CUDA, self, eigenvectors);
}
void lazy_linalg_eig_kernel(Tensor& eigenvalues, Tensor& eigenvectors, Tensor& infos, const Tensor& input, bool compute_eigenvectors) {
getTorchLinalgLibrary();
linalg_eig_stub(DeviceType::CUDA, eigenvalues, eigenvectors, infos, input, compute_eigenvectors);
}
void lazy_svd_kernel(const Tensor& A,
const bool full_matrices,
const bool compute_uv,
const Tensor& U,
const Tensor& S,
const Tensor& Vh,
const Tensor& info) {
getTorchLinalgLibrary();
svd_stub(DeviceType::CUDA, A, full_matrices, compute_uv, U, S, Vh, info);
}
void lazy_lu_solve_trans(const Tensor& b, const Tensor& lu, const Tensor& pivots, TransposeType trans) {
getTorchLinalgLibrary();
lu_solve_trans_stub(DeviceType::CUDA, b, lu, pivots, trans);
}
void lazy_lu_solve(const Tensor& b, const Tensor& lu, const Tensor& pivots) {
getTorchLinalgLibrary();
lu_solve_stub(DeviceType::CUDA, b, lu, pivots);
}
void lazy_lstsq_kernel(const Tensor& a, Tensor& b, Tensor& rank, Tensor& singular_values, Tensor& infos, double rcond, std::string driver_name) {
getTorchLinalgLibrary();
lstsq_stub(DeviceType::CUDA, a, b, rank, singular_values, infos, rcond, driver_name);
}
REGISTER_CUDA_DISPATCH(cholesky_stub, &lazy_cholesky_kernel)
REGISTER_CUDA_DISPATCH(cholesky_inverse_stub, &lazy_cholesky_inverse_kernel);
REGISTER_CUDA_DISPATCH(lu_factor_stub, &lazy_lu_factor);
REGISTER_CUDA_DISPATCH(triangular_solve_stub, &lazy_triangular_solve_kernel);
REGISTER_CUDA_DISPATCH(orgqr_stub, &lazy_orgqr_kernel);
REGISTER_CUDA_DISPATCH(ormqr_stub, &lazy_ormqr_kernel);
REGISTER_CUDA_DISPATCH(geqrf_stub, &lazy_geqrf_kernel);
REGISTER_CUDA_DISPATCH(linalg_eigh_stub, &lazy_linalg_eigh_kernel);
REGISTER_CUDA_DISPATCH(eig_stub, &lazy_eig_kernel);
REGISTER_CUDA_DISPATCH(linalg_eig_stub, &lazy_linalg_eig_kernel);
REGISTER_CUDA_DISPATCH(svd_stub, &lazy_svd_kernel)
REGISTER_CUDA_DISPATCH(lu_solve_trans_stub, &lazy_lu_solve_trans);
REGISTER_CUDA_DISPATCH(lu_solve_stub, &lazy_lu_solve);
REGISTER_CUDA_DISPATCH(lstsq_stub, &lazy_lstsq_kernel);
} // anonymous namespace
// Old style dispatches
// torch_cuda_linalg dynamic library should have a global constructor
// that calls regiserLinaglDispatch so in order ot lazy bind
// old style dispatch all one have to do is to load library and call disp.func_name
namespace cuda {
namespace detail {
void registerLinalgDispatch(const LinalgDispatch& disp_) {
disp = disp_;
}
}} //namespace cuda::detail
Tensor& _linalg_inv_out_helper_cuda(Tensor &result, Tensor& infos_lu, Tensor& infos_getri) {
getTorchLinalgLibrary();
TORCH_CHECK(disp.inv_out_helper != nullptr, "Can't find _linalg_inv_out_helper_cuda");
return disp.inv_out_helper(result, infos_lu, infos_getri);
}
std::tuple<Tensor, Tensor> legacy_lstsq_cuda(const Tensor &B, const Tensor &A) {
getTorchLinalgLibrary();
TORCH_CHECK(disp.legacy_lstsq != nullptr, "Can't find legacy_lstsq_cuda");
return disp.legacy_lstsq(B, A);
}
Tensor _cholesky_solve_helper_cuda(const Tensor& self, const Tensor& A, bool upper) {
getTorchLinalgLibrary();
TORCH_CHECK(disp.cholesky_solve_helper != nullptr, "Can't find _cholesky_solve_helper_cuda");
return disp.cholesky_solve_helper(self, A, upper);
}
std::tuple<Tensor, Tensor> _linalg_qr_helper_cuda(const Tensor& input, c10::string_view mode) {
getTorchLinalgLibrary();
TORCH_CHECK(disp.qr_helper != nullptr, "Can't find _linalg_qr_helper_cuda");
return disp.qr_helper(input, mode);
}
std::tuple<Tensor, Tensor> _symeig_helper_cuda(const Tensor& self, bool eigenvectors, bool upper) {
getTorchLinalgLibrary();
TORCH_CHECK(disp.symeig_helper != nullptr, "Can't find _symeig_helper_cuda");
return disp.symeig_helper(self, eigenvectors, upper);
}
std::tuple<Tensor, Tensor> _solve_helper_cuda(const Tensor& self, const Tensor& A) {
getTorchLinalgLibrary();
TORCH_CHECK(disp.solve_helper != nullptr, "Can't find _solve_helper_cuda");
return disp.solve_helper(self, A);
}
#endif /*defined(BUILD_LAZY_CUDA_LINALG)*/
std::tuple<Tensor&, Tensor&> legacy_lstsq_out_cuda(
const Tensor& B, const Tensor& A, Tensor& B_out, Tensor& A_out) {
const auto dtype = A.scalar_type();
TORCH_CHECK(B.scalar_type() == dtype, "exepected A and B dtypes to match but found ",
A.scalar_type(), " and ", B.scalar_type());
TORCH_CHECK(A_out.scalar_type() == dtype, "A_out to have scalar type ", dtype,
" but found", A_out.scalar_type());
TORCH_CHECK(B_out.scalar_type() == dtype, "A_out to have scalar type ", dtype,
" but found", B_out.scalar_type());
Tensor A_tmp, B_tmp;
std::tie(B_tmp, A_tmp) = native::legacy_lstsq_cuda(B, A);
resize_output(A_out, A_tmp.sizes());
A_out.copy_(A_tmp);
resize_output(B_out, B_tmp.sizes());
B_out.copy_(B_tmp);
return std::tuple<Tensor&, Tensor&>(B_out, A_out);
}
}} // namespace at::native

View File

@ -9,6 +9,7 @@
#include <ATen/native/LinearAlgebraUtils.h>
#include <ATen/native/cuda/MiscUtils.h>
#include <ATen/native/Resize.h>
#include <ATen/native/LinearAlgebra.h>
#include <ATen/native/BatchLinearAlgebra.h>
#include <ATen/native/cuda/linalg/BatchLinearAlgebraLib.h>
@ -25,12 +26,8 @@ const bool use_magma_ = true;
namespace {
struct MagmaInitializer {
MagmaInitializer() {
#if defined(BUILD_LAZY_CUDA_LINALG)
magma_init();
#else
::at::cuda::detail::set_magma_init_fn([]{ magma_init(); });
#endif
}
};
} initializer;
} // namespace (anonymous)
@ -3248,22 +3245,25 @@ std::tuple<Tensor, Tensor> legacy_lstsq_cuda(const Tensor &B, const Tensor &A) {
#endif // AT_MAGMA_ENABLED()
}
std::tuple<Tensor&, Tensor&> legacy_lstsq_out_cuda(
const Tensor& B, const Tensor& A, Tensor& B_out, Tensor& A_out) {
const auto dtype = A.scalar_type();
TORCH_CHECK(B.scalar_type() == dtype, "exepected A and B dtypes to match but found ",
A.scalar_type(), " and ", B.scalar_type());
TORCH_CHECK(A_out.scalar_type() == dtype, "A_out to have scalar type ", dtype,
" but found", A_out.scalar_type());
TORCH_CHECK(B_out.scalar_type() == dtype, "A_out to have scalar type ", dtype,
" but found", B_out.scalar_type());
Tensor A_tmp, B_tmp;
std::tie(B_tmp, A_tmp) = native::legacy_lstsq_cuda(B, A);
resize_output(A_out, A_tmp.sizes());
A_out.copy_(A_tmp);
resize_output(B_out, B_tmp.sizes());
B_out.copy_(B_tmp);
return std::tuple<Tensor&, Tensor&>(B_out, A_out);
}
#if defined(BUILD_LAZY_CUDA_LINALG)
namespace {
struct DispatchInitializer {
DispatchInitializer() {
cuda::detail::LinalgDispatch disp{ _solve_helper_cuda,
_symeig_helper_cuda,
_linalg_qr_helper_cuda,
_cholesky_solve_helper_cuda,
legacy_lstsq_cuda,
_linalg_inv_out_helper_cuda};
cuda::detail::registerLinalgDispatch(disp);
};
} initializer;
} // namespace (anonymous)
#endif
}} // namespace at::native
#undef ALLOCATE_ARRAY

View File

@ -65,20 +65,4 @@ void lu_factor_looped_cusolver(const Tensor& self, const Tensor& pivots, const T
#endif // USE_CUSOLVER
#if defined(BUILD_LAZY_CUDA_LINALG)
namespace cuda { namespace detail {
// This is only used for an old-style dispatches
// Please do not add any new entires to it
struct LinalgDispatch {
std::tuple<Tensor, Tensor> (*solve_helper)(const Tensor& self, const Tensor& A);
std::tuple<Tensor, Tensor> (*symeig_helper)(const Tensor& self, bool eigenvectors, bool upper);
std::tuple<Tensor, Tensor> (*qr_helper)(const Tensor& input, c10::string_view mode);
Tensor (*cholesky_solve_helper)(const Tensor& self, const Tensor& A, bool upper);
std::tuple<Tensor, Tensor> (*legacy_lstsq)(const Tensor &B, const Tensor &A);
Tensor& (*inv_out_helper)(Tensor &result, Tensor& infos_lu, Tensor& infos_getri);
};
C10_EXPORT void registerLinalgDispatch(const LinalgDispatch&);
}} // namespace cuda::detail
#endif
}} // namespace at::native

View File

@ -901,22 +901,6 @@ elseif(USE_CUDA)
target_link_libraries(torch_cuda PRIVATE __caffe2_nccl)
target_compile_definitions(torch_cuda PRIVATE USE_NCCL)
endif()
if(BUILD_LAZY_CUDA_LINALG)
add_library(torch_cuda_linalg ${ATen_CUDA_LINALG_SRCS})
target_compile_definitions(torch_cuda_linalg PRIVATE USE_CUDA BUILD_LAZY_CUDA_LINALG)
target_link_libraries(torch_cuda_linalg PRIVATE
torch_cpu
torch_cuda
${CUDA_cusolver_LIBRARY}
)
if(USE_MAGMA)
target_link_libraries(torch_cuda_linalg PRIVATE torch::magma)
# CUDAHooks reports version of MAGMA PyTorch was compiled against, i.e. needs to be able to include magma headers
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/cuda/detail/CUDAHooks.cpp PROPERTIES INCLUDE_DIRECTORIES "${MAGMA_INCLUDE_DIR}")
endif()
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG")
install(TARGETS torch_cuda_linalg DESTINATION "${TORCH_INSTALL_LIB_DIR}")
endif()
if(USE_PRECOMPILED_HEADERS)
if(BUILD_SPLIT_CUDA)

View File

@ -51,18 +51,15 @@ if torch.cuda.is_available() and (CUDA_HOME is not None or ROCM_HOME is not None
# todo(mkozuki): Figure out the root cause
if (not IS_WINDOWS) and torch.cuda.is_available() and CUDA_HOME is not None:
# malfet: One shoudl not assume that PyTorch re-exports CUDA dependencies
cublas_extension = CUDAExtension(
name='torch_test_cpp_extension.cublas_extension',
sources=['cublas_extension.cpp'],
libraries=['cublas'] if torch.version.hip is None else [],
sources=['cublas_extension.cpp']
)
ext_modules.append(cublas_extension)
cusolver_extension = CUDAExtension(
name='torch_test_cpp_extension.cusolver_extension',
sources=['cusolver_extension.cpp'],
libraries=['cusolver'] if torch.version.hip is None else [],
sources=['cusolver_extension.cpp']
)
ext_modules.append(cusolver_extension)

View File

@ -1339,7 +1339,6 @@ aten_cuda_cu_source_list = [
"aten/src/ATen/cuda/CUDASparseBlas.cpp",
"aten/src/ATen/cuda/CublasHandlePool.cpp",
"aten/src/ATen/native/cuda/Activation.cpp",
"aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp",
"aten/src/ATen/native/cuda/Blas.cpp",
"aten/src/ATen/native/cuda/Equal.cpp",
"aten/src/ATen/native/cuda/GridSampler.cpp",