Pass Werror to CUDA host compiler (#130213)

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130213
Approved by: https://github.com/ezyang
cyy 2024-09-21 08:01:06 +00:00 committed by PyTorch MergeBot
parent e18439113e
commit c459430558
2 changed files with 14 additions and 6 deletions


@@ -55,7 +55,7 @@ void gemm_grouped_cuda_internal(
     const std::vector<scalar_t*>& bptr,
     const std::vector<scalar_t*>& dptr,
     const std::vector<cutlass::gemm::GemmCoord>& gemm_sizes,
-    const int problem_count,
+    const int64_t problem_count,
     at::Device& device) {
   using Element = scalar_t;
   using ElementAcc = float;
@@ -183,7 +183,7 @@ bool group_gemm_dispatch(
     const std::vector<int64_t>& lda,
     const std::vector<int64_t>& ldb,
     const std::vector<int64_t>& ldd,
-    std::vector<cutlass::gemm::GemmCoord> gemm_sizes,
+    const std::vector<cutlass::gemm::GemmCoord>& gemm_sizes,
     int64_t ntensors) {
   return false;
 }
@@ -197,7 +197,7 @@ bool group_gemm_dispatch(
     const std::vector<int64_t>& lda,
     const std::vector<int64_t>& ldb,
     const std::vector<int64_t>& ldd,
-    std::vector<cutlass::gemm::GemmCoord> gemm_sizes,
+    const std::vector<cutlass::gemm::GemmCoord>& gemm_sizes,
     int64_t ntensors) {
   gemm_grouped_cuda_internal<
@@ -223,7 +223,7 @@ bool group_gemm_dispatch(
     const std::vector<int64_t>& lda,
     const std::vector<int64_t>& ldb,
     const std::vector<int64_t>& ldd,
-    std::vector<cutlass::gemm::GemmCoord> gemm_sizes,
+    const std::vector<cutlass::gemm::GemmCoord>& gemm_sizes,
     int64_t ntensors) {
   // Check alignment
@@ -357,8 +357,7 @@ Tensor bmm_nested_cuda(const Tensor& self, const Tensor& mat2) {
       const int64_t &self_size1 = self_shape[1];
       const int64_t &mat2_size0 = mat2_shape[0];
       const int64_t &mat2_size1 = mat2_shape[1];
-      gemm_sizes.push_back(
-          cutlass::gemm::GemmCoord(self_size0, mat2_size1, self_size1));
+      gemm_sizes.emplace_back(self_size0, mat2_size1, self_size1);
       aptr[i] = self_buffer.data_ptr<scalar_t>() + get_offset_for_index(self, i);
       bptr[i] = mat2_buffer.data_ptr<scalar_t>() + get_offset_for_index(mat2, i);
       dptr[i] = out_buffer.data_ptr<scalar_t>() + out_offsets_ptr[i];
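
For context only, and not part of this diff: a minimal standalone C++ sketch of the pattern the hunks above move toward, with the coordinate list built via emplace_back, passed by const reference, and the problem count carried as int64_t. GemmCoord and run_grouped_gemm below are hypothetical stand-ins, not the CUTLASS or ATen definitions.

#include <cstdint>
#include <vector>

// Hypothetical stand-in for cutlass::gemm::GemmCoord (m, n, k extents).
struct GemmCoord {
  int m;
  int n;
  int k;
  GemmCoord(int m_, int n_, int k_) : m(m_), n(n_), k(k_) {}
};

// Mirrors the post-change shape of the dispatch helpers: the size list is
// taken by const reference (no per-call copy of the vector) and the problem
// count is an int64_t, matching the int64_t counts used by the caller.
void run_grouped_gemm(
    const std::vector<GemmCoord>& gemm_sizes,
    const std::int64_t problem_count) {
  // ... configure and launch one GEMM per problem here ...
  (void)gemm_sizes;
  (void)problem_count;
}

int main() {
  std::vector<GemmCoord> gemm_sizes;
  // emplace_back constructs the coordinate in place, replacing the
  // push_back(GemmCoord(...)) pattern removed above.
  gemm_sizes.emplace_back(128, 64, 256);
  gemm_sizes.emplace_back(32, 128, 64);
  run_grouped_gemm(gemm_sizes, static_cast<std::int64_t>(gemm_sizes.size()));
  return 0;
}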


@@ -1380,6 +1380,15 @@ if(NOT INTERN_BUILD_MOBILE)
     # we want to respect the standard, and we are bored of those **** .
     add_definitions(-D_CRT_SECURE_NO_DEPRECATE=1)
     string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=/wd4819,/wd4503,/wd4190,/wd4244,/wd4251,/wd4275,/wd4522")
+  else()
+    if(WERROR)
+      if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 13)
+        string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler -Wno-dangling-reference ")
+      endif()
+      if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 13))
+        string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler -Werror -Xcompiler -Wno-error=sign-compare ")
+      endif()
+    endif()
   endif()
   string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets --expt-extended-lambda")
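
A rough illustration of what the new flags mean in practice, assuming the build already enables warnings such as -Wall/-Wextra: host code in a .cu translation unit is compiled by the host compiler (GCC or Clang), which now receives -Werror through -Xcompiler, so its warnings fail the build; -Wno-error=sign-compare keeps signed/unsigned comparisons as plain warnings, and GCC 13's -Wdangling-reference is switched off before -Werror is added. The snippet below is hypothetical and not from the PyTorch sources.

#include <vector>

// Host code in a .cu translation unit is compiled by the host compiler
// (gcc or clang), which now sees -Werror forwarded via -Xcompiler.
int count_below(const std::vector<int>& values, int limit) {
  int count = 0;
  for (int i = 0; i < values.size(); ++i) {
    // int vs. size_type comparison: emits -Wsign-compare, which stays a
    // warning rather than an error thanks to -Xcompiler -Wno-error=sign-compare.
    if (values[i] < limit) {
      ++count;
    }
  }
  return count;
}

int main() {
  std::vector<int> values{1, 5, 9};
  // Any other enabled host-compiler warning in this file would now stop the build.
  return count_below(values, 6) == 2 ? 0 : 1;
}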