diff --git a/aten/src/ATen/SparseTensorImpl.h b/aten/src/ATen/SparseTensorImpl.h
index b10795fbc37..5ba7b3f1c93 100644
--- a/aten/src/ATen/SparseTensorImpl.h
+++ b/aten/src/ATen/SparseTensorImpl.h
@@ -229,14 +229,14 @@ struct TORCH_API SparseTensorImpl : public TensorImpl {
   }

   void resize_(int64_t sparse_dim, int64_t dense_dim, ArrayRef size) {
-    return _resize_(sparse_dim, dense_dim, size);
+    _resize_(sparse_dim, dense_dim, size);
   }

   void resize_(
       int64_t sparse_dim,
       int64_t dense_dim,
       ArrayRef size) {
-    return _resize_(sparse_dim, dense_dim, size);
+    _resize_(sparse_dim, dense_dim, size);
   }

   // NOTE: this function will resize the sparse tensor and also set `indices`
diff --git a/aten/src/ATen/TensorIndexing.cpp b/aten/src/ATen/TensorIndexing.cpp
index bd50282b46e..1fa85268665 100644
--- a/aten/src/ATen/TensorIndexing.cpp
+++ b/aten/src/ATen/TensorIndexing.cpp
@@ -59,7 +59,7 @@ static inline void set_item(const Tensor& self, ArrayRef indices, c
     }
   }

-  return set_item(self, indices, value);
+  set_item(self, indices, value);
 }

 } // namespace indexing
diff --git a/aten/src/ATen/TensorIterator.cpp b/aten/src/ATen/TensorIterator.cpp
index 9096cbfc68e..61262914a72 100644
--- a/aten/src/ATen/TensorIterator.cpp
+++ b/aten/src/ATen/TensorIterator.cpp
@@ -765,7 +765,8 @@ void TensorIteratorBase::for_each(loop2d_t loop, int64_t grain_size) {
   if (numel == 0) {
     return;
   } else if (numel < grain_size || at::get_num_threads() == 1) {
-    return serial_for_each(loop, {0, numel});
+    serial_for_each(loop, {0, numel});
+    return;
   } else {
     at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {
       serial_for_each(loop, {begin, end});
diff --git a/aten/src/ATen/core/NamedTensor.cpp b/aten/src/ATen/core/NamedTensor.cpp
index eaca01fe5e0..0bbeb9ddc13 100644
--- a/aten/src/ATen/core/NamedTensor.cpp
+++ b/aten/src/ATen/core/NamedTensor.cpp
@@ -49,7 +49,7 @@ static void check_unique_names(DimnameList names) {
 }

 void check_names_valid_for(const TensorBase& tensor, DimnameList names) {
-  return impl::check_names_valid_for(tensor.unsafeGetTensorImpl(), names);
+  impl::check_names_valid_for(tensor.unsafeGetTensorImpl(), names);
 }

 void check_names_valid_for(size_t tensor_dim, DimnameList names) {
diff --git a/aten/src/ATen/core/Tensor.cpp b/aten/src/ATen/core/Tensor.cpp
index 246418ad7ce..fea5d5652c3 100644
--- a/aten/src/ATen/core/Tensor.cpp
+++ b/aten/src/ATen/core/Tensor.cpp
@@ -138,7 +138,7 @@ void Tensor::_backward(TensorList inputs,
         const std::optional& gradient,
         std::optional keep_graph,
         bool create_graph) const {
-  return impl::GetVariableHooks()->_backward(*this, inputs, gradient, keep_graph, create_graph);
+  impl::GetVariableHooks()->_backward(*this, inputs, gradient, keep_graph, create_graph);
 }

 const TensorBase& TensorBase::requires_grad_(bool _requires_grad) const {
diff --git a/aten/src/ATen/core/dispatch/Dispatcher.h b/aten/src/ATen/core/dispatch/Dispatcher.h
index 3a65b288bf8..57a78ecf03a 100644
--- a/aten/src/ATen/core/dispatch/Dispatcher.h
+++ b/aten/src/ATen/core/dispatch/Dispatcher.h
@@ -496,7 +496,7 @@ class TORCH_API OperatorHandle {
   }

   void checkInvariants() const {
-    return operatorDef_->op.checkInvariants();
+    operatorDef_->op.checkInvariants();
   }

   c10::ArrayRef getTags() const {
@@ -932,7 +932,7 @@ inline void Dispatcher::redispatchBoxed(
   }
 #endif
   const auto& kernel = entry.lookup(dispatchKeySet);
-  return kernel.callBoxed(op, dispatchKeySet, stack);
+  kernel.callBoxed(op, dispatchKeySet, stack);
 }

 } // namespace c10
diff --git a/aten/src/ATen/functorch/DynamicLayer.cpp b/aten/src/ATen/functorch/DynamicLayer.cpp
index 4ec902b668e..69af08a7bd7 100644
--- a/aten/src/ATen/functorch/DynamicLayer.cpp
+++ b/aten/src/ATen/functorch/DynamicLayer.cpp
@@ -465,11 +465,11 @@ static void dynamicLayerBack(const c10::OperatorHandle& op, torch::jit::Stack* s

 // used for functions that have aliasing operations but should be treated like they're out of place (i.e. lift_fresh)
 static void dynamicLayerBackGradSpecialCase(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
-  return dynamicLayerBack(op, stack, true);
+  dynamicLayerBack(op, stack, true);
 }

 static void dynamicLayerBackFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
-  return dynamicLayerBack(op, stack, false);
+  dynamicLayerBack(op, stack, false);
 }

 TORCH_LIBRARY_IMPL(_, FuncTorchDynamicLayerFrontMode, m) {
diff --git a/aten/src/ATen/native/BlasKernel.cpp b/aten/src/ATen/native/BlasKernel.cpp
index 5f3976bd18d..a77604c535c 100644
--- a/aten/src/ATen/native/BlasKernel.cpp
+++ b/aten/src/ATen/native/BlasKernel.cpp
@@ -375,7 +375,7 @@ static void bf16_gemv_trans(
     const at::BFloat16 beta,
     at::BFloat16* y,
     const int incy) {
-  return bf16_gemv_trans_stub(kCPU, m, n, alpha, a, lda, x, incx, beta, y, incy);
+  bf16_gemv_trans_stub(kCPU, m, n, alpha, a, lda, x, incx, beta, y, incy);
 }

 template <>
diff --git a/aten/src/ATen/native/BucketizationUtils.h b/aten/src/ATen/native/BucketizationUtils.h
index 70878ecd704..bd19f9c987f 100644
--- a/aten/src/ATen/native/BucketizationUtils.h
+++ b/aten/src/ATen/native/BucketizationUtils.h
@@ -70,7 +70,7 @@ inline void searchsorted_maybe_trim_input_tensors(
     const Tensor& raw_boundaries) {
   Tensor trimmed_sorter;
   Tensor raw_sorter;
-  return searchsorted_maybe_trim_input_tensors(
+  searchsorted_maybe_trim_input_tensors(
       trimmed_input,
       trimmed_boundaries,
       trimmed_sorter,
diff --git a/aten/src/ATen/native/VariableMethodStubs.cpp b/aten/src/ATen/native/VariableMethodStubs.cpp
index 8c8ad45acc4..02c798a3d04 100644
--- a/aten/src/ATen/native/VariableMethodStubs.cpp
+++ b/aten/src/ATen/native/VariableMethodStubs.cpp
@@ -25,11 +25,11 @@
 namespace at::native {

 void _backward(const Tensor& self, TensorList inputs, const std::optional& gradient_opt, std::optional keep_graph, bool create_graph) {
-  return self._backward(inputs, gradient_opt, keep_graph, create_graph);
+  self._backward(inputs, gradient_opt, keep_graph, create_graph);
 }

 void set_data(Tensor& self, const Tensor& new_data) {
-  return self.set_data(new_data);
+  self.set_data(new_data);
 }

 Tensor data(const Tensor& self) {
@@ -54,7 +54,7 @@ Tensor& requires_grad_(Tensor& self, bool _requires_grad) {
 }

 void retain_grad(Tensor& self) {
-  return self.retain_grad();
+  self.retain_grad();
 }

 bool retains_grad(const Tensor& self) {
diff --git a/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp b/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp
index 3db9646b31c..10e0daacab3 100644
--- a/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp
+++ b/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp
@@ -300,7 +300,8 @@ void div_floor_kernel(TensorIteratorBase& iter) {
     // In the special case of unsigned integer division, floor division is
     // equivalent to truncation division (since the signs of the divisor and
     // dividend are always the same)
-    return div_trunc_kernel(iter);
+    div_trunc_kernel(iter);
+    return;
   } else if (isIntegralType(dtype, /*includeBool*/ false)) {
     // There's no SIMD integer division, so don't try to vectorize it.
     AT_DISPATCH_INTEGRAL_TYPES(dtype, "div_floor_cpu", [&]() {
diff --git a/aten/src/ATen/native/cpu/IndexKernel.cpp b/aten/src/ATen/native/cpu/IndexKernel.cpp
index 1e6723b5f08..57d3ab89c61 100644
--- a/aten/src/ATen/native/cpu/IndexKernel.cpp
+++ b/aten/src/ATen/native/cpu/IndexKernel.cpp
@@ -749,21 +749,29 @@ void flip_kernel(TensorIterator& iter, const bool quantized) {
     // });

     if (iter_dtype == kByte) {
-      return cpu_hflip_vec(iter);
+      cpu_hflip_vec(iter);
+      return;
     } else if (iter_dtype == kChar) {
-      return cpu_hflip_vec(iter);
+      cpu_hflip_vec(iter);
+      return;
     } else if (iter_dtype == kInt) {
-      return cpu_hflip_vec(iter);
+      cpu_hflip_vec(iter);
+      return;
     } else if (iter_dtype == kLong) {
-      return cpu_hflip_vec(iter);
+      cpu_hflip_vec(iter);
+      return;
     } else if (iter_dtype == kShort) {
-      return cpu_hflip_vec(iter);
+      cpu_hflip_vec(iter);
+      return;
     } else if (iter_dtype == kBool) {
-      return cpu_hflip_vec(iter);
+      cpu_hflip_vec(iter);
+      return;
     } else if (iter_dtype == kFloat) {
-      return cpu_hflip_vec(iter);
+      cpu_hflip_vec(iter);
+      return;
     } else if (iter_dtype == kDouble) {
-      return cpu_hflip_vec(iter);
+      cpu_hflip_vec(iter);
+      return;
     }
   }
   // other dtypes (float16, bfloat16, complex) are handled by cpu_kernel_vec (see below)
@@ -778,10 +786,12 @@ void flip_kernel(TensorIterator& iter, const bool quantized) {
         c == input_strides_2[1] &&
         c == iter.element_size(0) * iter.shape()[0] // checks if dim=1 is contiguous as well
     ) {
-      return cpu_hflip_channels_last_vec(iter);
+      cpu_hflip_channels_last_vec(iter);
+      return;
     }
     // Special case: vertical flip using memcpy (faster than generic cpu_kernel_vec)
-    return cpu_vflip_memcpy(iter);
+    cpu_vflip_memcpy(iter);
+    return;
   }

   AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kBool, kHalf, kBFloat16, iter.dtype(), "flip_cpu",
diff --git a/aten/src/ATen/native/cpu/PowKernel.cpp b/aten/src/ATen/native/cpu/PowKernel.cpp
index 2cf751f0511..18e14ed5d30 100644
--- a/aten/src/ATen/native/cpu/PowKernel.cpp
+++ b/aten/src/ATen/native/cpu/PowKernel.cpp
@@ -96,11 +96,14 @@ static void pow_tensor_scalar_kernel(
       dtype == kBFloat16 || isComplexType(dtype)) {
     // Dispatch to fast specialization for sqrt, rsqrt and reciprocal
     if (exp_scalar.equal(.5)) {
-      return sqrt_kernel(iter);
+      sqrt_kernel(iter);
+      return;
     } else if (exp_scalar.equal(-0.5)) {
-      return rsqrt_kernel(iter);
+      rsqrt_kernel(iter);
+      return;
     } else if (exp_scalar.equal(-1.0)) {
-      return reciprocal_kernel(iter);
+      reciprocal_kernel(iter);
+      return;
     }
   }

diff --git a/aten/src/ATen/native/cpu/ReduceOpsKernel.cpp b/aten/src/ATen/native/cpu/ReduceOpsKernel.cpp
index c06731dfc71..2067a74ac25 100644
--- a/aten/src/ATen/native/cpu/ReduceOpsKernel.cpp
+++ b/aten/src/ATen/native/cpu/ReduceOpsKernel.cpp
@@ -256,10 +256,10 @@ static void norm_kernel_tensor_iterator_impl(
   } else {
     if (iter.input_dtype() == kHalf && iter.dtype(0) == kFloat) {
       // type promotion that does cast and reduction in a single kernel
-      return norm_kernel_cpu_impl(iter, val);
+      norm_kernel_cpu_impl(iter, val); return;
     } else if (iter.input_dtype() == kBFloat16 && iter.dtype(0) == kFloat) {
       // type promotion that does cast and reduction in a single kernel
-      return norm_kernel_cpu_impl(iter, val);
+      norm_kernel_cpu_impl(iter, val); return;
     }
     AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND3(kHalf, kBFloat16, kComplexHalf, iter.input_dtype(), "norm_cpu", [&] {
diff --git a/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.cpp b/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.cpp
index 4aa696e93c9..79a00543ff3 100644
--- a/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.cpp
+++ b/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.cpp
@@ -428,10 +428,11 @@ void fp16_gemv_trans(
   TORCH_INTERNAL_ASSERT_DEBUG_ONLY(incx == 1 && alpha == 1.0);
 #if !defined(__aarch64__) || defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
   if (at::globalContext().allowFP16ReductionCPU()) {
-    return fp16_gemv_trans_fp16_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
+    fp16_gemv_trans_fp16_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
+    return;
   }
 #endif
-  return fp16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
+  fp16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
 }

 float bf16_dot_with_fp32_arith(const at::BFloat16* vec1, const at::BFloat16* vec2, int64_t len) {
@@ -465,7 +466,7 @@ void bf16_gemv_trans(
     at::BFloat16* y,
     const int incy) {
   TORCH_INTERNAL_ASSERT_DEBUG_ONLY(incx == 1 && alpha == 1.0 && beta == 0.0);
-  return bf16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, y, incy);
+  bf16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, y, incy);
 }

 float fp16_dot(
diff --git a/aten/src/ATen/native/cuda/SpectralOps.cpp b/aten/src/ATen/native/cuda/SpectralOps.cpp
index 46ef404ebea..7f9d0eaa4ef 100644
--- a/aten/src/ATen/native/cuda/SpectralOps.cpp
+++ b/aten/src/ATen/native/cuda/SpectralOps.cpp
@@ -121,7 +121,7 @@ void cufft_set_plan_cache_max_size_impl(DeviceIndex device_index, int64_t max_si
       "cufft_set_plan_cache_max_size: expected 0 <= device_index < ",
       at::detail::getCUDAHooks().deviceCount(),
       "], but got device_index=", device_index);
-  return cufft_get_plan_cache(device_index).resize(max_size);
+  cufft_get_plan_cache(device_index).resize(max_size);
 }

 int64_t cufft_get_plan_cache_size_impl(DeviceIndex device_index) {
@@ -137,7 +137,7 @@ void cufft_clear_plan_cache_impl(DeviceIndex device_index) {
       "cufft_clear_plan_cache: expected 0 <= device_index < ",
       at::detail::getCUDAHooks().deviceCount(),
       "], but got device_index=", device_index);
-  return cufft_get_plan_cache(device_index).clear();
+  cufft_get_plan_cache(device_index).clear();
 }

 } // namespace at::native::detail
diff --git a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp
index 813c7fbdcc9..56fb015dfaf 100644
--- a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp
+++ b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp
@@ -1107,10 +1107,14 @@ void ldl_factor_kernel(
   auto preferred_backend = at::globalContext().linalgPreferredBackend();
   switch (preferred_backend) {
     case at::LinalgBackend::Cusolver:
-      return ldl_factor_cusolver(
+      { ldl_factor_cusolver(
           LD, pivots, info, upper, hermitian);
+      return;
+}
     case at::LinalgBackend::Magma:
-      return ldl_factor_magma(LD, pivots, info, upper, hermitian);
+      { ldl_factor_magma(LD, pivots, info, upper, hermitian);
+      return;
+}
     default:
       // By default use cusolver if available and magma otherwise.
       // If cusolver and magma 2.5.4+ are both available and hermitian=true,
@@ -1122,8 +1126,10 @@ void ldl_factor_kernel(
             LD, pivots, info, upper, hermitian);
       }
 #endif
-      return ldl_factor_cusolver(
-          LD, pivots, info, upper, hermitian);
+      { ldl_factor_cusolver(
+          LD, pivots, info, upper, hermitian);
+        return;
+      }
 #else
       return ldl_factor_magma(LD, pivots, info, upper, hermitian);
 #endif
@@ -1839,11 +1845,14 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) {
     // For the benchmarks see
     // https://github.com/pytorch/pytorch/pull/56253#discussion_r622851107
     if (input.size(-2) <= 256 && batchCount(input) >= std::max(2, input.size(-2) / 16)) {
-      return geqrf_batched_cublas(input, tau);
+      geqrf_batched_cublas(input, tau);
+      return;
    } else {
-      return geqrf_cusolver(input, tau);
+      geqrf_cusolver(input, tau);
+      return;
    }
-    return geqrf_batched_cublas(input, tau);
+    geqrf_batched_cublas(input, tau);
+    return;
   };

   auto preferred_backend = at::globalContext().linalgPreferredBackend();
@@ -1856,10 +1865,14 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) {
     // - ?geqrf_gpu allows fast computation of Q via ?orgqr_gpu, but doesn't give R properly.
     // - ?geqrf2_gpu gives correct R, but doesn't allow computation of Q via ?orgqr_gpu
     case at::LinalgBackend::Magma:
-      return geqrf_magma(input, tau);
+      { geqrf_magma(input, tau);
+        return;
+      }
     case at::LinalgBackend::Cusolver:
     default:
-      return geqrf_cusolver_backend(input, tau);
+      { geqrf_cusolver_backend(input, tau);
+        return;
+      }
   }
 #else
   return geqrf_magma(input, tau);
 #endif
@@ -2703,13 +2716,17 @@ void gels_looped(const Tensor& a, Tensor& b, Tensor& infos) {
   auto preferred_backend = at::globalContext().linalgPreferredBackend();
   switch (preferred_backend) {
     case at::LinalgBackend::Magma:
-      return gels_magma(a, b, infos);
+      { gels_magma(a, b, infos);
+        return;
+      }
     case at::LinalgBackend::Cusolver:
     default:
       // linalg_lstsq_gels is a generic function that is implemented using
       // geqrf_stub, ormqr_stub, and triangular_solve_stub
       // It dispatches to cuSOLVER for CUDA inputs if USE_LINALG_SOLVER is defined
-      return linalg_lstsq_gels(a, b, infos);
+      { linalg_lstsq_gels(a, b, infos);
+        return;
+      }
   }
 #else
   return gels_magma(a, b, infos);
diff --git a/aten/src/ATen/native/mkl/SparseBlasImpl.cpp b/aten/src/ATen/native/mkl/SparseBlasImpl.cpp
index c9c6127f90a..ba555ac1bfb 100644
--- a/aten/src/ATen/native/mkl/SparseBlasImpl.cpp
+++ b/aten/src/ATen/native/mkl/SparseBlasImpl.cpp
@@ -373,59 +373,67 @@ void addmm_out_sparse_csr(
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kStrided) {
         // TODO: Add native CSC support via cuSPARSE if supported.
-        return addmm_dense_result(
+        addmm_dense_result(
             mat2.transpose(0, 1).to_sparse_csr(),
             mat1.transpose(0, 1),
             beta,
             alpha,
             result.transpose(0, 1));
+        return;
       }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kStrided) {
-        return addmm_dense_result(
+        addmm_dense_result(
            mat2.transpose(-2, -1),
            mat1.transpose(-2, -1),
            beta,
            alpha,
            result.transpose(-2, -1));
+        return;
       }
     }
     if (mat2.layout() == kSparseBsc) {
       if (result.layout() == kStrided) {
-        return addmm_dense_result(
+        addmm_dense_result(
            mat2.transpose(-2, -1),
            mat1.transpose(-2, -1),
            beta,
            alpha,
            result.transpose(-2, -1));
+        return;
       }
     }
   }
   if (mat1.layout() == kSparseCsr) {
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided) {
-        return addmm_dense_result(mat1, mat2, beta, alpha, result);
+        addmm_dense_result(mat1, mat2, beta, alpha, result);
+        return;
       }
     }
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kStrided) {
-        return addmm_sparse_input_dense_result(mat1, mat2, beta, alpha, result);
+        addmm_sparse_input_dense_result(mat1, mat2, beta, alpha, result);
+        return;
       }
       if (result.layout() == kSparseCsr) {
-        return addmm_sparse_result(mat1, mat2, beta, alpha, result);
+        addmm_sparse_result(mat1, mat2, beta, alpha, result);
+        return;
       }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kStrided) {
         // TODO: CSR @ CSC kernel would be very fast due to format alignment
-        return addmm_sparse_input_dense_result(
-            mat1, mat2.to_sparse_csr(), beta, alpha, result);
+        addmm_sparse_input_dense_result(
+          mat1, mat2.to_sparse_csr(), beta, alpha, result);
+        return;
       }
       if (result.layout() == kSparseCsr) {
         // TODO: CSR @ CSC kernel would be very fast due to format alignment
-        return addmm_sparse_result(
-            mat1, mat2.to_sparse_csr(), beta, alpha, result);
+        addmm_sparse_result(
+          mat1, mat2.to_sparse_csr(), beta, alpha, result);
+        return;
       }
     }
   }
@@ -433,56 +441,62 @@ void addmm_out_sparse_csr(
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided) {
         // TODO: avoid csc->csr conversion with native csc support
-        return addmm_dense_result(
-            mat1.to_sparse_csr(), mat2, beta, alpha, result);
+        addmm_dense_result(
+          mat1.to_sparse_csr(), mat2, beta, alpha, result);
+        return;
       }
     }
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kSparseCsr) {
         // TODO: avoid csc->csr conversion with native csc support
-        return addmm_sparse_result(
-            mat1.to_sparse_csr(), mat2, beta, alpha, result);
+        addmm_sparse_result(
+          mat1.to_sparse_csr(), mat2, beta, alpha, result);
+        return;
       }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kStrided) {
-        return addmm_sparse_input_dense_result(
-            mat2.transpose(-2, -1),
-            mat1.transpose(-2, -1),
-            beta,
-            alpha,
-            result.transpose(-2, -1));
+        addmm_sparse_input_dense_result(
+          mat2.transpose(-2, -1),
+          mat1.transpose(-2, -1),
+          beta,
+          alpha,
+          result.transpose(-2, -1));
+        return;
       }
       if (result.layout() == kSparseCsr) {
         // TODO avoid csc->csr
-        return addmm_sparse_result(
-            mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
+        addmm_sparse_result(
+          mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
+        return;
      }
      if (result.layout() == kSparseCsc) {
-        return addmm_sparse_result(
-            mat2.transpose(-2, -1),
-            mat1.transpose(-2, -1),
-            beta,
-            alpha,
-            result.transpose(-2, -1));
+        addmm_sparse_result(
+          mat2.transpose(-2, -1),
+          mat1.transpose(-2, -1),
+          beta,
+          alpha,
+          result.transpose(-2, -1));
+        return;
       }
     }
   }
   if (mat1.layout() == kSparseBsr) {
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided) {
-        return addmm_dense_result(mat1, mat2, beta, alpha, result);
+        addmm_dense_result(mat1, mat2, beta, alpha, result);
+        return;
       }
     }
   }
   TORCH_CHECK(
-      false,
-      "addmm: computation on CPU is not implemented for ",
-      result.layout(),
-      " + ",
-      mat1.layout(),
-      " @ ",
-      mat2.layout());
+    false,
+    "addmm: computation on CPU is not implemented for ",
+    result.layout(),
+    " + ",
+    mat1.layout(),
+    " @ ",
+    mat2.layout());
 }

 /*
@@ -496,16 +510,16 @@ void addmm_out_sparse_csr(
   [out] result of the operation.
 */
 void addmv_out_sparse_csr(
-    const Tensor& mat,
-    const Tensor& vec,
-    const Scalar& beta,
-    const Scalar& alpha,
-    const Tensor& result) {
+  const Tensor& mat,
+  const Tensor& vec,
+  const Scalar& beta,
+  const Scalar& alpha,
+  const Tensor& result) {
 #if !AT_USE_MKL_SPARSE()
   TORCH_CHECK(
-      false,
-      "Calling addmv on a sparse CPU tensor requires Linux platform. ",
-      "Please use PyTorch built with MKL on Linux.");
+    false,
+    "Calling addmv on a sparse CPU tensor requires Linux platform. ",
+    "Please use PyTorch built with MKL on Linux.");
 #else
   c10::MaybeOwned result_ = prepare_dense_vector_for_mkl(result);
   c10::MaybeOwned vec_ = prepare_dense_vector_for_mkl(vec);
diff --git a/aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp b/aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp
index f22afbac7d6..43638aa2d1b 100644
--- a/aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp
+++ b/aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp
@@ -810,7 +810,8 @@ void addmm_out_sparse_csr(
   if (mat1.layout() == kSparseBsr) {
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided)
-        return block_sparse_mm(input, mat1, mat2, beta, alpha, result);
+      { block_sparse_mm(input, mat1, mat2, beta, alpha, result); return;
+}
     }
   }

@@ -819,13 +820,13 @@ void addmm_out_sparse_csr(
     if (result.layout() == kStrided) {
       auto result_t = result.transpose(-2, -1);
       auto input_t = (result.is_same(input) ? result_t : input.transpose(-2, -1));
-      return block_sparse_mm(
+      block_sparse_mm(
           input_t,
           mat2.transpose(-2, -1),
           mat1.transpose(-2, -1),
           beta,
           alpha,
-          result_t);
+          result_t); return;
     }
   }
 }
@@ -840,41 +841,41 @@ void addmm_out_sparse_csr(
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kStrided) {
         // TODO: Add native CSC support via cuSPARSE if supported.
-        return spmm(
+        spmm(
             mat2.transpose(0, 1).to_sparse_csr(),
             mat1.transpose(0, 1),
             beta,
             alpha,
-            result.transpose(0, 1));
+            result.transpose(0, 1)); return;
       }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kStrided) {
-        return spmm(
+        spmm(
             mat2.transpose(-2, -1),
             mat1.transpose(-2, -1),
             beta,
             alpha,
-            result.transpose(-2, -1));
+            result.transpose(-2, -1)); return;
       }
     }
   }
   if (mat1.layout() == kSparseCsr) {
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided) {
-        return spmm(mat1, mat2, beta, alpha, result);
+        spmm(mat1, mat2, beta, alpha, result); return;
       }
     }
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kSparseCsr) {
-        return spgemm(mat1, mat2, beta, alpha, result);
+        spgemm(mat1, mat2, beta, alpha, result); return;
       }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kSparseCsr) {
         // TODO: Add native CSC support via cuSPARSE if supported.
         // CSR @ CSC kernel would be very fast due to format alignment
-        return spgemm(mat1, mat2.to_sparse_csr(), beta, alpha, result);
+        spgemm(mat1, mat2.to_sparse_csr(), beta, alpha, result); return;
       }
     }
   }
@@ -882,27 +883,28 @@ void addmm_out_sparse_csr(
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided) {
         // TODO: Add native CSC support via cuSPARSE if supported.
-        return spmm(mat1.to_sparse_csr(), mat2, beta, alpha, result);
+        spmm(mat1.to_sparse_csr(), mat2, beta, alpha, result); return;
       }
     }
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kSparseCsr)
         // TODO: Add native CSC support via cuSPARSE if supported.
-        return spgemm(mat1.to_sparse_csr(), mat2, beta, alpha, result);
+      { spgemm(mat1.to_sparse_csr(), mat2, beta, alpha, result); return;
+}
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kSparseCsr) {
         // TODO: Add native CSC support via cuSPARSE if supported.
-        return spgemm(
-            mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
+        spgemm(
+            mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result); return;
       }
       if (result.layout() == kSparseCsc) {
-        return spgemm(
+        spgemm(
             mat2.transpose(-2, -1),
             mat1.transpose(-2, -1),
             beta,
             alpha,
-            result.transpose(-2, -1));
+            result.transpose(-2, -1)); return;
       }
     }
   }
@@ -933,7 +935,7 @@ void addmv_out_sparse_csr(
     const Scalar& alpha,
     const Tensor& result) {
   if (mat.layout() == kSparseBsr) {
-    return block_sparse_mv(mat, vec, beta, alpha, result);
+    block_sparse_mv(mat, vec, beta, alpha, result); return;
   }

   cusparseOperation_t opA = CUSPARSE_OPERATION_NON_TRANSPOSE;
@@ -1213,9 +1215,9 @@ void triangular_solve_out_sparse_csr(
   }
   if (A.layout() == kSparseBsr) {
     if (B.size(-1) == 1) {
-      return block_sparse_triangular_solve_vec(A, B, X, upper, transpose, unitriangular);
+      block_sparse_triangular_solve_vec(A, B, X, upper, transpose, unitriangular); return;
     } else {
-      return block_sparse_triangular_solve_mat(A, B, X, upper, transpose, unitriangular);
+      block_sparse_triangular_solve_mat(A, B, X, upper, transpose, unitriangular); return;
     }
   }
 #ifdef USE_ROCM
diff --git a/c10/core/Event.h b/c10/core/Event.h
index b94db9f4f26..dfbb17e37da 100644
--- a/c10/core/Event.h
+++ b/c10/core/Event.h
@@ -127,7 +127,7 @@ struct Event final {
   }

   void synchronize() const {
-    return impl_.synchronize();
+    impl_.synchronize();
   }

  private:
diff --git a/c10/core/Storage.h b/c10/core/Storage.h
index 611133e1bcb..5abbcb22ece 100644
--- a/c10/core/Storage.h
+++ b/c10/core/Storage.h
@@ -149,7 +149,7 @@ struct C10_API Storage {
   }

   void set_data_ptr_noswap(at::DataPtr&& data_ptr) const {
-    return storage_impl_->set_data_ptr_noswap(std::move(data_ptr));
+    storage_impl_->set_data_ptr_noswap(std::move(data_ptr));
   }

   DeviceType device_type() const {
diff --git a/c10/core/impl/VirtualGuardImpl.h b/c10/core/impl/VirtualGuardImpl.h
index badcb623291..3d259f5e390 100644
--- a/c10/core/impl/VirtualGuardImpl.h
+++ b/c10/core/impl/VirtualGuardImpl.h
@@ -94,11 +94,11 @@ class VirtualGuardImpl final : public DeviceGuardImplInterface {
   }

   void synchronizeEvent(void* event) const override {
-    return impl_->synchronizeEvent(event);
+    impl_->synchronizeEvent(event);
   }

   void synchronizeDevice(const DeviceIndex device_index) const override {
-    return impl_->synchronizeDevice(device_index);
+    impl_->synchronizeDevice(device_index);
   }

  private:
diff --git a/c10/cuda/CUDACachingAllocator.h b/c10/cuda/CUDACachingAllocator.h
index 84acfd78209..509c542668f 100644
--- a/c10/cuda/CUDACachingAllocator.h
+++ b/c10/cuda/CUDACachingAllocator.h
@@ -360,11 +360,11 @@ inline void* raw_alloc_with_stream(size_t nbytes, cudaStream_t stream) {
 }

 inline void raw_delete(void* ptr) {
-  return get()->raw_delete(ptr);
+  get()->raw_delete(ptr);
 }

 inline void init(int device_count) {
-  return get()->init(device_count);
+  get()->init(device_count);
 }

 inline double getMemoryFraction(c10::DeviceIndex device) {
@@ -372,7 +372,7 @@ inline double getMemoryFraction(c10::DeviceIndex device) {
 }

 inline void setMemoryFraction(double fraction, c10::DeviceIndex device) {
-  return get()->setMemoryFraction(fraction, device);
+  get()->setMemoryFraction(fraction, device);
 }

 inline std::vector getExpandableSegmentSizes(
@@ -381,11 +381,11 @@ inline std::vector getExpandableSegmentSizes(
 }

 inline void emptyCache(MempoolId_t mempool_id = {0, 0}) {
-  return get()->emptyCache(mempool_id);
+  get()->emptyCache(mempool_id);
 }

 inline void enable(bool value) {
-  return get()->enable(value);
+  get()->enable(value);
 }

 inline bool isEnabled() {
@@ -393,7 +393,7 @@ inline bool isEnabled() {
 }

 inline void cacheInfo(c10::DeviceIndex device, size_t* largestBlock) {
-  return get()->cacheInfo(device, largestBlock);
+  get()->cacheInfo(device, largestBlock);
 }

 inline void* getBaseAllocation(void* ptr, size_t* size) {
@@ -401,7 +401,7 @@ inline void* getBaseAllocation(void* ptr, size_t* size) {
 }

 inline void recordStream(const DataPtr& dataPtr, CUDAStream stream) {
-  return get()->recordStream(dataPtr, stream);
+  get()->recordStream(dataPtr, stream);
 }

 inline c10::CachingDeviceAllocator::DeviceStats getDeviceStats(
@@ -410,11 +410,11 @@ inline c10::CachingDeviceAllocator::DeviceStats getDeviceStats(
 }

 inline void resetAccumulatedStats(c10::DeviceIndex device) {
-  return get()->resetAccumulatedStats(device);
+  get()->resetAccumulatedStats(device);
 }

 inline void resetPeakStats(c10::DeviceIndex device) {
-  return get()->resetPeakStats(device);
+  get()->resetPeakStats(device);
 }

 inline SnapshotInfo snapshot(MempoolId_t mempool_id = {0, 0}) {
@@ -451,21 +451,21 @@ inline void recordHistory(
     size_t alloc_trace_max_entries,
     RecordContext when,
     bool clearHistory) {
-  return get()->recordHistory(
+  get()->recordHistory(
       enabled, context_recorder, alloc_trace_max_entries, when, clearHistory);
 }

 inline void recordAnnotation(
     const std::vector>& md) {
-  return get()->recordAnnotation(md);
+  get()->recordAnnotation(md);
 }

 inline void pushCompileContext(std::string& md) {
-  return get()->pushCompileContext(md);
+  get()->pushCompileContext(md);
 }

 inline void popCompileContext() {
-  return get()->popCompileContext();
+  get()->popCompileContext();
 }

 inline bool isHistoryEnabled() {
@@ -481,15 +481,15 @@ inline bool checkPoolLiveAllocations(
 }

 inline void attachOutOfMemoryObserver(OutOfMemoryObserver observer) {
-  return get()->attachOutOfMemoryObserver(std::move(observer));
+  get()->attachOutOfMemoryObserver(std::move(observer));
 }

 inline void attachAllocatorTraceTracker(AllocatorTraceTracker tracker) {
-  return get()->attachAllocatorTraceTracker(std::move(tracker));
+  get()->attachAllocatorTraceTracker(std::move(tracker));
 }

 inline void releasePool(c10::DeviceIndex device, MempoolId_t mempool_id) {
-  return get()->releasePool(device, mempool_id);
+  get()->releasePool(device, mempool_id);
 }
 inline void createOrIncrefPool(
     c10::DeviceIndex device,
@@ -533,7 +533,7 @@ inline cudaError_t memcpyAsync(
 inline void enablePeerAccess(
     c10::DeviceIndex dev,
     c10::DeviceIndex dev_to_access) {
-  return get()->enablePeerAccess(dev, dev_to_access);
+  get()->enablePeerAccess(dev, dev_to_access);
 }

 } // namespace c10::cuda::CUDACachingAllocator
diff --git a/c10/util/WaitCounter.cpp b/c10/util/WaitCounter.cpp
index b1695802825..fb004ee39b6 100644
--- a/c10/util/WaitCounter.cpp
+++ b/c10/util/WaitCounter.cpp
@@ -49,7 +49,7 @@ class DynamicBackendWrapper : public WaitCounterBackendIf {
   void stop(std::chrono::steady_clock::time_point now, intptr_t ctx)
       noexcept override {
-    return impl_.stop(
+    impl_.stop(
         impl_.self,
         std::chrono::duration_cast(
             now.time_since_epoch())
@@ -162,6 +162,6 @@ WaitCounterHandle::WaitGuard WaitCounterHandle::start() {
 }

 void WaitCounterHandle::stop(const SmallVector& ctxs) {
-  return impl_.stop(ctxs);
+  impl_.stop(ctxs);
 }
 } // namespace c10::monitor
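
Note: every hunk above applies the same mechanical cleanup: a `return f();` statement where `f()` returns `void` becomes a plain call, and a bare `return;` is kept only where control still has to leave the function early (tail calls simply drop the `return`). A minimal before/after sketch of the pattern; the `maybe_flush_*`, `log_and_flush`, and `flush` names here are hypothetical illustrations, not code from this patch:

    #include <cstdio>

    void log_and_flush() { std::puts("flushing (verbose)"); }
    void flush() { std::puts("flushing"); }

    // Before: returning the "value" of a void call. Legal C++, but the
    // return expression is redundant and some style checkers flag it.
    void maybe_flush_before(bool verbose) {
      if (verbose) {
        return log_and_flush();
      }
      flush();
    }

    // After: plain call, with an explicit `return;` only where the early
    // exit still matters (as in the hunks above).
    void maybe_flush_after(bool verbose) {
      if (verbose) {
        log_and_flush();
        return;
      }
      flush();
    }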