Mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)
Don't return values in void functions (#164809)
This PR removes value-returning return statements from void C++ functions: the call is executed as a plain statement, with a bare return statement added only where control flow still requires an early exit.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164809
Approved by: https://github.com/janeyx99
This commit is contained in:
parent f713abab16
commit 43fc859625
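For context, here is a minimal sketch of the pattern being cleaned up (illustrative only; helper, fallback, and the dispatch functions below are hypothetical names, not functions from the PyTorch sources). Returning the value of a void expression is legal C++, but it reads as if a value were produced; the change rewrites such sites as a plain call, keeping a bare return only where an early exit is still needed:

#include <iostream>

// Hypothetical helpers used only for illustration.
void helper(int x) { std::cout << "helper(" << x << ")\n"; }
void fallback()    { std::cout << "fallback()\n"; }

// Before: "return <void expression>;" inside a void function.
void dispatch_before(bool fast_path, int x) {
  if (fast_path) {
    return helper(x);  // returns the "result" of a void call
  }
  return fallback();   // same pattern at the end of the function
}

// After (the style this PR adopts): call as a plain statement and
// keep a bare "return;" only where an early exit is still required.
void dispatch_after(bool fast_path, int x) {
  if (fast_path) {
    helper(x);
    return;
  }
  fallback();          // last statement, so no "return;" is needed
}

int main() {
  dispatch_before(true, 1);
  dispatch_after(false, 2);
  return 0;
}

The hunks below apply this same transformation across the affected files.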
@@ -229,14 +229,14 @@ struct TORCH_API SparseTensorImpl : public TensorImpl {
   }

   void resize_(int64_t sparse_dim, int64_t dense_dim, ArrayRef<int64_t> size) {
-    return _resize_(sparse_dim, dense_dim, size);
+    _resize_(sparse_dim, dense_dim, size);
   }

   void resize_(
       int64_t sparse_dim,
       int64_t dense_dim,
       ArrayRef<c10::SymInt> size) {
-    return _resize_(sparse_dim, dense_dim, size);
+    _resize_(sparse_dim, dense_dim, size);
   }

   // NOTE: this function will resize the sparse tensor and also set `indices`
@@ -59,7 +59,7 @@ static inline void set_item(const Tensor& self, ArrayRef<TensorIndex> indices, c
     }
   }

-  return set_item(self, indices, value);
+  set_item(self, indices, value);
 }

 } // namespace indexing
@@ -765,7 +765,8 @@ void TensorIteratorBase::for_each(loop2d_t loop, int64_t grain_size) {
   if (numel == 0) {
     return;
   } else if (numel < grain_size || at::get_num_threads() == 1) {
-    return serial_for_each(loop, {0, numel});
+    serial_for_each(loop, {0, numel});
+    return;
   } else {
     at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {
       serial_for_each(loop, {begin, end});
@@ -49,7 +49,7 @@ static void check_unique_names(DimnameList names) {
 }

 void check_names_valid_for(const TensorBase& tensor, DimnameList names) {
-  return impl::check_names_valid_for(tensor.unsafeGetTensorImpl(), names);
+  impl::check_names_valid_for(tensor.unsafeGetTensorImpl(), names);
 }

 void check_names_valid_for(size_t tensor_dim, DimnameList names) {
@@ -138,7 +138,7 @@ void Tensor::_backward(TensorList inputs,
         const std::optional<Tensor>& gradient,
         std::optional<bool> keep_graph,
         bool create_graph) const {
-  return impl::GetVariableHooks()->_backward(*this, inputs, gradient, keep_graph, create_graph);
+  impl::GetVariableHooks()->_backward(*this, inputs, gradient, keep_graph, create_graph);
 }

 const TensorBase& TensorBase::requires_grad_(bool _requires_grad) const {
@@ -496,7 +496,7 @@ class TORCH_API OperatorHandle {
   }

   void checkInvariants() const {
-    return operatorDef_->op.checkInvariants();
+    operatorDef_->op.checkInvariants();
   }

   c10::ArrayRef<at::Tag> getTags() const {
@@ -932,7 +932,7 @@ inline void Dispatcher::redispatchBoxed(
   }
 #endif
   const auto& kernel = entry.lookup(dispatchKeySet);
-  return kernel.callBoxed(op, dispatchKeySet, stack);
+  kernel.callBoxed(op, dispatchKeySet, stack);
 }

 } // namespace c10
@@ -465,11 +465,11 @@ static void dynamicLayerBack(const c10::OperatorHandle& op, torch::jit::Stack* s

 // used for functions that have aliasing operations but should be treated like they're out of place (i.e. lift_fresh)
 static void dynamicLayerBackGradSpecialCase(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
-  return dynamicLayerBack(op, stack, true);
+  dynamicLayerBack(op, stack, true);
 }

 static void dynamicLayerBackFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
-  return dynamicLayerBack(op, stack, false);
+  dynamicLayerBack(op, stack, false);
 }

 TORCH_LIBRARY_IMPL(_, FuncTorchDynamicLayerFrontMode, m) {
@@ -375,7 +375,7 @@ static void bf16_gemv_trans(
     const at::BFloat16 beta,
     at::BFloat16* y,
     const int incy) {
-  return bf16_gemv_trans_stub(kCPU, m, n, alpha, a, lda, x, incx, beta, y, incy);
+  bf16_gemv_trans_stub(kCPU, m, n, alpha, a, lda, x, incx, beta, y, incy);
 }

 template <>
@@ -70,7 +70,7 @@ inline void searchsorted_maybe_trim_input_tensors(
     const Tensor& raw_boundaries) {
   Tensor trimmed_sorter;
   Tensor raw_sorter;
-  return searchsorted_maybe_trim_input_tensors(
+  searchsorted_maybe_trim_input_tensors(
       trimmed_input,
       trimmed_boundaries,
       trimmed_sorter,
@@ -25,11 +25,11 @@
 namespace at::native {

 void _backward(const Tensor& self, TensorList inputs, const std::optional<Tensor>& gradient_opt, std::optional<bool> keep_graph, bool create_graph) {
-  return self._backward(inputs, gradient_opt, keep_graph, create_graph);
+  self._backward(inputs, gradient_opt, keep_graph, create_graph);
 }

 void set_data(Tensor& self, const Tensor& new_data) {
-  return self.set_data(new_data);
+  self.set_data(new_data);
 }

 Tensor data(const Tensor& self) {
@@ -54,7 +54,7 @@ Tensor& requires_grad_(Tensor& self, bool _requires_grad) {
 }

 void retain_grad(Tensor& self) {
-  return self.retain_grad();
+  self.retain_grad();
 }

 bool retains_grad(const Tensor& self) {
@@ -300,7 +300,8 @@ void div_floor_kernel(TensorIteratorBase& iter) {
     // In the special case of unsigned integer division, floor division is
     // equivalent to truncation division (since the signs of the divisor and
     // dividend are always the same)
-    return div_trunc_kernel(iter);
+    div_trunc_kernel(iter);
+    return;
   } else if (isIntegralType(dtype, /*includeBool*/ false)) {
     // There's no SIMD integer division, so don't try to vectorize it.
     AT_DISPATCH_INTEGRAL_TYPES(dtype, "div_floor_cpu", [&]() {
@@ -749,21 +749,29 @@ void flip_kernel(TensorIterator& iter, const bool quantized) {
       // });

       if (iter_dtype == kByte) {
-        return cpu_hflip_vec<uint8_t>(iter);
+        cpu_hflip_vec<uint8_t>(iter);
+        return;
       } else if (iter_dtype == kChar) {
-        return cpu_hflip_vec<int8_t>(iter);
+        cpu_hflip_vec<int8_t>(iter);
+        return;
       } else if (iter_dtype == kInt) {
-        return cpu_hflip_vec<int32_t>(iter);
+        cpu_hflip_vec<int32_t>(iter);
+        return;
       } else if (iter_dtype == kLong) {
-        return cpu_hflip_vec<int64_t>(iter);
+        cpu_hflip_vec<int64_t>(iter);
+        return;
       } else if (iter_dtype == kShort) {
-        return cpu_hflip_vec<int16_t>(iter);
+        cpu_hflip_vec<int16_t>(iter);
+        return;
       } else if (iter_dtype == kBool) {
-        return cpu_hflip_vec<bool>(iter);
+        cpu_hflip_vec<bool>(iter);
+        return;
       } else if (iter_dtype == kFloat) {
-        return cpu_hflip_vec<float>(iter);
+        cpu_hflip_vec<float>(iter);
+        return;
       } else if (iter_dtype == kDouble) {
-        return cpu_hflip_vec<double>(iter);
+        cpu_hflip_vec<double>(iter);
+        return;
       }
     }
     // other dtypes (float16, bfloat16, complex) are handled by cpu_kernel_vec (see below)
@@ -778,10 +786,12 @@ void flip_kernel(TensorIterator& iter, const bool quantized) {
         c == input_strides_2[1] &&
         c == iter.element_size(0) * iter.shape()[0] // checks if dim=1 is contiguous as well
     ) {
-      return cpu_hflip_channels_last_vec(iter);
+      cpu_hflip_channels_last_vec(iter);
+      return;
     }
     // Special case: vertical flip using memcpy (faster than generic cpu_kernel_vec)
-    return cpu_vflip_memcpy(iter);
+    cpu_vflip_memcpy(iter);
+    return;
   }

   AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kBool, kHalf, kBFloat16, iter.dtype(), "flip_cpu",
@@ -96,11 +96,14 @@ static void pow_tensor_scalar_kernel(
       dtype == kBFloat16 || isComplexType(dtype)) {
     // Dispatch to fast specialization for sqrt, rsqrt and reciprocal
     if (exp_scalar.equal(.5)) {
-      return sqrt_kernel(iter);
+      sqrt_kernel(iter);
+      return;
     } else if (exp_scalar.equal(-0.5)) {
-      return rsqrt_kernel(iter);
+      rsqrt_kernel(iter);
+      return;
     } else if (exp_scalar.equal(-1.0)) {
-      return reciprocal_kernel(iter);
+      reciprocal_kernel(iter);
+      return;
     }
   }

@@ -256,10 +256,10 @@ static void norm_kernel_tensor_iterator_impl(
   } else {
     if (iter.input_dtype() == kHalf && iter.dtype(0) == kFloat) {
       // type promotion that does cast and reduction in a single kernel
-      return norm_kernel_cpu_impl<at::Half, float>(iter, val);
+      norm_kernel_cpu_impl<at::Half, float>(iter, val); return;
     } else if (iter.input_dtype() == kBFloat16 && iter.dtype(0) == kFloat) {
       // type promotion that does cast and reduction in a single kernel
-      return norm_kernel_cpu_impl<at::BFloat16, float>(iter, val);
+      norm_kernel_cpu_impl<at::BFloat16, float>(iter, val); return;
     }

     AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND3(kHalf, kBFloat16, kComplexHalf, iter.input_dtype(), "norm_cpu", [&] {
@@ -428,10 +428,11 @@ void fp16_gemv_trans(
   TORCH_INTERNAL_ASSERT_DEBUG_ONLY(incx == 1 && alpha == 1.0);
 #if !defined(__aarch64__) || defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
   if (at::globalContext().allowFP16ReductionCPU()) {
-    return fp16_gemv_trans_fp16_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
+    fp16_gemv_trans_fp16_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
+    return;
   }
 #endif
-  return fp16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
+  fp16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
 }

 float bf16_dot_with_fp32_arith(const at::BFloat16* vec1, const at::BFloat16* vec2, int64_t len) {
@@ -465,7 +466,7 @@ void bf16_gemv_trans(
     at::BFloat16* y,
     const int incy) {
   TORCH_INTERNAL_ASSERT_DEBUG_ONLY(incx == 1 && alpha == 1.0 && beta == 0.0);
-  return bf16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, y, incy);
+  bf16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, y, incy);
 }

 float fp16_dot(
@@ -121,7 +121,7 @@ void cufft_set_plan_cache_max_size_impl(DeviceIndex device_index, int64_t max_si
       "cufft_set_plan_cache_max_size: expected 0 <= device_index < ",
       at::detail::getCUDAHooks().deviceCount(), "], but got device_index=",
       device_index);
-  return cufft_get_plan_cache(device_index).resize(max_size);
+  cufft_get_plan_cache(device_index).resize(max_size);
 }

 int64_t cufft_get_plan_cache_size_impl(DeviceIndex device_index) {
@@ -137,7 +137,7 @@ void cufft_clear_plan_cache_impl(DeviceIndex device_index) {
       "cufft_clear_plan_cache: expected 0 <= device_index < ",
       at::detail::getCUDAHooks().deviceCount(), "], but got device_index=",
       device_index);
-  return cufft_get_plan_cache(device_index).clear();
+  cufft_get_plan_cache(device_index).clear();
 }

 } // namespace at::native::detail
@@ -1107,10 +1107,14 @@ void ldl_factor_kernel(
   auto preferred_backend = at::globalContext().linalgPreferredBackend();
   switch (preferred_backend) {
     case at::LinalgBackend::Cusolver:
-      return ldl_factor_cusolver(
+      { ldl_factor_cusolver(
           LD, pivots, info, upper, hermitian);
+        return;
+      }
     case at::LinalgBackend::Magma:
-      return ldl_factor_magma(LD, pivots, info, upper, hermitian);
+      { ldl_factor_magma(LD, pivots, info, upper, hermitian);
+        return;
+      }
     default:
       // By default use cusolver if available and magma otherwise.
       // If cusolver and magma 2.5.4+ are both available and hermitian=true,
@@ -1122,8 +1126,10 @@ void ldl_factor_kernel(
             LD, pivots, info, upper, hermitian);
       }
 #endif
-      return ldl_factor_cusolver(
-          LD, pivots, info, upper, hermitian);
+      { ldl_factor_cusolver(
+          LD, pivots, info, upper, hermitian);
+        return;
+      }
 #else
       return ldl_factor_magma(LD, pivots, info, upper, hermitian);
 #endif
@@ -1839,11 +1845,14 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) {
     // For the benchmarks see
     // https://github.com/pytorch/pytorch/pull/56253#discussion_r622851107
     if (input.size(-2) <= 256 && batchCount(input) >= std::max<int64_t>(2, input.size(-2) / 16)) {
-      return geqrf_batched_cublas(input, tau);
+      geqrf_batched_cublas(input, tau);
+      return;
     } else {
-      return geqrf_cusolver(input, tau);
+      geqrf_cusolver(input, tau);
+      return;
     }
-    return geqrf_batched_cublas(input, tau);
+    geqrf_batched_cublas(input, tau);
+    return;
   };

   auto preferred_backend = at::globalContext().linalgPreferredBackend();
@@ -1856,10 +1865,14 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) {
     // - ?geqrf_gpu allows fast computation of Q via ?orgqr_gpu, but doesn't give R properly.
     // - ?geqrf2_gpu gives correct R, but doesn't allow computation of Q via ?orgqr_gpu
     case at::LinalgBackend::Magma:
-      return geqrf_magma(input, tau);
+      { geqrf_magma(input, tau);
+        return;
+      }
     case at::LinalgBackend::Cusolver:
     default:
-      return geqrf_cusolver_backend(input, tau);
+      { geqrf_cusolver_backend(input, tau);
+        return;
+      }
   }
 #else
   return geqrf_magma(input, tau);
@@ -2703,13 +2716,17 @@ void gels_looped(const Tensor& a, Tensor& b, Tensor& infos) {
   auto preferred_backend = at::globalContext().linalgPreferredBackend();
   switch (preferred_backend) {
     case at::LinalgBackend::Magma:
-      return gels_magma(a, b, infos);
+      { gels_magma(a, b, infos);
+        return;
+      }
     case at::LinalgBackend::Cusolver:
     default:
      // linalg_lstsq_gels is a generic function that is implemented using
      // geqrf_stub, ormqr_stub, and triangular_solve_stub
      // It dispatches to cuSOLVER for CUDA inputs if USE_LINALG_SOLVER is defined
-      return linalg_lstsq_gels(a, b, infos);
+      { linalg_lstsq_gels(a, b, infos);
+        return;
+      }
   }
 #else
   return gels_magma(a, b, infos);
@@ -373,59 +373,67 @@ void addmm_out_sparse_csr(
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kStrided) {
         // TODO: Add native CSC support via cuSPARSE if supported.
-        return addmm_dense_result(
+        addmm_dense_result(
             mat2.transpose(0, 1).to_sparse_csr(),
             mat1.transpose(0, 1),
             beta,
             alpha,
             result.transpose(0, 1));
+        return;
       }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kStrided) {
-        return addmm_dense_result(
+        addmm_dense_result(
             mat2.transpose(-2, -1),
             mat1.transpose(-2, -1),
             beta,
             alpha,
             result.transpose(-2, -1));
+        return;
       }
     }
     if (mat2.layout() == kSparseBsc) {
       if (result.layout() == kStrided) {
-        return addmm_dense_result(
+        addmm_dense_result(
             mat2.transpose(-2, -1),
             mat1.transpose(-2, -1),
             beta,
             alpha,
             result.transpose(-2, -1));
+        return;
       }
     }
   }
   if (mat1.layout() == kSparseCsr) {
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided) {
-        return addmm_dense_result(mat1, mat2, beta, alpha, result);
+        addmm_dense_result(mat1, mat2, beta, alpha, result);
+        return;
       }
     }
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kStrided) {
-        return addmm_sparse_input_dense_result(mat1, mat2, beta, alpha, result);
+        addmm_sparse_input_dense_result(mat1, mat2, beta, alpha, result);
+        return;
       }
       if (result.layout() == kSparseCsr) {
-        return addmm_sparse_result(mat1, mat2, beta, alpha, result);
+        addmm_sparse_result(mat1, mat2, beta, alpha, result);
+        return;
       }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kStrided) {
         // TODO: CSR @ CSC kernel would be very fast due to format alignment
-        return addmm_sparse_input_dense_result(
-            mat1, mat2.to_sparse_csr(), beta, alpha, result);
+        addmm_sparse_input_dense_result(
+            mat1, mat2.to_sparse_csr(), beta, alpha, result);
+        return;
       }
       if (result.layout() == kSparseCsr) {
         // TODO: CSR @ CSC kernel would be very fast due to format alignment
-        return addmm_sparse_result(
-            mat1, mat2.to_sparse_csr(), beta, alpha, result);
+        addmm_sparse_result(
+            mat1, mat2.to_sparse_csr(), beta, alpha, result);
+        return;
       }
     }
   }
@@ -433,56 +441,62 @@ void addmm_out_sparse_csr(
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided) {
         // TODO: avoid csc->csr conversion with native csc support
-        return addmm_dense_result(
-            mat1.to_sparse_csr(), mat2, beta, alpha, result);
+        addmm_dense_result(
+            mat1.to_sparse_csr(), mat2, beta, alpha, result);
+        return;
       }
     }
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kSparseCsr) {
         // TODO: avoid csc->csr conversion with native csc support
-        return addmm_sparse_result(
-            mat1.to_sparse_csr(), mat2, beta, alpha, result);
+        addmm_sparse_result(
+            mat1.to_sparse_csr(), mat2, beta, alpha, result);
+        return;
       }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kStrided) {
-        return addmm_sparse_input_dense_result(
-            mat2.transpose(-2, -1),
-            mat1.transpose(-2, -1),
-            beta,
-            alpha,
-            result.transpose(-2, -1));
+        addmm_sparse_input_dense_result(
+            mat2.transpose(-2, -1),
+            mat1.transpose(-2, -1),
+            beta,
+            alpha,
+            result.transpose(-2, -1));
+        return;
       }
       if (result.layout() == kSparseCsr) {
         // TODO avoid csc->csr
-        return addmm_sparse_result(
-            mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
+        addmm_sparse_result(
+            mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
+        return;
      }
      if (result.layout() == kSparseCsc) {
-        return addmm_sparse_result(
-            mat2.transpose(-2, -1),
-            mat1.transpose(-2, -1),
-            beta,
-            alpha,
-            result.transpose(-2, -1));
+        addmm_sparse_result(
+            mat2.transpose(-2, -1),
+            mat1.transpose(-2, -1),
+            beta,
+            alpha,
+            result.transpose(-2, -1));
+        return;
      }
    }
  }
  if (mat1.layout() == kSparseBsr) {
    if (mat2.layout() == kStrided) {
      if (result.layout() == kStrided) {
-        return addmm_dense_result(mat1, mat2, beta, alpha, result);
+        addmm_dense_result(mat1, mat2, beta, alpha, result);
+        return;
      }
    }
  }
  TORCH_CHECK(
-      false,
-      "addmm: computation on CPU is not implemented for ",
-      result.layout(),
-      " + ",
-      mat1.layout(),
-      " @ ",
-      mat2.layout());
+      false,
+      "addmm: computation on CPU is not implemented for ",
+      result.layout(),
+      " + ",
+      mat1.layout(),
+      " @ ",
+      mat2.layout());
 }

 /*
@@ -496,16 +510,16 @@ void addmm_out_sparse_csr(
     [out] result of the operation.
 */
 void addmv_out_sparse_csr(
-    const Tensor& mat,
-    const Tensor& vec,
-    const Scalar& beta,
-    const Scalar& alpha,
-    const Tensor& result) {
+    const Tensor& mat,
+    const Tensor& vec,
+    const Scalar& beta,
+    const Scalar& alpha,
+    const Tensor& result) {
 #if !AT_USE_MKL_SPARSE()
   TORCH_CHECK(
-      false,
-      "Calling addmv on a sparse CPU tensor requires Linux platform. ",
-      "Please use PyTorch built with MKL on Linux.");
+      false,
+      "Calling addmv on a sparse CPU tensor requires Linux platform. ",
+      "Please use PyTorch built with MKL on Linux.");
 #else
   c10::MaybeOwned<Tensor> result_ = prepare_dense_vector_for_mkl(result);
   c10::MaybeOwned<Tensor> vec_ = prepare_dense_vector_for_mkl(vec);
@@ -810,7 +810,8 @@ void addmm_out_sparse_csr(
   if (mat1.layout() == kSparseBsr) {
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided)
-        return block_sparse_mm(input, mat1, mat2, beta, alpha, result);
+      { block_sparse_mm(input, mat1, mat2, beta, alpha, result); return;
+      }
     }
   }

@@ -819,13 +820,13 @@ void addmm_out_sparse_csr(
     if (result.layout() == kStrided) {
       auto result_t = result.transpose(-2, -1);
       auto input_t = (result.is_same(input) ? result_t : input.transpose(-2, -1));
-      return block_sparse_mm(
+      block_sparse_mm(
           input_t,
           mat2.transpose(-2, -1),
           mat1.transpose(-2, -1),
           beta,
           alpha,
-          result_t);
+          result_t); return;
     }
   }
 }
@@ -840,41 +841,41 @@ void addmm_out_sparse_csr(
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kStrided) {
         // TODO: Add native CSC support via cuSPARSE if supported.
-        return spmm(
+        spmm(
            mat2.transpose(0, 1).to_sparse_csr(),
            mat1.transpose(0, 1),
            beta,
            alpha,
-            result.transpose(0, 1));
+            result.transpose(0, 1)); return;
       }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kStrided) {
-        return spmm(
+        spmm(
            mat2.transpose(-2, -1),
            mat1.transpose(-2, -1),
            beta,
            alpha,
-            result.transpose(-2, -1));
+            result.transpose(-2, -1)); return;
       }
     }
   }
   if (mat1.layout() == kSparseCsr) {
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided) {
-        return spmm(mat1, mat2, beta, alpha, result);
+        spmm(mat1, mat2, beta, alpha, result); return;
       }
     }
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kSparseCsr) {
-        return spgemm(mat1, mat2, beta, alpha, result);
+        spgemm(mat1, mat2, beta, alpha, result); return;
       }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kSparseCsr) {
         // TODO: Add native CSC support via cuSPARSE if supported.
         // CSR @ CSC kernel would be very fast due to format alignment
-        return spgemm(mat1, mat2.to_sparse_csr(), beta, alpha, result);
+        spgemm(mat1, mat2.to_sparse_csr(), beta, alpha, result); return;
       }
     }
   }
@@ -882,27 +883,28 @@ void addmm_out_sparse_csr(
     if (mat2.layout() == kStrided) {
       if (result.layout() == kStrided) {
         // TODO: Add native CSC support via cuSPARSE if supported.
-        return spmm(mat1.to_sparse_csr(), mat2, beta, alpha, result);
+        spmm(mat1.to_sparse_csr(), mat2, beta, alpha, result); return;
       }
     }
     if (mat2.layout() == kSparseCsr) {
       if (result.layout() == kSparseCsr)
         // TODO: Add native CSC support via cuSPARSE if supported.
-        return spgemm(mat1.to_sparse_csr(), mat2, beta, alpha, result);
+      { spgemm(mat1.to_sparse_csr(), mat2, beta, alpha, result); return;
+      }
     }
     if (mat2.layout() == kSparseCsc) {
       if (result.layout() == kSparseCsr) {
         // TODO: Add native CSC support via cuSPARSE if supported.
-        return spgemm(
-            mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
+        spgemm(
+            mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result); return;
       }
       if (result.layout() == kSparseCsc) {
-        return spgemm(
+        spgemm(
            mat2.transpose(-2, -1),
            mat1.transpose(-2, -1),
            beta,
            alpha,
-            result.transpose(-2, -1));
+            result.transpose(-2, -1)); return;
       }
     }
   }
@@ -933,7 +935,7 @@ void addmv_out_sparse_csr(
     const Scalar& alpha,
     const Tensor& result) {
   if (mat.layout() == kSparseBsr) {
-    return block_sparse_mv(mat, vec, beta, alpha, result);
+    block_sparse_mv(mat, vec, beta, alpha, result); return;
   }
   cusparseOperation_t opA = CUSPARSE_OPERATION_NON_TRANSPOSE;

@@ -1213,9 +1215,9 @@ void triangular_solve_out_sparse_csr(
   }
   if (A.layout() == kSparseBsr) {
     if (B.size(-1) == 1) {
-      return block_sparse_triangular_solve_vec(A, B, X, upper, transpose, unitriangular);
+      block_sparse_triangular_solve_vec(A, B, X, upper, transpose, unitriangular); return;
     } else {
-      return block_sparse_triangular_solve_mat(A, B, X, upper, transpose, unitriangular);
+      block_sparse_triangular_solve_mat(A, B, X, upper, transpose, unitriangular); return;
     }
   }
 #ifdef USE_ROCM
@@ -127,7 +127,7 @@ struct Event final {
   }

   void synchronize() const {
-    return impl_.synchronize();
+    impl_.synchronize();
   }

  private:
@@ -149,7 +149,7 @@ struct C10_API Storage {
   }

   void set_data_ptr_noswap(at::DataPtr&& data_ptr) const {
-    return storage_impl_->set_data_ptr_noswap(std::move(data_ptr));
+    storage_impl_->set_data_ptr_noswap(std::move(data_ptr));
   }

   DeviceType device_type() const {
@@ -94,11 +94,11 @@ class VirtualGuardImpl final : public DeviceGuardImplInterface {
   }

   void synchronizeEvent(void* event) const override {
-    return impl_->synchronizeEvent(event);
+    impl_->synchronizeEvent(event);
   }

   void synchronizeDevice(const DeviceIndex device_index) const override {
-    return impl_->synchronizeDevice(device_index);
+    impl_->synchronizeDevice(device_index);
   }

  private:
@@ -360,11 +360,11 @@ inline void* raw_alloc_with_stream(size_t nbytes, cudaStream_t stream) {
 }

 inline void raw_delete(void* ptr) {
-  return get()->raw_delete(ptr);
+  get()->raw_delete(ptr);
 }

 inline void init(int device_count) {
-  return get()->init(device_count);
+  get()->init(device_count);
 }

 inline double getMemoryFraction(c10::DeviceIndex device) {
@@ -372,7 +372,7 @@ inline double getMemoryFraction(c10::DeviceIndex device) {
 }

 inline void setMemoryFraction(double fraction, c10::DeviceIndex device) {
-  return get()->setMemoryFraction(fraction, device);
+  get()->setMemoryFraction(fraction, device);
 }

 inline std::vector<StreamSegmentSize> getExpandableSegmentSizes(
@@ -381,11 +381,11 @@ inline std::vector<StreamSegmentSize> getExpandableSegmentSizes(
 }

 inline void emptyCache(MempoolId_t mempool_id = {0, 0}) {
-  return get()->emptyCache(mempool_id);
+  get()->emptyCache(mempool_id);
 }

 inline void enable(bool value) {
-  return get()->enable(value);
+  get()->enable(value);
 }

 inline bool isEnabled() {
@@ -393,7 +393,7 @@ inline bool isEnabled() {
 }

 inline void cacheInfo(c10::DeviceIndex device, size_t* largestBlock) {
-  return get()->cacheInfo(device, largestBlock);
+  get()->cacheInfo(device, largestBlock);
 }

 inline void* getBaseAllocation(void* ptr, size_t* size) {
@@ -401,7 +401,7 @@ inline void* getBaseAllocation(void* ptr, size_t* size) {
 }

 inline void recordStream(const DataPtr& dataPtr, CUDAStream stream) {
-  return get()->recordStream(dataPtr, stream);
+  get()->recordStream(dataPtr, stream);
 }

 inline c10::CachingDeviceAllocator::DeviceStats getDeviceStats(
@@ -410,11 +410,11 @@ inline c10::CachingDeviceAllocator::DeviceStats getDeviceStats(
 }

 inline void resetAccumulatedStats(c10::DeviceIndex device) {
-  return get()->resetAccumulatedStats(device);
+  get()->resetAccumulatedStats(device);
 }

 inline void resetPeakStats(c10::DeviceIndex device) {
-  return get()->resetPeakStats(device);
+  get()->resetPeakStats(device);
 }

 inline SnapshotInfo snapshot(MempoolId_t mempool_id = {0, 0}) {
@@ -451,21 +451,21 @@ inline void recordHistory(
     size_t alloc_trace_max_entries,
     RecordContext when,
     bool clearHistory) {
-  return get()->recordHistory(
+  get()->recordHistory(
       enabled, context_recorder, alloc_trace_max_entries, when, clearHistory);
 }

 inline void recordAnnotation(
     const std::vector<std::pair<std::string, std::string>>& md) {
-  return get()->recordAnnotation(md);
+  get()->recordAnnotation(md);
 }

 inline void pushCompileContext(std::string& md) {
-  return get()->pushCompileContext(md);
+  get()->pushCompileContext(md);
 }

 inline void popCompileContext() {
-  return get()->popCompileContext();
+  get()->popCompileContext();
 }

 inline bool isHistoryEnabled() {
@@ -481,15 +481,15 @@ inline bool checkPoolLiveAllocations(
 }

 inline void attachOutOfMemoryObserver(OutOfMemoryObserver observer) {
-  return get()->attachOutOfMemoryObserver(std::move(observer));
+  get()->attachOutOfMemoryObserver(std::move(observer));
 }

 inline void attachAllocatorTraceTracker(AllocatorTraceTracker tracker) {
-  return get()->attachAllocatorTraceTracker(std::move(tracker));
+  get()->attachAllocatorTraceTracker(std::move(tracker));
 }

 inline void releasePool(c10::DeviceIndex device, MempoolId_t mempool_id) {
-  return get()->releasePool(device, mempool_id);
+  get()->releasePool(device, mempool_id);
 }
 inline void createOrIncrefPool(
     c10::DeviceIndex device,
@@ -533,7 +533,7 @@ inline cudaError_t memcpyAsync(
 inline void enablePeerAccess(
     c10::DeviceIndex dev,
     c10::DeviceIndex dev_to_access) {
-  return get()->enablePeerAccess(dev, dev_to_access);
+  get()->enablePeerAccess(dev, dev_to_access);
 }

 } // namespace c10::cuda::CUDACachingAllocator
@@ -49,7 +49,7 @@ class DynamicBackendWrapper : public WaitCounterBackendIf {

   void stop(std::chrono::steady_clock::time_point now, intptr_t ctx) noexcept
       override {
-    return impl_.stop(
+    impl_.stop(
         impl_.self,
         std::chrono::duration_cast<std::chrono::microseconds>(
            now.time_since_epoch())
@@ -162,6 +162,6 @@ WaitCounterHandle::WaitGuard WaitCounterHandle::start() {
 }

 void WaitCounterHandle::stop(const SmallVector<intptr_t>& ctxs) {
-  return impl_.stop(ctxs);
+  impl_.stop(ctxs);
 }
 } // namespace c10::monitor