mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 00:20:18 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/61858 This PR adds `triangular_solve_out_sparse_csr_cuda`. The operation is used to compute the solution to a linear system where the coefficient matrix is triangular. Structured kernels are used, and the meta function needed some changes to support the sparse CSR layout. With sparse matrix input, the `cloned_coefficient` tensor is a 0-sized tensor. cc nikitaved pearu cpuhrsch IvanYashchuk ngimel Test Plan: Imported from OSS Reviewed By: pbelevich Differential Revision: D31948435 Pulled By: cpuhrsch fbshipit-source-id: 7775fece83ca705a26d75f82aead10b956b14bfd
177 lines
5.3 KiB
C++
177 lines
5.3 KiB
C++
#include <ATen/ATen.h>
|
||
#include <ATen/Dispatch.h>
|
||
#include <ATen/cuda/CUDASparse.h>
|
||
#include <ATen/native/Resize.h>
|
||
#include <ATen/native/sparse/cuda/SparseBlasImpl.h>
|
||
|
||
#include <c10/util/MaybeOwned.h>
|
||
|
||
namespace at {
|
||
namespace native {
|
||
|
||
// Computes result = beta * self + alpha * (mat1 @ mat2) on CUDA, where
// `mat1` is a sparse CSR matrix; `mat2` and `result` may be strided or
// sparse CSR. Writes into `result` and returns it.
Tensor& addmm_out_sparse_csr_dense_cuda(
    const Tensor& self,
    const Tensor& mat1,
    const Tensor& mat2,
    const Scalar& beta,
    const Scalar& alpha,
    Tensor& result) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(mat1.is_sparse_csr());

  // Same checks as in TORCH_META_FUNC(addmm) at
  // aten/src/ATen/native/LinearAlgebra.cpp
  TORCH_CHECK(
      mat1.dim() == 2, "mat1 must be a matrix, got ", mat1.dim(), "-D tensor");
  TORCH_CHECK(
      mat2.dim() == 2, "mat2 must be a matrix, got ", mat2.dim(), "-D tensor");

  IntArrayRef mat1_sizes = mat1.sizes();
  IntArrayRef mat2_sizes = mat2.sizes();
  TORCH_CHECK(
      mat1_sizes[1] == mat2_sizes[0],
      "mat1 and mat2 shapes cannot be multiplied (",
      mat1_sizes[0],
      "x",
      mat1_sizes[1],
      " and ",
      mat2_sizes[0],
      "x",
      mat2_sizes[1],
      ")");

  // From addmm_out_cuda_impl at ATen/native/cuda/Blas.cpp
  // TODO: remove code duplication and unify code
  // There were undefined symbol problems,
  // when using the same function for CUDA and SparseCsrCUDA dispatch keys
  // Also structured kernels do not support sparse output
  IntArrayRef self__sizes;
  c10::MaybeOwned<Tensor> self_;
  if (&result != &self && self.layout() == kStrided) {
    // Out-of-place with a strided `self`: broadcast `self` up to the
    // product's shape (m x n) so it can be copied into `result` below.
    self_ = expand_size(self, {mat1_sizes[0], mat2_sizes[1]}, "addmm");
    self__sizes = self_->sizes();
  } else {
    // In-place (`result` aliases `self`) or non-strided `self`: no
    // expansion; the shape must already match the product's shape.
    self_ = c10::MaybeOwned<Tensor>::borrowed(self);
    self__sizes = self_->sizes();
    TORCH_CHECK(result.dim() == 2, "tensors must be 2-D");
    TORCH_CHECK(
        self__sizes[0] == mat1_sizes[0], "self_ dim 0 must match mat1 dim 0");
    TORCH_CHECK(
        self__sizes[1] == mat2_sizes[1], "self_ dim 1 must match mat2 dim 1");
  }

  if (&result != &self) {
    // Resize `result` to match (layout-appropriate resize), then stage the
    // (possibly expanded) `self` values in it.
    if (result.layout() == kStrided) {
      at::native::resize_output(result, self__sizes);
    } else {
      at::native::resize_as_sparse_csr_(result, *self_);
    }
    result.copy_(*self_);
  }

  // Shortcut: nothing to compute for a zero-sized result.
  IntArrayRef result_sizes = result.sizes();
  if ((result_sizes[0] == 0) || (result_sizes[1] == 0)) {
    return result;
  }

  if (mat1._nnz() == 0 && mat2.layout() == kStrided) {
    // mat1 has no stored values, so the product term is zero and the
    // result reduces to beta * self.
    // According to docs, when beta==0 values in self should be ignored
    // nans and infs should not propagate
    if (beta.toComplexDouble() == 0.) {
      result.zero_();
    } else {
      result.mul_(beta);
    }
    return result;
  }

  if (mat2.is_sparse_csr() && (mat1._nnz() == 0 || mat2._nnz() == 0)) {
    // Sparse result with an all-zero operand: only scale/zero the stored
    // values of `result`; its sparsity pattern is left as staged above.
    if (beta.toComplexDouble() == 0.) {
      result.values().zero_();
    } else {
      result.values().mul_(beta);
    }
    return result;
  }

  // General case: dispatch to the CUDA sparse blas backend.
  sparse::impl::cuda::addmm_out_sparse_csr(mat1, mat2, beta, alpha, result);
  return result;
}
|
||
|
||
// Computes result = beta * self + alpha * (mat @ vec) on CUDA, where `mat`
// is a sparse CSR matrix and `vec` is a 1-D dense tensor. Writes into
// `result` and returns it.
Tensor& addmv_out_sparse_csr_cuda(
    const Tensor& self,
    const Tensor& mat,
    const Tensor& vec,
    const Scalar& beta,
    const Scalar& alpha,
    Tensor& result) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(mat.is_sparse_csr());

  TORCH_CHECK(mat.dim() == 2, "addmv: Expected mat to be 2-D");
  TORCH_CHECK(vec.dim() == 1, "addmv: Expected vec to be 1-D");

  // Preprocessing code is copied from TORCH_IMPL_FUNC(addmv_out_cuda) at
  // aten/src/ATen/native/cuda/Blas.cpp
  // It would be nice to have it unified but there were undefined symbol
  // problems, when using the same function for CUDA and SparseCsrCUDA dispatch
  // keys and structured kernel
  c10::MaybeOwned<Tensor> self_ = expand_size(self, {mat.size(0)});
  auto betaval = beta.toComplexDouble();

  if (&result != &self) {
    at::native::resize_output(result, self_->sizes());
    if (betaval != 0.0) {
      // Stage `self` in `result`; presumably the beta scaling happens
      // inside the backend mv call — verify against SparseBlasImpl.
      at::native::copy_(result, *self_);
    }
  }

  if (mat._nnz() == 0) {
    // shortcut for an empty matrix
    // By definition, when beta==0, values in self should be ignored. nans and
    // infs should not propagate
    if (betaval == 0.0) {
      return result.zero_();
    } else {
      // result = beta * self, with beta materialized as a CPU scalar tensor.
      return at::mul_out(
          const_cast<Tensor&>(result),
          self,
          at::native::scalar_tensor(
              beta,
              self.scalar_type(),
              c10::nullopt /* layout */,
              at::kCPU,
              c10::nullopt /* pin_memory */));
    }
  }

  // General case: dispatch to the CUDA sparse blas backend.
  sparse::impl::cuda::addmv_out_sparse_csr(mat, vec, beta, alpha, result);
  return result;
}
|
||
|
||
/*
|
||
Solves a system of linear equations whose coefficients are represented in a sparse triangular matrix A:
|
||
op(A) X = B.
|
||
|
||
Args:
|
||
* `B` - dense Tensor of size m × nrhs.
|
||
* `A` - sparse Tensor of size m × m.
|
||
* `upper` - controls whether upper or lower triangular part of A is considered in computations.
|
||
* `transpose` - if true then op(A) = A^T.
|
||
* `unitriangular` - if true then the diagonal elements of A are assumed to be one.
|
||
* `X` - dense Tensor of size m × nrhs.
|
||
* `clone_A` - cloned matrix A, required only for compatibility with strided layout interface.
|
||
*/
|
||
std::tuple<Tensor&, Tensor&> triangular_solve_out_sparse_csr_cuda(
|
||
const Tensor& B,
|
||
const Tensor& A,
|
||
bool upper,
|
||
bool transpose,
|
||
bool unitriangular,
|
||
Tensor& X,
|
||
Tensor& clone_A) {
|
||
sparse::impl::cuda::triangular_solve_out_sparse_csr(A, B, X, upper, transpose, unitriangular);
|
||
return std::tuple<Tensor&, Tensor&>(X, clone_A);
|
||
}
|
||
|
||
} // namespace native
|
||
} // namespace at
|