Fix unbind_copy and add its decomposition (#134319)

* Fixes https://github.com/pytorch/pytorch/issues/130829

Pull Request resolved: https://github.com/pytorch/pytorch/pull/134319
Approved by: https://github.com/amjames, https://github.com/eellison
This commit is contained in:
Tom Ritchford 2024-10-23 13:57:49 +00:00 committed by PyTorch MergeBot
parent cd9c6e9408
commit 8aedc649bd
13 changed files with 107 additions and 28 deletions

View File

@ -26,6 +26,7 @@
#include <ATen/native/cpu/SerialStackImpl.h>
#include <ATen/native/cpu/StackKernel.h>
#include <ATen/quantized/QTensorImpl.h>
#include <c10/core/GradMode.h>
#include <c10/util/Exception.h>
#include <optional>
#include <c10/util/SmallVector.h>
@ -4071,29 +4072,41 @@ void split_copy_Tensor_out(const at::Tensor & self, int64_t split_size, int64_t
}
}
void split_with_sizes_copy_out(const at::Tensor & self, at::IntArrayRef split_sizes, int64_t dim, at::TensorList out) {
auto tmp = self.split_with_sizes(split_sizes, dim);
namespace {
TORCH_CHECK(out.size() == tmp.size(), "split_with_sizes_copy_out() expected an out= argument of size ", tmp.size(), ", got size ", out.size());
void copy_tensor_array_to_out(const char* name, const std::vector<Tensor>& array, at::TensorList out) {
TORCH_CHECK(out.size() == array.size(), name, " expected an out= argument of size ", array.size(), ", got size ", out.size());
for (const auto i : c10::irange(out.size())) {
if (resize_output_check(out[i], tmp[i].sizes())) {
out[i].resize_(tmp[i].sizes());
if (resize_output_check(out[i], array[i].sizes())) {
out[i].resize_(array[i].sizes());
}
TORCH_CHECK(out[i].dtype() == tmp[i].dtype(),
"Expected out tensor to have dtype ", tmp[i].dtype(), ", but got ", out[i].dtype(), " instead");
TORCH_CHECK(out[i].device() == tmp[i].device(),
"Expected out tensor to have device ", tmp[i].device(), ", but got ", out[i].device(), " instead");
out[i].copy_(tmp[i]);
TORCH_CHECK(out[i].dtype() == array[i].dtype(),
"Expected out tensor to have dtype ", array[i].dtype(), ", but got ", out[i].dtype(), " instead");
TORCH_CHECK(out[i].device() == array[i].device(),
"Expected out tensor to have device ", array[i].device(), ", but got ", out[i].device(), " instead");
out[i].copy_(array[i]);
}
}
void unbind_copy_int_out(const at::Tensor & self, int64_t dim, at::TensorList out) {
auto tmp = self.unbind(dim);
}
TORCH_CHECK(out.size() == tmp.size(), "unbind_copy_int_out() expected an out= argument of size ", tmp.size(), ", got size ", out.size());
for (const auto i : c10::irange(out.size())) {
out[i].copy_(tmp[i]);
// out= variant of split_with_sizes_copy: splits `self` along `dim` into
// chunks of the given `split_sizes` and hands the resulting pieces to
// copy_tensor_array_to_out, which writes them into `out`.
void split_with_sizes_copy_out(const at::Tensor & self, at::IntArrayRef split_sizes, int64_t dim, at::TensorList out) {
  const auto pieces = self.split_with_sizes(split_sizes, dim);
  copy_tensor_array_to_out("split_with_sizes_copy_out()", pieces, out);
}
// out= variant of unbind_copy: unbinds `self` along `dim` and hands the
// resulting slices to copy_tensor_array_to_out, which writes them into `out`.
//
// While grad mode is enabled, every destination tensor is checked first and
// the call is rejected if any of them requires grad.
void unbind_copy_int_out(const at::Tensor & self, int64_t dim, at::TensorList out) {
  if (at::GradMode::is_enabled()) {
    // Reject the call up front, before anything is written into `out`.
    for (const auto& destination : out) {
      TORCH_CHECK(!destination.requires_grad(),
        "unbind_copy(): functions with out=... arguments don't support automatic differentiation, "
        "but one of the arguments requires grad."
      );
    }
  }

  const auto slices = self.unbind(dim);
  copy_tensor_array_to_out("unbind_copy_int_out()", slices, out);
}
int64_t sparse_dim_default(const Tensor& self) {

View File

@ -446,6 +446,7 @@ dtensor_fails = {
xfail("trapz"),
xfail("triangular_solve"),
xfail("unbind"),
xfail("unbind_copy"),
xfail("unfold"),
xfail("unfold_copy"),
xfail("uniform"),

View File

@ -506,6 +506,8 @@ aten::triu_indices.out
aten::trunc
aten::trunc.out
aten::trunc_
aten::unbind_copy.int
aten::unbind_copy.int_out
aten::unfold
aten::uniform
aten::uniform.out

View File

@ -1292,8 +1292,6 @@ aten::topk.values
aten::transpose_
aten::triangular_solve
aten::triangular_solve.X
aten::unbind_copy.int
aten::unbind_copy.int_out
aten::unique_consecutive
aten::unique_consecutive.out
aten::unique_dim

View File

@ -1038,6 +1038,9 @@ class TestOperators(TestCase):
xfail("_native_batch_norm_legit"),
# TODO: implement batching rule
xfail("_batch_norm_with_update"),
xfail(
"unbind_copy"
), # Batching rule not implemented for aten::unbind_copy.int.
}
),
)
@ -1177,6 +1180,9 @@ class TestOperators(TestCase):
xfail("sparse.mm", "reduce"),
xfail("as_strided_scatter", ""), # calls as_strided
xfail("index_reduce", "prod"), # .item() call
xfail(
"unbind_copy"
), # Batching rule not implemented for aten::unbind_copy.int.
# ---------------------------------------------------------------------
}
)
@ -1315,6 +1321,9 @@ class TestOperators(TestCase):
xfail("_native_batch_norm_legit"),
# TODO: implement batching rule
xfail("_batch_norm_with_update"),
xfail(
"unbind_copy"
), # Batching rule not implemented for aten::unbind_copy.int.
# ----------------------------------------------------------------------
}
@ -1628,6 +1637,9 @@ class TestOperators(TestCase):
xfail("__getitem__", ""),
xfail("index_put", ""),
xfail("view_as_complex"),
xfail(
"unbind_copy"
), # Batching rule not implemented for aten::unbind_copy.int.
xfail("nn.functional.gaussian_nll_loss"),
xfail("masked_select"),
xfail(
@ -1922,6 +1934,9 @@ class TestOperators(TestCase):
xfail(
"as_strided_scatter"
), # AssertionError: Tensor-likes are not close!
xfail(
"unbind_copy"
), # Batching rule not implemented for aten::unbind_copy.int.
xfail("bernoulli"), # calls random op
xfail("bfloat16"), # required rank 4 tensor to use channels_last format
xfail("cdist"), # Forward AD not implemented and no decomposition

View File

@ -4375,6 +4375,9 @@ class TestVmapOperatorsOpInfo(TestCase):
xfail("torch.ops.aten._efficient_attention_forward"), # outputs ints
# TypeError: expected Tensor as element 0 in argument 0, but got float
xfail("item"),
xfail(
"unbind_copy"
), # Batching rule not implemented for aten::unbind_copy.int.
}
),
)
@ -4450,6 +4453,9 @@ class TestVmapOperatorsOpInfo(TestCase):
xfail("item"),
xfail("tril"), # Exception not raised on error input
xfail("triu"), # Exception not raised on error input
xfail(
"unbind_copy"
), # Batching rule not implemented for aten::unbind_copy.int.
xfail("__getitem__", ""),
xfail("count_nonzero"),
xfail(

View File

@ -350,6 +350,7 @@ def mps_ops_modifier(ops):
'transpose_copy',
'T',
'unbind',
'unbind_copy',
'unflatten',
'unfold',
'unfold_copy',

View File

@ -241,6 +241,7 @@ GRADIENT_IMPLEMENTED_FOR_COMPLEX = {
"slice",
"constant_pad_nd",
"unbind",
"unbind_copy",
"split",
"split_with_sizes",
"unsafe_split",

View File

@ -83,6 +83,7 @@ inductor_decompositions = get_decompositions(
aten._to_copy,
aten.tril_indices,
aten.triu_indices,
aten.unbind_copy.int,
aten.upsample_bilinear2d.vec,
quantized.linear_dynamic_fp16_unpacked_weight,
_quantized.wrapped_quantized_linear,

View File

@ -129,6 +129,8 @@ class TorchRefsMode(torch.overrides.TorchFunctionMode):
func = torch._decomp.decomposition_table.get(orig_func, None)
elif func is None and isinstance(orig_func, torch._ops.OpOverloadPacket):
default = getattr(orig_func, "default", None)
if default is None and orig_func._dir:
default = getattr(orig_func, orig_func._dir[0], None)
if default is not None:
func = torch._decomp.decomposition_table.get(default, None)

View File

@ -2,7 +2,16 @@
import inspect
import warnings
from functools import wraps
from typing import Callable, NamedTuple, Optional, overload, Sequence, Tuple, TypeVar
from typing import (
Callable,
List,
NamedTuple,
Optional,
overload,
Sequence,
Tuple,
TypeVar,
)
from typing_extensions import ParamSpec
import torch
@ -288,11 +297,17 @@ def out_wrapper(
else:
result = fn(*args, **kwargs)
assert (
isinstance(result, TensorLike)
and is_tensor
or isinstance(result, Tuple) # type: ignore[arg-type]
and len(result) == len(out_names) # type: ignore[arg-type]
(isinstance(result, TensorLike) and is_tensor)
or (
isinstance(result, Tuple) # type: ignore[arg-type]
and len(result) == len(out_names) # type: ignore[arg-type]
)
or (
fn.__name__ == "unbind"
and isinstance(result, (List, Tuple)) # type: ignore[arg-type]
)
)
# unbind_copy is a special case: see https://github.com/pytorch/pytorch/issues/130829
if out is not None:
# Naively you might expect this assert to be true, but
# it's not:
@ -310,7 +325,7 @@ def out_wrapper(
# the output tensor, but not the result--which will
# be a normal meta tensor, but this is perfectly
# harmless.
if is_tensor:
if is_tensor and fn.__name__ != "unbind":
assert isinstance(out, TensorLike)
# These two operations are done in-place
_maybe_resize_out(
@ -318,7 +333,10 @@ def out_wrapper(
)
_safe_copy_out(copy_from=result, copy_to=out, exact_dtype=exact_dtype) # type: ignore[arg-type]
else:
assert isinstance(out, Tuple) # type: ignore[arg-type]
if fn.__name__ != "unbind":
assert isinstance(out, Tuple) # type: ignore[arg-type]
else:
assert isinstance(out, (List, Tuple)) # type: ignore[arg-type]
torch._check_type(
len(out) == len(result), # type: ignore[arg-type]
lambda: f"expected tuple of {len(result)} elements but got {len(out)}", # type: ignore[arg-type]

View File

@ -305,6 +305,7 @@ __all__ = [
"tensor_split",
"transpose",
"transpose_copy",
"unbind_copy",
"unfold",
"unfold_copy",
"unsqueeze",
@ -6382,6 +6383,7 @@ squeeze_copy = _make_copy_from_view(aten.squeeze)
permute_copy = _make_copy_from_view(aten.permute)
t_copy = _make_copy_from_view(aten.t)
transpose_copy = _make_copy_from_view(aten.transpose)
unbind_copy = _make_copy_from_view(aten.unbind)
unsqueeze_copy = _make_copy_from_view(aten.unsqueeze)
view_copy = _make_copy_from_view(aten.view)

View File

@ -19455,6 +19455,25 @@ op_db: List[OpInfo] = [
supports_gradgrad=True,
supports_out=False,
),
OpInfo('unbind_copy',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
ref=reference_unbind,
sample_inputs_func=sample_inputs_unbind,
error_inputs_func=error_inputs_unbind,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_gradgrad=True,
supports_out=True,
check_batched_grad=False,
skips=(
# Expected __torch_dispatch__ for aten::unbind_copy.int_out to return None
# but it returned something else instead.
DecorateInfo(
unittest.expectedFailure,
'TestProxyTensorOpInfo',
'test_make_fx_symbolic_exhaustive_out'
),
)),
OpInfo('vstack',
aliases=('row_stack',),
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
@ -24056,10 +24075,6 @@ python_ref_db = [
PythonRefInfo(
"_refs.transpose_copy",
torch_opinfo_name="transpose_copy",
skips=(
# RuntimeError: no _refs support for torch.Tensor.is_conj
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref'),
),
supports_out=True,
),
PythonRefInfo(
@ -24076,6 +24091,10 @@ python_ref_db = [
torch_opinfo_name="T",
error_inputs_func=partial(error_inputs_T, has_ndims_error=True),
),
PythonRefInfo(
"_refs.unbind_copy",
torch_opinfo_name="unbind_copy",
),
PythonRefInfo(
"_refs.unfold",
torch_opinfo_name="unfold",