Fix unbind_copy and add its decomposition (#134319)

* Fixes https://github.com/pytorch/pytorch/issues/130829

Pull Request resolved: https://github.com/pytorch/pytorch/pull/134319
Approved by: https://github.com/amjames, https://github.com/eellison
2025-12-06 12:20:52 +01:00 · 2024-10-23 13:57:49 +00:00 · 2024-10-23 13:57:49 +00:00 · 8aedc649bd
commit 8aedc649bd
parent cd9c6e9408
13 changed files with 107 additions and 28 deletions
--- a/aten/src/ATen/native/TensorShape.cpp
+++ b/aten/src/ATen/native/TensorShape.cpp
@ -26,6 +26,7 @@
 #include <ATen/native/cpu/SerialStackImpl.h>
 #include <ATen/native/cpu/StackKernel.h>
 #include <ATen/quantized/QTensorImpl.h>
+#include <c10/core/GradMode.h>
 #include <c10/util/Exception.h>
 #include <optional>
 #include <c10/util/SmallVector.h>
@ -4071,29 +4072,41 @@ void split_copy_Tensor_out(const at::Tensor & self, int64_t split_size, int64_t
  }
 }

-void split_with_sizes_copy_out(const at::Tensor & self, at::IntArrayRef split_sizes, int64_t dim, at::TensorList  out) {
-  auto tmp = self.split_with_sizes(split_sizes, dim);
+namespace {

-  TORCH_CHECK(out.size() == tmp.size(), "split_with_sizes_copy_out() expected an out= argument of size ", tmp.size(), ", got size ", out.size());
+void copy_tensor_array_to_out(const char* name, const std::vector<Tensor>& array, at::TensorList out) {
+  TORCH_CHECK(out.size() == array.size(), name, " expected an out= argument of size ", array.size(), ", got size ", out.size());
  for (const auto i : c10::irange(out.size())) {
-    if (resize_output_check(out[i], tmp[i].sizes())) {
-      out[i].resize_(tmp[i].sizes());
+    if (resize_output_check(out[i], array[i].sizes())) {
+      out[i].resize_(array[i].sizes());
    }
-    TORCH_CHECK(out[i].dtype() == tmp[i].dtype(),
-        "Expected out tensor to have dtype ", tmp[i].dtype(), ", but got ", out[i].dtype(), " instead");
-    TORCH_CHECK(out[i].device() == tmp[i].device(),
-        "Expected out tensor to have device ", tmp[i].device(), ", but got ", out[i].device(), " instead");
-    out[i].copy_(tmp[i]);
+    TORCH_CHECK(out[i].dtype() == array[i].dtype(),
+        "Expected out tensor to have dtype ", array[i].dtype(), ", but got ", out[i].dtype(), " instead");
+    TORCH_CHECK(out[i].device() == array[i].device(),
+        "Expected out tensor to have device ", array[i].device(), ", but got ", out[i].device(), " instead");
+    out[i].copy_(array[i]);
  }
 }

-void unbind_copy_int_out(const at::Tensor & self, int64_t dim, at::TensorList  out) {
-  auto tmp = self.unbind(dim);
+}

-  TORCH_CHECK(out.size() == tmp.size(), "unbind_copy_int_out() expected an out= argument of size ", tmp.size(), ", got size ", out.size());
-  for (const auto i : c10::irange(out.size())) {
-    out[i].copy_(tmp[i]);
+void split_with_sizes_copy_out(const at::Tensor & self, at::IntArrayRef split_sizes, int64_t dim, at::TensorList out) {
+  auto tmp = self.split_with_sizes(split_sizes, dim);
+  copy_tensor_array_to_out("split_with_sizes_copy_out()", tmp, out);
+}
+
+void unbind_copy_int_out(const at::Tensor & self, int64_t dim, at::TensorList out) {
+  if (at::GradMode::is_enabled()) {
+    for (const auto i : c10::irange(out.size())) {
+      TORCH_CHECK(!out[i].requires_grad(),
+        "unbind_copy(): functions with out=... arguments don't support automatic differentiation, "
+        "but one of the arguments requires grad."
+      );
+    }
  }
+
+  auto tmp = self.unbind(dim);
+  copy_tensor_array_to_out("unbind_copy_int_out()", tmp, out);
 }

 int64_t sparse_dim_default(const Tensor& self) {
--- a/test/distributed/_tensor/test_dtensor_ops.py
+++ b/test/distributed/_tensor/test_dtensor_ops.py
@ -446,6 +446,7 @@ dtensor_fails = {
    xfail("trapz"),
    xfail("triangular_solve"),
    xfail("unbind"),
+    xfail("unbind_copy"),
    xfail("unfold"),
    xfail("unfold_copy"),
    xfail("uniform"),
--- a/test/expect/HasDecompTest.test_aten_core_operators.expect
+++ b/test/expect/HasDecompTest.test_aten_core_operators.expect
@ -506,6 +506,8 @@ aten::triu_indices.out
 aten::trunc
 aten::trunc.out
 aten::trunc_
+aten::unbind_copy.int
+aten::unbind_copy.int_out
 aten::unfold
 aten::uniform
 aten::uniform.out
--- a/test/expect/HasDecompTest.test_has_decomposition.expect
+++ b/test/expect/HasDecompTest.test_has_decomposition.expect
@ -1292,8 +1292,6 @@ aten::topk.values
 aten::transpose_
 aten::triangular_solve
 aten::triangular_solve.X
-aten::unbind_copy.int
-aten::unbind_copy.int_out
 aten::unique_consecutive
 aten::unique_consecutive.out
 aten::unique_dim
--- a/test/functorch/test_ops.py
+++ b/test/functorch/test_ops.py
@ -1038,6 +1038,9 @@ class TestOperators(TestCase):
                xfail("_native_batch_norm_legit"),
                # TODO: implement batching rule
                xfail("_batch_norm_with_update"),
+                xfail(
+                    "unbind_copy"
+                ),  # Batching rule not implemented for aten::unbind_copy.int.
            }
        ),
    )
@ -1177,6 +1180,9 @@ class TestOperators(TestCase):
            xfail("sparse.mm", "reduce"),
            xfail("as_strided_scatter", ""),  # calls as_strided
            xfail("index_reduce", "prod"),  # .item() call
+            xfail(
+                "unbind_copy"
+            ),  # Batching rule not implemented for aten::unbind_copy.int.
            # ---------------------------------------------------------------------
        }
    )
@ -1315,6 +1321,9 @@ class TestOperators(TestCase):
        xfail("_native_batch_norm_legit"),
        # TODO: implement batching rule
        xfail("_batch_norm_with_update"),
+        xfail(
+            "unbind_copy"
+        ),  # Batching rule not implemented for aten::unbind_copy.int.
        # ----------------------------------------------------------------------
    }

@ -1628,6 +1637,9 @@ class TestOperators(TestCase):
                xfail("__getitem__", ""),
                xfail("index_put", ""),
                xfail("view_as_complex"),
+                xfail(
+                    "unbind_copy"
+                ),  # Batching rule not implemented for aten::unbind_copy.int.
                xfail("nn.functional.gaussian_nll_loss"),
                xfail("masked_select"),
                xfail(
@ -1922,6 +1934,9 @@ class TestOperators(TestCase):
                xfail(
                    "as_strided_scatter"
                ),  # AssertionError: Tensor-likes are not close!
+                xfail(
+                    "unbind_copy"
+                ),  # Batching rule not implemented for aten::unbind_copy.int.
                xfail("bernoulli"),  # calls random op
                xfail("bfloat16"),  # required rank 4 tensor to use channels_last format
                xfail("cdist"),  # Forward AD not implemented and no decomposition
--- a/test/functorch/test_vmap.py
+++ b/test/functorch/test_vmap.py
@ -4375,6 +4375,9 @@ class TestVmapOperatorsOpInfo(TestCase):
                xfail("torch.ops.aten._efficient_attention_forward"),  # outputs ints
                # TypeError: expected Tensor as element 0 in argument 0, but got float
                xfail("item"),
+                xfail(
+                    "unbind_copy"
+                ),  # Batching rule not implemented for aten::unbind_copy.int.
            }
        ),
    )
@ -4450,6 +4453,9 @@ class TestVmapOperatorsOpInfo(TestCase):
                xfail("item"),
                xfail("tril"),  # Exception not raised on error input
                xfail("triu"),  # Exception not raised on error input
+                xfail(
+                    "unbind_copy"
+                ),  # Batching rule not implemented for aten::unbind_copy.int.
                xfail("__getitem__", ""),
                xfail("count_nonzero"),
                xfail(
--- a/test/test_mps.py
+++ b/test/test_mps.py
@ -350,6 +350,7 @@ def mps_ops_modifier(ops):
        'transpose_copy',
        'T',
        'unbind',
+        'unbind_copy',
        'unflatten',
        'unfold',
        'unfold_copy',
--- a/tools/autograd/gen_variable_type.py
+++ b/tools/autograd/gen_variable_type.py
@ -241,6 +241,7 @@ GRADIENT_IMPLEMENTED_FOR_COMPLEX = {
    "slice",
    "constant_pad_nd",
    "unbind",
+    "unbind_copy",
    "split",
    "split_with_sizes",
    "unsafe_split",
--- a/torch/_inductor/decomposition.py
+++ b/torch/_inductor/decomposition.py
@ -83,6 +83,7 @@ inductor_decompositions = get_decompositions(
        aten._to_copy,
        aten.tril_indices,
        aten.triu_indices,
+        aten.unbind_copy.int,
        aten.upsample_bilinear2d.vec,
        quantized.linear_dynamic_fp16_unpacked_weight,
        _quantized.wrapped_quantized_linear,
--- a/torch/_prims/context.py
+++ b/torch/_prims/context.py
@ -129,6 +129,8 @@ class TorchRefsMode(torch.overrides.TorchFunctionMode):
            func = torch._decomp.decomposition_table.get(orig_func, None)
        elif func is None and isinstance(orig_func, torch._ops.OpOverloadPacket):
            default = getattr(orig_func, "default", None)
+            if default is None and orig_func._dir:
+                default = getattr(orig_func, orig_func._dir[0], None)
            if default is not None:
                func = torch._decomp.decomposition_table.get(default, None)

--- a/torch/_prims_common/wrappers.py
+++ b/torch/_prims_common/wrappers.py
@ -2,7 +2,16 @@
 import inspect
 import warnings
 from functools import wraps
-from typing import Callable, NamedTuple, Optional, overload, Sequence, Tuple, TypeVar
+from typing import (
+    Callable,
+    List,
+    NamedTuple,
+    Optional,
+    overload,
+    Sequence,
+    Tuple,
+    TypeVar,
+)
 from typing_extensions import ParamSpec

 import torch
@ -288,11 +297,17 @@ def out_wrapper(
            else:
                result = fn(*args, **kwargs)
            assert (
-                isinstance(result, TensorLike)
-                and is_tensor
-                or isinstance(result, Tuple)  # type: ignore[arg-type]
-                and len(result) == len(out_names)  # type: ignore[arg-type]
+                (isinstance(result, TensorLike) and is_tensor)
+                or (
+                    isinstance(result, Tuple)  # type: ignore[arg-type]
+                    and len(result) == len(out_names)  # type: ignore[arg-type]
+                )
+                or (
+                    fn.__name__ == "unbind"
+                    and isinstance(result, (List, Tuple))  # type: ignore[arg-type]
+                )
            )
+            # unbind_copy is a special case: see https://github.com/pytorch/pytorch/issues/130829
            if out is not None:
                # Naively you might expect this assert to be true, but
                # it's not:
@ -310,7 +325,7 @@ def out_wrapper(
                # the output tensor, but not the result--which will
                # be a normal meta tensor, but this is perfectly
                # harmless.
-                if is_tensor:
+                if is_tensor and fn.__name__ != "unbind":
                    assert isinstance(out, TensorLike)
                    # These two operations are done in-place
                    _maybe_resize_out(
@ -318,7 +333,10 @@ def out_wrapper(
                    )
                    _safe_copy_out(copy_from=result, copy_to=out, exact_dtype=exact_dtype)  # type: ignore[arg-type]
                else:
-                    assert isinstance(out, Tuple)  # type: ignore[arg-type]
+                    if fn.__name__ != "unbind":
+                        assert isinstance(out, Tuple)  # type: ignore[arg-type]
+                    else:
+                        assert isinstance(out, (List, Tuple))  # type: ignore[arg-type]
                    torch._check_type(
                        len(out) == len(result),  # type: ignore[arg-type]
                        lambda: f"expected tuple of {len(result)} elements but got {len(out)}",  # type: ignore[arg-type]
--- a/torch/_refs/__init__.py
+++ b/torch/_refs/__init__.py
@ -305,6 +305,7 @@ __all__ = [
    "tensor_split",
    "transpose",
    "transpose_copy",
+    "unbind_copy",
    "unfold",
    "unfold_copy",
    "unsqueeze",
@ -6382,6 +6383,7 @@ squeeze_copy = _make_copy_from_view(aten.squeeze)
 permute_copy = _make_copy_from_view(aten.permute)
 t_copy = _make_copy_from_view(aten.t)
 transpose_copy = _make_copy_from_view(aten.transpose)
+unbind_copy = _make_copy_from_view(aten.unbind)
 unsqueeze_copy = _make_copy_from_view(aten.unsqueeze)
 view_copy = _make_copy_from_view(aten.view)

--- a/torch/testing/_internal/common_methods_invocations.py
+++ b/torch/testing/_internal/common_methods_invocations.py
@ -19455,6 +19455,25 @@ op_db: List[OpInfo] = [
           supports_gradgrad=True,
           supports_out=False,
           ),
+    OpInfo('unbind_copy',
+           dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
+           ref=reference_unbind,
+           sample_inputs_func=sample_inputs_unbind,
+           error_inputs_func=error_inputs_unbind,
+           supports_forward_ad=True,
+           supports_fwgrad_bwgrad=True,
+           supports_gradgrad=True,
+           supports_out=True,
+           check_batched_grad=False,
+           skips=(
+               # Expected __torch_dispatch__ for aten::unbind_copy.int_out to return None
+               # but it returned something else instead.
+               DecorateInfo(
+                   unittest.expectedFailure,
+                   'TestProxyTensorOpInfo',
+                   'test_make_fx_symbolic_exhaustive_out'
+               ),
+           )),
    OpInfo('vstack',
           aliases=('row_stack',),
           dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
@ -24056,10 +24075,6 @@ python_ref_db = [
    PythonRefInfo(
        "_refs.transpose_copy",
        torch_opinfo_name="transpose_copy",
-        skips=(
-            # RuntimeError: no _refs support for torch.Tensor.is_conj
-            DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref'),
-        ),
        supports_out=True,
    ),
    PythonRefInfo(
@ -24076,6 +24091,10 @@ python_ref_db = [
        torch_opinfo_name="T",
        error_inputs_func=partial(error_inputs_T, has_ndims_error=True),
    ),
+    PythonRefInfo(
+        "_refs.unbind_copy",
+        torch_opinfo_name="unbind_copy",
+    ),
    PythonRefInfo(
        "_refs.unfold",
        torch_opinfo_name="unfold",