Extend impl_backward to be usable with torch.library operators (#106817)

- impl_save_for_backward/impl_backward only work for functional,
  non-view schemas. We validate this.
- impl_save_for_backward/impl_backward raise an error if an autograd
  implementation already exists via torch.library / TORCH_LIBRARY.
- Operators constructed via custom_op receive an "autograd indirection
  kernel": it automatically pulls the constructed autograd kernel out of
  a dict. When impl_save_for_backward/impl_backward are used with
  torch.library operators, we also register the autograd indirection
  kernel so that the same logic is reused (a usage sketch follows below).
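
As a usage sketch (adapted from the new test_backward_impl_on_existing_op
test in this PR; the "mylib" namespace and the torch.ops access path are
illustrative placeholders, not part of the change itself):

    import torch
    import torch._custom_ops as custom_ops

    # Operator defined directly via torch.library, not via custom_ops.custom_op.
    lib = torch.library.Library("mylib", "FRAGMENT")
    lib.define("foo(Tensor x) -> Tensor")

    @custom_ops.impl("mylib::foo")
    def foo_impl(x):
        with torch.no_grad():
            return x.sin()

    # These registrations now also work on the torch.library-defined operator;
    # under the hood they install the autograd indirection kernel.
    @custom_ops.impl_save_for_backward("mylib::foo")
    def foo_save_for_backward(inputs, output):
        return inputs.x

    @custom_ops.impl_backward("mylib::foo")
    def foo_backward(ctx, saved, grad_out):
        return {"x": grad_out * saved.cos()}

    x = torch.randn([], requires_grad=True)
    y = torch.ops.mylib.foo(x)
    (gx,) = torch.autograd.grad(y, x)
    assert torch.allclose(gx, x.cos())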

Test Plan:
- new tests
Pull Request resolved: https://github.com/pytorch/pytorch/pull/106817
Approved by: https://github.com/soulitzer
ghstack dependencies: #106799, #106800
Richard Zou 2023-08-11 11:41:54 -07:00 committed by PyTorch MergeBot
parent db9a0cf689
commit 2932b0bf37
4 changed files with 161 additions and 12 deletions


@@ -1615,6 +1615,76 @@ def forward(self, x_1):
         result = op(x)
         self.assertEqual(result.shape, ())
 
+    def _test_backward_impl_raises(self, qualname, err_regex):
+        with self.assertRaisesRegex(RuntimeError, err_regex):
+            @custom_ops.impl_save_for_backward(qualname)
+            def foo2(x):
+                return
+
+        with self.assertRaisesRegex(RuntimeError, err_regex):
+            @custom_ops.impl_backward(qualname)
+            def foo3(x):
+                return
+
+    def test_backward_impl_on_existing_op_incorrect_schema_views(self):
+        lib = self.lib()
+        lib.define("foo(Tensor(a) x) -> Tensor(a)")
+        qualname = f"{self.test_ns}::foo"
+        self._test_backward_impl_raises(qualname, "operator that returns views")
+
+    def test_backward_impl_on_existing_op_incorrect_schema_mutable(self):
+        lib = self.lib()
+        lib.define("foo(Tensor(a!) x) -> Tensor")
+        qualname = f"{self.test_ns}::foo"
+        self._test_backward_impl_raises(qualname, "non-functional")
+
+    def test_backward_impl_on_existing_op_incorrect_schema_no_output(self):
+        lib = self.lib()
+        lib.define("foo(Tensor x) -> ()")
+        qualname = f"{self.test_ns}::foo"
+        self._test_backward_impl_raises(qualname, "no returns")
+
+    def test_backward_impl_on_existing_op_CompositeImplicitAutograd(self):
+        lib = self.lib()
+        lib.define("foo(Tensor x) -> Tensor")
+        qualname = f"{self.test_ns}::foo"
+        lib.impl("foo", lambda x: x.sin().cos(), "CompositeImplicitAutograd")
+        self._test_backward_impl_raises(qualname, "CompositeImplicitAutograd")
+
+    @parametrize("key", ["Autograd", "AutogradCPU", "AutogradCUDA"])
+    def test_backward_impl_on_existing_op_with_key(self, key):
+        lib = self.lib()
+        lib.define("foo(Tensor x) -> Tensor")
+        qualname = f"{self.test_ns}::foo"
+        lib.impl("foo", lambda x: x.sin().cos(), key)
+        self._test_backward_impl_raises(qualname, key)
+
+    def test_backward_impl_on_existing_op(self):
+        lib = self.lib()
+        lib.define("foo(Tensor x) -> Tensor")
+        qualname = f"{self.test_ns}::foo"
+
+        @custom_ops.impl(qualname)
+        def foo_impl(x):
+            with torch.no_grad():
+                return x.sin()
+
+        @custom_ops.impl_save_for_backward(qualname)
+        def foo_save_for_backward(inputs, output):
+            return inputs.x
+
+        @custom_ops.impl_backward(qualname)
+        def foo_backward(ctx, saved, grad_out):
+            return {"x": grad_out * saved.cos()}
+
+        op = self.get_op(qualname)
+        x = torch.randn([], requires_grad=True)
+        y = op(x)
+        (gx,) = torch.autograd.grad(y, x)
+        self.assertEqual(gx, x.cos())
+
 only_for = ("cpu", "cuda")
 instantiate_device_type_tests(TestCustomOpTesting, globals(), only_for=only_for)


@@ -61,6 +61,7 @@ def mark_non_differentiable(ctx, output, output_differentiability):
     # - Tensor
     # - Tensor[]
     # - int, bool, Scalar, float
+    # See _check_can_register_backward
     if output_differentiability is not None:
         if not isinstance(output, tuple):
             tuple_output = (output,)
@@ -90,7 +91,8 @@ def mark_non_differentiable(ctx, output, output_differentiability):
 def construct_autograd_kernel(
         schema,
         output_differentiability,
-        forward_op,
+        custom_op,
+        op_overload,
         save_for_backward_fn,
         backward_fn):
@@ -102,7 +104,7 @@ def construct_autograd_kernel(
             ctx.set_materialize_grads(True)
             args = pytree.tree_unflatten(list(flat_args), spec)
             with torch._C._AutoDispatchBelowAutograd():
-                output = forward_op(*args)
+                output = op_overload(*args)
 
             # We use the info about args to give better error messages in backward
             args_info = namedtuple_args(
@@ -131,11 +133,11 @@
             # Massage the grad_inputs_dict to a form acceptable by
             # autograd.Function.
-            validate_grad_inputs_dict(grad_inputs_dict, forward_op, args_info)
+            validate_grad_inputs_dict(grad_inputs_dict, custom_op, args_info)
             return grad_inputs_dict_to_flat_tuple(grad_inputs_dict, args_info)
 
         generated_cls = gen_autograd_function(
-            forward_op._opname + '_customop', forward, backward)
+            custom_op._opname + '_customop', forward, backward)
 
         flat_output = generated_cls.apply(*flat_args)
         assert out_spec is not None


@@ -6,7 +6,7 @@ import sys
 import typing
 import weakref
 
-from torchgen.model import FunctionSchema, OperatorName, SchemaKind
+from torchgen.model import FunctionSchema, OperatorName, SchemaKind, BaseType, ListType, BaseTy
 
 import torch
 import torch._C as _C
@@ -195,9 +195,16 @@ class CustomOp:
         # NB: Some of these impls are registered as kernels to DispatchKeys.
         # Modifying the _impls dict directly won't do anything in that case.
         self._impls: typing.Dict[str, typing.Optional[FuncAndLocation]] = {}
+        # See NOTE [CustomOp autograd kernel indirection]
+        self._registered_autograd_kernel_indirection = False
 
         global_registry[self._qualname] = self
 
+    def _register_autograd_kernel_indirection(self):
+        assert not self._registered_autograd_kernel_indirection
+        self._lib.impl(self._opname, autograd_kernel_indirection(weakref.proxy(self)), "Autograd")
+        self._registered_autograd_kernel_indirection = True
+
     # Records the impl and the source location in self._impls
     # Note that this doesn't cause torch.library to use the impl, that
     # needs to be done in a separate self._lib.impl call.
@@ -429,6 +436,67 @@ class CustomOp:
         return inner
 
+    def _check_can_register_backward(self):
+        def error(detail):
+            raise RuntimeError(
+                f"Cannot use torch._custom_ops APIs to register backward "
+                f"formula for {detail}. Got operator "
+                f"{self._qualname} with schema: {schema}"
+            )
+
+        schema = self._schema
+        if schema.kind() != SchemaKind.functional:
+            error("non-functional operator")
+
+        rets = schema.returns
+        if not schema.returns:
+            error("operator with no returns")
+
+        assert len(rets) > 0
+        is_non_mutating_view = any(
+            r.annotation is not None and not r.annotation.is_write for r in rets
+        )
+        if is_non_mutating_view:
+            error("operator that returns views")
+
+        # We make assumptions about the schema's return types.
+        allowed_return_types = {
+            BaseType(BaseTy.int): "int",
+            BaseType(BaseTy.SymInt): "SymInt",
+            BaseType(BaseTy.bool): "bool",
+            BaseType(BaseTy.float): "float",
+            BaseType(BaseTy.Tensor): "Tensor",
+            ListType(BaseType(BaseTy.Tensor), None): "List[Tensor]",
+        }
+        for ret in schema.returns:
+            if ret.type in allowed_return_types:
+                continue
+            error(f"operator with return not in {list(allowed_return_types.values())} (got {ret.type})")
+
+    def _check_doesnt_have_library_autograd_impl(self):
+        if self._registered_autograd_kernel_indirection:
+            return
+
+        if _C._dispatch_has_kernel_for_dispatch_key(self._qualname, "CompositeImplicitAutograd"):
+            raise RuntimeError(
+                f"impl_backward/impl_save_for_backward: the operator {self._qualname} "
+                f"already has an implementation for this device type via a "
+                f"pre-existing registration to DispatchKey::CompositeImplicitAutograd. "
+                f"CompositeImplicitAutograd operators do not need an autograd formula; "
+                f"instead, the operator will decompose into its constituents and those "
+                f"can have autograd formulas defined on them.")
+
+        # We can improve this by adding "all Autograd<BACKEND> keys", but
+        # realistically people will just be using this API for CPU/CUDA for now.
+        for key in ["Autograd", "AutogradCPU", "AutogradCUDA"]:
+            if _C._dispatch_has_kernel_for_dispatch_key(self._qualname, key):
+                raise RuntimeError(
+                    f"impl_backward/impl_save_for_backward: "
+                    f"the operator {self._qualname} already has an Autograd kernel "
+                    f"registered to DispatchKey::{key} via a pre-existing "
+                    f"torch.library or TORCH_LIBRARY registration. Please either "
+                    f"remove those registrations or don't use the torch._custom_ops APIs")
+
     def _check_doesnt_have_library_meta_impl(self):
         if self._has_impl("abstract"):
             return
@@ -477,6 +545,7 @@ class CustomOp:
             self._schema,
             self._output_differentiability,
             self,
+            get_op(self._qualname),
             self._get_impl("save_for_backward").func,
             self._get_impl("backward").func)
         self._register_impl("autograd", kernel)
@@ -487,6 +556,10 @@ class CustomOp:
         Please see impl_backward for more details.
         """
         def inner(f):
+            self._check_can_register_backward()
+            self._check_doesnt_have_library_autograd_impl()
+            if not self._registered_autograd_kernel_indirection:
+                self._register_autograd_kernel_indirection()
             self._register_impl("save_for_backward", f, stacklevel=_stacklevel)
             if self._has_impl("backward"):
                 self._register_autograd_kernel()
@@ -546,6 +619,10 @@ class CustomOp:
                     yell()
 
         def inner(f):
+            self._check_can_register_backward()
+            self._check_doesnt_have_library_autograd_impl()
+            if not self._registered_autograd_kernel_indirection:
+                self._register_autograd_kernel_indirection()
             self._register_impl("backward", f, stacklevel=_stacklevel)
             self._output_differentiability = output_differentiability
             if self._has_impl("save_for_backward"):
@@ -963,7 +1040,10 @@ def custom_op_from_existing(op):
     ns = op.namespace
     lib = torch.library.Library(ns, "FRAGMENT")
     name = op.name().split("::")[-1]
-    schema = FunctionSchema.parse(str(op._schema))
+    schema_str = str(op._schema)
+    # CustomOp expects the schema string without the namespace
+    schema_str = schema_str.split("::")[-1]
+    schema = FunctionSchema.parse(schema_str)
     return CustomOp(lib, ns, schema, name, op, _private_access=True)
@@ -1008,10 +1088,7 @@ def _custom_op_with_schema(qualname, schema):
     lib.define(schema_str)
     ophandle = find_ophandle_or_throw(ns, function_schema.name)
     result = CustomOp(lib, ns, function_schema, name, ophandle, _private_access=True)
+    result._register_autograd_kernel_indirection()
 
-    library.impl(lib, result._opname, "Autograd")(
-        autograd_kernel_indirection(weakref.proxy(result))
-    )
     torch._C._dispatch_set_report_error_callback(
         ophandle, functools.partial(report_error_callback, weakref.proxy(result))


@@ -266,7 +266,7 @@ def impl_save_for_backward(qualname, *, func=None):
     """
     def inner(func):
-        custom_op = _find_custom_op(qualname)
+        custom_op = _find_custom_op(qualname, also_check_torch_library=True)
         custom_op.impl_save_for_backward(_stacklevel=3)(func)
         return func
@@ -313,7 +313,7 @@ def impl_backward(qualname, output_differentiability=None, *, func=None):
     """
    def inner(func):
-        custom_op = _find_custom_op(qualname)
+        custom_op = _find_custom_op(qualname, also_check_torch_library=True)
         custom_op.impl_backward(output_differentiability, _stacklevel=3)(func)
         return func