Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/49138

See https://fb.quip.com/QRtJAin66lPN for details.

We need to model optional types explicitly, mostly for schema inference. So we cannot pass a `Tensor?[]` as `ArrayRef<Tensor>`; instead we need to pass it as an optional type. This PR changes it to `torch::List<c10::optional<Tensor>>`. It also makes the ops c10-full that were blocked by this.

## Backwards Compatibility

- This should not break the Python API because the representation in Python is the same and python_arg_parser just transforms the python list into a `List<optional<Tensor>>` instead of into a `List<Tensor>`.
- This should not break serialized models because there's some logic that allows loading a serialized `List<Tensor>` as `List<optional<Tensor>>`, see https://github.com/pytorch/pytorch/pull/49138/files#diff-9315f5dd045f47114c677174dcaa2f982721233eee1aa19068a42ff3ef775315R57
- This will break backwards compatibility for the C++ API. There is no implicit conversion from `ArrayRef<Tensor>` (which was the old argument type) to `List<optional<Tensor>>`. One common call pattern is `tensor.index({indices_tensor})`, where `indices_tensor` is another `Tensor`; that will continue working because the `{}` initializer_list constructor for `List<optional<Tensor>>` can take `Tensor` elements that are implicitly converted to `optional<Tensor>`. Another common call pattern was `tensor.index(indices_tensor)`, where previously the `Tensor` was implicitly converted to an `ArrayRef<Tensor>`. Converting `Tensor -> optional<Tensor> -> List<optional<Tensor>>` would require two implicit conversions, and C++ doesn't allow chaining two implicit conversions, so those call sites have to be rewritten to `tensor.index({indices_tensor})`.

ghstack-source-id: 119269131

Test Plan:

## Benchmarks (C++ instruction counts)

### Forward

#### Script

```py
from torch.utils.benchmark import Timer

counts = Timer(
    stmt="""
        auto t = {{op call to measure}};
    """,
    setup="""
        using namespace torch::indexing;
        auto x = torch::ones({4, 4, 4});
    """,
    language="cpp",
).collect_callgrind(number=1_000)
print(counts)
```

#### Results

| Op call | before | after | delta | delta % |
|---|---|---|---|---|
| x[0] = 1 | 11566015 | 11566015 | 0 | 0.00% |
| x.index({0}) | 6807019 | 6801019 | -6000 | -0.09% |
| x.index({0, 0}) | 13529019 | 13557019 | 28000 | 0.21% |
| x.index({0, 0, 0}) | 10677004 | 10692004 | 15000 | 0.14% |
| x.index({"..."}) | 5512015 | 5506015 | -6000 | -0.11% |
| x.index({Slice(None, None, None)}) | 6866016 | 6936016 | 70000 | 1.02% |
| x.index({None}) | 8554015 | 8548015 | -6000 | -0.07% |
| x.index({false}) | 22400000 | 22744000 | 344000 | 1.54% |
| x.index({true}) | 27624088 | 27264393 | -359695 | -1.30% |
| x.index({"...", 0, true, Slice(1, None, 2), torch::tensor({1, 2})}) | 123472000 | 123463306 | -8694 | -0.01% |

### Autograd

#### Script

```py
from torch.utils.benchmark import Timer

counts = Timer(
    stmt="""
        auto t = {{op call to measure}};
    """,
    setup="""
        using namespace torch::indexing;
        auto x = torch::ones({4, 4, 4}, torch::requires_grad());
    """,
    language="cpp",
).collect_callgrind(number=1_000)
print(counts)
```

Note: the script measures the **forward** path of an op call with autograd enabled (i.e. it calls into VariableType). It does not measure the backward path.

#### Results

| Op call | before | after | delta | delta % |
|---|---|---|---|---|
| x.index({0}) | 14839019 | 14833019 | -6000 | 0.00% |
| x.index({0, 0}) | 28342019 | 28370019 | 28000 | 0.00% |
| x.index({0, 0, 0}) | 24434004 | 24449004 | 15000 | 0.00% |
| x.index({"..."}) | 12773015 | 12767015 | -6000 | 0.00% |
| x.index({Slice(None, None, None)}) | 14837016 | 14907016 | 70000 | 0.47% |
| x.index({None}) | 15926015 | 15920015 | -6000 | 0.00% |
| x.index({false}) | 36958000 | 37477000 | 519000 | 1.40% |
| x.index({true}) | 41971408 | 42426094 | 454686 | 1.08% |
| x.index({"...", 0, true, Slice(1, None, 2), torch::tensor({1, 2})}) | 168184392 | 164545682 | -3638710 | -2.16% |

Reviewed By: bhosmer

Differential Revision: D25454632

fbshipit-source-id: 28ab0cffbbdbdff1c40b4130ca62ee72f981b76d
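To make the "Python API unchanged" point concrete, here is a minimal sketch of how these ops are reached from Python; the indexing calls themselves are standard, while the dispatch comments are assumptions based on the `Tensor?[] indices` schemas of `aten::index` / `aten::index_put_`:

```py
import torch

x = torch.ones(4, 4, 4)
idx = torch.tensor([0, 2])

# Advanced indexing from Python is unchanged by this PR: python_arg_parser now
# builds a List<optional<Tensor>> instead of a List<Tensor> under the hood.
y = x[idx]      # assumed to route through aten::index(Tensor self, Tensor?[] indices)
x[idx] = 1.0    # assumed to route through aten::index_put_ with the same Tensor?[] indices
```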
from tools.codegen.model import *
from tools.codegen.api.types import *
import tools.codegen.api.cpp as cpp
from tools.codegen import local

from typing import Union, Sequence, List

# This file describes the translation of JIT schema to the native functions API.
# This looks a lot like the C++ API (which makes historical sense, because the
# idea was you wrote native functions to implement functions in the C++ API),
# but over time we have evolved the C++ API without actually changing our
# native:: kernels.  The intention is to make the native API and the dispatcher API
# line up as closely as possible, since this results in the least overhead
# (no translation is needed from dispatcher API to native API).
#
# When a function is not use_c10_dispatcher: full, the dispatcher API actually
# coincides with the native:: API (i.e. we do as dumb a pass-through as
# possible).

def name(func: FunctionSchema) -> str:
    name = str(func.name.name)
    # TODO: delete this!
    if func.is_out_fn():
        name += '_out'
    if func.name.overload_name:
        name += f'_{func.name.overload_name}'
    return name
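For orientation, a minimal usage sketch of `name`, assuming `FunctionSchema.parse` is available via the star import from `tools.codegen.model` (the schema strings are illustrative, not taken from native_functions.yaml):

```py
# Hedged sketch: FunctionSchema.parse is assumed from tools.codegen.model.
schema = FunctionSchema.parse('relu(Tensor self) -> Tensor')
assert name(schema) == 'relu'            # no out arguments, no overload name

overloaded = FunctionSchema.parse('add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor')
assert name(overloaded) == 'add_Tensor'  # the overload name is appended after an underscore
```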
def argumenttype_type(t: Type, *, mutable: bool, binds: ArgName) -> CType:
    if str(t) == 'Tensor?':
        if mutable:
            return MutRefCType(BaseCType('Tensor', binds))
        else:
            return ConstRefCType(BaseCType('Tensor', binds))
    elif str(t) == 'Tensor?[]':
        return BaseCType('const c10::List<c10::optional<Tensor>> &', binds)
    return cpp.argumenttype_type(t, mutable=mutable, binds=binds)
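The `'Tensor?[]'` branch is where this PR's new representation surfaces in the native API: an optional tensor list is passed as `const c10::List<c10::optional<Tensor>> &` rather than `ArrayRef<Tensor>`. A minimal sketch, assuming `Type.parse` from `tools.codegen.model`:

```py
# Hedged sketch: Type.parse is assumed from tools.codegen.model.
t = Type.parse('Tensor?[]')
ct = argumenttype_type(t, mutable=False, binds='indices')
# ct is BaseCType('const c10::List<c10::optional<Tensor>> &', 'indices'),
# the optional-tensor-list type introduced by this PR; any other type string
# falls through to cpp.argumenttype_type.
```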
def returns_type(rs: Sequence[Return]) -> str:
    return cpp.returns_type(rs)

def argument_type(a: Argument, *, binds: ArgName) -> CType:
    return argumenttype_type(a.type, mutable=a.is_write, binds=binds)
def argument(a: Union[Argument, SelfArgument, TensorOptionsArguments]) -> List[Binding]:
    if isinstance(a, Argument):
        return [Binding(
            ctype=argument_type(a, binds=a.name),
            name=a.name,
            default=cpp.default_expr(a.default, a.type) if a.default is not None else None,
            argument=a,
        )]
    elif isinstance(a, SelfArgument):
        # Erase SelfArgument from the distinction
        return argument(a.argument)
    elif isinstance(a, TensorOptionsArguments):
        if local.use_c10_dispatcher() in [UseC10Dispatcher.hacky_wrapper_for_legacy_signatures,
                                          UseC10Dispatcher.with_codegenerated_unboxing_wrapper]:
            # TODO: expunge this logic entirely
            default = None
            if all(x.default == "None" for x in a.all()):
                default = '{}'
            elif a.dtype.default == "long":
                default = 'at::kLong'  # TODO: this is wrong
            return [Binding(
                ctype=ConstRefCType(BaseCType('TensorOptions', 'options')),
                name='options',
                default=default,
                argument=a,
            )]
        else:
            assert local.use_c10_dispatcher() == UseC10Dispatcher.full
            return [
                Binding(
                    ctype=OptionalCType(BaseCType('ScalarType', 'dtype')),
                    name='dtype',
                    default='{}',
                    argument=a,
                ),
                Binding(
                    ctype=OptionalCType(BaseCType('Layout', 'layout')),
                    name='layout',
                    default='{}',
                    argument=a,
                ),
                Binding(
                    ctype=OptionalCType(BaseCType('Device', 'device')),
                    name='device',
                    default='{}',
                    argument=a,
                ),
                Binding(
                    ctype=OptionalCType(BaseCType('bool', 'pin_memory')),
                    name='pin_memory',
                    default='{}',
                    argument=a,
                )]
    else:
        assert_never(a)
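A minimal sketch of the plain `Argument` path, assuming `Argument.parse` from `tools.codegen.model` (the `TensorOptionsArguments` branches additionally need the code generator's `local.use_c10_dispatcher()` context and are not exercised here):

```py
# Hedged sketch: Argument.parse is assumed from tools.codegen.model.
a = Argument.parse('Tensor? weight')
[b] = argument(a)
# b.name == 'weight', b.default is None, and b.ctype is
# ConstRefCType(BaseCType('Tensor', 'weight')), per argumenttype_type above.
```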
def arguments(func: FunctionSchema) -> List[Binding]:
    args: List[Union[Argument, TensorOptionsArguments, SelfArgument]] = []
    if local.use_c10_dispatcher() is UseC10Dispatcher.full:
        args.extend(func.arguments.non_out)
        args.extend(func.arguments.out)
    else:
        args.extend(func.arguments.out)
        args.extend(func.arguments.non_out)
    return [r for arg in args for r in argument(arg)]
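The only difference between the two dispatcher modes above is where the out arguments land in the binding list. As an illustration (the schema string is hypothetical):

```py
# For a schema like
#   'add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)':
#   use_c10_dispatcher: full        -> bindings ordered self, other, alpha, out  (out arguments last)
#   legacy / unboxing-wrapper modes -> bindings ordered out, self, other, alpha  (out arguments first)
```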