Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-07 12:21:27 +01:00
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/53583

`Scalar` takes 32 bytes because `c10::complex<double>` requires 16-byte alignment. Passing `Scalar` by reference shows about a 1% improvement in instruction count. All the changes in this commit are codemodded except for the following four files, which generate the C++ signatures:

```
tools/codegen/api/cpp.py
tools/codegen/api/native.py
tools/codegen/api/structured.py
caffe2/contrib/aten/gen_op.py
```

# Codemod

## Main Step

For the codemod part, here is the main command used:

```
fastmod --extensions h '([a-zA-Z_+]\([^)]*,?\s*)Scalar (\w+)' '${1}const Scalar& ${2}'
fastmod --extensions h '([a-zA-Z_+]\([^)]*,?\s*)optional<Scalar> (\w+)' '${1}const optional<Scalar>& ${2}'
fastmod --extensions cpp '([a-zA-Z_+]\([^)]*,?\s*)Scalar (\w+)' '${1}const Scalar& ${2}'
fastmod --extensions cpp '([a-zA-Z_+]\([^)]*,?\s*)optional<Scalar> (\w+)' '${1}const optional<Scalar>& ${2}'
```

As you can tell, this codemods both `Scalar` and `optional<Scalar>`. Apply these commands iteratively until a fix-point is reached, since one method signature might contain multiple `Scalar` parameters. In retrospect, excluding `third_party` and `torch/csrc/jit` would have been a good idea (I reverted those manually later; see https://github.com/pytorch/pytorch/pull/53479 as a reference).

## Pre-Step

Prior to applying the main command, since some `Scalar`s are written as `at::Scalar` or `c10::Scalar`, I codemodded some of them in advance. Here is an incomplete list:

```
fastmod --extensions h '([a-zA-Z_+]\([^)]*,?\s*)at::Scalar (\w+)' '${1}const at::Scalar& ${2}'
fastmod --extensions cpp '([a-zA-Z_+]\([^)]*,?\s*)at::Scalar (\w+)' '${1}const at::Scalar& ${2}'
fastmod --extensions h '([a-zA-Z_+]\([^)]*,?\s*)c10::optional<Scalar> (\w+)' '${1}const c10::optional<Scalar>& ${2}'
fastmod --extensions cpp '([a-zA-Z_+]\([^)]*,?\s*)c10::optional<Scalar> (\w+)' '${1}const c10::optional<Scalar>& ${2}'
```

## Fixup

There are a couple of post-codemod fixups. For example, `const Scalar` gets codemodded into `const const Scalar&`, and `at::Scalar` gets codemodded into `at::const Scalar&` (if the Pre-Step is not done comprehensively). Here is an incomplete list:

```
fastmod --extensions cpp 'const const Scalar' 'const Scalar'
fastmod --extensions h 'const const c10::optional<Scalar>' 'const c10::optional<Scalar>'
fastmod --extensions cpp 'const const c10::optional<Scalar>' 'const c10::optional<Scalar>'
fastmod 'at::const Scalar&' 'const at::Scalar&'
```

## Supplementary

`cu` and `mm` files also need to be codemodded, for example:

```
fastmod --extensions cu 'at::const Scalar&' 'const at::Scalar&'
fastmod --extensions mm '([a-zA-Z_+]\([^)]*,?\s*)Scalar (\w+)' '${1}const Scalar& ${2}'
```

Function pointers are not covered by the main command and need their own patterns. Here is an incomplete list:

```
# Cover case: using index_fill_fn = void(*)(TensorIterator & iter, int64_t dim, int64_t self_dim_size, int64_t self_dim_stride, Scalar source);
fastmod --extensions h '(void\s*\(\s*\*\s*\)\([^)]*,?\s*)Scalar (\w+)' '${1}const Scalar& ${2}'
# Cover case: using softplus_fn = void (*)(TensorIterator&, Scalar, Scalar);
fastmod --extensions h '(void\s*\(\s*\*\s*\)\([^)]*,?\s*)Scalar([, \)])' '${1}const Scalar&${2}'
fastmod --extensions cpp '(void\s*\(\s*\*\s*\)\([^)]*,?\s*)Scalar([, \)])' '${1}const Scalar&${2}'
fastmod --extensions h '(void\s*\(\s*\*\s*\)\([^)]*,?\s*)optional<Scalar>([, \)])' '${1}const optional<Scalar>&${2}'
```

Some corner cases need to be fixed manually.
ghstack-source-id: 123970306
Test Plan: Imported from OSS
Reviewed By: smessmer
Differential Revision: D26904445
fbshipit-source-id: 8d8a002af4b5125f153a32f03c6956be7ae5671d
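For readers unfamiliar with `fastmod`, here is a small editor-added Python sketch (not part of the commit) that reproduces the effect of the main substitution using the standard `re` module. The sample declaration and the `codemod_line` helper are made up for illustration; `fastmod`'s `${1}` capture syntax corresponds to `\g<1>` in Python.

```python
import re

# Same capture pattern as the main fastmod command above.
PATTERN = re.compile(r'([a-zA-Z_+]\([^)]*,?\s*)Scalar (\w+)')

def codemod_line(line: str) -> str:
    # A single pass rewrites only one 'Scalar <name>' parameter per argument
    # list (the greedy '[^)]*' swallows the earlier ones), so we repeat until
    # a fix-point is reached, just as the commit message describes.
    while True:
        new = PATTERN.sub(r'\g<1>const Scalar& \g<2>', line)
        if new == line:
            return new
        line = new

# Hypothetical declaration, made up for illustration:
print(codemod_line("Tensor add(const Tensor& self, Scalar other, Scalar alpha);"))
# -> Tensor add(const Tensor& self, const Scalar& other, const Scalar& alpha);
```
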
tools/codegen/api/structured.py (98 lines, 4.2 KiB, Python)

from tools.codegen.model import *

from tools.codegen.api.types import *
from tools.codegen.api import cpp

from typing import Union, List

# This file describes the translation of JIT schema to the structured functions API.
# This is similar to native API, but a number of historical problems with native
# API have been fixed.

# Translation of types occurring in JIT arguments to a C++ argument type.
# NB: For now, mutable doesn't do anything; but it could if we make
# some more nominal types
def argumenttype_type(t: Type, *, mutable: bool, binds: ArgName) -> CType:
    # If it's a value type, do the value type translation
    r = cpp.valuetype_type(t, binds=binds)
    if r is not None:
        return r

    if isinstance(t, BaseType):
        if t.name == BaseTy.Tensor:
            return ConstRefCType(BaseCType('Tensor', binds))
        elif t.name == BaseTy.Scalar:
            return ConstRefCType(BaseCType('Scalar', binds))
        else:
            raise AssertionError(f"base type should have been value type {t}")
    elif isinstance(t, OptionalType):
        if t.elem == BaseType(BaseTy.Tensor):
            raise AssertionError(
                "optional tensor not supported by structured yet; to implement this "
                "add OptionalTensor c.f. https://github.com/pytorch/pytorch/issues/51456"
            )
        elif t.elem == BaseType(BaseTy.Scalar):
            raise AssertionError(
                "optional scalar not supported by structured yet"
            )
        elem = argumenttype_type(t.elem, mutable=mutable, binds=binds)
        return OptionalCType(elem)
    elif isinstance(t, ListType):
        if t.elem == BaseType(BaseTy.Tensor):
            raise AssertionError(
                "list of tensor not supported by structured yet; to implement this "
                "resolve torch::List issue, see "
                "https://fb.workplace.com/groups/894363187646754/permalink/1149276442155426"
            )
        # TODO: delete these special cases; see tools.codegen.api.cpp--these
        # must be changed in tandem, but there are problems; see
        # https://github.com/pytorch/pytorch/pull/51485
        elif str(t.elem) == 'int':
            return BaseCType("IntArrayRef", binds)
        elif str(t.elem) == 'Dimname':
            return BaseCType("DimnameList", binds)
        elem = argumenttype_type(t.elem, mutable=mutable, binds=binds)
        return BaseCType(f"ArrayRef<{elem.cpp_type()}>", binds)
    else:
        raise AssertionError(f"unrecognized type {repr(t)}")
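
# (Editor-added illustration, not part of the original file.)  Roughly, the
# translation above maps JIT schema types onto C++ argument types like so,
# assuming the codegen types imported at the top of this file:
#
#   argumenttype_type(BaseType(BaseTy.Tensor), mutable=False, binds='self')
#       -> ConstRefCType(BaseCType('Tensor', 'self'))    # renders roughly as 'const Tensor &'
#   argumenttype_type(BaseType(BaseTy.Scalar), mutable=False, binds='alpha')
#       -> ConstRefCType(BaseCType('Scalar', 'alpha'))   # 'const Scalar &', i.e. the
#                                                        # pass-by-const-reference form
#                                                        # this commit introduces
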
def argument_type(a: Argument, *, binds: ArgName) -> CType:
    return argumenttype_type(a.type, mutable=a.is_write, binds=binds)

# returns_type intentionally omitted, because structured kernels never "return";
# instead, they always indirectly report their outputs (in the case of a meta
# function, by calling set_output; in the case of an impl function, by writing
# directly into the provided out argument).

# Structured kernels are never defaulted
def argument(a: Union[Argument, SelfArgument, TensorOptionsArguments]) -> List[Binding]:
    if isinstance(a, Argument):
        return [Binding(
            ctype=argument_type(a, binds=a.name),
            name=a.name,
            default=None,
            argument=a,
        )]
    elif isinstance(a, SelfArgument):
        return argument(a.argument)
    elif isinstance(a, TensorOptionsArguments):
        raise AssertionError("structured kernels don't support TensorOptions yet")
    else:
        assert_never(a)

def impl_arguments(g: StructuredNativeFunctions) -> List[Binding]:
    args: List[Union[Argument, TensorOptionsArguments, SelfArgument]] = []
    args.extend(g.out.func.arguments.non_out)
    args.extend(g.out.func.arguments.out)
    return [r for arg in args for r in argument(arg)]

def meta_arguments(g: StructuredNativeFunctions) -> List[Binding]:
    args: List[Union[Argument, TensorOptionsArguments, SelfArgument]] = []
    args.extend(g.functional.func.arguments.non_out)
    return [r for arg in args for r in argument(arg)]

def out_arguments(g: StructuredNativeFunctions) -> List[Binding]:
    args: List[Union[Argument, TensorOptionsArguments, SelfArgument]] = []
    args.extend(g.out.func.arguments.out)
    return [r for arg in args for r in argument(arg)]
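
# (Editor-added usage sketch, not part of the original file.)  Downstream
# code-gen joins these Bindings into C++ parameter lists; a hypothetical
# helper, assuming Binding exposes a decl() method that renders declarations
# such as 'const Scalar & alpha', might look like:
#
#   def render_impl_params(g: StructuredNativeFunctions) -> str:
#       return ', '.join(b.decl() for b in impl_arguments(g))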