mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/48718 This PR rewrites structured kernels to do the class-based mechanism (instead of defining a meta and impl function, they are methods on a class), and adds enough customizability on the class to support TensorIterator. To show it works, add is made a structured kernel. Don't forget to check https://github.com/pytorch/rfcs/pull/9 for a mostly up-to-date high level description of what's going on here. High level structure of this PR (the order you should review files): * TensorMeta.h - TensorMeta is deleted entirely; instead, meta functions will call `set_output` to allocate/resize their outputs. MetaBase gets a new `maybe_get_output` virtual method for retrieving the (possibly non-existent) output tensor in a meta function; this makes it easier to do special promotion behavior, e.g., as in TensorIterator. * TensorIterator.cpp - Two major changes: first, we add TensorIteratorBase::set_output, which is a "light" version of TensorIterator::set_output; it sets up the internal data structures in TensorIterator, but it doesn't do allocation (that is assumed to have been handled by the structured kernels framework). The control flow here is someone will call the subclassed set_output, which will allocate output, and then we will call the parent class (TensorIteratorBase) to populate the fields in TensorIterator so that other TensorIterator phases can keep track of it. Second, we add some tests for meta tensors, and skip parts of TensorIterator which are not necessary when data is not available. * tools/codegen/model.py - One new field in native_functions.yaml, structured_inherits. This lets you override the parent class of a structured meta class; normally it's MetaBase, but you can make it point at TensorIteratorBase instead for TensorIterator based kernels * tools/codegen/gen.py - Now generate all of the classes we promised. It's kind of hairy because this is the first draft. 
Check the RFC for what the output looks like, and then follow the logic here. There are some complications: I need to continue to generate old style wrapper functions even if an operator is structured, because SparseCPU/SparseCUDA/etc won't actually use structured kernels to start. The most complicated code generation is the instantiation of `set_output`, which by and large replicates the logic in `TensorIterator::set_output`. This will continue to live in codegen for the foreseeable future as we would like to specialize this logic per device. * aten/src/ATen/native/UpSampleNearest1d.cpp - The previous structured kernel is ported to the new format. The changes are very modest. * aten/src/ATen/native/BinaryOps.cpp - Add is ported to structured. TODO: * Work out an appropriate entry point for static runtime, since native:: function stubs no longer are generated * Refactor TensorIteratorConfig construction into helper functions, like before * Make Tensor-Scalar addition structured to fix perf regression * Fix `verify_api_visibility.cpp` * Refactor tools/codegen/gen.py for clarity * Figure out why header changes resulted in undefined reference to `at::Tensor::operator[](long) const` Signed-off-by: Edward Z. Yang <ezyang@fb.com> Test Plan: Imported from OSS Reviewed By: bhosmer Differential Revision: D25278031 Pulled By: ezyang fbshipit-source-id: 57c43a6e5df21929b68964d485995fbbae4d1f7b
60 lines
1.8 KiB
Python
60 lines
1.8 KiB
Python
from tools.codegen.model import *
|
|
from tools.codegen.api.types import MetaArgument
|
|
|
|
import tools.codegen.api.cpp as cpp
|
|
import tools.codegen.api.dispatcher as dispatcher
|
|
|
|
from typing import Sequence
|
|
import itertools
|
|
|
|
# Follows dispatcher calling convention, but:
|
|
# - Mutable arguments not allowed. Meta functions are always
|
|
# written in functional form. Look at FunctionSchema.signature()
|
|
# - No tensor returns; instead we return a TensorMeta describing
|
|
# the tensor in question
|
|
|
|
def name(g: StructuredNativeFunctions) -> str:
    """Return the meta function name for a structured operator group.

    The name is derived from the functional variant's schema name; the
    '.' separating base name and overload is not a valid identifier
    character, so it is replaced with '_'.
    """
    functional_name = str(g.functional.func.name)
    return functional_name.replace('.', '_')
|
|
|
|
def argument_type(a: Argument) -> str:
    """Return the C++ type for a meta-function argument.

    Meta functions are always written in functional form, so mutable
    (write) arguments are rejected up front.
    """
    assert not a.is_write
    # Follows the dispatcher calling convention, minus mutability.
    cpp_type = dispatcher.argumenttype_type(a.type, mutable=False)
    return cpp_type
|
|
|
|
def returntype_type(t: Type) -> str:
    """Return the C++ type used to describe a return of type ``t``.

    Value types map through the cpp translation; a Tensor return is
    described by a TensorMeta rather than returned directly.  List
    returns are not yet supported.
    """
    value_type = cpp.valuetype_type(t)
    if value_type is not None:
        return value_type

    # Not a value type: list returns are unimplemented, and the only
    # accepted base type is Tensor (described via TensorMeta).
    if isinstance(t, ListType):
        raise NotImplementedError("list returns not supported yet")
    if isinstance(t, BaseType) and t.name == BaseTy.Tensor:
        return 'TensorMeta'

    raise AssertionError(f"unrecognized return type {t}")
|
|
|
|
def return_type(r: Return) -> str:
    """Return the C++ type for a single schema return.

    Mutable (write) returns are not allowed: meta functions describe
    outputs instead of mutating them.
    """
    assert not r.is_write
    return returntype_type(r.type)
|
|
|
|
def returns_type(rs: Sequence[Return]) -> str:
    """Return the C++ return type for a full list of schema returns.

    No returns map to ``void``, a single return maps to its own type,
    and multiple returns are packed into a ``std::tuple``.
    """
    if not rs:
        return 'void'
    if len(rs) == 1:
        return return_type(rs[0])
    element_types = [return_type(r) for r in rs]
    return f'std::tuple<{",".join(element_types)}>'
|
|
|
|
def argument(a: Argument) -> MetaArgument:
    """Translate a schema Argument into a MetaArgument, pairing the
    original argument with its meta C++ type and name."""
    return MetaArgument(type=argument_type(a), name=a.name, argument=a)
|
|
|
|
def arguments(func: FunctionSchema) -> Sequence[MetaArgument]:
    """Translate every argument of ``func`` into MetaArguments.

    Out arguments are not permitted (meta functions take the functional
    signature); positional and keyword-only arguments are translated in
    schema order.
    """
    assert not func.arguments.out
    schema_args = itertools.chain(func.arguments.positional, func.arguments.kwarg_only)
    return [argument(a) for a in schema_args]