mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/46244

- What does the generated binding code do?

The Python binding codegen produces code that takes the input list of
PyObjects, finds the matching ATen C++ function using PythonArgParser,
converts the PyObjects into C++ types and calls the ATen C++ function:

```
+--------+  parsing   +------------------------+  binding   +-----------------------+
| PyObjs | ---------> | PythonArgParser Output | ---------> | Cpp Function Dispatch |
+--------+            +------------------------+            +-----------------------+
```

- Are Python arguments 1-1 mapped to C++ arguments?

Python arguments might be reordered, packed, unpacked when binding to
C++ arguments, as illustrated below:

```
// Binding - Reorder & Packing
// aten::empty.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None,
//                   Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor

            Python Args               Cpp Args
-----------------------------------------------------------
         0: size                      size
         1: names                     names
         2: memory_format -------+
         3: dtype         -----+-|--> options
         4: layout            /  |
         5: device           /   +--> memory_format
         6: pin_memory      /
         7: requires_grad -+

// Binding - Unpacking
// aten::max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)

            Python Args               Cpp Args
-----------------------------------------------------------
                               +----> max
                              /-----> max_values
         0: input            /        self
         1: dim             /         dim
         2: keepdim        /          keepdim
         3: out      -----+
```

- Why do we want to rewrite the python binding codegen?

The old codegen takes Declarations.yaml as input. It doesn't distinguish
between Python arguments and C++ arguments - they are all mixed together
as a bag of non-typed dict objects. Different methods process these arg
objects and add new attributes for various different purposes. It's not
obvious how to figure out the semantics of these attributes. The
complicated binding logic happens implicitly and in scattered places.
```
+--------------------+
|  Native Functions  |
+--------------------+
  |
  |
  v
+--------------------+
|   Cpp Signatures   |
+--------------------+
  |
  |
  v
+--------------------+
| Declarations.yaml  |
+--------------------+
  |                        +-------------------------------------+
  |              +-------> |       PythonArgParser Schema        |
  |              |         +-------------------------------------+
  |              |                            .
  |              |                            .
  v              |                            .
+--------------------+     +-------------------------------------+
| NonTyped Args Objs | --> | PythonArgParser -> Cpp Args Binding |
+--------------------+     +-------------------------------------+
                 |                            .
                 |                            .
                 |                            .
                 |         +-------------------------------------+
                 +-------> |        Cpp Function Dispatch        |
                           +-------------------------------------+
```

This PR leverages the new immutable data models introduced in the new
aten codegen. It introduces dedicated data models for python schema.
This way, we can not only avoid subtle Declarations.yaml conversions but
also decouple the generation of python schema, python to c++ binding and
c++ function call.

The ultimate state will be like the following diagram:

```
                            +-------------------+     +-------------------------------------+
                  +-------> | Python Signatures | --> |       PythonArgParser Schema        |
                  |         +-------------------+     +-------------------------------------+
                  |                         |                            .
                  |                         |                            .
                  |                         |                            .
+------------------+                        |         +-------------------------------------+
| Native Functions |                        +-------> | PythonArgParser -> Cpp Args Binding |
+------------------+                        |         +-------------------------------------+
                  |                         |                            .
                  |                         |                            .
                  |                         |                            .
                  |         +-------------------+     +-------------------------------------+
                  +-------> |  Cpp Signatures   | --> |        Cpp Function Dispatch        |
                            +-------------------+     +-------------------------------------+
```

This PR has migrated the core binding logic from
tools/autograd/gen_python_functions.py to tools/codegen/api/python.py.

It produces the byte-for-byte same results (tested with #46243).

Will migrate the rest of gen_python_functions.py in subsequent PRs.
Test Plan: Imported from OSS

Reviewed By: bhosmer

Differential Revision: D24388874

Pulled By: ljk53

fbshipit-source-id: f88b6df4e917cf90d868a2bbae2d5ffb680d1841
186 lines
6.6 KiB
Python
186 lines
6.6 KiB
Python
import argparse
|
|
import os
|
|
import sys
|
|
|
|
# File extensions that mark a file under tools/ as generator source.
source_files = {'.py', '.cpp', '.h'}

# Default input locations, used when the caller does not pass a path.
DECLARATIONS_PATH = 'torch/share/ATen/Declarations.yaml'
NATIVE_FUNCTIONS_PATH = 'aten/src/ATen/native/native_functions.yaml'
|
|
|
|
# TODO: This is a little inaccurate, because it will also pick
|
|
# up setup_helper scripts which don't affect code generation
|
|
def all_generator_source():
    """Return the sorted paths of all generator source files under ``tools``.

    Walks the ``tools`` directory tree (relative to the current working
    directory) and keeps every file whose extension is in ``source_files``.
    """
    matches = [
        os.path.join(dirpath, name)
        for dirpath, _, names in os.walk('tools')
        for name in names
        if os.path.splitext(name)[1] in source_files
    ]
    return sorted(matches)
|
|
|
|
|
|
def generate_code(ninja_global=None,
                  declarations_path=None,
                  nn_path=None,
                  native_functions_path=None,
                  install_dir=None,
                  subset=None,
                  disable_autograd=False,
                  force_schema_registration=False,
                  operator_selector=None):
    """Run the autograd, JIT and annotated-args code generators.

    Args:
        ninja_global: Accepted for interface compatibility; not used here.
        declarations_path: Path to Declarations.yaml. Falls back to
            ``DECLARATIONS_PATH`` when falsy.
        nn_path: Accepted for interface compatibility; not used here.
        native_functions_path: Path to native_functions.yaml. Falls back to
            ``NATIVE_FUNCTIONS_PATH`` when falsy.
        install_dir: Root output directory. When ``None``, C++ output goes
            under ``torch/csrc`` and the annotated-args output goes to
            ``torch/testing/_internal/generated``; otherwise both use
            ``install_dir``.
        subset: ``"pybindings"``, ``"libtorch"`` or ``"python"`` to run only
            that group of generators; any falsy value runs all of them.
        disable_autograd: Forwarded to ``gen_autograd`` and
            ``gen_unboxing_wrappers``.
        force_schema_registration: Forwarded to ``gen_unboxing_wrappers``.
        operator_selector: Selector forwarded to the libtorch generators.
            Defaults to ``SelectiveBuilder.get_nop_selector()`` when ``None``.
    """
    from tools.autograd.gen_autograd import gen_autograd, gen_autograd_python
    from tools.autograd.gen_annotated_fn_args import gen_annotated
    from tools.jit.gen_unboxing_wrappers import gen_unboxing_wrappers
    from tools.codegen.selective_build.selector import SelectiveBuilder

    # Build ATen based Variable classes
    if install_dir is None:
        install_dir = 'torch/csrc'
        python_install_dir = 'torch/testing/_internal/generated'
    else:
        python_install_dir = install_dir
    autograd_gen_dir = os.path.join(install_dir, 'autograd', 'generated')
    jit_gen_dir = os.path.join(install_dir, 'jit', 'generated')
    for d in (autograd_gen_dir, jit_gen_dir, python_install_dir):
        # exist_ok=True avoids the check-then-create race: a separate
        # os.path.exists() test can pass and makedirs still fail if another
        # process creates the directory in between.
        os.makedirs(d, exist_ok=True)

    # When RUNFILES_DIR is set, templates are looked up under
    # <RUNFILES_DIR>/pytorch; otherwise relative to the working directory.
    runfiles_dir = os.environ.get("RUNFILES_DIR", None)
    data_dir = os.path.join(runfiles_dir, 'pytorch') if runfiles_dir else ''
    autograd_dir = os.path.join(data_dir, 'tools', 'autograd')
    tools_jit_templates = os.path.join(data_dir, 'tools', 'jit', 'templates')

    if subset == "pybindings" or not subset:
        gen_autograd_python(
            declarations_path or DECLARATIONS_PATH,
            native_functions_path or NATIVE_FUNCTIONS_PATH,
            autograd_gen_dir,
            autograd_dir)

    if operator_selector is None:
        operator_selector = SelectiveBuilder.get_nop_selector()

    if subset == "libtorch" or not subset:

        gen_autograd(
            declarations_path or DECLARATIONS_PATH,
            autograd_gen_dir,
            autograd_dir,
            disable_autograd=disable_autograd,
            operator_selector=operator_selector,
        )
        gen_unboxing_wrappers(
            declarations_path or DECLARATIONS_PATH,
            jit_gen_dir,
            tools_jit_templates,
            disable_autograd=disable_autograd,
            operator_selector=operator_selector,
            force_schema_registration=force_schema_registration)

    if subset == "python" or not subset:
        gen_annotated(
            declarations_path or DECLARATIONS_PATH,
            python_install_dir,
            autograd_dir)
|
|
|
|
def get_selector_from_legacy_operator_selection_list(
        selected_op_list_path: str,
):
    """Build a ``SelectiveBuilder`` from a legacy operator allow-list file.

    Loads the operator list at ``selected_op_list_path`` with
    ``load_op_list_and_strip_overload``. If that yields ``None`` the no-op
    selector is returned; otherwise the selector is built from the list via
    ``SelectiveBuilder.from_legacy_op_registration_allow_list``.
    """
    from tools.autograd.utils import load_op_list_and_strip_overload

    op_names = load_op_list_and_strip_overload(None, selected_op_list_path)

    from tools.codegen.selective_build.selector import SelectiveBuilder

    nop_selector = SelectiveBuilder.get_nop_selector()
    if op_names is None:
        return nop_selector

    # This flag is no longer used by the internal build, only by the OSS
    # build. Every operator is therefore treated as a root operator (so
    # unboxing code is generated for it, matching current behaviour) and as
    # used for training, since OSS doesn't support training on mobile for now.
    return SelectiveBuilder.from_legacy_op_registration_allow_list(
        op_names,
        True,  # is_root_operator
        True,  # is_used_for_training
    )
|
|
|
|
|
|
def get_selector(selected_op_list_path, operators_yaml_path):
    """Return the operator selector described by the given paths.

    At most one of the two paths may be non-``None``:

    * ``selected_op_list_path`` set: build the selector from the legacy
      operator list.
    * ``operators_yaml_path`` set: build it with
      ``SelectiveBuilder.from_yaml_path``.
    * neither set: return the no-op selector.

    Raises:
        AssertionError: if both paths are provided.
    """
    # Put the directory three levels above this file (presumably the
    # repository root) first on sys.path so the ``tools`` package imports.
    this_file = os.path.abspath(__file__)
    root = os.path.dirname(os.path.dirname(os.path.dirname(this_file)))
    sys.path.insert(0, root)
    from tools.codegen.selective_build.selector import SelectiveBuilder

    have_op_list = selected_op_list_path is not None
    have_yaml = operators_yaml_path is not None
    assert not (have_op_list and have_yaml), \
        ("Expected at most one of selected_op_list_path and " +
         "operators_yaml_path to be set.")

    if have_op_list:
        return get_selector_from_legacy_operator_selection_list(selected_op_list_path)
    if have_yaml:
        return SelectiveBuilder.from_yaml_path(operators_yaml_path)
    return SelectiveBuilder.get_nop_selector()
|
|
|
|
|
|
def main():
    """Parse command-line options and run ``generate_code`` with them.

    The operator selector passed to ``generate_code`` is built by
    ``get_selector`` from ``--selected-op-list-path`` and
    ``--operators_yaml_path``.
    """
    parser = argparse.ArgumentParser(description='Autogenerate code')
    parser.add_argument('--declarations-path')
    parser.add_argument('--native-functions-path')
    parser.add_argument('--nn-path')
    parser.add_argument('--ninja-global')
    parser.add_argument('--install_dir')
    parser.add_argument(
        '--subset',
        # generate_code also handles "python", so list it here as well.
        help='Subset of source files to generate. Can be "libtorch", "pybindings" or "python". '
             'Generates all when omitted.'
    )
    parser.add_argument(
        '--disable-autograd',
        default=False,
        action='store_true',
        help='It can skip generating autograd related code when the flag is set',
    )
    parser.add_argument(
        '--selected-op-list-path',
        help='Path to the yaml file that contains the list of operators to include for custom build.',
    )
    parser.add_argument(
        '--operators_yaml_path',
        help='Path to the model YAML file that contains the list of operators to include for custom build.',
    )
    parser.add_argument(
        '--force_schema_registration',
        action='store_true',
        # Adjacent literals are concatenated verbatim, so the first one must
        # end with a space to keep "not" and "listed" apart.
        help='force it to generate schema-only registrations for ops that are not '
             'listed on --selected-op-list-path'
    )
    options = parser.parse_args()

    generate_code(
        options.ninja_global,
        options.declarations_path,
        options.nn_path,
        options.native_functions_path,
        options.install_dir,
        options.subset,
        options.disable_autograd,
        options.force_schema_registration,
        operator_selector=get_selector(options.selected_op_list_path, options.operators_yaml_path),
    )
|
|
|
|
|
|
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|