pytorch/tools/codegen/selective_build/selector.py
Dhruv Matani 0c5cd8c2b9 [RFC] Switch PyTorch Selective Build (Custom Build) to use the SelectiveBuilder abstraction (#45722)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/45722

This diff does a bunch of things:

1. Introduces some abstractions as detailed in https://fb.quip.com/2oEzAR5MKqbD to help with selective build related codegen in multiple files.
2. Adds helper methods to combine operators, debug info, operator lists, etc...
3. Currently, the selective build machinery querying `op_registration_whitelist` directly at various places in the code. `op_registration_whitelist` is a list of allowed operator names (without overload name). We want to move to a world where the overload names are also included so that we can be more selective about which operators we include. To that effect, it makes sense to hide the checking logic in a separate abstraction and have the build use that abstraction instead of putting all this selective build specific logic in the code-generator itself. This change is attempting to do just that.
4. Updates generate_code, unboxing-wrapper codegen, and autograd codegen to accept the operator selector paradigm as opposed to a selected operator list.
5. Update `tools/code_analyzer/gen_op_registration_allowlist.py` to expose providing an actual structured operator dependency graph in addition to a serialized string.

There are a bunch of structural changes as well:

1. `root_op_list.yaml` and `combined_op_list.yaml` are now actual YAML files (not a space separated list of operator names)
2. `generate_code.py` accepts only paths to operator list YAML files (both old style as well as new style) and not list of operator names on the command line as arguments
3. `gen.py` optionally also accepts a custom build related operators YAML path (this file has information about which operators to register in the generated library).

ghstack-source-id: 114578753

(Note: this ignores all push blocking failures!)

Test Plan:
`buck test caffe2/test:selective_build`

Generated YAML files after the change:

{P143981979}

{P143982025}

{P143982056}

Ensure that the generated files are same before and after the change:

```
[dhruvbird@devvm2490 /tmp/TypeDefault.cpp] find -name "*.cpp" | xargs md5sum
d72c3d125baa7b77e4c5581bbc7110d2  ./after_change/gen_aten/TypeDefault.cpp
42353036c83ebc7620a7159235b9647f  ./after_change/lite_predictor_lib_aten/TypeDefault.cpp
d72c3d125baa7b77e4c5581bbc7110d2  ./before_change/gen_aten/TypeDefault.cpp
42353036c83ebc7620a7159235b9647f  ./before_change/lite_predictor_lib_aten/TypeDefault.cpp
```

`VariableTypes_N.cpp` are generated the same both before and after the change:

```
[dhruvbird@devvm2490 /tmp/VariableType] find -name "*.cpp" | xargs -n 1 md5sum | sort
3be89f63fd098291f01935077a60b677  ./after/VariableType_2.cpp
3be89f63fd098291f01935077a60b677  ./before/VariableType_2.cpp
40a3e59d64e9dbe86024cf314f127fd6  ./after/VariableType_4.cpp
40a3e59d64e9dbe86024cf314f127fd6  ./before/VariableType_4.cpp
a4911699ceda3c3a430f08c64e8243fd  ./after/VariableType_1.cpp
a4911699ceda3c3a430f08c64e8243fd  ./before/VariableType_1.cpp
ca9aa611fcb2a573a8cba4e269468c99  ./after/VariableType_0.cpp
ca9aa611fcb2a573a8cba4e269468c99  ./before/VariableType_0.cpp
e18f639ed23d802dc4a31cdba40df570  ./after/VariableType_3.cpp
e18f639ed23d802dc4a31cdba40df570  ./before/VariableType_3.cpp
```

Reviewed By: ljk53

Differential Revision: D23837010

fbshipit-source-id: ad06b1756af5be25baa39fd801dfdf09bc565442
2020-10-18 15:10:42 -07:00

161 lines
5.5 KiB
Python

from typing import Dict, Set, Optional, Tuple
import yaml
from dataclasses import dataclass
from tools.codegen.selective_build.operator import *
# A SelectiveBuilder holds information extracted from the selective build
# YAML specification.
#
# It includes information about the build's selectivity, the debug_info
# associated with this selective build (opaque string), and the set of
# operators that should be included in the build.
#
@dataclass(frozen=True)
class SelectiveBuilder:
# If true, then the build is not selective, and includes all
# operators.
include_all_operators: bool
# Debug Information at the selective/custom build level.
_debug_info: Optional[Tuple[str, ...]]
# A dictionary of operator -> operator metadata.
operators: Dict[str, SelectiveBuildOperator]
@staticmethod
def get_nop_selector() -> 'SelectiveBuilder':
return SelectiveBuilder.from_yaml_dict({'include_all_operators': True})
@staticmethod
def from_yaml_dict(data: Dict[str, object]) -> 'SelectiveBuilder':
valid_top_level_keys = {
'include_all_operators',
'debug_info',
'operators',
}
top_level_keys = set(data.keys())
if len(top_level_keys - valid_top_level_keys) > 0:
raise Exception("Got unexpected top level keys: {}".format(
",".join(top_level_keys - valid_top_level_keys),
))
include_all_operators = data.get('include_all_operators', False)
assert isinstance(include_all_operators, bool)
debug_info = None
if 'debug_info' in data:
di_list = data['debug_info']
assert isinstance(di_list, list)
debug_info = tuple(map(lambda x: str(x), di_list))
operators = {}
operators_dict = data.get('operators', {})
assert isinstance(operators_dict, dict)
for (k, v) in operators_dict.items():
operators[k] = SelectiveBuildOperator.from_yaml_dict(k, v)
return SelectiveBuilder(include_all_operators, debug_info, operators)
@staticmethod
def from_yaml_str(config_contents: str) -> 'SelectiveBuilder':
contents = yaml.load(config_contents)
return SelectiveBuilder.from_yaml_dict(contents)
@staticmethod
def from_yaml_path(config_path: str) -> 'SelectiveBuilder':
with open(config_path, 'r') as f:
contents = yaml.load(f)
return SelectiveBuilder.from_yaml_dict(contents)
@staticmethod
def from_legacy_op_registration_allow_list(
allow_list: Set[str],
is_root_operator: bool,
is_used_for_training: bool) -> 'SelectiveBuilder':
operators = {}
for op in allow_list:
operators[op] = {
'name': op,
'is_root_operator': is_root_operator,
'is_used_for_training': is_used_for_training,
'include_all_overloads': True,
}
return SelectiveBuilder.from_yaml_dict({
'operators': operators,
})
def is_operator_selected(self, name: str) -> bool:
if self.include_all_operators:
return True
if name in self.operators:
return True
name = strip_operator_overload_name(name)
return name in self.operators and self.operators[name].include_all_overloads
def is_operator_selected_for_training(self, name: str) -> bool:
if not self.is_operator_selected(name):
return False
if self.include_all_operators:
return True
not_training_op = SelectiveBuildOperator(
name='',
is_root_operator=False,
is_used_for_training=False,
include_all_overloads=False,
_debug_info=None,
)
op = not_training_op
if name in self.operators:
op = self.operators[name]
name = strip_operator_overload_name(name)
base_op = not_training_op
if name in self.operators:
base_op = self.operators[name]
return (
op.is_used_for_training or
(base_op.include_all_overloads and base_op.is_used_for_training)
)
def is_root_operator(self, name: str) -> bool:
if not self.is_operator_selected(name):
return False
if self.include_all_operators:
return True
if name in self.operators:
op: SelectiveBuildOperator = self.operators[name]
return op.is_root_operator
name = strip_operator_overload_name(name)
if name not in self.operators:
return False
base_op: SelectiveBuildOperator = self.operators[name]
return base_op.include_all_overloads and base_op.is_root_operator
def to_dict(self) -> Dict[str, object]:
ret: Dict[str, object] = {
'include_all_operators': self.include_all_operators,
}
operators = {}
for (op_name, op) in self.operators.items():
operators[op_name] = op.to_dict()
ret['operators'] = operators
if self._debug_info is not None:
ret['debug_info'] = self._debug_info
return ret
def combine_selective_builders(lhs: SelectiveBuilder, rhs: SelectiveBuilder) -> SelectiveBuilder:
include_all_operators = lhs.include_all_operators or rhs.include_all_operators
debug_info = merge_debug_info(lhs._debug_info, rhs._debug_info)
operators = merge_operator_dicts(lhs.operators, rhs.operators)
return SelectiveBuilder(include_all_operators, debug_info, operators)