mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: moved __all__ to top of functions, removed private functions from __all__ Test Plan: python test/test_public_bindings.py Reviewers: Subscribers: Tasks: Tags: Differential Revision: [D41015538](https://our.internmc.facebook.com/intern/diff/D41015538) Pull Request resolved: https://github.com/pytorch/pytorch/pull/88391 Approved by: https://github.com/jcaip
161 lines
5.2 KiB
Python
161 lines
5.2 KiB
Python
import torch
|
|
from ._common_operator_config_utils import (
|
|
_get_binary_op_configs,
|
|
_get_bn_configs,
|
|
_get_cat_config,
|
|
_get_conv_configs,
|
|
_get_default_op_configs,
|
|
_get_embedding_op_configs,
|
|
_get_fixed_qparams_op_configs,
|
|
_get_linear_configs,
|
|
_get_rnn_op_configs,
|
|
_get_share_qparams_op_configs,
|
|
)
|
|
from .backend_config import BackendConfig, DTypeConfig, DTypeWithConstraints
|
|
|
|
# Names exported by `from <this module> import *`.
__all__ = ["get_qnnpack_backend_config"]
|
|
|
|
# ===================
|
|
# | DTYPE CONFIGS |
|
|
# ===================
|
|
|
|
# quint8 activations in and out, qint8 weights, float bias.
qnnpack_weighted_op_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
)

# quint8 activations in and out; no weight or bias dtype is specified.
qnnpack_default_op_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
)

# float16 for input, output, weight and bias alike.
# NOTE(review): not referenced by get_qnnpack_backend_config() in this
# file -- confirm whether it is consumed elsewhere.
qnnpack_default_op_fp16_dtype_config = DTypeConfig(
    input_dtype=torch.float16,
    output_dtype=torch.float16,
    weight_dtype=torch.float16,
    bias_dtype=torch.float16,
)

# Dynamic config: quint8 input, float output, qint8 weights, float bias.
qnnpack_default_dynamic_int8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.float,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
    is_dynamic=True,
)

# Dynamic config: float16 input and weights, float output and bias.
qnnpack_default_dynamic_float16_dtype_config = DTypeConfig(
    input_dtype=torch.float16,
    output_dtype=torch.float,
    weight_dtype=torch.float16,
    bias_dtype=torch.float,
    is_dynamic=True,
)

# Float input and output; only the weight is quantized (quint8).
qnnpack_weight_only_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.float,
    output_dtype=torch.float,
    weight_dtype=torch.quint8,
)

# Float input and output; only the weight is quantized (quint4x2).
qnnpack_weight_only_quint4x2_dtype_config = DTypeConfig(
    input_dtype=torch.float,
    output_dtype=torch.float,
    weight_dtype=torch.quint4x2,
)
|
|
|
|
# xnnpack compatible dtype configs

# The minimum scale is bounded below by 2 ** -12 so that the
# requantization scale never falls below the xnnpack lower
# threshold. Additionally, for qint8 weights, the quantized
# values are restricted to [-127, +127], excluding -128.
# For more detail, refer to the description of
# `default_symmetric_qnnpack_qconfig`.

# TODO: add additional restriction on qscheme to ensure it
# is either per_tensor_symmetric or per_channel_symmetric

# qint8 activations: only the lower bound on the minimum scale is constrained.
qnnpack_act_qint8_scale_min_2_neg_12 = DTypeWithConstraints(
    dtype=torch.qint8,
    scale_min_lower_bound=2**-12,
)

# qint8 weights: same scale bound, plus quantized values limited to [-127, 127].
qnnpack_weight_qint8_neg_127_to_127_scale_min_2_neg_12 = DTypeWithConstraints(
    dtype=torch.qint8,
    quant_min_lower_bound=-127,
    quant_max_upper_bound=127,
    scale_min_lower_bound=2**-12,
)

# Constrained qint8 activations and weights, float bias.
qnnpack_weighted_op_qint8_symmetric_dtype_config = DTypeConfig(
    input_dtype=qnnpack_act_qint8_scale_min_2_neg_12,
    output_dtype=qnnpack_act_qint8_scale_min_2_neg_12,
    weight_dtype=qnnpack_weight_qint8_neg_127_to_127_scale_min_2_neg_12,
    bias_dtype=torch.float,
)

# Constrained qint8 activations in and out; no weight or bias dtype is specified.
qnnpack_default_op_qint8_symmetric_dtype_config = DTypeConfig(
    input_dtype=qnnpack_act_qint8_scale_min_2_neg_12,
    output_dtype=qnnpack_act_qint8_scale_min_2_neg_12,
)
|
|
|
|
|
|
# =====================
|
|
# | BACKEND CONFIGS |
|
|
# =====================
|
|
|
|
def get_qnnpack_backend_config() -> BackendConfig:
    """
    Return the `BackendConfig` for PyTorch's native QNNPACK backend.

    A separate list of dtype configs is assembled for each operator family,
    and the pattern configs produced by the matching ``_get_*`` helper are
    registered, in a fixed order, on a new ``BackendConfig("qnnpack")``.
    """
    # Conv and linear both list the symmetric qint8 config ahead of quint8;
    # linear additionally lists the two dynamic configs.
    conv_dtypes = [
        qnnpack_weighted_op_qint8_symmetric_dtype_config,
        qnnpack_weighted_op_quint8_dtype_config,
    ]
    linear_dtypes = [
        qnnpack_weighted_op_qint8_symmetric_dtype_config,
        qnnpack_weighted_op_quint8_dtype_config,
        qnnpack_default_dynamic_int8_dtype_config,
        qnnpack_default_dynamic_float16_dtype_config,
    ]

    # The next four families list the same two configs but are kept as
    # distinct list objects, one per family.
    binary_op_dtypes = [
        qnnpack_default_op_qint8_symmetric_dtype_config,
        qnnpack_default_op_quint8_dtype_config,
    ]
    default_op_dtypes = [
        qnnpack_default_op_qint8_symmetric_dtype_config,
        qnnpack_default_op_quint8_dtype_config,
    ]
    fixed_qparams_dtypes = [
        qnnpack_default_op_qint8_symmetric_dtype_config,
        qnnpack_default_op_quint8_dtype_config,
    ]
    share_qparams_dtypes = [
        qnnpack_default_op_qint8_symmetric_dtype_config,
        qnnpack_default_op_quint8_dtype_config,
    ]

    # RNN ops list only the dynamic configs; embedding ops list only the
    # weight-only configs.
    rnn_dtypes = [
        qnnpack_default_dynamic_int8_dtype_config,
        qnnpack_default_dynamic_float16_dtype_config,
    ]
    embedding_dtypes = [
        qnnpack_weight_only_quint8_dtype_config,
        qnnpack_weight_only_quint4x2_dtype_config,
    ]

    # cat and batchnorm are registered with the default-op dtype list.
    # Note that cat goes through the singular `set_backend_pattern_config`.
    return (
        BackendConfig("qnnpack")
        .set_backend_pattern_configs(_get_conv_configs(conv_dtypes))
        .set_backend_pattern_configs(_get_linear_configs(linear_dtypes))
        .set_backend_pattern_configs(_get_binary_op_configs(binary_op_dtypes))
        .set_backend_pattern_config(_get_cat_config(default_op_dtypes))
        .set_backend_pattern_configs(_get_default_op_configs(default_op_dtypes))
        .set_backend_pattern_configs(_get_fixed_qparams_op_configs(fixed_qparams_dtypes))
        .set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_dtypes))
        .set_backend_pattern_configs(_get_bn_configs(default_op_dtypes))
        .set_backend_pattern_configs(_get_rnn_op_configs(rnn_dtypes))
        .set_backend_pattern_configs(_get_embedding_op_configs(embedding_dtypes))
    )
|