[PT2E][Quant] Support for embedding op quantization via ExecuTorchNativeQuantizer (#99106)

ExecuTorchNativeQuantizer is a terrible name, I admit; let's fix it once we align on what the quantized kernel library within the ExecuTorch runtime should be called.
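
For context, a minimal end-to-end sketch of how a quantizer like this is meant to plug into the PT2E flow. Only `prepare_pt2e_quantizer` and `convert_pt2e` are taken from the diff below; the quantizer import path, its constructor/configuration, and the graph-capture step are assumptions for illustration.

```python
import torch
from torch.ao.quantization._quantize_pt2e import convert_pt2e, prepare_pt2e_quantizer

# NOTE: assumption — the quantizer lives somewhere under
# torch.ao.quantization._pt2e.quantizer, but the exact export may differ.
from torch.ao.quantization._pt2e.quantizer import ExecuTorchNativeQuantizer


class EmbeddingModule(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=4)

    def forward(self, indices):
        return self.emb(indices)


m = EmbeddingModule().eval()
example_inputs = (torch.randint(0, 10, (2, 3)),)

# PT2E quantization operates on a captured ATen graph; the capture API used
# here is an assumption for this sketch.
m, _guards = torch._dynamo.export(m, *example_inputs, aten_graph=True)

quantizer = ExecuTorchNativeQuantizer()  # configuration call omitted; API is an assumption
m = prepare_pt2e_quantizer(m, quantizer)
m(*example_inputs)   # calibration pass: observers record statistics
m = convert_pt2e(m)  # fold observers into quantize/dequantize ops
```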

Differential Revision: [D44986258](https://our.internmc.facebook.com/intern/diff/D44986258/)

**NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments, please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D44986258/)!
Pull Request resolved: https://github.com/pytorch/pytorch/pull/99106
Approved by: https://github.com/jerryzh168
Kimish Patel 2023-04-17 20:47:31 -07:00 committed by PyTorch MergeBot
parent 06f19fdbe5
commit c0be06667f
3 changed files with 26 additions and 4 deletions

View File

@@ -13,6 +13,7 @@ from torch.ao.quantization._pt2e.quantizer import (
)
from torch.ao.quantization._quantize_pt2e import convert_pt2e, prepare_pt2e_quantizer
from torch.ao.quantization.backend_config import get_qnnpack_backend_config
from torch.ao.quantization.qconfig import default_per_channel_symmetric_qnnpack_qconfig
from torch.ao.quantization.quantize_fx import convert_to_reference_fx, prepare_fx
from torch.testing._internal.common_quantization import (

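For context on the newly imported qconfig: it is normally used with the FX graph mode flow to produce a reference quantized model that PT2E results can be compared against. A generic sketch of that flow, not the test's actual contents (the Linear model and global mapping here are purely illustrative):

```python
import torch
from torch.ao.quantization.qconfig import default_per_channel_symmetric_qnnpack_qconfig
from torch.ao.quantization.qconfig_mapping import QConfigMapping
from torch.ao.quantization.quantize_fx import convert_to_reference_fx, prepare_fx

model = torch.nn.Sequential(torch.nn.Linear(4, 4)).eval()
example_inputs = (torch.randn(2, 4),)

# Apply the per-channel symmetric QNNPACK qconfig everywhere for this sketch.
qconfig_mapping = QConfigMapping().set_global(default_per_channel_symmetric_qnnpack_qconfig)
prepared = prepare_fx(model, qconfig_mapping, example_inputs)
prepared(*example_inputs)                      # calibrate observers
reference = convert_to_reference_fx(prepared)  # reference quantized model
```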
View File

@@ -116,6 +116,8 @@ def _get_act_obs_or_fq_ctr(quantization_config: Optional[QuantizationConfig]):
if quantization_config is None:
return None
assert quantization_config is not None
if quantization_config.activation is None:
return None
quantization_spec: QuantizationSpec = quantization_config.activation
qdtype = _TORCH_DTYPE_TO_QDTYPE[quantization_spec.dtype]
assert quantization_spec.qscheme in [
@@ -141,6 +143,8 @@ def _get_weight_obs_or_fq_ctr(quantization_config: Optional[QuantizationConfig])
if quantization_config is None:
return None
assert quantization_config is not None
if quantization_config.weight is None:
return None
quantization_spec: QuantizationSpec = quantization_config.weight
qdtype = _TORCH_DTYPE_TO_QDTYPE[quantization_spec.dtype]
if quantization_spec.qscheme == torch.per_tensor_symmetric:
@@ -169,6 +173,8 @@ def _get_bias_obs_or_fq_ctr(quantization_config: Optional[QuantizationConfig]):
if quantization_config is None:
return None
assert quantization_config is not None
if quantization_config.bias is None:
return None
quantization_spec: QuantizationSpec = quantization_config.bias
assert (
quantization_spec.dtype == torch.float

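These None checks allow a QuantizationConfig to describe weight-only quantization, which is the usual setup for embedding ops. A minimal sketch, assuming QuantizationSpec and QuantizationConfig are importable from the quantizer package above and that the fields shown are enough to construct a spec:

```python
import torch
from torch.ao.quantization._pt2e.quantizer import (  # exact module path assumed
    QuantizationConfig,
    QuantizationSpec,
)

# Per-channel symmetric int8 weights; activation and bias stay None, so the
# _get_act/_get_bias helpers above return None and no observers are attached.
weight_spec = QuantizationSpec(
    dtype=torch.int8,
    qscheme=torch.per_channel_symmetric,
    ch_axis=0,  # additional fields such as quant_min/quant_max may be required
)
embedding_config = QuantizationConfig(activation=None, weight=weight_spec, bias=None)
```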
View File

@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from dataclasses import asdict, dataclass
from typing import Callable, List, NamedTuple, Optional
import torch
@@ -18,6 +18,15 @@ SUPPORTED_QSCHEMES = [
torch.per_channel_affine_float_qparams,
]
# TODO: add support for torch dtype in quant code base
# this includes observers and prepare/convert code
_TORCH_DTYPE_TO_QDTYPE = {
torch.int8: torch.qint8,
torch.uint8: torch.quint8,
torch.int32: torch.qint32,
torch.float16: torch.float16,
}
@dataclass(eq=True, frozen=True)
class QuantizationSpec:
@@ -53,13 +62,19 @@ class QuantizationSpec:
raise ValueError("Ch_axis is < 0.")
def get_observer_kwargs(quant_spec: QuantizationSpec):
kwargs_dict = asdict(quant_spec)
kwargs_dict["dtype"] = _TORCH_DTYPE_TO_QDTYPE[quant_spec.dtype]
return kwargs_dict
# In the absence of better name, just winging it with QuantizationConfig
QuantizationConfig = NamedTuple(
"QuantizationConfig",
[
("activation", QuantizationSpec),
("weight", QuantizationSpec),
("bias", QuantizationSpec),
("activation", Optional[QuantizationSpec]),
("weight", Optional[QuantizationSpec]),
("bias", Optional[QuantizationSpec]),
],
)
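
To illustrate how get_observer_kwargs and _TORCH_DTYPE_TO_QDTYPE are meant to feed an observer constructor, here is a rough sketch; the plain dict stands in for asdict(quant_spec), and the choice of PerChannelMinMaxObserver is an assumption:

```python
import torch
from torch.ao.quantization.observer import PerChannelMinMaxObserver

# Stand-in for asdict(quant_spec); the fields mirror QuantizationSpec above.
kwargs = {
    "dtype": torch.int8,
    "quant_min": -127,
    "quant_max": 127,
    "qscheme": torch.per_channel_symmetric,
    "ch_axis": 0,
}

# Observers still expect the legacy quantized dtypes, hence the mapping
# from plain torch dtypes to their q-dtype counterparts.
_TORCH_DTYPE_TO_QDTYPE = {torch.int8: torch.qint8, torch.uint8: torch.quint8}
kwargs["dtype"] = _TORCH_DTYPE_TO_QDTYPE[kwargs["dtype"]]

observer_ctr = PerChannelMinMaxObserver.with_args(**kwargs)
observer = observer_ctr()  # per-channel qint8 observer ready to record min/max
```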