[PT2E][Quant] Support for embedding op quantization via ExecuTorchNativeQuantizer (#99106)

ExecuTorchNativeQuantizer is a terrible name, I admit; however, let's fix it once we align on what the quantized kernel library within the ExecuTorch runtime should be called.

Differential Revision: [D44986258](https://our.internmc.facebook.com/intern/diff/D44986258/)

**NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments; please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D44986258/)!

Pull Request resolved: https://github.com/pytorch/pytorch/pull/99106
Approved by: https://github.com/jerryzh168
This commit is contained in:
parent
06f19fdbe5
commit
c0be06667f
@@ -13,6 +13,7 @@ from torch.ao.quantization._pt2e.quantizer import (
 )
 from torch.ao.quantization._quantize_pt2e import convert_pt2e, prepare_pt2e_quantizer
 from torch.ao.quantization.backend_config import get_qnnpack_backend_config

 from torch.ao.quantization.qconfig import default_per_channel_symmetric_qnnpack_qconfig
 from torch.ao.quantization.quantize_fx import convert_to_reference_fx, prepare_fx
 from torch.testing._internal.common_quantization import (

@@ -116,6 +116,8 @@ def _get_act_obs_or_fq_ctr(quantization_config: Optional[QuantizationConfig]):
     if quantization_config is None:
         return None
     assert quantization_config is not None
+    if quantization_config.activation is None:
+        return None
     quantization_spec: QuantizationSpec = quantization_config.activation
     qdtype = _TORCH_DTYPE_TO_QDTYPE[quantization_spec.dtype]
     assert quantization_spec.qscheme in [
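The next two hunks add the identical guard to the weight and bias getters. Here is a self-contained sketch of what the early return buys; the two classes below are trimmed stand-ins for the real ones, and the getter body is reduced to a placeholder:

```python
from typing import NamedTuple, Optional

class QuantizationSpec(NamedTuple):    # trimmed stand-in for the real class
    dtype: str

class QuantizationConfig(NamedTuple):  # fields become Optional in this PR
    activation: Optional[QuantizationSpec]
    weight: Optional[QuantizationSpec]
    bias: Optional[QuantizationSpec]

def _get_act_obs_or_fq_ctr(quantization_config: Optional[QuantizationConfig]):
    if quantization_config is None:
        return None
    # the new guard: tolerate configs that simply carry no activation spec,
    # e.g. weight-only embedding quantization
    if quantization_config.activation is None:
        return None
    return quantization_config.activation.dtype  # placeholder for the real work

weight_only = QuantizationConfig(activation=None, weight=QuantizationSpec("int8"), bias=None)
assert _get_act_obs_or_fq_ctr(weight_only) is None  # previously: AttributeError on .dtype
```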
@@ -141,6 +143,8 @@ def _get_weight_obs_or_fq_ctr(quantization_config: Optional[QuantizationConfig]):
     if quantization_config is None:
         return None
     assert quantization_config is not None
+    if quantization_config.weight is None:
+        return None
     quantization_spec: QuantizationSpec = quantization_config.weight
     qdtype = _TORCH_DTYPE_TO_QDTYPE[quantization_spec.dtype]
     if quantization_spec.qscheme == torch.per_tensor_symmetric:
@@ -169,6 +173,8 @@ def _get_bias_obs_or_fq_ctr(quantization_config: Optional[QuantizationConfig]):
     if quantization_config is None:
         return None
     assert quantization_config is not None
+    if quantization_config.bias is None:
+        return None
     quantization_spec: QuantizationSpec = quantization_config.bias
     assert (
         quantization_spec.dtype == torch.float

@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
+from dataclasses import asdict, dataclass
 from typing import Callable, List, NamedTuple, Optional

 import torch
@@ -18,6 +18,15 @@ SUPPORTED_QSCHEMES = [
     torch.per_channel_affine_float_qparams,
 ]

+# TODO: add support for torch dtype in quant code base
+# this includes observers and prepare/convert code
+_TORCH_DTYPE_TO_QDTYPE = {
+    torch.int8: torch.qint8,
+    torch.uint8: torch.quint8,
+    torch.int32: torch.qint32,
+    torch.float16: torch.float16,
+}
+

 @dataclass(eq=True, frozen=True)
 class QuantizationSpec:
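A quick note on what the table is for: the observer constructors in torch.ao.quantization still speak the quantized dtypes (torch.qint8, torch.quint8, ...), while QuantizationSpec is written in terms of plain torch dtypes, so the table bridges the two. A small illustration; the choice of MinMaxObserver here is mine, not the diff's:

```python
import torch
from torch.ao.quantization.observer import MinMaxObserver

# mirrors the table added in the hunk above
_TORCH_DTYPE_TO_QDTYPE = {
    torch.int8: torch.qint8,
    torch.uint8: torch.quint8,
    torch.int32: torch.qint32,
    torch.float16: torch.float16,
}

# a spec says torch.uint8; the observer wants torch.quint8
obs = MinMaxObserver.with_args(dtype=_TORCH_DTYPE_TO_QDTYPE[torch.uint8])()
obs(torch.randn(4, 4))          # record min/max statistics
print(obs.calculate_qparams())  # -> (scale, zero_point) tensors
```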
@@ -53,13 +62,19 @@ class QuantizationSpec:
             raise ValueError("Ch_axis is < 0.")


+def get_observer_kwargs(quant_spec: QuantizationSpec):
+    kwargs_dict = asdict(quant_spec)
+    kwargs_dict["dtype"] = _TORCH_DTYPE_TO_QDTYPE[quant_spec.dtype]
+    return kwargs_dict
+
+
 # In the absence of better name, just winging it with QuantizationConfig
 QuantizationConfig = NamedTuple(
     "QuantizationConfig",
     [
-        ("activation", QuantizationSpec),
-        ("weight", QuantizationSpec),
-        ("bias", QuantizationSpec),
+        ("activation", Optional[QuantizationSpec]),
+        ("weight", Optional[QuantizationSpec]),
+        ("bias", Optional[QuantizationSpec]),
     ],
 )
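get_observer_kwargs is a thin adapter: dataclasses.asdict flattens the frozen spec into a kwargs dict, and the dtype entry is rewritten through _TORCH_DTYPE_TO_QDTYPE so the result can be splatted into an observer constructor. A runnable sketch, with QuantizationSpec trimmed down to the fields the code above actually reads (the real class carries more):

```python
import torch
from dataclasses import asdict, dataclass

_TORCH_DTYPE_TO_QDTYPE = {torch.int8: torch.qint8, torch.uint8: torch.quint8}

@dataclass(eq=True, frozen=True)
class QuantizationSpec:            # trimmed stand-in for the real class
    dtype: torch.dtype
    qscheme: torch.qscheme
    ch_axis: int = 0

def get_observer_kwargs(quant_spec: QuantizationSpec):
    kwargs_dict = asdict(quant_spec)
    kwargs_dict["dtype"] = _TORCH_DTYPE_TO_QDTYPE[quant_spec.dtype]
    return kwargs_dict

spec = QuantizationSpec(dtype=torch.int8, qscheme=torch.per_channel_symmetric)
print(get_observer_kwargs(spec))
# {'dtype': torch.qint8, 'qscheme': torch.per_channel_symmetric, 'ch_axis': 0}
```

Making the three QuantizationConfig fields Optional is what lets a backend express weight-only quantization (activation=None, bias=None), which is the shape an embedding-op config naturally takes.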