mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Summary:
During convert step observers are first replaced by Q-DQ pair. In some
scenarios like following output DQ has a fan out.
---> OP2 -> Q -> DQ
/
OP -> Q -> DQ -
\
---> OP3 -> Q -> DQ
If either op OP2 or OP3 is configured to be quantized, then the input
is expected to be quantized. In this case the quantized equivalent of some
pattern, that quantizer asked to be quantized, should look like:
[DQ -> {pattern} -> Q]. However, in scenario like above where DQ node
is shared between multiple "quantized" patterns, boundary of "quantized"
pattern is not clear because DQ now belongs to multiple quantized
patterns.
This poses challenge for:
- Porting metadata: which "quantized" partition this DQ node belongs
- Quantized representation, equivalently, needs to identify
self-contained quantized pattern that is replaced by its equivalent pattern
that captures compute in the quantized precision.
Test Plan:
test_duplicate_dq_pass
Reviewers:
Subscribers:
Tasks:
Tags:
Differential Revision: [D48663147](https://our.internmc.facebook.com/intern/diff/D48663147)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/107900
Approved by: https://github.com/jerryzh168, https://github.com/andrewor14, https://github.com/leslie-fang-intel
ghstack dependencies: #107105, #107106, #107899
50 lines
1.8 KiB
Python
50 lines
1.8 KiB
Python
from typing import List
|
|
|
|
from torch.ao.quantization.pt2e.utils import _is_sym_size_node
|
|
|
|
from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation
|
|
from torch.fx import Node
|
|
|
|
|
|
def _annotate_input_qspec_map(node: Node, input_node: Node, qspec):
|
|
quantization_annotation = node.meta.get(
|
|
"quantization_annotation", QuantizationAnnotation()
|
|
)
|
|
if quantization_annotation.input_qspec_map is None:
|
|
quantization_annotation.input_qspec_map = {}
|
|
quantization_annotation.input_qspec_map[input_node] = qspec
|
|
node.meta["quantization_annotation"] = quantization_annotation
|
|
|
|
|
|
def _annotate_output_qspec(node: Node, qspec):
|
|
quantization_annotation = node.meta.get(
|
|
"quantization_annotation", QuantizationAnnotation()
|
|
)
|
|
quantization_annotation.output_qspec = qspec
|
|
node.meta["quantization_annotation"] = quantization_annotation
|
|
|
|
|
|
def _node_only_used_for_sym_size(node: Node, partition_nodes: List[Node]):
|
|
"""
|
|
This utility is used to handle cases when dynami_shape=True tracing leads
|
|
to symint nodes in the pattern of linear module. In those cases, we need to
|
|
distinguish between the nodes that are in input for just extracting value of
|
|
some dimentions (and symint nodes) vs. the one that is activation.
|
|
For example:
|
|
graph(x, y, weight):
|
|
size_0 = torch.ops.aten.sym_size([x], [0])
|
|
size_1 = torch.ops.aten.sym_size([y], [1])
|
|
view_size = size_0 * size_1
|
|
size_3 = torch.ops.aten.sym_size([x], [2])
|
|
vie_out = torch.ops.aten.view(x, [view_size, size_3])
|
|
return mm(view_out, weight)
|
|
In the example above y node is not actual input. It exist only to extract size_1
|
|
"""
|
|
if _is_sym_size_node(node):
|
|
return True
|
|
|
|
return all(
|
|
((user not in partition_nodes) or _is_sym_size_node(user))
|
|
for user in node.users
|
|
)
|