Mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)
[BE][13/16] fix typos in torch/ (torch/ao/) (#156603)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/156603
Approved by: https://github.com/msaroufim
This commit is contained in: parent 1913c915e0, commit f8293116f5
@@ -1173,7 +1173,6 @@ exclude_patterns = [
 'test/distributed/**',
 'torch/**',
 'torch/_*/**',
-'torch/ao/**',
 'torch/fx/**',
 'torch/distributed/tensor/**',
 'torch/[j-o]*/**',
@@ -18,6 +18,7 @@ NowNs
 optins
 OT
 overrideable
 padD
 ptd
 rebuild
 rebuilt
@@ -214,7 +214,7 @@ class MultiheadAttention(nn.MultiheadAttention):
 fp.bias_v = nn.Parameter(self.bias_v.dequantize())

 # Set the linear weights
-# Note: Because the linear layers are quantized, mypy does not nkow how
+# Note: Because the linear layers are quantized, mypy does not know how
 # to deal with them -- might need to ignore the typing checks.
 # for the type: ignore[has-type], see https://github.com/pytorch/pytorch/issues/58969
 w, b = self.out_proj._weight_bias() # type: ignore[operator, has-type]
@@ -15,7 +15,7 @@ def _is_valid_linear_block_sparse_pattern(

 # This is a stop-gap measure as current flow does not allow module
 # specific block sparse pattern.
-# Infact there is no way to convey sparse pattern via module config
+# In fact there is no way to convey sparse pattern via module config
 # of quantization flow. Thus using the global context to convey
 # sparsity pattern.
 # Once the flow supports it, this should be removed.
@@ -1124,7 +1124,7 @@ def create_a_shadows_b(
 # (prev_node_c+) -> (logger_c_input)? -> node_start_c -> ... -> node_end_c -> logger_c
 #
 # Note: node_start_c may be the same node as node_end_c, or they
-# may have nodes inbetween.
+# may have nodes in between.

 else:
 env_c[node_b.name] = graph_c.node_copy(node_b, load_arg)
@@ -109,7 +109,7 @@ class QConfigMultiMapping:
 target_qconfigs_dict[key] = None
 break

-# insert copies of this new QConfigMapping until all entires
+# insert copies of this new QConfigMapping until all entries
 # in qconfig_list can fit among the QConfigMappings
 while len(qconfig_list) > len(self.qconfig_mappings_list):
 self.qconfig_mappings_list.append(copy.deepcopy(new_qconfig_mapping))
@@ -159,7 +159,7 @@ class ActivationSparsifier:
 if data is None:
 out_data = [
 0 for _ in range(0, len(features))
-] # create one incase of 1st forward
+] # create one in case of 1st forward
 self.state[name]["mask"] = [0 for _ in range(0, len(features))]
 else:
 out_data = data # a list
@@ -14,7 +14,7 @@ The [DataNormSparsifier](https://github.com/pytorch/pytorch/blob/main/torch/ao/p
 3. Norm: L1 and L2

 ## Dataset
-The benchmarks are created for the dlrm model on the Kaggle CriteoDataset which can be downloaded from [here](https://ailab.criteo.com/ressources/) or [here](https://figshare.com/articles/dataset/Kaggle_Display_Advertising_Challenge_dataset/5732310/1).
+The benchmarks are created for the dlrm model on the Kaggle CriteoDataset which can be downloaded from [here](https://ailab.criteo.com/ressources/) or [here](https://figshare.com/articles/dataset/Kaggle_Display_Advertising_Challenge_dataset/5732310/1). <!-- codespell:ignore -->

 ## Results
 1. **Disk Usage**: Introducing sparsity in the embeddings reduces file size after compression. The compressed model size goes down from 1.9 GB to 150 MB after 100% sparsity.
@@ -34,7 +34,7 @@ The takeaway is that the dlrm model with sparse coo tensor is slower (roughly 2x
 ## Setup
 The benchmark codes depend on the [DLRM codebase](https://github.com/facebookresearch/dlrm).
 1. Clone the dlrm git repository
-2. Download the dataset from [here](https://ailab.criteo.com/ressources/) or [here](https://figshare.com/articles/dataset/Kaggle_Display_Advertising_Challenge_dataset/5732310/1)
+2. Download the dataset from [here](https://ailab.criteo.com/ressources/) or [here](https://figshare.com/articles/dataset/Kaggle_Display_Advertising_Challenge_dataset/5732310/1) <!-- codespell:ignore -->
 3. The DLRM model can be trained using the following script
 ```
 # Make sure you go into the file and make sure that the path to dataset is correct.
@@ -199,7 +199,7 @@ class TestTrainingAwareCallback(TestCase):
 do not want as the config of each layer changes after
 .step()

-Hence, we need to dump and restore the state_dict() everytime because we're
+Hence, we need to dump and restore the state_dict() every time because we're
 copying the model after each epoch.
 Hence, it is essential to make sure that the sparsifier's state_dict() is being
 correctly dumped and restored.
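The dump-and-restore pattern the docstring describes looks roughly like the sketch below; the model, tensor fqn, and sparsifier settings are made-up placeholders, assuming the `BaseSparsifier` `state_dict()`/`load_state_dict()` API:

```python
import copy

import torch
from torch.ao.pruning import WeightNormSparsifier

model = torch.nn.Sequential(torch.nn.Linear(16, 16))
sparsifier = WeightNormSparsifier(sparsity_level=0.5)
sparsifier.prepare(model, config=[{"tensor_fqn": "0.weight"}])
sparsifier.step()  # mutates each layer's config, hence the dump/restore

# dump before copying the model, restore afterwards, so the sparsifier
# state stays in sync with the copy made after each epoch
saved_state = copy.deepcopy(sparsifier.state_dict())
model_copy = copy.deepcopy(model)
sparsifier.load_state_dict(saved_state)
```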
@@ -11,7 +11,7 @@ __all__ = ["FPGMPruner"]

 class FPGMPruner(BaseStructuredSparsifier):
 r"""Filter Pruning via Geometric Median (FPGM) Structured Pruner
-This sparsifier prune fliter (row) in a tensor according to distances among filters according to
+This sparsifier prunes filters (rows) in a tensor according to their distances from the other filters, following
 `Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration <https://arxiv.org/abs/1811.00250>`_.

 This sparsifier is controlled by three variables:
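The geometric-median criterion can be illustrated with a short sketch (an illustration of the idea, not the FPGMPruner code): filters whose total distance to all other filters is smallest are the most redundant, so they are pruned first. The L2 distance and the 50% pruning ratio here are arbitrary choices:

```python
import torch

def fpgm_scores(weight: torch.Tensor) -> torch.Tensor:
    flat = weight.flatten(1)              # one flattened filter per row
    dists = torch.cdist(flat, flat, p=2)  # pairwise distances between filters
    return dists.sum(dim=1)               # small sum => near the geometric median

conv = torch.nn.Conv2d(3, 8, kernel_size=3)
scores = fpgm_scores(conv.weight.detach())
num_to_prune = conv.out_channels // 2
pruned = scores.argsort()[:num_to_prune]  # most redundant filters first
mask = torch.ones(conv.out_channels, dtype=torch.bool)
mask[pruned] = False
```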
@@ -7,7 +7,7 @@ class SaliencyPruner(BaseStructuredSparsifier):
 Prune rows based on the saliency (L1 norm) of each row.

 This pruner works on N-Dimensional weight tensors.
-For each row, we will calculate the saliency, whic is the sum the L1 norm of all weights in that row.
+For each row, we will calculate the saliency, which is the sum of the L1 norms of all weights in that row.
 We expect that the resulting saliency vector has the same shape as our mask.
 We then pick elements to remove until we reach the target sparsity_level.
 """
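As a concrete sketch of the saliency computation described above (shapes and sparsity level are arbitrary; this is not the pruner's actual mask-update code):

```python
import torch

weight = torch.randn(8, 16)            # one row per output unit
saliency = weight.abs().sum(dim=1)     # per-row L1 norm, same shape as the mask
sparsity_level = 0.5
num_to_remove = int(sparsity_level * weight.shape[0])
to_remove = saliency.argsort()[:num_to_remove]  # lowest-saliency rows first
mask = torch.ones(weight.shape[0], dtype=torch.bool)
mask[to_remove] = False
```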
@@ -54,7 +54,7 @@ class AdaptiveRoundingLoss(torch.nn.Module):
 1 + np.cos(rel_iter * np.pi)
 )

-# A rectified sigmoid for soft-quantization as formualted [23] in https://arxiv.org/pdf/2004.10568.pdf
+# A rectified sigmoid for soft-quantization as formulated [23] in https://arxiv.org/pdf/2004.10568.pdf
 h_alpha = torch.clamp(
 torch.sigmoid(V) * (ADAROUND_ZETA - ADAROUND_GAMMA) + ADAROUND_GAMMA,
 min=0,
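For reference, the rectified sigmoid this comment describes can be written as a standalone sketch; the stretch constants ζ = 1.1 and γ = −0.1 and the regularizer follow the AdaRound paper, while the function names here are illustrative, not the file's actual implementation:

```python
import torch

ADAROUND_ZETA = 1.1    # stretch constants from the AdaRound paper
ADAROUND_GAMMA = -0.1

def rectified_sigmoid(V: torch.Tensor) -> torch.Tensor:
    # h(V) = clip(sigmoid(V) * (zeta - gamma) + gamma, 0, 1), eq. (23)
    return torch.clamp(
        torch.sigmoid(V) * (ADAROUND_ZETA - ADAROUND_GAMMA) + ADAROUND_GAMMA,
        min=0,
        max=1,
    )

def round_regularizer(V: torch.Tensor, beta: float) -> torch.Tensor:
    # pushes each h(V) toward exactly 0 or 1 as beta is annealed downward
    return (1 - (2 * rectified_sigmoid(V) - 1).abs().pow(beta)).sum()
```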
@@ -107,7 +107,7 @@ class AdaptiveRoundingOptimizer:
 )
 if torch.cuda.is_available():
 # Somehow, we need to move the model continuously
-# Otherwise, the model will be lowered to CPU misteriously
+# Otherwise, the model will be lowered to CPU mysteriously
 self.model = self.model.cuda()
 self.q_model = self.q_model.cuda()
 for data_ in data:
@@ -296,7 +296,7 @@ BackendConfig(nniqat.LinearReLU)

 Pattern in this case is the same as before, it defines the pattern for the subgraph we are dealing with

-`set_observation_type`: sets the observation type for the patter, currently only two types:
+`set_observation_type`: sets the observation type for the pattern, currently only two types:

 `OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT` means the output observer instance will be different from the input, which is the most common type of observer placement.

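To make the description concrete, here is a minimal sketch of setting an observation type on a pattern config; the `nn.Linear` pattern and the dtype values are illustrative assumptions, not this backend's actual configuration:

```python
import torch
import torch.nn as nn
from torch.ao.quantization.backend_config import (
    BackendPatternConfig,
    DTypeConfig,
    ObservationType,
)

dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
)

linear_config = (
    BackendPatternConfig(nn.Linear)
    # output gets its own observer instance, the most common placement
    .set_observation_type(ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT)
    .add_dtype_config(dtype_config)
)
```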
@@ -8,10 +8,10 @@ ModelReport
 Most detectors require a **traceable GraphModule**, but some (ex. `PerChannelDetector`) require just an `nn.Module`.

 #### Typical Fx Workflow
-- Initialize model → Prepare model → Callibrate model → Convert model → ...
+- Initialize model → Prepare model → Calibrate model → Convert model → ...

 #### Fx Workflow with ModelReport
-- Initialize model → Prepare model → **Add detector observers** → Callibrate model → **Generate report** → **Remove detector observers** → Convert model → ...
+- Initialize model → Prepare model → **Add detector observers** → Calibrate model → **Generate report** → **Remove detector observers** → Convert model → ...

 > ⚠️ **You can only prepare and remove observers once with a given ModelReport Instance**: Be very careful here!

@@ -23,7 +23,7 @@ This snippet should be ready to copy, paste, and use with the exception of a few
 # prep model
 qconfig_mapping = torch.ao.quantization.get_default_qconfig_mapping()
 model = Model() # TODO define model
-example_input = torch.randn((*args)) # TODO get example data for callibration
+example_input = torch.randn((*args)) # TODO get example data for calibration
 prepared_model = quantize_fx.prepare_fx(model, qconfig_mapping, example_input)

 # create ModelReport instance and insert observers
@@ -31,8 +31,8 @@ detector_set = set([DynamicStaticDetector()]) # TODO add all desired detectors
 model_report = ModelReport(model, detector_set)
 ready_for_callibrate = model_report.prepare_detailed_callibration()

-# callibrate model and generate report
-ready_for_callibrate(example_input) # TODO run callibration of model with relevant data
+# calibrate model and generate report
+ready_for_callibrate(example_input) # TODO run calibration of model with relevant data
 reports = model_report.generate_model_report(remove_inserted_observers=True)
 for report_name in reports.keys():
 text_report, report_dict = reports[report_name]
@@ -46,7 +46,7 @@ mod_rep_visualizer.generate_table_visualization() # shows collected data as a ta
 ```

 There is a tutorial in the works that will walk through a full usage of the ModelReport API.
-This tutorial will show the ModelReport API being used on toy model in both an Fx Graph Mode workflow and an alterative workflow with just a traceable model.
+This tutorial will show the ModelReport API being used on a toy model in both an Fx Graph Mode workflow and an alternative workflow with just a traceable model.
 This README will be updated with a link to the tutorial upon completion of the tutorial.

 # Key Modules Overview
@@ -60,7 +60,7 @@ There are three primary methods to be familiar with when using the ModelReport c
 This is so that we can keep track of where we want to insert observers on a detector by detector basis and also keep track of which detectors to generate reports for.
 - `prepare_detailed_calibration(self)` → `GraphModule` inserts observers into the locations specified by each detector in the model.
 It then returns the GraphModule with the detectors inserted into both the regular module structure as well as the node structure.
-- `generate_model_report(self, remove_inserted_observers: bool)` → `Dict[str, Tuple[str, Dict]]` uses callibrated GraphModule to optionally removes inserted observers, and generate, for each detector the ModelReport instance was initialized with:
+- `generate_model_report(self, remove_inserted_observers: bool)` → `Dict[str, Tuple[str, Dict]]` uses the calibrated GraphModule to optionally remove inserted observers and generate, for each detector the ModelReport instance was initialized with:
 - A string-based report that is easily digestible and actionable explaining the data collected by relevant observers for that detector
 - A dictionary containing statistics collected by the relevant observers and values calculated by the detector for further analysis or plotting

@@ -107,7 +107,7 @@ For both of the two things listed above, you can filter the data by either `modu
 To get a list of all the modules or features, you can call `mod_rep_visualizer.get_all_unique_module_fqns()`
 and `mod_rep_visualizer.get_all_unique_feature_names()` respectively.
 For the features, because some features are not plottable, you can set the flag to only get plottable features
-in the aformentioned `get_all_unique_feature_names` method.
+in the aforementioned `get_all_unique_feature_names` method.

 ## Detector Overview

@@ -152,7 +152,7 @@ The statistics collected by the `ModelReportObserver` include:
 - Ratio of 100th percentile to some *n*th percentile
 - Number of constant value batches to pass through each channel

-After the `ModelReportObserver` collects the statistics above during the callibration process, the detectors then extract the information they need to generate their reports from the relevant observers.
+After the `ModelReportObserver` collects the statistics above during the calibration process, the detectors then extract the information they need to generate their reports from the relevant observers.

 ### Using Your Own Observer

@@ -36,7 +36,7 @@ class ModelReport:
 - Suggestions for outlier detection for all layers (Graph Modules)

 The ModelReport class has the primary functionality of inserting observers (primarily the ModelReportObserver)
-where needed for each detector to gather the information it needs, and then after callibration, the ModelReport
+where needed for each detector to gather the information it needs, and then after calibration, the ModelReport
 class compiles the report generated by each Detector class into a single report to return to the user. It also
 has the capability to remove all the observers it inserted as well.

@@ -70,7 +70,7 @@ class ModelReport:
 1.) Initialize ModelReport object with reports of interest by passing in initialized detector objects and model
 2.) Prepare your model with prepare_fx
 3.) Call model_report.prepare_detailed_calibration to add relevant observers
-4.) Callibrate your model with data
+4.) Calibrate your model with data
 5.) Call model_report.generate_report on your model to generate report and optionally remove added observers
 Optional
 6.) Call model_report.generate_visualizer to get a ModelReportVisualizer instance
@@ -102,7 +102,7 @@ class ModelReport:
 ... )
 >>> tracer_reporter = ModelReport(graph_module, tracer_detector_set)

->>> # now we insert the observers and callibrate the model
+>>> # now we insert the observers and calibrate the model
 >>> tracer_model_with_observers = tracer_reporter.prepare_detailed_calibration()
 >>> for i in range(num_callibration_batches):
 >>> example_input = get_callibration_input()
@@ -179,7 +179,7 @@ class ModelReport:
 # if already prepared once, cannot prepare again
 if self._prepared_flag:
 raise ValueError(
-"Already ran preparing detailed callibration. Run the report generation next after callibration."
+"Already ran preparing detailed calibration. Run the report generation next after calibration."
 )

 # loop through each detector, find where placements should be, and keep track
@@ -271,7 +271,7 @@ class ModelReport:
 Generates all the requested reports.

 Note:
-You should have callibrated the model with relevant data before calling this
+You should have calibrated the model with relevant data before calling this

 The reports generated are specified by the desired_reports specified in desired_reports

@@ -286,12 +286,12 @@ class ModelReport:

 Note:
 Throws exception if we try to generate report on model we already removed observers from
-Throws exception if we try to generate report without preparing for callibration
+Throws exception if we try to generate report without preparing for calibration
 """
-# if we haven't prepped model for callibration, then we shouldn't generate report yet
+# if we haven't prepped model for calibration, then we shouldn't generate report yet
 if not self._prepared_flag:
 raise Exception( # noqa: TRY002
-"Cannot generate report without preparing model for callibration"
+"Cannot generate report without preparing model for calibration"
 )

 # if we already removed the observers, we cannot generate report
|
@ -546,12 +546,12 @@ class ModelReport:
|
|||
|
||||
Note:
|
||||
Throws exception if we try to generate mapping on model we already removed observers from
|
||||
Throws exception if we try to generate mapping without preparing for callibration
|
||||
Throws exception if we try to generate mapping without preparing for calibration
|
||||
"""
|
||||
# if we haven't prepped model for callibration, then we shouldn't generate mapping yet
|
||||
# if we haven't prepped model for calibration, then we shouldn't generate mapping yet
|
||||
if not self._prepared_flag:
|
||||
raise Exception( # noqa: TRY002
|
||||
"Cannot generate report without preparing model for callibration"
|
||||
"Cannot generate report without preparing model for calibration"
|
||||
)
|
||||
|
||||
# if we already removed the observers, we cannot mapping
|
||||
|
|
@@ -600,7 +600,7 @@ class ModelReport:

 Note:
 Throws exception if we try to generate mapping on model we already removed observers from
-Throws exception if we try to generate mapping without preparing for callibration
+Throws exception if we try to generate mapping without preparing for calibration
 """
 # get the mapping info
 detector_qconfig_info_combined = (
@@ -63,7 +63,7 @@ class ModelReportVisualizer:
 1.) Initialize ModelReport object with reports of interest by passing in initialized detector objects
 2.) Prepare your model with prepare_fx
 3.) Call model_report.prepare_detailed_calibration on your model to add relevant observers
-4.) Callibrate your model with data
+4.) Calibrate your model with data
 5.) Call model_report.generate_report on your model to generate report and optionally remove added observers
 6.) Use output of model_report.generate_report to initialize ModelReportVisualizer instance
 7.) Use instance to view different views of data as desired, applying filters as needed
@@ -1107,7 +1107,7 @@ def _maybe_insert_output_observer_for_node(
 )
 target_dtype, target_is_dynamic = _get_dtype_and_is_dynamic(output_act_obs_or_fq)
 # uncomment after we support reuse_input_obs_or_fq properly by having separate
-# implemntations for this key instead of reusing the input_output_share_observers
+# implementations for this key instead of reusing the input_output_share_observers
 # code
 # reuse_input_obs_or_fq = node.meta["target_dtype_info"].get("reuse_input_obs_or_fq", False)
 # for now we set this to False since reuse_input_obs_or_fq for
@@ -1117,7 +1117,7 @@ def _maybe_insert_output_observer_for_node(
 reuse_input_obs_or_fq = False

 # Note: prev_output_dtype = torch.float and prev_output_is_dynamic=False
-# because the prev_output is the output of an fp32 op, althought technically
+# because the prev_output is the output of an fp32 op, although technically
 # we should get the dtype of the output from node.meta["val"] in the future
 # if we deprecate fx graph mode quantization
 needs_obs_or_fq = _needs_obs_or_fq(
@@ -2002,7 +2002,7 @@ def prepare(
 same as input_quantized_idxs configuration provided
 for the standalone module
 standalone_module_output_quantized_idxs(List[Int]): a list of
-indexs for the graph output that is quantized
+indices for the graph output that is quantized
 same as input_quantized_idxs configuration provided
 for the standalone module
 """
@@ -190,7 +190,7 @@ def get_new_attr_name_with_prefix(prefix: str) -> Callable:


 def collect_producer_nodes(node: Node) -> Optional[list[Node]]:
-r"""Starting from a target node, trace back until we hit inpu or
+r"""Starting from a target node, trace back until we hit input or
 getattr node. This is used to extract the chain of operators
 starting from getattr to the target node, for example
 def forward(self, x):
@@ -358,7 +358,7 @@ class UniformQuantizationObserverBase(ObserverBase):
 # Functionally equivalent to 'determine_qparams' in utils.py. Observers must be torchscriptable however and qscheme
 # as far as I can tell is not allowed to be passed as a parameter in torchscript functions. This makes refactoring observer
 # to use this utility a massive pain and very gross. For now Im opting just to duplicate as this code
-# seems unlikey to change (last update over 1 year ago) and when torchscript is fully deprecated we can refactor.
+# seems unlikely to change (last update over 1 year ago) and when torchscript is fully deprecated we can refactor.
 # TODO(jakeszwe, jerryzh168)
 if not check_min_max_valid(min_val, max_val):
 return torch.tensor([1.0], device=min_val.device.type), torch.tensor(
@@ -1866,7 +1866,7 @@ class AffineQuantizedObserverBase(ABC, torch.nn.Module):
 Converts the observer node in the graph into its quantized representation

 Args:
-model: graph module to conver the observer node in
+model: graph module to convert the observer node in
 observer_node: the observer node to convert
 """
 from torch.ao.quantization.fx.utils import create_getattr_from_value
@@ -1,6 +1,6 @@
 # copied from https://github.com/pytorch/ao/blob/main/torchao/quantization/observer.py
 # and https://github.com/pytorch/ao/blob/main/torchao/quantization/quant_primitives.py
-# PLESE DON'T MODIFY THIS FILE SO THAT WE DON'T GET OUT OF SYNC
+# PLEASE DON'T MODIFY THIS FILE SO THAT WE DON'T GET OUT OF SYNC
 import logging
 from abc import ABCMeta
 from typing import Any, Optional, Union
@@ -469,7 +469,7 @@ def _quantize_affine_no_dtype_cast(
 1. figure out the dimension for reduction based on block_size, also reshape the input to align with
 the shape after reduction
 2. quantize the input based on the quantization parameters scale and zero_point and args like zero_point_domain
-3. reshape the quantized result to origianl shape
+3. reshape the quantized result to original shape
 """
 # TODO: validations
 # TODO: validate scale/zero_point dimensions are compatible with block_size
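Ignoring the block-size reshaping (step 1), steps 2 and 3 amount to the usual affine mapping. A simplified per-tensor sketch, where the helper names and the int8 range are assumptions for illustration:

```python
import torch

def quantize_affine_sketch(x, scale, zero_point, quant_min=-128, quant_max=127):
    # step 2: divide by scale, shift by zero_point, round, clamp to the range
    return torch.clamp(torch.round(x / scale) + zero_point, quant_min, quant_max)

def dequantize_affine_sketch(q, scale, zero_point):
    return (q - zero_point) * scale

x = torch.randn(4, 8)
scale = x.abs().max() / 127
q = quantize_affine_sketch(x, scale, 0)
x_hat = dequantize_affine_sketch(q, scale, 0)
assert (x - x_hat).abs().max() <= scale  # round-trip error bounded by one step
```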
@@ -619,7 +619,7 @@ def _dequantize_affine_no_dtype_check(
 1. figure out the dimension for reduction based on block_size, also reshape the input to align with
 the shape after reduction
 2. dequantize the input based on the quantization parameters scale and zero_point and args like zero_point_domain
-3. reshape the quantized result to origianl shape and change dtype to the output_dtype
+3. reshape the quantized result to original shape and change dtype to the output_dtype
 """
 assert len(block_size) == input.dim(), (
 f"Got input dim:{input.dim()}, block_size: {block_size}"
@@ -177,19 +177,19 @@ class PortNodeMetaForQDQ(PassBase):
 - Example 1:
 - Original: [Conv -> AvgPool -> Linear]
 - Quantized [Q-> DQ -> Conv -> Q -> DQ -> AvgPool -> Q -> DQ -> Linear -> Q -> DQ]
-- Inner brackets specify which nodes Q/DQ inherit metdata from
+- Inner brackets specify which nodes Q/DQ inherit metadata from
 - [Q-> [DQ -> Conv -> Q] -> [DQ -> AvgPool -> Q] -> [DQ -> Linear -> Q] -> DQ]
 - Note first Q and last DQ do not inherit metadata from any nodes
 - Example 2:
 - Original: [Conv -> AvgPool -> Linear]
 - AvgPool is not quantized
 - Quantized [Q-> DQ -> Conv -> Q -> DQ -> AvgPool -> Q -> DQ -> Linear -> Q -> DQ]
-- Inner brackets specify which nodes Q/DQ inherit metdata from
+- Inner brackets specify which nodes Q/DQ inherit metadata from
 - [Q-> [DQ -> Conv -> Q] -> DQ -> [AvgPool] -> Q -> [DQ -> Linear -> Q] -> DQ]
 - Note DQ and Q nodes around AvgPool do not inherit metadata from AvgPool because
 AvgPool was not supposed to be quantized. Metadata porting relies on quantization_annotation
-on the nodes (in this case AvgPool node) to conclude if the node or patter was
-supposed to be quantized. And subsequntly decide if the preceding Q, if any, should
+on the nodes (in this case AvgPool node) to conclude if the node or pattern was
+supposed to be quantized. And subsequently decide if the preceding Q, if any, should
 inherit metadata from AvgPool.
 - Dynamically quantized patterns:
 - Input that are dynamically quantized have choose_qparams, quantize and dequantize nodes
@@ -275,7 +275,7 @@ def _get_edge_or_node_to_group_id(

 _update_shared_with(input_edge, qspec, shared_with_map)

-# now that we get the sharing relations between all edges and nodes, we can assingn group ids
+# now that we get the sharing relations between all edges and nodes, we can assign group ids
 cur_group_id = 0
 edge_or_node_to_group_id: dict[EdgeOrNode, int] = {}
 for edge_or_node in shared_with_map.keys():
@@ -876,7 +876,7 @@ def _fold_conv_bn_qat(m: GraphModule) -> GraphModule:
 m, F.conv_transpose2d, _quantized_conv2d_bn_example_inputs, is_cuda=is_cuda
 )

-# remove in place add from batchnorm tracking traning stats
+# remove in place add from batchnorm tracking training stats
 for node in m.graph.nodes:
 if (
 node.target == torch.ops.aten.add_.Tensor
@@ -300,7 +300,7 @@ def _reference_quantized_conv2d(
 # Out_(i, j)_fp32 = ((X_scale * W_scale) * Sum_(over k)[(X_(i, k)_fp32 - X_zp) * (W_(i, k)_fp32 - W_zp)]) + bias_(i)_fp32
 # In order to fold the addition of bias_(i)_fp32 inside, we must do
 # Out_(i, j)_fp32 = (X_scale * W_scale) * (Sum_(over k)[(X_(i, k)_fp32 - X_zp) * (W_(i, k)_fp32 - W_zp)] + (1 / (X_scale * W_scale)) * bias_(i)_fp32) # noqa: B950
-# Note we had to multiply bias_fp32 qith X_scale * W_scale = bias_scale
+# Note we had to multiply bias_fp32 with X_scale * W_scale = bias_scale
 # Thus bias quantization to int32 must be with X_scale * W_scale

 bias_i32 = out_dtype(torch.ops.aten.div.Tensor, torch.int32, bias_fp32, bias_scale)
@@ -436,7 +436,7 @@ def _reference_quantized_add(
 x_fp32 = (x_i8 - x_zero_point) * x_scale (3)
 y_fp32 = (y_i8 - y_zero_point) * y_scale (4)

-# applying the above fomula to the out_i8 equation we can get the following:
+# applying the above formula to the out_i8 equation we can get the following:
 out_i8 = out_fp32 / out_scale + out_zero_point # (1)
 = (x_f32 + y_f32) / out_scale + out_zero_point # applying (2) to substitute out_fp32 with x_fp32 + y_fp32
 = ((x_i8 - x_zero_point) * x_scale + (y_i8 - y_zero_point) * y_scale) / out_scale + out_zero_point # apply (3) and (4)
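The substitution can be sanity-checked numerically; the scales and zero points below are arbitrary example values:

```python
import torch

x_scale, x_zp = 0.05, 10
y_scale, y_zp = 0.03, -4
out_scale, out_zp = 0.07, 2

x_i8 = torch.randint(-128, 128, (16,)).float()
y_i8 = torch.randint(-128, 128, (16,)).float()

x_fp32 = (x_i8 - x_zp) * x_scale                # (3)
y_fp32 = (y_i8 - y_zp) * y_scale                # (4)
out_a = (x_fp32 + y_fp32) / out_scale + out_zp  # (1) with (2) substituted

# fully substituted form from the last line of the derivation
out_b = ((x_i8 - x_zp) * x_scale + (y_i8 - y_zp) * y_scale) / out_scale + out_zp
assert torch.allclose(out_a, out_b)
```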
@@ -185,7 +185,7 @@ def _prepare_standalone_module_fx(
 same as input_quantized_idxs configuration provided
 for the standalone module
 * `standalone_module_output_quantized_idxs(List[Int])`: a list of
-indexs for the graph output that is quantized
+indices for the graph output that is quantized
 same as input_quantized_idxs configuration provided
 for the standalone module

@@ -76,7 +76,7 @@ def prepare_pt2e(

 # Step 1. program capture
 # NOTE: this API will be updated to torch.export API in the future, but the captured
-# result shoud mostly stay the same
+# result should mostly stay the same
 m = torch.export.export_for_training(m, *example_inputs).module()
 # we get a model with aten ops

@@ -153,7 +153,7 @@ def prepare_qat_pt2e(

 # Step 1. program capture
 # NOTE: this API will be updated to torch.export API in the future, but the captured
-# result shoud mostly stay the same
+# result should mostly stay the same
 m = torch.export.export_for_training(m, *example_inputs).module()
 # we get a model with aten ops

@@ -218,7 +218,7 @@ def convert_pt2e(

 Args:
 * `model` (torch.fx.GraphModule): calibrated/trained model
-* `use_reference_representation` (bool): boolean flag to indicate whether to produce referece representation or not
+* `use_reference_representation` (bool): boolean flag to indicate whether to produce reference representation or not
 * `fold_quantize` (bool): boolean flag for whether fold the quantize op or not

 Returns:
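Putting capture, prepare, calibration, and convert together, the documented PT2E flow reads roughly as below; the toy model, single calibration batch, and choice of XNNPACKQuantizer (deprecated, see further down) are placeholders:

```python
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU()).eval()
example_inputs = (torch.randn(1, 8),)

# Step 1: program capture -> GraphModule with aten ops
m = torch.export.export_for_training(model, example_inputs).module()

# Step 2: insert observers where the quantizer's annotations ask for them
quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
m = prepare_pt2e(m, quantizer)

# Step 3: calibrate with representative data
m(*example_inputs)

# Step 4: convert to the reference quantized model
m = convert_pt2e(m)
```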
@@ -111,7 +111,7 @@ class DerivedQuantizationSpec(QuantizationSpecBase):

 @dataclass
 class QuantizationAnnotation:
-"""How are input arguemnt or output should be quantized,
+"""How an input argument or output should be quantized,
 expressed as QuantizationSpec, this corresponds to how a Tensor in the
 operator Graph is observed (PTQ) or fake quantized (QAT)
 """
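For context, a sketch of how a quantizer typically attaches this annotation to an FX node; the observer choice and qparams are illustrative assumptions:

```python
import torch
from torch.ao.quantization.observer import MinMaxObserver
from torch.ao.quantization.quantizer import (
    QuantizationAnnotation,
    QuantizationSpec,
)

act_qspec = QuantizationSpec(
    dtype=torch.int8,
    quant_min=-128,
    quant_max=127,
    qscheme=torch.per_tensor_affine,
    observer_or_fake_quant_ctr=MinMaxObserver,
)

def annotate_node(node) -> None:
    # how each input and the output of `node` should be observed (PTQ)
    # or fake quantized (QAT)
    node.meta["quantization_annotation"] = QuantizationAnnotation(
        input_qspec_map={node.args[0]: act_qspec},
        output_qspec=act_qspec,
        _annotated=True,
    )
```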
@@ -28,7 +28,7 @@ def _node_only_used_for_sym_size(node: Node, partition_nodes: list[Node]):
 This utility is used to handle cases when dynamic_shape=True tracing leads
 to symint nodes in the pattern of linear module. In those cases, we need to
 distinguish between the nodes that are in input for just extracting value of
-some dimentions (and symint nodes) vs. the one that is activation.
+some dimensions (and symint nodes) vs. the one that is activation.
 For example:
 graph(x, y, weight):
 size_0 = torch.ops.aten.sym_size([x], [0])
@@ -245,7 +245,7 @@ def _get_not_module_type_or_name_filter(
 class XNNPACKQuantizer(Quantizer):
 """
 !!! DEPRECATED !!!
-XNNPACKQuantizer is a marked as deprected. It will be removed in the future.
+XNNPACKQuantizer is marked as deprecated. It will be removed in the future.
 It has been moved to executorch.backends.xnnpack.quantizer.xnnpack_quantizer.XNNPACKQuantizer.
 Please use the new quantizer instead.
 """
@@ -422,7 +422,7 @@ def _annotate_conv_bn(
 filter_fn: Optional[Callable[[Node], bool]] = None,
 ) -> Optional[list[list[Node]]]:
 """
-Find conv + batchnorm parititions
+Find conv + batchnorm partitions
 Note: This is only used for QAT. In PTQ, batchnorm should already be fused into the conv.
 """
 return _do_annotate_conv_bn(gm, quantization_config, filter_fn, has_relu=False)
|
@ -435,7 +435,7 @@ def _annotate_conv_bn_relu(
|
|||
filter_fn: Optional[Callable[[Node], bool]] = None,
|
||||
) -> Optional[list[list[Node]]]:
|
||||
"""
|
||||
Find conv + batchnorm + relu parititions
|
||||
Find conv + batchnorm + relu partitions
|
||||
Note: This is only used for QAT. In PTQ, batchnorm should already be fused into the conv.
|
||||
"""
|
||||
return _do_annotate_conv_bn(gm, quantization_config, filter_fn, has_relu=True)
|
||||
|
|
@ -448,7 +448,7 @@ def _annotate_conv_transpose_bn(
|
|||
filter_fn: Optional[Callable[[Node], bool]] = None,
|
||||
) -> Optional[list[list[Node]]]:
|
||||
"""
|
||||
Find conv_transpose + batchnorm parititions
|
||||
Find conv_transpose + batchnorm partitions
|
||||
Note: This is only used for QAT. In PTQ, batchnorm should already be fused into the conv.
|
||||
"""
|
||||
return _do_annotate_conv_bn(
|
||||
|
|
@ -463,7 +463,7 @@ def _annotate_conv_transpose_bn_relu(
|
|||
filter_fn: Optional[Callable[[Node], bool]] = None,
|
||||
) -> Optional[list[list[Node]]]:
|
||||
"""
|
||||
Find conv_transpose + batchnorm + relu parititions
|
||||
Find conv_transpose + batchnorm + relu partitions
|
||||
Note: This is only used for QAT. In PTQ, batchnorm should already be fused into the conv.
|
||||
"""
|
||||
return _do_annotate_conv_bn(
|
||||
|
|
|
|||
|
|
@@ -85,7 +85,7 @@ class XPUInductorQuantizer(X86InductorQuantizer):
 overrides. We keep the annotate methods but make the function
 body empty, aiming to let `_generate_qdq_quantized_model`
 generate qdq around op and graph execute on fp32 dtype for
-unspported operators.
+unsupported operators.
 """

 def _annotate_qat_conv2d_fusion_pattern(
@@ -616,7 +616,7 @@ def validate_qmin_qmax(quant_min: int, quant_max: int) -> None:

 # Functionally equivalent to '_calculate_qparams' in observer.py. Observers must be torchscriptable however and qscheme
 # as far as I can tell is not allowed to be passed as a parameter in torchscript functions. This makes refactoring observer
-# to use this utility a massive pain and very gross. For now Im opting just to duplicate as this code seems unlikey to change
+# to use this utility a massive pain and very gross. For now Im opting just to duplicate as this code seems unlikely to change
 # (last update over 1 year ago) and when torchscript is fully deprecated we can refactor. TODO(jakeszwe, jerryzh168)
 def determine_qparams(
 min_val: torch.Tensor,