[BE][13/16] fix typos in torch/ (torch/ao/) (#156603)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156603
Approved by: https://github.com/msaroufim
Xuehai Pan 2025-06-29 11:38:22 +08:00 committed by PyTorch MergeBot
parent 1913c915e0
commit f8293116f5
33 changed files with 66 additions and 66 deletions

View File

@@ -1173,7 +1173,6 @@ exclude_patterns = [
'test/distributed/**',
'torch/**',
'torch/_*/**',
-'torch/ao/**',
'torch/fx/**',
'torch/distributed/tensor/**',
'torch/[j-o]*/**',

View File

@@ -18,6 +18,7 @@ NowNs
optins
OT
overrideable
padD
ptd
rebuild
rebuilt

View File

@@ -214,7 +214,7 @@ class MultiheadAttention(nn.MultiheadAttention):
fp.bias_v = nn.Parameter(self.bias_v.dequantize())
# Set the linear weights
-# Note: Because the linear layers are quantized, mypy does not nkow how
+# Note: Because the linear layers are quantized, mypy does not know how
# to deal with them -- might need to ignore the typing checks.
# for the type: ignore[has-type], see https://github.com/pytorch/pytorch/issues/58969
w, b = self.out_proj._weight_bias() # type: ignore[operator, has-type]

View File

@@ -15,7 +15,7 @@ def _is_valid_linear_block_sparse_pattern(
# This is a stop-gap measure as current flow does not allow module
# specific block sparse pattern.
-# Infact there is no way to convey sparse pattern via module config
+# In fact there is no way to convey sparse pattern via module config
# of quantization flow. Thus using the global context to convey
# sparsity pattern.
# Once the flow supports it, this should be removed.

View File

@@ -1124,7 +1124,7 @@ def create_a_shadows_b(
# (prev_node_c+) -> (logger_c_input)? -> node_start_c -> ... -> node_end_c -> logger_c
#
# Note: node_start_c may be the same node as node_end_c, or they
-# may have nodes inbetween.
+# may have nodes in between.
else:
env_c[node_b.name] = graph_c.node_copy(node_b, load_arg)

View File

@@ -109,7 +109,7 @@ class QConfigMultiMapping:
target_qconfigs_dict[key] = None
break
-# insert copies of this new QConfigMapping until all entires
+# insert copies of this new QConfigMapping until all entries
# in qconfig_list can fit among the QConfigMappings
while len(qconfig_list) > len(self.qconfig_mappings_list):
self.qconfig_mappings_list.append(copy.deepcopy(new_qconfig_mapping))

View File

@@ -159,7 +159,7 @@ class ActivationSparsifier:
if data is None:
out_data = [
0 for _ in range(0, len(features))
-] # create one incase of 1st forward
+] # create one in case of 1st forward
self.state[name]["mask"] = [0 for _ in range(0, len(features))]
else:
out_data = data # a list

View File

@@ -14,7 +14,7 @@ The [DataNormSparsifier](https://github.com/pytorch/pytorch/blob/main/torch/ao/p
3. Norm: L1 and L2
## Dataset
-The benchmarks are created for the dlrm model on the Kaggle CriteoDataset which can be downloaded from [here](https://ailab.criteo.com/ressources/) or [here](https://figshare.com/articles/dataset/Kaggle_Display_Advertising_Challenge_dataset/5732310/1).
+The benchmarks are created for the dlrm model on the Kaggle CriteoDataset which can be downloaded from [here](https://ailab.criteo.com/ressources/) or [here](https://figshare.com/articles/dataset/Kaggle_Display_Advertising_Challenge_dataset/5732310/1). <!-- codespell:ignore -->
## Results
1. **Disk Usage**: Introducing sparsity in the embeddings reduces file size after compression. The compressed model size goes down from 1.9 GB to 150 MB after 100% sparsity.
@@ -34,7 +34,7 @@ The takeaway is that the dlrm model with sparse coo tensor is slower (roughly 2x
## Setup
The benchmark codes depend on the [DLRM codebase](https://github.com/facebookresearch/dlrm).
1. Clone the dlrm git repository
-2. Download the dataset from [here](https://ailab.criteo.com/ressources/) or [here](https://figshare.com/articles/dataset/Kaggle_Display_Advertising_Challenge_dataset/5732310/1)
+2. Download the dataset from [here](https://ailab.criteo.com/ressources/) or [here](https://figshare.com/articles/dataset/Kaggle_Display_Advertising_Challenge_dataset/5732310/1) <!-- codespell:ignore -->
3. The DLRM model can be trained using the following script
```
# Make sure you go into the file and make sure that the path to dataset is correct.

View File

@@ -199,7 +199,7 @@ class TestTrainingAwareCallback(TestCase):
do not want as the config of each layer changes after
.step()
-Hence, we need to dump and restore the state_dict() everytime because we're
+Hence, we need to dump and restore the state_dict() every time because we're
copying the model after each epoch.
Hence, it is essential to make sure that the sparsifier's state_dict() is being
correctly dumped and restored.

View File

@@ -11,7 +11,7 @@ __all__ = ["FPGMPruner"]
class FPGMPruner(BaseStructuredSparsifier):
r"""Filter Pruning via Geometric Median (FPGM) Structured Pruner
-This sparsifier prune fliter (row) in a tensor according to distances among filters according to
+This sparsifier prune filter (row) in a tensor according to distances among filters according to
`Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration <https://arxiv.org/abs/1811.00250>`_.
This sparsifier is controlled by three variables:
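To make the distance-based criterion above concrete, here is a minimal, self-contained sketch of FPGM-style filter scoring, assuming filters are the rows of a weight tensor; the `fpgm_scores` helper and its parameters are illustrative, not the pruner's actual API.

```python
# Hypothetical sketch of the FPGM criterion: filters whose summed distance to
# all other filters is smallest are closest to the geometric median and are
# therefore the pruning candidates.
import torch

def fpgm_scores(weight: torch.Tensor, p: float = 2.0) -> torch.Tensor:
    # weight: (num_filters, ...); flatten each filter into a vector
    flat = weight.flatten(1)
    # pairwise distances between filters, shape (num_filters, num_filters)
    dist = torch.cdist(flat, flat, p=p)
    # a filter's score is its total distance to every other filter
    return dist.sum(dim=1)

weight = torch.randn(8, 16)
scores = fpgm_scores(weight)
prune_idx = torch.topk(scores, k=2, largest=False).indices
print(prune_idx)  # filters closest to the geometric median
```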

View File

@@ -7,7 +7,7 @@ class SaliencyPruner(BaseStructuredSparsifier):
Prune rows based on the saliency (L1 norm) of each row.
This pruner works on N-Dimensional weight tensors.
-For each row, we will calculate the saliency, whic is the sum the L1 norm of all weights in that row.
+For each row, we will calculate the saliency, which is the sum the L1 norm of all weights in that row.
We expect that the resulting saliency vector has the same shape as our mask.
We then pick elements to remove until we reach the target sparsity_level.
"""

View File

@@ -54,7 +54,7 @@ class AdaptiveRoundingLoss(torch.nn.Module):
1 + np.cos(rel_iter * np.pi)
)
-# A rectified sigmoid for soft-quantization as formualted [23] in https://arxiv.org/pdf/2004.10568.pdf
+# A rectified sigmoid for soft-quantization as formulated [23] in https://arxiv.org/pdf/2004.10568.pdf
h_alpha = torch.clamp(
torch.sigmoid(V) * (ADAROUND_ZETA - ADAROUND_GAMMA) + ADAROUND_GAMMA,
min=0,

View File

@@ -107,7 +107,7 @@ class AdaptiveRoundingOptimizer:
)
if torch.cuda.is_available():
# Somehow, we need to move the model continuously
-# Otherwise, the model will be lowered to CPU misteriously
+# Otherwise, the model will be lowered to CPU mysteriously
self.model = self.model.cuda()
self.q_model = self.q_model.cuda()
for data_ in data:

View File

@@ -296,7 +296,7 @@ BackendConfig(nniqat.LinearReLU)
Pattern in this case is the same as before, it defines the pattern for the subgraph we are dealing with
-`set_observation_type`: sets the observation type for the patter, currently only two types:
+`set_observation_type`: sets the observation type for the pattern, currently only two types:
`OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT` means the output observer instance will be different from the input, which is the most common type of observer placement.
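For readers unfamiliar with the API, a hedged sketch of how an observation type might be attached to the fused pattern mentioned above could look like the following; the choice of `nniqat.LinearReLU` as the pattern and the omission of dtype configs are simplifications for illustration.

```python
# Illustrative only: configure a fused Linear+ReLU pattern so that its output
# gets its own observer instance rather than sharing the input's observer.
import torch.ao.nn.intrinsic.qat as nniqat
from torch.ao.quantization.backend_config import (
    BackendPatternConfig,
    ObservationType,
)

linear_relu_config = BackendPatternConfig(nniqat.LinearReLU).set_observation_type(
    ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT
)
```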

View File

@@ -8,10 +8,10 @@ ModelReport
Most detectors require a **traceable GraphModule**, but some (ex. `PerChannelDetector`) require just an `nn.Module`.
#### Typical Fx Workflow
-- Initialize model &rarr; Prepare model &rarr; Callibrate model &rarr; Convert model &rarr; ...
+- Initialize model &rarr; Prepare model &rarr; Calibrate model &rarr; Convert model &rarr; ...
#### Fx Workflow with ModelReport
-- Initialize model &rarr; Prepare model &rarr; **Add detector observers** &rarr; Callibrate model &rarr; **Generate report** &rarr; **Remove detector observers** &rarr; Convert model &rarr; ...
+- Initialize model &rarr; Prepare model &rarr; **Add detector observers** &rarr; Calibrate model &rarr; **Generate report** &rarr; **Remove detector observers** &rarr; Convert model &rarr; ...
> ⚠️ **You can only prepare and remove observers once with a given ModelReport Instance**: Be very careful here!
@@ -23,7 +23,7 @@ This snippet should be ready to copy, paste, and use with the exception of a few
# prep model
qconfig_mapping = torch.ao.quantization.get_default_qconfig_mapping()
model = Model() # TODO define model
-example_input = torch.randn((*args)) # TODO get example data for callibration
+example_input = torch.randn((*args)) # TODO get example data for calibration
prepared_model = quantize_fx.prepare_fx(model, qconfig_mapping, example_input)
# create ModelReport instance and insert observers
@@ -31,8 +31,8 @@ detector_set = set([DynamicStaticDetector()]) # TODO add all desired detectors
model_report = ModelReport(model, detector_set)
ready_for_callibrate = model_report.prepare_detailed_callibration()
-# callibrate model and generate report
-ready_for_callibrate(example_input) # TODO run callibration of model with relevant data
+# calibrate model and generate report
+ready_for_callibrate(example_input) # TODO run calibration of model with relevant data
reports = model_report.generate_model_report(remove_inserted_observers=True)
for report_name in report.keys():
text_report, report_dict = reports[report_name]
@@ -46,7 +46,7 @@ mod_rep_visualizer.generate_table_visualization() # shows collected data as a ta
```
There is a tutorial in the works that will walk through a full usage of the ModelReport API.
-This tutorial will show the ModelReport API being used on toy model in both an Fx Graph Mode workflow and an alterative workflow with just a traceable model.
+This tutorial will show the ModelReport API being used on toy model in both an Fx Graph Mode workflow and an alternative workflow with just a traceable model.
This README will be updated with a link to the tutorial upon completion of the tutorial.
# Key Modules Overview
@@ -60,7 +60,7 @@ There are three primary methods to be familiar with when using the ModelReport c
This is so that we can keep track of where we want to insert observers on a detector by detector basis and also keep track of which detectors to generate reports for.
- `prepare_detailed_calibration(self)` &rarr; `GraphModule` inserts observers into the locations specified by each detector in the model.
It then returns the GraphModule with the detectors inserted into both the regular module structure as well as the node structure.
-- `generate_model_report(self, remove_inserted_observers: bool)` &rarr; `Dict[str, Tuple[str, Dict]]` uses callibrated GraphModule to optionally removes inserted observers, and generate, for each detector the ModelReport instance was initialized with:
+- `generate_model_report(self, remove_inserted_observers: bool)` &rarr; `Dict[str, Tuple[str, Dict]]` uses calibrated GraphModule to optionally removes inserted observers, and generate, for each detector the ModelReport instance was initialized with:
- A string-based report that is easily digestable and actionable explaining the data collected by relevant observers for that detector
- A dictionary containing statistics collected by the relevant observers and values calculated by the detector for further analysis or plotting
@@ -107,7 +107,7 @@ For both of the two things listed above, you can filter the data by either `modu
To get a list of all the modules or features, you can call `mod_rep_visualizer.get_all_unique_module_fqns()`
and `mod_rep_visualizer.get_all_unique_feature_names()` respectively.
For the features, because some features are not plottable, you can set the flag to only get plottable features
-in the aformentioned `get_all_unique_feature_names` method.
+in the aforementioned `get_all_unique_feature_names` method.
## Detector Overview
@@ -152,7 +152,7 @@ The statistics collected by the `ModelReportObserver` include:
- Ratio of 100th percentile to some *n*th percentile
- Number of constant value batches to pass through each channel
-After the `ModelReportObserver` collects the statistics above during the callibration process, the detectors then extract the information they need to generate their reports from the relevant observers.
+After the `ModelReportObserver` collects the statistics above during the calibration process, the detectors then extract the information they need to generate their reports from the relevant observers.
### Using Your Own Observer

View File

@@ -36,7 +36,7 @@ class ModelReport:
- Suggestions for outlier detection for all layers (Graph Modules)
The ModelReport class has the primary functionality of inserting observers (primarily the ModelReportObserver)
-where needed for each detector to gather the information it needs, and then after callibration, the ModelReport
+where needed for each detector to gather the information it needs, and then after calibration, the ModelReport
class compiles the report generated by each Detector class into a single report to return to the user. It also
has the capability to remove all the observers it inserted as well.
@@ -70,7 +70,7 @@ class ModelReport:
1.) Initialize ModelReport object with reports of interest by passing in initialized detector objects and model
2.) Prepare your model with prepare_fx
3.) Call model_report.prepare_detailed_calibration to add relevant observers
-4.) Callibrate your model with data
+4.) Calibrate your model with data
5.) Call model_report.generate_report on your model to generate report and optionally remove added observers
Optional
6.) Call model_report.generate_visualizer to get a ModelReportVisualizer instance
@@ -102,7 +102,7 @@ class ModelReport:
... )
>>> tracer_reporter = ModelReport(graph_module, tracer_detector_set)
->>> # now we insert the observers and callibrate the model
+>>> # now we insert the observers and calibrate the model
>>> tracer_model_with_observers = tracer_reporter.prepare_detailed_calibration()
>>> for i in range(num_callibration_batches):
>>> example_input = get_callibration_input()
@@ -179,7 +179,7 @@ class ModelReport:
# if already prepared once, cannot prepare again
if self._prepared_flag:
raise ValueError(
-"Already ran preparing detailed callibration. Run the report generation next after callibration."
+"Already ran preparing detailed calibration. Run the report generation next after calibration."
)
# loop through each detector, find where placements should be, and keep track
@@ -271,7 +271,7 @@ class ModelReport:
Generates all the requested reports.
Note:
-You should have callibrated the model with relevant data before calling this
+You should have calibrated the model with relevant data before calling this
The reports generated are specified by the desired_reports specified in desired_reports
@@ -286,12 +286,12 @@ class ModelReport:
Note:
Throws exception if we try to generate report on model we already removed observers from
-Throws exception if we try to generate report without preparing for callibration
+Throws exception if we try to generate report without preparing for calibration
"""
-# if we haven't prepped model for callibration, then we shouldn't generate report yet
+# if we haven't prepped model for calibration, then we shouldn't generate report yet
if not self._prepared_flag:
raise Exception( # noqa: TRY002
-"Cannot generate report without preparing model for callibration"
+"Cannot generate report without preparing model for calibration"
)
# if we already removed the observers, we cannot generate report
@@ -546,12 +546,12 @@ class ModelReport:
Note:
Throws exception if we try to generate mapping on model we already removed observers from
-Throws exception if we try to generate mapping without preparing for callibration
+Throws exception if we try to generate mapping without preparing for calibration
"""
-# if we haven't prepped model for callibration, then we shouldn't generate mapping yet
+# if we haven't prepped model for calibration, then we shouldn't generate mapping yet
if not self._prepared_flag:
raise Exception( # noqa: TRY002
-"Cannot generate report without preparing model for callibration"
+"Cannot generate report without preparing model for calibration"
)
# if we already removed the observers, we cannot mapping
@@ -600,7 +600,7 @@ class ModelReport:
Note:
Throws exception if we try to generate mapping on model we already removed observers from
-Throws exception if we try to generate mapping without preparing for callibration
+Throws exception if we try to generate mapping without preparing for calibration
"""
# get the mapping info
detector_qconfig_info_combined = (

View File

@@ -63,7 +63,7 @@ class ModelReportVisualizer:
1.) Initialize ModelReport object with reports of interest by passing in initialized detector objects
2.) Prepare your model with prepare_fx
3.) Call model_report.prepare_detailed_calibration on your model to add relevant observers
-4.) Callibrate your model with data
+4.) Calibrate your model with data
5.) Call model_report.generate_report on your model to generate report and optionally remove added observers
6.) Use output of model_report.generate_report to initialize ModelReportVisualizer instance
7.) Use instance to view different views of data as desired, applying filters as needed

View File

@@ -1107,7 +1107,7 @@ def _maybe_insert_output_observer_for_node(
)
target_dtype, target_is_dynamic = _get_dtype_and_is_dynamic(output_act_obs_or_fq)
# uncomment after we support reuse_input_obs_or_fq properly by having separate
-# implemntations for this key instead of reusing the input_output_share_observers
+# implementations for this key instead of reusing the input_output_share_observers
# code
# reuse_input_obs_or_fq = node.meta["target_dtype_info"].get("reuse_input_obs_or_fq", False)
# for now we set this to False since reuse_input_obs_or_fq for
@@ -1117,7 +1117,7 @@ def _maybe_insert_output_observer_for_node(
reuse_input_obs_or_fq = False
# Note: prev_output_dtype = torch.float and prev_output_is_dynamic=False
-# because the prev_output is the output of an fp32 op, althought technically
+# because the prev_output is the output of an fp32 op, although technically
# we should get the dtype of the output from node.meta["val"] in the future
# if we deprecate fx graph mode quantization
needs_obs_or_fq = _needs_obs_or_fq(
@@ -2002,7 +2002,7 @@ def prepare(
same as input_quantized_idxs configuration provided
for the standalone module
standalone_module_output_quantized_idxs(List[Int]): a list of
-indexs for the graph output that is quantized
+indices for the graph output that is quantized
same as input_quantized_idxs configuration provided
for the standalone module
"""

View File

@@ -190,7 +190,7 @@ def get_new_attr_name_with_prefix(prefix: str) -> Callable:
def collect_producer_nodes(node: Node) -> Optional[list[Node]]:
-r"""Starting from a target node, trace back until we hit inpu or
+r"""Starting from a target node, trace back until we hit input or
getattr node. This is used to extract the chain of operators
starting from getattr to the target node, for example
def forward(self, x):

View File

@@ -358,7 +358,7 @@ class UniformQuantizationObserverBase(ObserverBase):
# Functionally equivalent to 'determine_qparams' in utils.py. Observers must be torchscriptable however and qscheme
# as far as I can tell is not allowed to passed as a parameter in torchscript functions. This makes refactoring observer
# to use this utility a massive pain and very gross. For now Im opting just to duplicate as this code
-# seems unlikey to change (last update over 1 year ago) and when torchscript is fully deprecated we can refactor.
+# seems unlikely to change (last update over 1 year ago) and when torchscript is fully deprecated we can refactor.
# TODO(jakeszwe, jerryzh168)
if not check_min_max_valid(min_val, max_val):
return torch.tensor([1.0], device=min_val.device.type), torch.tensor(
@@ -1866,7 +1866,7 @@ class AffineQuantizedObserverBase(ABC, torch.nn.Module):
Converts the observer node in the graph into its quantized representation
Args:
-model: graph module to conver the observer node in
+model: graph module to convert the observer node in
observer_node: the observer node to convert
"""
from torch.ao.quantization.fx.utils import create_getattr_from_value

View File

@@ -1,6 +1,6 @@
# copied from https://github.com/pytorch/ao/blob/main/torchao/quantization/observer.py
# and https://github.com/pytorch/ao/blob/main/torchao/quantization/quant_primitives.py
-# PLESE DON'T MODIFY THIS FILE SO THAT WE DON'T GET OUT OF SYNC
+# PLEASE DON'T MODIFY THIS FILE SO THAT WE DON'T GET OUT OF SYNC
import logging
from abc import ABCMeta
from typing import Any, Optional, Union
@@ -469,7 +469,7 @@ def _quantize_affine_no_dtype_cast(
1. figure out the dimension for reduction based on block_size, also reshape the input to align with
the shape after reduction
2. quantize the input based on the quantization parameters scale and zero_point and args like zero_point_domain
-3. reshape the quantized result to origianl shape
+3. reshape the quantized result to original shape
"""
# TODO: validations
# TODO: validate scale/zero_point dimensions are compatible with block_size
@@ -619,7 +619,7 @@ def _dequantize_affine_no_dtype_check(
1. figure out the dimension for reduction based on block_size, also reshape the input to align with
the shape after reduction
2. dequantize the input based on the quantization parameters scale and zero_point and args like zero_point_domain
-3. reshape the quantized result to origianl shape and change dtype to the output_dtype
+3. reshape the quantized result to original shape and change dtype to the output_dtype
"""
assert len(block_size) == input.dim(), (
f"Got input dim:{input.dim()}, block_size: {block_size}"

View File

@@ -177,19 +177,19 @@ class PortNodeMetaForQDQ(PassBase):
- Example 1:
- Original: [Conv -> AvgPool -> Linear]
- Quantized [Q-> DQ -> Conv -> Q -> DQ -> AvgPool -> Q -> DQ -> Linear -> Q -> DQ]
-- Inner brackets specify which nodes Q/DQ inherit metdata from
+- Inner brackets specify which nodes Q/DQ inherit metadata from
- [Q-> [DQ -> Conv -> Q] -> [DQ -> AvgPool -> Q] -> [DQ -> Linear -> Q] -> DQ]
- Note first Q and last DQ do not inherit metadata from any nodes
- Example 2:
- Original: [Conv -> AvgPool -> Linear]
- AvgPool is not quantized
- Quantized [Q-> DQ -> Conv -> Q -> DQ -> AvgPool -> Q -> DQ -> Linear -> Q -> DQ]
-- Inner brackets specify which nodes Q/DQ inherit metdata from
+- Inner brackets specify which nodes Q/DQ inherit metadata from
- [Q-> [DQ -> Conv -> Q] -> DQ -> [AvgPool] -> Q -> [DQ -> Linear -> Q] -> DQ]
- Note DQ and Q nodes around AvgPool do not inherit metadata from AvgPool because
AvgPool was not supposed to be quantized. Metadata porting relies on quantization_annotation
-on the nodes (in this case AvgPool node) to conclude if the node or patter was
-supposed to be quantized. And subsequntly decide if the preceding Q, if any, should
+on the nodes (in this case AvgPool node) to conclude if the node or pattern was
+supposed to be quantized. And subsequently decide if the preceding Q, if any, should
inherit metadata from AvgPool.
- Dynamically quantized patterns:
- Input that are dynamically quantized have choose_qparams, quantize and dequantize nodes

View File

@@ -275,7 +275,7 @@ def _get_edge_or_node_to_group_id(
_update_shared_with(input_edge, qspec, shared_with_map)
-# now that we get the sharing relations between all edges and nodes, we can assingn group ids
+# now that we get the sharing relations between all edges and nodes, we can assign group ids
cur_group_id = 0
edge_or_node_to_group_id: dict[EdgeOrNode, int] = {}
for edge_or_node in shared_with_map.keys():

View File

@@ -876,7 +876,7 @@ def _fold_conv_bn_qat(m: GraphModule) -> GraphModule:
m, F.conv_transpose2d, _quantized_conv2d_bn_example_inputs, is_cuda=is_cuda
)
-# remove in place add from batchnorm tracking traning stats
+# remove in place add from batchnorm tracking training stats
for node in m.graph.nodes:
if (
node.target == torch.ops.aten.add_.Tensor

View File

@@ -300,7 +300,7 @@ def _reference_quantized_conv2d(
# Out_(i, j)_fp32 = ((X_scale * W_scale) * Sum_(over k)[(X_(i, k)_fp32 - X_zp) * (W_(i, k)_fp32 - W_zp)]) + bias_(i)_fp32
# In order to addition of bias_(i)_fp32 inside, we must do
# Out_(i, j)_fp32 = (X_scale * W_scale) * (Sum_(over k)[(X_(i, k)_fp32 - X_zp) * (W_(i, k)_fp32 - W_zp)] + (1 / (X_scale * W_scale)) * bias_(i)_fp32)W_scale # noqa: B950
-# Note we had to multiply bias_fp32 qith X_scale * W_scale = bias_scale
+# Note we had to multiply bias_fp32 with X_scale * W_scale = bias_scale
# Thus bias quantization to int32 must be with X_scale * W_scale
bias_i32 = out_dtype(torch.ops.aten.div.Tensor, torch.int32, bias_fp32, bias_scale)
@@ -436,7 +436,7 @@ def _reference_quantized_add(
x_fp32 = (x_i8 - x_zero_point) * x_scale (3)
y_fp32 = (y_i8 - y_zero_point) * y_scale (4)
-# applying the above fomula to the out_i8 equation we can get the following:
+# applying the above formula to the out_i8 equation we can get the following:
out_i8 = out_fp32 / out_scale + out_zero_point # (1)
= (x_f32 + y_f32) / out_scale + out_zero_point # applying (2) to substitute out_fp32 with x_fp32 + y_fp32
= ((x_i8 - x_zero_point) * x_scale + (y_i8 - y_zero_point) * y_scale) / out_scale + out_zero_point # apply (3) and (4)
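A small numeric check of equations (1) through (4) above can be written in a few lines; the scales, zero points, and tensor names below are local to the sketch, and the clamping details of the real reference pattern are simplified.

```python
# Dequantize two int8 inputs, add in fp32, and requantize, following (1)-(4).
import torch

x_i8 = torch.randint(-128, 127, (5,), dtype=torch.int8)
y_i8 = torch.randint(-128, 127, (5,), dtype=torch.int8)
x_scale, x_zero_point = 0.02, 3
y_scale, y_zero_point = 0.03, -2
out_scale, out_zero_point = 0.1, 1

# equations (3) and (4): dequantize both inputs
x_fp32 = (x_i8.to(torch.float32) - x_zero_point) * x_scale
y_fp32 = (y_i8.to(torch.float32) - y_zero_point) * y_scale

# equation (1): requantize the float sum
out_i8 = torch.clamp(
    torch.round((x_fp32 + y_fp32) / out_scale) + out_zero_point, -128, 127
).to(torch.int8)

# dequantizing the result recovers the float sum up to quantization error
out_fp32 = (out_i8.to(torch.float32) - out_zero_point) * out_scale
print((out_fp32 - (x_fp32 + y_fp32)).abs().max())  # <= out_scale / 2 when nothing clamps
```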

View File

@@ -185,7 +185,7 @@ def _prepare_standalone_module_fx(
same as input_quantized_idxs configuration provided
for the standalone module
* `standalone_module_output_quantized_idxs(List[Int])`: a list of
-indexs for the graph output that is quantized
+indices for the graph output that is quantized
same as input_quantized_idxs configuration provided
for the standalone module

View File

@@ -76,7 +76,7 @@ def prepare_pt2e(
# Step 1. program capture
# NOTE: this API will be updated to torch.export API in the future, but the captured
-# result shoud mostly stay the same
+# result should mostly stay the same
m = torch.export.export_for_training(m, *example_inputs).module()
# we get a model with aten ops
@@ -153,7 +153,7 @@ def prepare_qat_pt2e(
# Step 1. program capture
# NOTE: this API will be updated to torch.export API in the future, but the captured
-# result shoud mostly stay the same
+# result should mostly stay the same
m = torch.export.export_for_training(m, *example_inputs).module()
# we get a model with aten ops
@@ -218,7 +218,7 @@ def convert_pt2e(
Args:
* `model` (torch.fx.GraphModule): calibrated/trained model
-* `use_reference_representation` (bool): boolean flag to indicate whether to produce referece representation or not
+* `use_reference_representation` (bool): boolean flag to indicate whether to produce reference representation or not
* `fold_quantize` (bool): boolean flag for whether fold the quantize op or not
Returns:
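Putting `prepare_pt2e` and `convert_pt2e` together with program capture, a minimal end-to-end flow could look like the sketch below; the choice of `X86InductorQuantizer`, the toy module, and the example inputs are assumptions made for illustration, and API locations may shift between PyTorch releases.

```python
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.x86_inductor_quantizer import (
    X86InductorQuantizer,
    get_default_x86_inductor_quantization_config,
)

class ToyModel(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(8, 8)

    def forward(self, x):
        return self.linear(x)

example_inputs = (torch.randn(2, 8),)

# Step 1. program capture: get an aten-level module
m = torch.export.export_for_training(ToyModel(), example_inputs).module()

# Step 2. insert observers according to the quantizer's annotations
quantizer = X86InductorQuantizer()
quantizer.set_global(get_default_x86_inductor_quantization_config())
m = prepare_pt2e(m, quantizer)

# Step 3. calibrate with representative data, then produce the quantized model
m(*example_inputs)
m = convert_pt2e(m)
```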

View File

@@ -111,7 +111,7 @@ class DerivedQuantizationSpec(QuantizationSpecBase):
@dataclass
class QuantizationAnnotation:
-"""How are input arguemnt or output should be quantized,
+"""How are input argument or output should be quantized,
expressed as QuantizationSpec, this corresponds to how a Tensor in the
operator Graph is observed (PTQ) or fake quantized (QAT)
"""

View File

@@ -28,7 +28,7 @@ def _node_only_used_for_sym_size(node: Node, partition_nodes: list[Node]):
This utility is used to handle cases when dynami_shape=True tracing leads
to symint nodes in the pattern of linear module. In those cases, we need to
distinguish between the nodes that are in input for just extracting value of
-some dimentions (and symint nodes) vs. the one that is activation.
+some dimensions (and symint nodes) vs. the one that is activation.
For example:
graph(x, y, weight):
size_0 = torch.ops.aten.sym_size([x], [0])

View File

@@ -245,7 +245,7 @@ def _get_not_module_type_or_name_filter(
class XNNPACKQuantizer(Quantizer):
"""
!!! DEPRECATED !!!
-XNNPACKQuantizer is a marked as deprected. It will be removed in the future.
+XNNPACKQuantizer is a marked as deprecated. It will be removed in the future.
It has been moved to executorch.backends.xnnpack.quantizer.xnnpack_quantizer.XNNPACKQuantizer.
Please use the new quantizer instead.
"""

View File

@@ -422,7 +422,7 @@ def _annotate_conv_bn(
filter_fn: Optional[Callable[[Node], bool]] = None,
) -> Optional[list[list[Node]]]:
"""
-Find conv + batchnorm parititions
+Find conv + batchnorm partitions
Note: This is only used for QAT. In PTQ, batchnorm should already be fused into the conv.
"""
return _do_annotate_conv_bn(gm, quantization_config, filter_fn, has_relu=False)
@@ -435,7 +435,7 @@ def _annotate_conv_bn_relu(
filter_fn: Optional[Callable[[Node], bool]] = None,
) -> Optional[list[list[Node]]]:
"""
-Find conv + batchnorm + relu parititions
+Find conv + batchnorm + relu partitions
Note: This is only used for QAT. In PTQ, batchnorm should already be fused into the conv.
"""
return _do_annotate_conv_bn(gm, quantization_config, filter_fn, has_relu=True)
@@ -448,7 +448,7 @@ def _annotate_conv_transpose_bn(
filter_fn: Optional[Callable[[Node], bool]] = None,
) -> Optional[list[list[Node]]]:
"""
-Find conv_transpose + batchnorm parititions
+Find conv_transpose + batchnorm partitions
Note: This is only used for QAT. In PTQ, batchnorm should already be fused into the conv.
"""
return _do_annotate_conv_bn(
@@ -463,7 +463,7 @@ def _annotate_conv_transpose_bn_relu(
filter_fn: Optional[Callable[[Node], bool]] = None,
) -> Optional[list[list[Node]]]:
"""
-Find conv_transpose + batchnorm + relu parititions
+Find conv_transpose + batchnorm + relu partitions
Note: This is only used for QAT. In PTQ, batchnorm should already be fused into the conv.
"""
return _do_annotate_conv_bn(

View File

@@ -85,7 +85,7 @@ class XPUInductorQuantizer(X86InductorQuantizer):
overrides. We keep the annotate methods but make the function
body empty, aiming to let `_generate_qdq_quantized_model`
generate qdq around op and graph execute on fp32 dtype for
-unspported operators.
+unsupported operators.
"""
def _annotate_qat_conv2d_fusion_pattern(

View File

@@ -616,7 +616,7 @@ def validate_qmin_qmax(quant_min: int, quant_max: int) -> None:
# Functionally equivalent to '_calculate_qparams' in observer.py. Observers must be torchscriptable however and qscheme
# as far as I can tell is not allowed to passed as a parameter in torchscript functions. This makes refactoring observer
-# to use this utility a massive pain and very gross. For now Im opting just to duplicate as this code seems unlikey to change
+# to use this utility a massive pain and very gross. For now Im opting just to duplicate as this code seems unlikely to change
# (last update over 1 year ago) and when torchscript is fully deprecated we can refactor. TODO(jakeszwe, jerryzh168)
def determine_qparams(
min_val: torch.Tensor,
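For intuition, the min/max to (scale, zero_point) computation this utility performs for a plain affine scheme boils down to a few lines; the helper below is a simplified sketch under assumed defaults (no symmetric or per-channel handling, no dtype plumbing) and is not the real `determine_qparams`.

```python
import torch

def simple_affine_qparams(
    min_val: torch.Tensor,
    max_val: torch.Tensor,
    quant_min: int = -128,
    quant_max: int = 127,
    eps: float = 1e-7,
):
    # include zero in the observed range so 0.0 maps exactly to an integer
    min_val = torch.clamp(min_val, max=0.0)
    max_val = torch.clamp(max_val, min=0.0)
    scale = (max_val - min_val) / float(quant_max - quant_min)
    scale = torch.clamp(scale, min=eps)
    zero_point = quant_min - torch.round(min_val / scale)
    zero_point = torch.clamp(zero_point, quant_min, quant_max).to(torch.int64)
    return scale, zero_point

scale, zero_point = simple_affine_qparams(torch.tensor(-1.3), torch.tensor(2.7))
```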