[Quant][fx][bc-breaking] Do not move models to CPU in convert (#80555)

Summary: Previously, we automatically moved the model to CPU in
torch.ao.quantization.fx.convert to work around the fact that
certain functions called by convert expect CPU arguments. This
commit pushes that responsibility to the caller, since which device
to run the model on is the user's decision.
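
For context, a hedged illustration (not part of this PR) of the kind of call that
motivated the old workaround: quantized::linear_prepack, which fold_weight invokes
during lowering, only has CPU implementations, so it fails when the model's weights
live on CUDA. The tensor shape and quantization parameters below are arbitrary.
```
import torch

# Illustrative only: quantized::linear_prepack has CPU (fbgemm/qnnpack)
# kernels but no QuantizedCUDA backend, which is why convert previously
# forced the model onto CPU before lowering.
w = torch.quantize_per_tensor(
    torch.randn(4, 4), scale=0.1, zero_point=0, dtype=torch.qint8)
packed = torch.ops.quantized.linear_prepack(w)   # works for CPU tensors
# torch.ops.quantized.linear_prepack(w.cuda())   # would fail: no CUDA kernel
```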

Test Plan:
python test/test_quantization.py TestQuantizeFx
python test/test_quantization.py TestQuantizeFxOps

BC-breaking Notes:

Before:
```
model = resnet18(...)
model = prepare_fx(model, qconfig_mapping, example_inputs)
... # calibrate
model = convert_fx(model)
```
After:
```
model = resnet18(...)
model.cpu()
model = prepare_fx(model, qconfig_mapping, example_inputs)
... # calibrate
model = convert_fx(model)
```
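
To make the new contract concrete, here is a minimal runnable sketch. The torchvision
resnet18, get_default_qconfig_mapping, and the example input shape are assumptions for
illustration, not part of this change; the only new requirement is the explicit move to
CPU before convert_fx (the updated test below does the same with prepared.cpu()).
```
import torch
from torchvision.models import resnet18
from torch.ao.quantization import get_default_qconfig_mapping
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx

model = resnet18().eval()
qconfig_mapping = get_default_qconfig_mapping("fbgemm")
example_inputs = (torch.randn(1, 3, 224, 224),)

# Insert observers; calibration may run on whichever device the user chooses.
model = prepare_fx(model, qconfig_mapping, example_inputs)
with torch.no_grad():
    model(*example_inputs)  # calibration pass

# The caller is now responsible for moving the model to CPU before convert,
# since lowering calls ops such as quantized::linear_prepack that expect
# CPU arguments.
model.cpu()
model = convert_fx(model)
```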

Reviewers: jerryzh168

Subscribers: jerryzh168

Differential Revision: [D37528830](https://our.internmc.facebook.com/intern/diff/D37528830)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/80555
Approved by: https://github.com/jerryzh168
Andrew Or 2022-07-07 15:05:35 -07:00 committed by PyTorch MergeBot
parent cc3126083e
commit 8fab682e47
2 changed files with 4 additions and 8 deletions


@@ -7611,6 +7611,10 @@ class TestQuantizeFxModels(QuantizationTestCase):
         torch.testing.assert_allclose(grad[0], grad_ref[0])
         if 'fbgemm' in torch.backends.quantized.supported_engines:
+            # During the lowering step in convert, fold_weight calls quantized::linear_prepack
+            # which doesn't support QuantizedCuda backend
+            prepared.cpu()
+            prepared_ref.cpu()
             converted = convert_fx(prepared)
             converted_ref = convert_fx(prepared_ref)
             inp = torch.rand(5, 5)


@@ -580,14 +580,6 @@ def convert(
     node_name_to_scope, prepare_custom_config, observed_node_names = restore_state(model)
     qconfig_map: Dict[str, QConfigAny] = model._qconfig_map  # type: ignore[assignment]
-    # TODO this should be removed now that gpu support for quantization is being supported.
-    # however in practice, as of 7/22/2021, certain functions that get called by convert expect
-    # only cpu arguments.
-    # As an example, in TestQuantizeFxModels.test_qat_functional_linear when device='cuda',
-    # fold_weight will call quantized::linear_prepack which doesn't support QuantizedCuda backend.
-    if not is_reference:
-        model.cpu()
     # mapping from fully qualified module name to module instance
     # for example,
     # {