From 8fab682e47242ecaebda99fc78800096962e65cf Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Thu, 7 Jul 2022 15:05:35 -0700
Subject: [PATCH] [Quant][fx][bc-breaking] Do not move models to CPU in convert (#80555)

Summary:
Previously, we automatically moved the model to CPU in
torch.ao.quantization.fx.convert to work around the issue where certain
functions called by convert expect CPU arguments. This commit pushes that
responsibility to the caller, since the user should decide which device
to use.

Test Plan:
python test/test_quantization.py TestQuantizeFx
python test/test_quantization.py TestQuantizeFxOps

BC-breaking Notes:

Before:
```
model = resnet18(...)
model = prepare_fx(model, qconfig_mapping, example_inputs)
...  # calibrate
model = convert_fx(model)
```

After:
```
model = resnet18(...)
model.cpu()
model = prepare_fx(model, qconfig_mapping, example_inputs)
...  # calibrate
model = convert_fx(model)
```

Reviewers: jerryzh168

Subscribers: jerryzh168

Differential Revision: [D37528830](https://our.internmc.facebook.com/intern/diff/D37528830)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80555
Approved by: https://github.com/jerryzh168
---
 test/quantization/fx/test_quantize_fx.py | 4 ++++
 torch/ao/quantization/fx/convert.py      | 8 --------
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/test/quantization/fx/test_quantize_fx.py b/test/quantization/fx/test_quantize_fx.py
index 9498f34ab47..cd9aec0a6e5 100644
--- a/test/quantization/fx/test_quantize_fx.py
+++ b/test/quantization/fx/test_quantize_fx.py
@@ -7611,6 +7611,10 @@ class TestQuantizeFxModels(QuantizationTestCase):
                 torch.testing.assert_allclose(grad[0], grad_ref[0])
 
             if 'fbgemm' in torch.backends.quantized.supported_engines:
+                # During the lowering step in convert, fold_weight calls quantized::linear_prepack
+                # which doesn't support QuantizedCuda backend
+                prepared.cpu()
+                prepared_ref.cpu()
                 converted = convert_fx(prepared)
                 converted_ref = convert_fx(prepared_ref)
                 inp = torch.rand(5, 5)
diff --git a/torch/ao/quantization/fx/convert.py b/torch/ao/quantization/fx/convert.py
index 2cdbd3686b4..31bce8157bd 100644
--- a/torch/ao/quantization/fx/convert.py
+++ b/torch/ao/quantization/fx/convert.py
@@ -580,14 +580,6 @@ def convert(
     node_name_to_scope, prepare_custom_config, observed_node_names = restore_state(model)
     qconfig_map: Dict[str, QConfigAny] = model._qconfig_map  # type: ignore[assignment]
 
-    # TODO this should be removed now that gpu support for quantization is being supported.
-    # however in practice, as of 7/22/2021, certain functions that get called by convert expect
-    # only cpu arguments.
-    # As an example, in TestQuantizeFxModels.test_qat_functional_linear when device='cuda',
-    # fold_weight will call quantized::linear_prepack which doesn't support QuantizedCuda backend.
-    if not is_reference:
-        model.cpu()
-
     # mapping from fully qualified module name to module instance
     # for example,
     # {
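
For a model that was prepared and calibrated on GPU, the new calling convention might look like the minimal sketch below. This is an illustration only, not part of the patch: the qconfig_mapping helper, backend choice, and input shape are assumptions, and the point is simply that the caller, not convert_fx, now moves the model to CPU before conversion.

```
import torch
from torchvision.models import resnet18
from torch.ao.quantization import get_default_qconfig_mapping
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx

# Illustrative values; the patch itself does not prescribe these.
model = resnet18().cuda().eval()
qconfig_mapping = get_default_qconfig_mapping("fbgemm")
example_inputs = (torch.randn(1, 3, 224, 224, device="cuda"),)

prepared = prepare_fx(model, qconfig_mapping, example_inputs)
with torch.no_grad():
    prepared(*example_inputs)  # calibrate

# Now the caller's responsibility: convert_fx no longer moves the model to
# CPU, and lowering ops such as quantized::linear_prepack expect CPU tensors.
prepared.cpu()
quantized = convert_fx(prepared)
```

Keeping the device transfer in user code avoids surprising callers who expect convert_fx to leave the model where they put it, at the cost of one explicit `.cpu()` call before conversion.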