[Quant][fx][bc-breaking] Do not move models to CPU in convert (#80555)
Summary:
Previously, we automatically moved the model to CPU in torch.ao.quantization.fx.convert to work around the issue where certain functions called by convert expect CPU arguments. This commit pushes this responsibility to the caller since it is the user's decision of which device to use.

Test Plan:
python test/test_quantization.py TestQuantizeFx
python test/test_quantization.py TestQuantizeFxOps

BC-breaking Notes:

Before:
```
model = resnet18(...)
model = prepare_fx(model, qconfig_mapping, example_inputs)
...  # calibrate
model = convert_fx(model)
```

After:
```
model = resnet18(...)
model.cpu()
model = prepare_fx(model, qconfig_mapping, example_inputs)
...  # calibrate
model = convert_fx(model)
```

Reviewers: jerryzh168

Subscribers: jerryzh168

Differential Revision: [D37528830](https://our.internmc.facebook.com/intern/diff/D37528830)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80555
Approved by: https://github.com/jerryzh168
This commit is contained in:
parent cc3126083e
commit 8fab682e47
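For context, a minimal sketch of the caller-side workflow the commit message describes: calibrate on whatever device you like, then move the observed model to CPU yourself before calling convert_fx, since convert no longer does this automatically. The torchvision model, qconfig mapping, input shape, and calibration loop below are illustrative placeholders, not part of this patch.

```
import torch
from torchvision.models import resnet18  # illustrative model, as in the commit message
from torch.ao.quantization import get_default_qconfig_mapping
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx

model = resnet18().eval().cuda()
qconfig_mapping = get_default_qconfig_mapping("fbgemm")
example_inputs = (torch.randn(1, 3, 224, 224, device="cuda"),)

prepared = prepare_fx(model, qconfig_mapping, example_inputs)

with torch.no_grad():
    for _ in range(10):  # placeholder calibration loop
        prepared(torch.randn(1, 3, 224, 224, device="cuda"))

# Now the caller's responsibility: fold_weight calls quantized::linear_prepack,
# which has no QuantizedCUDA backend, so move the model to CPU before convert.
prepared.cpu()
quantized = convert_fx(prepared)
```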
@@ -7611,6 +7611,10 @@ class TestQuantizeFxModels(QuantizationTestCase):
             torch.testing.assert_allclose(grad[0], grad_ref[0])
 
             if 'fbgemm' in torch.backends.quantized.supported_engines:
+                # During the lowering step in convert, fold_weight calls quantized::linear_prepack
+                # which doesn't support QuantizedCuda backend
+                prepared.cpu()
+                prepared_ref.cpu()
                 converted = convert_fx(prepared)
                 converted_ref = convert_fx(prepared_ref)
                 inp = torch.rand(5, 5)
@@ -580,14 +580,6 @@ def convert(
     node_name_to_scope, prepare_custom_config, observed_node_names = restore_state(model)
     qconfig_map: Dict[str, QConfigAny] = model._qconfig_map  # type: ignore[assignment]
 
-    # TODO this should be removed now that gpu support for quantization is being supported.
-    # however in practice, as of 7/22/2021, certain functions that get called by convert expect
-    # only cpu arguments.
-    # As an example, in TestQuantizeFxModels.test_qat_functional_linear when device='cuda',
-    # fold_weight will call quantized::linear_prepack which doesn't support QuantizedCuda backend.
-    if not is_reference:
-        model.cpu()
-
     # mapping from fully qualified module name to module instance
     # for example,
     # {
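A caller that depended on the removed auto-move can recover the same effect on its side. A minimal sketch, assuming only the public quantize_fx API; the helper name convert_fx_on_cpu is hypothetical and not part of PyTorch:

```
import torch
from torch.ao.quantization.quantize_fx import convert_fx

def convert_fx_on_cpu(prepared_model: torch.nn.Module, **convert_kwargs):
    # Reproduce the behavior removed from torch.ao.quantization.fx.convert:
    # move the observed model to CPU before lowering, because fold_weight
    # calls quantized::linear_prepack, which doesn't support the QuantizedCUDA backend.
    prepared_model.cpu()
    return convert_fx(prepared_model, **convert_kwargs)
```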