mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Summary: Make the following improvements: * Schedule the torchao benchmark nightly * Enable torchbench, timm, and huggingface models * Refactor the benchmarking script to better arrange the benchmarking groups Test workflow: https://github.com/pytorch/benchmark/actions/runs/9705589352 X-link: https://github.com/pytorch/benchmark/pull/2336 Differential Revision: D59074571 Pulled By: xuzhao9 Pull Request resolved: https://github.com/pytorch/pytorch/pull/129779 Approved by: https://github.com/jerryzh168
55 lines
2.1 KiB
Python
55 lines
2.1 KiB
Python
from typing import Any, Callable
|
|
|
|
import torch
|
|
|
|
|
|
def setup_baseline():
|
|
torch._dynamo.epilogue_fusion = False
|
|
torch._dynamo.config.automatic_dynamic_shapes = False
|
|
torch._dynamo.config.force_parameter_static_shapes = False
|
|
torch._dynamo.config.cache_size_limit = 10000
|
|
torch._inductor.config.force_fuse_int_mm_with_mul = True
|
|
torch._inductor.config.use_mixed_mm = True
|
|
|
|
|
|
def torchao_optimize_ctx(quantization: str):
|
|
import torchao
|
|
from torchao.quantization.quant_api import (
|
|
change_linear_weights_to_int4_woqtensors,
|
|
change_linear_weights_to_int8_dqtensors,
|
|
change_linear_weights_to_int8_woqtensors,
|
|
)
|
|
|
|
def inner(model_iter_fn: Callable):
|
|
def _torchao_apply(module: torch.nn.Module, example_inputs: Any):
|
|
if getattr(module, "_quantized", None) is None:
|
|
if quantization == "int8dynamic":
|
|
change_linear_weights_to_int8_dqtensors(module)
|
|
elif quantization == "int8weightonly":
|
|
change_linear_weights_to_int8_woqtensors(module)
|
|
elif quantization == "int4weightonly":
|
|
change_linear_weights_to_int4_woqtensors(module)
|
|
elif quantization == "autoquant":
|
|
torchao.autoquant(module, error_on_unseen=False)
|
|
if isinstance(example_inputs, dict):
|
|
module(**example_inputs)
|
|
else:
|
|
module(*example_inputs)
|
|
from torchao.quantization.autoquant import AUTOQUANT_CACHE
|
|
|
|
assert (
|
|
len(AUTOQUANT_CACHE) > 0
|
|
), f"Err: found no autoquantizable layers in model {type(module)}, stopping autoquantization"
|
|
elif quantization == "noquant":
|
|
pass
|
|
else:
|
|
raise AssertionError(
|
|
f"Unsupposed quantization mode {quantization}."
|
|
)
|
|
setattr(module, "_quantized", True) # noqa: B010
|
|
model_iter_fn(module, example_inputs)
|
|
|
|
return _torchao_apply
|
|
|
|
return inner
|