mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
Follows #132753 Pull Request resolved: https://github.com/pytorch/pytorch/pull/132963 Approved by: https://github.com/Skylion007
35 lines
1.2 KiB
C++
35 lines
1.2 KiB
C++
#pragma once
|
|
|
|
#include <c10/macros/Export.h>
|
|
#include <string>
|
|
|
|
namespace torch::jit::mobile {
|
|
class Module;
|
|
namespace quantization {
|
|
/*
|
|
* Device side PTQ API.
|
|
* Once the model has been prepared for quantization on server side, such model
|
|
* is sent to device. On device side the model is further trained. At the end of
|
|
* the training, before the model is readied for inference, we need to quantize
|
|
* the model.
|
|
* Usage of this API is as follows.
|
|
* PTQQuanizationHelper ptq_helper;
|
|
* ptq_helper.quantize_dynamic(m, "forward");
|
|
* Args:
*   m: Captured by reference, an instance of mobile::Module. This module will
*     be mutated in place to replace its <method_name> method with its
*     quantized equivalent.
*   method_name: Name of the method to be quantized. AOT preparation for
*     quantization must also have been done for this method.
* Returns:
*   Nothing directly (the function returns void); `m` is mutated in place.
*   Its size should be smaller due to weight quantization and its
*   <method_name> method should use quantized ops.
|
|
*/
|
|
class TORCH_API PTQQuanizationHelper {
|
|
public:
|
|
PTQQuanizationHelper() = default;
|
|
void quantize_dynamic(
|
|
torch::jit::mobile::Module& m,
|
|
const std::string& method_name);
|
|
};
|
|
} // namespace quantization
|
|
} // namespace torch::jit::mobile
|