[ONNX] Apply clang-format changes (#73220)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/73220

Test Plan: CI

Reviewed By: seemethere

Differential Revision: D34395058

fbshipit-source-id: dd043f32ba4e33f1ceeffbf432942a850488e628
parent fd50170935
commit c5265e90c7
@@ -764,14 +764,15 @@ static void fuseListConstructListUnpack(Block* b) {
 // https://github.com/pytorch/pytorch/wiki/PyTorch-ONNX-exporter#quantized-model-export
 static void eraseTupleConstruct(Block* block) {
   size_t index = 0;
-  // TupleConstruct is generated from the symbolics in quantized domain, and consumed
-  // by other quantized operators. The remained TupleConstruct should be at the output of the blocks.
+  // TupleConstruct is generated from the symbolics in quantized domain, and
+  // consumed by other quantized operators. The remained TupleConstruct should
+  // be at the output of the blocks.
   for (auto* output : block->outputs()) {
     auto output_node = output->node();
     if (output_node->kind() == prim::TupleConstruct) {
       block->eraseOutput(index);
       size_t input_index = 0;
-      for (auto* input: output_node->inputs()) {
+      for (auto* input : output_node->inputs()) {
         block->insertOutput(index + (input_index++), input);
       }
     }
@@ -1,8 +1,8 @@
 #include <c10/util/irange.h>
 #include <torch/csrc/jit/jit_log.h>
 #include <torch/csrc/jit/passes/dead_code_elimination.h>
-#include <torch/csrc/jit/passes/onnx/scalar_type_analysis.h>
 #include <torch/csrc/jit/passes/onnx/helper.h>
+#include <torch/csrc/jit/passes/onnx/scalar_type_analysis.h>

 namespace torch {
 namespace jit {
@@ -136,7 +136,9 @@ std::vector<Node*> CreateQuantizedWeights(
     float scale,
     int64_t zero_point) {
   Node* const_node_1 = graph->create(prim::Constant);
-  auto const_value = at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat).to(at::kCPU);
+  auto const_value =
+      at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat)
+          .to(at::kCPU);
   auto options = c10::TensorOptions().dtype(at::kFloat).device(at::kCPU);
   at::Tensor const_value_copy = at::empty(c10::IntArrayRef(shapes), options);
   const_value.copy_(const_value);
@@ -145,16 +147,23 @@ std::vector<Node*> CreateQuantizedWeights(
   Node* const_node_2 = graph->create(prim::Constant);
   std::vector<float> scale_v{scale};
   std::vector<int64_t> scale_shapes{1};
-  auto const_shape = at::from_blob(scale_v.data(), c10::IntArrayRef(scale_shapes), at::kFloat).to(at::kCPU);
-  at::Tensor const_shape_copy = at::empty(c10::IntArrayRef(scale_shapes), options);
+  auto const_shape =
+      at::from_blob(scale_v.data(), c10::IntArrayRef(scale_shapes), at::kFloat)
+          .to(at::kCPU);
+  at::Tensor const_shape_copy =
+      at::empty(c10::IntArrayRef(scale_shapes), options);
   const_shape_copy.copy_(const_shape);
   const_node_2->t_(Symbol::attr("value"), const_shape_copy);

   Node* const_node_3 = graph->create(prim::Constant);
   std::vector<int64_t> zero_point_v{zero_point};
   std::vector<int64_t> zero_shapes{1};
-  auto const_zero = at::from_blob(zero_point_v.data(), c10::IntArrayRef(zero_shapes), at::kInt).to(at::kCPU);
-  at::Tensor const_zero_copy = at::empty(c10::IntArrayRef(zero_shapes), options);
+  auto const_zero =
+      at::from_blob(
+          zero_point_v.data(), c10::IntArrayRef(zero_shapes), at::kInt)
+          .to(at::kCPU);
+  at::Tensor const_zero_copy =
+      at::empty(c10::IntArrayRef(zero_shapes), options);
   const_zero_copy.copy_(const_zero);
   const_node_3->t_(Symbol::attr("value"), const_zero_copy);
@@ -166,7 +175,9 @@ Node* CreateQuantizedBias(
     std::shared_ptr<Graph>& graph,
     std::vector<int64_t> shapes) {
   Node* const_node_1 = graph->create(prim::Constant);
-  auto const_bias = at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat).to(at::kCPU);
+  auto const_bias =
+      at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat)
+          .to(at::kCPU);
   auto options = c10::TensorOptions().dtype(at::kFloat).device(at::kCPU);
   at::Tensor const_bias_copy = at::empty(c10::IntArrayRef(shapes), options);
   const_bias_copy.copy_(const_bias);
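The CreateQuantizedWeights/CreateQuantizedBias hunks above all reformat the same idiom: wrap caller-owned memory with at::from_blob, then copy into a freshly allocated tensor before attaching it to a prim::Constant. The copy matters because at::from_blob does not take ownership of the buffer, so the constant needs storage that outlives the local vectors. A minimal standalone sketch of that pattern (illustrative only, not part of this diff; links against ATen):

#include <ATen/ATen.h>

#include <vector>

int main() {
  std::vector<float> data{1.0f, 2.0f, 3.0f, 4.0f};
  std::vector<int64_t> shapes{2, 2};

  // View over caller-owned memory: only valid while `data` stays alive.
  auto view =
      at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat)
          .to(at::kCPU);

  // Allocate owning storage and copy the values in; this tensor is safe to
  // keep as a node attribute after `data` goes out of scope.
  auto options = c10::TensorOptions().dtype(at::kFloat).device(at::kCPU);
  at::Tensor owned = at::empty(c10::IntArrayRef(shapes), options);
  owned.copy_(view);
  return 0;
}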
@@ -201,7 +212,7 @@ void unpackQuantizedWeightsHelper(
     const std::string& pattern,
     const std::string& unpack_fn,
     QuantizedParamsType params_type,
-    bool caffe2=true) {
+    bool caffe2 = true) {
   Graph pattern_graph;
   std::unordered_map<std::string, Value*> vmap;
   parseIR(pattern, &pattern_graph, vmap);
@@ -425,12 +436,18 @@ void unpackQuantizedWeightsHelper(
     } else {
       std::vector<float> unpacked_weight_values;
       unpacked_weight_values.reserve(unpacked_weight.numel());
-      auto unpacked_weight_data = reinterpret_cast<int8_t*>(unpacked_weight.data_ptr<c10::qint8>());
+      auto unpacked_weight_data =
+          reinterpret_cast<int8_t*>(unpacked_weight.data_ptr<c10::qint8>());
       for (const auto i : c10::irange(unpacked_weight.numel())) {
-        unpacked_weight_values.push_back(static_cast<float>(unpacked_weight_data[i]));
+        unpacked_weight_values.push_back(
+            static_cast<float>(unpacked_weight_data[i]));
       }
       std::vector<Node*> c2_weight = CreateQuantizedWeights(
-          unpacked_weight_values, graph, wt_sizes, static_cast<float>(unpacked_weight.q_scale()), weight_zp);
+          unpacked_weight_values,
+          graph,
+          wt_sizes,
+          static_cast<float>(unpacked_weight.q_scale()),
+          weight_zp);
       graph->setInsertPoint(qlinear_node);
       c2_weight[0]->insertBefore(qlinear_node);
       qlinear_node->insertInput(1, c2_weight[0]->output());
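The else-branch above reads the unpacked qint8 weight's raw int8 storage and widens each value to float before calling CreateQuantizedWeights. A minimal sketch of that raw-value readout on a standalone per-tensor-quantized tensor (illustrative only; the tensor, scale, and zero point below are placeholders, not values from the PR):

#include <ATen/ATen.h>
#include <c10/util/irange.h>

#include <vector>

int main() {
  // Per-tensor affine quantization of a small float tensor to qint8.
  at::Tensor w = at::randn({4});
  at::Tensor qw =
      at::quantize_per_tensor(w, /*scale=*/0.05, /*zero_point=*/0, at::kQInt8);

  // Reinterpret the qint8 storage as plain int8 and widen to float, mirroring
  // the loop in unpackQuantizedWeightsHelper.
  std::vector<float> values;
  values.reserve(qw.numel());
  auto* raw = reinterpret_cast<int8_t*>(qw.data_ptr<c10::qint8>());
  for (const auto i : c10::irange(qw.numel())) {
    values.push_back(static_cast<float>(raw[i]));
  }
  return 0;
}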
@@ -464,7 +481,8 @@ void unpackQuantizedWeightsHelper(

     if (caffe2) {
       auto input_scale = getScaleFromInput(input_node);
-      q_bias = at::quantize_per_tensor(original_bias, weight_scale * input_scale, 0, at::kQInt32);
+      q_bias = at::quantize_per_tensor(
+          original_bias, weight_scale * input_scale, 0, at::kQInt32);
       std::vector<int64_t> bias_values;
       bias_values.reserve(q_bias.numel());
       auto bias_data = (int32_t*)q_bias.data_ptr<c10::qint32>();
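In the caffe2 branch above, the float bias is quantized to qint32 with scale weight_scale * input_scale and zero point 0. That product is the natural bias scale: the int32 accumulator of a quantized linear or conv holds sums of (x_q - zp_x) * (w_q - zp_w) products, whose real value carries a factor of input_scale * weight_scale, so a bias expressed in those units can be added to the accumulator directly. A minimal sketch of that step with placeholder scales (illustrative only, not part of this diff):

#include <ATen/ATen.h>

int main() {
  at::Tensor original_bias = at::randn({8});
  double input_scale = 0.1;   // placeholder activation scale
  double weight_scale = 0.05; // placeholder weight scale

  // The bias shares the accumulator's scale (input_scale * weight_scale) and
  // uses zero point 0, so it can be added to the int32 accumulator directly.
  at::Tensor q_bias = at::quantize_per_tensor(
      original_bias, weight_scale * input_scale, 0, at::kQInt32);

  // Raw int32 values, read out the same way as in the hunk above.
  auto* bias_data = reinterpret_cast<int32_t*>(q_bias.data_ptr<c10::qint32>());
  (void)bias_data;
  return 0;
}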
@@ -485,10 +503,8 @@ void unpackQuantizedWeightsHelper(
       for (const auto i : c10::irange(original_bias.numel())) {
         bias_values[i] = bias_data[i];
       }
-      Node* bias = CreateQuantizedBias(
-          bias_values,
-          graph,
-          original_bias.sizes().vec());
+      Node* bias =
+          CreateQuantizedBias(bias_values, graph, original_bias.sizes().vec());
       bias->insertBefore(qlinear_node);
       // For quantized_linear inputs, the order is input, weight, bias, ....
       // We unpack weight into 3 values. then it is
@@ -521,11 +537,13 @@ void unpackQuantizedWeightsHelper(
   }
 }

-static std::unordered_map<c10::ScalarType, c10::ScalarType, ScalarTypeHashFunction> qTypeToValType = {
-    {c10::ScalarType::QInt8, c10::ScalarType::Char},
-    {c10::ScalarType::QUInt8, c10::ScalarType::Byte},
-    {c10::ScalarType::QInt32, c10::ScalarType::Int},
-    {c10::ScalarType::QUInt4x2, c10::ScalarType::Byte},
+static std::
+    unordered_map<c10::ScalarType, c10::ScalarType, ScalarTypeHashFunction>
+        qTypeToValType = {
+            {c10::ScalarType::QInt8, c10::ScalarType::Char},
+            {c10::ScalarType::QUInt8, c10::ScalarType::Byte},
+            {c10::ScalarType::QInt32, c10::ScalarType::Int},
+            {c10::ScalarType::QUInt4x2, c10::ScalarType::Byte},
 };

 // Unpack quantized tensor inputs into {value, scale, zero_point},
@@ -544,14 +562,21 @@ void UnpackQuantizedTensorInputs(std::shared_ptr<Graph>& graph) {
       continue;
     }
     std::string input_name = g_input->debugName();
-    auto input_value = graph->insertInput(index, input_name + "_value")->setType(shape_type->withScalarType(qTypeToValType[scalar_type]));
+    auto input_value =
+        graph->insertInput(index, input_name + "_value")
+            ->setType(shape_type->withScalarType(qTypeToValType[scalar_type]));
     // scale and zero_point type can be found at torch/include/ATen/Operators.h
-    auto input_scale = graph->insertInput(index + 1, input_name + "_scale")->setType(TensorType::create(
-        at::kDouble, at::kCPU, 0, /*requires_grad=*/c10::nullopt));
-    auto input_zero_point = graph->insertInput(index + 2, input_name + "_zero_point")->setType(TensorType::create(
-        at::kLong, at::kCPU, 0, /*requires_grad=*/c10::nullopt));
+    auto input_scale =
+        graph->insertInput(index + 1, input_name + "_scale")
+            ->setType(TensorType::create(
+                at::kDouble, at::kCPU, 0, /*requires_grad=*/c10::nullopt));
+    auto input_zero_point =
+        graph->insertInput(index + 2, input_name + "_zero_point")
+            ->setType(TensorType::create(
+                at::kLong, at::kCPU, 0, /*requires_grad=*/c10::nullopt));
     std::vector<Value*> converted{input_value, input_scale, input_zero_point};
-    auto input_tuple = graph->prependNode(graph->createTuple(converted))->output();
+    auto input_tuple =
+        graph->prependNode(graph->createTuple(converted))->output();
     g_input->replaceAllUsesWith(input_tuple);
     // Erase the original quantized tensor input.
     graph->eraseInput(index + converted.size());
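At the tensor level, the {value, scale, zero_point} triple that this pass exposes as separate graph inputs is the standard per-tensor decomposition of a quantized tensor. A minimal sketch of that decomposition using ATen's accessors (illustrative only, not part of this pass; scale and zero point are placeholders):

#include <ATen/ATen.h>

int main() {
  at::Tensor x = at::randn({2, 3});
  at::Tensor qx =
      at::quantize_per_tensor(x, /*scale=*/0.1, /*zero_point=*/10, at::kQUInt8);

  // The three pieces the pass turns into separate graph inputs:
  at::Tensor value = qx.int_repr();       // underlying integer tensor
  double scale = qx.q_scale();            // per-tensor scale
  int64_t zero_point = qx.q_zero_point(); // per-tensor zero point

  // Real value of each element is (value - zero_point) * scale.
  at::Tensor dequant = (value.to(at::kFloat) - zero_point) * scale;
  (void)dequant;
  return 0;
}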