diff --git a/torch/csrc/jit/passes/onnx/peephole.cpp b/torch/csrc/jit/passes/onnx/peephole.cpp
index 0282070c748..e5114d9f1e8 100644
--- a/torch/csrc/jit/passes/onnx/peephole.cpp
+++ b/torch/csrc/jit/passes/onnx/peephole.cpp
@@ -764,14 +764,15 @@ static void fuseListConstructListUnpack(Block* b) {
 // https://github.com/pytorch/pytorch/wiki/PyTorch-ONNX-exporter#quantized-model-export
 static void eraseTupleConstruct(Block* block) {
   size_t index = 0;
-  // TupleConstruct is generated from the symbolics in quantized domain, and consumed
-  // by other quantized operators. The remained TupleConstruct should be at the output of the blocks.
+  // TupleConstruct is generated from the symbolics in quantized domain, and
+  // consumed by other quantized operators. The remaining TupleConstruct should
+  // be at the output of the blocks.
   for (auto* output : block->outputs()) {
     auto output_node = output->node();
     if (output_node->kind() == prim::TupleConstruct) {
       block->eraseOutput(index);
       size_t input_index = 0;
-      for (auto* input: output_node->inputs()) {
+      for (auto* input : output_node->inputs()) {
         block->insertOutput(index + (input_index++), input);
       }
     }
diff --git a/torch/csrc/jit/passes/onnx/scalar_type_analysis.cpp b/torch/csrc/jit/passes/onnx/scalar_type_analysis.cpp
index 31a727c13ba..2f6d5ae5380 100644
--- a/torch/csrc/jit/passes/onnx/scalar_type_analysis.cpp
+++ b/torch/csrc/jit/passes/onnx/scalar_type_analysis.cpp
@@ -1,8 +1,8 @@
 #include
 #include
 #include
-#include
 #include
+#include

 namespace torch {
 namespace jit {
diff --git a/torch/csrc/jit/passes/onnx/unpack_quantized_weights.cpp b/torch/csrc/jit/passes/onnx/unpack_quantized_weights.cpp
index bffb5161327..7c237cc8046 100644
--- a/torch/csrc/jit/passes/onnx/unpack_quantized_weights.cpp
+++ b/torch/csrc/jit/passes/onnx/unpack_quantized_weights.cpp
@@ -136,7 +136,9 @@ std::vector<Node*> CreateQuantizedWeights(
     float scale,
     int64_t zero_point) {
   Node* const_node_1 = graph->create(prim::Constant);
-  auto const_value = at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat).to(at::kCPU);
+  auto const_value =
+      at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat)
+          .to(at::kCPU);
   auto options = c10::TensorOptions().dtype(at::kFloat).device(at::kCPU);
   at::Tensor const_value_copy = at::empty(c10::IntArrayRef(shapes), options);
   const_value_copy.copy_(const_value);
@@ -145,16 +147,23 @@ std::vector<Node*> CreateQuantizedWeights(
   Node* const_node_2 = graph->create(prim::Constant);
   std::vector<float> scale_v{scale};
   std::vector<int64_t> scale_shapes{1};
-  auto const_shape = at::from_blob(scale_v.data(), c10::IntArrayRef(scale_shapes), at::kFloat).to(at::kCPU);
-  at::Tensor const_shape_copy = at::empty(c10::IntArrayRef(scale_shapes), options);
+  auto const_shape =
+      at::from_blob(scale_v.data(), c10::IntArrayRef(scale_shapes), at::kFloat)
+          .to(at::kCPU);
+  at::Tensor const_shape_copy =
+      at::empty(c10::IntArrayRef(scale_shapes), options);
   const_shape_copy.copy_(const_shape);
   const_node_2->t_(Symbol::attr("value"), const_shape_copy);

   Node* const_node_3 = graph->create(prim::Constant);
   std::vector<int64_t> zero_point_v{zero_point};
   std::vector<int64_t> zero_shapes{1};
-  auto const_zero = at::from_blob(zero_point_v.data(), c10::IntArrayRef(zero_shapes), at::kInt).to(at::kCPU);
-  at::Tensor const_zero_copy = at::empty(c10::IntArrayRef(zero_shapes), options);
+  auto const_zero =
+      at::from_blob(
+          zero_point_v.data(), c10::IntArrayRef(zero_shapes), at::kInt)
+          .to(at::kCPU);
+  at::Tensor const_zero_copy =
+      at::empty(c10::IntArrayRef(zero_shapes), options);
   const_zero_copy.copy_(const_zero);
   const_node_3->t_(Symbol::attr("value"), const_zero_copy);

@@ -166,7 +175,9 @@ Node* CreateQuantizedBias(
     std::shared_ptr<Graph>& graph,
     std::vector<int64_t> shapes) {
   Node* const_node_1 = graph->create(prim::Constant);
-  auto const_bias = at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat).to(at::kCPU);
+  auto const_bias =
+      at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat)
+          .to(at::kCPU);
   auto options = c10::TensorOptions().dtype(at::kFloat).device(at::kCPU);
   at::Tensor const_bias_copy = at::empty(c10::IntArrayRef(shapes), options);
   const_bias_copy.copy_(const_bias);
@@ -201,7 +212,7 @@ void unpackQuantizedWeightsHelper(
     const std::string& pattern,
     const std::string& unpack_fn,
     QuantizedParamsType params_type,
-    bool caffe2=true) {
+    bool caffe2 = true) {
   Graph pattern_graph;
   std::unordered_map<std::string, Value*> vmap;
   parseIR(pattern, &pattern_graph, vmap);
@@ -425,12 +436,18 @@ void unpackQuantizedWeightsHelper(
    } else {
      std::vector<float> unpacked_weight_values;
      unpacked_weight_values.reserve(unpacked_weight.numel());
-     auto unpacked_weight_data = reinterpret_cast<int8_t*>(unpacked_weight.data_ptr());
+     auto unpacked_weight_data =
+         reinterpret_cast<int8_t*>(unpacked_weight.data_ptr());
      for (const auto i : c10::irange(unpacked_weight.numel())) {
-       unpacked_weight_values.push_back(static_cast<float>(unpacked_weight_data[i]));
+       unpacked_weight_values.push_back(
+           static_cast<float>(unpacked_weight_data[i]));
      }
      std::vector<Node*> c2_weight = CreateQuantizedWeights(
-         unpacked_weight_values, graph, wt_sizes, static_cast<float>(unpacked_weight.q_scale()), weight_zp);
+         unpacked_weight_values,
+         graph,
+         wt_sizes,
+         static_cast<float>(unpacked_weight.q_scale()),
+         weight_zp);
      graph->setInsertPoint(qlinear_node);
      c2_weight[0]->insertBefore(qlinear_node);
      qlinear_node->insertInput(1, c2_weight[0]->output());
@@ -464,7 +481,8 @@ void unpackQuantizedWeightsHelper(

     if (caffe2) {
       auto input_scale = getScaleFromInput(input_node);
-      q_bias = at::quantize_per_tensor(original_bias, weight_scale * input_scale, 0, at::kQInt32);
+      q_bias = at::quantize_per_tensor(
+          original_bias, weight_scale * input_scale, 0, at::kQInt32);
       std::vector<int64_t> bias_values;
       bias_values.reserve(q_bias.numel());
       auto bias_data = (int32_t*)q_bias.data_ptr();
@@ -485,10 +503,8 @@ void unpackQuantizedWeightsHelper(
       for (const auto i : c10::irange(original_bias.numel())) {
         bias_values[i] = bias_data[i];
       }
-      Node* bias = CreateQuantizedBias(
-          bias_values,
-          graph,
-          original_bias.sizes().vec());
+      Node* bias =
+          CreateQuantizedBias(bias_values, graph, original_bias.sizes().vec());
       bias->insertBefore(qlinear_node);
       // For quantized_linear inputs, the order is input, weight, bias, ....
       // We unpack weight into 3 values. then it is
@@ -521,11 +537,13 @@ void unpackQuantizedWeightsHelper(
   }
 }

-static std::unordered_map<c10::ScalarType, c10::ScalarType> qTypeToValType = {
-    {c10::ScalarType::QInt8, c10::ScalarType::Char},
-    {c10::ScalarType::QUInt8, c10::ScalarType::Byte},
-    {c10::ScalarType::QInt32, c10::ScalarType::Int},
-    {c10::ScalarType::QUInt4x2, c10::ScalarType::Byte},
+static std::
+    unordered_map<c10::ScalarType, c10::ScalarType>
+        qTypeToValType = {
+            {c10::ScalarType::QInt8, c10::ScalarType::Char},
+            {c10::ScalarType::QUInt8, c10::ScalarType::Byte},
+            {c10::ScalarType::QInt32, c10::ScalarType::Int},
+            {c10::ScalarType::QUInt4x2, c10::ScalarType::Byte},
 };

 // Unpack quantized tensor inputs into {value, scale, zero_point},
@@ -544,14 +562,21 @@ void UnpackQuantizedTensorInputs(std::shared_ptr<Graph>& graph) {
       continue;
     }
     std::string input_name = g_input->debugName();
-    auto input_value = graph->insertInput(index, input_name + "_value")->setType(shape_type->withScalarType(qTypeToValType[scalar_type]));
+    auto input_value =
+        graph->insertInput(index, input_name + "_value")
+            ->setType(shape_type->withScalarType(qTypeToValType[scalar_type]));
     // scale and zero_point type can be found at torch/include/ATen/Operators.h
-    auto input_scale = graph->insertInput(index + 1, input_name + "_scale")->setType(TensorType::create(
-        at::kDouble, at::kCPU, 0, /*requires_grad=*/c10::nullopt));
-    auto input_zero_point = graph->insertInput(index + 2, input_name + "_zero_point")->setType(TensorType::create(
-        at::kLong, at::kCPU, 0, /*requires_grad=*/c10::nullopt));
+    auto input_scale =
+        graph->insertInput(index + 1, input_name + "_scale")
+            ->setType(TensorType::create(
+                at::kDouble, at::kCPU, 0, /*requires_grad=*/c10::nullopt));
+    auto input_zero_point =
+        graph->insertInput(index + 2, input_name + "_zero_point")
+            ->setType(TensorType::create(
+                at::kLong, at::kCPU, 0, /*requires_grad=*/c10::nullopt));
     std::vector<Value*> converted{input_value, input_scale, input_zero_point};
-    auto input_tuple = graph->prependNode(graph->createTuple(converted))->output();
+    auto input_tuple =
+        graph->prependNode(graph->createTuple(converted))->output();
     g_input->replaceAllUsesWith(input_tuple);
     // Erase the original quantized tensor input.
     graph->eraseInput(index + converted.size());