Bound shape inferencer: fix int8fc scale and bias

Summary:
Previously, when inferring Int8FC, we failed to carry over the scale and zero point properly.

Also fixed the int8 FC weight data type to be int8 instead of uint8, as that's what C2 actually uses.

Test Plan: Use net_runner to lower a single Int8Dequantize op. Previously, the scale and bias would always be 1 and 0. Now the proper values are set.
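For illustration, a minimal standalone C++ sketch of the pattern this diff applies: when the op is quantized (Int8FC, or engine DNNLOWP), the inferred bound shape should carry the op's Y_scale / Y_zero_point arguments instead of the previous hard-coded 1 and 0. The QuantInfo struct and argument maps below are simplified stand-ins, not the real caffe2 ArgumentHelper / CheckAndSetTensorBoundShape API.

#include <iostream>
#include <map>
#include <string>

// Simplified stand-in for the quantization fields of caffe2's ShapeInfo.
struct QuantInfo {
  bool is_quantized = false;
  float scale = 1.0f; // old behavior: always left at 1
  int offset = 0;     // old behavior: always left at 0
};

// Reads Y_scale / Y_zero_point with the same defaults the diff uses.
QuantInfo inferQuantInfo(
    bool int8_fc,
    const std::map<std::string, float>& float_args,
    const std::map<std::string, int>& int_args) {
  QuantInfo info;
  if (int8_fc) {
    info.is_quantized = true;
    auto s = float_args.find("Y_scale");
    info.scale = s != float_args.end() ? s->second : 1.0f;
    auto z = int_args.find("Y_zero_point");
    info.offset = z != int_args.end() ? z->second : 0;
  }
  return info;
}

int main() {
  QuantInfo out = inferQuantInfo(
      /*int8_fc=*/true, {{"Y_scale", 0.025f}}, {{"Y_zero_point", 128}});
  std::cout << out.scale << " " << out.offset << "\n"; // prints: 0.025 128
}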

Reviewed By: yinghai

Differential Revision: D22912186

fbshipit-source-id: a6620c3493e492bdda91da73775bfc9117db12d1
Author: Stephen Chen (committed by Facebook GitHub Bot), 2020-08-06 14:38:24 -07:00
Commit: cdd7db1ffc (parent: b44a10c179)
2 changed files with 30 additions and 3 deletions


@@ -325,7 +325,7 @@ TEST(
           {TensorBoundShape_DimType_CONSTANT,
            TensorBoundShape_DimType_CONSTANT},
           {16, 101},
-          TensorProto_DataType_UINT8,
+          TensorProto_DataType_INT8,
           true));
   shape_map.emplace(
       "B0",


@@ -624,6 +624,9 @@ void BoundShapeInferencer::InferFC(const OperatorDef& op) {
   const ShapeInfo& b_shape_info = b_it->second;
   bool fp16 = (op.type() == "FbFCPacked");
   bool int8_fc = (op.type() == "Int8FC" || op.engine() == "DNNLOWP");
+  float scale = 1;
+  int offset = 0;
   auto x_it = shape_info_.find(op.input(0));
   if (x_it == shape_info_.end()) {
     // We don't have a hint at the x input we try to deduce it from weight
@@ -657,9 +660,21 @@ void BoundShapeInferencer::InferFC(const OperatorDef& op) {
     } else {
       w_data_type = w_shape.data_type();
     }
+    if (int8_fc) {
+      scale = helper.GetSingleArgument<float>("Y_scale", 1);
+      offset = helper.GetSingleArgument<int>("Y_zero_point", 0);
+    }
     // Note: for FbFCPacked, weight is fp16 but activations are in fp32
     CheckAndSetTensorBoundShape(
-        op.input(0), dimTypes, dims, w_data_type, int8_fc ? true : false);
+        op.input(0),
+        dimTypes,
+        dims,
+        w_data_type,
+        int8_fc ? true : false,
+        false,
+        scale,
+        offset);
   } else {
     ShapeInfo& x_shape_info = x_it->second;
     if (x_shape_info.getDimType(0) == TensorBoundShape_DimType_UNKNOWN) {
@@ -692,13 +707,24 @@ void BoundShapeInferencer::InferFC(const OperatorDef& op) {
   } else {
     output_data_type = output_shapes.front().data_type();
  }
+  if (int8_fc) {
+    ArgumentHelper helper(op);
+    scale = helper.GetSingleArgument<float>("Y_scale", 1);
+    offset = helper.GetSingleArgument<int>("Y_zero_point", 0);
+  }
   CheckAndSetTensorBoundShape(
       op.output(0),
       setDimTypeWithFirst(
           TensorBoundShape_DimType_BATCH, output_shapes.front().dims().size()),
       ConvertToVec(output_shapes[0].dims()),
       output_data_type,
-      int8_fc ? true : false);
+      int8_fc ? true : false,
+      false,
+      scale,
+      offset);
 }

 // Infers shapes for operators which are used to transform non-quantized
@@ -838,6 +864,7 @@ void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
       (op.type() != "Int8GenQuantParams");
   float scale = 1;
   int offset = 0;
+  TensorProto::DataType infered_data_type = TensorProto::UNDEFINED;
   if (is_quantized) {
     const static std::map<std::string, int> type_info_from_input = {
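The last hunk is cut off mid-statement (the type_info_from_input initializer is not shown in full). As a rough, assumed sketch of the control flow visible above, with standalone stand-in types rather than the caffe2 API:

#include <string>

enum class DataType { UNDEFINED, UINT8, INT8 };

// Mirrors the defaults InferCommonOp now starts from.
struct CommonOpQuantInfo {
  float scale = 1;
  int offset = 0;
  DataType infered_data_type = DataType::UNDEFINED;
};

CommonOpQuantInfo inferCommonOp(const std::string& op_type) {
  CommonOpQuantInfo info;
  // Approximation of the quantized-op check visible in the hunk above.
  bool is_quantized = op_type.rfind("Int8", 0) == 0 &&
      op_type != "Int8GenQuantParams";
  if (is_quantized) {
    // The commit consults a type_info_from_input map here; its contents
    // are truncated in the diff, so this branch is left schematic.
  }
  return info;
}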