add tensor and cost inference functions (#17684)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17684

Adding tensor and cost inference functions to more int8 operators.

Reviewed By: yinghai

Differential Revision: D14174746

fbshipit-source-id: dfad975fa75899565c8fb61f1b7747a9206ebd22
Jongsoo Park 2019-03-06 23:26:27 -08:00 committed by Facebook Github Bot
parent 3dba1285ab
commit 39423fbdd4
11 changed files with 144 additions and 113 deletions
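Context for the diff below: Caffe2 operator schemas accept a tensor inference function (input shapes to output shapes) and a cost inference function (estimated flops and bytes). This change lifts inference lambdas out of individual OPERATOR_SCHEMA registrations into named functions so the quantized Int8 variants can register the same implementations. A minimal standalone sketch of that sharing pattern, with a toy schema type standing in for OpSchema and plain vectors for TensorShape (none of this is the Caffe2 API):

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

using Shape = std::vector<int>;
using ShapeInferenceFn =
    std::function<std::vector<Shape>(const std::vector<Shape>&)>;

// Toy stand-in for OpSchema: just enough to show the fluent setter.
struct ToySchema {
  std::string name;
  ShapeInferenceFn infer;
  ToySchema& TensorInferenceFunction(ShapeInferenceFn fn) {
    infer = std::move(fn);
    return *this;
  }
};

// Named function, registrable by both a float op and its Int8 twin.
std::vector<Shape> ToyInferenceForFlatten(const std::vector<Shape>& in) {
  int outer = in[0][0];  // axis fixed at 1 for brevity
  int inner = 1;
  for (std::size_t j = 1; j < in[0].size(); ++j) {
    inner *= in[0][j];
  }
  return {{outer, inner}};
}

int main() {
  ToySchema flatten{"Flatten"};
  ToySchema int8_flatten{"Int8Flatten"};
  flatten.TensorInferenceFunction(ToyInferenceForFlatten);
  int8_flatten.TensorInferenceFunction(ToyInferenceForFlatten);  // shared
  const auto out = int8_flatten.infer({{2, 3, 4}});
  std::cout << out[0][0] << "x" << out[0][1] << "\n";  // prints 2x12
}
```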


@@ -106,7 +106,6 @@ Split a tensor into a list of tensors, given a lengths input, along the specifie
The `input` will be split into `K` parts. Each part is of length
`sum(lengths[i*k:i*k+k])`)DOC");
namespace {
OpSchema::Cost CostInferenceForConcat(
const OperatorDef& def,
const vector<TensorShape>& in) {
@@ -143,6 +142,7 @@ OpSchema::Cost CostInferenceForConcat(
return cost;
}
namespace {
std::pair<std::vector<DeviceOption>, std::vector<DeviceOption>>
concatOpDevInfer(const OperatorDef& def) {
auto op_device =
@@ -157,6 +157,80 @@ concatOpDevInfer(const OperatorDef& def) {
}
} // namespace
vector<TensorShape> TensorInferenceForConcat(
const OperatorDef& def,
const vector<TensorShape>& in) {
ArgumentHelper helper(def);
const int axis = helper.HasArgument("axis")
? helper.GetSingleArgument<int>("axis", -1)
: GetDimFromOrderString(
helper.GetSingleArgument<string>("order", "NCHW"));
bool add_axis = helper.GetSingleArgument<int>("add_axis", 0) != 0;
int adj_size = in[0].dims_size() + (add_axis ? 1 : 0);
const int canonical_axis = canonical_axis_index_(axis, adj_size);
CAFFE_ENFORCE_LT(canonical_axis, adj_size, "Axis not in input ndim range.");
CAFFE_ENFORCE_GT(in.size(), 0);
vector<int> split_shape(1, in.size());
vector<int> out_shape(in[0].dims().begin(), in[0].dims().end());
if (add_axis) {
for (int i = 1; i < in.size(); ++i) {
CAFFE_ENFORCE_EQ(
in[0].dims().size(),
in[i].dims().size(),
"All inputs of Concat should have same dims when add_axis = 1. "
"Got different sizes for inputs 0 and ",
i);
for (int j = 0; j < in[0].dims().size(); ++j) {
CAFFE_ENFORCE_EQ(
in[0].dims(j),
in[i].dims(j),
"All inputs of Concat should have same dims when add_axis = 1. "
"Got different dims for inputs 0 and ",
i,
". At dim: ",
j);
}
}
out_shape.insert(out_shape.begin() + canonical_axis, in.size());
} else {
for (int i = 1; i < in.size(); ++i) {
CAFFE_ENFORCE_EQ(
in[0].dims().size(),
in[i].dims().size(),
"All inputs of Concat should have same dims except "
"canonical_axis dim that is equal to ",
canonical_axis,
"Got different sizes for inputs 0 and ",
i);
for (int j = 0; j < in[0].dims().size(); ++j) {
if (j == canonical_axis) {
continue;
}
CAFFE_ENFORCE_EQ(
in[0].dims(j),
in[i].dims(j),
"All inputs of Concat should have same dims except "
"canonical_axis dim that is equal to ",
canonical_axis,
"Got different dims for inputs 0 and ",
i,
". At dim: ",
j);
}
}
for (int i = 1; i < in.size(); ++i) {
out_shape[canonical_axis] += in[i].dims(canonical_axis);
}
}
if (def.output_size() == 1) {
return vector<TensorShape>{CreateTensorShape(out_shape, in[0].data_type())};
}
return vector<TensorShape>{
CreateTensorShape(out_shape, in[0].data_type()),
CreateTensorShape(split_shape, TensorProto::INT32)};
}
REGISTER_CPU_OPERATOR(Concat, ConcatOp<CPUContext>);
OPERATOR_SCHEMA(Concat)
.NumInputs(1, INT_MAX)
@@ -168,83 +242,8 @@ OPERATOR_SCHEMA(Concat)
.Arg(
"add_axis",
"*(type: int)* Pass non-zero integer to add the axis specified in `axis` to all input tensors.")
.TensorInferenceFunction(OpSchema::NeedsAllInputShapes([](const OperatorDef&
def,
const vector<
TensorShape>&
in) {
ArgumentHelper helper(def);
const int axis = helper.HasArgument("axis")
? helper.GetSingleArgument<int>("axis", -1)
: GetDimFromOrderString(
helper.GetSingleArgument<string>("order", "NCHW"));
bool add_axis = helper.GetSingleArgument<int>("add_axis", 0) != 0;
int adj_size = in[0].dims_size() + (add_axis ? 1 : 0);
const int canonical_axis = canonical_axis_index_(axis, adj_size);
CAFFE_ENFORCE_LT(
canonical_axis, adj_size, "Axis not in input ndim range.");
CAFFE_ENFORCE_GT(in.size(), 0);
vector<int> split_shape(1, in.size());
vector<int> out_shape(in[0].dims().begin(), in[0].dims().end());
if (add_axis) {
for (int i = 1; i < in.size(); ++i) {
CAFFE_ENFORCE_EQ(
in[0].dims().size(),
in[i].dims().size(),
"All inputs of Concat should have same dims when add_axis = 1. "
"Got different sizes for inputs 0 and ",
i);
for (int j = 0; j < in[0].dims().size(); ++j) {
CAFFE_ENFORCE_EQ(
in[0].dims(j),
in[i].dims(j),
"All inputs of Concat should have same dims when add_axis = 1. "
"Got different dims for inputs 0 and ",
i,
". At dim: ",
j);
}
}
out_shape.insert(out_shape.begin() + canonical_axis, in.size());
} else {
for (int i = 1; i < in.size(); ++i) {
CAFFE_ENFORCE_EQ(
in[0].dims().size(),
in[i].dims().size(),
"All inputs of Concat should have same dims except "
"canonical_axis dim that is equal to ",
canonical_axis,
"Got different sizes for inputs 0 and ",
i);
for (int j = 0; j < in[0].dims().size(); ++j) {
if (j == canonical_axis) {
continue;
}
CAFFE_ENFORCE_EQ(
in[0].dims(j),
in[i].dims(j),
"All inputs of Concat should have same dims except "
"canonical_axis dim that is equal to ",
canonical_axis,
"Got different dims for inputs 0 and ",
i,
". At dim: ",
j);
}
}
for (int i = 1; i < in.size(); ++i) {
out_shape[canonical_axis] += in[i].dims(canonical_axis);
}
}
if (def.output_size() == 1) {
return vector<TensorShape>{
CreateTensorShape(out_shape, in[0].data_type())};
}
return vector<TensorShape>{
CreateTensorShape(out_shape, in[0].data_type()),
CreateTensorShape(split_shape, TensorProto::INT32)};
}))
.TensorInferenceFunction(
OpSchema::NeedsAllInputShapes(TensorInferenceForConcat))
.CostInferenceFunction(CostInferenceForConcat)
.DeviceInferenceFunction(concatOpDevInfer)
.SetDoc(R"DOC(

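For a concrete sense of what TensorInferenceForConcat computes in the common add_axis == 0 branch: dims must agree on every axis except the concat axis, whose sizes accumulate, and the optional second output reports the shape of split_info, a 1-D tensor with one entry per input. A hypothetical standalone reduction, with plain std::vector<int> in place of TensorShape:

```cpp
#include <cassert>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

using Shape = std::vector<int>;

// Axis concatenation: dims agree everywhere except `axis`, whose sizes
// add up; the second result is the *shape* of split_info, a 1-D tensor
// with one entry per input.
std::pair<Shape, Shape> ConcatShapes(const std::vector<Shape>& in, int axis) {
  Shape out = in[0];
  for (std::size_t i = 1; i < in.size(); ++i) {
    assert(in[i].size() == in[0].size());
    for (std::size_t j = 0; j < in[i].size(); ++j) {
      if (static_cast<int>(j) != axis) assert(in[i][j] == in[0][j]);
    }
    out[axis] += in[i][axis];
  }
  return {out, Shape{static_cast<int>(in.size())}};
}

int main() {
  const auto result = ConcatShapes({{2, 3, 4}, {2, 5, 4}}, /*axis=*/1);
  // Output shape is 2x8x4; split_info has shape {2} (two inputs).
  std::cout << result.first[1] << " " << result.second[0] << "\n";  // 8 2
}
```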

@@ -335,6 +335,14 @@ bool ConcatOp<Context>::RunOnDevice() {
return true;
}
OpSchema::Cost CostInferenceForConcat(
const OperatorDef& def,
const std::vector<TensorShape>& in);
std::vector<TensorShape> TensorInferenceForConcat(
const OperatorDef& def,
const std::vector<TensorShape>& in);
} // namespace caffe2
#endif // CAFFE2_OPERATORS_CONCAT_SPLIT_OP_H_


@@ -2,17 +2,6 @@
namespace caffe2 {
namespace {
OpSchema::Cost CostInferenceForSum(
const OperatorDef& def,
const vector<TensorShape>& in) {
struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
cost.flops *= (in.size() - 1);
cost.params_bytes = 0;
return cost;
}
} // namespace
REGISTER_CPU_OPERATOR(Sum, SumOp<CPUContext>);
OPERATOR_SCHEMA(Sum)


@@ -7,27 +7,7 @@ REGISTER_CPU_OPERATOR(Flatten, FlattenOp<CPUContext>);
OPERATOR_SCHEMA(Flatten)
.NumInputs(1)
.NumOutputs(1)
.TensorInferenceFunction([](const OperatorDef& def,
const vector<TensorShape>& in) {
ArgumentHelper helper(def);
const int axis = helper.GetSingleArgument<int>("axis", 1);
vector<TensorShape> out(1);
int64_t outer = 1;
int64_t inner = 1;
std::size_t index = 0;
for (auto d : in[0].dims()) {
if (index < axis) {
outer *= d;
} else {
inner *= d;
}
++index;
}
out[0].set_data_type(in[0].data_type());
out[0].add_dims(outer);
out[0].add_dims(inner);
return out;
})
.TensorInferenceFunction(TensorInferenceForFlatten)
.SetDoc(R"DOC(
Flattens the input tensor into a 2D matrix. If input tensor has shape
$(d_0, d_1, ..., d_n)$ then the output will have shape


@@ -33,6 +33,29 @@ class FlattenOp : public Operator<Context> {
int axis_;
};
inline std::vector<TensorShape> TensorInferenceForFlatten(
const OperatorDef& def,
const std::vector<TensorShape>& in) {
ArgumentHelper helper(def);
const int axis = helper.GetSingleArgument<int>("axis", 1);
std::vector<TensorShape> out(1);
int64_t outer = 1;
int64_t inner = 1;
std::size_t index = 0;
for (auto d : in[0].dims()) {
if (index < axis) {
outer *= d;
} else {
inner *= d;
}
++index;
}
out[0].set_data_type(in[0].data_type());
out[0].add_dims(outer);
out[0].add_dims(inner);
return out;
}
} // namespace caffe2
#endif // CAFFE2_OPERATORS_FLATTEN_OP_H_

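As a sanity check on TensorInferenceForFlatten above: every dim before `axis` folds into `outer` and the rest into `inner`, giving a 2-D output. A standalone sketch (the helper name is ours, not part of the diff):

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Flatten to 2-D: multiply dims [0, axis) into `outer`, [axis, n) into `inner`.
std::pair<int64_t, int64_t> FlattenDims(const std::vector<int64_t>& dims,
                                        int axis) {
  int64_t outer = 1;
  int64_t inner = 1;
  for (std::size_t i = 0; i < dims.size(); ++i) {
    (static_cast<int>(i) < axis ? outer : inner) *= dims[i];
  }
  return {outer, inner};
}

int main() {
  const auto shape = FlattenDims({2, 3, 4, 5}, /*axis=*/2);
  std::cout << shape.first << "x" << shape.second << "\n";  // prints 6x20
}
```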

@@ -1,6 +1,8 @@
#include "caffe2/operators/quantized/int8_add_op.h"
#include <climits>
#include "caffe2/operators/quantized/int8_add_op.h"
#include "caffe2/operators/utility_ops.h"
namespace caffe2 {
@@ -55,6 +57,8 @@ OPERATOR_SCHEMA(Int8Sum)
.NumInputs(1, std::numeric_limits<int>::max())
.NumOutputs(1)
.AllowInplace({{0, 0}, {1, 0}})
.CostInferenceFunction(CostInferenceForSum)
.IdenticalTypeAndShapeOfInput(0)
.Arg("Y_scale", "Output tensor quantization scale")
.Arg("Y_zero_point", "Output tensor quantization offset");
@@ -62,6 +66,8 @@ OPERATOR_SCHEMA(Int8SumRelu)
.NumInputs(1, std::numeric_limits<int>::max())
.NumOutputs(1)
.AllowInplace({{0, 0}, {1, 0}})
.CostInferenceFunction(CostInferenceForSum)
.IdenticalTypeAndShapeOfInput(0)
.Arg("Y_scale", "Output tensor quantization scale")
.Arg("Y_zero_point", "Output tensor quantization offset");


@@ -1,5 +1,7 @@
#include "caffe2/operators/quantized/int8_concat_op.h"
#include "caffe2/operators/concat_split_op.h"
namespace caffe2 {
REGISTER_CPU_OPERATOR(Int8Concat, int8::Int8ConcatOp);
@@ -14,6 +16,9 @@ OPERATOR_SCHEMA(Int8Concat)
"add_axis",
"Pass 1 to add the axis specified in arg 'axis' to all "
"input tensors")
.TensorInferenceFunction(
OpSchema::NeedsAllInputShapes(TensorInferenceForConcat))
.CostInferenceFunction(CostInferenceForConcat)
.SetDoc("Concatenate a list of tensors into a single tensor")
.Output(0, "concat_result", "Concatenated tensor")
.Output(1, "split_info", "The dimensions of the inputs.")


@@ -1,12 +1,19 @@
#include "caffe2/operators/quantized/int8_fc_op.h"
#include <functional>
#include "caffe2/operators/fc_inference.h"
namespace caffe2 {
REGISTER_CPU_OPERATOR(Int8FC, int8::Int8FCOp);
using namespace std::placeholders;
OPERATOR_SCHEMA(Int8FC)
.NumInputs(3)
.NumOutputs(1)
.TensorInferenceFunction(std::bind(FCShapeInference, _1, _2, false))
.CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2, false))
.SetDoc(R"DOC(
Computes the result of passing an input vector X into a fully
connected layer with 2D weight matrix W and 1D bias vector b. That is,

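The std::bind calls above adapt three-argument helpers from fc_inference.h to the two-argument signature the schema setter expects; the bound `false` presumably pins the trailing pretransposed-weight flag. A generic sketch of that currying pattern, with a hypothetical stand-in for FCShapeInference:

```cpp
#include <functional>
#include <iostream>
#include <vector>

using Shape = std::vector<int>;
// Two-argument signature the schema setter expects in this sketch.
using InferFn = std::function<Shape(const Shape&, const Shape&)>;

// Hypothetical stand-in for FCShapeInference(def, in, pretransposed_weight):
// W is stored N x K normally, K x N when pretransposed.
Shape FCShape(const Shape& x, const Shape& w, bool pretransposed) {
  const int n = pretransposed ? w[1] : w[0];  // output features
  return {x[0], n};                           // batch x features
}

int main() {
  using namespace std::placeholders;
  InferFn infer = std::bind(FCShape, _1, _2, false);  // pin the flag
  const Shape out = infer({8, 64}, {16, 64});         // X: 8x64, W: 16x64
  std::cout << out[0] << "x" << out[1] << "\n";       // prints 8x16
}
```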

@@ -1,5 +1,7 @@
#include "caffe2/operators/quantized/int8_flatten_op.h"
#include "caffe2/operators/flatten_op.h"
namespace caffe2 {
REGISTER_CPU_OPERATOR(Int8Flatten, int8::Int8FlattenOp);
@@ -7,6 +9,7 @@ REGISTER_CPU_OPERATOR(Int8Flatten, int8::Int8FlattenOp);
OPERATOR_SCHEMA(Int8Flatten)
.NumInputs(1)
.NumOutputs(1)
.TensorInferenceFunction(TensorInferenceForFlatten)
.SetDoc(R"DOC(
Flattens the input tensor into a 2D matrix. If input tensor has shape
(d_0, d_1, ... d_n) then the output will have shape


@@ -12,7 +12,8 @@ OPERATOR_SCHEMA(Int8GivenTensorFill)
.SetDoc(R"DOC(
Creates quantized tensor of type char(byte) with scale and zero point info.
)DOC")
.Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info");
.Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info")
.TensorInferenceFunction(FillerTensorInference<>);
OPERATOR_SCHEMA(Int8GivenIntTensorFill)
.NumInputs(0)
@@ -24,7 +25,8 @@ OPERATOR_SCHEMA(Int8GivenIntTensorFill)
.SetDoc(R"DOC(
Creates quantized tensor of type int32 with scale and zero point info.
)DOC")
.Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info");
.Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info")
.TensorInferenceFunction(FillerTensorInference<>);
REGISTER_CPU_OPERATOR(Int8GivenTensorFill, int8::Int8GivenTensorFillOp);
REGISTER_CPU_OPERATOR(Int8GivenIntTensorFill, int8::Int8GivenIntTensorFillOp);

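FillerTensorInference<> lets a fill operator with no inputs report an output shape, typically derived from the operator's arguments (the real template also carries the output data type). A hypothetical standalone reduction of that idea, with a map standing in for ArgumentHelper lookups on the OperatorDef:

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using Shape = std::vector<int64_t>;
// A map stands in for ArgumentHelper lookups on the OperatorDef.
using Args = std::map<std::string, Shape>;

// Filler ops take no inputs, so the inferred output shape must come from
// the op's arguments (here just a "shape" argument).
Shape FillerShape(const Args& args) {
  const auto it = args.find("shape");
  return it != args.end() ? it->second : Shape{};
}

int main() {
  const Args def = {{"shape", {32, 1, 28, 28}}};
  const Shape out = FillerShape(def);
  std::cout << out.size() << " dims, dim0=" << out[0] << "\n";  // 4 dims, dim0=32
}
```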

@@ -317,6 +317,15 @@ class SumOp : public Operator<Context> {
}
};
inline OpSchema::Cost CostInferenceForSum(
const OperatorDef& def,
const std::vector<TensorShape>& in) {
struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
cost.flops *= (in.size() - 1);
cost.params_bytes = 0;
return cost;
}
// WeightedSumOp computes the weighted sum of several tensors. The input should
// be in the form X_0, weight_0, X_1, weight_1, ... where X_i all have the same
// shape, and weight_i are size 1 tensors that specifies the weight of each
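
The arithmetic behind CostInferenceForSum above: PointwiseCostInference<1> charges one flop per output element, and summing K same-shape tensors takes K - 1 elementwise adds, hence the (in.size() - 1) factor. A standalone check of that count:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// flops(Sum) = (K - 1) * number_of_elements: one add per element for each
// input after the first. params_bytes is zero since Sum has no weights.
uint64_t SumFlops(std::size_t num_inputs, const std::vector<int64_t>& dims) {
  uint64_t elems = 1;
  for (const int64_t d : dims) elems *= d;
  return elems * (num_inputs - 1);
}

int main() {
  // Three 2x3x4 tensors: 24 elements, 2 adds each.
  std::cout << SumFlops(3, {2, 3, 4}) << "\n";  // prints 48
}
```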