add tensor and cost inference functions (#17684)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/17684

Adding tensor and cost inference functions to more int8 operators.

Reviewed By: yinghai

Differential Revision: D14174746

fbshipit-source-id: dfad975fa75899565c8fb61f1b7747a9206ebd22
This commit is contained in:
parent 3dba1285ab
commit 39423fbdd4
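
Note: the short sketches interspersed below are editorial examples, not part of the commit. For orientation, in caffe2 a schema's TensorInferenceFunction maps input shapes to output shapes without running the operator, and its CostInferenceFunction returns an OpSchema::Cost estimate (flops, bytes moved) used for planning and profiling. A minimal sketch of the registration pattern this diff applies; the operator name MyInt8Op is a placeholder:

    // Sketch only: "MyInt8Op" is hypothetical; the schema calls mirror this diff.
    OPERATOR_SCHEMA(MyInt8Op)
        .NumInputs(1, INT_MAX)
        .NumOutputs(1)
        // Given all input TensorShapes, produce the output TensorShapes.
        .TensorInferenceFunction(
            OpSchema::NeedsAllInputShapes(TensorInferenceForConcat))
        // Produce an OpSchema::Cost (flops, bytes read/written) estimate.
        .CostInferenceFunction(CostInferenceForConcat);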
@@ -106,7 +106,6 @@ Split a tensor into a list of tensors, given a lengths input, along the specified
 The `input` will be split into `K` parts. Each part of length
 `sum(lengths[i*k:i*k+k))`)DOC");
 
-namespace {
 OpSchema::Cost CostInferenceForConcat(
     const OperatorDef& def,
     const vector<TensorShape>& in) {
@@ -143,6 +142,7 @@ OpSchema::Cost CostInferenceForConcat(
   return cost;
 }
 
+namespace {
 std::pair<std::vector<DeviceOption>, std::vector<DeviceOption>>
 concatOpDevInfer(const OperatorDef& def) {
   auto op_device =
@@ -157,6 +157,80 @@ concatOpDevInfer(const OperatorDef& def) {
 }
 } // namespace
 
+vector<TensorShape> TensorInferenceForConcat(
+    const OperatorDef& def,
+    const vector<TensorShape>& in) {
+  ArgumentHelper helper(def);
+  const int axis = helper.HasArgument("axis")
+      ? helper.GetSingleArgument<int>("axis", -1)
+      : GetDimFromOrderString(
+            helper.GetSingleArgument<string>("order", "NCHW"));
+  bool add_axis = helper.GetSingleArgument<int>("add_axis", 0) != 0;
+  int adj_size = in[0].dims_size() + (add_axis ? 1 : 0);
+  const int canonical_axis = canonical_axis_index_(axis, adj_size);
+  CAFFE_ENFORCE_LT(canonical_axis, adj_size, "Axis not in input ndim range.");
+  CAFFE_ENFORCE_GT(in.size(), 0);
+  vector<int> split_shape(1, in.size());
+  vector<int> out_shape(in[0].dims().begin(), in[0].dims().end());
+  if (add_axis) {
+    for (int i = 1; i < in.size(); ++i) {
+      CAFFE_ENFORCE_EQ(
+          in[0].dims().size(),
+          in[i].dims().size(),
+          "All inputs of Concat should have same dims when add_axis = 1. "
+          "Got different sizes for inputs 0 and ",
+          i);
+      for (int j = 0; j < in[0].dims().size(); ++j) {
+        CAFFE_ENFORCE_EQ(
+            in[0].dims(j),
+            in[i].dims(j),
+            "All inputs of Concat should have same dims when add_axis = 1. "
+            "Got different dims for inputs 0 and ",
+            i,
+            ". At dim: ",
+            j);
+      }
+    }
+    out_shape.insert(out_shape.begin() + canonical_axis, in.size());
+  } else {
+    for (int i = 1; i < in.size(); ++i) {
+      CAFFE_ENFORCE_EQ(
+          in[0].dims().size(),
+          in[i].dims().size(),
+          "All inputs of Concat should have same dims except "
+          "canonical_axis dim that is equal to ",
+          canonical_axis,
+          "Got different sizes for inputs 0 and ",
+          i);
+      for (int j = 0; j < in[0].dims().size(); ++j) {
+        if (j == canonical_axis) {
+          continue;
+        }
+        CAFFE_ENFORCE_EQ(
+            in[0].dims(j),
+            in[i].dims(j),
+            "All inputs of Concat should have same dims except "
+            "canonical_axis dim that is equal to ",
+            canonical_axis,
+            "Got different dims for inputs 0 and ",
+            i,
+            ". At dim: ",
+            j);
+      }
+    }
+
+    for (int i = 1; i < in.size(); ++i) {
+      out_shape[canonical_axis] += in[i].dims(canonical_axis);
+    }
+  }
+  if (def.output_size() == 1) {
+    return vector<TensorShape>{CreateTensorShape(out_shape, in[0].data_type())};
+  }
+  return vector<TensorShape>{
+      CreateTensorShape(out_shape, in[0].data_type()),
+      CreateTensorShape(split_shape, TensorProto::INT32)};
+}
+
 REGISTER_CPU_OPERATOR(Concat, ConcatOp<CPUContext>);
 OPERATOR_SCHEMA(Concat)
     .NumInputs(1, INT_MAX)
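A worked example of TensorInferenceForConcat above (a sketch, not part of the commit; it uses only the protobuf-generated OperatorDef API and the CreateTensorShape helper seen in the diff). Concatenating shapes (2, 3, 4) and (2, 5, 4) along axis 1 infers (2, 8, 4), plus an INT32 split_info of shape (2) when a second output is declared:

    // Hedged usage sketch for TensorInferenceForConcat.
    OperatorDef def;
    def.set_type("Concat");
    def.add_input("X0");
    def.add_input("X1");
    def.add_output("Y");
    def.add_output("split_info");
    auto* axis_arg = def.add_arg();
    axis_arg->set_name("axis");
    axis_arg->set_i(1);

    std::vector<TensorShape> in{
        CreateTensorShape(std::vector<int>{2, 3, 4}, TensorProto::FLOAT),
        CreateTensorShape(std::vector<int>{2, 5, 4}, TensorProto::FLOAT)};
    const auto out = TensorInferenceForConcat(def, in);
    // out[0]: dims (2, 8, 4) -- axis-1 sizes 3 and 5 are summed
    // out[1]: dims (2), INT32 -- one split-size entry per input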
@@ -168,83 +242,8 @@ OPERATOR_SCHEMA(Concat)
     .Arg(
         "add_axis",
         "*(type: int)* Pass non-zero integer to add the axis specified in `axis` to all input tensors.")
-    .TensorInferenceFunction(OpSchema::NeedsAllInputShapes([](const OperatorDef&
-                                                                  def,
-                                                              const vector<
-                                                                  TensorShape>&
-                                                                  in) {
-      ArgumentHelper helper(def);
-      const int axis = helper.HasArgument("axis")
-          ? helper.GetSingleArgument<int>("axis", -1)
-          : GetDimFromOrderString(
-                helper.GetSingleArgument<string>("order", "NCHW"));
-      bool add_axis = helper.GetSingleArgument<int>("add_axis", 0) != 0;
-      int adj_size = in[0].dims_size() + (add_axis ? 1 : 0);
-      const int canonical_axis = canonical_axis_index_(axis, adj_size);
-      CAFFE_ENFORCE_LT(
-          canonical_axis, adj_size, "Axis not in input ndim range.");
-      CAFFE_ENFORCE_GT(in.size(), 0);
-      vector<int> split_shape(1, in.size());
-      vector<int> out_shape(in[0].dims().begin(), in[0].dims().end());
-      if (add_axis) {
-        for (int i = 1; i < in.size(); ++i) {
-          CAFFE_ENFORCE_EQ(
-              in[0].dims().size(),
-              in[i].dims().size(),
-              "All inputs of Concat should have same dims when add_axis = 1. "
-              "Got different sizes for inputs 0 and ",
-              i);
-          for (int j = 0; j < in[0].dims().size(); ++j) {
-            CAFFE_ENFORCE_EQ(
-                in[0].dims(j),
-                in[i].dims(j),
-                "All inputs of Concat should have same dims when add_axis = 1. "
-                "Got different dims for inputs 0 and ",
-                i,
-                ". At dim: ",
-                j);
-          }
-        }
-        out_shape.insert(out_shape.begin() + canonical_axis, in.size());
-      } else {
-        for (int i = 1; i < in.size(); ++i) {
-          CAFFE_ENFORCE_EQ(
-              in[0].dims().size(),
-              in[i].dims().size(),
-              "All inputs of Concat should have same dims except "
-              "canonical_axis dim that is equal to ",
-              canonical_axis,
-              "Got different sizes for inputs 0 and ",
-              i);
-          for (int j = 0; j < in[0].dims().size(); ++j) {
-            if (j == canonical_axis) {
-              continue;
-            }
-            CAFFE_ENFORCE_EQ(
-                in[0].dims(j),
-                in[i].dims(j),
-                "All inputs of Concat should have same dims except "
-                "canonical_axis dim that is equal to ",
-                canonical_axis,
-                "Got different dims for inputs 0 and ",
-                i,
-                ". At dim: ",
-                j);
-          }
-        }
-
-        for (int i = 1; i < in.size(); ++i) {
-          out_shape[canonical_axis] += in[i].dims(canonical_axis);
-        }
-      }
-      if (def.output_size() == 1) {
-        return vector<TensorShape>{
-            CreateTensorShape(out_shape, in[0].data_type())};
-      }
-      return vector<TensorShape>{
-          CreateTensorShape(out_shape, in[0].data_type()),
-          CreateTensorShape(split_shape, TensorProto::INT32)};
-    }))
+    .TensorInferenceFunction(
+        OpSchema::NeedsAllInputShapes(TensorInferenceForConcat))
+    .CostInferenceFunction(CostInferenceForConcat)
     .DeviceInferenceFunction(concatOpDevInfer)
     .SetDoc(R"DOC(
@@ -335,6 +335,14 @@ bool ConcatOp<Context>::RunOnDevice() {
   return true;
 }
 
+OpSchema::Cost CostInferenceForConcat(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in);
+
+std::vector<TensorShape> TensorInferenceForConcat(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in);
+
 } // namespace caffe2
 
 #endif // CAFFE2_OPERATORS_CONCAT_SPLIT_OP_H_
@@ -2,17 +2,6 @@
 
 namespace caffe2 {
 
-namespace {
-OpSchema::Cost CostInferenceForSum(
-    const OperatorDef& def,
-    const vector<TensorShape>& in) {
-  struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
-  cost.flops *= (in.size() - 1);
-  cost.params_bytes = 0;
-  return cost;
-}
-} // namespace
-
 REGISTER_CPU_OPERATOR(Sum, SumOp<CPUContext>);
 
 OPERATOR_SCHEMA(Sum)
@@ -7,27 +7,7 @@ REGISTER_CPU_OPERATOR(Flatten, FlattenOp<CPUContext>);
 OPERATOR_SCHEMA(Flatten)
     .NumInputs(1)
     .NumOutputs(1)
-    .TensorInferenceFunction([](const OperatorDef& def,
-                                const vector<TensorShape>& in) {
-      ArgumentHelper helper(def);
-      const int axis = helper.GetSingleArgument<int>("axis", 1);
-      vector<TensorShape> out(1);
-      int64_t outer = 1;
-      int64_t inner = 1;
-      std::size_t index = 0;
-      for (auto d : in[0].dims()) {
-        if (index < axis) {
-          outer *= d;
-        } else {
-          inner *= d;
-        }
-        ++index;
-      }
-      out[0].set_data_type(in[0].data_type());
-      out[0].add_dims(outer);
-      out[0].add_dims(inner);
-      return out;
-    })
+    .TensorInferenceFunction(TensorInferenceForFlatten)
     .SetDoc(R"DOC(
 Flattens the input tensor into a 2D matrix. If input tensor has shape
 $(d_0, d_1, ..., d_n)$ then the output will have shape
@@ -33,6 +33,29 @@ class FlattenOp : public Operator<Context> {
   int axis_;
 };
 
+inline std::vector<TensorShape> TensorInferenceForFlatten(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in) {
+  ArgumentHelper helper(def);
+  const int axis = helper.GetSingleArgument<int>("axis", 1);
+  std::vector<TensorShape> out(1);
+  int64_t outer = 1;
+  int64_t inner = 1;
+  std::size_t index = 0;
+  for (auto d : in[0].dims()) {
+    if (index < axis) {
+      outer *= d;
+    } else {
+      inner *= d;
+    }
+    ++index;
+  }
+  out[0].set_data_type(in[0].data_type());
+  out[0].add_dims(outer);
+  out[0].add_dims(inner);
+  return out;
+}
+
 } // namespace caffe2
 
 #endif // CAFFE2_OPERATORS_FLATTEN_OP_H_
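A quick worked example for the inline function above (a sketch, not from the commit): with input shape (2, 3, 4, 5) and axis = 2, outer = 2 * 3 = 6 and inner = 4 * 5 = 20, so the inferred output shape is (6, 20):

    // Hedged sketch: Flatten shape inference at axis = 2.
    OperatorDef def;
    def.set_type("Flatten");
    def.add_input("X");
    def.add_output("Y");
    auto* axis_arg = def.add_arg();
    axis_arg->set_name("axis");
    axis_arg->set_i(2);

    std::vector<TensorShape> in{
        CreateTensorShape(std::vector<int>{2, 3, 4, 5}, TensorProto::FLOAT)};
    const auto out = TensorInferenceForFlatten(def, in);
    // out[0]: dims (6, 20) -- outer = 2 * 3, inner = 4 * 5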
@@ -1,6 +1,8 @@
-#include "caffe2/operators/quantized/int8_add_op.h"
-
 #include <climits>
 
+#include "caffe2/operators/quantized/int8_add_op.h"
+#include "caffe2/operators/utility_ops.h"
+
 namespace caffe2 {
 
@@ -55,6 +57,8 @@ OPERATOR_SCHEMA(Int8Sum)
     .NumInputs(1, std::numeric_limits<int>::max())
     .NumOutputs(1)
     .AllowInplace({{0, 0}, {1, 0}})
+    .CostInferenceFunction(CostInferenceForSum)
+    .IdenticalTypeAndShapeOfInput(0)
     .Arg("Y_scale", "Output tensor quantization scale")
     .Arg("Y_zero_point", "Output tensor quantization offset");
 
@@ -62,6 +66,8 @@ OPERATOR_SCHEMA(Int8SumRelu)
     .NumInputs(1, std::numeric_limits<int>::max())
     .NumOutputs(1)
     .AllowInplace({{0, 0}, {1, 0}})
+    .CostInferenceFunction(CostInferenceForSum)
+    .IdenticalTypeAndShapeOfInput(0)
     .Arg("Y_scale", "Output tensor quantization scale")
     .Arg("Y_zero_point", "Output tensor quantization offset");
 
@@ -1,5 +1,7 @@
 #include "caffe2/operators/quantized/int8_concat_op.h"
 
+#include "caffe2/operators/concat_split_op.h"
+
 namespace caffe2 {
 
 REGISTER_CPU_OPERATOR(Int8Concat, int8::Int8ConcatOp);
@@ -14,6 +16,9 @@ OPERATOR_SCHEMA(Int8Concat)
         "add_axis",
         "Pass 1 to add the axis specified in arg 'axis' to all "
         "input tensors")
+    .TensorInferenceFunction(
+        OpSchema::NeedsAllInputShapes(TensorInferenceForConcat))
+    .CostInferenceFunction(CostInferenceForConcat)
     .SetDoc("Concatenate a list of tensors into a single tensor")
     .Output(0, "concat_result", "Concatenated tensor")
     .Output(1, "split_info", "The dimensions of the inputs.")
@@ -1,12 +1,19 @@
 #include "caffe2/operators/quantized/int8_fc_op.h"
 
+#include <functional>
+
+#include "caffe2/operators/fc_inference.h"
+
 namespace caffe2 {
 
 REGISTER_CPU_OPERATOR(Int8FC, int8::Int8FCOp);
 
+using namespace std::placeholders;
 OPERATOR_SCHEMA(Int8FC)
     .NumInputs(3)
     .NumOutputs(1)
+    .TensorInferenceFunction(std::bind(FCShapeInference, _1, _2, false))
+    .CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2, false))
     .SetDoc(R"DOC(
 Computes the result of passing an input vector X into a fully
 connected layer with 2D weight matrix W and 1D bias vector b. That is,
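The std::bind(..., _1, _2, false) calls above curry the helper's trailing boolean (whether the weight matrix is pretransposed; that reading of the parameter is an assumption based on fc_inference.h) so a three-argument function fits the two-argument inference signature. An equivalent lambda, as a sketch:

    // Hedged sketch: lambda equivalent of std::bind(FCShapeInference, _1, _2, false).
    auto fc_shape_fn = [](const OperatorDef& def,
                          const std::vector<TensorShape>& in) {
      // Third argument assumed to mean "weight is pretransposed"; false
      // matches the bind above.
      return FCShapeInference(def, in, false);
    };
    // fc_shape_fn can be passed to .TensorInferenceFunction(...) unchanged.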
@@ -1,5 +1,7 @@
 #include "caffe2/operators/quantized/int8_flatten_op.h"
 
+#include "caffe2/operators/flatten_op.h"
+
 namespace caffe2 {
 
 REGISTER_CPU_OPERATOR(Int8Flatten, int8::Int8FlattenOp);
@@ -7,6 +9,7 @@ REGISTER_CPU_OPERATOR(Int8Flatten, int8::Int8FlattenOp);
 OPERATOR_SCHEMA(Int8Flatten)
     .NumInputs(1)
     .NumOutputs(1)
+    .TensorInferenceFunction(TensorInferenceForFlatten)
     .SetDoc(R"DOC(
 Flattens the input tensor into a 2D matrix. If input tensor has shape
 (d_0, d_1, ... d_n) then the output will have shape
@@ -12,7 +12,8 @@ OPERATOR_SCHEMA(Int8GivenTensorFill)
     .SetDoc(R"DOC(
 Creates quantized tensor of type char(byte) with scale and zero point info.
 )DOC")
-    .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info");
+    .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info")
+    .TensorInferenceFunction(FillerTensorInference<>);
 
 OPERATOR_SCHEMA(Int8GivenIntTensorFill)
     .NumInputs(0)
@@ -24,7 +25,8 @@ OPERATOR_SCHEMA(Int8GivenIntTensorFill)
     .SetDoc(R"DOC(
 Creates quantized tensor of type int32 with scale and zero point info.
 )DOC")
-    .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info");
+    .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info")
+    .TensorInferenceFunction(FillerTensorInference<>);
 
 REGISTER_CPU_OPERATOR(Int8GivenTensorFill, int8::Int8GivenTensorFillOp);
 REGISTER_CPU_OPERATOR(Int8GivenIntTensorFill, int8::Int8GivenIntTensorFillOp);
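For the two filler schemas above, FillerTensorInference<> infers the output shape from the op's "shape" argument (behavior assumed from caffe2's filler op header). A sketch:

    // Hedged sketch: shape inference for Int8GivenTensorFill via the "shape" arg.
    OperatorDef def;
    def.set_type("Int8GivenTensorFill");
    def.add_output("Y");
    auto* shape_arg = def.add_arg();
    shape_arg->set_name("shape");
    shape_arg->add_ints(2);
    shape_arg->add_ints(3);
    const auto out = FillerTensorInference<>(def, {});
    // out[0]: dims (2, 3); the data type comes from the template argument.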
@@ -317,6 +317,15 @@ class SumOp : public Operator<Context> {
   }
 };
 
+inline OpSchema::Cost CostInferenceForSum(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in) {
+  struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
+  cost.flops *= (in.size() - 1);
+  cost.params_bytes = 0;
+  return cost;
+}
+
 // WeightedSumOp computes the weighted sum of several tensors. The input should
 // be in the form X_0, weight_0, X_1, weight_1, ... where X_i all have the same
 // shape, and weight_i are size 1 tensors that specifies the weight of each
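A concrete reading of CostInferenceForSum above: PointwiseCostInference<1> charges one FLOP per element, and the (in.size() - 1) factor counts the pairwise additions, so summing K tensors of N elements costs (K - 1) * N FLOPs. A sketch (shapes illustrative, not from the commit):

    // Hedged sketch: cost estimate for summing 3 tensors of 1000 x 1000 floats.
    OperatorDef def;
    def.set_type("Sum");
    def.add_input("X0");
    def.add_input("X1");
    def.add_input("X2");
    def.add_output("Y");

    std::vector<TensorShape> in(
        3, CreateTensorShape(std::vector<int>{1000, 1000}, TensorProto::FLOAT));
    const auto cost = CostInferenceForSum(def, in);
    // cost.flops == (3 - 1) * 1000000 -- one add per element per extra input
    // cost.params_bytes == 0 -- Sum has no trainable parameters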