#include "caffe2/opt/tvm_transformer.h"
|
|
#include "caffe2/opt/backend_cutting.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
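// Wrap the given (sub)net into a single TVMJit op. The op records the
// subnet's inputs, outputs and referenced weights by name, optionally
// attaches input shape info, and embeds the original net as a fallback.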
NetDef TvmTransformer::buildTvmOp(
    const caffe2::NetDef& net,
    const std::unordered_set<std::string>& weights,
    const ShapeInfoMap& shape_hints) {
  // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
  if (opts_.min_ops > net.op_size()) {
    return net;
  }
  caffe2::NetDef net_opt;
  auto* op = net_opt.add_op();
  op->set_type("TVMJit");

  // Remove the second output of Concat/Reshape from external_output,
  // and figure out which primary inputs of the net are sequence look-ups.
  std::unordered_set<std::string> split_infos;
  std::unordered_set<std::string> input_set(
      net.external_input().begin(), net.external_input().end());
  std::unordered_set<std::string> seq_input_set;
  for (const auto& op0 : net.op()) {
    if ((op0.type() == "Concat" || op0.type() == "Reshape") &&
        op0.output_size() == 2) {
      split_infos.emplace(op0.output(1));
    } else if (
        op0.type() == "SparseLengthsSum" ||
        op0.type() == "SparseLengthsSumFused8BitRowwise") {
      // The indices input of SparseLengthsSum should be of SEQ type
      if (op0.input_size() > 1 && input_set.count(op0.input(1))) {
        seq_input_set.emplace(op0.input(1));
      }
    } else if (
        op0.type() == "SparseLengthsWeightedSum" ||
        op0.type() == "SparseLengthsWeightedSumFused8BitRowwise") {
      // The weight and indices inputs of SparseLengthsWeightedSum should be
      // of SEQ type
      if (op0.input_size() > 1 && input_set.count(op0.input(1))) {
        seq_input_set.emplace(op0.input(1));
      }
      if (op0.input_size() > 2 && input_set.count(op0.input(2))) {
        seq_input_set.emplace(op0.input(2));
      }
    }
  }

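  // Note: the sequence-typed inputs collected above are reported to the
  // TVMJit op through the "seq_input_indices" argument added further below.
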
  // C2 operators bind inputs/outputs by position (they can be rewritten by
  // e.g. Memonger) while the TVM runtime binds them by name. Therefore, we
  // need to record the input/output names.
  auto* input_arg = op->add_arg();
  input_arg->set_name("inputs");
  auto* output_arg = op->add_arg();
  output_arg->set_name("outputs");

  // We expose both inputs and weights as inputs of the TVMJitOp
  for (const auto& i : net.external_input()) {
    net_opt.add_external_input(i);
    op->add_input(i);
    input_arg->add_strings(i);
  }
  for (const auto& i : net.external_output()) {
    if (split_infos.count(i)) {
      continue;
    }
    net_opt.add_external_output(i);
    op->add_output(i);
    output_arg->add_strings(i);
  }

  // Record the referred weights
  auto* w_arg = op->add_arg();
  std::unordered_set<std::string> referred_weights;
  for (const auto& op0 : net.op()) {
    for (const auto& i : op0.input()) {
      if (weights.count(i)) {
        referred_weights.emplace(i);
      }
    }
  }
  w_arg->set_name("weights");
  for (const auto& w : referred_weights) {
    w_arg->add_strings(w);
  }

  // Add input shape info in the "input_shape_info" argument of the net
  if (!opts_.profiling_based_jit) {
    auto* shape_arg = op->add_arg();
    shape_arg->set_name("input_shape_info");
    for (const auto& i : net_opt.external_input()) {
      shape_arg->mutable_tensors()->Add()->CopyFrom(
          wrapShapeInfoIntoTensorProto(i, shape_hints.at(i)));
    }
  }

  // Add the original net as a fallback
  auto* original_net_arg = op->add_arg();
  original_net_arg->set_name("original_net");
  original_net_arg->mutable_n()->CopyFrom(net);

  // Add model id
  AddArgument("model_id", model_id_, op);

  // Add op id
  AddArgument("tvm_op_id", tvm_op_id_++, op);

  // Add nominal batch size
  AddArgument("nominal_batch_size", opts_.bound_shape_spec.max_batch_size, op);

  // Add nominal sequence size
  AddArgument("nominal_seq_size", opts_.bound_shape_spec.max_seq_size, op);

  // Indices of the input blobs with sequence type
  auto* seq_input_indices_arg = op->add_arg();
  seq_input_indices_arg->set_name("seq_input_indices");
  int64_t input_idx = 0;
  for (const auto& input : net_opt.external_input()) {
    if (seq_input_set.count(input)) {
      seq_input_indices_arg->add_ints(input_idx);
    }
    ++input_idx;
  }

  if (opts_.debug) {
    AddArgument("debug", 1, op);
  }

  if (opts_.profiling_based_jit) {
    AddArgument("profiling_based_jit", 1, op);
  }

  return net_opt;
}

// Cut off the runnable part and replace it with TVMJitOps. Assumes the nets
// are topologically sorted.
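// Note: when opts_.profiling_based_jit is set, static shape inference is
// skipped and the TVMJit ops carry no "input_shape_info" argument.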
void TvmTransformer::transform(
    Workspace* ws,
    NetDef* pred_net,
    const std::vector<std::string>& weight_names,
    const ShapeInfoMap& input_shape_hints,
    const std::unordered_set<int>& blocklisted_ops) {
  CAFFE_ENFORCE(ws);
  CAFFE_ENFORCE(pred_net, "Predict net cannot be nullptr");

  // Save the args of the net so that we can copy them to the opt net later
  std::vector<Argument> args;
  for (const auto& arg : pred_net->arg()) {
    args.emplace_back(arg);
  }

  // Get the model id and reset the TVM op id to 0
  model_id_ = getModelId(*pred_net);
  tvm_op_id_ = 0;

  std::unordered_set<std::string> weights(
      weight_names.begin(), weight_names.end());

  // input_shape_hints should only contain shapes of inputs and not activations
  ShapeInfoMap shape_hints;
  if (!opts_.profiling_based_jit) {
    shape_hints =
        inferShapes(ws, pred_net, input_shape_hints, opts_.bound_shape_spec);
  }

  if (opts_.debug) {
    dumpNet(*pred_net, shape_hints, "debug_net.pbtxt");
  }

  // We are ready to transform the net
  NetDef net_opt =
      applyTvmTransform(pred_net, weights, blocklisted_ops, shape_hints);

  // Copy the properties
  for (const auto& arg : args) {
    net_opt.add_arg()->CopyFrom(arg);
  }
  net_opt.mutable_device_option()->CopyFrom(pred_net->device_option());
  pred_net->Swap(&net_opt);
  if (opts_.debug) {
    dumpNet(*pred_net, shape_hints, "debug_full_opt_net.pbtxt");
  }
}

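// Operator types that the transformer considers convertible to TVM. Ops not
// in this allowlist are kept as regular Caffe2 ops after the transform.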
const std::unordered_set<std::string>& TvmTransformer::getSupportedOps() {
  const static std::unordered_set<std::string> supported_ops{
      "Add",
      "BatchGather",
      "BatchMatMul",
      "Cast",
      "Clip",
      "Concat",
      "Copy",
      "DotProduct",
      "EnsureCPUOutput",
      "ExpandDims",
      "FbFCPacked",
      "FC",
      "FCTransposed",
      "Flatten",
      "Fused8BitRowwiseQuantizedToFloat",
      "Logit",
      "MatMul",
      "Mul",
      "Relu",
      "Reshape",
      "ReplaceNaN",
      "Sigmoid",
      "Slice",
      "Softmax",
      "Split",
      "Sum",
      "Tanh",
      "Transpose",
      "UnPackRecords",
  };
  return supported_ops;
}

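// Returns true only if every op in the net is supported and none of the op
// positions are blocklisted, i.e. the whole graph is eligible for TVM
// lowering.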
bool TvmTransformer::canConvertFullGraph(
    const caffe2::NetDef& net,
    const std::unordered_set<int>& blocklisted_ops) {
  const auto& supported_ops = getSupportedOps();
  for (const auto& op : net.op()) {
    int pos =
        ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
    if (blocklisted_ops.count(pos) || supported_ops.count(op.type()) == 0) {
      return false;
    }
  }
  return true;
}

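// Partition the net via opt::OptimizeForBackend: ops accepted by the
// tvm_supports predicate are grouped into subnets, and each subnet is
// converted into a TVMJit op by buildTvmOp; the rest of the net is left
// unchanged.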
NetDef TvmTransformer::applyTvmTransform(
    NetDef* pred_net,
    const std::unordered_set<std::string>& weights,
    const std::unordered_set<int>& blocklisted_ops,
    const ShapeInfoMap& shape_hints) {
  const auto profiling_based_jit = opts_.profiling_based_jit;
  auto tvm_supports = [&blocklisted_ops, &shape_hints, &profiling_based_jit](
                          const caffe2::OperatorDef& op) {
    const auto& supported_ops = getSupportedOps();
    try {
      // If the op position is blocklisted, return false
      int pos =
          ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
      if (blocklisted_ops.count(pos)) {
        LOG(INFO) << "op is being blocklisted, " << op.type()
                  << " at position " << pos;
        return false;
      }

      // If we don't have proper shape info for the op, we cannot compile it
      // properly, return false
      if (!profiling_based_jit) {
        for (const auto& i : op.input()) {
          if (shape_hints.find(i) == shape_hints.end()) {
            LOG(INFO) << "Skipping op " << op.type()
                      << " due to missing shape info for input " << i;
            return false;
          }
        }
      }

      // If the TVM C2 frontend doesn't support this op, return false
      // TODO: This should be something like TVMC2Frontend::supports(op);
      return (supported_ops.count(op.type()) != 0);
    } catch (const std::exception& ex) {
      LOG(ERROR) << "Caught exception when querying op " << op.type()
                 << ", what: " << ex.what();
      return false;
    }
  };
  auto tvm_op_converter =
      [this, &weights, &shape_hints](const caffe2::NetDef& net) {
        return buildTvmOp(net, weights, shape_hints);
      };

  return opt::OptimizeForBackend(*pred_net, tvm_supports, tvm_op_converter).net;
}

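// Free-function entry point: packs the individual flags into
// TvmTransformOptions, trims the net's external inputs/outputs to the given
// names, and runs the transformer.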
void tvmTransform(
    NetDef* net,
    Workspace* ws,
    const std::vector<std::string>& input_names,
    const std::vector<std::string>& output_names,
    const std::vector<std::string>& weight_names,
    const ShapeInfoMap& shape_hints,
    const std::unordered_set<int>& blocklisted_ops,
    int32_t max_batch_size,
    int32_t max_seq_size,
    int32_t num_embeddings,
    int32_t embedding_size,
    int32_t tvm_min_ops,
    bool tvm_profiling_based_jit,
    bool debug) {
  TvmTransformOptions opts;
  opts.bound_shape_spec.max_batch_size = max_batch_size;
  opts.bound_shape_spec.max_seq_size = max_seq_size;
  opts.bound_shape_spec.num_embeddings = num_embeddings;
  opts.bound_shape_spec.embedding_length = embedding_size;
  opts.min_ops = tvm_min_ops;
  opts.profiling_based_jit = tvm_profiling_based_jit;
  opts.debug = debug;
  TvmTransformer ts(opts);

  // Clean up the external input/output of the net
  cleanUpPredictNet(net, input_names, output_names, weight_names);

  ts.transform(ws, net, weight_names, shape_hints, blocklisted_ops);
}

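// Reset external_input/external_output so that they contain exactly the given
// input, weight and output names; weights are listed as external inputs too.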
void cleanUpPredictNet(
    NetDef* net,
    const std::vector<std::string>& input_names,
    const std::vector<std::string>& output_names,
    const std::vector<std::string>& weight_names) {
  net->mutable_external_input()->Clear();
  net->mutable_external_output()->Clear();
  for (const auto& i : input_names) {
    net->add_external_input(i);
  }
  for (const auto& w : weight_names) {
    net->add_external_input(w);
  }
  for (const auto& o : output_names) {
    net->add_external_output(o);
  }
}

} // namespace caffe2