mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-08 07:39:33 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/57366 We often get error messages such as ``` Model failed AOT (glow ahead-of-time compilation) with exception: Error during AOT optimization (non-provisioned addNetwork): Non-recoverable device error when adding network: Error code: PARTITIONER_ERROR Error message: Did not find a partition with an SLS node Error return stack: -------------------------------------------------------------------------------- glow/glow/lib/Partitioner/Partitioner.cpp:1244 -------------------------------------------------------------------------------- glow/glow/lib/Runtime/HostManager/HostManager.cpp:375 -------------------------------------------------------------------------------- ``` This makes the error message clearer by checking the number of OnnxifiOp created before going into Glow. The check is enabled with the `verify_only_single_subnet` flag, and is disabled by default. Test Plan: Unit tests pass Reviewed By: khabinov Differential Revision: D28097674 fbshipit-source-id: 0eefd8f6ec1a82546b759be8e541256bf271a673
211 lines
6.7 KiB
C++
211 lines
6.7 KiB
C++
#pragma once
|
|
|
|
#include <cstdint>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#include "caffe2/opt/backend_cutting.h"
|
|
#include "onnx/onnx_pb.h"
|
|
|
|
#include "caffe2/core/operator.h"
|
|
#include "caffe2/onnx/onnxifi_init.h"
|
|
#include "caffe2/opt/backend_transformer_base.h"
|
|
|
|
namespace caffe2 {
|
|
namespace onnx {
|
|
class OnnxExporter;
|
|
}
|
|
|
|
// Split each SparseLengthsSumSparse op in `net` into a
// SparseLengthsSumSparseLookup op followed by a SparseLengthsSum op.
// `ws` is consulted read-only (e.g. for weight blobs); `net` is rewritten
// in place.
TORCH_API void splitSparseLengthsSumSparse(NetDef* net, const Workspace& ws);
|
|
|
|
// Knobs controlling how OnnxifiTransformer lowers a Caffe2 net onto an
// ONNXIFI backend. Extends the generic BackendTransformOptions.
struct OnnxifiTransformerOptions final : public BackendTransformOptions {
  explicit OnnxifiTransformerOptions() : BackendTransformOptions() {}

  // Pass serialized onnx model if true, otherwise pass serialized c2 model
  bool use_onnx{false};

  // Whether to adjust batch at the outputs or not
  bool adjust_batch{true};

  // Whether to lower model blob by blob
  bool load_model_by_blob{false};

  // Whether to enforce fp32 inputs into fp16.
  bool enforce_fp32_inputs_into_fp16{false};

  // Whether to combine fp32 batched inputs into one tensor and convert it to
  // fp16 or not
  bool merge_fp32_inputs_into_fp16{false};

  // Whether to verify that exactly one Onnxifi subnet was created by the
  // transformation, so that a fragmented lowering is reported up front
  // instead of failing later inside the backend. Disabled by default.
  bool verify_only_single_subnet{false};

  // Whether the net has been ssaRewritten
  bool predictor_net_ssa_rewritten{false};

  // Inference timeout (0 means no timeout)
  int timeout{0};

  // Mapping of batch sizes to shape infos
  std::unordered_map<int, ShapeInfoMap> shape_hints_per_bs;

  // Whether to read batch size from Onnxifi.
  bool use_onnxifi_batch_size{false};
};
|
|
|
|
// Thin helper around the loaded onnxifi library for reading and writing
// backend-level options by name, without running a full transformation.
class TORCH_API OnnxifiOptionHelper final {
 public:
  OnnxifiOptionHelper();

  // Set Onnxifi option `option` to `value`; returns true on success.
  bool setOnnxifiOption(const std::string& option, const std::string& value);

  // Get the current value of Onnxifi option `option`.
  std::string getOnnxifiOption(const std::string& option);

 private:
  // Pointer to loaded onnxifi library
  onnxifi_library* lib_{nullptr};
};
|
|
|
|
// Transformer that cuts backend-supported subgraphs out of a Caffe2 net and
// replaces each with a single Onnxifi op that delegates execution to an
// ONNXIFI backend. The subgraph can be handed to the backend either as ONNX
// protobuf or as Caffe2 protobuf, selected by opts_.use_onnx.
class TORCH_API OnnxifiTransformer final : public BackendTransformerBase {
 public:
  explicit OnnxifiTransformer(const OnnxifiTransformerOptions& opts);
  ~OnnxifiTransformer() override;

  // Main entry point: rewrites pred_net in place, lowering supported
  // subgraphs (excluding blocklisted_ops) onto the backend. shape_hints
  // provides tensor shape info; weight_names lists weight blobs in ws.
  void transform(
      Workspace* ws,
      NetDef* pred_net,
      const std::vector<std::string>& weight_names,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<int>& blocklisted_ops) override;

  // Query whether an operator is supported by passing C2 protobuf
  bool supportOpC2(
      const caffe2::OperatorDef& op,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<std::string>& weights,
      const std::unordered_set<int>& blocklisted_ops,
      onnxBackendID backend_id) const;

  // Determine backend ids of the available ONNXIFI backends
  std::vector<onnxBackendID> getBackendId();

 private:
  // Since we create new tensors during the conversion process, we actually
  // need to inject them into the original workspace.
  // Since our onnx exporter uses std::unordered_map<std::string, TensorShape>
  // as lut, we need to include an extra copy of shape info and maintain them
  // together
  caffe2::NetDef SubnetToOnnxifiOpViaOnnx(
      const caffe2::NetDef& net,
      const std::unordered_set<std::string>& weights_in_ws,
      Workspace* ws,
      onnx::OnnxExporter* exporter,
      ShapeInfoMap* shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Convert a cutoff subgraph net to an Onnxifi op
  caffe2::NetDef SubnetToOnnxifiOpViaC2(
      const caffe2::NetDef& net,
      const std::unordered_set<std::string>& weights_in_ws,
      const ShapeInfoMap& shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Check that output shape hints are present to ensure we can pass them to
  // OnnxifiOp
  bool canPassOutputShapeHintsPerBs(
      const OperatorDef& op,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs) const;

  // Build the final Onnxifi OperatorDef from a serialized model plus the
  // subgraph's boundary info. We already have all the ops and external
  // inputs and outputs!
  OperatorDef buildOnnxifiOp(
      const std::string& onnx_model_str,
      const std::unordered_set<std::string>& initialization_list,
      const std::vector<std::string>& external_inputs,
      const std::vector<std::string>& external_outputs,
      const ShapeInfoMap& shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Transform by passing C2 proto to backend
  opt::CutResult TransformViaC2(
      NetDef* pred_net,
      const std::unordered_set<std::string>& weights,
      const std::unordered_set<int>& blocklisted_ops,
      const ShapeInfoMap& shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Transform by passing ONNX proto to backend
  opt::CutResult TransformViaOnnx(
      Workspace* ws,
      NetDef* pred_net,
      const std::unordered_set<std::string>& weights,
      const std::unordered_set<int>& blocklisted_ops,
      ShapeInfoMap* shape_hints_max_bs,
      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

  // Query whether an operator is supported by passing ONNX protobuf
  bool supportOpOnnx(
      const caffe2::OperatorDef& op,
      onnx::OnnxExporter* exporter,
      const std::unordered_set<int>& blocklisted_ops,
      onnxBackendID backend_id) const;

  // Tie the output of Gather to the scalar weight input of the
  // SparseLengthsWeighted* and SparseLengthsSumSparseLookup (which is split
  // from the SparseLengthsWeighted*Sparse) ops. If the latter is disabled,
  // disable the former too.
  void tieGatherAndSparseLengthsWeightedSumOps(
      const NetDef& net,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<std::string>& weights,
      std::unordered_set<int>* blocklisted_ops) const;

  // For net with partitioning info, blocklist ops that are supposed to run on
  // CPU, whose partition info will contain empty device_id list.
  void blocklistCpuPartition(
      const NetDef& net,
      std::unordered_set<int>* blocklisted_ops) const;

  // Rule based filtering: applies the op-blocklisting heuristics above
  void applyFilteringRules(
      const NetDef& net,
      const ShapeInfoMap& shape_hints,
      const std::unordered_set<std::string>& weights,
      std::unordered_set<int>* blocklisted_ops) const;

  // Extract partition info from the original net into partition_infos_
  void extractPartitionInfo(const NetDef& net);

  // Options
  OnnxifiTransformerOptions opts_;

  // Pointer to loaded onnxifi library
  onnxifi_library* lib_{nullptr};

  // Number of backends
  size_t num_backends_{0};

  // backend idx
  int idx_{0};

  // Number of Onnxifi Ops we build so far
  int onnxifi_op_id_{0};

  // Model id
  std::string model_id_;

  // Backend IDs
  std::vector<onnxBackendID> backend_ids_;

  // A cache for ONNX shape hints
  std::unordered_map<std::string, TensorShape> shape_hints_onnx_;

  // Partition info
  std::vector<PartitionInfo> partition_infos_;
};
|
|
} // namespace caffe2
|