mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/55062

This diff introduces the following changes:

1. An InlinedCallStack pickler/serializer is introduced. An InlinedCallStack is serialized as a tuple of {module_instance_info, source_range_tag, callee:InlinedCallStack}, and module instance info is serialized as a tuple of {class_type_name, instance_name}. Note that the callee of a serialized InlinedCallStack points to the tuple of an already-serialized callstack. This means that the first callstack pointer to be serialized serializes the entire path of the tree, and some of its callee nodes may be shared with callstack pointers serialized later. The pickler supports memoization of pickled objects: once a tuple has been serialized, subsequent references emit its object id instead of serializing the object again. Thus we still serialize the tree, not every path from the root separately. InlinedCallStackSerializer also uses a cache to look up a pointer and return its serialized IValue. Note that we must also serialize the source range of each InlinedCallStack. To do this, the serializer requires a map from source-range tags to source ranges. This was done in the previous diff, where, as part of source range serialization, we also generate unique tags; these are the tags serialized in InlinedCallStack. During deserialization, therefore, source ranges must be deserialized before InlinedCallStacks.

2. Each serialized InlinedCallStack is serialized with a unique debug_handle and source range tag. BackendDebugHandleManager manages the generation of unique debug handles and saves the map of debug-handles-to-{source_range_tag, inlined-callstack-ptr}. This map is then serialized as callstack_debug_map.pkl. Note that the inlined callstack alone is not sufficient to recover all the source information, since it only contains source information about the nodes that were inlined. The top-of-the-stack (or bottom) node, which is the actual op node, is not part of the inlined callstack pointer, so the source range of this node is serialized separately via source_range_tag. This is similar to how JIT creates the callstack in torch/csrc/jit/runtime/interpreter.cpp. Unique debug handles facilitate exception throwing or profiling using just the debug handle, without further qualification such as which function or module the inlined callstack belongs to.

This diff also refactors the old mobile code for tracking module hierarchy information per op. Bytecode serialization now serializes the debug handles corresponding to the ops/nodes in a graph, and callstack_debug_map.pkl is used to generate: 1. the entire callstack and 2. module hierarchy information.

Test Plan:
python test/mobile/test_lite_script_module.py TestLiteScriptModule
./build/bin/test_jit --gtest_filter=*ModuleInfo

Imported from OSS

Reviewed By: raziel

Differential Revision: D27468709

fbshipit-source-id: 53e2413e7703ead01c77718b7c333c7c6ff50a23
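To make the serialized layout concrete, below is a minimal C++ sketch of the two structures the summary describes: the {module_instance_info, source_range_tag, callee} tuple for an InlinedCallStack, and the debug-handle map that becomes callstack_debug_map.pkl. The type names (SerializedModuleInstanceInfo, SerializedInlinedCallStack, CallstackDebugMap) are illustrative assumptions, not the actual PyTorch types; the real serializer emits pickled IValue tuples.

#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

// Hypothetical mirror of the pickled tuple {class_type_name, instance_name}.
struct SerializedModuleInstanceInfo {
  std::string class_type_name; // e.g. "__torch__.torch.nn.Conv2d"
  std::string instance_name;   // e.g. "conv1"
};

// Hypothetical mirror of the pickled tuple
// {module_instance_info, source_range_tag, callee}.
struct SerializedInlinedCallStack {
  SerializedModuleInstanceInfo module_instance_info;
  int64_t source_range_tag; // resolves against the source-range table whose
                            // unique tags were generated in the previous diff
  // Shared callees are pickled once and referenced by memo id afterwards, so
  // the callstack tree is serialized as a tree, not as one full path per
  // callstack pointer.
  std::shared_ptr<SerializedInlinedCallStack> callee; // empty for last frame
};

// Shape of callstack_debug_map.pkl: debug handle ->
// {source_range_tag of the op node itself, inlined callstack of its inliners}.
using CallstackDebugMap = std::unordered_map<
    int64_t,
    std::pair<int64_t, std::shared_ptr<SerializedInlinedCallStack>>>;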
129 lines · 3.5 KiB · C++
#include <torch/csrc/jit/mobile/function.h>

#include <caffe2/serialize/inline_container.h>
#include <torch/csrc/jit/mobile/interpreter.h>
#include <torch/csrc/jit/runtime/instruction.h>
#include <torch/csrc/jit/runtime/operator.h>
#include <torch/custom_class_detail.h>

namespace torch {
namespace jit {

char const* toString(OpCode op);

namespace mobile {
Function::Function(c10::QualifiedName name)
    : name_(std::move(name)), code_(std::make_shared<Code>()) {}

const c10::QualifiedName& Function::qualname() const {
  return name_;
}

const std::string& Function::name() const {
  return name_.name();
}

void Function::append_instruction(OpCode op, int X, int N, int64_t dbg_handle) {
  TORCH_CHECK(
      isOpSupportedInMobile(op),
      toString(op),
      " is not supported in mobile module.");
  code_->instructions_.emplace_back(op, X, N);
  code_->debug_handles_.emplace_back(dbg_handle);
}
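// Example (hypothetical values): the module loader appends one instruction
// together with the debug handle minted for it by BackendDebugHandleManager;
// the handle, not the pc, is what keys into callstack_debug_map.pkl:
//   fn.append_instruction(OP, /*X=*/0, /*N=*/0, /*dbg_handle=*/42);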
bool Function::append_operator(
    const std::string& name,
    const std::string& overload_name,
    int64_t model_version) {
  // Keep the original opname in code_
  code_->op_names_.emplace_back(name, overload_name);
  auto opname = code_->op_names_.back();

  const auto& opname_c10 = opname;
  std::function<void(Stack&)> fn;

  auto jit_op = findOperatorFor(opname);
  if (jit_op) {
    fn = [jit_op](Stack& stack) { jit_op->getOperation()(&stack); };
  } else {
    auto op = c10::Dispatcher::singleton().findSchema(opname_c10);
    if (op.has_value()) {
      fn = [op](Stack& stack) { op->callBoxed(&stack); };
    } else {
      return false;
    }
  }

  if (model_version == 0x3LL &&
      opname == c10::OperatorName("aten::_convolution", "")) {
    // Since byte-code version 0x4L, convolution has an additional
    // default-value argument (allow_tf32=True, see
    // https://github.com/pytorch/pytorch/pull/40737). This wrapper handles
    // backward compatibility with models of byte-code version <= 0x3L, where
    // this bool argument does not yet exist.
    fn = [fn](Stack& stack) {
      stack.push_back(true);
      fn(stack);
    };
  }

  code_->operators_.emplace_back(fn);
  return true;
}
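// Example (hypothetical values): resolution order for an appended operator is
// the JIT operator registry first, then the c10 dispatcher; false means
// neither registry knows the op, so loading the model should fail:
//   bool found = fn.append_operator("aten::add", "Tensor", /*model_version=*/0x4LL);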
void Function::append_constant(const c10::IValue& constant) {
  code_->constants_.push_back(constant);
}

void Function::append_type(const at::TypePtr& type) {
  code_->types_.push_back(type);
}

void Function::set_register_size(size_t size) {
  code_->register_size_ = size;
}

int64_t Function::get_debug_handle(size_t pc) const {
  TORCH_CHECK(code_, "Valid code must exist.");
  TORCH_CHECK(
      pc < code_->debug_handles_.size(),
      "Module debug info index out of boundary.");
  return code_->debug_handles_[pc];
}

void Function::setSchema(c10::FunctionSchema schema) {
  schema_ = std::move(schema);
}

const at::optional<c10::FunctionSchema>& Function::getSchema() const {
  return schema_;
}

bool Function::run(Stack& stack) const {
  const auto& schema = getSchema();
  if (schema) { // if we have a schema then resolve optional args if any
    schema->checkAndNormalizeInputs(
        stack, std::unordered_map<std::string, IValue>{} /*kwargs*/);
  }
  InterpreterState interp_state(code_);
  return interp_state.run(stack);
}

c10::IValue Function::operator()(Stack& stack) const {
  run(stack);
  return stack.front();
}
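// Example (hypothetical values): running a loaded mobile function. Inputs are
// pushed onto the stack; after run() the stack holds the outputs, and
// operator() returns the front element as the result:
//   Stack stack{self, at::ones({2, 2})};
//   c10::IValue result = fn(stack);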
const std::shared_ptr<Code> Function::get_code() const {
  return code_;
}

int64_t Function::getExceptionDebugHandle() const {
  size_t pc = getInterpretersExceptionPC();
  return (pc < code_->debug_handles_.size()) ? code_->debug_handles_[pc] : -1;
}
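// Example (hypothetical): mapping an interpreter failure back to source. The
// returned handle keys into callstack_debug_map.pkl; -1 means the exception
// pc had no recorded handle:
//   int64_t handle = fn.getExceptionDebugHandle();
//   if (handle != -1) {
//     // look up {source_range_tag, inlined callstack} for the failing op
//   }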
} // namespace mobile
} // namespace jit
} // namespace torch