Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/64269

Revert the changes in D29826210 (693d8f2f07): operator lambda caching is no longer needed, since duplicate operators no longer occur. This diff stack yields an additional ~12% speedup in model loading time (from 229ms to 200ms) when run against an 87MB speech model provided by jiatongzhou.

ghstack-source-id: 138014904

Test Plan:
**Speech Transducer v25 model (as in D29826210 (693d8f2f07))**

| | Before | After |
|---|---|---|
| Load Time | [229ms](https://www.internalfb.com/intern/aibench/details/160889436133243) | [200ms](https://www.internalfb.com/intern/aibench/details/837884532607514) |
| Save File Size | [86.23 MB](https://lookaside.facebook.com/intern/diff/file/data/?number=658544950) | [86.1 MB](https://lookaside.facebook.com/intern/diff/file/data/?number=658554403) |

The "after" flamegraph shows that significantly less time is spent in ```append_operator``` than before.

Steps:
- Check out the desired commit on the devserver (base branch or this diff)
- ```buck build bento/kernels:bento_kernel_pytorch```
- Use N1094068 with the pytorch_local kernel to save the model for the lite interpreter
- Edit ```aibench/specifications/models/pytorch/speech_transducer/v25.json``` to point at the new model location and md5
- ```buck run aibench:run_bench -- -b aibench/specifications/models/pytorch/speech_transducer/v25.json --framework pytorch --platform android/arm64 --devices "S8US" --force_profile --remote```

**Test that saving a model with de-duplicated ops doesn't change its output**
https://www.internalfb.com/intern/anp/view/?id=1137434

Reviewed By: iseeyuan

Differential Revision: D30615710

fbshipit-source-id: bb4052f0f16eccab386585e94411056f94bce43c
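For readers without access to D29826210, the shape of the reverted optimization is roughly a memoized operator lookup. The sketch below is a hypothetical reconstruction, not the actual ```function.cpp``` source — the names ```OperatorFn```, ```make_operator_function```, and ```get_operator_cached``` are illustrative only. The point is that once the serializer stops emitting duplicate operator entries, every cache lookup misses, so the map is pure overhead at load time.

```cpp
// Hypothetical reconstruction of the reverted caching (names are
// illustrative, not from the real function.cpp). D29826210 memoized the
// operator lambda per qualified name so that appending the same operator
// twice was cheap; with duplicate operator entries now eliminated at
// serialization time, every lookup misses and the cache only adds
// hashing cost during model load.
#include <functional>
#include <string>
#include <unordered_map>

using OperatorFn = std::function<void()>; // stand-in for the real stack fn

OperatorFn make_operator_function(const std::string& qualified_name);

OperatorFn get_operator_cached(const std::string& qualified_name) {
  // The map this diff removes: it only pays off when the same operator
  // name is resolved more than once for a single model.
  static std::unordered_map<std::string, OperatorFn> cache;
  auto it = cache.find(qualified_name);
  if (it != cache.end()) {
    return it->second; // hit: only possible with duplicate operator entries
  }
  auto fn = make_operator_function(qualified_name); // dispatcher lookup
  cache.emplace(qualified_name, fn);
  return fn;
}
```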
54 lines
1.6 KiB
C++
#pragma once

#include <ATen/core/function_schema.h>
#include <ATen/core/ivalue.h>
#include <vector>

namespace torch {
namespace jit {
using Stack = std::vector<c10::IValue>;
enum OpCode : uint8_t;

namespace mobile {
struct Code;

class Function {
 public:
  TORCH_API Function(c10::QualifiedName name);
  TORCH_API bool run(Stack& stack) const;
  c10::IValue operator()(Stack& stack) const;
  const std::string& name() const;
  TORCH_API const c10::QualifiedName& qualname() const;
  void append_instruction(OpCode op, int X, int N, int64_t dbg_handle = -1);
  bool append_operator(
      const std::string& name,
      const std::string& overload_name,
      const c10::optional<int>& num_specified_args,
      int64_t model_version); /* TODO: T90339189 deprecate all v3 when v3 models
                                 are removed */
  void append_constant(const c10::IValue& constant);
  void append_type(const c10::TypePtr& type);

  void set_register_size(size_t size);

  int64_t get_debug_handle(size_t pc) const;
  const std::shared_ptr<Code> get_code() const;

  void setSchema(c10::FunctionSchema schema);
  const at::optional<c10::FunctionSchema>& getSchema() const;

  // Returns the debug handle corresponding to where the execution
  // is halted due to exception.
  // If no corresponding debug handle is found then -1 is returned.
  int64_t getExceptionDebugHandle() const;

 private:
  c10::QualifiedName name_;
  std::shared_ptr<Code> code_;
  at::optional<c10::FunctionSchema> schema_; // (byte-code version 4+)
};

} // namespace mobile
} // namespace jit
} // namespace torch
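As a usage note, the declarations above are normally driven by the bytecode deserializer, which builds each ```mobile::Function``` by appending instructions, operators, and constants, then executes it with ```run```. Below is a minimal sketch under stated assumptions: the ```OpCode``` values ```OP``` and ```RET``` come from ```torch/csrc/jit/runtime/instruction.h```, and the instruction/stack layout shown is illustrative rather than a verified bytecode listing.

```cpp
// Minimal sketch of driving the mobile::Function API by hand; normally the
// bytecode deserializer does this. Assumes OpCode values OP and RET from
// torch/csrc/jit/runtime/instruction.h; the exact instruction/stack layout
// here is illustrative, not a verified bytecode listing.
#include <ATen/ATen.h>
#include <torch/csrc/jit/mobile/function.h>
#include <torch/csrc/jit/runtime/instruction.h>

using namespace torch::jit;

void run_handwritten_add() {
  mobile::Function fn(c10::QualifiedName("demo.add"));

  // Resolve aten::add.Tensor through the dispatcher; append_operator
  // reports whether the operator could be found.
  bool found = fn.append_operator(
      "aten::add",
      "Tensor",
      /*num_specified_args=*/3,
      /*model_version=*/0x6LL);
  if (!found) {
    return; // operator not registered in this build
  }

  fn.append_instruction(OP, /*X=*/0, /*N=*/0);  // call operator table entry 0
  fn.append_instruction(RET, /*X=*/0, /*N=*/0); // return top of stack
  fn.set_register_size(0);

  // Inputs are passed on the stack: self, other, alpha.
  Stack stack{at::ones({2, 2}), at::ones({2, 2}), 1};
  fn.run(stack); // on success, stack.front() holds the elementwise sum
}
```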