mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Re-landing https://github.com/pytorch/pytorch/pull/68111 ## Description Preview4 PR of this [RFC](https://github.com/pytorch/pytorch/issues/49444). On the basis of https://github.com/pytorch/pytorch/pull/50256, the following improvements are included: - The [preview4 release branch](https://github.com/oneapi-src/oneDNN/releases/tag/graph-v0.4.1) of the oneDNN Graph API is used - The fuser now works with the profiling graph executor. We have inserted type check nodes to guard the profiled tensor properties. ### User API: The optimization pass is disabled by default. Users can enable it with: ``` torch.jit.enable_onednn_fusion(True) ``` ### Performance: The [pytorch/benchmark](https://github.com/pytorch/benchmark) tool is used to compare the performance: - SkyLake 8180 (1 socket of 28 cores):  - SkyLake 8180 (single thread):  \* By mapping hardswish to oneDNN Graph, it’s 8% faster than PyTorch JIT (NNC + OFI) \** We expect a performance gain after mapping transpose, contiguous & view to oneDNN graph ops ### Directory structure of the integration code Fuser-related code is placed under: ``` torch/csrc/jit/codegen/onednn/ ``` Optimization pass registration is done in: ``` torch/csrc/jit/passes/onednn_graph_fuser.h ``` CMake for the integration code is: ``` caffe2/CMakeLists.txt ``` ## Limitations - In this PR, we only support the optimization on the Linux platform. Support for Windows and macOS will be enabled as the next step. - We have only optimized the inference use case. Pull Request resolved: https://github.com/pytorch/pytorch/pull/74596 Approved by: https://github.com/malfet
63 lines
1.4 KiB
C++
63 lines
1.4 KiB
C++
#pragma once
|
|
#include <ATen/Config.h>
|
|
#include <torch/csrc/jit/ir/ir.h>
|
|
#include <torch/csrc/jit/passes/pass_manager.h>
|
|
|
|
namespace torch {
|
|
namespace jit {
|
|
namespace fuser {
|
|
namespace onednn {
|
|
|
|
// Flag recording whether the oneDNN Graph (LLGA) fuser is enabled; starts out
// disabled.
// NOTE(review): the flag has internal linkage and lives in a header, so every
// translation unit that includes this header gets its own copy. Confirm that
// all readers and writers go through the same translation unit, or move the
// definition into a .cpp file.
static std::atomic<bool> onednn_enabled{false};

// Returns a mutable reference to the enabled flag so callers can both read
// and assign it.
// Declared `static` because the definition lives in a header: a non-inline
// function with external linkage would be defined once per including
// translation unit and fail at link time with a multiple-definition error.
// Internal linkage also keeps the accessor paired with the flag of the same
// translation unit.
static std::atomic<bool>& getLlgaEnabled() {
  return onednn_enabled;
}
|
|
|
|
// Graph pass for the oneDNN Graph (LLGA) fuser. Only the declaration is
// visible in this header; the definition lives elsewhere. The graph is passed
// as a non-const reference to a shared_ptr and nothing is returned, so any
// result is presumably applied to `g` itself (confirm against the
// definition). RegisterLlgaFuseGraph::setEnabled(true) registers this
// function as a pre-pass.
C10_EXPORT void fuseGraph(std::shared_ptr<Graph>& g);
|
|
|
|
} // namespace onednn
|
|
} // namespace fuser
|
|
|
|
struct C10_EXPORT RegisterLlgaFuseGraph
|
|
: public PassManager<RegisterLlgaFuseGraph> {
|
|
static bool setEnabled(bool enabled) {
|
|
TORCH_CHECK(
|
|
AT_MKLDNN_ENABLED(),
|
|
"Running oneDNN Graph fuser is only supported with MKLDNN builds.");
|
|
bool oldState = fuser::onednn::getLlgaEnabled();
|
|
fuser::onednn::getLlgaEnabled() = enabled;
|
|
if (enabled) {
|
|
registerPass(fuser::onednn::fuseGraph);
|
|
} else {
|
|
clearPass();
|
|
}
|
|
return oldState;
|
|
}
|
|
|
|
static bool isEnabled() {
|
|
return fuser::onednn::getLlgaEnabled();
|
|
}
|
|
|
|
// override PassManager::registerPass to register pre-pass
|
|
static bool registerPass(GraphPass p) {
|
|
if (!isRegistered()) {
|
|
passID(registerPrePass(std::move(p)), true);
|
|
isRegistered(true);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// override PassManager::clearPass to clear pre-pass
|
|
static void clearPass() {
|
|
if (isRegistered()) {
|
|
clearPrePass(passID());
|
|
isRegistered(true);
|
|
}
|
|
}
|
|
};
|
|
|
|
} // namespace jit
|
|
} // namespace torch
|