Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
    import json
    with open("build/compile_commands.json") as f:
        data = json.load(f)
    files = [os.path.relpath(node['file']) for node in data]
    for idx, fname in enumerate(files):
        if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
            files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
    return files

def run_clang_tidy(fname):
    check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname, "-s"])
    changes = check_output(["git", "ls-files", "-m"])
    if len(changes) == 0:
        return
    check_call(["git", "commit", "--all", "-m", f"NOLINT stubs for {fname}"])

def main():
    git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
    compiled_files = get_compiled_files_list()
    for idx, fname in enumerate(git_files):
        if fname not in compiled_files:
            continue
        if fname.startswith("caffe2/contrib/aten/"):
            continue
        print(f"[{idx}/{len(git_files)}] Processing {fname}")
        run_clang_tidy(fname)

if __name__ == "__main__":
    main()
```
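The `NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)` comments visible in the file below are the stubs this script inserts: each one suppresses a single clang-tidy diagnostic on the line that immediately follows it, so the warning is silenced at exactly one site rather than globally.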
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
#include <torch/csrc/jit/codegen/fuser/interface.h>

#include <torch/csrc/jit/codegen/fuser/compiler.h>
#include <torch/csrc/jit/codegen/fuser/executor.h>
#include <torch/csrc/jit/codegen/fuser/fallback.h>
#include <torch/csrc/jit/codegen/fuser/kernel_cache.h>

#include <c10/util/Flags.h>
#include <stdexcept>

// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
C10_DEFINE_bool(torch_jit_enable_cpu_fusion, false, "enable cpu fusion");

namespace torch {
namespace jit {

namespace detail {

// Note: CPU fusion is currently disabled due to test flakiness
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
bool cpu_fuser_enabled = false;

// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
bool gpu_fuser_enabled = true;

} // namespace detail

int64_t registerFusion(const Node* fusion_group) {
  return fuser::registerFusion(fusion_group);
}

void runFusion(const int64_t key, Stack& stack) {
  const auto result = fuser::runFusion(key, stack);
  if (!result)
    fuser::runFallback(key, stack);
}

bool canFuseOnCPU() {
  return fuser::hasFusionBackend(DeviceType::CPU) &&
      (detail::cpu_fuser_enabled || FLAGS_torch_jit_enable_cpu_fusion);
}

bool canFuseOnGPU() {
  return fuser::hasFusionBackend(DeviceType::CUDA) && detail::gpu_fuser_enabled;
}

void overrideCanFuseOnCPU(bool value) {
  detail::cpu_fuser_enabled = value;
}

void overrideCanFuseOnGPU(bool value) {
  detail::gpu_fuser_enabled = value;
}

// Uses the above interface by stuffing the graph into a node and treating that
// node as a fusion group.
std::vector<at::Tensor> debugLaunchGraph(
    Graph& graph,
    at::ArrayRef<at::Tensor> inputs) {
  // Creates a fusion group node
  auto wrapper_graph = std::make_shared<Graph>();
  Node* fusion_group = wrapper_graph->insertNode(
      wrapper_graph->createWithSubgraph(prim::FusionGroup));
  fusion_group->g_(attr::Subgraph, graph.copy());
  for (size_t i = 0; i < graph.inputs().size(); ++i) {
    fusion_group->addInput(wrapper_graph->addInput());
  }
  for (size_t i = 0; i < graph.outputs().size(); ++i) {
    wrapper_graph->registerOutput(fusion_group->addOutput());
  }

  // Creates the stack, registers and runs the fusion
  Stack stack = fmap<IValue>(inputs);
  const auto key = fuser::registerFusion(fusion_group);
  fuser::runFusion(key, stack);
  return fmap(stack, [](const IValue& iv) { return iv.toTensor(); });
}

std::string debugGetFusedKernelCode(
    Graph& graph,
    at::ArrayRef<at::Tensor> inputs) {
  // Creates a fusion group node
  auto wrapper_graph = std::make_shared<Graph>();
  Node* fusion_group = wrapper_graph->insertNode(
      wrapper_graph->createWithSubgraph(prim::FusionGroup));
  fusion_group->g_(attr::Subgraph, graph.copy());
  for (size_t i = 0; i < graph.inputs().size(); ++i) {
    fusion_group->addInput(wrapper_graph->addInput());
  }
  for (size_t i = 0; i < graph.outputs().size(); ++i) {
    wrapper_graph->registerOutput(fusion_group->addOutput());
  }

  // Creates the stack, registers and runs the fusion
  Stack stack = fmap<IValue>(inputs);
  const auto key = fuser::registerFusion(fusion_group);

  std::string code;
  if (!fuser::runFusion(key, stack, &code)) {
    throw std::runtime_error("Could not run fusion for graph");
  }

  return code;
}

size_t nCompiledKernels() {
  return fuser::nCompiledKernels();
}

} // namespace jit
} // namespace torch
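For orientation, here is a minimal sketch of how a caller might drive this interface. The surrounding `main` and the commented-out graph setup are hypothetical, but every function called is defined in the file above:

```cpp
#include <torch/csrc/jit/codegen/fuser/interface.h>

int main() {
  // CPU fusion is off by default (detail::cpu_fuser_enabled above is false),
  // so a caller opts in explicitly; this has roughly the same effect as
  // enabling the torch_jit_enable_cpu_fusion c10 flag.
  torch::jit::overrideCanFuseOnCPU(true);

  // canFuseOnCPU() is true only when a CPU fusion backend is registered
  // AND fusion has been enabled via the override or the flag.
  if (torch::jit::canFuseOnCPU()) {
    // Typical flow: register a prim::FusionGroup node once, then run it
    // repeatedly against a stack of inputs (see debugLaunchGraph above):
    //   int64_t key = torch::jit::registerFusion(fusion_group);
    //   torch::jit::runFusion(key, stack);
  }
  return 0;
}
```

Note the split between registration and execution: `registerFusion` caches the fusion group under an integer key, and `runFusion` falls back to the unfused graph (`fuser::runFallback`) whenever compiled execution fails.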