pytorch/torch/csrc/jit/codegen/fuser/interface.cpp
Mike Guo 6ecc1a4c4f Make pytorch clang-tidy clean (#60649)
Summary:
This PR suppresses clang-tidy warnings in the codebase (for now) so that we can re-enable clang-tidy checks on master.

I ran this script to add the `NOLINTNEXTLINE` comments (on a devserver):
```bash
python3 setup.py develop

# Uses same script that's run on CI and adds the -j (parallel), -s (add comments), -k (continue if diagnostic errors are found) options
python3 tools/clang_tidy.py \
  -j \
  -s \
  -k \
  -v \
  --paths torch/csrc/ \
  -g"-torch/csrc/jit/passes/onnx/helper.cpp" \
  -g"-torch/csrc/jit/passes/onnx/shape_type_inference.cpp" \
  -g"-torch/csrc/jit/serialization/onnx.cpp" \
  -g"-torch/csrc/jit/serialization/export.cpp" \
  -g"-torch/csrc/jit/serialization/import.cpp" \
  -g"-torch/csrc/jit/serialization/import_legacy.cpp" \
  -g"-torch/csrc/onnx/init.cpp" \
  -g"-torch/csrc/cuda/nccl.*" \
  -g"-torch/csrc/cuda/python_nccl.cpp" \
  -g"-torch/csrc/autograd/FunctionsManual.cpp" \
  -g"-torch/csrc/generic/*.cpp" \
  -g"-torch/csrc/jit/codegen/cuda/runtime/*" \
  -g"-torch/csrc/deploy/interpreter/interpreter.cpp" \
  -g"-torch/csrc/deploy/interpreter/interpreter.h" \
  -g"-torch/csrc/deploy/interpreter/interpreter_impl.h" \
  -g"-torch/csrc/deploy/interpreter/test_main.cpp"
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/60649

Test Plan: Verified changes by re-running the script (without the `-s` option) and seeing no warnings/errors.

Reviewed By: walterddr, janeyx99

Differential Revision: D29504258

Pulled By: 1ntEgr8

fbshipit-source-id: 78310b30ee8213b73ddb4771ad874665323e7a4e
2021-07-01 12:21:07 -07:00

117 lines
3.5 KiB
C++

#include <torch/csrc/jit/codegen/fuser/interface.h>
#include <torch/csrc/jit/codegen/fuser/compiler.h>
#include <torch/csrc/jit/codegen/fuser/executor.h>
#include <torch/csrc/jit/codegen/fuser/fallback.h>
#include <torch/csrc/jit/codegen/fuser/kernel_cache.h>
#include <c10/util/Flags.h>
#include <stdexcept>
// Defines the c10 boolean flag `torch_jit_enable_cpu_fusion` (default: false,
// help text "enable cpu fusion"). canFuseOnCPU() below reads it as
// FLAGS_torch_jit_enable_cpu_fusion and ORs it with detail::cpu_fuser_enabled.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
C10_DEFINE_bool(torch_jit_enable_cpu_fusion, false, "enable cpu fusion");
namespace torch {
namespace jit {
namespace detail {

// Mutable global switches for the fuser. They are read by canFuseOnCPU() /
// canFuseOnGPU() and written by overrideCanFuseOnCPU() /
// overrideCanFuseOnGPU() in this file.
//
// Note: CPU fusion is currently disabled by default due to test flakiness,
// except in FBCODE_CAFFE2 builds, where it defaults to enabled.
//
// Each NOLINTNEXTLINE must sit directly above the declaration it suppresses;
// placing it above the `#if` would only cover the preprocessor line.
#if defined(FBCODE_CAFFE2)
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
bool cpu_fuser_enabled = true;
#else
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
bool cpu_fuser_enabled = false;
#endif

// The GPU switch defaults to on.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
bool gpu_fuser_enabled = true;

} // namespace detail
// Registers `fusion_group` with the fuser and returns the key produced by
// fuser::registerFusion(). This is a direct forward with no extra logic.
int64_t registerFusion(const Node* fusion_group) {
  const auto key = fuser::registerFusion(fusion_group);
  return key;
}
// Runs the fusion registered under `key` against `stack`. When
// fuser::runFusion() reports failure, the fallback for the same key is run
// on the same stack instead.
void runFusion(const int64_t key, Stack& stack) {
  if (fuser::runFusion(key, stack)) {
    return;
  }
  fuser::runFallback(key, stack);
}
// Returns true only when a CPU fusion backend is available AND CPU fusion is
// switched on, either through detail::cpu_fuser_enabled or through the
// torch_jit_enable_cpu_fusion flag.
bool canFuseOnCPU() {
  if (!fuser::hasFusionBackend(DeviceType::CPU)) {
    return false;
  }
  return detail::cpu_fuser_enabled || FLAGS_torch_jit_enable_cpu_fusion;
}
// Returns true only when a CUDA fusion backend is available AND
// detail::gpu_fuser_enabled is set.
bool canFuseOnGPU() {
  if (!fuser::hasFusionBackend(DeviceType::CUDA)) {
    return false;
  }
  return detail::gpu_fuser_enabled;
}
// Sets the global CPU fuser switch (detail::cpu_fuser_enabled) to `value`.
// Note that canFuseOnCPU() also consults the torch_jit_enable_cpu_fusion
// flag, so passing false here does not by itself force it to return false.
void overrideCanFuseOnCPU(bool value) {
detail::cpu_fuser_enabled = value;
}
// Sets the global GPU fuser switch (detail::gpu_fuser_enabled) to `value`.
// canFuseOnGPU() additionally requires a CUDA fusion backend to be present.
void overrideCanFuseOnGPU(bool value) {
detail::gpu_fuser_enabled = value;
}
// Debug entry point built on the interface above: a copy of `graph` is placed
// inside a single prim::FusionGroup node of a fresh wrapper graph, that node
// is registered as a fusion and run on `inputs`, and the resulting stack
// contents are returned as tensors.
//
// NOTE(review): the result of fuser::runFusion() is not checked here, unlike
// in debugGetFusedKernelCode(); confirm whether a failed run should be
// reported to the caller.
std::vector<at::Tensor> debugLaunchGraph(
    Graph& graph,
    at::ArrayRef<at::Tensor> inputs) {
  // Build the wrapper graph and its single fusion group node.
  const auto holder = std::make_shared<Graph>();
  Node* const group_node =
      holder->insertNode(holder->createWithSubgraph(prim::FusionGroup));
  group_node->g_(attr::Subgraph, graph.copy());

  // Mirror the arity of `graph` on the fusion group: one wrapper input per
  // graph input, one registered wrapper output per graph output.
  const size_t num_inputs = graph.inputs().size();
  for (size_t idx = 0; idx != num_inputs; ++idx) {
    group_node->addInput(holder->addInput());
  }
  const size_t num_outputs = graph.outputs().size();
  for (size_t idx = 0; idx != num_outputs; ++idx) {
    holder->registerOutput(group_node->addOutput());
  }

  // Seed the stack with the inputs, then register and run the fusion.
  Stack stack = fmap<IValue>(inputs);
  const auto key = fuser::registerFusion(group_node);
  fuser::runFusion(key, stack);

  // Convert every stack entry to a tensor for the caller.
  const auto as_tensor = [](const IValue& value) { return value.toTensor(); };
  return fmap(stack, as_tensor);
}
// Debug entry point that returns the kernel source generated for `graph`.
// A copy of `graph` is wrapped in a single prim::FusionGroup node, registered
// as a fusion and run on `inputs`; fuser::runFusion() writes the code into a
// local string that is then returned.
//
// Throws std::runtime_error("Could not run fusion for graph") when
// fuser::runFusion() reports failure.
std::string debugGetFusedKernelCode(
    Graph& graph,
    at::ArrayRef<at::Tensor> inputs) {
  // Build the wrapper graph and its single fusion group node.
  const auto holder = std::make_shared<Graph>();
  Node* const group_node =
      holder->insertNode(holder->createWithSubgraph(prim::FusionGroup));
  group_node->g_(attr::Subgraph, graph.copy());

  // Mirror the arity of `graph` on the fusion group.
  const size_t num_inputs = graph.inputs().size();
  for (size_t idx = 0; idx != num_inputs; ++idx) {
    group_node->addInput(holder->addInput());
  }
  const size_t num_outputs = graph.outputs().size();
  for (size_t idx = 0; idx != num_outputs; ++idx) {
    holder->registerOutput(group_node->addOutput());
  }

  // Seed the stack with the inputs, then register and run the fusion,
  // asking the fuser to hand back the generated code.
  Stack stack = fmap<IValue>(inputs);
  const auto key = fuser::registerFusion(group_node);
  std::string code;
  const bool ran = fuser::runFusion(key, stack, &code);
  if (!ran) {
    throw std::runtime_error("Could not run fusion for graph");
  }
  return code;
}
// Returns the value reported by fuser::nCompiledKernels(); a direct forward.
size_t nCompiledKernels() {
  const auto count = fuser::nCompiledKernels();
  return count;
}
} // namespace jit
} // namespace torch