pytorch/benchmarks/cpp/tensorexpr/bench_fuser_overhead.cpp
Bert Maher 468c99fba4 Reapply D25856891: [te] Benchmark comparing fused overhead to unfused (#50543)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/50543

Original commit changeset: 2d2f07f79986

Was part of a stack that got reverted.  This is just a benchmark.
ghstack-source-id: 119825594

Test Plan: CI

Reviewed By: navahgar

Differential Revision: D25912439

fbshipit-source-id: 5d9ca45810fff8931a3cfbd03965e11050180676
2021-01-14 14:17:45 -08:00

58 lines
1.2 KiB
C++

#include <benchmark/benchmark.h>
#include <torch/csrc/jit/codegen/fuser/interface.h>
#include <torch/torch.h>
using namespace torch::jit;
// TorchScript source for the benchmarked method: two chained element-wise
// adds, which the tensor-expression fuser can collapse into one fused kernel.
// NOTE: TorchScript is a Python-syntax subset, so the function body must be
// indented — an unindented body fails to parse in Module::define.
static const std::string two_adds = R"JIT(
def two_adds(self, x: Tensor, y: Tensor, z: Tensor) -> Tensor:
    return x + y + z
)JIT";
static void FusedOverhead(benchmark::State& state) {
torch::NoGradGuard ng;
torch::AutoNonVariableTypeMode nv;
overrideCanFuseOnCPU(true);
Module m("m");
m.define(two_adds);
auto x = torch::ones({1});
auto y = torch::ones({1});
auto z = torch::ones({1});
// Warmup.
for (int i = 0; i < 8; i++) {
m.run_method("two_adds", x, y, z);
}
for (auto _ : state) {
m.run_method("two_adds", x, y, z);
}
}
static void UnfusedOverhead(benchmark::State& state) {
torch::NoGradGuard ng;
torch::AutoNonVariableTypeMode nv;
overrideCanFuseOnCPU(false);
Module m("m");
m.define(two_adds);
auto x = torch::ones({1});
auto y = torch::ones({1});
auto z = torch::ones({1});
// Warmup.
for (int i = 0; i < 8; i++) {
m.run_method("two_adds", x, y, z);
}
for (auto _ : state) {
m.run_method("two_adds", x, y, z);
}
}
BENCHMARK(FusedOverhead);
BENCHMARK(UnfusedOverhead);