pytorch/c10/benchmark/intrusive_ptr_benchmark.cpp
Yangqing Jia 1ef99cf0ab Intrusive_ptr implementation slower than shared_ptr (#30810)
Summary:
This started as a casual coding exercise, so I did not put much effort into it, but I wondered whether the current intrusive_ptr implementation is optimized enough, so I compared it with shared_ptr (using std::enable_shared_from_this).

My benchmark results show that intrusive_ptr is actually slower. On my MacBook the timings are:

```
---------------------------------------------------------------
Benchmark                        Time           CPU Iterations
---------------------------------------------------------------
BM_IntrusivePtrCtorDtor         14 ns         14 ns   52541902
BM_SharedPtrCtorDtor            10 ns         10 ns   71898849
BM_IntrusivePtrArray         14285 ns      14112 ns      49775
BM_SharedPtrArray            13821 ns      13384 ns      51602
```

Wanted to share the results so someone could probably take a look if interested.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/30810

Reviewed By: yinghai

Differential Revision: D18828785

Pulled By: bddppq

fbshipit-source-id: 202e9849c9d8a3da17edbe568572a74bb70cb6c5
2019-12-13 00:25:36 -08:00

76 lines
1.8 KiB
C++

#include <c10/util/intrusive_ptr.h>

#include <benchmark/benchmark.h>

#include <cstddef>
#include <memory>
#include <vector>
using c10::intrusive_ptr;
using c10::intrusive_ptr_target;
using c10::make_intrusive;
using c10::weak_intrusive_ptr;
namespace {
// Payload type for the intrusive_ptr benchmarks: derives from
// intrusive_ptr_target and carries a single int.
class Foo : public intrusive_ptr_target {
 public:
  // explicit: a bare int must not silently convert into a Foo.
  explicit Foo(int param_) : param(param_) {}

  int param;
};
// Payload type for the std::shared_ptr benchmarks: derives from
// std::enable_shared_from_this and carries a single int.
class Bar : public std::enable_shared_from_this<Bar> {
 public:
  // explicit: a bare int must not silently convert into a Bar.
  explicit Bar(int param_) : param(param_) {}

  int param;
};
// Each iteration copy-constructs one intrusive_ptr<Foo> from a pointer that
// was created once before the loop, then destroys the copy at end of scope.
static void BM_IntrusivePtrCtorDtor(benchmark::State& state) {
  auto source = make_intrusive<Foo>(0);
  while (state.KeepRunning()) {
    // NOTE(review): volatile is presumably here so the otherwise-unused copy
    // is not optimized away.
    volatile intrusive_ptr<Foo> copy(source);
  }
}
BENCHMARK(BM_IntrusivePtrCtorDtor);
// Each iteration copy-constructs one std::shared_ptr<Bar> from a pointer that
// was created once before the loop, then destroys the copy at end of scope.
static void BM_SharedPtrCtorDtor(benchmark::State& state) {
  auto source = std::make_shared<Bar>(0);
  while (state.KeepRunning()) {
    // NOTE(review): volatile is presumably here so the otherwise-unused copy
    // is not optimized away.
    volatile std::shared_ptr<Bar> copy(source);
  }
}
BENCHMARK(BM_SharedPtrCtorDtor);
// Each iteration copy-assigns one shared intrusive_ptr<Foo> into every slot of
// a pre-sized vector, then resets every slot again. The vector length comes
// from the benchmark argument state.range(0).
static void BM_IntrusivePtrArray(benchmark::State& state) {
  intrusive_ptr<Foo> var = make_intrusive<Foo>(0);
  // state.range(0) is converted explicitly rather than narrowed implicitly.
  const size_t kLength = static_cast<size_t>(state.range(0));
  std::vector<intrusive_ptr<Foo>> vararray(kLength);
  while (state.KeepRunning()) {
    // Range-for replaces the index loops, which compared a signed int index
    // against the unsigned length.
    for (auto& slot : vararray) {
      slot = var;
    }
    for (auto& slot : vararray) {
      slot.reset();
    }
  }
}
BENCHMARK(BM_IntrusivePtrArray)->RangeMultiplier(2)->Range(16, 4096);
// Each iteration copy-assigns one shared std::shared_ptr<Bar> into every slot
// of a pre-sized vector, then resets every slot again. The vector length comes
// from the benchmark argument state.range(0).
static void BM_SharedPtrArray(benchmark::State& state) {
  std::shared_ptr<Bar> var = std::make_shared<Bar>(0);
  // state.range(0) is converted explicitly rather than narrowed implicitly.
  const size_t kLength = static_cast<size_t>(state.range(0));
  std::vector<std::shared_ptr<Bar>> vararray(kLength);
  while (state.KeepRunning()) {
    // Range-for replaces the index loops, which compared a signed int index
    // against the unsigned length.
    for (auto& slot : vararray) {
      slot = var;
    }
    for (auto& slot : vararray) {
      slot.reset();
    }
  }
}
BENCHMARK(BM_SharedPtrArray)->RangeMultiplier(2)->Range(16, 4096);
} // namespace
// Google Benchmark's entry-point macro: supplies main() for this binary, which
// runs the benchmarks registered via BENCHMARK(...) in this file.
BENCHMARK_MAIN();