mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: This was a casual coding exercise, so I did not put much effort into it, but I wondered whether the current intrusive_ptr implementation is optimized enough, so I compared it with shared_ptr (using std::enable_shared_from_this). My benchmark results show that intrusive_ptr is actually slower. On my MacBook the timings are:
```
---------------------------------------------------------------
Benchmark                        Time           CPU Iterations
---------------------------------------------------------------
BM_IntrusivePtrCtorDtor         14 ns         14 ns   52541902
BM_SharedPtrCtorDtor            10 ns         10 ns   71898849
BM_IntrusivePtrArray         14285 ns      14112 ns      49775
BM_SharedPtrArray            13821 ns      13384 ns      51602
```
I wanted to share the results in case someone is interested in taking a look. Pull Request resolved: https://github.com/pytorch/pytorch/pull/30810 Reviewed By: yinghai Differential Revision: D18828785 Pulled By: bddppq fbshipit-source-id: 202e9849c9d8a3da17edbe568572a74bb70cb6c5
76 lines
1.8 KiB
C++
76 lines
1.8 KiB
C++
#include <c10/util/intrusive_ptr.h>

#include <benchmark/benchmark.h>

#include <cstddef>
#include <memory>
#include <vector>
|
|
|
|
using c10::intrusive_ptr;
|
|
using c10::intrusive_ptr_target;
|
|
using c10::make_intrusive;
|
|
using c10::weak_intrusive_ptr;
|
|
|
|
namespace {
|
|
|
|
// Foo uses intrusive ptr
|
|
class Foo : public intrusive_ptr_target {
|
|
public:
|
|
Foo(int param_) : param(param_) {}
|
|
int param;
|
|
};
|
|
|
|
|
|
// Bar is the std::shared_ptr counterpart of Foo: the shared_ptr benchmarks in
// this file hold it through std::shared_ptr, and it derives from
// std::enable_shared_from_this<Bar>.
class Bar : public std::enable_shared_from_this<Bar> {
 public:
  // explicit: a single-argument constructor should not double as an implicit
  // int -> Bar conversion. Direct initialization (as done through
  // std::make_shared<Bar>(0) in this file) is unaffected.
  explicit Bar(int param_) : param(param_) {}

  // Value supplied at construction.
  int param;
};
|
|
|
|
static void BM_IntrusivePtrCtorDtor(benchmark::State& state) {
|
|
intrusive_ptr<Foo> var = make_intrusive<Foo>(0);
|
|
while (state.KeepRunning()) {
|
|
volatile intrusive_ptr<Foo> var2 = var;
|
|
}
|
|
}
|
|
BENCHMARK(BM_IntrusivePtrCtorDtor);
|
|
|
|
static void BM_SharedPtrCtorDtor(benchmark::State& state) {
|
|
std::shared_ptr<Bar> var = std::make_shared<Bar>(0);
|
|
while (state.KeepRunning()) {
|
|
volatile std::shared_ptr<Bar> var2 = var;
|
|
}
|
|
}
|
|
BENCHMARK(BM_SharedPtrCtorDtor);
|
|
|
|
static void BM_IntrusivePtrArray(benchmark::State& state) {
|
|
intrusive_ptr<Foo> var = make_intrusive<Foo>(0);
|
|
const size_t kLength = state.range(0);
|
|
std::vector<intrusive_ptr<Foo> > vararray(kLength);
|
|
while (state.KeepRunning()) {
|
|
for (int i = 0; i < kLength; ++i) {
|
|
vararray[i] = var;
|
|
}
|
|
for (int i = 0; i < kLength; ++i) {
|
|
vararray[i].reset();
|
|
}
|
|
}
|
|
}
|
|
BENCHMARK(BM_IntrusivePtrArray)->RangeMultiplier(2)->Range(16, 4096);
|
|
|
|
static void BM_SharedPtrArray(benchmark::State& state) {
|
|
std::shared_ptr<Bar> var = std::make_shared<Bar>(0);
|
|
const size_t kLength = state.range(0);
|
|
std::vector<std::shared_ptr<Bar> > vararray(kLength);
|
|
while (state.KeepRunning()) {
|
|
for (int i = 0; i < kLength; ++i) {
|
|
vararray[i] = var;
|
|
}
|
|
for (int i = 0; i < kLength; ++i) {
|
|
vararray[i].reset();
|
|
}
|
|
}
|
|
}
|
|
BENCHMARK(BM_SharedPtrArray)->RangeMultiplier(2)->Range(16, 4096);
|
|
} // namespace
|
|
|
|
|
|
// Program entry point: per the Google Benchmark documentation, this macro
// expands to a main() that runs the benchmarks registered with BENCHMARK().
BENCHMARK_MAIN();
|