[Pytorch] Specialize guts of c10::optional for 32-bit scalars (#47015)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/47015

c10::optional always has non-trivial copy and move operations. This change specializes it for 32-bit scalars so that it has trivial copy and move operations in that case. Ideally, we would instead rely on P0602 "variant and optional should propagate copy/move triviality" and use `std::optional` (or implement that functionality ourselves). We can't use `std::optional` because we are stuck with C++14. Implementing the full P0602 ourselves would add even more complexity. We could do it, but this should be a helpful first step.
ghstack-source-id: 115886743

Test Plan:
Collect Callgrind instruction counts for `torch.empty(())`. Data:

Make empty c10-ful (https://github.com/pytorch/pytorch/pull/46092):

```
<torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7ffaed1128e0>
torch.empty(())
                           All          Noisy symbols removed
    Instructions:       648005                     632899
    Baseline:             4144                       3736
100 runs per measurement, 1 thread
```

This diff atop #46092:

```
<torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f943f1dc8e0>
torch.empty(())
                           All          Noisy symbols removed
    Instructions:       602347                     591005
    Baseline:             4106                       3736
100 runs per measurement, 1 thread
```

(6.6% improvement vs #46092)

Pass optionals by const reference (https://github.com/pytorch/pytorch/pull/46598)

```
<torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f1abb3988e0>
torch.empty(())
                           All          Noisy symbols removed
    Instructions:       601349                     590005
    Baseline:             4162                       3736
100 runs per measurement, 1 thread
```
(6.8% improvement vs #46092)

This diff atop #46598 (i.e., both together)

```
<torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f9577c22850>
torch.empty(())
                           All          Noisy symbols removed
    Instructions:       596095                     582451
    Baseline:             4162                       3736
100 runs per measurement, 1 thread
Warning: PyTorch was not built with debug symbols.
         Source information may be limited. Rebuild with
         REL_WITH_DEB_INFO=1 for more detailed results.
```

(another 1.3% savings!)

#46598 outperformed this change slightly, and combining the two leads to further benefits. I guess we should do both! (Though I still don't understand why passing optionals that should fit in a register by const reference would help...)

Reviewed By: smessmer

Differential Revision: D24552280

fbshipit-source-id: 4d93bfcffafebd8c01559398513fa6b9db959d11
This commit is contained in:
Scott Wolchok 2020-11-04 20:54:56 -08:00 committed by Facebook GitHub Bot
parent 0edc6a39c8
commit df5b4696cf
2 changed files with 177 additions and 42 deletions

View File

@ -1 +1,6 @@
#include <c10/util/Optional.h>
#include <type_traits>
// Compile-time regression guards: the build fails with the given message if
// c10::optional<int> or c10::optional<bool> stops satisfying
// C10_IS_TRIVIALLY_COPYABLE.
static_assert(C10_IS_TRIVIALLY_COPYABLE(c10::optional<int>), "c10::optional<int> should be trivially copyable");
static_assert(C10_IS_TRIVIALLY_COPYABLE(c10::optional<bool>), "c10::optional<bool> should be trivially copyable");

View File

@ -25,6 +25,7 @@
#ifndef C10_UTIL_OPTIONAL_H_
#define C10_UTIL_OPTIONAL_H_
#include <c10/macros/Macros.h>
#include <c10/util/in_place.h>
#include <cassert>
@ -184,8 +185,22 @@ struct optional_base {
// Default constructor: creates a disengaged base (init_ == false). storage_ is
// built from the trivial_init tag rather than from a T value.
constexpr optional_base() noexcept
    : init_(false), storage_(trivial_init) {}
// Copy constructor: takes over v's engaged flag and, when v holds a value,
// copy-constructs that value into this object's storage with placement new.
explicit constexpr optional_base(const optional_base<T>& v)
    : init_(v.init_), storage_(trivial_init) {
  // init_ was just initialized from v.init_, so testing either is equivalent.
  if (v.init_) {
    ::new (dataptr()) T(v.storage_.value_);
  }
}
// Value constructor: creates an engaged base (init_ == true) whose storage is
// initialized from v.
explicit constexpr optional_base(const T& v) : init_(true), storage_(v) {}
// Move constructor: takes over v's engaged flag and, when v holds a value,
// move-constructs that value into this object's storage. v.init_ is not
// modified. noexcept exactly when T is nothrow move constructible.
explicit constexpr optional_base(optional_base<T>&& v) noexcept(
    std::is_nothrow_move_constructible<T>::value)
    : init_(v.init_), storage_(trivial_init) {
  // init_ was just initialized from v.init_, so testing either is equivalent.
  if (v.init_) {
    ::new (dataptr()) T(std::move(v.storage_.value_));
  }
}
// Rvalue value constructor: creates an engaged base (init_ == true) whose
// storage is initialized from constexpr_move(v).
explicit constexpr optional_base(T&& v)
: init_(true), storage_(constexpr_move(v)) {}
@ -203,10 +218,52 @@ struct optional_base {
Args&&... args)
: init_(true), storage_(il, std::forward<Args>(args)...) {}
// Copy assignment. Cases on (this engaged, rhs engaged):
//   engaged    <- disengaged : destroy our value via clear()
//   disengaged <- engaged    : copy-construct a T in place
//   engaged    <- engaged    : copy-assign the contained T
//   disengaged <- disengaged : nothing to do
// init_ is raised only after the placement new has completed. If T's copy
// constructor throws, this object therefore stays disengaged and
// ~optional_base() will not run ~T() on storage that never held a T.
optional_base& operator=(const optional_base& rhs) {
  if (rhs.init_) {
    if (init_) {
      storage_.value_ = rhs.storage_.value_;
    } else {
      ::new (dataptr()) T(rhs.storage_.value_);
      init_ = true;
    }
  } else if (init_) {
    clear();
  }
  return *this;
}
// Move assignment. Cases on (this engaged, rhs engaged):
//   engaged    <- disengaged : destroy our value via clear()
//   disengaged <- engaged    : move-construct a T in place
//   engaged    <- engaged    : move-assign the contained T
//   disengaged <- disengaged : nothing to do
// rhs.init_ is never modified. init_ is raised only after the placement new
// has completed, so a throwing move constructor (possible when the noexcept
// condition below is false) leaves this object disengaged instead of marking
// unconstructed storage as holding a value.
optional_base& operator=(optional_base&& rhs) noexcept(
    std::is_nothrow_move_assignable<T>::value &&
    std::is_nothrow_move_constructible<T>::value) {
  if (rhs.init_) {
    if (init_) {
      storage_.value_ = std::move(rhs.storage_.value_);
    } else {
      ::new (dataptr()) T(std::move(rhs.storage_.value_));
      init_ = true;
    }
  } else if (init_) {
    clear();
  }
  return *this;
}
// Destructor: runs T's destructor on the stored value only when the base is
// engaged; a disengaged base has nothing to destroy.
~optional_base() {
  if (!init_) {
    return;
  }
  storage_.value_.T::~T();
}
private:
// Address of the value slot inside storage_, typed as pointer to T with any
// top-level const removed. Used as the target of placement new and for
// explicit destructor calls in this struct.
typename std::remove_const<T>::type* dataptr() {
return std::addressof(storage_.value_);
}
// Const overload, usable in constant expressions. detail_::static_addressof is
// defined elsewhere in this header (presumably a constexpr-friendly
// address-of; confirm there).
constexpr const T* dataptr() const {
return detail_::static_addressof(storage_.value_);
}
// Puts the base into the disengaged state. If a value is present it is
// destroyed first, then the flag is lowered; if not, there is nothing to do
// because init_ is already false.
void clear() noexcept {
  if (!init_) {
    return;
  }
  dataptr()->~T();
  init_ = false;
}
};
template <class T>
@ -217,6 +274,20 @@ struct constexpr_optional_base {
// Default constructor: creates a disengaged base (init_ == false). storage_ is
// built from the trivial_init tag rather than from a T value.
constexpr constexpr_optional_base() noexcept
    : init_(false),
      storage_(trivial_init) {}
// Copy constructor: takes over v's engaged flag and, when v holds a value,
// copy-constructs that value into this object's storage with placement new.
explicit constexpr constexpr_optional_base(
    const constexpr_optional_base<T>& v)
    : init_(v.init_), storage_(trivial_init) {
  // init_ was just initialized from v.init_, so testing either is equivalent.
  if (v.init_) {
    ::new (dataptr()) T(v.storage_.value_);
  }
}
// Move constructor: takes over v's engaged flag and, when v holds a value,
// move-constructs that value into this object's storage. v.init_ is not
// modified. noexcept exactly when T is nothrow move constructible.
explicit constexpr constexpr_optional_base(
    constexpr_optional_base<T>&& v) noexcept(
    std::is_nothrow_move_constructible<T>::value)
    : init_(v.init_), storage_(trivial_init) {
  // init_ was just initialized from v.init_, so testing either is equivalent.
  if (v.init_) {
    ::new (dataptr()) T(std::move(v.storage_.value_));
  }
}
// Value constructor: creates an engaged base (init_ == true) whose storage is
// initialized from v.
explicit constexpr constexpr_optional_base(const T& v)
: init_(true), storage_(v) {}
@ -238,23 +309,112 @@ struct constexpr_optional_base {
: init_(true), storage_(il, std::forward<Args>(args)...) {}
// Defaulted destructor: unlike optional_base, no explicit ~T() call is made.
// OptionalBase only selects this struct when T is trivially destructible.
~constexpr_optional_base() = default;
// Copy assignment. Cases on (this engaged, rhs engaged):
//   engaged    <- disengaged : drop our value via clear()
//   disengaged <- engaged    : copy-construct a T in place
//   engaged    <- engaged    : copy-assign the contained T
//   disengaged <- disengaged : nothing to do
// init_ is raised only after the placement new has completed, so a throwing
// copy constructor cannot leave this base reporting a value that was never
// constructed.
constexpr_optional_base& operator=(const constexpr_optional_base& rhs) {
  if (rhs.init_) {
    if (init_) {
      storage_.value_ = rhs.storage_.value_;
    } else {
      ::new (dataptr()) T(rhs.storage_.value_);
      init_ = true;
    }
  } else if (init_) {
    clear();
  }
  return *this;
}
// Move assignment. Cases on (this engaged, rhs engaged):
//   engaged    <- disengaged : drop our value via clear()
//   disengaged <- engaged    : move-construct a T in place
//   engaged    <- engaged    : move-assign the contained T
//   disengaged <- disengaged : nothing to do
// rhs.init_ is never modified. init_ is raised only after the placement new
// has completed, so a throwing move constructor (possible when the noexcept
// condition below is false) cannot leave this base reporting a value that was
// never constructed.
constexpr_optional_base& operator=(constexpr_optional_base&& rhs) noexcept(
    std::is_nothrow_move_assignable<T>::value &&
    std::is_nothrow_move_constructible<T>::value) {
  if (rhs.init_) {
    if (init_) {
      storage_.value_ = std::move(rhs.storage_.value_);
    } else {
      ::new (dataptr()) T(std::move(rhs.storage_.value_));
      init_ = true;
    }
  } else if (init_) {
    clear();
  }
  return *this;
}
private:
// Address of the value slot inside storage_, typed as pointer to T with any
// top-level const removed. Used as the placement-new target by the copy/move
// constructors and assignment operators of this struct.
typename std::remove_const<T>::type* dataptr() {
return std::addressof(storage_.value_);
}
// Const overload, usable in constant expressions. detail_::static_addressof is
// defined elsewhere in this header (presumably a constexpr-friendly
// address-of; confirm there).
constexpr const T* dataptr() const {
return detail_::static_addressof(storage_.value_);
}
// Puts the base into the disengaged state by lowering the flag only. No
// destructor call is made here, in contrast to optional_base::clear().
void clear() noexcept {
init_ = false;
}
};
// HACK: Optimization for trivially copyable types. The mainline
// implementation fails to have trivial copy/move operations in these
// cases, and we care about them, so just implement that directly.
//
// Same data layout as the other bases in this file: an engaged flag plus the
// value storage. The difference is what is NOT declared: there are no
// user-written copy/move constructors or assignment operators, so those are
// left to the compiler, and the destructor is explicitly defaulted. Adding a
// user-provided special member here would defeat the purpose stated above.
template <class T>
struct trivially_copyable_optimization_optional_base {
// True when storage_ holds a value.
bool init_;
// Value storage; constexpr_storage_t is defined elsewhere in this header.
constexpr_storage_t<T> storage_;
// Disengaged state; storage_ is built from the trivial_init tag.
constexpr trivially_copyable_optimization_optional_base() noexcept
: init_(false), storage_(trivial_init) {}
// Engaged state holding a copy of v.
explicit constexpr trivially_copyable_optimization_optional_base(const T& v)
: init_(true), storage_(v) {}
// Engaged state holding a value initialized from constexpr_move(v).
explicit constexpr trivially_copyable_optimization_optional_base(T&& v)
: init_(true), storage_(constexpr_move(v)) {}
// In-place construction: forwards args to storage_ (via constexpr_forward).
template <class... Args>
explicit constexpr trivially_copyable_optimization_optional_base(in_place_t, Args&&... args)
: init_(true), storage_(constexpr_forward<Args>(args)...) {}
// In-place construction from an initializer_list plus extra args. Enabled
// only when T is constructible from std::initializer_list<U>. Note this
// overload forwards with std::forward, not constexpr_forward.
template <
class U,
class... Args,
TR2_OPTIONAL_REQUIRES(std::is_constructible<T, std::initializer_list<U>>)>
constexpr explicit trivially_copyable_optimization_optional_base(
in_place_t,
std::initializer_list<U> il,
Args&&... args)
: init_(true), storage_(il, std::forward<Args>(args)...) {}
// Defaulted: no explicit ~T() call is made by this base.
~trivially_copyable_optimization_optional_base() = default;
};
template <class T>
using OptionalBase = typename std::conditional<
std::is_trivially_destructible<T>::value, // if possible
constexpr_optional_base<typename std::remove_const<
T>::type>, // use base with trivial destructor
optional_base<typename std::remove_const<T>::type>>::type;
std::is_trivially_destructible<T>::value &&
C10_IS_TRIVIALLY_COPYABLE(T) &&
// Avoid using is_trivially_copy_{constructible,assignable}
// because old GCC versions don't support them. Also,
// is_trivially_copyable seems not to do what I expect, so check
// trivially_copyable_optimization_optional_base directly.
std::is_copy_constructible<trivially_copyable_optimization_optional_base<T>>::value &&
std::is_copy_assignable<trivially_copyable_optimization_optional_base<T>>::value,
trivially_copyable_optimization_optional_base<T>,
typename std::conditional<
std::is_trivially_destructible<T>::value, // if possible
constexpr_optional_base<typename std::remove_const<
T>::type>, // use base with trivial destructor
optional_base<typename std::remove_const<T>::type>>::type>::type;
template <class T>
class optional : private OptionalBase<T> {
template <class U> // re-declaration for nvcc on Windows.
using OptionalBase = typename std::conditional<
std::is_trivially_destructible<U>::value, // if possible
constexpr_optional_base<typename std::remove_const<
U>::type>, // use base with trivial destructor
optional_base<typename std::remove_const<U>::type>>::type;
std::is_trivially_destructible<U>::value &&
C10_IS_TRIVIALLY_COPYABLE(U) &&
std::is_copy_constructible<trivially_copyable_optimization_optional_base<U>>::value &&
std::is_copy_assignable<trivially_copyable_optimization_optional_base<U>>::value,
trivially_copyable_optimization_optional_base<U>,
typename std::conditional<
std::is_trivially_destructible<U>::value, // if possible
constexpr_optional_base<typename std::remove_const<
U>::type>, // use base with trivial destructor
optional_base<typename std::remove_const<U>::type>>::type>::type;
static_assert(
!std::is_same<typename std::decay<T>::type, nullopt_t>::value,
@ -312,21 +472,9 @@ class optional : private OptionalBase<T> {
// Default constructor: delegates to the default constructor of the selected
// OptionalBase<T>.
constexpr optional() noexcept
    : OptionalBase<T>() {}
// Construction from nullopt: same as default construction, delegating to the
// default constructor of the selected OptionalBase<T>.
constexpr optional(nullopt_t) noexcept
    : OptionalBase<T>() {}
optional(const optional& rhs) : OptionalBase<T>() {
if (rhs.initialized()) {
::new (static_cast<void*>(dataptr())) T(*rhs);
OptionalBase<T>::init_ = true;
}
}
optional(const optional& rhs) = default;
optional(optional&& rhs) noexcept(
std::is_nothrow_move_constructible<T>::value)
: OptionalBase<T>() {
if (rhs.initialized()) {
::new (static_cast<void*>(dataptr())) T(std::move(*rhs));
OptionalBase<T>::init_ = true;
}
}
optional(optional&& rhs) = default;
// see https://github.com/akrzemi1/Optional/issues/16
// and https://en.cppreference.com/w/cpp/utility/optional/optional,
@ -380,27 +528,9 @@ class optional : private OptionalBase<T> {
return *this;
}
optional& operator=(const optional& rhs) {
if (initialized() == true && rhs.initialized() == false)
clear();
else if (initialized() == false && rhs.initialized() == true)
initialize(*rhs);
else if (initialized() == true && rhs.initialized() == true)
contained_val() = *rhs;
return *this;
}
optional& operator=(const optional& rhs) = default;
optional& operator=(optional&& rhs) noexcept(
std::is_nothrow_move_assignable<T>::value&&
std::is_nothrow_move_constructible<T>::value) {
if (initialized() == true && rhs.initialized() == false)
clear();
else if (initialized() == false && rhs.initialized() == true)
initialize(std::move(*rhs));
else if (initialized() == true && rhs.initialized() == true)
contained_val() = std::move(*rhs);
return *this;
}
optional& operator=(optional&& rhs) = default;
template<class U = T>
auto operator=(U&& v) -> typename std::enable_if<