mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[Pytorch] Specialize guts of c10::optional for 32-bit scalars (#47015)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/47015 c10::optional has non-trivial copy and move operations always. This change specializes it for 32-bit scalars so that it has trivial copy and move operations in that case. Ideally, we would instead rely on P0602 "variant and optional should propagate copy/move triviality" and use `std::optional` (or implement that functionality ourselves). We can't use `std::optional` because we are stuck with C++14. Implementing the full P0602 ourselves would add even more complexity. We could do it, but this should be a helpful first step. ghstack-source-id: 115886743 Test Plan: Collect Callgrind instruction counts for `torch.empty(())`. Data: Make empty c10-ful (https://github.com/pytorch/pytorch/pull/46092): ``` <torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7ffaed1128e0> torch.empty(()) All Noisy symbols removed Instructions: 648005 632899 Baseline: 4144 3736 100 runs per measurement, 1 thread ``` This diff atop #46092: ``` <torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f943f1dc8e0> torch.empty(()) All Noisy symbols removed Instructions: 602347 591005 Baseline: 4106 3736 100 runs per measurement, 1 thread ``` (6.6% improvement vs #46092) Pass optionals by const reference (https://github.com/pytorch/pytorch/pull/46598) ``` <torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f1abb3988e0> torch.empty(()) All Noisy symbols removed Instructions: 601349 590005 Baseline: 4162 3736 100 runs per measurement, 1 thread ``` (6.8% improvement vs #46092) This diff atop #46598 (i.e., both together) ``` <torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f9577c22850> torch.empty(()) All Noisy symbols removed Instructions: 596095 582451 Baseline: 4162 3736 100 runs per measurement, 1 thread Warning: PyTorch was not built with debug symbols. 
Source information may be limited. Rebuild with REL_WITH_DEB_INFO=1 for more detailed results. ``` (another 1.3% savings!) #46598 outperformed this change slightly, and combining the two leads to further benefits. I guess we should do both! (Though I still don't understand why passing optionals that should fit in a register by const reference would help...) Reviewed By: smessmer Differential Revision: D24552280 fbshipit-source-id: 4d93bfcffafebd8c01559398513fa6b9db959d11
This commit is contained in:
parent
0edc6a39c8
commit
df5b4696cf
|
|
@ -1 +1,6 @@
|
|||
#include <c10/util/Optional.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
static_assert(C10_IS_TRIVIALLY_COPYABLE(c10::optional<int>), "c10::optional<int> should be trivially copyable");
|
||||
static_assert(C10_IS_TRIVIALLY_COPYABLE(c10::optional<bool>), "c10::optional<bool> should be trivially copyable");
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
#ifndef C10_UTIL_OPTIONAL_H_
|
||||
#define C10_UTIL_OPTIONAL_H_
|
||||
|
||||
#include <c10/macros/Macros.h>
|
||||
#include <c10/util/in_place.h>
|
||||
|
||||
#include <cassert>
|
||||
|
|
@ -184,8 +185,22 @@ struct optional_base {
|
|||
|
||||
constexpr optional_base() noexcept : init_(false), storage_(trivial_init){};
|
||||
|
||||
explicit constexpr optional_base(const optional_base<T>& v) : init_(v.init_), storage_(trivial_init) {
|
||||
if (init_) {
|
||||
::new (dataptr()) T(v.storage_.value_);
|
||||
}
|
||||
}
|
||||
|
||||
explicit constexpr optional_base(const T& v) : init_(true), storage_(v) {}
|
||||
|
||||
explicit constexpr optional_base(optional_base<T>&& v) noexcept(
|
||||
std::is_nothrow_move_constructible<T>::value)
|
||||
: init_(v.init_), storage_(trivial_init) {
|
||||
if (init_) {
|
||||
::new (dataptr()) T(std::move(v.storage_.value_));
|
||||
}
|
||||
}
|
||||
|
||||
explicit constexpr optional_base(T&& v)
|
||||
: init_(true), storage_(constexpr_move(v)) {}
|
||||
|
||||
|
|
@ -203,10 +218,52 @@ struct optional_base {
|
|||
Args&&... args)
|
||||
: init_(true), storage_(il, std::forward<Args>(args)...) {}
|
||||
|
||||
// Copy assignment.
//   engaged    <- disengaged : destroy our value (clear()).
//   disengaged <- engaged    : copy-construct a T in place from rhs's value.
//   engaged    <- engaged    : delegate to T's copy assignment (this is also
//                              the path taken on self-assignment).
//   disengaged <- disengaged : nothing to do.
// Returns *this.
optional_base& operator=(const optional_base& rhs) {
  if (init_ && !rhs.init_) {
    clear();
  } else if (!init_ && rhs.init_) {
    // Construct first and only then mark the optional as engaged. If T's
    // copy constructor throws, init_ must still be false; otherwise the
    // destructor would later run ~T() on storage that never held a T.
    // The void* cast pins the call to the standard placement operator new.
    ::new (static_cast<void*>(dataptr())) T(rhs.storage_.value_);
    init_ = true;
  } else if (init_ && rhs.init_) {
    storage_.value_ = rhs.storage_.value_;
  }
  return *this;
}
|
||||
|
||||
// Move assignment.
//   engaged    <- disengaged : destroy our value (clear()).
//   disengaged <- engaged    : move-construct a T in place from rhs's value.
//   engaged    <- engaged    : delegate to T's move assignment.
//   disengaged <- disengaged : nothing to do.
// rhs.init_ is never modified, so an engaged rhs stays engaged and holds a
// moved-from T. noexcept only when both T's move assignment and move
// construction are. Returns *this.
optional_base& operator=(optional_base&& rhs) noexcept(
    std::is_nothrow_move_assignable<T>::value &&
    std::is_nothrow_move_constructible<T>::value) {
  if (init_ && !rhs.init_) {
    clear();
  } else if (!init_ && rhs.init_) {
    // Construct first and only then mark the optional as engaged. When T's
    // move constructor may throw, init_ must still be false on failure;
    // otherwise the destructor would later run ~T() on storage that never
    // held a T. The void* cast pins the call to the standard placement
    // operator new.
    ::new (static_cast<void*>(dataptr())) T(std::move(rhs.storage_.value_));
    init_ = true;
  } else if (init_ && rhs.init_) {
    storage_.value_ = std::move(rhs.storage_.value_);
  }
  return *this;
}
|
||||
|
||||
// Destructor: runs T's destructor on the stored value, but only when the
// optional is engaged. A disengaged optional has nothing to destroy.
~optional_base() {
  if (!init_) {
    return;
  }
  storage_.value_.T::~T();
}
|
||||
|
||||
private:
|
||||
typename std::remove_const<T>::type* dataptr() {
|
||||
return std::addressof(storage_.value_);
|
||||
}
|
||||
|
||||
// Read-only address of the payload slot inside storage_, obtained through
// detail_::static_addressof (presumably a constexpr-friendly addressof --
// confirm against its definition).
constexpr const T* dataptr() const {
  return detail_::static_addressof(this->storage_.value_);
}
|
||||
|
||||
void clear() noexcept {
|
||||
if (init_) {
|
||||
dataptr()->~T();
|
||||
}
|
||||
init_ = false;
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
|
|
@ -217,6 +274,20 @@ struct constexpr_optional_base {
|
|||
constexpr constexpr_optional_base() noexcept
|
||||
: init_(false), storage_(trivial_init){};
|
||||
|
||||
explicit constexpr constexpr_optional_base(const constexpr_optional_base<T>& v) : init_(v.init_), storage_(trivial_init) {
|
||||
if (init_) {
|
||||
::new (dataptr()) T(v.storage_.value_);
|
||||
}
|
||||
}
|
||||
|
||||
explicit constexpr constexpr_optional_base(constexpr_optional_base<T>&& v) noexcept(
|
||||
std::is_nothrow_move_constructible<T>::value)
|
||||
: init_(v.init_), storage_(trivial_init) {
|
||||
if (init_) {
|
||||
::new (dataptr()) T(std::move(v.storage_.value_));
|
||||
}
|
||||
}
|
||||
|
||||
explicit constexpr constexpr_optional_base(const T& v)
|
||||
: init_(true), storage_(v) {}
|
||||
|
||||
|
|
@ -238,23 +309,112 @@ struct constexpr_optional_base {
|
|||
: init_(true), storage_(il, std::forward<Args>(args)...) {}
|
||||
|
||||
~constexpr_optional_base() = default;
|
||||
|
||||
// Copy assignment.
//   engaged    <- disengaged : drop the engaged flag (clear()).
//   disengaged <- engaged    : copy-construct a T in place from rhs's value.
//   engaged    <- engaged    : delegate to T's copy assignment (this is also
//                              the path taken on self-assignment).
//   disengaged <- disengaged : nothing to do.
// Returns *this.
constexpr_optional_base& operator=(const constexpr_optional_base& rhs) {
  if (init_ && !rhs.init_) {
    clear();
  } else if (!init_ && rhs.init_) {
    // Construct first and only then mark the optional as engaged, so that a
    // throwing copy constructor leaves *this disengaged instead of claiming
    // to hold a value that was never constructed. The void* cast pins the
    // call to the standard placement operator new.
    ::new (static_cast<void*>(dataptr())) T(rhs.storage_.value_);
    init_ = true;
  } else if (init_ && rhs.init_) {
    storage_.value_ = rhs.storage_.value_;
  }
  return *this;
}
|
||||
|
||||
// Move assignment.
//   engaged    <- disengaged : drop the engaged flag (clear()).
//   disengaged <- engaged    : move-construct a T in place from rhs's value.
//   engaged    <- engaged    : delegate to T's move assignment.
//   disengaged <- disengaged : nothing to do.
// rhs.init_ is never modified, so an engaged rhs stays engaged. noexcept
// only when both T's move assignment and move construction are.
// Returns *this.
constexpr_optional_base& operator=(constexpr_optional_base&& rhs) noexcept(
    std::is_nothrow_move_assignable<T>::value &&
    std::is_nothrow_move_constructible<T>::value) {
  if (init_ && !rhs.init_) {
    clear();
  } else if (!init_ && rhs.init_) {
    // Construct first and only then mark the optional as engaged, so that a
    // throwing move constructor leaves *this disengaged instead of claiming
    // to hold a value that was never constructed. The void* cast pins the
    // call to the standard placement operator new.
    ::new (static_cast<void*>(dataptr())) T(std::move(rhs.storage_.value_));
    init_ = true;
  } else if (init_ && rhs.init_) {
    storage_.value_ = std::move(rhs.storage_.value_);
  }
  return *this;
}
|
||||
|
||||
private:
|
||||
typename std::remove_const<T>::type* dataptr() {
|
||||
return std::addressof(storage_.value_);
|
||||
}
|
||||
|
||||
// Read-only address of the payload slot inside storage_, obtained through
// detail_::static_addressof (presumably a constexpr-friendly addressof --
// confirm against its definition).
constexpr const T* dataptr() const {
  return detail_::static_addressof(this->storage_.value_);
}
|
||||
|
||||
void clear() noexcept {
|
||||
init_ = false;
|
||||
}
|
||||
};
|
||||
|
||||
// HACK: Optimization for trivially copyable types. The mainline
|
||||
// implementation fails to have trivial copy/move operations in these
|
||||
// cases, and we care about them, so just implement that directly.
|
||||
template <class T>
|
||||
struct trivially_copyable_optimization_optional_base {
|
||||
bool init_;
|
||||
constexpr_storage_t<T> storage_;
|
||||
|
||||
constexpr trivially_copyable_optimization_optional_base() noexcept
|
||||
: init_(false), storage_(trivial_init) {}
|
||||
|
||||
explicit constexpr trivially_copyable_optimization_optional_base(const T& v)
|
||||
: init_(true), storage_(v) {}
|
||||
|
||||
explicit constexpr trivially_copyable_optimization_optional_base(T&& v)
|
||||
: init_(true), storage_(constexpr_move(v)) {}
|
||||
|
||||
template <class... Args>
|
||||
explicit constexpr trivially_copyable_optimization_optional_base(in_place_t, Args&&... args)
|
||||
: init_(true), storage_(constexpr_forward<Args>(args)...) {}
|
||||
|
||||
template <
|
||||
class U,
|
||||
class... Args,
|
||||
TR2_OPTIONAL_REQUIRES(std::is_constructible<T, std::initializer_list<U>>)>
|
||||
constexpr explicit trivially_copyable_optimization_optional_base(
|
||||
in_place_t,
|
||||
std::initializer_list<U> il,
|
||||
Args&&... args)
|
||||
: init_(true), storage_(il, std::forward<Args>(args)...) {}
|
||||
|
||||
~trivially_copyable_optimization_optional_base() = default;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
using OptionalBase = typename std::conditional<
|
||||
std::is_trivially_destructible<T>::value, // if possible
|
||||
constexpr_optional_base<typename std::remove_const<
|
||||
T>::type>, // use base with trivial destructor
|
||||
optional_base<typename std::remove_const<T>::type>>::type;
|
||||
std::is_trivially_destructible<T>::value &&
|
||||
C10_IS_TRIVIALLY_COPYABLE(T) &&
|
||||
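// Avoid using is_trivially_copy_{constructible,assignable}
// because old GCC versions don't support them. Also,
// C10_IS_TRIVIALLY_COPYABLE(T) alone is not sufficient: a trivially
// copyable T may still have deleted copy operations (e.g. a class
// with a const member is not copy-assignable), so additionally check
// that trivially_copyable_optimization_optional_base<T> itself is
// copy constructible and copy assignable.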
|
||||
std::is_copy_constructible<trivially_copyable_optimization_optional_base<T>>::value &&
|
||||
std::is_copy_assignable<trivially_copyable_optimization_optional_base<T>>::value,
|
||||
trivially_copyable_optimization_optional_base<T>,
|
||||
typename std::conditional<
|
||||
std::is_trivially_destructible<T>::value, // if possible
|
||||
constexpr_optional_base<typename std::remove_const<
|
||||
T>::type>, // use base with trivial destructor
|
||||
optional_base<typename std::remove_const<T>::type>>::type>::type;
|
||||
|
||||
template <class T>
|
||||
class optional : private OptionalBase<T> {
|
||||
template <class U> // re-declaration for nvcc on Windows.
|
||||
using OptionalBase = typename std::conditional<
|
||||
std::is_trivially_destructible<U>::value, // if possible
|
||||
constexpr_optional_base<typename std::remove_const<
|
||||
U>::type>, // use base with trivial destructor
|
||||
optional_base<typename std::remove_const<U>::type>>::type;
|
||||
std::is_trivially_destructible<U>::value &&
|
||||
C10_IS_TRIVIALLY_COPYABLE(U) &&
|
||||
std::is_copy_constructible<trivially_copyable_optimization_optional_base<U>>::value &&
|
||||
std::is_copy_assignable<trivially_copyable_optimization_optional_base<U>>::value,
|
||||
trivially_copyable_optimization_optional_base<U>,
|
||||
typename std::conditional<
|
||||
std::is_trivially_destructible<U>::value, // if possible
|
||||
constexpr_optional_base<typename std::remove_const<
|
||||
U>::type>, // use base with trivial destructor
|
||||
optional_base<typename std::remove_const<U>::type>>::type>::type;
|
||||
|
||||
static_assert(
|
||||
!std::is_same<typename std::decay<T>::type, nullopt_t>::value,
|
||||
|
|
@ -312,21 +472,9 @@ class optional : private OptionalBase<T> {
|
|||
constexpr optional() noexcept : OptionalBase<T>(){};
|
||||
constexpr optional(nullopt_t) noexcept : OptionalBase<T>(){};
|
||||
|
||||
optional(const optional& rhs) : OptionalBase<T>() {
|
||||
if (rhs.initialized()) {
|
||||
::new (static_cast<void*>(dataptr())) T(*rhs);
|
||||
OptionalBase<T>::init_ = true;
|
||||
}
|
||||
}
|
||||
optional(const optional& rhs) = default;
|
||||
|
||||
optional(optional&& rhs) noexcept(
|
||||
std::is_nothrow_move_constructible<T>::value)
|
||||
: OptionalBase<T>() {
|
||||
if (rhs.initialized()) {
|
||||
::new (static_cast<void*>(dataptr())) T(std::move(*rhs));
|
||||
OptionalBase<T>::init_ = true;
|
||||
}
|
||||
}
|
||||
optional(optional&& rhs) = default;
|
||||
|
||||
// see https://github.com/akrzemi1/Optional/issues/16
|
||||
// and https://en.cppreference.com/w/cpp/utility/optional/optional,
|
||||
|
|
@ -380,27 +528,9 @@ class optional : private OptionalBase<T> {
|
|||
return *this;
|
||||
}
|
||||
|
||||
optional& operator=(const optional& rhs) {
|
||||
if (initialized() == true && rhs.initialized() == false)
|
||||
clear();
|
||||
else if (initialized() == false && rhs.initialized() == true)
|
||||
initialize(*rhs);
|
||||
else if (initialized() == true && rhs.initialized() == true)
|
||||
contained_val() = *rhs;
|
||||
return *this;
|
||||
}
|
||||
optional& operator=(const optional& rhs) = default;
|
||||
|
||||
optional& operator=(optional&& rhs) noexcept(
|
||||
std::is_nothrow_move_assignable<T>::value&&
|
||||
std::is_nothrow_move_constructible<T>::value) {
|
||||
if (initialized() == true && rhs.initialized() == false)
|
||||
clear();
|
||||
else if (initialized() == false && rhs.initialized() == true)
|
||||
initialize(std::move(*rhs));
|
||||
else if (initialized() == true && rhs.initialized() == true)
|
||||
contained_val() = std::move(*rhs);
|
||||
return *this;
|
||||
}
|
||||
optional& operator=(optional&& rhs) = default;
|
||||
|
||||
template<class U = T>
|
||||
auto operator=(U&& v) -> typename std::enable_if<
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user