One PR towards #89205.
The content is mostly taken from PR #38465, with the expression slightly changed to make it faster.
Here is some benchmarking code:
```c++
#include <complex>
#include <iostream>
#include <chrono>
// main.cc
// this PR
//
// Computes log(1 + z) for a complex z = re + i*im without ever forming the
// modulus |1 + z|. Because |1 + z|^2 = 1 + re*(re + 2) + im^2, the real part
// of the result is 0.5 * log1p(re*(re + 2) + im^2); the imaginary part is the
// argument of 1 + z.
template <typename T>
inline std::complex<T> log1p_v0(const std::complex<T>& z) {
  const T re = z.real();
  const T im = z.imag();
  // Phase angle of (1 + z).
  const T angle = std::atan2(im, re + T(1));
  // |1 + z|^2 - 1, expanded so the "+ 1" is never added and then removed.
  const T sq_norm_minus_one = re * (re + T(2)) + im * im;
  return std::complex<T>(T(0.5) * std::log1p(sq_norm_minus_one), angle);
}
// PR #38465
//
// Computes log(1 + z) by first taking the modulus and argument of (1 + z) and
// then evaluating log1p on (modulus - 1). The quantity (modulus - 1) is
// obtained as (|z|^2 + 2*re) / (modulus + 1), which equals
// (modulus^2 - 1) / (modulus + 1).
template <typename T>
inline std::complex<T> log1p_v1(const std::complex<T>& z) {
  const T re = z.real();
  const T im = z.imag();
  const std::complex<T> shifted = z + T(1);
  const T modulus = std::abs(shifted);
  const T phase = std::arg(shifted);
  // modulus - 1, expressed without subtracting two nearly equal numbers.
  const T modulus_minus_one = (re * re + im * im + re * T(2)) / (modulus + 1);
  return std::complex<T>(std::log1p(modulus_minus_one), phase);
}
// naive, but numerically inaccurate
//
// Baseline for the benchmark: adds 1 to z and hands the sum straight to the
// complex std::log.
template <typename T>
inline std::complex<T> log1p_v2(const std::complex<T>& z) {
  const std::complex<T> one_plus_z = T(1) + z;
  return std::log(one_plus_z);
}
// Benchmark driver: runs each log1p variant n times on the same input and
// prints the elapsed time per variant, in nanoseconds, as
// "time for vK: <count>".
//
// Timing uses std::chrono::steady_clock rather than system_clock: the system
// clock can be adjusted while the program runs, so it is not a reliable source
// for measuring intervals. The elapsed time is converted to nanoseconds
// explicitly so the printed unit does not depend on the clock's native tick.
int main() {
  using bench_clock = std::chrono::steady_clock;

  const int n = 1000000;
  // Accumulating into res (and printing it at the end) keeps the calls from
  // being discarded as dead code.
  std::complex<float> res(0.0, 0.0);
  const std::complex<float> input(0.5, 2.0);

  // Prints one result line. The timed loops stay inline in main so that no
  // extra call overhead is added inside the measured region.
  const auto report = [](const char* label,
                         bench_clock::time_point begin,
                         bench_clock::time_point finish) {
    const auto elapsed =
        std::chrono::duration_cast<std::chrono::nanoseconds>(finish - begin);
    std::cout << "time for " << label << ": " << elapsed.count() << '\n';
  };

  auto start = bench_clock::now();
  for (int i = 0; i < n; i++) {
    res += log1p_v0(input);
  }
  auto end = bench_clock::now();
  report("v0", start, end);

  start = bench_clock::now();
  for (int i = 0; i < n; i++) {
    res += log1p_v1(input);
  }
  end = bench_clock::now();
  report("v1", start, end);

  start = bench_clock::now();
  for (int i = 0; i < n; i++) {
    res += log1p_v2(input);
  }
  end = bench_clock::now();
  report("v2", start, end);

  std::cout << res << '\n';
}
```
Compiling the program with the command `g++ main.cc` and running it produces the following results:
```
time for v0: 237812271
time for v1: 414524941
time for v2: 360585994
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/89214
Approved by: https://github.com/lezcano
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56830
Opt into formatting on GitHub and format everything. This is a trial run before turning on formatting for more and eventually all of the codebase.
Test Plan: CI
Reviewed By: zertosh
Differential Revision: D27979080
fbshipit-source-id: a80f0c48691c08ae8ca0af06377b87e6a2351151
Summary:
This file should have been renamed to `complex.h`, but unfortunately it was named `complex_type.h` due to a name clash with FBCode. Is this still the case, and is it easy to resolve the name clash? This may be related to the comment at https://github.com/pytorch/pytorch/pull/39834#issuecomment-642950012
Pull Request resolved: https://github.com/pytorch/pytorch/pull/39885
Differential Revision: D22018575
Pulled By: ezyang
fbshipit-source-id: e237ccedbe2b30c31aca028a5b4c8c063087a30f
Summary:
Issue: https://github.com/pytorch/pytorch/issues/35284
~This depends on and contains https://github.com/pytorch/pytorch/pull/35524. Please review after the dependency gets merged and I will rebase to get a clean diff.~
The implementation of most functions follows the pattern
```C++
// Schematic wrapper: `some_function` is a placeholder for the name of the
// function being wrapped. The c10::complex<T> argument is cast to the
// backend's complex type, passed to that backend's function of the same name,
// and the result is cast back to c10::complex<T>.
template<typename T>
C10_HOST_DEVICE c10::complex<T> some_function(c10::complex<T> x) {
// When __CUDACC__ or __HIPCC__ is defined, dispatch to the thrust function.
#if defined(__CUDACC__) || defined(__HIPCC__)
return static_cast<c10::complex<T>>(thrust::some_function(static_cast<thrust::complex<T>>(x)));
// Otherwise dispatch to the std function.
#else
return static_cast<c10::complex<T>>(std::some_function(static_cast<std::complex<T>>(x)));
#endif
}
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/35725
Differential Revision: D21256854
Pulled By: ezyang
fbshipit-source-id: 2112ba6b79923450feafd7ebdc7184a3eaecadb6