Simplify c10::guts::apply (#164566)

There is only one call site of `c10::guts::apply` that can be replaced by `:std::apply` except for ROCm. This PR therefore simplifies the implementation of `c10::guts::apply`.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/164566
Approved by: https://github.com/Aidyn-A, https://github.com/albanD
This commit is contained in:
Yuanyuan Chen 2025-10-22 00:47:43 +00:00 committed by PyTorch MergeBot
parent 7773a22cdb
commit 35153d0846
2 changed files with 9 additions and 13 deletions

View File

@ -1,18 +1,17 @@
#pragma once
#include <ATen/OpMathType.h>
#include <ATen/cuda/detail/OffsetCalculator.cuh>
#include <ATen/detail/FunctionTraits.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/TensorIteratorDynamicCasting.h>
#include <ATen/cuda/detail/OffsetCalculator.cuh>
#include <ATen/OpMathType.h>
#include <ATen/native/cuda/thread_constants.h>
#include <thrust/tuple.h>
#include <ATen/native/cuda/MemoryAccess.cuh>
#include <tuple>
namespace at::native {
template<int N>
@ -62,7 +61,11 @@ __device__ inline void elementwise_kernel_helper(func_t f, policy_t policy) {
#pragma unroll
for (int i = 0; i < elems_per_thread; i++) {
if (policy.check_inbounds(i)) {
#if defined(__HIP__)
results[i] = c10::guts::apply(f, args[i]);
#else
results[i] = std::apply(f, args[i]);
#endif
}
}

View File

@ -45,14 +45,7 @@ constexpr bool is_pod_v = is_pod<T>::value;
namespace guts {
#if defined(__cpp_lib_apply) && !defined(__CUDA_ARCH__) && !defined(__HIP__)
template <class F, class Tuple>
C10_HOST_DEVICE inline constexpr decltype(auto) apply(F&& f, Tuple&& t) {
return std::apply(std::forward<F>(f), std::forward<Tuple>(t));
}
#else
#if defined(__HIP__)
// Implementation from http://en.cppreference.com/w/cpp/utility/apply (but
// modified)