Remove cuda/Loops.cuh dependency on native_functions.yaml (#64168)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/64168

Test Plan: Imported from OSS

Reviewed By: gchanan

Differential Revision: D30728582

Pulled By: dagitses

fbshipit-source-id: 99dcbb9bb790dd0440d498593ac43e2c18e54a0c
This commit is contained in:
Peter Bell 2021-10-08 12:57:31 -07:00 committed by Facebook GitHub Bot
parent 92ce188510
commit 731cf494f2
6 changed files with 8 additions and 6 deletions

View File

@@ -31,7 +31,6 @@
#include <type_traits>
#include <tuple>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/core/Array.h>
#include <ATen/detail/FunctionTraits.h>
@@ -165,10 +164,10 @@ void gpu_kernel_impl(TensorIteratorBase& iter, const func_t& f) {
} else {
at::detail::Array<ScalarType, traits::arity> dtypes;
for (int i = 0; i < traits::arity; i++) {
-      dtypes[i] = iter.tensor(i + 1).scalar_type();
+      dtypes[i] = iter.dtype(i + 1);
}
auto loader = memory::LoadWithCast<traits::arity>(dtypes);
-    auto storer = memory::StoreWithCast(iter.tensor(0).scalar_type());
+    auto storer = memory::StoreWithCast(iter.dtype(0));
if (contiguous) {
auto input_offset_calculator = TrivialOffsetCalculator<traits::arity>();
auto output_offset_calculator = TrivialOffsetCalculator<1>();

View File

@@ -1,6 +1,7 @@
#include <ATen/NativeFunctions.h>
#include <ATen/Dispatch.h>
#include <ATen/ExpandUtils.h>
+#include <ATen/Functions.h>
#include <ATen/native/cuda/Loops.cuh>
#include <ATen/native/TensorIterator.h>

View File

@@ -176,7 +176,7 @@ void opmath_gpu_kernel_with_scalars(TensorIteratorBase& iter, const func_t& f) {
// works around incorrect device guard generation for pre-structured
// kernels device guards, but structured kernels do it right and
// we can assume the device is already set correctly
-    const OptionalDeviceGuard device_guard(device_of(iter.tensor(1)));
+    const OptionalDeviceGuard device_guard(iter.device(1));
gpu_kernel(iter, af);
} else if (iter.is_cpu_scalar(2)) {
BUnaryFunctor<arg1_t, arg2_t, return_t, func_t> bf(f, iter.scalar_value<opmath_arg2_t>(2));

View File

@@ -30,7 +30,6 @@
#include <type_traits>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/core/Array.h>
#include <ATen/cuda/detail/OffsetCalculator.cuh>
@@ -322,7 +321,7 @@ void gpu_kernel_impl(TensorIteratorBase& iter, const func_t& f) {
at::detail::Array<ScalarType, ntensors> dtypes;
for (int i = 0; i < ntensors; i++) {
-    dtypes[i] = iter.tensor(i).scalar_type();
+    dtypes[i] = iter.dtype(i);
}
int64_t numel = iter.numel();

View File

@@ -1,3 +1,5 @@
#include <ATen/Dispatch.h>
#include <ATen/Functions.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/cuda/Loops.cuh>

View File

@@ -1,3 +1,4 @@
#include <ATen/ATen.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/cuda/Loops.cuh>