mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Remove cuda/Loops.cuh dependency on native_functions.yaml (#64168)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/64168

Test Plan: Imported from OSS

Reviewed By: gchanan

Differential Revision: D30728582

Pulled By: dagitses

fbshipit-source-id: 99dcbb9bb790dd0440d498593ac43e2c18e54a0c
This commit is contained in:
parent
92ce188510
commit
731cf494f2
|
|
@ -31,7 +31,6 @@
|
|||
#include <type_traits>
|
||||
#include <tuple>
|
||||
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <ATen/core/Array.h>
|
||||
#include <ATen/detail/FunctionTraits.h>
|
||||
|
|
@ -165,10 +164,10 @@ void gpu_kernel_impl(TensorIteratorBase& iter, const func_t& f) {
|
|||
} else {
|
||||
at::detail::Array<ScalarType, traits::arity> dtypes;
|
||||
for (int i = 0; i < traits::arity; i++) {
|
||||
dtypes[i] = iter.tensor(i + 1).scalar_type();
|
||||
dtypes[i] = iter.dtype(i + 1);
|
||||
}
|
||||
auto loader = memory::LoadWithCast<traits::arity>(dtypes);
|
||||
auto storer = memory::StoreWithCast(iter.tensor(0).scalar_type());
|
||||
auto storer = memory::StoreWithCast(iter.dtype(0));
|
||||
if (contiguous) {
|
||||
auto input_offset_calculator = TrivialOffsetCalculator<traits::arity>();
|
||||
auto output_offset_calculator = TrivialOffsetCalculator<1>();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include <ATen/NativeFunctions.h>
|
||||
#include <ATen/Dispatch.h>
|
||||
#include <ATen/ExpandUtils.h>
|
||||
#include <ATen/Functions.h>
|
||||
#include <ATen/native/cuda/Loops.cuh>
|
||||
#include <ATen/native/TensorIterator.h>
|
||||
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ void opmath_gpu_kernel_with_scalars(TensorIteratorBase& iter, const func_t& f) {
|
|||
// works around incorrect device guard generation for pre-structured
|
||||
// kernels, but structured kernels do it right and
|
||||
// we can assume the device is already set correctly
|
||||
const OptionalDeviceGuard device_guard(device_of(iter.tensor(1)));
|
||||
const OptionalDeviceGuard device_guard(iter.device(1));
|
||||
gpu_kernel(iter, af);
|
||||
} else if (iter.is_cpu_scalar(2)) {
|
||||
BUnaryFunctor<arg1_t, arg2_t, return_t, func_t> bf(f, iter.scalar_value<opmath_arg2_t>(2));
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@
|
|||
|
||||
#include <type_traits>
|
||||
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <ATen/core/Array.h>
|
||||
#include <ATen/cuda/detail/OffsetCalculator.cuh>
|
||||
|
|
@ -322,7 +321,7 @@ void gpu_kernel_impl(TensorIteratorBase& iter, const func_t& f) {
|
|||
|
||||
at::detail::Array<ScalarType, ntensors> dtypes;
|
||||
for (int i = 0; i < ntensors; i++) {
|
||||
dtypes[i] = iter.tensor(i).scalar_type();
|
||||
dtypes[i] = iter.dtype(i);
|
||||
}
|
||||
|
||||
int64_t numel = iter.numel();
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#include <ATen/Dispatch.h>
|
||||
#include <ATen/Functions.h>
|
||||
#include <ATen/native/TensorIterator.h>
|
||||
#include <ATen/native/cuda/Loops.cuh>
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include <ATen/ATen.h>
|
||||
#include <ATen/native/TensorIterator.h>
|
||||
#include <ATen/native/cuda/Loops.cuh>
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user