diff --git a/aten/src/ATen/EmptyTensor.cpp b/aten/src/ATen/EmptyTensor.cpp
new file mode 100644
index 00000000000..a52ea72d915
--- /dev/null
+++ b/aten/src/ATen/EmptyTensor.cpp
@@ -0,0 +1,91 @@
+#define TORCH_ASSERT_NO_OPERATORS
+#include <ATen/EmptyTensor.h>
+#include <ATen/detail/CUDAHooksInterface.h>
+#include <c10/core/CPUAllocator.h>
+
+namespace at {
+namespace detail {
+
+static c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
+  if (pin_memory) {
+    return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
+  }
+  return c10::GetCPUAllocator();
+}
+
+void check_size_nonnegative(IntArrayRef size) {
+  for (auto x: size) {
+    TORCH_CHECK(x >= 0, "Trying to create tensor with negative dimension ", x, ": ", size);
+  }
+}
+
+TensorBase empty_generic(
+    IntArrayRef size,
+    c10::Allocator* allocator,
+    c10::DispatchKeySet ks,
+    ScalarType scalar_type,
+    c10::optional<c10::MemoryFormat> memory_format_opt) {
+  at::detail::check_size_nonnegative(size);
+
+  int64_t nelements = c10::multiply_integers(size);
+  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
+  int64_t size_bytes = nelements * dtype.itemsize();
+  auto storage_impl = c10::make_intrusive<StorageImpl>(
+      c10::StorageImpl::use_byte_size_t(),
+      size_bytes,
+      allocator->allocate(size_bytes),
+      allocator,
+      /*resizeable=*/true);
+
+  auto tensor = detail::make_tensor_base<TensorImpl>(
+      std::move(storage_impl), ks, dtype);
+  // Default TensorImpl has size [0]
+  if (size.size() != 1 || size[0] != 0) {
+    tensor.unsafeGetTensorImpl()->set_sizes_contiguous(size);
+  }
+
+  if (memory_format_opt.has_value()) {
+    // Restriding a just-created empty contiguous tensor does nothing.
+    if (*memory_format_opt != MemoryFormat::Contiguous) {
+      tensor.unsafeGetTensorImpl()->empty_tensor_restride(*memory_format_opt);
+    }
+  }
+
+  return tensor;
+}
+
+TensorBase empty_cpu(IntArrayRef size, ScalarType dtype, bool pin_memory,
+                     c10::optional<c10::MemoryFormat> memory_format_opt) {
+  auto allocator = GetCPUAllocatorMaybePinned(pin_memory);
+  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
+  return empty_generic(size, allocator, cpu_ks, dtype, memory_format_opt);
+}
+
+TensorBase empty_cpu(
+    IntArrayRef size,
+    c10::optional<ScalarType> dtype_opt,
+    c10::optional<Layout> layout_opt,
+    c10::optional<Device> device_opt,
+    c10::optional<bool> pin_memory_opt,
+    c10::optional<c10::MemoryFormat> memory_format_opt) {
+  auto device = device_or_default(device_opt);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device.type() == DeviceType::CPU);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);
+
+  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
+  auto dtype = dtype_or_default(dtype_opt);
+  return empty_cpu(size, dtype, pin_memory, memory_format_opt);
+}
+
+TensorBase empty_cpu(
+    IntArrayRef size, const TensorOptions &options) {
+  return at::detail::empty_cpu(
+      size,
+      optTypeMetaToScalarType(options.dtype_opt()),
+      options.layout_opt(),
+      options.device_opt(),
+      options.pinned_memory_opt(),
+      options.memory_format_opt());
+}
+
+}} // namespace at::detail
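For orientation, here is a minimal usage sketch of the relocated factories (not part of the patch; the helper names are hypothetical and the includes assume a checkout containing this change):

```cpp
// Hypothetical callers of the new TensorBase-only factories. EmptyTensor.h
// depends only on TensorBase, so these compile in translation units built
// with TORCH_ASSERT_NO_OPERATORS.
#include <ATen/EmptyTensor.h>
#include <c10/core/CPUAllocator.h>

// Convenience overload: pin_memory defaults to false and the memory
// format to c10::nullopt.
at::TensorBase cpu_scratch(c10::IntArrayRef sizes) {
  return at::detail::empty_cpu(sizes, c10::kFloat);
}

// The generic core: the allocator and dispatch keys are explicit, which is
// what lets the Meta and ZeroTensor hunks further down reuse it unchanged.
at::TensorBase generic_scratch(c10::IntArrayRef sizes) {
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return at::detail::empty_generic(
      sizes, c10::GetCPUAllocator(), cpu_ks, c10::kFloat, c10::nullopt);
}
```

Note that `empty_generic` now takes a full `c10::DispatchKeySet` rather than a single `DispatchKey`; the later hunks migrate each call site accordingly.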
diff --git a/aten/src/ATen/EmptyTensor.h b/aten/src/ATen/EmptyTensor.h
new file mode 100644
index 00000000000..0f2bc0c63ea
--- /dev/null
+++ b/aten/src/ATen/EmptyTensor.h
@@ -0,0 +1,34 @@
+#pragma once
+#include <ATen/core/TensorBase.h>
+
+namespace at {
+namespace detail {
+
+TORCH_API void check_size_nonnegative(IntArrayRef size);
+
+TORCH_API TensorBase empty_generic(
+    IntArrayRef size,
+    c10::Allocator* allocator,
+    c10::DispatchKeySet ks,
+    ScalarType scalar_type,
+    c10::optional<c10::MemoryFormat> memory_format_opt);
+
+TORCH_API TensorBase empty_cpu(
+    IntArrayRef size,
+    ScalarType dtype,
+    bool pin_memory=false,
+    c10::optional<c10::MemoryFormat> memory_format_opt=c10::nullopt);
+
+TORCH_API TensorBase empty_cpu(
+    IntArrayRef size,
+    c10::optional<ScalarType> dtype_opt,
+    c10::optional<Layout> layout_opt,
+    c10::optional<Device> device_opt,
+    c10::optional<bool> pin_memory_opt,
+    c10::optional<c10::MemoryFormat> memory_format_opt);
+
+TORCH_API TensorBase empty_cpu(
+    IntArrayRef size,
+    const TensorOptions &options);
+
+}} // namespace at::detail
diff --git a/aten/src/ATen/ScalarOps.cpp b/aten/src/ATen/ScalarOps.cpp
index 09d5034c4a2..8eb10266d78 100644
--- a/aten/src/ATen/ScalarOps.cpp
+++ b/aten/src/ATen/ScalarOps.cpp
@@ -1,14 +1,7 @@
-// FastPass
-#ifdef _MSC_VER
-#ifndef _USE_MATH_DEFINES
-#define _USE_MATH_DEFINES
-#endif
-#include <math.h>
-#endif
-
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+#include <ATen/ScalarOps.h>
+#include <ATen/EmptyTensor.h>
 #include <ATen/Dispatch.h>
-#include <ATen/NativeFunctions.h>
-#include <ATen/ScalarOps.h>
 
 namespace at {
 namespace {
@@ -32,7 +25,8 @@ Tensor& scalar_fill(Tensor& self, const Scalar& value) {
 Tensor scalar_tensor_static(const Scalar& s, c10::optional<ScalarType> dtype_opt, c10::optional<Device> device_opt) {
   at::tracer::impl::NoTracerDispatchMode tracer_guard;
   at::AutoDispatchBelowAutograd mode;
-  auto result = at::detail::empty_cpu({}, dtype_opt, c10::nullopt, device_opt, c10::nullopt, c10::nullopt);
+  Tensor result = at::detail::empty_cpu(
+      {}, dtype_opt, c10::nullopt, device_opt, c10::nullopt, c10::nullopt);
   scalar_fill(result, s);
   return result;
 }
diff --git a/aten/src/ATen/Utils.cpp b/aten/src/ATen/Utils.cpp
index a6540f7a5b6..a0fbc499378 100644
--- a/aten/src/ATen/Utils.cpp
+++ b/aten/src/ATen/Utils.cpp
@@ -22,72 +22,6 @@ int _crash_if_asan(int arg) {
 }
 
 namespace detail {
-// empty_cpu is used in ScalarOps.h, which can be referenced by other ATen
-// files. Since we want to decouple direct referencing native symbols and only
-// access native symbols through dispatching, we move its implementation here.
-Tensor empty_cpu(
-    IntArrayRef size,
-    c10::optional<ScalarType> dtype_opt,
-    c10::optional<Layout> layout_opt,
-    c10::optional<Device> device_opt,
-    c10::optional<bool> pin_memory_opt,
-    c10::optional<c10::MemoryFormat> memory_format_opt) {
-
-  auto device = device_or_default(device_opt);
-  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device.type() == DeviceType::CPU);
-  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);
-
-  bool pin_memory = pinned_memory_or_default(pin_memory_opt);
-  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
-  c10::Allocator* allocator;
-  if (pin_memory) {
-    allocator = detail::getCUDAHooks().getPinnedMemoryAllocator();
-  } else {
-    allocator = at::getCPUAllocator();
-  }
-  auto dtype = dtype_or_default(dtype_opt);
-
-  return empty_generic(size, allocator, at::DispatchKey::CPU, dtype, memory_format_opt);
-}
-
-Tensor empty_generic(
-    IntArrayRef size,
-    c10::Allocator* allocator,
-    // technically this can be inferred from the device, but usually the
-    // correct setting is obvious from the call site so just make callers
-    // pass it in
-    c10::DispatchKey dispatch_key,
-    ScalarType scalar_type,
-    c10::optional<c10::MemoryFormat> memory_format_opt) {
-
-  check_size_nonnegative(size);
-
-  int64_t nelements = c10::multiply_integers(size);
-  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
-  int64_t size_bytes = nelements * dtype.itemsize();
-  auto storage_impl = c10::make_intrusive<StorageImpl>(
-      c10::StorageImpl::use_byte_size_t(),
-      size_bytes,
-      allocator->allocate(size_bytes),
-      allocator,
-      /*resizeable=*/true);
-
-  auto tensor = detail::make_tensor<TensorImpl>(
-      std::move(storage_impl), dispatch_key, dtype);
-  // Default TensorImpl has size [0]
-  if (size.size() != 1 || size[0] != 0) {
-    tensor.unsafeGetTensorImpl()->set_sizes_contiguous(size);
-  }
-
-  if (memory_format_opt.has_value()) {
-    // Restriding a just-created empty contiguous tensor does nothing.
-    if (*memory_format_opt != MemoryFormat::Contiguous) {
-      tensor.unsafeGetTensorImpl()->empty_tensor_restride(*memory_format_opt);
-    }
-  }
-
-  return tensor;
-}
 
 template <typename T>
 Tensor tensor_cpu(ArrayRef<T> values, const TensorOptions& options) {
diff --git a/aten/src/ATen/Utils.h b/aten/src/ATen/Utils.h
index 0143e0c49b4..9160cbe2fed 100644
--- a/aten/src/ATen/Utils.h
+++ b/aten/src/ATen/Utils.h
@@ -2,6 +2,7 @@
 
 #include
 #include
+#include <ATen/EmptyTensor.h>
 #include
 #include
 #include
@@ -113,26 +114,9 @@ static inline T* get_generator_or_default(const c10::optional<Generator>& gen, const Generator& default_gen) {
   return gen.has_value() && gen->defined() ? check_generator<T>(gen) : check_generator<T>(default_gen);
 }
 
-inline void check_size_nonnegative(IntArrayRef size) {
-  for (auto x: size) {
-    TORCH_CHECK(x >= 0, "Trying to create tensor with negative dimension ", x, ": ", size);
-  }
-}
+using at::detail::check_size_nonnegative;
 
 namespace detail {
-TORCH_API
-Tensor empty_cpu(IntArrayRef size, c10::optional<ScalarType> dtype_opt, c10::optional<Layout> layout_opt,
-                 c10::optional<Device> device_opt, c10::optional<bool> pin_memory_opt, c10::optional<c10::MemoryFormat> memory_format_opt);
-
-TORCH_API
-Tensor empty_generic(
-    IntArrayRef size,
-    c10::Allocator* allocator,
-    c10::DispatchKey dispatch_key,
-    ScalarType dtype,
-    c10::optional<c10::MemoryFormat> memory_format
-);
-
 template <typename T>
 TORCH_API
diff --git a/aten/src/ATen/core/ivalue.h b/aten/src/ATen/core/ivalue.h
index d22ad5a1035..f6c9901d21c 100644
--- a/aten/src/ATen/core/ivalue.h
+++ b/aten/src/ATen/core/ivalue.h
@@ -386,7 +386,7 @@ public:
 
   // While some of these accessors could be generated through templates,
   // we prefer to write them manually for clarity
-  IValue(at::Tensor t) : tag(Tag::Tensor), is_intrusive_ptr(false) {
+  IValue(at::TensorBase t) : tag(Tag::Tensor), is_intrusive_ptr(false) {
     new (&payload.as_tensor) at::Tensor(std::move(t));
   }
   bool isTensor() const {
diff --git a/aten/src/ATen/native/MetaTensor.cpp b/aten/src/ATen/native/MetaTensor.cpp
index 224fc2ec80d..58e58044fe7 100644
--- a/aten/src/ATen/native/MetaTensor.cpp
+++ b/aten/src/ATen/native/MetaTensor.cpp
@@ -47,8 +47,9 @@ Tensor empty_meta(
 
   auto* allocator = GetMetaAllocator();
   auto dtype = dtype_or_default(dtype_opt);
+  constexpr c10::DispatchKeySet meta_ks(c10::DispatchKey::Meta);
   return at::detail::empty_generic(
-      size, allocator, at::DispatchKey::Meta, dtype, memory_format_opt);
+      size, allocator, meta_ks, dtype, memory_format_opt);
 }
 
 Tensor empty_strided_meta(
diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp
index 5e0198931d5..9a360b2179e 100644
--- a/aten/src/ATen/native/TensorFactories.cpp
+++ b/aten/src/ATen/native/TensorFactories.cpp
@@ -1,7 +1,7 @@
 #include
 #include
-#include
 #include
+#include <ATen/EmptyTensor.h>
 #include
 #include
 #include
@@ -1081,8 +1081,9 @@ Tensor _efficientzerotensor(IntArrayRef size,
   auto device_ = device_or_default(device);
   auto allocator = ZeroTensorAllocator(device_);
   auto dtype_ = dtype_or_default(dtype);
+  constexpr auto zero_ks = at::DispatchKeySet(at::DispatchKey::ZeroTensor);
   return at::detail::empty_generic(
-      size, &allocator, at::DispatchKey::ZeroTensor, dtype_, c10::nullopt);
+      size, &allocator, zero_ks, dtype_, c10::nullopt);
 }
 
 Tensor& zeros_out(IntArrayRef size, Tensor& result) {
diff --git a/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp b/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp
index e52ec7609cb..224a66f8abf 100644
--- a/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp
@@ -284,7 +284,7 @@ Tensor& qembeddingbag_byte_prepack_out(Tensor& output, const Tensor& weight) {
 
 Tensor qembeddingbag_byte_prepack(const Tensor& weight) {
   const auto weight_contig = weight.expect_contiguous(weight.suggest_memory_format());
-  auto output = at::detail::empty_cpu(
+  Tensor output = at::detail::empty_cpu(
       {0},
       at::kByte,
       weight_contig->layout(),
diff --git a/test/cpp/jit/test_backend_compiler_lib.cpp b/test/cpp/jit/test_backend_compiler_lib.cpp
index ec756ba2c11..0db8bd428e9 100644
--- a/test/cpp/jit/test_backend_compiler_lib.cpp
+++ b/test/cpp/jit/test_backend_compiler_lib.cpp
@@ -125,8 +125,7 @@ class BackendWithCompiler : public PyTorchBackendInterface {
             (x.scalar_type() == c10::ScalarType::Float &&
              h.scalar_type() == c10::ScalarType::Float),
             "Only float tensors are compatible for add and sub.");
-        auto y = at::detail::empty_cpu(
-            x.sizes(), c10::ScalarType::Float, {}, {}, {}, c10::nullopt);
+        at::Tensor y = at::detail::empty_cpu(x.sizes(), at::kFloat);
         auto x_ptr = float_data_ptr(x);
         auto h_ptr = float_data_ptr(h);
         auto y_ptr = float_data_ptr(y);
diff --git a/tools/build_variables.bzl b/tools/build_variables.bzl
index b6cc8d488c4..4d9aee2de2e 100644
--- a/tools/build_variables.bzl
+++ b/tools/build_variables.bzl
@@ -932,6 +932,7 @@ aten_cpu_source_non_codegen_list = [
     "aten/src/ATen/CPUGeneratorImpl.cpp",
     "aten/src/ATen/Context.cpp",
     "aten/src/ATen/DLConvertor.cpp",
+    "aten/src/ATen/EmptyTensor.cpp",
     "aten/src/ATen/ExpandUtils.cpp",
     "aten/src/ATen/FunctionalInverses.cpp",
     "aten/src/ATen/FunctionalStorageImpl.cpp",
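Taken together with the `IValue(at::TensorBase)` change, call sites can now allocate and box a buffer without the old six-argument form. A minimal sketch (hypothetical helper, assuming this patch is in the tree):

```cpp
// Hypothetical helper combining the simplified two-argument empty_cpu call
// from the test hunk with the widened IValue constructor: empty_cpu returns
// a TensorBase, and IValue now accepts at::TensorBase directly.
#include <ATen/EmptyTensor.h>
#include <ATen/core/ivalue.h>
#include <utility>

c10::IValue boxed_float_buffer(at::IntArrayRef sizes) {
  at::TensorBase y = at::detail::empty_cpu(sizes, at::kFloat);
  return c10::IValue(std::move(y));  // uses the new IValue(at::TensorBase) ctor
}
```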