mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
empty_cpu: Add functions that don't depend on Tensor (#70613)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/70613 This refactors `at::detail::empty_cpu` to use only `TensorBase` so you can construct tensors without including `Tensor.h`. It also adds a `TensorOptions` version to reduce friction in operators moving from the `at::empty` API. Test Plan: Imported from OSS Reviewed By: samdow Differential Revision: D33623682 Pulled By: ngimel fbshipit-source-id: 7a7b08bc2ed06830a3d698197a0c8389a096dc1d
This commit is contained in:
parent
03ab65023a
commit
2e17ad0bbd
91
aten/src/ATen/EmptyTensor.cpp
Normal file
91
aten/src/ATen/EmptyTensor.cpp
Normal file
|
|
@ -0,0 +1,91 @@
|
||||||
|
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/EmptyTensor.h>
#include <ATen/detail/CUDAHooksInterface.h>
#include <c10/core/CPUAllocator.h>

#include <limits>
|
||||||
|
|
||||||
|
namespace at {
|
||||||
|
namespace detail {
|
||||||
|
|
||||||
|
// Selects the allocator for CPU tensor storage: the pinned-memory
// allocator exposed through the CUDA hooks when pin_memory is requested,
// otherwise the process-wide default CPU allocator.
static c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
  return pin_memory
      ? at::detail::getCUDAHooks().getPinnedMemoryAllocator()
      : c10::GetCPUAllocator();
}
|
||||||
|
|
||||||
|
// Validates that every entry of `size` is non-negative. Raises (via
// TORCH_CHECK) on the first offending dimension, reporting both the bad
// value and the full requested size.
void check_size_nonnegative(IntArrayRef size) {
  for (const auto& dim : size) {
    TORCH_CHECK(
        dim >= 0,
        "Trying to create tensor with negative dimension ", dim, ": ", size);
  }
}
|
||||||
|
|
||||||
|
// Allocates an uninitialized tensor of shape `size` using `allocator`,
// tagged with dispatch key set `ks` and element type `scalar_type`.
// Depends only on TensorBase, so callers can construct tensors without
// including Tensor.h.
//
// memory_format_opt: when present and not Contiguous, the freshly created
// tensor is restrided to that memory format.
TensorBase empty_generic(
    IntArrayRef size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  at::detail::check_size_nonnegative(size);

  int64_t nelements = c10::multiply_integers(size);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  // Guard against int64 overflow when converting the element count to a
  // byte count; otherwise a huge requested size could wrap around and
  // silently allocate a too-small storage.
  const auto itemsize = static_cast<int64_t>(dtype.itemsize());
  TORCH_CHECK(
      itemsize == 0 ||
          nelements <= std::numeric_limits<int64_t>::max() / itemsize,
      "Storage size calculation overflowed with size ", size);
  int64_t size_bytes = nelements * itemsize;
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator->allocate(size_bytes),
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  // Default TensorImpl already has size [0]; skip the redundant update.
  if (size.size() != 1 || size[0] != 0) {
    tensor.unsafeGetTensorImpl()->set_sizes_contiguous(size);
  }

  if (memory_format_opt.has_value()) {
    // Restriding a just-created empty contiguous tensor does nothing.
    if (*memory_format_opt != MemoryFormat::Contiguous) {
      tensor.unsafeGetTensorImpl()->empty_tensor_restride(*memory_format_opt);
    }
  }

  return tensor;
}
|
||||||
|
|
||||||
|
// Creates an empty CPU tensor of the given dtype. When `pin_memory` is
// true, storage comes from the pinned-memory allocator provided by the
// CUDA hooks; otherwise from the default CPU allocator.
TensorBase empty_cpu(IntArrayRef size, ScalarType dtype, bool pin_memory,
                     c10::optional<c10::MemoryFormat> memory_format_opt) {
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  c10::Allocator* alloc = GetCPUAllocatorMaybePinned(pin_memory);
  return empty_generic(size, alloc, cpu_ks, dtype, memory_format_opt);
}
|
||||||
|
|
||||||
|
// Optional-argument overload mirroring the TensorOptions fields. Missing
// options fall back to their defaults; in debug builds the device must be
// CPU and the layout strided (both only asserted, not checked, in release).
TensorBase empty_cpu(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      layout_or_default(layout_opt) == Layout::Strided);

  return empty_cpu(
      size,
      dtype_or_default(dtype_opt),
      pinned_memory_or_default(pin_memory_opt),
      memory_format_opt);
}
|
||||||
|
|
||||||
|
// Convenience overload taking a packed TensorOptions; unpacks each field
// and forwards to the optional-argument overload above. Eases migration
// of operators from the at::empty(TensorOptions) API.
TensorBase empty_cpu(
    IntArrayRef size, const TensorOptions &options) {
  const auto scalar_type_opt = optTypeMetaToScalarType(options.dtype_opt());
  return at::detail::empty_cpu(
      size,
      scalar_type_opt,
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}
|
||||||
|
|
||||||
|
}} // namespace at::detail
|
||||||
34
aten/src/ATen/EmptyTensor.h
Normal file
34
aten/src/ATen/EmptyTensor.h
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
#pragma once
|
||||||
|
#include <ATen/core/TensorBase.h>
|
||||||
|
|
||||||
|
namespace at {
|
||||||
|
namespace detail {
|
||||||
|
|
||||||
|
TORCH_API void check_size_nonnegative(IntArrayRef size);
|
||||||
|
|
||||||
|
TORCH_API TensorBase empty_generic(
|
||||||
|
IntArrayRef size,
|
||||||
|
c10::Allocator* allocator,
|
||||||
|
c10::DispatchKeySet ks,
|
||||||
|
ScalarType scalar_type,
|
||||||
|
c10::optional<c10::MemoryFormat> memory_format_opt);
|
||||||
|
|
||||||
|
TORCH_API TensorBase empty_cpu(
|
||||||
|
IntArrayRef size,
|
||||||
|
ScalarType dtype,
|
||||||
|
bool pin_memory=false,
|
||||||
|
c10::optional<c10::MemoryFormat> memory_format_opt=c10::nullopt);
|
||||||
|
|
||||||
|
TORCH_API TensorBase empty_cpu(
|
||||||
|
IntArrayRef size,
|
||||||
|
c10::optional<ScalarType> dtype_opt,
|
||||||
|
c10::optional<Layout> layout_opt,
|
||||||
|
c10::optional<Device> device_opt,
|
||||||
|
c10::optional<bool> pin_memory_opt,
|
||||||
|
c10::optional<c10::MemoryFormat> memory_format_opt);
|
||||||
|
|
||||||
|
TORCH_API TensorBase empty_cpu(
|
||||||
|
IntArrayRef size,
|
||||||
|
const TensorOptions &options);
|
||||||
|
|
||||||
|
}} // namespace at::detail
|
||||||
|
|
@ -1,14 +1,7 @@
|
||||||
// FastPass
|
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
|
||||||
#ifdef _MSC_VER
|
#include <ATen/Dispatch.h>
|
||||||
#ifndef _USE_MATH_DEFINES
|
#include <ATen/EmptyTensor.h>
|
||||||
#define _USE_MATH_DEFINES
|
|
||||||
#endif
|
|
||||||
#include <math.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <ATen/ScalarOps.h>
|
#include <ATen/ScalarOps.h>
|
||||||
#include <ATen/ATen.h>
|
|
||||||
#include <ATen/Utils.h>
|
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
@ -32,7 +25,8 @@ Tensor& scalar_fill(Tensor& self, const Scalar& value) {
|
||||||
Tensor scalar_tensor_static(const Scalar& s, c10::optional<ScalarType> dtype_opt, c10::optional<Device> device_opt) {
|
Tensor scalar_tensor_static(const Scalar& s, c10::optional<ScalarType> dtype_opt, c10::optional<Device> device_opt) {
|
||||||
at::tracer::impl::NoTracerDispatchMode tracer_guard;
|
at::tracer::impl::NoTracerDispatchMode tracer_guard;
|
||||||
at::AutoDispatchBelowAutograd mode;
|
at::AutoDispatchBelowAutograd mode;
|
||||||
auto result = at::detail::empty_cpu({}, dtype_opt, c10::nullopt, device_opt, c10::nullopt, c10::nullopt);
|
Tensor result = at::detail::empty_cpu(
|
||||||
|
{}, dtype_opt, c10::nullopt, device_opt, c10::nullopt, c10::nullopt);
|
||||||
scalar_fill(result, s);
|
scalar_fill(result, s);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -22,72 +22,6 @@ int _crash_if_asan(int arg) {
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
// empty_cpu is used in ScalarOps.h, which can be referenced by other ATen
|
|
||||||
// files. Since we want to decouple direct referencing native symbols and only
|
|
||||||
// access native symbols through dispatching, we move its implementation here.
|
|
||||||
Tensor empty_cpu(
|
|
||||||
IntArrayRef size,
|
|
||||||
c10::optional<ScalarType> dtype_opt,
|
|
||||||
c10::optional<Layout> layout_opt,
|
|
||||||
c10::optional<Device> device_opt,
|
|
||||||
c10::optional<bool> pin_memory_opt,
|
|
||||||
c10::optional<c10::MemoryFormat> memory_format_opt) {
|
|
||||||
|
|
||||||
auto device = device_or_default(device_opt);
|
|
||||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device.type() == DeviceType::CPU);
|
|
||||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);
|
|
||||||
|
|
||||||
bool pin_memory = pinned_memory_or_default(pin_memory_opt);
|
|
||||||
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
|
|
||||||
c10::Allocator* allocator;
|
|
||||||
if (pin_memory) {
|
|
||||||
allocator = detail::getCUDAHooks().getPinnedMemoryAllocator();
|
|
||||||
} else {
|
|
||||||
allocator = at::getCPUAllocator();
|
|
||||||
}
|
|
||||||
auto dtype = dtype_or_default(dtype_opt);
|
|
||||||
|
|
||||||
return empty_generic(size, allocator, at::DispatchKey::CPU, dtype, memory_format_opt);
|
|
||||||
}
|
|
||||||
|
|
||||||
Tensor empty_generic(
|
|
||||||
IntArrayRef size,
|
|
||||||
c10::Allocator* allocator,
|
|
||||||
// technically this can be inferred from the device, but usually the
|
|
||||||
// correct setting is obvious from the call site so just make callers
|
|
||||||
// pass it in
|
|
||||||
c10::DispatchKey dispatch_key,
|
|
||||||
ScalarType scalar_type,
|
|
||||||
c10::optional<c10::MemoryFormat> memory_format_opt) {
|
|
||||||
|
|
||||||
check_size_nonnegative(size);
|
|
||||||
|
|
||||||
int64_t nelements = c10::multiply_integers(size);
|
|
||||||
caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
|
|
||||||
int64_t size_bytes = nelements * dtype.itemsize();
|
|
||||||
auto storage_impl = c10::make_intrusive<StorageImpl>(
|
|
||||||
c10::StorageImpl::use_byte_size_t(),
|
|
||||||
size_bytes,
|
|
||||||
allocator->allocate(size_bytes),
|
|
||||||
allocator,
|
|
||||||
/*resizeable=*/true);
|
|
||||||
|
|
||||||
auto tensor = detail::make_tensor<TensorImpl>(
|
|
||||||
std::move(storage_impl), dispatch_key, dtype);
|
|
||||||
// Default TensorImpl has size [0]
|
|
||||||
if (size.size() != 1 || size[0] != 0) {
|
|
||||||
tensor.unsafeGetTensorImpl()->set_sizes_contiguous(size);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (memory_format_opt.has_value()) {
|
|
||||||
// Restriding a just-created empty contiguous tensor does nothing.
|
|
||||||
if (*memory_format_opt != MemoryFormat::Contiguous) {
|
|
||||||
tensor.unsafeGetTensorImpl()->empty_tensor_restride(*memory_format_opt);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return tensor;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
Tensor tensor_cpu(ArrayRef<T> values, const TensorOptions& options) {
|
Tensor tensor_cpu(ArrayRef<T> values, const TensorOptions& options) {
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
#include <ATen/core/ATenGeneral.h>
|
#include <ATen/core/ATenGeneral.h>
|
||||||
#include <ATen/core/Generator.h>
|
#include <ATen/core/Generator.h>
|
||||||
|
#include <ATen/EmptyTensor.h>
|
||||||
#include <ATen/Formatting.h>
|
#include <ATen/Formatting.h>
|
||||||
#include <c10/core/ScalarType.h>
|
#include <c10/core/ScalarType.h>
|
||||||
#include <c10/core/StorageImpl.h>
|
#include <c10/core/StorageImpl.h>
|
||||||
|
|
@ -113,26 +114,9 @@ static inline T* get_generator_or_default(const c10::optional<Generator>& gen, c
|
||||||
return gen.has_value() && gen->defined() ? check_generator<T>(gen) : check_generator<T>(default_gen);
|
return gen.has_value() && gen->defined() ? check_generator<T>(gen) : check_generator<T>(default_gen);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void check_size_nonnegative(IntArrayRef size) {
|
using at::detail::check_size_nonnegative;
|
||||||
for (auto x: size) {
|
|
||||||
TORCH_CHECK(x >= 0, "Trying to create tensor with negative dimension ", x, ": ", size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
TORCH_API
|
|
||||||
Tensor empty_cpu(IntArrayRef size, c10::optional<ScalarType> dtype_opt, c10::optional<Layout> layout_opt,
|
|
||||||
c10::optional<Device> device_opt, c10::optional<bool> pin_memory_opt, c10::optional<c10::MemoryFormat> memory_format_opt);
|
|
||||||
|
|
||||||
TORCH_API
|
|
||||||
Tensor empty_generic(
|
|
||||||
IntArrayRef size,
|
|
||||||
c10::Allocator* allocator,
|
|
||||||
c10::DispatchKey dispatch_key,
|
|
||||||
ScalarType dtype,
|
|
||||||
c10::optional<c10::MemoryFormat> memory_format
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
TORCH_API
|
TORCH_API
|
||||||
|
|
|
||||||
|
|
@ -386,7 +386,7 @@ public:
|
||||||
// While some of these accessors could be generated through templates,
|
// While some of these accessors could be generated through templates,
|
||||||
// we prefer to write them manually for clarity
|
// we prefer to write them manually for clarity
|
||||||
|
|
||||||
IValue(at::Tensor t) : tag(Tag::Tensor), is_intrusive_ptr(false) {
|
IValue(at::TensorBase t) : tag(Tag::Tensor), is_intrusive_ptr(false) {
|
||||||
new (&payload.as_tensor) at::Tensor(std::move(t));
|
new (&payload.as_tensor) at::Tensor(std::move(t));
|
||||||
}
|
}
|
||||||
bool isTensor() const {
|
bool isTensor() const {
|
||||||
|
|
|
||||||
|
|
@ -47,8 +47,9 @@ Tensor empty_meta(
|
||||||
|
|
||||||
auto* allocator = GetMetaAllocator();
|
auto* allocator = GetMetaAllocator();
|
||||||
auto dtype = dtype_or_default(dtype_opt);
|
auto dtype = dtype_or_default(dtype_opt);
|
||||||
|
constexpr c10::DispatchKeySet meta_ks(c10::DispatchKey::Meta);
|
||||||
return at::detail::empty_generic(
|
return at::detail::empty_generic(
|
||||||
size, allocator, at::DispatchKey::Meta, dtype, memory_format_opt);
|
size, allocator, meta_ks, dtype, memory_format_opt);
|
||||||
}
|
}
|
||||||
|
|
||||||
Tensor empty_strided_meta(
|
Tensor empty_strided_meta(
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
#include <ATen/ATen.h>
|
#include <ATen/ATen.h>
|
||||||
#include <ATen/CPUGeneratorImpl.h>
|
#include <ATen/CPUGeneratorImpl.h>
|
||||||
#include <ATen/Utils.h>
|
|
||||||
#include <ATen/Dispatch.h>
|
#include <ATen/Dispatch.h>
|
||||||
|
#include <ATen/EmptyTensor.h>
|
||||||
#include <ATen/Parallel.h>
|
#include <ATen/Parallel.h>
|
||||||
#include <ATen/MapAllocator.h>
|
#include <ATen/MapAllocator.h>
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
|
|
@ -1081,8 +1081,9 @@ Tensor _efficientzerotensor(IntArrayRef size,
|
||||||
auto device_ = device_or_default(device);
|
auto device_ = device_or_default(device);
|
||||||
auto allocator = ZeroTensorAllocator(device_);
|
auto allocator = ZeroTensorAllocator(device_);
|
||||||
auto dtype_ = dtype_or_default(dtype);
|
auto dtype_ = dtype_or_default(dtype);
|
||||||
|
constexpr auto zero_ks = at::DispatchKeySet(at::DispatchKey::ZeroTensor);
|
||||||
return at::detail::empty_generic(
|
return at::detail::empty_generic(
|
||||||
size, &allocator, at::DispatchKey::ZeroTensor, dtype_, c10::nullopt);
|
size, &allocator, zero_ks, dtype_, c10::nullopt);
|
||||||
}
|
}
|
||||||
|
|
||||||
Tensor& zeros_out(IntArrayRef size, Tensor& result) {
|
Tensor& zeros_out(IntArrayRef size, Tensor& result) {
|
||||||
|
|
|
||||||
|
|
@ -284,7 +284,7 @@ Tensor& qembeddingbag_byte_prepack_out(Tensor& output, const Tensor& weight) {
|
||||||
Tensor qembeddingbag_byte_prepack(const Tensor& weight) {
|
Tensor qembeddingbag_byte_prepack(const Tensor& weight) {
|
||||||
const auto weight_contig =
|
const auto weight_contig =
|
||||||
weight.expect_contiguous(weight.suggest_memory_format());
|
weight.expect_contiguous(weight.suggest_memory_format());
|
||||||
auto output = at::detail::empty_cpu(
|
Tensor output = at::detail::empty_cpu(
|
||||||
{0},
|
{0},
|
||||||
at::kByte,
|
at::kByte,
|
||||||
weight_contig->layout(),
|
weight_contig->layout(),
|
||||||
|
|
|
||||||
|
|
@ -125,8 +125,7 @@ class BackendWithCompiler : public PyTorchBackendInterface {
|
||||||
(x.scalar_type() == c10::ScalarType::Float &&
|
(x.scalar_type() == c10::ScalarType::Float &&
|
||||||
h.scalar_type() == c10::ScalarType::Float),
|
h.scalar_type() == c10::ScalarType::Float),
|
||||||
"Only float tensors are compatible for add and sub.");
|
"Only float tensors are compatible for add and sub.");
|
||||||
auto y = at::detail::empty_cpu(
|
at::Tensor y = at::detail::empty_cpu(x.sizes(), at::kFloat);
|
||||||
x.sizes(), c10::ScalarType::Float, {}, {}, {}, c10::nullopt);
|
|
||||||
auto x_ptr = float_data_ptr(x);
|
auto x_ptr = float_data_ptr(x);
|
||||||
auto h_ptr = float_data_ptr(h);
|
auto h_ptr = float_data_ptr(h);
|
||||||
auto y_ptr = float_data_ptr(y);
|
auto y_ptr = float_data_ptr(y);
|
||||||
|
|
|
||||||
|
|
@ -932,6 +932,7 @@ aten_cpu_source_non_codegen_list = [
|
||||||
"aten/src/ATen/CPUGeneratorImpl.cpp",
|
"aten/src/ATen/CPUGeneratorImpl.cpp",
|
||||||
"aten/src/ATen/Context.cpp",
|
"aten/src/ATen/Context.cpp",
|
||||||
"aten/src/ATen/DLConvertor.cpp",
|
"aten/src/ATen/DLConvertor.cpp",
|
||||||
|
"aten/src/ATen/EmptyTensor.cpp",
|
||||||
"aten/src/ATen/ExpandUtils.cpp",
|
"aten/src/ATen/ExpandUtils.cpp",
|
||||||
"aten/src/ATen/FunctionalInverses.cpp",
|
"aten/src/ATen/FunctionalInverses.cpp",
|
||||||
"aten/src/ATen/FunctionalStorageImpl.cpp",
|
"aten/src/ATen/FunctionalStorageImpl.cpp",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user