Move THPStorage definitions out of torch/csrc/generic (#78032)

Fixes #77908

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78032
Approved by: https://github.com/ezyang
This commit is contained in:
Kurt Mohler 2022-06-01 19:00:58 +00:00 committed by PyTorch MergeBot
parent 6a4997e66a
commit 272193d026
15 changed files with 726 additions and 708 deletions

View File

@ -843,6 +843,8 @@ libtorch_python_core_sources = [
"torch/csrc/python_dimname.cpp",
"torch/csrc/Size.cpp",
"torch/csrc/Storage.cpp",
"torch/csrc/StorageMethods.cpp",
"torch/csrc/StorageSharing.cpp",
"torch/csrc/Stream.cpp",
"torch/csrc/TypeInfo.cpp",
"torch/csrc/api/src/python/init.cpp",

View File

@ -864,7 +864,7 @@ class ThroughputBenchmark(object):
def run_once(self, *args: Any, **kwargs: Any) -> Any: ...
def benchmark(self, config: BenchmarkConfig) -> BenchmarkExecutionStats: ...
# Defined in torch/csrc/generic/Storage.cpp
# Defined in torch/csrc/Storage.cpp
${legacy_storage_base_hints}
# TODO: where

View File

@ -11,14 +11,12 @@
#include <torch/csrc/CudaIPCTypes.h>
#include <torch/csrc/Device.h>
#include <torch/csrc/autograd/utils/wrap_outputs.h>
#include <torch/csrc/utils/python_arg_parser.h>
#include <torch/csrc/StorageMethods.h>
#include <torch/csrc/StorageSharing.h>
#include <c10/core/CPUAllocator.h>
#include <fmt/format.h>
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <torch/csrc/generic/Storage.cpp>
#include <torch/csrc/THGenerateByteType.h>
#include <c10/util/intrusive_ptr.h>
template<>
@ -27,3 +25,358 @@ void THPPointer<c10::StorageImpl>::free() {
c10::raw::intrusive_ptr::decref(ptr);
}
}
// Python class object for torch._UntypedStorage; filled in by
// THPStorage_postInit once the Python side has defined the class.
PyObject *THPStorageClass = nullptr;

// Wraps a StorageImpl in a new _UntypedStorage Python object.
// Takes ownership of `ptr` (must be non-null): on successful allocation the
// reference is transferred into the wrapper's `cdata` field and dropped later
// by THPStorage_dealloc; if tp_alloc fails, the reference is released when
// `ptr` goes out of scope. Returns nullptr (with a Python exception set by
// tp_alloc) on allocation failure.
PyObject * THPStorage_New(c10::intrusive_ptr<c10::StorageImpl> ptr)
{
  AT_ASSERT(ptr);
  PyTypeObject *type = (PyTypeObject *)THPStorageClass;
  PyObject *obj = type->tp_alloc(type, 0);
  if (obj) {
    // Transfer the refcount held by the intrusive_ptr to the Python object.
    ((THPStorage *)obj)->cdata = ptr.release();
  }
  return obj;
}
// tp_dealloc: drops the wrapper's reference to the underlying StorageImpl
// (if one was ever attached) and then frees the Python object itself.
static void THPStorage_dealloc(THPStorage* self)
{
  if (self->cdata) {
    c10::raw::intrusive_ptr::decref(self->cdata);
  }
  Py_TYPE(self)->tp_free((PyObject*)self);
}
// tp_new for torch._UntypedStorage. Three call forms are accepted:
//   (*, allocator=None, device=None)           -> empty (0-byte) storage
//   (size, *, allocator=None, device=None)     -> storage of `size` bytes
//   (sequence, *, allocator=None, device=None) -> storage initialized from a
//                                                 Python sequence of byte
//                                                 values
// Only one (or neither) of `allocator`/`device` may be passed. Returns the
// new storage object, or nullptr with a Python exception set on error.
static PyObject * THPStorage_pynew(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
  HANDLE_TH_ERRORS
  static torch::PythonArgParser parser({
      THPStorageStr "(*, int64_t allocator=None, Device device=None)",
      THPStorageStr "(int64_t size, *, int64_t allocator=None, Device device=None)",
      THPStorageStr "(PyObject* sequence, *, int64_t allocator=None, Device device=None)",
  });
  torch::ParsedArgs<3> parsed_args;
  auto r = parser.parse(args, kwargs, parsed_args);

  // Overloads with a leading positional argument shift the keyword-only
  // allocator/device slots by one.
  int64_t allocator_arg_idx = 0;
  int64_t device_arg_idx = 1;

  if (r.idx > 0) {
    allocator_arg_idx = 1;
    device_arg_idx = 2;
  }

  c10::optional<int64_t> allocator_opt = r.toInt64Optional(allocator_arg_idx);
  c10::optional<at::Device> device_opt = r.deviceOptional(device_arg_idx);

  TORCH_CHECK(!allocator_opt.has_value() || !device_opt.has_value(),
      THPStorageStr, "(): only one or neither of 'allocator' or 'device' can ",
      "be given, but not both");

  THPStoragePtr self((THPStorage *)type->tp_alloc(type, 0));
  THPUtils_assert(self, "failed to allocate a " THPStorageStr " object");
  c10::Allocator* allocator = nullptr;
  at::OptionalDeviceGuard device_guard;

  if (allocator_opt.has_value()) {
    // NOTE(review): the Python-side `allocator` argument is an integer that
    // is reinterpreted as a c10::Allocator* — the caller is trusted to pass
    // a valid pointer value.
    allocator = reinterpret_cast<c10::Allocator*>(allocator_opt.value());
  } else if (device_opt.has_value()) {
    at::Device device = device_opt.value();
    if (device.type() == at::kCPU) {
      allocator = c10::GetDefaultCPUAllocator();
#ifdef USE_CUDA
    } else if (device.type() == at::kCUDA) {
      at::globalContext().lazyInitCUDA();
      allocator = c10::cuda::CUDACachingAllocator::get();
#endif
    } else if (device.type() == at::DeviceType::Meta) {
      allocator = c10::GetAllocator(device.type());
    } else {
      TORCH_CHECK(false,
          THPStorageStr, "(): Storage device not recognized: ", device.type());
    }
    device_guard.reset_device(device);
  } else {
    // Neither allocator nor device given: default to CPU.
    allocator = c10::GetDefaultCPUAllocator();
  }

  // torch.Storage(*, ...)
  if (r.idx == 0) {
    self->cdata = c10::make_intrusive<at::StorageImpl>(
        c10::StorageImpl::use_byte_size_t(),
        0,
        allocator,
        /*resizable=*/true).release();
    return (PyObject*)self.release();

  // torch.Storage(size, *, ...)
  } else if (r.idx == 1) {
    int64_t size = r.toInt64(0);
    self->cdata = c10::make_intrusive<at::StorageImpl>(
        c10::StorageImpl::use_byte_size_t(),
        size,
        allocator,
        /*resizable=*/true).release();
    return (PyObject*)self.release();

  // torch.Storage(sequence, *, ...)
  } else if (r.idx == 2) {
    PyObject *sequence = r.pyobject(0);
    // NOTE(review): PySequence_Length is called before PySequence_Check; on a
    // non-sequence it returns -1 (with a TypeError set) and the checks below
    // then report the failure — confirm this ordering is intentional.
    Py_ssize_t length = PySequence_Length(sequence);
    TORCH_CHECK(PySequence_Check(sequence),
        THPStorageStr, "(): Expected a sequence type, but got ",
        THPUtils_typename(sequence));
    TORCH_CHECK(length >= 0,
        THPStorageStr, "(): Could not obtain the length of sequence of type ",
        THPUtils_typename(sequence));
    self->cdata = c10::make_intrusive<at::StorageImpl>(
        c10::StorageImpl::use_byte_size_t(),
        length,
        allocator,
        /*resizable=*/true)
        .release();
    THPObjectPtr item;
    try {
      for (Py_ssize_t i = 0; i < length; i++) {
        item = PySequence_GetItem(sequence, i);
        // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
        uint8_t value = THPByteUtils_unpackReal(item.get());
        if (allocator == c10::GetDefaultCPUAllocator()) {
          // Fast path: write straight into default-allocated CPU memory.
          self->cdata->unsafe_data<uint8_t>()[i] = value;
        } else {
          // TODO: this might be slow - consider batched updates?
          storage_set(
              at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
              i,
              value);
        }
      }
    } catch (const std::exception &e) {
      // An item failed byte conversion; report and abandon the half-built
      // storage (freed via the THPStoragePtr RAII holder).
      THPUtils_setError(THPStorageStr
          "(): tried to construct a storage from a sequence (%s), "
          "but one of the items was of type %s instead of %s",
          THPUtils_typename(sequence),
          THPUtils_typename(item.get()),
          THPUtils_typeTraits<uint8_t>::python_type_str);
      return nullptr;
    }
    return (PyObject*)self.release();
  }
  Py_RETURN_NONE;
  END_HANDLE_TH_ERRORS
}
// Mapping-protocol length: number of uint8 elements (i.e. bytes) held by
// the underlying StorageImpl.
static Py_ssize_t THPStorage_length(THPStorage *self)
{
  HANDLE_TH_ERRORS
  const auto total_bytes = self->cdata->nbytes();
  return static_cast<Py_ssize_t>(total_bytes / sizeof(uint8_t));
  END_HANDLE_TH_ERRORS_RET(-1)
}
// Implements storage[index] for integer and slice indices.
//  - int: bounds-checked (with negative-index wraparound) single-byte read,
//    returned as a Python int.
//  - slice (step 1 only): returns a NEW storage that aliases the same
//    underlying memory — no copy is made.
// Returns nullptr with a Python exception set on error.
static PyObject * THPStorage_get(THPStorage *self, PyObject *index)
{
  HANDLE_TH_ERRORS
  /* Integer index */
  if (THPUtils_checkLong(index)) {
    int64_t nindex = THPUtils_unpackLong(index);
    if (nindex < 0)
      nindex += (self->cdata->nbytes() / sizeof(uint8_t));
    if (nindex < 0 || nindex >= static_cast<int64_t>(self->cdata->nbytes() / sizeof(uint8_t))) {
      PyErr_SetString(PyExc_IndexError, fmt::format(
          "index {} out of range for storage of size {}",
          nindex, self->cdata->nbytes() / sizeof(uint8_t)));
      return nullptr;
    }
    uint8_t value = storage_get(at::unsafeStorageFromTH(self->cdata, /*retain=*/true), nindex);
    return THPByteUtils_newReal(value);
  /* Slice index */
  } else if (PySlice_Check(index)) {
    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
    Py_ssize_t start, stop, slicelength, step;
    int64_t len = self->cdata->nbytes() / sizeof(uint8_t);
    if (!THPUtils_parseSlice(index, len, &start, &stop, &step, &slicelength))
      return nullptr;
    if (step != 1) {
      THPUtils_setError("Trying to slice with a step of %lld, but only a step of "
          "1 is supported", (long long)step);
      return nullptr;
    }

    uint8_t *data = self->cdata->data<uint8_t>();

    // Keep the parent storage alive for as long as the slice exists: the
    // slice's DataPtr carries an extra refcount on `old_storage`, released
    // by the deleter lambda when the slice storage is destroyed.
    at::StorageImpl* old_storage = self->cdata;
    c10::raw::intrusive_ptr::incref(old_storage);
    auto new_storage = c10::make_intrusive<at::StorageImpl>(
        c10::StorageImpl::use_byte_size_t(),
#ifdef THQUANTIZED
        slicelength * sizeof(quantized_t),
#else
        slicelength * sizeof(uint8_t),
#endif
        at::DataPtr(
            static_cast<void*>(data + start),
            old_storage,
            [](void* s) {
              c10::raw::intrusive_ptr::decref(static_cast<at::StorageImpl*>(s));
            },
            old_storage->device()),
        old_storage->allocator(),
        /* resizable */ false);

    PyObject *_ret = THPStorage_New(std::move(new_storage));
    return _ret;
  }
  PyErr_Format(PyExc_TypeError, "can't index a " THPStorageStr " with %s",
      THPUtils_typename(index));
  return nullptr;
  END_HANDLE_TH_ERRORS
}
// Implements storage[index] = value and storage[slice] = value.
// `value` must be convertible to uint8_t; slices must have step 1.
// Returns 0 on success, -1 on error with a Python exception set, per the
// CPython mp_ass_subscript contract.
static int THPStorage_set(THPStorage *self, PyObject *index, PyObject *value)
{
  HANDLE_TH_ERRORS
  if (!THPByteUtils_checkReal(value)) {
    THPUtils_setError("can only set storage content with a %s, but got "
        "%s instead", THPUtils_typeTraits<uint8_t>::python_type_str,
        THPUtils_typename(value));
    return -1;
  }

  uint8_t rvalue = THPByteUtils_unpackReal(value);
  if (THPUtils_checkLong(index)) {
    int64_t nindex = THPUtils_unpackLong(index);
    storage_set(
        at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
        nindex,
        rvalue);
    return 0;
  } else if (PySlice_Check(index)) {
    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
    Py_ssize_t start, stop, slicelength, step;
    int64_t len = self->cdata->nbytes() / sizeof(uint8_t);
    if (!THPUtils_parseSlice(index, len, &start, &stop, &step, &slicelength))
      return -1;
    if (step != 1) {
      THPUtils_setError("Trying to slice with a step of %lld, but only a step of "
          "1 is supported", (long long)step);
      // BUGFIX: this previously returned 0 (success) despite having set a
      // Python exception; mp_ass_subscript must return -1 on error, and
      // returning success with an exception pending raises SystemError.
      return -1;
    }
    // TODO: check the bounds only once
    // TODO: fill?
    for (;start < stop; start++)
      storage_set(
          at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
          start,
          rvalue);
    return 0;
  }
  THPUtils_setError("can't index a " THPStorageStr " with %s",
      THPUtils_typename(index));
  return -1;
  END_HANDLE_TH_ERRORS_RET(-1)
}
// Mapping protocol table: len(storage), storage[i] / storage[slice] reads,
// and storage[i] = v / storage[slice] = v writes.
static PyMappingMethods THPStorage_mappingmethods = {
  (lenfunc)THPStorage_length,
  (binaryfunc)THPStorage_get,
  (objobjargproc)THPStorage_set
};
// TODO: implement equality
// Python type object for torch._C.StorageBase. tp_methods / tp_members /
// tp_getset are filled in later by THPStorage_init; instances are created
// via tp_new (THPStorage_pynew) or directly through THPStorage_New.
PyTypeObject THPStorageType = {
  PyVarObject_HEAD_INIT(nullptr, 0)
  "torch._C." THPStorageBaseStr,               /* tp_name */
  sizeof(THPStorage),                          /* tp_basicsize */
  0,                                           /* tp_itemsize */
  (destructor)THPStorage_dealloc,              /* tp_dealloc */
  0,                                           /* tp_vectorcall_offset */
  nullptr,                                     /* tp_getattr */
  nullptr,                                     /* tp_setattr */
  nullptr,                                     /* tp_reserved */
  nullptr,                                     /* tp_repr */
  nullptr,                                     /* tp_as_number */
  nullptr,                                     /* tp_as_sequence */
  &THPStorage_mappingmethods,                  /* tp_as_mapping */
  nullptr,                                     /* tp_hash */
  nullptr,                                     /* tp_call */
  nullptr,                                     /* tp_str */
  nullptr,                                     /* tp_getattro */
  nullptr,                                     /* tp_setattro */
  nullptr,                                     /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,    /* tp_flags */
  nullptr,                                     /* tp_doc */
  nullptr,                                     /* tp_traverse */
  nullptr,                                     /* tp_clear */
  nullptr,                                     /* tp_richcompare */
  0,                                           /* tp_weaklistoffset */
  nullptr,                                     /* tp_iter */
  nullptr,                                     /* tp_iternext */
  nullptr,   /* will be assigned in init */    /* tp_methods */
  nullptr,   /* will be assigned in init */    /* tp_members */
  nullptr,                                     /* tp_getset */
  nullptr,                                     /* tp_base */
  nullptr,                                     /* tp_dict */
  nullptr,                                     /* tp_descr_get */
  nullptr,                                     /* tp_descr_set */
  0,                                           /* tp_dictoffset */
  nullptr,                                     /* tp_init */
  nullptr,                                     /* tp_alloc */
  THPStorage_pynew,                            /* tp_new */
};
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables)
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables)
static struct PyMemberDef THPStorage_members[] = {
  // `_cdata` exposes the raw StorageImpl pointer value as a read-only
  // unsigned integer on the Python side.
  {(char*)"_cdata", T_ULONGLONG, offsetof(THPStorage, cdata), READONLY, nullptr},
  {nullptr}
};
// Getter for the `device` property: wraps the StorageImpl's device in a
// Python torch.device object.
static PyObject * THPStorage_device(THPStorage* self, void *unused) {
  HANDLE_TH_ERRORS
  const auto storage_device = self->cdata->device();
  return THPDevice_New(storage_device);
  END_HANDLE_TH_ERRORS
}
// Getter returning the element dtype of this storage: uint8 (or the
// quantized element type when built with THQUANTIZED), since this storage
// is byte-addressed.
static PyObject * THPStorage_dtype(THPStorage *self, void *unused)
{
  HANDLE_TH_ERRORS
  return torch::autograd::utils::wrap(
      torch::getTHPDtype(at::typeMetaToScalarType(
#ifdef THQUANTIZED
          caffe2::TypeMeta::Make<quantized_t>()
#else
          caffe2::TypeMeta::Make<uint8_t>()
#endif
      )));
  END_HANDLE_TH_ERRORS
}
typedef PyObject *(*getter)(PyObject *, void *);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables)
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables)
static struct PyGetSetDef THPStorage_properties[] = {
  {"device", (getter)THPStorage_device, nullptr, nullptr, nullptr},
  // NOTE(review): THPStorage_dtype is defined above but has no entry here —
  // confirm whether a "dtype" getset entry was intentionally omitted (e.g.
  // because dtype is provided by the Python-side wrapper instead).
  {nullptr}
};
// Registers the StorageBase type on `module`. Returns true on success,
// false (with a Python exception set by PyType_Ready) on failure.
bool THPStorage_init(PyObject *module)
{
  // Function-local static so the PyMethodDef array outlives the type
  // object, which keeps a raw pointer into it via tp_methods.
  static std::vector<PyMethodDef> methods;
  THPUtils_addPyMethodDefs(methods, THPStorage_getMethods());
  THPUtils_addPyMethodDefs(methods, THPStorage_getSharingMethods());
  THPStorageType.tp_methods = methods.data();
  THPStorageType.tp_members = THPStorage_members;
  THPStorageType.tp_getset = THPStorage_properties;
  if (PyType_Ready(&THPStorageType) < 0)
    return false;
  Py_INCREF(&THPStorageType);
  // PyModule_AddObject steals the reference on success; its return value is
  // not checked here (a failure would leak the INCREF above).
  PyModule_AddObject(module, THPStorageBaseStr, (PyObject *)&THPStorageType);
  return true;
}
// Caches the Python-defined _UntypedStorage class from `module` into the
// THPStorageClass global so THPStorage_New can allocate instances of it.
// Throws python_error (with the Python exception set) if the attribute is
// missing.
void THPStorage_postInit(PyObject *module)
{
  PyObject *storage_class = PyObject_GetAttrString(module, "_UntypedStorage");
  if (storage_class == nullptr) {
    throw python_error();
  }
  THPStorageClass = storage_class;
}

View File

@ -1,18 +1,22 @@
#ifndef THP_STORAGE_INC
#define THP_STORAGE_INC
#include <torch/csrc/THConcat.h>
#include <torch/csrc/Types.h>
#define THPStorageStr "torch._UntypedStorage"
#define THPStorage_(NAME) TH_CONCAT_2(THPStorage_,NAME)
#define THPStorage_Check(obj) \
PyObject_IsInstance(obj, THPStorageClass)
#define THPStorage_CData(obj) (obj)->cdata
#define THPStorageBaseStr "StorageBase"
#include <torch/csrc/generic/Storage.h>
#include <torch/csrc/THGenerateByteType.h>
// Python object layout for storage wrappers: the standard PyObject header
// plus a raw owning pointer to the underlying c10::StorageImpl, whose
// refcount is managed manually via c10::raw::intrusive_ptr incref/decref.
struct THPStorage {
  PyObject_HEAD
  c10::StorageImpl *cdata;
};
TORCH_PYTHON_API PyObject * THPStorage_New(c10::intrusive_ptr<c10::StorageImpl> ptr);
extern PyObject *THPStorageClass;
bool THPStorage_init(PyObject *module);
void THPStorage_postInit(PyObject *module);
extern PyTypeObject THPStorageType;
#endif

View File

@ -1,5 +0,0 @@
#pragma once
struct THPStorage {
PyObject_HEAD
c10::StorageImpl *cdata;
};

View File

@ -1,3 +1,24 @@
#include <torch/csrc/python_headers.h>
#ifdef _MSC_VER
#include <c10/util/win32-headers.h>
#endif
#include <structmember.h>
#include <libshm.h>
#include <torch/csrc/THP.h>
#include <torch/csrc/copy_utils.h>
#include <torch/csrc/DynamicTypes.h>
#include <torch/csrc/CudaIPCTypes.h>
#include <torch/csrc/Device.h>
#include <torch/csrc/autograd/utils/wrap_outputs.h>
#include <c10/core/CPUAllocator.h>
#include <fmt/format.h>
#include <c10/util/intrusive_ptr.h>
#include <torch/csrc/StorageMethods.h>
#include <torch/csrc/Storage.h>
#include <ATen/ATen.h>
#include <ATen/MapAllocator.h>
#include <torch/csrc/utils/pycfunction_helpers.h>
@ -9,7 +30,6 @@
#include <ATen/native/cuda/Resize.h>
#endif
#include <c10/core/CPUAllocator.h>
#include <ATen/native/Resize.h>
#ifdef _MSC_VER
@ -18,7 +38,7 @@
#define LSEEK lseek
#endif
static PyObject * THPStorage_(nbytes)(PyObject *_self, PyObject *noargs)
static PyObject * THPStorage_nbytes(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
@ -26,15 +46,15 @@ static PyObject * THPStorage_(nbytes)(PyObject *_self, PyObject *noargs)
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(dataPtr)(PyObject *_self, PyObject *noargs)
static PyObject * THPStorage_dataPtr(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
return PyLong_FromVoidPtr(self->cdata->data<scalar_t>());
return PyLong_FromVoidPtr(self->cdata->data<uint8_t>());
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(copy_)(PyObject *self, PyObject *args, PyObject *kwargs)
static PyObject * THPStorage_copy_(PyObject *self, PyObject *args, PyObject *kwargs)
{
HANDLE_TH_ERRORS
@ -59,27 +79,27 @@ static PyObject * THPStorage_(copy_)(PyObject *self, PyObject *args, PyObject *k
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(isPinned)(PyObject *_self, PyObject *noargs)
static PyObject * THPStorage_isPinned(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
#if defined(USE_CUDA)
return PyBool_FromLong(at::globalContext().isPinnedPtr(self->cdata->data<scalar_t>()));
return PyBool_FromLong(at::globalContext().isPinnedPtr(self->cdata->data<uint8_t>()));
#else
Py_RETURN_FALSE;
#endif
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(elementSize)(PyObject *_self, PyObject *noargs)
static PyObject * THPStorage_elementSize(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
return THPUtils_packInt64(sizeof(scalar_t));
return THPUtils_packInt64(sizeof(uint8_t));
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(new)(PyObject *_self, PyObject *noargs)
static PyObject * THPStorage_new(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
@ -91,11 +111,11 @@ static PyObject * THPStorage_(new)(PyObject *_self, PyObject *noargs)
/*resizable=*/true);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
return THPStorage_(New)(std::move(new_storage));
return THPStorage_New(std::move(new_storage));
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(resize_)(PyObject *_self, PyObject *number_arg)
static PyObject * THPStorage_resize_(PyObject *_self, PyObject *number_arg)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
@ -123,22 +143,22 @@ static PyObject * THPStorage_(resize_)(PyObject *_self, PyObject *number_arg)
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(fill_)(PyObject *_self, PyObject *number_arg)
static PyObject * THPStorage_fill_(PyObject *_self, PyObject *number_arg)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
THPUtils_assert(THPUtils_(checkReal)(number_arg), "fill_ expects %s, "
"but got %s", THPUtils_typeTraits<scalar_t>::python_type_str,
THPUtils_assert(THPByteUtils_checkReal(number_arg), "fill_ expects %s, "
"but got %s", THPUtils_typeTraits<uint8_t>::python_type_str,
THPUtils_typename(number_arg));
storage_fill(
at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
THPUtils_(unpackReal)(number_arg));
THPByteUtils_unpackReal(number_arg));
Py_INCREF(self);
return (PyObject*)self;
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(fromBuffer)(PyObject *_unused, PyObject *args, PyObject *keywds)
static PyObject * THPStorage_fromBuffer(PyObject *_unused, PyObject *args, PyObject *keywds)
{
HANDLE_TH_ERRORS
PyObject *obj = nullptr;
@ -276,11 +296,11 @@ static PyObject * THPStorage_(fromBuffer)(PyObject *_unused, PyObject *args, PyO
}
PyBuffer_Release(&buffer);
return (PyObject*)THPStorage_(New)(storage);
return (PyObject*)THPStorage_New(storage);
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(fromFile)(PyObject *_unused, PyObject *args, PyObject *keywds)
static PyObject * THPStorage_fromFile(PyObject *_unused, PyObject *args, PyObject *keywds)
{
HANDLE_TH_ERRORS
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@ -309,11 +329,11 @@ static PyObject * THPStorage_(fromFile)(PyObject *_unused, PyObject *args, PyObj
storage->set_nbytes(actual_nbytes);
}
return (PyObject*)THPStorage_(New)(std::move(storage));
return (PyObject*)THPStorage_New(std::move(storage));
END_HANDLE_TH_ERRORS
}
PyObject * THPStorage_(writeFile)(PyObject *_self, PyObject *args)
PyObject * THPStorage_writeFile(PyObject *_self, PyObject *args)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
@ -327,19 +347,19 @@ PyObject * THPStorage_(writeFile)(PyObject *_self, PyObject *args)
uint64_t element_size = THPUtils_unpackUInt64(element_size_obj);
if (!is_real_file) {
THPStorage_(writeFileRaw<PyObject*>)(self->cdata, file, save_size, element_size);
THPStorage_writeFileRaw<PyObject*>(self->cdata, file, save_size, element_size);
Py_RETURN_NONE;
}
int fd = PyObject_AsFileDescriptor(file);
THPUtils_assert(fd != -1, "_write_file couldn't retrieve a file descriptor "
"from given object");
THPStorage_(writeFileRaw)(self->cdata, fd, save_size, element_size);
THPStorage_writeFileRaw(self->cdata, fd, save_size, element_size);
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
PyObject * THPStorage_(newWithFile)(PyObject *_unused, PyObject *args)
PyObject * THPStorage_newWithFile(PyObject *_unused, PyObject *args)
{
HANDLE_TH_ERRORS
TORCH_CHECK(PyTuple_Size(args) == 2,
@ -353,14 +373,14 @@ PyObject * THPStorage_(newWithFile)(PyObject *_unused, PyObject *args)
"_new_with_file: need to specify element size");
uint64_t element_size = THPUtils_unpackUInt64(element_size_obj);
auto storage = THPStorage_(readFileRaw<int>)(fd, {}, element_size);
auto storage = THPStorage_readFileRaw<int>(fd, {}, element_size);
if (!storage.defined())
return nullptr;
return THPStorage_(New)(std::move(storage));
return THPStorage_New(std::move(storage));
END_HANDLE_TH_ERRORS
}
static PyObject *THPStorage_(setFromFile)(PyObject *_self, PyObject *args)
static PyObject *THPStorage_setFromFile(PyObject *_self, PyObject *args)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
@ -381,7 +401,7 @@ static PyObject *THPStorage_(setFromFile)(PyObject *_self, PyObject *args)
"_set_from_file: offset is NYI for filelike objects");
auto self_storage = c10::intrusive_ptr<c10::StorageImpl>::reclaim_copy(self->cdata);
auto storage = THPStorage_(readFileRaw<PyObject*>)(file, std::move(self_storage), element_size);
auto storage = THPStorage_readFileRaw<PyObject*>(file, std::move(self_storage), element_size);
if (!storage.defined()) {
return nullptr;
}
@ -398,7 +418,7 @@ static PyObject *THPStorage_(setFromFile)(PyObject *_self, PyObject *args)
THPUtils_assert(fd != -1, "_set_from_file couldn't retrieve a file "
"descriptor from given object");
auto self_storage = c10::intrusive_ptr<c10::StorageImpl>::reclaim_copy(self->cdata);
auto storage = THPStorage_(readFileRaw<int>)(fd, self_storage, element_size);
auto storage = THPStorage_readFileRaw<int>(fd, self_storage, element_size);
if (!storage.defined())
return nullptr;
Py_INCREF(self);
@ -418,7 +438,7 @@ static PyObject *THPStorage_(setFromFile)(PyObject *_self, PyObject *args)
END_HANDLE_TH_ERRORS
}
PyObject * THPStorage_(_setCdata)(PyObject *_self, PyObject *new_cdata)
PyObject * THPStorage__setCdata(PyObject *_self, PyObject *new_cdata)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
@ -439,23 +459,27 @@ PyObject * THPStorage_(_setCdata)(PyObject *_self, PyObject *new_cdata)
}
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables)
static PyMethodDef THPStorage_(methods)[] = {
{"copy_", castPyCFunctionWithKeywords(THPStorage_(copy_)),
static PyMethodDef THPStorage_methods[] = {
{"copy_", castPyCFunctionWithKeywords(THPStorage_copy_),
METH_VARARGS | METH_KEYWORDS, nullptr},
{"element_size", THPStorage_(elementSize), METH_NOARGS, nullptr},
{"fill_", THPStorage_(fill_), METH_O, nullptr},
{"new", THPStorage_(new), METH_NOARGS, nullptr},
{"resize_", THPStorage_(resize_), METH_O, nullptr},
{"nbytes", THPStorage_(nbytes), METH_NOARGS, nullptr},
{"data_ptr", THPStorage_(dataPtr), METH_NOARGS, nullptr},
{"is_pinned", THPStorage_(isPinned), METH_NOARGS, nullptr},
{"_write_file", THPStorage_(writeFile), METH_VARARGS, nullptr},
{"_new_with_file", THPStorage_(newWithFile), METH_VARARGS | METH_STATIC, nullptr},
{"_set_from_file", THPStorage_(setFromFile), METH_VARARGS, nullptr},
{"from_buffer", castPyCFunctionWithKeywords(THPStorage_(fromBuffer)),
{"element_size", THPStorage_elementSize, METH_NOARGS, nullptr},
{"fill_", THPStorage_fill_, METH_O, nullptr},
{"new", THPStorage_new, METH_NOARGS, nullptr},
{"resize_", THPStorage_resize_, METH_O, nullptr},
{"nbytes", THPStorage_nbytes, METH_NOARGS, nullptr},
{"data_ptr", THPStorage_dataPtr, METH_NOARGS, nullptr},
{"is_pinned", THPStorage_isPinned, METH_NOARGS, nullptr},
{"_write_file", THPStorage_writeFile, METH_VARARGS, nullptr},
{"_new_with_file", THPStorage_newWithFile, METH_VARARGS | METH_STATIC, nullptr},
{"_set_from_file", THPStorage_setFromFile, METH_VARARGS, nullptr},
{"from_buffer", castPyCFunctionWithKeywords(THPStorage_fromBuffer),
METH_VARARGS | METH_KEYWORDS | METH_STATIC, nullptr},
{"from_file", castPyCFunctionWithKeywords(THPStorage_(fromFile)),
{"from_file", castPyCFunctionWithKeywords(THPStorage_fromFile),
METH_VARARGS | METH_KEYWORDS | METH_STATIC, nullptr},
{"_set_cdata", THPStorage_(_setCdata), METH_O, nullptr},
{"_set_cdata", THPStorage__setCdata, METH_O, nullptr},
{nullptr}
};
// Accessor for this file's static method table, so Storage.cpp can splice
// these methods into the StorageBase type without seeing the table directly.
PyMethodDef* THPStorage_getMethods() {
  return THPStorage_methods;
}

View File

@ -0,0 +1,8 @@
#ifndef THP_STORAGE_METHODS_INC
#define THP_STORAGE_METHODS_INC
#include <Python.h>
// Returns the PyMethodDef table implemented in StorageMethods.cpp
// (copy_, fill_, resize_, nbytes, from_buffer, from_file, ...), for
// registration on the StorageBase type by THPStorage_init.
PyMethodDef* THPStorage_getMethods();
#endif

View File

@ -1,3 +1,24 @@
#include <torch/csrc/python_headers.h>
#ifdef _MSC_VER
#include <c10/util/win32-headers.h>
#endif
#include <structmember.h>
#include <libshm.h>
#include <torch/csrc/THP.h>
#include <torch/csrc/copy_utils.h>
#include <torch/csrc/DynamicTypes.h>
#include <torch/csrc/CudaIPCTypes.h>
#include <torch/csrc/Device.h>
#include <torch/csrc/autograd/utils/wrap_outputs.h>
#include <c10/core/CPUAllocator.h>
#include <fmt/format.h>
#include <c10/util/intrusive_ptr.h>
#include <torch/csrc/StorageSharing.h>
#include <torch/csrc/Storage.h>
#ifdef USE_CUDA
#include <cuda.h>
#include <cuda_runtime.h>
@ -9,7 +30,7 @@
#include <atomic>
#include <string>
static PyObject * THPStorage_(sharedDecref)(PyObject *_self, PyObject *noargs)
static PyObject * THPStorage_sharedDecref(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
@ -26,7 +47,7 @@ static PyObject * THPStorage_(sharedDecref)(PyObject *_self, PyObject *noargs)
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(sharedIncref)(PyObject *_self, PyObject *noargs)
static PyObject * THPStorage_sharedIncref(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
@ -42,7 +63,7 @@ static PyObject * THPStorage_(sharedIncref)(PyObject *_self, PyObject *noargs)
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(pyNewFilenameStorage)(PyObject *_unused, PyObject *args)
static PyObject * THPStorage_pyNewFilenameStorage(PyObject *_unused, PyObject *args)
{
HANDLE_TH_ERRORS
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@ -53,7 +74,7 @@ static PyObject * THPStorage_(pyNewFilenameStorage)(PyObject *_unused, PyObject
int flags = at::ALLOCATOR_MAPPED_SHAREDMEM | at::ALLOCATOR_MAPPED_EXCLUSIVE;
std::string handle = at::NewProcessWideShmHandle();
return THPStorage_(New)(c10::make_intrusive<at::StorageImpl>(
return THPStorage_New(c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size,
THManagedMapAllocator::makeDataPtr("", handle.c_str(), flags, size),
@ -62,7 +83,7 @@ static PyObject * THPStorage_(pyNewFilenameStorage)(PyObject *_unused, PyObject
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(shareFilename)(PyObject *_self, PyObject *noargs)
static PyObject * THPStorage_shareFilename(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
TORCH_CHECK(reinterpret_cast<THPStorage*>(_self)->cdata->device_type() == at::kCPU,
@ -98,7 +119,7 @@ static PyObject * THPStorage_(shareFilename)(PyObject *_self, PyObject *noargs)
if (!manager_handle) return nullptr;
THPObjectPtr storage_handle(PyBytes_FromString(ctx->filename()));
if (!storage_handle) return nullptr;
THPObjectPtr size(THPUtils_packUInt64(storage->nbytes() / sizeof(scalar_t)));
THPObjectPtr size(THPUtils_packUInt64(storage->nbytes() / sizeof(uint8_t)));
if (!size) return nullptr;
THPObjectPtr tuple(PyTuple_New(3));
@ -110,7 +131,7 @@ static PyObject * THPStorage_(shareFilename)(PyObject *_self, PyObject *noargs)
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(newSharedFilename)(PyObject *_unused, PyObject *args)
static PyObject * THPStorage_newSharedFilename(PyObject *_unused, PyObject *args)
{
HANDLE_TH_ERRORS
THPUtils_assert(PyTuple_GET_SIZE(args) == 3, "tuple of 3 items expected");
@ -127,7 +148,7 @@ static PyObject * THPStorage_(newSharedFilename)(PyObject *_unused, PyObject *ar
int64_t size = THPUtils_unpackLong(_size);
int flags = at::ALLOCATOR_MAPPED_SHAREDMEM |
at::ALLOCATOR_MAPPED_NOCREATE;
return THPStorage_(New)(
return THPStorage_New(
c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size,
@ -137,14 +158,14 @@ static PyObject * THPStorage_(newSharedFilename)(PyObject *_unused, PyObject *ar
END_HANDLE_TH_ERRORS
}
static c10::intrusive_ptr<c10::StorageImpl> THPStorage_(newFdStorage)(ptrdiff_t size)
static c10::intrusive_ptr<c10::StorageImpl> THPStorage_newFdStorage(ptrdiff_t size)
{
int flags = at::ALLOCATOR_MAPPED_SHAREDMEM |
at::ALLOCATOR_MAPPED_EXCLUSIVE |
at::ALLOCATOR_MAPPED_KEEPFD |
at::ALLOCATOR_MAPPED_UNLINK;
std::string handle = at::NewProcessWideShmHandle();
auto sptr = at::MapAllocator::makeDataPtr(handle.c_str(), flags, size * sizeof(scalar_t), nullptr);
auto sptr = at::MapAllocator::makeDataPtr(handle.c_str(), flags, size * sizeof(uint8_t), nullptr);
return c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size,
@ -153,7 +174,7 @@ static c10::intrusive_ptr<c10::StorageImpl> THPStorage_(newFdStorage)(ptrdiff_t
/*resizable=*/false);
}
static PyObject * THPStorage_(pyNewFdStorage)(PyObject *_unused, PyObject *args)
static PyObject * THPStorage_pyNewFdStorage(PyObject *_unused, PyObject *args)
{
HANDLE_TH_ERRORS
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@ -161,11 +182,11 @@ static PyObject * THPStorage_(pyNewFdStorage)(PyObject *_unused, PyObject *args)
if (!PyArg_ParseTuple(args, "L", &size)) {
return nullptr;
}
return THPStorage_(New)(THPStorage_(newFdStorage)(size));
return THPStorage_New(THPStorage_newFdStorage(size));
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(shareFd)(PyObject *_self, PyObject *noargs)
static PyObject * THPStorage_shareFd(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
TORCH_CHECK(reinterpret_cast<THPStorage*>(_self)->cdata->device_type() == at::kCPU,
@ -178,7 +199,7 @@ static PyObject * THPStorage_(shareFd)(PyObject *_self, PyObject *noargs)
if ((ctx = at::MapAllocator::fromDataPtr(storage->data_ptr()))) {
// done
} else {
at::Storage new_storage(THPStorage_(newFdStorage)(storage->nbytes()));
at::Storage new_storage(THPStorage_newFdStorage(storage->nbytes()));
at::Storage _self_aten = torch::createStorage(_self);
storage_copy(new_storage, _self_aten);
@ -189,7 +210,7 @@ static PyObject * THPStorage_(shareFd)(PyObject *_self, PyObject *noargs)
THPObjectPtr storage_handle(THPUtils_packInt32(ctx->fd()));
if (!storage_handle) return nullptr;
THPObjectPtr size(THPUtils_packUInt64(storage->nbytes() / sizeof(scalar_t)));
THPObjectPtr size(THPUtils_packUInt64(storage->nbytes() / sizeof(uint8_t)));
if (!size) return nullptr;
THPObjectPtr tuple(PyTuple_New(2));
@ -200,7 +221,7 @@ static PyObject * THPStorage_(shareFd)(PyObject *_self, PyObject *noargs)
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(newSharedFd)(PyObject *_unused, PyObject *args)
static PyObject * THPStorage_newSharedFd(PyObject *_unused, PyObject *args)
{
HANDLE_TH_ERRORS
THPUtils_assert(PyTuple_GET_SIZE(args) == 2, "tuple of 2 items expected");
@ -224,7 +245,7 @@ static PyObject * THPStorage_(newSharedFd)(PyObject *_unused, PyObject *args)
at::ALLOCATOR_MAPPED_NOCREATE |
at::ALLOCATOR_MAPPED_KEEPFD |
at::ALLOCATOR_MAPPED_FROMFD;
return THPStorage_(New)(
return THPStorage_New(
c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size,
@ -234,7 +255,7 @@ static PyObject * THPStorage_(newSharedFd)(PyObject *_unused, PyObject *args)
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(shareCuda)(PyObject *_self, PyObject *noargs)
static PyObject * THPStorage_shareCuda(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
#ifdef USE_CUDA
@ -262,11 +283,11 @@ static PyObject * THPStorage_(shareCuda)(PyObject *_self, PyObject *noargs)
Py_INCREF(Py_None);
THPObjectPtr _event_sync_required(Py_None);
Py_INCREF(Py_None);
if (storage->data<scalar_t>()) {
if (storage->data<uint8_t>()) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
size_t base_size;
void *base_ptr = c10::cuda::CUDACachingAllocator::getBaseAllocation(storage->data<scalar_t>(), &base_size);
ptrdiff_t offset_bytes = (char*)storage->data<scalar_t>() - (char*)base_ptr;
void *base_ptr = c10::cuda::CUDACachingAllocator::getBaseAllocation(storage->data<uint8_t>(), &base_size);
ptrdiff_t offset_bytes = (char*)storage->data<uint8_t>() - (char*)base_ptr;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
cudaIpcMemHandle_t handle;
@ -320,7 +341,7 @@ static PyObject * THPStorage_(shareCuda)(PyObject *_self, PyObject *noargs)
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(releaseIPCCounter)(PyObject *_unused, PyObject *args)
static PyObject * THPStorage_releaseIPCCounter(PyObject *_unused, PyObject *args)
{
HANDLE_TH_ERRORS
#ifdef USE_CUDA
@ -363,7 +384,7 @@ static PyObject * THPStorage_(releaseIPCCounter)(PyObject *_unused, PyObject *ar
}
#ifdef USE_CUDA
static std::string THPStorage_(bytesAsHandleString)(PyObject *handle) {
static std::string THPStorage_bytesAsHandleString(PyObject *handle) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
char* buffer;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@ -379,7 +400,7 @@ static std::string THPStorage_(bytesAsHandleString)(PyObject *handle) {
}
#endif
static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
static PyObject * THPStorage_newSharedCuda(PyObject *_unused, PyObject *args)
{
HANDLE_TH_ERRORS
#ifdef USE_CUDA
@ -405,7 +426,7 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
return nullptr;
}
size_t storage_size = (size_t)THPUtils_unpackLong(_size_bytes) / sizeof(scalar_t);
size_t storage_size = (size_t)THPUtils_unpackLong(_size_bytes) / sizeof(uint8_t);
ptrdiff_t storage_offset_bytes = (ptrdiff_t)THPUtils_unpackLong(_offset_bytes);
int64_t device = THPUtils_unpackLong(_device);
@ -414,7 +435,7 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
if (PyObject_IsTrue(_event_sync_required)) {
// Ensure that producer prepared all tensor's data
std::string s_ipc_event_handle =
THPStorage_(bytesAsHandleString)(_event_handle);
THPStorage_bytesAsHandleString(_event_handle);
auto ipc_event_handle = reinterpret_cast<const cudaIpcEventHandle_t*>(
s_ipc_event_handle.c_str());
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@ -424,7 +445,7 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
cudaStreamWaitEvent(c10::cuda::getCurrentCUDAStream(device), event, 0));
}
std::string s_handle = THPStorage_(bytesAsHandleString)(_handle);
std::string s_handle = THPStorage_bytesAsHandleString(_handle);
std::shared_ptr<void> basePtr = c10::cuda::CUDACachingAllocator::getIpcDevPtr(s_handle);
// Offset the basePtr to reconstruct the real storage
@ -499,7 +520,7 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
base->set_resizable(false);
base->set_received_cuda(true);
return THPStorage_(New)(std::move(base));
return THPStorage_New(std::move(base));
#else
TORCH_CHECK(false, "CUDA is not available");
#endif
@ -511,7 +532,7 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
// pointer.
//
// NB: This does NOT preserve object identity when you call it multiple times
static PyObject * THPStorage_(weakRef)(PyObject *_self, PyObject *args) {
static PyObject * THPStorage_weakRef(PyObject *_self, PyObject *args) {
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
c10::StorageImpl* storage = self->cdata;
@ -519,20 +540,20 @@ static PyObject * THPStorage_(weakRef)(PyObject *_self, PyObject *args) {
END_HANDLE_TH_ERRORS
}
PyObject * THPStorage_(newWithWeakPtr)(PyObject *_unused, PyObject *arg)
PyObject * THPStorage_newWithWeakPtr(PyObject *_unused, PyObject *arg)
{
HANDLE_TH_ERRORS
THPUtils_assert(THPUtils_checkLong(arg),
"_new_with_weak_ptr(): arg must be an 'int'");
c10::StorageImpl *weak_storage = (c10::StorageImpl*)PyLong_AsVoidPtr(arg);
if (auto* storage = c10::raw::weak_intrusive_ptr::lock(weak_storage)) {
return THPStorage_(New)(c10::intrusive_ptr<c10::StorageImpl>::reclaim(storage));
return THPStorage_New(c10::intrusive_ptr<c10::StorageImpl>::reclaim(storage));
}
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
PyObject * THPStorage_(freeWeakRef)(PyObject *_unused, PyObject *arg)
PyObject * THPStorage_freeWeakRef(PyObject *_unused, PyObject *arg)
{
HANDLE_TH_ERRORS
if (arg == Py_None) {
@ -547,7 +568,7 @@ PyObject * THPStorage_(freeWeakRef)(PyObject *_unused, PyObject *arg)
END_HANDLE_TH_ERRORS
}
PyObject * THPStorage_(expired)(PyObject *_unused, PyObject *arg)
PyObject * THPStorage_expired(PyObject *_unused, PyObject *arg)
{
HANDLE_TH_ERRORS
THPUtils_assert(THPUtils_checkLong(arg), "_expired(): arg must be an 'int'");
@ -556,7 +577,7 @@ PyObject * THPStorage_(expired)(PyObject *_unused, PyObject *arg)
END_HANDLE_TH_ERRORS
}
PyObject * THPStorage_(sharedFd)(PyObject *_self, PyObject *noargs)
PyObject * THPStorage_sharedFd(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
@ -571,7 +592,7 @@ PyObject * THPStorage_(sharedFd)(PyObject *_self, PyObject *noargs)
END_HANDLE_TH_ERRORS
}
PyObject * THPStorage_(isShared)(PyObject *_self, PyObject *noargs)
PyObject * THPStorage_isShared(PyObject *_self, PyObject *noargs)
{
auto self = (THPStorage*)_self;
if (self->cdata->device_type() == at::kCUDA) {
@ -586,23 +607,27 @@ PyObject * THPStorage_(isShared)(PyObject *_self, PyObject *noargs)
}
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables)
static PyMethodDef THPStorage_(sharingMethods)[] = {
{"_new_with_weak_ptr", THPStorage_(newWithWeakPtr), METH_O | METH_CLASS, nullptr},
{"_share_cuda_", THPStorage_(shareCuda), METH_NOARGS, nullptr},
{"_new_shared_cuda", THPStorage_(newSharedCuda), METH_VARARGS | METH_STATIC, nullptr},
{"_release_ipc_counter_cuda", THPStorage_(releaseIPCCounter), METH_VARARGS | METH_STATIC, nullptr},
{"_share_fd_cpu_", THPStorage_(shareFd), METH_NOARGS, nullptr},
{"_new_shared_fd_cpu", THPStorage_(newSharedFd), METH_VARARGS | METH_STATIC, nullptr},
{"_new_using_fd_cpu", THPStorage_(pyNewFdStorage), METH_VARARGS | METH_STATIC, nullptr},
{"_share_filename_cpu_", THPStorage_(shareFilename), METH_NOARGS, nullptr},
{"_new_shared_filename_cpu", THPStorage_(newSharedFilename), METH_VARARGS | METH_STATIC, nullptr},
{"_new_using_filename_cpu", THPStorage_(pyNewFilenameStorage), METH_VARARGS | METH_STATIC, nullptr},
{"_weak_ref", THPStorage_(weakRef), METH_NOARGS, nullptr},
{"_free_weak_ref", THPStorage_(freeWeakRef), METH_O | METH_STATIC, nullptr},
{"_expired", THPStorage_(expired), METH_O | METH_STATIC, nullptr},
{"_shared_decref", THPStorage_(sharedDecref), METH_NOARGS, nullptr},
{"_shared_incref", THPStorage_(sharedIncref), METH_NOARGS, nullptr},
{"_get_shared_fd", THPStorage_(sharedFd), METH_NOARGS, nullptr},
{"is_shared", THPStorage_(isShared), METH_NOARGS, nullptr},
// Python-visible method table for storage sharing / IPC support:
// file-descriptor and filename based sharing for CPU storages,
// CUDA IPC memory/event handles for CUDA storages, and weak-reference
// bookkeeping helpers.  The "_cpu" / "_cuda" suffixes in the exposed
// names indicate which device each entry supports.
static PyMethodDef THPStorage_sharingMethods[] = {
{"_new_with_weak_ptr", THPStorage_newWithWeakPtr, METH_O | METH_CLASS, nullptr},
{"_share_cuda_", THPStorage_shareCuda, METH_NOARGS, nullptr},
{"_new_shared_cuda", THPStorage_newSharedCuda, METH_VARARGS | METH_STATIC, nullptr},
{"_release_ipc_counter_cuda", THPStorage_releaseIPCCounter, METH_VARARGS | METH_STATIC, nullptr},
{"_share_fd_cpu_", THPStorage_shareFd, METH_NOARGS, nullptr},
{"_new_shared_fd_cpu", THPStorage_newSharedFd, METH_VARARGS | METH_STATIC, nullptr},
{"_new_using_fd_cpu", THPStorage_pyNewFdStorage, METH_VARARGS | METH_STATIC, nullptr},
{"_share_filename_cpu_", THPStorage_shareFilename, METH_NOARGS, nullptr},
{"_new_shared_filename_cpu", THPStorage_newSharedFilename, METH_VARARGS | METH_STATIC, nullptr},
{"_new_using_filename_cpu", THPStorage_pyNewFilenameStorage, METH_VARARGS | METH_STATIC, nullptr},
{"_weak_ref", THPStorage_weakRef, METH_NOARGS, nullptr},
{"_free_weak_ref", THPStorage_freeWeakRef, METH_O | METH_STATIC, nullptr},
{"_expired", THPStorage_expired, METH_O | METH_STATIC, nullptr},
{"_shared_decref", THPStorage_sharedDecref, METH_NOARGS, nullptr},
{"_shared_incref", THPStorage_sharedIncref, METH_NOARGS, nullptr},
{"_get_shared_fd", THPStorage_sharedFd, METH_NOARGS, nullptr},
{"is_shared", THPStorage_isShared, METH_NOARGS, nullptr},
{nullptr}  // sentinel terminating the table
};
// Accessor allowing other translation units (via StorageSharing.h) to
// retrieve this file-local table, e.g. to append the methods onto the
// storage type during module initialization.
PyMethodDef* THPStorage_getSharingMethods() {
return THPStorage_sharingMethods;
}

View File

@ -0,0 +1,8 @@
#ifndef THP_STORAGE_SHARING_INC
#define THP_STORAGE_SHARING_INC

// Public interface of StorageSharing.cpp: exposes the (file-local)
// PyMethodDef table of storage sharing/IPC methods so the storage
// type can register them.
#include <Python.h>

PyMethodDef* THPStorage_getSharingMethods();

#endif

View File

@ -1,366 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "torch/csrc/generic/Storage.cpp"
#else
#include <torch/csrc/utils/python_arg_parser.h>
PyObject *THPStorageClass = nullptr;
PyObject * THPStorage_(New)(c10::intrusive_ptr<c10::StorageImpl> ptr)
{
AT_ASSERT(ptr);
PyTypeObject *type = (PyTypeObject *)THPStorageClass;
PyObject *obj = type->tp_alloc(type, 0);
if (obj) {
((THPStorage *)obj)->cdata = ptr.release();
}
return obj;
}
static void THPStorage_(dealloc)(THPStorage* self)
{
if (self->cdata) {
c10::raw::intrusive_ptr::decref(self->cdata);
}
Py_TYPE(self)->tp_free((PyObject*)self);
}
static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
HANDLE_TH_ERRORS
static torch::PythonArgParser parser({
THPStorageStr "(*, int64_t allocator=None, Device device=None)",
THPStorageStr "(int64_t size, *, int64_t allocator=None, Device device=None)",
THPStorageStr "(PyObject* sequence, *, int64_t allocator=None, Device device=None)",
});
torch::ParsedArgs<3> parsed_args;
auto r = parser.parse(args, kwargs, parsed_args);
int64_t allocator_arg_idx = 0;
int64_t device_arg_idx = 1;
if (r.idx > 0) {
allocator_arg_idx = 1;
device_arg_idx = 2;
}
c10::optional<int64_t> allocator_opt = r.toInt64Optional(allocator_arg_idx);
c10::optional<at::Device> device_opt = r.deviceOptional(device_arg_idx);
TORCH_CHECK(!allocator_opt.has_value() || !device_opt.has_value(),
THPStorageStr, "(): only one or neither of 'allocator' or 'device' can ",
"be given, but not both");
THPStoragePtr self((THPStorage *)type->tp_alloc(type, 0));
THPUtils_assert(self, "failed to allocate a " THPStorageStr " object");
c10::Allocator* allocator = nullptr;
at::OptionalDeviceGuard device_guard;
if (allocator_opt.has_value()) {
allocator = reinterpret_cast<c10::Allocator*>(allocator_opt.value());
} else if (device_opt.has_value()) {
at::Device device = device_opt.value();
if (device.type() == at::kCPU) {
allocator = c10::GetDefaultCPUAllocator();
#ifdef USE_CUDA
} else if (device.type() == at::kCUDA) {
at::globalContext().lazyInitCUDA();
allocator = c10::cuda::CUDACachingAllocator::get();
#endif
} else if (device.type() == at::DeviceType::Meta) {
allocator = c10::GetAllocator(device.type());
} else {
TORCH_CHECK(false,
THPStorageStr, "(): Storage device not recognized: ", device.type());
}
device_guard.reset_device(device);
} else {
allocator = c10::GetDefaultCPUAllocator();
}
// torch.Storage(*, ...)
if (r.idx == 0) {
self->cdata = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
0,
allocator,
/*resizable=*/true).release();
return (PyObject*)self.release();
// torch.Storage(size, *, ...)
} else if (r.idx == 1) {
int64_t size = r.toInt64(0);
self->cdata = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size,
allocator,
/*resizable=*/true).release();
return (PyObject*)self.release();
// torch.Storage(sequence, *, ...)
} else if (r.idx == 2) {
PyObject *sequence = r.pyobject(0);
Py_ssize_t length = PySequence_Length(sequence);
TORCH_CHECK(PySequence_Check(sequence),
THPStorageStr, "(): Expected a sequence type, but got ",
THPUtils_typename(sequence));
TORCH_CHECK(length >= 0,
THPStorageStr, "(): Could not obtain the length of sequence of type ",
THPUtils_typename(sequence));
self->cdata = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
length,
allocator,
/*resizable=*/true)
.release();
THPObjectPtr item;
try {
for (Py_ssize_t i = 0; i < length; i++) {
item = PySequence_GetItem(sequence, i);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
scalar_t value = THPUtils_(unpackReal)(item.get());
if (allocator == c10::GetDefaultCPUAllocator()) {
self->cdata->unsafe_data<scalar_t>()[i] = value;
} else {
// TODO: this might be slow - consider batched updates?
storage_set(
at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
i,
value);
}
}
} catch (const std::exception &e) {
THPUtils_setError(THPStorageStr
"(): tried to construct a storage from a sequence (%s), "
"but one of the items was of type %s instead of %s",
THPUtils_typename(sequence),
THPUtils_typename(item.get()),
THPUtils_typeTraits<scalar_t>::python_type_str);
return nullptr;
}
return (PyObject*)self.release();
}
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
static Py_ssize_t THPStorage_(length)(THPStorage *self)
{
HANDLE_TH_ERRORS
return self->cdata->nbytes() / sizeof(scalar_t);
END_HANDLE_TH_ERRORS_RET(-1)
}
static PyObject * THPStorage_(get)(THPStorage *self, PyObject *index)
{
HANDLE_TH_ERRORS
/* Integer index */
if (THPUtils_checkLong(index)) {
int64_t nindex = THPUtils_unpackLong(index);
if (nindex < 0)
nindex += (self->cdata->nbytes() / sizeof(scalar_t));
if (nindex < 0 || nindex >= static_cast<int64_t>(self->cdata->nbytes() / sizeof(scalar_t))) {
PyErr_SetString(PyExc_IndexError, fmt::format(
"index {} out of range for storage of size {}",
nindex, self->cdata->nbytes() / sizeof(scalar_t)));
return nullptr;
}
scalar_t value = storage_get(at::unsafeStorageFromTH(self->cdata, /*retain=*/true), nindex);
return THPUtils_(newReal)(value);
/* Slice index */
} else if (PySlice_Check(index)) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
Py_ssize_t start, stop, slicelength, step;
int64_t len = self->cdata->nbytes() / sizeof(scalar_t);
if (!THPUtils_parseSlice(index, len, &start, &stop, &step, &slicelength))
return nullptr;
if (step != 1) {
THPUtils_setError("Trying to slice with a step of %lld, but only a step of "
"1 is supported", (long long)step);
return nullptr;
}
scalar_t *data = self->cdata->data<scalar_t>();
at::StorageImpl* old_storage = self->cdata;
c10::raw::intrusive_ptr::incref(old_storage);
auto new_storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
#ifdef THQUANTIZED
slicelength * sizeof(quantized_t),
#else
slicelength * sizeof(scalar_t),
#endif
at::DataPtr(
static_cast<void*>(data + start),
old_storage,
[](void* s) {
c10::raw::intrusive_ptr::decref(static_cast<at::StorageImpl*>(s));
},
old_storage->device()),
old_storage->allocator(),
/* resizable */ false);
PyObject *_ret = THPStorage_(New)(std::move(new_storage));
return _ret;
}
PyErr_Format(PyExc_TypeError, "can't index a " THPStorageStr " with %s",
THPUtils_typename(index));
return nullptr;
END_HANDLE_TH_ERRORS
}
static int THPStorage_(set)(THPStorage *self, PyObject *index, PyObject *value)
{
HANDLE_TH_ERRORS
if (!THPUtils_(checkReal)(value)) {
THPUtils_setError("can only set storage content with a %s, but got "
"%s instead", THPUtils_typeTraits<scalar_t>::python_type_str,
THPUtils_typename(value));
return -1;
}
scalar_t rvalue = THPUtils_(unpackReal)(value);
if (THPUtils_checkLong(index)) {
int64_t nindex = THPUtils_unpackLong(index);
storage_set(
at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
nindex,
rvalue);
return 0;
} else if (PySlice_Check(index)) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
Py_ssize_t start, stop, slicelength, step;
int64_t len = self->cdata->nbytes() / sizeof(scalar_t);
if (!THPUtils_parseSlice(index, len, &start, &stop, &step, &slicelength))
return -1;
if (step != 1) {
THPUtils_setError("Trying to slice with a step of %lld, but only a step of "
"1 is supported", (long long)step);
return 0;
}
// TODO: check the bounds only once
// TODO: fill?
for (;start < stop; start++)
storage_set(
at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
start,
rvalue);
return 0;
}
THPUtils_setError("can't index a " THPStorageStr " with %s",
THPUtils_typename(index));
return -1;
END_HANDLE_TH_ERRORS_RET(-1)
}
static PyMappingMethods THPStorage_(mappingmethods) = {
(lenfunc)THPStorage_(length),
(binaryfunc)THPStorage_(get),
(objobjargproc)THPStorage_(set)
};
// TODO: implement equality
PyTypeObject THPStorageType = {
PyVarObject_HEAD_INIT(nullptr, 0)
"torch._C." THPStorageBaseStr, /* tp_name */
sizeof(THPStorage), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)THPStorage_(dealloc), /* tp_dealloc */
0, /* tp_vectorcall_offset */
nullptr, /* tp_getattr */
nullptr, /* tp_setattr */
nullptr, /* tp_reserved */
nullptr, /* tp_repr */
nullptr, /* tp_as_number */
nullptr, /* tp_as_sequence */
&THPStorage_(mappingmethods), /* tp_as_mapping */
nullptr, /* tp_hash */
nullptr, /* tp_call */
nullptr, /* tp_str */
nullptr, /* tp_getattro */
nullptr, /* tp_setattro */
nullptr, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
nullptr, /* tp_doc */
nullptr, /* tp_traverse */
nullptr, /* tp_clear */
nullptr, /* tp_richcompare */
0, /* tp_weaklistoffset */
nullptr, /* tp_iter */
nullptr, /* tp_iternext */
nullptr, /* will be assigned in init */ /* tp_methods */
nullptr, /* will be assigned in init */ /* tp_members */
nullptr, /* tp_getset */
nullptr, /* tp_base */
nullptr, /* tp_dict */
nullptr, /* tp_descr_get */
nullptr, /* tp_descr_set */
0, /* tp_dictoffset */
nullptr, /* tp_init */
nullptr, /* tp_alloc */
THPStorage_(pynew), /* tp_new */
};
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables)
static struct PyMemberDef THPStorage_(members)[] = {
{(char*)"_cdata", T_ULONGLONG, offsetof(THPStorage, cdata), READONLY, nullptr},
{nullptr}
};
static PyObject * THPStorage_(device)(THPStorage* self, void *unused) {
HANDLE_TH_ERRORS
return THPDevice_New(self->cdata->device());
END_HANDLE_TH_ERRORS
}
static PyObject * THPStorage_(dtype)(THPStorage *self, void *unused)
{
HANDLE_TH_ERRORS
return torch::autograd::utils::wrap(
torch::getTHPDtype(at::typeMetaToScalarType(
#ifdef THQUANTIZED
caffe2::TypeMeta::Make<quantized_t>()
#else
caffe2::TypeMeta::Make<scalar_t>()
#endif
)));
END_HANDLE_TH_ERRORS
}
typedef PyObject *(*getter)(PyObject *, void *);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables)
static struct PyGetSetDef THPStorage_(properties)[] = {
{"device", (getter)THPStorage_(device), nullptr, nullptr, nullptr},
{nullptr}
};
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <torch/csrc/generic/StorageMethods.cpp>
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <torch/csrc/generic/StorageSharing.cpp>
bool THPStorage_(init)(PyObject *module)
{
static std::vector<PyMethodDef> methods;
THPUtils_addPyMethodDefs(methods, THPStorage_(methods));
THPUtils_addPyMethodDefs(methods, THPStorage_(sharingMethods));
THPStorageType.tp_methods = methods.data();
THPStorageType.tp_members = THPStorage_(members);
THPStorageType.tp_getset = THPStorage_(properties);
if (PyType_Ready(&THPStorageType) < 0)
return false;
Py_INCREF(&THPStorageType);
PyModule_AddObject(module, THPStorageBaseStr, (PyObject *)&THPStorageType);
return true;
}
void THPStorage_(postInit)(PyObject *module)
{
THPStorageClass = PyObject_GetAttrString(module, "_UntypedStorage");
if (!THPStorageClass) throw python_error();
}
#endif

View File

@ -1,17 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "torch/csrc/generic/Storage.h"
#else
#include <torch/csrc/StorageDefs.h>
TORCH_PYTHON_API PyObject * THPStorage_(New)(c10::intrusive_ptr<c10::StorageImpl> ptr);
extern PyObject *THPStorageClass;
#include <torch/csrc/Types.h>
bool THPStorage_(init)(PyObject *module);
void THPStorage_(postInit)(PyObject *module);
extern PyTypeObject THPStorageType;
#endif

View File

@ -1,188 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "torch/csrc/generic/serialization.cpp"
#else
#include <c10/core/CPUAllocator.h>
// save_save is necessary since the old eager format saved storages as
// [size + data], but the v1.5 eager format removes this since size is saved in
// the filesize.
template <class io>
void THPStorage_(writeFileRaw)(c10::StorageImpl *self, io fd, bool save_size, uint64_t element_size)
{
c10::DeviceGuard guard(self->device());
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
scalar_t *data;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::unique_ptr<char[]> cpu_data;
int64_t size_bytes = self->nbytes();
int64_t numel = size_bytes / element_size;
if (self->device_type() == at::kCPU) {
data = self->data<scalar_t>();
#ifdef USE_CUDA
} else if (self->device_type() == at::kCUDA) {
cpu_data = std::unique_ptr<char[]>(new char[size_bytes]);
data = (scalar_t*)cpu_data.get();
C10_CUDA_CHECK(cudaMemcpy(
data,
self->data<scalar_t>(),
size_bytes,
cudaMemcpyDeviceToHost));
#endif
} else {
TORCH_CHECK(false, "writeFileRaw: Device not recognized: ", self->device_type());
}
if (save_size) {
if (torch::utils::THP_nativeByteOrder() ==
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN)
doWrite(fd, &numel, sizeof(int64_t));
else {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t nsize; // convert big endian cpu to little endian storage
torch::utils::THP_encodeInt64Buffer(
(uint8_t*)&nsize,
(const int64_t*)&numel,
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
1);
doWrite(fd, &nsize, sizeof(int64_t));
}
}
// fast track for bytes and little endian
if (element_size == 1 ||
torch::utils::THP_nativeByteOrder() ==
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN) {
doWrite(fd, data, size_bytes);
} else {
int64_t buffer_size = std::min(numel, (int64_t)5000);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * element_size]);
for (int64_t i = 0; i < numel; i += buffer_size) {
size_t to_convert = std::min(numel - i, buffer_size);
// NOLINTNEXTLINE(bugprone-branch-clone)
if (element_size == 2) {
torch::utils::THP_encodeInt16Buffer(
(uint8_t*)le_buffer.get(),
(const int16_t*)data + i,
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
to_convert);
} else if (element_size == 4) {
torch::utils::THP_encodeInt32Buffer(
(uint8_t*)le_buffer.get(),
(const int32_t*)data + i,
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
to_convert);
} else if (element_size == 8) {
torch::utils::THP_encodeInt64Buffer(
(uint8_t*)le_buffer.get(),
(const int64_t*)data + i,
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
to_convert);
}
doWrite(fd, le_buffer.get(), to_convert * element_size);
}
}
}
template void THPStorage_(writeFileRaw<int>)(c10::StorageImpl *self, int fd, bool save_size, uint64_t element_size);
template void THPStorage_(writeFileRaw<PyObject*>)(c10::StorageImpl *self, PyObject* fd, bool save_size, uint64_t element_size);
template <class io>
c10::intrusive_ptr<c10::StorageImpl> THPStorage_(readFileRaw)(
io file, c10::intrusive_ptr<c10::StorageImpl> storage, uint64_t element_size)
{
c10::OptionalDeviceGuard guard;
if (storage.defined()) {
guard.reset_device(storage->device());
}
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
scalar_t *data;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t size;
doRead(file, &size, sizeof(int64_t));
int64_t nbytes = element_size * size;
if (torch::utils::THP_nativeByteOrder() ==
torch::utils::THPByteOrder::THP_BIG_ENDIAN) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t nsize; // convert little endian storage to big endian cpu
nsize = nbytes;
torch::utils::THP_decodeInt64Buffer(
&nbytes, (const uint8_t*)&nsize, torch::utils::THP_nativeByteOrder(), 1);
}
if (!storage.defined()) {
storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
nbytes,
c10::GetDefaultCPUAllocator(),
/*resizable=*/true);
} else {
int64_t _storage_nbytes = storage->nbytes();
TORCH_CHECK(
_storage_nbytes == nbytes,
"storage has wrong byte size: expected %ld got %ld",
nbytes,
_storage_nbytes);
}
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::unique_ptr<char[]> cpu_data;
if (storage->device_type() == at::kCPU) {
data = storage->data<scalar_t>();
} else {
cpu_data = std::unique_ptr<char[]>(new char[nbytes]);
data = (scalar_t*)cpu_data.get();
}
// fast track for bytes and little endian
if (element_size == 1 ||
torch::utils::THP_nativeByteOrder() ==
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN) {
doRead(file, data, storage->nbytes());
} else {
int64_t buffer_size = std::min(size, (int64_t)5000);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * element_size]);
for (int64_t i = 0; i < size; i += buffer_size) {
size_t to_convert = std::min(size - i, buffer_size);
doRead(file, le_buffer.get(), element_size * to_convert);
// NOLINTNEXTLINE(bugprone-branch-clone)
if (element_size == 2) {
torch::utils::THP_decodeInt16Buffer(
(int16_t*)data + i,
le_buffer.get(),
torch::utils::THP_nativeByteOrder(),
to_convert);
} else if (element_size == 4) {
torch::utils::THP_decodeInt32Buffer(
(int32_t*)data + i,
le_buffer.get(),
torch::utils::THP_nativeByteOrder(),
to_convert);
} else if (element_size == 8) {
torch::utils::THP_decodeInt64Buffer(
(int64_t*)data + i,
le_buffer.get(),
torch::utils::THP_nativeByteOrder(),
to_convert);
}
}
}
#ifdef USE_CUDA
if (storage->device_type() == at::kCUDA) {
C10_CUDA_CHECK(cudaMemcpy(storage->data<scalar_t>(), data, nbytes, cudaMemcpyHostToDevice));
}
#endif
return storage;
}
template c10::intrusive_ptr<c10::StorageImpl> THPStorage_(readFileRaw<int>)(
int fd, c10::intrusive_ptr<c10::StorageImpl> storage, uint64_t element_size);
template c10::intrusive_ptr<c10::StorageImpl> THPStorage_(readFileRaw<PyObject*>)(
PyObject* fd, c10::intrusive_ptr<c10::StorageImpl> storage, uint64_t element_size);
#endif

View File

@ -1,12 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "torch/csrc/generic/serialization.h"
#else
template <class io>
void THPStorage_(writeFileRaw)(c10::StorageImpl *self, io fd, bool save_size, uint64_t element_size);
template <class io>
c10::intrusive_ptr<c10::StorageImpl> THPStorage_(readFileRaw)(
io fd, c10::intrusive_ptr<c10::StorageImpl> storage, uint64_t element_size);
#endif

View File

@ -3,6 +3,7 @@
#include <torch/csrc/THP.h>
#include <torch/csrc/serialization.h>
#include <c10/core/CPUAllocator.h>
template <class io>
Py_ssize_t doPartialRead(io fildes, void* buf, size_t nbytes);
@ -167,6 +168,183 @@ void doWrite(io fildes, void* raw_buf, size_t nbytes) {
}
}
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <torch/csrc/generic/serialization.cpp>
#include <torch/csrc/THGenerateByteType.h>
// save_size is necessary since the old eager format saved storages as
// [size + data], but the v1.5 eager format removes this since size is saved in
// the filesize.
// Serializes the raw bytes of `self` to `fd`.
//
// Layout: when `save_size` is true, an int64 element count
// (nbytes / element_size) is written first, always little-endian;
// the payload bytes follow.  On big-endian hosts, multi-byte
// elements (element_size 2/4/8) are byte-swapped to little-endian
// in chunks of up to 5000 elements before writing.
//
// CUDA storages are staged into a temporary host buffer via
// cudaMemcpy; any other non-CPU device is rejected.
//
// `io` is the descriptor type understood by doWrite() — see the
// explicit instantiations for int (POSIX fd) and PyObject*
// (Python file-like object) below.
template <class io>
void THPStorage_writeFileRaw(c10::StorageImpl *self, io fd, bool save_size, uint64_t element_size)
{
c10::DeviceGuard guard(self->device());
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
uint8_t *data;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::unique_ptr<char[]> cpu_data;
int64_t size_bytes = self->nbytes();
int64_t numel = size_bytes / element_size;
if (self->device_type() == at::kCPU) {
data = self->data<uint8_t>();
#ifdef USE_CUDA
} else if (self->device_type() == at::kCUDA) {
// Copy device memory to a host-side staging buffer before writing.
cpu_data = std::unique_ptr<char[]>(new char[size_bytes]);
data = (uint8_t*)cpu_data.get();
C10_CUDA_CHECK(cudaMemcpy(
data,
self->data<uint8_t>(),
size_bytes,
cudaMemcpyDeviceToHost));
#endif
} else {
TORCH_CHECK(false, "writeFileRaw: Device not recognized: ", self->device_type());
}
if (save_size) {
if (torch::utils::THP_nativeByteOrder() ==
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN)
doWrite(fd, &numel, sizeof(int64_t));
else {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t nsize; // convert big endian cpu to little endian storage
torch::utils::THP_encodeInt64Buffer(
(uint8_t*)&nsize,
(const int64_t*)&numel,
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
1);
doWrite(fd, &nsize, sizeof(int64_t));
}
}
// fast track for bytes and little endian
if (element_size == 1 ||
torch::utils::THP_nativeByteOrder() ==
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN) {
doWrite(fd, data, size_bytes);
} else {
// Big-endian host with multi-byte elements: byte-swap through a
// bounded scratch buffer and write chunk by chunk.
int64_t buffer_size = std::min(numel, (int64_t)5000);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * element_size]);
for (int64_t i = 0; i < numel; i += buffer_size) {
size_t to_convert = std::min(numel - i, buffer_size);
// NOLINTNEXTLINE(bugprone-branch-clone)
if (element_size == 2) {
torch::utils::THP_encodeInt16Buffer(
(uint8_t*)le_buffer.get(),
(const int16_t*)data + i,
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
to_convert);
} else if (element_size == 4) {
torch::utils::THP_encodeInt32Buffer(
(uint8_t*)le_buffer.get(),
(const int32_t*)data + i,
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
to_convert);
} else if (element_size == 8) {
torch::utils::THP_encodeInt64Buffer(
(uint8_t*)le_buffer.get(),
(const int64_t*)data + i,
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
to_convert);
}
doWrite(fd, le_buffer.get(), to_convert * element_size);
}
}
}
// Explicit instantiations: POSIX file descriptor and Python file object.
template void THPStorage_writeFileRaw<int>(c10::StorageImpl *self, int fd, bool save_size, uint64_t element_size);
template void THPStorage_writeFileRaw<PyObject*>(c10::StorageImpl *self, PyObject* fd, bool save_size, uint64_t element_size);
// Deserializes storage bytes from `file` (the inverse of
// THPStorage_writeFileRaw with save_size=true).
//
// Reads an int64 element count, computes nbytes = element_size * size,
// then reads the payload.  If `storage` is undefined, a new CPU
// storage of `nbytes` is allocated; otherwise its byte size must match
// exactly.  Non-CPU destination storages are filled through a host
// staging buffer and copied back with cudaMemcpy at the end.
//
// `io` is the descriptor type understood by doRead() — see the
// explicit instantiations for int and PyObject* below.
template <class io>
c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw(
io file, c10::intrusive_ptr<c10::StorageImpl> storage, uint64_t element_size)
{
c10::OptionalDeviceGuard guard;
if (storage.defined()) {
guard.reset_device(storage->device());
}
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
uint8_t *data;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t size;
doRead(file, &size, sizeof(int64_t));
int64_t nbytes = element_size * size;
if (torch::utils::THP_nativeByteOrder() ==
torch::utils::THPByteOrder::THP_BIG_ENDIAN) {
// NOTE(review): nbytes is computed from the raw (little-endian
// on disk) `size` and only then byte-swapped, i.e.
// swap(element_size * raw) rather than element_size * swap(raw).
// These agree only when element_size == 1 — confirm intended.
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t nsize; // convert little endian storage to big endian cpu
nsize = nbytes;
torch::utils::THP_decodeInt64Buffer(
&nbytes, (const uint8_t*)&nsize, torch::utils::THP_nativeByteOrder(), 1);
}
if (!storage.defined()) {
storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
nbytes,
c10::GetDefaultCPUAllocator(),
/*resizable=*/true);
} else {
int64_t _storage_nbytes = storage->nbytes();
// NOTE(review): TORCH_CHECK concatenates its message arguments; the
// printf-style "%ld" placeholders here are emitted literally rather
// than substituted — consider rewording the message.
TORCH_CHECK(
_storage_nbytes == nbytes,
"storage has wrong byte size: expected %ld got %ld",
nbytes,
_storage_nbytes);
}
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::unique_ptr<char[]> cpu_data;
if (storage->device_type() == at::kCPU) {
data = storage->data<uint8_t>();
} else {
// Non-CPU destination: read into a host staging buffer first.
cpu_data = std::unique_ptr<char[]>(new char[nbytes]);
data = (uint8_t*)cpu_data.get();
}
// fast track for bytes and little endian
if (element_size == 1 ||
torch::utils::THP_nativeByteOrder() ==
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN) {
doRead(file, data, storage->nbytes());
} else {
// Big-endian host with multi-byte elements: read little-endian
// chunks and byte-swap them in place, up to 5000 elements at a time.
int64_t buffer_size = std::min(size, (int64_t)5000);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * element_size]);
for (int64_t i = 0; i < size; i += buffer_size) {
size_t to_convert = std::min(size - i, buffer_size);
doRead(file, le_buffer.get(), element_size * to_convert);
// NOLINTNEXTLINE(bugprone-branch-clone)
if (element_size == 2) {
torch::utils::THP_decodeInt16Buffer(
(int16_t*)data + i,
le_buffer.get(),
torch::utils::THP_nativeByteOrder(),
to_convert);
} else if (element_size == 4) {
torch::utils::THP_decodeInt32Buffer(
(int32_t*)data + i,
le_buffer.get(),
torch::utils::THP_nativeByteOrder(),
to_convert);
} else if (element_size == 8) {
torch::utils::THP_decodeInt64Buffer(
(int64_t*)data + i,
le_buffer.get(),
torch::utils::THP_nativeByteOrder(),
to_convert);
}
}
}
#ifdef USE_CUDA
if (storage->device_type() == at::kCUDA) {
C10_CUDA_CHECK(cudaMemcpy(storage->data<uint8_t>(), data, nbytes, cudaMemcpyHostToDevice));
}
#endif
return storage;
}
// Explicit instantiations: POSIX file descriptor and Python file object.
template c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw<int>(
int fd, c10::intrusive_ptr<c10::StorageImpl> storage, uint64_t element_size);
template c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw<PyObject*>(
PyObject* fd, c10::intrusive_ptr<c10::StorageImpl> storage, uint64_t element_size);

View File

@ -1,13 +1,17 @@
#ifndef THP_SERIALIZATION_INC
#define THP_SERIALIZATION_INC
#include <torch/csrc/generic/serialization.h>
#include <torch/csrc/THGenerateByteType.h>
template <class io>
void doRead(io fildes, void* buf, size_t nbytes);
template <class io>
void doWrite(io fildes, void* buf, size_t nbytes);
template <class io>
void THPStorage_writeFileRaw(c10::StorageImpl *self, io fd, bool save_size, uint64_t element_size);
template <class io>
c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw(
io fd, c10::intrusive_ptr<c10::StorageImpl> storage, uint64_t element_size);
#endif