Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/53973

Two parts to this PR; I had to put them together because adding support for X causes more test code to be exercised, which in turn may require a fix for Y.

The first part is restoring the concept of storage to meta tensors. Previously, meta tensors had a nullptr storage (e.g., `meta_tensor.storage()` is an error). As I was increasing the coverage of meta tensors, I started running into test cases (specifically memory overlap tests) that were failing because not having storage meant I couldn't check for memory overlap. After some discussion, we decided that it would make sense for meta tensors to model this as well (we already model strides, so getting accurate view information also seems useful). This PR does that as follows:

* Rewrite all of the factory functions in MetaTensor.cpp to use the generic versions (which are very carefully written to not actually poke at the data pointer, so everything works out). The key idea is that we give meta tensors a special allocator, MetaAllocator, which always returns a nullptr even if you ask for a nonzero number of bytes. resize_ is also made generic; the normal variant can be used directly rather than having to instruct it to avoid resizing storage.
* Turn on memory overlap checking in TensorIterator even for meta tensors.
* Although meta tensors now have storage, the concept of meta storage is NOT exposed to Python land (as it would imply I would have to codegen MetaFloatStorage, MetaDoubleStorage, etc. classes). So `x.storage()` still raises an error, and I have a kludge in `__deepcopy__` to break storage sharing upon deep copy (this is wrong, but no tests exercise this at the moment).

The second part is adding more support for the most used functions in the test suite:

* Inplace operations have very simple meta functions. I added `fill_`, `zero_`, `random_`, `uniform_` and `normal_`. In the case of random, I take advantage of pbelevich's templates for defining random kernels, so that I can reuse the common scaffolding and then just register a noop stub that actually does the RNG. (Look, another structured kernels tiny variant!)
* `copy_` is now implemented. Copying into a meta tensor is always OK, but copying out of a meta tensor raises an error (as we don't know what the "correct" data to copy out is in this case).
* `empty_strided` usage from structured kernels is now implemented (TBH, this could have been done as soon as `empty_strided` was added).
* Meta was missing in a few places in TensorOptions/DispatchKey utility functions, so I added it.
* The autograd engine now correctly homes meta tensors with CPU tensors (they have a -1 device index, so CUDA queues wouldn't work anyway).
* `apply_`, `map_` and `map2_` are special-cased to no-op when self is a meta tensor. These count as inplace operations too, but they are implemented a little differently.

Getting more meta function support triggers a number of bugs in the test suite, which I then fix:

- Linear algebra functions sometimes don't report NotImplementedError because it gets swallowed by catch-all try blocks. This is tracked in https://github.com/pytorch/pytorch/issues/53739
- dlpack obviously doesn't work with meta tensors; I just disabled the test.

Signed-off-by: Edward Z. Yang <ezyang@fb.com>

Differential Revision: D27036572

Test Plan: Imported from OSS

Reviewed By: agolynski, bdhirsh

Pulled By: ezyang

fbshipit-source-id: 7005ecf4feb92a643c37389fdfbd852dbf00ac78
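
For orientation only (not part of the commit), here is a rough Python sketch of the meta-tensor behavior described above, assuming a build that includes this change:

import torch

# Meta tensors carry sizes, strides, and (after this PR) storage, but no real data.
x = torch.empty(2, 3, device='meta')

# Inplace ops with simple meta functions (fill_, zero_, ...) are expected to succeed.
x.fill_(1.0)
x.zero_()

# Copying INTO a meta tensor is OK; copying OUT of one should raise, since there
# is no actual data to copy.
x.copy_(torch.randn(2, 3))
try:
    torch.empty(2, 3).copy_(x)
except Exception as e:
    print("copy out of meta:", e)

# Meta storage is not exposed to Python land, so this still raises.
try:
    x.storage()
except Exception as e:
    print("storage():", e)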
109 lines
3.3 KiB
C++
#include <torch/csrc/utils/tensor_apply.h>

#include <ATen/TensorUtils.h>
#include <ATen/ExpandUtils.h>

#include <torch/csrc/Exceptions.h>
#include <torch/csrc/utils/python_numbers.h>
#include <torch/csrc/utils/python_scalars.h>

using namespace at;

namespace torch { namespace utils {

// Cursor over a tensor's memory: holds the data pointer, strides, and element
// size, and can step the pointer along a given dimension.
struct StridedData {
  StridedData(const Tensor & tensor)
    : data(tensor.data_ptr())
    , strides(tensor.strides())
    , elementSize(tensor.element_size()) {}

  void* data;
  IntArrayRef strides;
  int64_t elementSize;

  void step(int dim) {
    data = (char*)data + (strides[dim] * elementSize);
  }
};

// Recursively walks all dimensions; at the innermost level it loads the current
// element of each tensor, calls the Python callable on them, and stores the
// result back into the first tensor (self).
template<size_t N>
static void recursive_apply(IntArrayRef sizes, ScalarType scalarType, int64_t dim,
                            PyObject* fn, std::array<StridedData, N> strided_data) {
  int64_t ndim = sizes.size();
  if (dim == ndim) {
    auto args = THPObjectPtr(PyTuple_New(N));
    if (!args) throw python_error();
    for (size_t i = 0; i < N; i++) {
      PyObject* arg = load_scalar(strided_data[i].data, scalarType);
      if (!arg) throw python_error();
      PyTuple_SET_ITEM(args.get(), i, arg);
    }
    auto ret = THPObjectPtr(PyObject_CallObject(fn, args.get()));
    if (!ret) throw python_error();
    store_scalar(strided_data[0].data, scalarType, ret.get());
    return;
  }

  auto n = sizes[dim];
  for (int64_t i = 0; i < n; i++) {
    recursive_apply(sizes, scalarType, dim + 1, fn, strided_data);
    for (auto& td : strided_data) {
      td.step(dim);
    }
  }
}

Tensor & apply_(Tensor & self, PyObject* fn) {
  if (self.is_meta()) {
    return self; // Just skip
  }
  if (!self.device().is_cpu()) {
    throw TypeError("apply_ is only implemented on CPU tensors");
  }
  auto scalarType = self.scalar_type();
  recursive_apply<1>(self.sizes(), scalarType, 0, fn, {{ self }});
  return self;
}

Tensor & map_(Tensor & self, const Tensor & other_, PyObject* fn) {
  if (!other_.options().type_equal(self.options())) {
    throw TypeError("map_: expected %s for 'other' (got %s)",
        self.toString().c_str(), other_.toString().c_str());
  }
  if (self.is_meta()) {
    return self; // Just skip
  }
  if (!self.device().is_cpu()) {
    throw TypeError("map_ is only implemented on CPU tensors");
  }
  Tensor other;
  std::tie(other) = expand_inplace(self, other_, "map_");
  auto scalarType = self.scalar_type();
  recursive_apply<2>(self.sizes(), scalarType, 0, fn, {{ self, other }});
  return self;
}

Tensor & map2_(Tensor & self, const Tensor & x_, const Tensor & y_, PyObject* fn) {
  if (!x_.options().type_equal(self.options())) {
    throw TypeError("map2_: expected %s for argument 'x' (got %s)",
        self.toString().c_str(), x_.toString().c_str());
  }
  if (!y_.options().type_equal(self.options())) {
    throw TypeError("map2_: expected %s for argument 'y' (got %s)",
        self.toString().c_str(), y_.toString().c_str());
  }
  if (self.is_meta()) {
    return self; // Just skip
  }
  if (!self.device().is_cpu() || !x_.device().is_cpu() || !y_.device().is_cpu()) {
    throw TypeError("map2_ is only implemented on CPU tensors");
  }
  Tensor other1, other2;
  std::tie(other1, other2) = expand_inplace(self, x_, y_, "map2_");
  auto scalarType = self.scalar_type();
  recursive_apply<3>(self.sizes(), scalarType, 0, fn, {{ self, other1, other2 }});
  return self;
}

}} // namespace torch::utils
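
For context only (not part of this file), the Python-level `Tensor.apply_` / `Tensor.map_` / `Tensor.map2_` methods backed by these helpers can be exercised roughly like this; the `is_meta()` early returns above are what make the meta case a no-op:

import torch

t = torch.arange(6, dtype=torch.float32).reshape(2, 3)

# Tensor.apply_ calls the Python callable on each element in place (CPU only).
t.apply_(lambda v: v * 2)

# Tensor.map_ applies a binary callable over self and a broadcastable other tensor.
t.map_(torch.ones(2, 3), lambda a, b: a + b)

# On a meta tensor, apply_/map_/map2_ simply return self unchanged.
m = torch.empty(2, 3, device='meta')
m.apply_(lambda v: v + 1)  # no-op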