pytorch/torch/csrc/autograd/utils/wrap_outputs.h
Yu, Guangye ac0b0d11ab [Reland] Fix tensor.data_ptr() representation overflow (#135567)
# Motivation
fix https://github.com/pytorch/pytorch/issues/135550
In PyTorch, [`tensor.data_ptr()`](e889252493/tools/autograd/templates/python_variable_methods.cpp (L204)) is reinterpreted as a [signed int64](e889252493/torch/csrc/autograd/utils/wrap_outputs.h (L50)), which can cause a **representation overflow** for addresses with the high bit set, like below:
```python
import torch
a = torch.randn(2).to('xpu')
a.data_ptr()
# one possible output is
-23453392437248
# this is inconsistent with storage.data_ptr()
a.untyped_storage().data_ptr()
# one possible output is
18446720620317114368
```
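
To see why the sign flips, here is a minimal standalone C++ sketch (not PyTorch code; the address below is a made-up value with the top bit set, mimicking the XPU pointer above):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical device address whose sign bit is set.
  std::uintptr_t addr = 0xFFFFEB0000001000ULL;
  void* p = reinterpret_cast<void*>(addr);

  // Old behavior: the address is funneled through a signed 64-bit integer,
  // so the value handed to Python is negative.
  auto as_signed = static_cast<long long>(reinterpret_cast<std::intptr_t>(p));

  // Desired behavior: treat the address as unsigned, matching
  // untyped_storage().data_ptr().
  auto as_unsigned =
      static_cast<unsigned long long>(reinterpret_cast<std::uintptr_t>(p));

  std::printf("signed:   %lld\n", as_signed);   // negative
  std::printf("unsigned: %llu\n", as_unsigned); // full address
}
```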
This PR fixes the representation overflow so that `tensor.data_ptr()` is consistent with [`tensor.untyped_storage().data_ptr()`](c0d2f991b1/torch/csrc/StorageMethods.cpp (L62)). With this PR, the output becomes:
```python
import torch
a = torch.randn(2).to('xpu')
a.data_ptr()
# one possible output is
18446720620317114368
# this is consistent with storage.data_ptr()
a.untyped_storage().data_ptr()
# one possible output is
18446720620317114368
```

# Solution
Use `PyLong_FromVoidPtr`, which converts the pointer as an unsigned value, to prevent the overflow while fitting the semantics of the `wrap` overload set.
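
As a simplified sketch (the helper names here are hypothetical; the actual change is the `wrap(void*)` overload in `wrap_outputs.h` shown below), the two conversion paths differ as follows:

```cpp
#include <Python.h>

// Pre-fix path (simplified): the address is squeezed through a signed
// 64-bit integer, so a high address becomes a negative Python int.
PyObject* wrap_as_signed(void* p) {
  return PyLong_FromLongLong(reinterpret_cast<long long>(p));
}

// Post-fix path: CPython converts the pointer as an unsigned quantity, so
// the result is non-negative and round-trips via PyLong_AsVoidPtr.
PyObject* wrap_as_pointer(void* p) {
  return PyLong_FromVoidPtr(p);
}
```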

# Additional Context
This PR was previously reverted (revert commit 2e8d431a8f; this reland contains no further changes) because the change to `tensor.data_ptr()` must be synced with the Intel XPU Triton backend, see [#2192](https://github.com/intel/intel-xpu-backend-for-triton/pull/2192). We therefore update the XPU Triton commit pin together with this PR.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/135567
Approved by: https://github.com/dvrogozh, https://github.com/EikanWang, https://github.com/albanD
2024-11-28 02:01:52 +00:00

#pragma once

// Wrap tensor operation outputs as PyObject*

#include <ATen/ScalarOps.h>
#include <ATen/core/Tensor.h>
#include <c10/util/irange.h>
#include <torch/csrc/python_headers.h>

#include <initializer_list>
#include <tuple>

#include <torch/csrc/Dtype.h>
#include <torch/csrc/DynamicTypes.h>
#include <torch/csrc/Layout.h>
#include <torch/csrc/QScheme.h>
#include <torch/csrc/autograd/python_variable.h>
#include <torch/csrc/autograd/variable.h>
#include <torch/csrc/utils/python_numbers.h>
#include <torch/csrc/utils/tensor_qschemes.h>

namespace torch::autograd::utils {

inline PyObject* wrap(bool value) {
  if (value) {
    Py_RETURN_TRUE;
  } else {
    Py_RETURN_FALSE;
  }
}

inline PyObject* wrap(c10::DeviceIndex value) {
  return THPUtils_packDeviceIndex(value);
}

inline PyObject* wrap(int64_t value) {
  return THPUtils_packInt64(value);
}

inline PyObject* wrap(double value) {
  return PyFloat_FromDouble(value);
}

inline PyObject* wrap(c10::complex<double> value) {
  // I could probably also use FromComplex with a reinterpret cast,
  // but... eh.
  return PyComplex_FromDoubles(value.real(), value.imag());
}
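
// Note: PyLong_FromVoidPtr converts the address as an unsigned quantity, so
// data_ptr() values with the high bit set (e.g. some device allocations) no
// longer surface as negative Python ints (see #135567).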
inline PyObject* wrap(void* value) {
  return PyLong_FromVoidPtr(value);
}

inline PyObject* wrap(THPDtype* dtype) {
  return Py_NewRef(dtype);
}

inline PyObject* wrap(at::ScalarType scalarType) {
  return Py_NewRef(getTHPDtype(scalarType));
}

inline PyObject* wrap(THPLayout* layout) {
  return Py_NewRef(layout);
}

inline PyObject* wrap(at::Layout layout) {
  return Py_NewRef(getTHPLayout(layout));
}

inline PyObject* wrap(const at::Tensor& tensor) {
  return THPVariable_Wrap(tensor);
}
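
// Scalars are wrapped by materializing a 0-dim tensor via scalar_to_tensor
// rather than converting to a plain Python number.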
inline PyObject* wrap(const at::Scalar& scalar) {
  return wrap(scalar_to_tensor(scalar));
}

inline PyObject* wrap(at::QScheme qscheme) {
  auto* thp_qscheme = torch::utils::getTHPQScheme(qscheme);
  Py_INCREF(thp_qscheme);
  return thp_qscheme;
}

inline PyObject* wrap(at::TensorList tl) {
  auto r = THPObjectPtr{PyTuple_New(static_cast<Py_ssize_t>(tl.size()))};
  if (!r)
    throw python_error();
  for (const auto i : c10::irange(tl.size())) {
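    // PyTuple_SET_ITEM steals the new reference returned by wrap().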
    PyTuple_SET_ITEM(r.get(), i, wrap(tl[i]));
  }
  return r.release();
}

inline PyObject* wrap(at::IntArrayRef list) {
  auto r = THPObjectPtr{PyTuple_New(static_cast<Py_ssize_t>(list.size()))};
  if (!r)
    throw python_error();
  for (const auto i : c10::irange(list.size())) {
    PyTuple_SET_ITEM(r.get(), i, wrap(list[i]));
  }
  return r.release();
}

inline PyObject* wrap(at::Stream stream) {
  return THPStream_Wrap(stream);
}

namespace detail {
template <typename F, typename Tuple, size_t... Is>
void apply_with_idx_impl(
    const F& f,
    Tuple& t,
    std::index_sequence<Is...> /*indices*/) {
  (void)std::initializer_list<int>{(f(std::get<Is>(t), Is), 0)...};
}

// For tuple(a, b, c), calls f(a, 0), f(b, 1), f(c, 2)
template <typename F, typename... Ts>
void apply_with_idx(const F& f, std::tuple<Ts...>& t) {
  apply_with_idx_impl(f, t, std::index_sequence_for<Ts...>{});
}
} // namespace detail
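
// Wraps a std::tuple of wrappable values into a Python tuple; e.g. a
// (hypothetical) wrap(std::make_tuple(tensor, int64_t{3})) yields (Tensor, 3).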
template <typename... Ts>
PyObject* wrap(std::tuple<Ts...> values) {
  auto r = THPObjectPtr{PyTuple_New(sizeof...(Ts))};
  if (!r)
    throw python_error();
  detail::apply_with_idx(
      [&](auto& value, size_t idx) {
        PyTuple_SET_ITEM(r.get(), idx, wrap(std::move(value)));
      },
      values);
  return r.release();
}
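
// Same as above, but fills a PyStructSequence (a named-tuple-like object) of
// the given type instead of a plain tuple.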
template <typename... Ts>
PyObject* wrap(PyTypeObject* type, std::tuple<Ts...> values) {
  auto r = THPObjectPtr{PyStructSequence_New(type)};
  if (!r)
    throw python_error();
  detail::apply_with_idx(
      [&](auto& value, size_t idx) {
        PyStructSequence_SET_ITEM(r.get(), idx, wrap(std::move(value)));
      },
      values);
  return r.release();
}

} // namespace torch::autograd::utils