# Motivation

Fixes https://github.com/pytorch/pytorch/issues/135550

In PyTorch, [`tensor.data_ptr()`](e889252493/tools/autograd/templates/python_variable_methods.cpp (L204)) is reinterpreted as a [signed int64](e889252493/torch/csrc/autograd/utils/wrap_outputs.h (L50)), which can result in an **overflow issue**, as shown below:

```python
import torch

a = torch.randn(2).to('xpu')
a.data_ptr()
# one possible output is -23453392437248
# this is inconsistent with storage.data_ptr()
a.untyped_storage().data_ptr()
# one possible output is 18446720620317114368
```

This PR fixes the representation overflow so that `tensor.data_ptr()` is consistent with [`tensor.untyped_storage().data_ptr()`](c0d2f991b1/torch/csrc/StorageMethods.cpp (L62)). With this PR, the output becomes:

```python
import torch

a = torch.randn(2).to('xpu')
a.data_ptr()
# one possible output is 18446720620317114368
# this is consistent with storage.data_ptr()
a.untyped_storage().data_ptr()
# one possible output is 18446720620317114368
```

# Solution

Use `PyLong_FromVoidPtr`, which avoids the overflow and matches the semantics of `wrap`.

# Additional Context

This PR was previously reverted (revert commit 2e8d431a8f; the PR itself is unchanged) because the change to `tensor.data_ptr()` needs to be synced with the Intel XPU Triton backend, see [#2192](https://github.com/intel/intel-xpu-backend-for-triton/pull/2192). The XPU Triton commit pin therefore has to be updated together with this PR.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/135567
Approved by: https://github.com/dvrogozh, https://github.com/EikanWang, https://github.com/albanD
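As a standalone illustration of the behavior described under "# Solution" above, the minimal sketch below contrasts a signed-int64 conversion with `PyLong_FromVoidPtr`. It is not the actual PyTorch code; the helper names `wrap_ptr_signed` and `wrap_ptr_unsigned` are hypothetical.

```cpp
#include <Python.h>
#include <cstdint>

// Hypothetical "before": force the pointer through a signed 64-bit integer.
// Addresses at or above 2^63 wrap around and surface as negative Python ints.
PyObject* wrap_ptr_signed(void* p) {
  auto as_signed = static_cast<long long>(reinterpret_cast<std::intptr_t>(p));
  return PyLong_FromLongLong(as_signed);
}

// Hypothetical "after": PyLong_FromVoidPtr preserves the full unsigned
// pointer value, matching what untyped_storage().data_ptr() reports.
PyObject* wrap_ptr_unsigned(void* p) {
  return PyLong_FromVoidPtr(p);
}
```

For a device address at or above 2^63, the first helper yields a negative value like the `-23453392437248` shown above, while the second yields the full unsigned value `18446720620317114368`; both encode the same pointer bits.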
150 lines
3.7 KiB
C++
#pragma once

// Wrap tensor operation outputs as PyObject*

#include <ATen/ScalarOps.h>
#include <ATen/core/Tensor.h>
#include <c10/util/irange.h>
#include <torch/csrc/python_headers.h>
#include <initializer_list>
#include <tuple>

#include <torch/csrc/Dtype.h>
#include <torch/csrc/DynamicTypes.h>
#include <torch/csrc/Layout.h>
#include <torch/csrc/QScheme.h>
#include <torch/csrc/autograd/python_variable.h>
#include <torch/csrc/autograd/variable.h>
#include <torch/csrc/utils/python_numbers.h>
#include <torch/csrc/utils/tensor_qschemes.h>

namespace torch::autograd::utils {

inline PyObject* wrap(bool value) {
  if (value) {
    Py_RETURN_TRUE;
  } else {
    Py_RETURN_FALSE;
  }
}

inline PyObject* wrap(c10::DeviceIndex value) {
  return THPUtils_packDeviceIndex(value);
}

inline PyObject* wrap(int64_t value) {
  return THPUtils_packInt64(value);
}

inline PyObject* wrap(double value) {
  return PyFloat_FromDouble(value);
}

inline PyObject* wrap(c10::complex<double> value) {
  // I could probably also use FromComplex with a reinterpret cast,
  // but... eh.
  return PyComplex_FromDoubles(value.real(), value.imag());
}
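
// Note (see PR #135567 above): pointers are wrapped with PyLong_FromVoidPtr
// rather than being routed through the signed int64 overload, so addresses
// above 2^63 do not overflow into negative Python ints and tensor.data_ptr()
// stays consistent with tensor.untyped_storage().data_ptr().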
inline PyObject* wrap(void* value) {
  return PyLong_FromVoidPtr(value);
}

inline PyObject* wrap(THPDtype* dtype) {
  return Py_NewRef(dtype);
}

inline PyObject* wrap(at::ScalarType scalarType) {
  return Py_NewRef(getTHPDtype(scalarType));
}

inline PyObject* wrap(THPLayout* layout) {
  return Py_NewRef(layout);
}

inline PyObject* wrap(at::Layout layout) {
  return Py_NewRef(getTHPLayout(layout));
}

inline PyObject* wrap(const at::Tensor& tensor) {
  return THPVariable_Wrap(tensor);
}

inline PyObject* wrap(const at::Scalar& scalar) {
  return wrap(scalar_to_tensor(scalar));
}

inline PyObject* wrap(at::QScheme qscheme) {
  auto* thp_qscheme = torch::utils::getTHPQScheme(qscheme);
  Py_INCREF(thp_qscheme);
  return thp_qscheme;
}

inline PyObject* wrap(at::TensorList tl) {
  auto r = THPObjectPtr{PyTuple_New(static_cast<Py_ssize_t>(tl.size()))};
  if (!r)
    throw python_error();
  for (const auto i : c10::irange(tl.size())) {
    PyTuple_SET_ITEM(r.get(), i, wrap(tl[i]));
  }
  return r.release();
}

inline PyObject* wrap(at::IntArrayRef list) {
  auto r = THPObjectPtr{PyTuple_New(static_cast<Py_ssize_t>(list.size()))};
  if (!r)
    throw python_error();
  for (const auto i : c10::irange(list.size())) {
    PyTuple_SET_ITEM(r.get(), i, wrap(list[i]));
  }
  return r.release();
}

inline PyObject* wrap(at::Stream stream) {
  return THPStream_Wrap(stream);
}

namespace detail {
template <typename F, typename Tuple, size_t... Is>
void apply_with_idx_impl(
    const F& f,
    Tuple& t,
    std::index_sequence<Is...> /*indices*/) {
  (void)std::initializer_list<int>{(f(std::get<Is>(t), Is), 0)...};
}

// For tuple(a, b, c), calls f(a, 0), f(b, 1), f(c, 2)
template <typename F, typename... Ts>
void apply_with_idx(const F& f, std::tuple<Ts...>& t) {
  apply_with_idx_impl(f, t, std::index_sequence_for<Ts...>{});
}
} // namespace detail

template <typename... Ts>
PyObject* wrap(std::tuple<Ts...> values) {
  auto r = THPObjectPtr{PyTuple_New(sizeof...(Ts))};
  if (!r)
    throw python_error();
  detail::apply_with_idx(
      [&](auto& value, size_t idx) {
        PyTuple_SET_ITEM(r.get(), idx, wrap(std::move(value)));
      },
      values);
  return r.release();
}

template <typename... Ts>
PyObject* wrap(PyTypeObject* type, std::tuple<Ts...> values) {
  auto r = THPObjectPtr{PyStructSequence_New(type)};
  if (!r)
    throw python_error();
  detail::apply_with_idx(
      [&](auto& value, size_t idx) {
        PyStructSequence_SET_ITEM(r.get(), idx, wrap(std::move(value)));
      },
      values);
  return r.release();
}

} // namespace torch::autograd::utils