mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Apparently for the array API, cuda default stream and per thread stream should be 1 and 2 instead of 0 and 1: https://data-apis.org/array-api/latest/API_specification/array_object.html?dlpack-self-stream-none#dlpack-self-stream-none. This caused a problem in the interop with CuPy https://github.com/cupy/cupy/pull/5970#discussion_r739912926. cc rgommers leofang mruberry Pull Request resolved: https://github.com/pytorch/pytorch/pull/67618 Reviewed By: albanD Differential Revision: D32521805 Pulled By: mruberry fbshipit-source-id: 95777e4014e5edf1f88ba10adc03c6e34c13248d
75 lines
2.5 KiB
Python
75 lines
2.5 KiB
Python
from typing import Any
|
|
|
|
import torch
|
|
import enum
|
|
|
|
from torch._C import _from_dlpack
|
|
from torch._C import _to_dlpack as to_dlpack
|
|
|
|
|
|
class DLDeviceType(enum.IntEnum):
    """Device type codes used by the DLPack protocol.

    The numeric values mirror the enum in the DLPack specification
    (aten/src/ATen/dlpack.h). ``from_dlpack`` compares the first element of
    the tuple returned by ``__dlpack_device__`` against these members.
    """

    # Values are written as plain integers: a trailing comma would make each
    # one a single-element tuple that IntEnum merely happens to unpack.
    kDLCPU = 1
    kDLGPU = 2
    kDLCPUPinned = 3
    kDLOpenCL = 4
    kDLVulkan = 7
    kDLMetal = 8
    kDLVPI = 9
    kDLROCM = 10
    kDLExtDev = 12
|
|
|
|
|
|
# Attach the user-facing docstring to the ``to_dlpack`` binding imported
# from ``torch._C``.
torch._C._add_docstr(to_dlpack, r"""to_dlpack(tensor) -> PyCapsule

Returns a DLPack representing the tensor.

Args:
    tensor: a tensor to be exported

The DLPack shares the tensor's memory.
Note that each DLPack can only be consumed once.
""")
|
|
|
|
# TODO: add a typing.Protocol to be able to tell Mypy that only objects with
# __dlpack__ and __dlpack_device__ methods are accepted.
def from_dlpack(ext_tensor: Any) -> torch.Tensor:
    """from_dlpack(ext_tensor) -> Tensor

    Converts a tensor from an external library into a ``torch.Tensor``
    by means of the ``__dlpack__`` protocol.

    The tensor will share the memory with the object represented
    in the DLPack.

    .. warning::
      Only call from_dlpack once per capsule. Its behavior when used
      on the same capsule multiple times is undefined.

    Args:
        ext_tensor (object with __dlpack__ attribute or DLPack capsule):
            The tensor or DLPack capsule to convert.

            If ``ext_tensor`` has a ``__dlpack__`` attribute, its
            ``__dlpack_device__()`` is queried first. For CUDA and ROCm
            devices the current PyTorch stream of that device is passed
            as the ``stream`` argument of ``__dlpack__``; for every other
            device ``__dlpack__()`` is called without arguments. An object
            without ``__dlpack__`` is handed to the converter unchanged.

    Returns:
        torch.Tensor: the result of the underlying ``_from_dlpack``
        converter.
    """
    if not hasattr(ext_tensor, '__dlpack__'):
        # Old versions just call the converter
        return _from_dlpack(ext_tensor)

    device = ext_tensor.__dlpack_device__()
    device_type = device[0]
    if device_type not in (DLDeviceType.kDLGPU, DLDeviceType.kDLROCM):
        # Neither CUDA nor ROCm: the producer is not given a stream.
        return _from_dlpack(ext_tensor.__dlpack__())

    # device is either CUDA or ROCm, we need to pass the current stream
    stream = torch.cuda.current_stream('cuda:{}'.format(device[1]))
    # cuda_stream is the pointer to the stream and it is a public
    # attribute, but it is not documented
    stream_ptr = stream.cuda_stream
    # The array API specifies that the default legacy stream must be passed
    # with a value of 1 for CUDA
    # https://data-apis.org/array-api/latest/API_specification/array_object.html?dlpack-self-stream-none#dlpack-self-stream-none # NOQA
    # Since PyTorch is not using PTDS by default, directly pass the legacy
    # stream. The remapping is applied to CUDA only, not to ROCm.
    if device_type == DLDeviceType.kDLGPU and stream_ptr == 0:
        stream_ptr = 1
    return _from_dlpack(ext_tensor.__dlpack__(stream=stream_ptr))
|