diff --git a/mypy.ini b/mypy.ini index 5b297c89d54..b9c87aca583 100644 --- a/mypy.ini +++ b/mypy.ini @@ -198,9 +198,6 @@ ignore_errors = True [mypy-torch.cuda.comm] ignore_errors = True -[mypy-torch.cuda.memory] -ignore_errors = True - [mypy-torch.cuda.nccl] ignore_errors = True diff --git a/torch/_C/__init__.pyi.in b/torch/_C/__init__.pyi.in index 0fb0372b7fa..9e1e0ba1b3e 100644 --- a/torch/_C/__init__.pyi.in +++ b/torch/_C/__init__.pyi.in @@ -2,7 +2,7 @@ import torch from torch import Tensor -from typing import (Any, BinaryIO, Callable, ContextManager, Iterator, List, NamedTuple, +from typing import (Any, BinaryIO, Callable, ContextManager, Dict, Iterator, List, NamedTuple, Optional, overload, Sequence, Tuple, TypeVar, Type, Union) from torch._six import inf @@ -300,6 +300,24 @@ class _TensorBase(object): ${tensor_method_hints} # Defined in torch/csrc/cuda/Module.cpp +def _cuda_getCurrentStream(device: _int) -> _int: ... +def _cuda_getDefaultStream(device: _int) -> _int: ... +def _cuda_getCurrentBlasHandle() -> _int: ... +def _cuda_setStream(cuda_stream: _int) -> None: ... +def _cuda_getCompiledVersion() -> _int: ... +def _cuda_cudaHostAllocator() -> _int: ... +def _cuda_cudaCachingAllocator_raw_alloc(size: _int, cuda_stream: _int) -> _int: ... +def _cuda_cudaCachingAllocator_raw_delete(ptr: _int) -> None: ... +def _cuda_emptyCache() -> None: ... +def _cuda_memoryStats(device: _int) -> Dict[str, Any]: ... +def _cuda_resetAccumulatedMemoryStats(device: _int) -> None: ... +def _cuda_resetPeakMemoryStats(device: _int) -> None: ... +def _cuda_memorySnapshot() -> List[Dict[str, Any]]: ... +def _cuda_lock_mutex() -> None: ... +def _cuda_unlock_mutex() -> None: ... +def _nccl_version() -> _int: ... +def _nccl_unique_id() -> bytes: ... + class _CudaDeviceProperties: name: str major: _int diff --git a/torch/cuda/memory.py b/torch/cuda/memory.py index 3afeb1bafdb..299cb56aa2a 100644 --- a/torch/cuda/memory.py +++ b/torch/cuda/memory.py @@ -4,7 +4,7 @@ import warnings from typing import Any, Dict, Union import torch -from . import is_initialized, _get_device_index +from . import is_initialized, _get_device_index, _lazy_init from torch.types import Device def _host_allocator(): @@ -31,7 +31,7 @@ def caching_allocator_alloc(size, device: Union[Device, int] = None, stream=None Arguments: size (int): number of bytes to be allocated. - device (torch.device or int, optional): selected device. If it is + device (torch.device or int, optional): selected device. If it is ``None`` the default CUDA device is used. stream (torch.cuda.Stream or int, optional): selected stream. If is ``None`` then the default stream for the selected device is used.