MemPool is a separate pool of memory handled by the caching allocator. This PR adds the option to let the caching allocator try to use this pool as a last resort instead of OOMing, by associating a ``use_on_oom`` bool with each MemPool.
Usage:
Users can optionally specify a ``use_on_oom`` bool (``False`` by default) during MemPool creation. If ``True``, the CUDACachingAllocator will be able to use memory in this pool as a last resort instead of OOMing.
```
pool = torch.cuda.MemPool(allocator, use_on_oom=True)
with torch.cuda.use_mem_pool(pool):
    # torch.empty rather than torch.randn: randn cannot fill a uint8 tensor
    a = torch.empty(40 * 1024 * 1024, dtype=torch.uint8, device="cuda")
del a
# at the memory limit, this will succeed by using pool's memory instead of OOMing
b = torch.empty(40 * 1024 * 1024, dtype=torch.uint8, device="cuda")
```
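The ``allocator`` above is a pluggable allocator handle. A minimal sketch of constructing one, assuming you have already compiled alloc/free hooks into a shared library (the ``.so`` path and the ``my_alloc``/``my_free`` symbol names are placeholders, not something this PR ships):
```
import torch

# Placeholder path and symbol names for a pre-compiled allocator library.
pluggable = torch.cuda.memory.CUDAPluggableAllocator(
    "./my_allocator.so", "my_alloc", "my_free"
)

# MemPool takes the underlying C++ allocator object, plus the new flag.
pool = torch.cuda.MemPool(pluggable.allocator(), use_on_oom=True)
```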
Testing:
```
python test/test_cuda.py -k test_mempool_limited_memory_with_allocator
```
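The test drives the limit through a small pluggable allocator; the sketch below approximates the same scenario in a self-contained way with ``torch.cuda.set_per_process_memory_fraction``, assuming the fraction cap fails allocations through the same OOM path that the ``use_on_oom`` fallback hooks into:
```
import torch

# Cap this process at roughly 60 MiB so two 40 MiB blocks cannot coexist.
total = torch.cuda.get_device_properties(0).total_memory
torch.cuda.set_per_process_memory_fraction(60 * 1024 * 1024 / total)

pool = torch.cuda.MemPool(use_on_oom=True)  # backed by the default allocator
with torch.cuda.use_mem_pool(pool):
    a = torch.empty(40 * 1024 * 1024, dtype=torch.uint8, device="cuda")
del a  # the freed block stays cached inside pool

# A fresh allocation would exceed the cap; with use_on_oom=True the
# allocator may serve it from pool's cached block instead of raising OOM.
b = torch.empty(40 * 1024 * 1024, dtype=torch.uint8, device="cuda")
```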
Pull Request resolved: https://github.com/pytorch/pytorch/pull/151487
Approved by: https://github.com/eqy, https://github.com/syed-ahmed, https://github.com/ngimel
#include <torch/csrc/python_headers.h>

#include <torch/csrc/jit/python/pybind_utils.h>
#include <torch/csrc/utils/device_lazy_init.h>
#include <torch/csrc/utils/pybind.h>

#include <c10/cuda/CUDACachingAllocator.h>

template <typename T>
using shared_ptr_class_ = py::class_<T, std::shared_ptr<T>>;

// NOLINTNEXTLINE(misc-use-internal-linkage)
void THCPMemPool_init(PyObject* module) {
  auto torch_C_m = py::handle(module).cast<py::module>();
  // Bind c10::cuda::MemPool as torch._C._MemPool. The third init argument,
  // use_on_oom, is the flag added by this PR: it marks the pool's memory as
  // usable by the caching allocator as a last resort before OOMing.
  shared_ptr_class_<::c10::cuda::MemPool>(torch_C_m, "_MemPool")
      .def(
          py::init([](c10::cuda::CUDACachingAllocator::CUDAAllocator* allocator,
                      bool is_user_created,
                      bool use_on_oom) {
            torch::utils::device_lazy_init(at::kCUDA);
            return std::make_shared<::c10::cuda::MemPool>(
                allocator, is_user_created, use_on_oom);
          }))
      .def_property_readonly("id", &::c10::cuda::MemPool::id)
      .def_property_readonly("allocator", &::c10::cuda::MemPool::allocator)
      .def("use_count", &::c10::cuda::MemPool::use_count);
  // Bind c10::cuda::MemPoolContext as torch._C._MemPoolContext; it scopes the
  // thread-local "active" pool that allocations get routed into.
  shared_ptr_class_<::c10::cuda::MemPoolContext>(torch_C_m, "_MemPoolContext")
      .def(py::init<c10::cuda::MemPool*>())
      .def_static(
          "active_pool", &::c10::cuda::MemPoolContext::getActiveMemPool);
}
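For a sense of how these bindings surface in Python, here is a minimal sketch using the private ``torch._C`` names bound above (private APIs, so the exact surface can shift between releases):
```
import torch

# torch.cuda.MemPool subclasses torch._C._MemPool and forwards
# (allocator, is_user_created=True, use_on_oom) to the init above.
pool = torch.cuda.MemPool(use_on_oom=True)

print(pool.id)           # pool id, exposed as a tuple of two ints
print(pool.use_count())  # outstanding references tracked for the pool

# _MemPoolContext makes pool the active pool for the current thread
# until the context object is destroyed.
ctx = torch._C._MemPoolContext(pool)
assert torch._C._MemPoolContext.active_pool() is not None
del ctx  # restores the previously active pool (if any)
```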