pytorch/torch/csrc/cuda/MemPool.cpp
Dan Johnson d22c4cc353 Add option to use mempool on OOM (#151487)
MemPool is a separate pool of memory handled by the caching allocator. This PR adds the option to let the caching allocator use this pool as a last resort instead of OOMing, by associating a use_on_oom bool with each MemPool.

Usage:
Users can optionally specify a ``use_on_oom`` bool (which is False by default) during MemPool creation. If true, then the CUDACachingAllocator will be able to use memory in this pool as a last resort instead of OOMing.

```
pool = torch.cuda.MemPool(allocator, use_on_oom=True)
with torch.cuda.use_mem_pool(pool):
    a = torch.randn(40 * 1024 * 1024, dtype=torch.uint8, device="cuda")
del a
# at the memory limit, this will succeed by using the pool's memory to avoid an OOM
b = torch.randn(40 * 1024 * 1024, dtype=torch.uint8, device="cuda")
```

Testing:
```
python test/test_cuda.py -k test_mempool_limited_memory_with_allocator
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/151487
Approved by: https://github.com/eqy, https://github.com/syed-ahmed, https://github.com/ngimel
2025-04-26 04:04:57 +00:00

32 lines
1.3 KiB
C++

#include <torch/csrc/python_headers.h>
#include <torch/csrc/jit/python/pybind_utils.h>
#include <torch/csrc/utils/device_lazy_init.h>
#include <torch/csrc/utils/pybind.h>
#include <c10/cuda/CUDACachingAllocator.h>
// Shorthand for a pybind11 class binding of `Bound` whose holder type is
// std::shared_ptr<Bound>.
template <typename Bound>
using shared_ptr_class_ = py::class_<Bound, std::shared_ptr<Bound>>;
// Registers the CUDA memory-pool Python bindings on `module`.
//
// Two classes are exposed, both held through std::shared_ptr:
//   * `_MemPool`        - built from (allocator, is_user_created, use_on_oom);
//                         exposes read-only `id` and `allocator` properties
//                         and a `use_count()` method.
//   * `_MemPoolContext` - built from a `MemPool*`; exposes the static
//                         `active_pool()` accessor.
//
// `module` is the Python module object the classes are attached to
// (presumably torch._C - confirm against the caller).
// NOLINTNEXTLINE(misc-use-internal-linkage)
void THCPMemPool_init(PyObject* module) {
  auto bindings_module = py::handle(module).cast<py::module>();

  // --- _MemPool ------------------------------------------------------------
  shared_ptr_class_<::c10::cuda::MemPool> mem_pool_cls(
      bindings_module, "_MemPool");
  // Factory constructor: device_lazy_init(at::kCUDA) runs before the pool
  // object is created (presumably so CUDA state exists first - confirm).
  mem_pool_cls.def(
      py::init([](c10::cuda::CUDACachingAllocator::CUDAAllocator* allocator,
                  bool is_user_created,
                  bool use_on_oom) {
        torch::utils::device_lazy_init(at::kCUDA);
        return std::make_shared<::c10::cuda::MemPool>(
            allocator, is_user_created, use_on_oom);
      }));
  mem_pool_cls.def_property_readonly("id", &::c10::cuda::MemPool::id);
  mem_pool_cls.def_property_readonly(
      "allocator", &::c10::cuda::MemPool::allocator);
  mem_pool_cls.def("use_count", &::c10::cuda::MemPool::use_count);

  // --- _MemPoolContext -----------------------------------------------------
  shared_ptr_class_<::c10::cuda::MemPoolContext> mem_pool_ctx_cls(
      bindings_module, "_MemPoolContext");
  mem_pool_ctx_cls.def(py::init<c10::cuda::MemPool*>());
  mem_pool_ctx_cls.def_static(
      "active_pool", &::c10::cuda::MemPoolContext::getActiveMemPool);
}