Introduce a generic API torch._C._accelerator_setAllocatorSettings (#165291)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165291
Approved by: https://github.com/albanD
ghstack dependencies: #165288, #165289
Author: Yu, Guangye
Date: 2025-10-17 17:16:44 +00:00
Committed by: PyTorch MergeBot
Parent: a1114beed2
Commit: b2f5c25b27
6 changed files with 23 additions and 26 deletions
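In short: the CUDA-only binding torch._C._cuda_cudaCachingAllocator_set_allocator_settings is removed in favor of a device-generic torch._C._accelerator_setAllocatorSettings, which takes the same comma-separated "key:value" settings string. A minimal usage sketch (the keys come from the tests in this diff; the concrete values are assumptions based on the allocator's documented constraints):

    import torch

    # Same "key:value,key:value" format as the PYTORCH_CUDA_ALLOC_CONF
    # environment variable.
    torch._C._accelerator_setAllocatorSettings("garbage_collection_threshold:0.8")

    # Unknown keys or out-of-range values are rejected with ValueError:
    try:
        torch._C._accelerator_setAllocatorSettings("foo:1,bar:2")
    except ValueError:
        pass  # exercised by the updated test below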


@@ -4583,31 +4583,33 @@ class TestCudaMallocAsync(TestCase):
         self.assertEqual(reg_mem - start_mem, nbytes)

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings("foo:1,bar:2")
+            torch._C._accelerator_setAllocatorSettings("foo:1,bar:2")

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings(
+            torch._C._accelerator_setAllocatorSettings(
                 "garbage_collection_threshold:1.2"
             )

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings("max_split_size_mb:2")
+            torch._C._accelerator_setAllocatorSettings("max_split_size_mb:2")

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings("release_lock_on_cudamalloc:none")
+            torch._C._accelerator_setAllocatorSettings(
+                "release_lock_on_cudamalloc:none"
+            )

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings(
+            torch._C._accelerator_setAllocatorSettings(
                 "pinned_use_cuda_host_register:none"
             )

-        with self.assertRaises(RuntimeError):
-            torch.cuda.memory._set_allocator_settings(
+        with self.assertRaises(ValueError):
+            torch._C._accelerator_setAllocatorSettings(
                 "pinned_num_register_threads:none"
             )

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings(
+            torch._C._accelerator_setAllocatorSettings(
                 "pinned_num_register_threads:1024"
             )
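Note the second-to-last case: the old path raised RuntimeError for "pinned_num_register_threads:none", while the generic parser reports ValueError, so the expected exception type changes along with the call site. For contrast, a hedged sketch of strings these checks suggest would be accepted (the values are assumptions: the GC threshold must lie in (0, 1), max_split_size_mb:2 appears to fall below the minimum split size, and pinned_num_register_threads:1024 to exceed the supported thread count):

    # Presumably valid counterparts to the rejected settings above:
    torch._C._accelerator_setAllocatorSettings("garbage_collection_threshold:0.7")
    torch._C._accelerator_setAllocatorSettings("max_split_size_mb:128")
    torch._C._accelerator_setAllocatorSettings("pinned_num_register_threads:8")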


@@ -2048,7 +2048,6 @@ def _cuda_cudaHostAllocator() -> _int: ...
 def _cuda_cudaCachingAllocator_raw_alloc(size: _int, cuda_stream: _int) -> _int: ...
 def _cuda_cudaCachingAllocator_raw_delete(ptr: _int) -> None: ...
 def _cuda_cudaCachingAllocator_enable(val: _bool) -> None: ...
-def _cuda_cudaCachingAllocator_set_allocator_settings(env: str) -> None: ...
 def _cuda_beginAllocateToPool(device: _int, mempool_id: tuple[_int, _int]) -> None: ...
 def _cuda_beginAllocateCurrentThreadToPool(
     device: _int,
@@ -2477,6 +2476,7 @@ def _accelerator_emptyCache() -> None: ...
 def _accelerator_getDeviceStats(device_index: _int) -> dict[str, Any]: ...
 def _accelerator_resetAccumulatedStats(device_index: _int) -> None: ...
 def _accelerator_resetPeakStats(device_index: _int) -> None: ...
+def _accelerator_setAllocatorSettings(env: str) -> None: ...

 # Defined in torch/csrc/jit/python/python_tracer.cpp
 class TracingState:


@@ -449,6 +449,7 @@ torch_c_binding_in_graph_functions = dict.fromkeys(
     "torch._C._accelerator_getAccelerator",
     "torch._C._accelerator_getDeviceIndex",
     "torch._C._accelerator_getStream",
+    "torch._C._accelerator_setAllocatorSettings",
     "torch._C._accelerator_setStream",
     "torch._C._accelerator_synchronizeDevice",
     "torch._C._activate_gpu_trace",
@@ -505,7 +506,6 @@ torch_c_binding_in_graph_functions = dict.fromkeys(
     "torch._C._cuda_clearCublasWorkspaces",
     "torch._C._cuda_cudaCachingAllocator_raw_alloc",
     "torch._C._cuda_cudaCachingAllocator_raw_delete",
-    "torch._C._cuda_cudaCachingAllocator_set_allocator_settings",
     "torch._C._cuda_cudaHostAllocator",
     "torch._C._cuda_customAllocator",
     "torch._C._cuda_emptyCache",


@@ -1,3 +1,4 @@
+#include <c10/core/AllocatorConfig.h>
 #include <torch/csrc/DeviceAccelerator.h>
 #include <torch/csrc/utils/device_lazy_init.h>
@@ -136,6 +137,10 @@ void initModule(PyObject* module) {
   m.def("_accelerator_resetPeakStats", [](c10::DeviceIndex device_index) {
     at::accelerator::resetPeakStats(device_index);
   });
+  m.def("_accelerator_setAllocatorSettings", [](std::string env) {
+    c10::CachingAllocator::setAllocatorSettings(env);
+  });
 }

 } // namespace torch::accelerator
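The new binding forwards to c10::CachingAllocator::setAllocatorSettings from c10/core/AllocatorConfig.h (hence the added include), so a single Python entry point configures the caching allocator of whichever accelerator backend is active, rather than being hard-wired to CUDA. A sketch of the resulting call pattern (the torch.accelerator.is_available() guard is an assumption about the surrounding accelerator API):

    import torch

    # Backend-agnostic: no torch.cuda-specific import or call needed.
    if torch.accelerator.is_available():
        torch._C._accelerator_setAllocatorSettings("garbage_collection_threshold:0.6")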


@@ -20,8 +20,8 @@
 #include <ATen/cuda/detail/CUDAHooks.h>
 #include <ATen/cuda/jiterator.h>
 #include <ATen/cuda/tunable/Tunable.h>
+#include <c10/core/AllocatorConfig.h>
 #include <c10/core/StorageImpl.h>
-#include <c10/cuda/CUDAAllocatorConfig.h>
 #include <c10/cuda/CUDACachingAllocator.h>
 #include <c10/cuda/CUDAFunctions.h>
 #include <ATen/cuda/CUDAGraphsUtils.cuh>
@@ -422,16 +422,6 @@
   END_HANDLE_TH_ERRORS
 }

-PyObject* THCPModule_cudaCachingAllocator_set_allocator_settings(
-    PyObject* _unused,
-    PyObject* env) {
-  HANDLE_TH_ERRORS
-  c10::cuda::CUDACachingAllocator::setAllocatorSettings(
-      THPUtils_unpackString(env));
-  Py_RETURN_NONE;
-  END_HANDLE_TH_ERRORS
-}
-
 PyObject* THCPModule_getAllocatorBackend(PyObject* _unused, PyObject* noargs) {
   HANDLE_TH_ERRORS
   return THPUtils_packString(c10::cuda::CUDACachingAllocator::name());
@@ -2077,10 +2067,6 @@ static struct PyMethodDef _THCPModule_methods[] = {
      THCPModule_cudaCachingAllocator_enable,
      METH_O,
      nullptr},
-    {"_cuda_cudaCachingAllocator_set_allocator_settings",
-     THCPModule_cudaCachingAllocator_set_allocator_settings,
-     METH_O,
-     nullptr},
     {"_cuda_getAllocatorBackend",
      THCPModule_getAllocatorBackend,
      METH_NOARGS,


@@ -1101,8 +1101,12 @@ def _save_memory_usage(filename="output.svg", snapshot=None):
         f.write(_memory(snapshot))


+@deprecated(
+    "torch.cuda._set_allocator_settings is deprecated. Use torch._C._accelerator_setAllocatorSettings instead.",
+    category=FutureWarning,
+)
 def _set_allocator_settings(env: str):
-    return torch._C._cuda_cudaCachingAllocator_set_allocator_settings(env)
+    return torch._C._accelerator_setAllocatorSettings(env)


 def get_allocator_backend() -> str:
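The Python-level wrapper keeps working but is now marked deprecated: calling it emits a FutureWarning and forwards to the generic binding. A quick check of that behavior (assuming the @deprecated decorator behaves like typing_extensions.deprecated):

    import warnings
    import torch

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        torch.cuda.memory._set_allocator_settings("garbage_collection_threshold:0.8")

    # The call still succeeds, but a FutureWarning points at the new API.
    assert any(issubclass(w.category, FutureWarning) for w in caught)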