create a new torch.cuda.device_memory_used api (#140870)
Summary: The existing torch.cuda.memory_usage does not report how much memory is in use; it returns memory utilization, i.e. the percent of time over the past sample period during which global (device) memory was being read or written, as reported for NVIDIA GPUs. This change adds a new torch.cuda.device_memory_used API that returns the used device memory in bytes. See https://github.com/pytorch/pytorch/issues/140638 for more details.

Test Plan: added a new unit test.

Differential Revision: D65960134

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140870
Approved by: https://github.com/ngimel, https://github.com/eqy
parent 7156d0824d
commit 808da50c2d
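For context, a minimal usage sketch contrasting the existing utilization API with the new byte-count API introduced by this commit (illustrative only; the printed values depend on the device and on other processes using it):

import torch

if torch.cuda.is_available():
    # Existing API: utilization, i.e. percent of time over the past sample
    # period during which global (device) memory was being read or written.
    util_percent = torch.cuda.memory_usage()

    # New API from this commit: used device memory in bytes, as reported by
    # nvidia-smi / amd-smi.
    used_bytes = torch.cuda.device_memory_used()

    print(f"memory utilization: {util_percent}%")
    print(f"device memory used: {used_bytes / 1024**2:.1f} MiB")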
@@ -16,6 +16,7 @@ torch.cuda
     default_stream
     device
     device_count
+    device_memory_used
     device_of
     get_arch_list
     get_device_capability
@@ -3947,6 +3947,25 @@ class TestCudaMallocAsync(TestCase):
     def test_temperature(self):
         self.assertTrue(0 <= torch.cuda.temperature() <= 150)

+    @unittest.skipIf(TEST_WITH_ROCM, "flaky for AMD gpu")
+    @unittest.skipIf(TEST_PYNVML, "pynvml/amdsmi is not available")
+    def test_device_memory_used(self):
+        """
+        Verify used device memory in bytes
+        """
+        torch.cuda.synchronize()
+        gc.collect()
+        torch.cuda.empty_cache()
+        a = torch.cuda.device_memory_used()
+        num_bytes = 512 * 1024**2
+        _ = torch.empty(num_bytes, dtype=torch.int8, device="cuda")
+        torch.cuda.synchronize()
+        torch.cuda.empty_cache()
+        b = torch.cuda.device_memory_used()
+        mem_bytes = b - a
+        # test the order of magnitude
+        self.assertTrue(num_bytes // 32 <= mem_bytes <= num_bytes * 32)
+
     @unittest.skipIf(TEST_PYNVML, "pynvml/amdsmi is not available")
     def test_power_draw(self):
         self.assertTrue(torch.cuda.power_draw() >= 0)
@@ -2523,6 +2523,7 @@ torch_non_c_binding_in_graph_functions = dict.fromkeys(
         "torch.cuda.current_stream",
         "torch.cuda.default_stream",
         "torch.cuda.device_count",
+        "torch.cuda.device_memory_used",
         "torch.cuda.get_arch_list",
         "torch.cuda.get_device_capability",
         "torch.cuda.get_device_name",
@@ -1110,6 +1110,15 @@ def _get_amdsmi_device_index(device: Optional[Union[int, Device]]) -> int:
     return idx_map[idx]


+def _get_amdsmi_device_memory_used(device: Optional[Union[Device, int]] = None) -> int:
+    handle = _get_amdsmi_handler()
+    device = _get_amdsmi_device_index(device)
+    # amdsmi_get_gpu_vram_usage returns mem usage in megabytes
+    mem_mega_bytes = amdsmi.amdsmi_get_gpu_vram_usage(handle)["vram_used"]
+    mem_bytes = mem_mega_bytes * 1024 * 1024
+    return mem_bytes
+
+
 def _get_amdsmi_memory_usage(device: Optional[Union[Device, int]] = None) -> int:
     handle = _get_amdsmi_handler()
     device = _get_amdsmi_device_index(device)
@@ -1150,6 +1159,24 @@ def _get_amdsmi_clock_rate(device: Optional[Union[Device, int]] = None) -> int:
     return clock_info["clk"]


+def device_memory_used(device: Optional[Union[Device, int]] = None) -> int:
+    r"""Return used global (device) memory in bytes as given by `nvidia-smi` or `amd-smi`.
+
+    Args:
+        device (torch.device or int, optional): selected device. Returns
+            statistic for the current device, given by :func:`~torch.cuda.current_device`,
+            if :attr:`device` is ``None`` (default).
+
+    """
+    if not torch.version.hip:
+        handle = _get_pynvml_handler()
+        device = _get_nvml_device_index(device)
+        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
+        return pynvml.nvmlDeviceGetMemoryInfo(handle).used
+    else:
+        return _get_amdsmi_device_memory_used(device)
+
+
 def memory_usage(device: Optional[Union[Device, int]] = None) -> int:
     r"""Return the percent of time over the past sample period during which global (device)
     memory was being read or written as given by `nvidia-smi`.
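A rough sketch of how the new device_memory_used (driver-level) figure relates to PyTorch's own allocator counters; the exact gap depends on the CUDA context, the caching allocator's reserved pool, and other processes sharing the GPU, so treat the comparison as illustrative:

import torch

if torch.cuda.is_available():
    x = torch.empty(256 * 1024**2, dtype=torch.int8, device="cuda")

    # Allocator view: bytes currently allocated / reserved by PyTorch's
    # caching allocator in this process only.
    allocated = torch.cuda.memory_allocated()
    reserved = torch.cuda.memory_reserved()

    # Driver view (device_memory_used): bytes the device reports as used,
    # which also covers the CUDA context and other processes' memory.
    used = torch.cuda.device_memory_used()

    print(allocated, reserved, used)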
@@ -1609,6 +1636,7 @@ __all__ = [
     "default_stream",
     "device",
     "device_count",
+    "device_memory_used",
     "device_of",
     "empty_cache",
     "get_allocator_backend",