Revert "create a new torch.cuda.memory_usage_in_bytes api (#140719)"

This reverts commit 9febc47637.

Reverted https://github.com/pytorch/pytorch/pull/140719 on behalf of https://github.com/huydhn due to Sorry for reverting your change, but the test is flaky on ROCm ([comment](https://github.com/pytorch/pytorch/pull/140719#issuecomment-2479832082))
PyTorch MergeBot 2024-11-15 20:05:32 +00:00
parent 210de39872
commit 03b7ec9237
4 changed files with 0 additions and 46 deletions

View File

@@ -28,7 +28,6 @@ torch.cuda
     is_available
     is_initialized
     memory_usage
-    memory_usage_in_bytes
     set_device
     set_stream
     set_sync_debug_mode

View File

@@ -3947,20 +3947,6 @@ class TestCudaMallocAsync(TestCase):
     def test_temperature(self):
         self.assertTrue(0 <= torch.cuda.temperature() <= 150)
 
-    @unittest.skipIf(TEST_PYNVML, "pynvml/amdsmi is not available")
-    def test_memory_usage_in_bytes(self):
-        """
-        Verify memory usage in bytes
-        """
-        torch.cuda.empty_cache()
-        a = torch.cuda.memory_usage_in_bytes()
-        num_bytes = 256 * 1024**2
-        _ = torch.empty(num_bytes, dtype=torch.int8, device="cuda")
-        torch.cuda.synchronize()
-        b = torch.cuda.memory_usage_in_bytes()
-        mem_bytes = b - a
-        self.assertTrue(mem_bytes > num_bytes // 2, mem_bytes < num_bytes * 8)
-
     @unittest.skipIf(TEST_PYNVML, "pynvml/amdsmi is not available")
     def test_power_draw(self):
         self.assertTrue(torch.cuda.power_draw() >= 0)

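A side note on the reverted test body: unittest.TestCase.assertTrue(expr, msg) treats its second argument as the failure message, so the final removed line above only asserts the lower bound and silently uses the upper-bound comparison as the message. A minimal sketch of how both bounds could be asserted, assuming the (now reverted) torch.cuda.memory_usage_in_bytes API were still available; the class name here is illustrative and this is not part of the original change:

import unittest

import torch


class MemoryUsageBytesCheck(unittest.TestCase):
    def test_memory_usage_in_bytes(self):
        # Measure global device memory before and after a 256 MiB allocation.
        torch.cuda.empty_cache()
        before = torch.cuda.memory_usage_in_bytes()  # API removed by this revert
        num_bytes = 256 * 1024**2
        _ = torch.empty(num_bytes, dtype=torch.int8, device="cuda")
        torch.cuda.synchronize()
        grown = torch.cuda.memory_usage_in_bytes() - before
        # Assert both bounds explicitly instead of passing the second
        # comparison as assertTrue's msg argument.
        self.assertGreater(grown, num_bytes // 2)
        self.assertLess(grown, num_bytes * 8)
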
View File

@@ -2540,7 +2540,6 @@ torch_non_c_binding_in_graph_functions = dict.fromkeys(
         "torch.cuda.jiterator._create_jit_fn",
         "torch.cuda.jiterator._create_multi_output_jit_fn",
         "torch.cuda.memory_usage",
-        "torch.cuda.memory_usage_in_bytes",
         "torch.cuda.memory._dump_snapshot",
         "torch.cuda.memory._free_mutex",
         "torch.cuda.memory._get_current_allocator",

View File

@@ -1110,17 +1110,6 @@ def _get_amdsmi_device_index(device: Optional[Union[int, Device]]) -> int:
     return idx_map[idx]
 
 
-def _get_amdsmi_memory_usage_in_bytes(
-    device: Optional[Union[Device, int]] = None
-) -> int:
-    handle = _get_amdsmi_handler()
-    device = _get_amdsmi_device_index(device)
-    # amdsmi_get_gpu_vram_usage returns mem usage in megabytes
-    mem_mega_bytes = amdsmi.amdsmi_get_gpu_vram_usage(handle)["vram_used"]
-    mem_bytes = mem_mega_bytes * 1024 * 1024
-    return mem_bytes
-
-
 def _get_amdsmi_memory_usage(device: Optional[Union[Device, int]] = None) -> int:
     handle = _get_amdsmi_handler()
     device = _get_amdsmi_device_index(device)
@@ -1161,24 +1150,6 @@ def _get_amdsmi_clock_rate(device: Optional[Union[Device, int]] = None) -> int:
     return clock_info["clk"]
 
 
-def memory_usage_in_bytes(device: Optional[Union[Device, int]] = None) -> int:
-    r"""Return global (device) memory usage in bytes as given by `nvidia-smi` or `amd-smi`.
-
-    Args:
-        device (torch.device or int, optional): selected device. Returns
-            statistic for the current device, given by :func:`~torch.cuda.current_device`,
-            if :attr:`device` is ``None`` (default).
-    """
-    if not torch.version.hip:
-        handle = _get_pynvml_handler()
-
-        device = _get_nvml_device_index(device)
-        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
-        return pynvml.nvmlDeviceGetMemoryInfo(handle).used
-    else:
-        return _get_amdsmi_memory_usage_in_bytes(device)
-
-
 def memory_usage(device: Optional[Union[Device, int]] = None) -> int:
     r"""Return the percent of time over the past sample period during which global (device)
     memory was being read or written as given by `nvidia-smi`.
@@ -1681,7 +1652,6 @@ __all__ = [
     "memory_stats_as_nested_dict",
     "memory_summary",
     "memory_usage",
-    "memory_usage_in_bytes",
     "MemPool",
     "MemPoolContext",
     "use_mem_pool",