diff --git a/docs/source/cuda.rst b/docs/source/cuda.rst
index d6aa51c1ac8..2b30198d576 100644
--- a/docs/source/cuda.rst
+++ b/docs/source/cuda.rst
@@ -28,7 +28,6 @@ torch.cuda
     is_available
     is_initialized
     memory_usage
-    memory_usage_in_bytes
     set_device
     set_stream
     set_sync_debug_mode
diff --git a/test/test_cuda.py b/test/test_cuda.py
index d9e6addb347..961c2444985 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -3947,20 +3947,6 @@ class TestCudaMallocAsync(TestCase):
     def test_temperature(self):
         self.assertTrue(0 <= torch.cuda.temperature() <= 150)
 
-    @unittest.skipIf(TEST_PYNVML, "pynvml/amdsmi is not available")
-    def test_memory_usage_in_bytes(self):
-        """
-        Verify memory usage in bytes
-        """
-        torch.cuda.empty_cache()
-        a = torch.cuda.memory_usage_in_bytes()
-        num_bytes = 256 * 1024**2
-        _ = torch.empty(num_bytes, dtype=torch.int8, device="cuda")
-        torch.cuda.synchronize()
-        b = torch.cuda.memory_usage_in_bytes()
-        mem_bytes = b - a
-        self.assertTrue(mem_bytes > num_bytes // 2, mem_bytes < num_bytes * 8)
-
     @unittest.skipIf(TEST_PYNVML, "pynvml/amdsmi is not available")
     def test_power_draw(self):
         self.assertTrue(torch.cuda.power_draw() >= 0)
diff --git a/torch/_dynamo/trace_rules.py b/torch/_dynamo/trace_rules.py
index ce9068ab922..370844118fc 100644
--- a/torch/_dynamo/trace_rules.py
+++ b/torch/_dynamo/trace_rules.py
@@ -2540,7 +2540,6 @@ torch_non_c_binding_in_graph_functions = dict.fromkeys(
         "torch.cuda.jiterator._create_jit_fn",
         "torch.cuda.jiterator._create_multi_output_jit_fn",
         "torch.cuda.memory_usage",
-        "torch.cuda.memory_usage_in_bytes",
         "torch.cuda.memory._dump_snapshot",
         "torch.cuda.memory._free_mutex",
         "torch.cuda.memory._get_current_allocator",
diff --git a/torch/cuda/__init__.py b/torch/cuda/__init__.py
index d349db6f49d..7e17f9ccb6d 100644
--- a/torch/cuda/__init__.py
+++ b/torch/cuda/__init__.py
@@ -1110,17 +1110,6 @@ def _get_amdsmi_device_index(device: Optional[Union[int, Device]]) -> int:
     return idx_map[idx]
 
 
-def _get_amdsmi_memory_usage_in_bytes(
-    device: Optional[Union[Device, int]] = None
-) -> int:
-    handle = _get_amdsmi_handler()
-    device = _get_amdsmi_device_index(device)
-    # amdsmi_get_gpu_vram_usage returns mem usage in megabytes
-    mem_mega_bytes = amdsmi.amdsmi_get_gpu_vram_usage(handle)["vram_used"]
-    mem_bytes = mem_mega_bytes * 1024 * 1024
-    return mem_bytes
-
-
 def _get_amdsmi_memory_usage(device: Optional[Union[Device, int]] = None) -> int:
     handle = _get_amdsmi_handler()
     device = _get_amdsmi_device_index(device)
@@ -1161,24 +1150,6 @@ def _get_amdsmi_clock_rate(device: Optional[Union[Device, int]] = None) -> int:
     return clock_info["clk"]
 
 
-def memory_usage_in_bytes(device: Optional[Union[Device, int]] = None) -> int:
-    r"""Return global (device) memory usage in bytes as given by `nvidia-smi` or `amd-smi`.
-
-    Args:
-        device (torch.device or int, optional): selected device. Returns
-            statistic for the current device, given by :func:`~torch.cuda.current_device`,
-            if :attr:`device` is ``None`` (default).
-
-    """
-    if not torch.version.hip:
-        handle = _get_pynvml_handler()
-        device = _get_nvml_device_index(device)
-        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
-        return pynvml.nvmlDeviceGetMemoryInfo(handle).used
-    else:
-        return _get_amdsmi_memory_usage_in_bytes(device)
-
-
 def memory_usage(device: Optional[Union[Device, int]] = None) -> int:
     r"""Return the percent of time over the past sample period during which global (device)
     memory was being read or written as given by `nvidia-smi`.
@@ -1681,7 +1652,6 @@ __all__ = [
     "memory_stats_as_nested_dict",
     "memory_summary",
     "memory_usage",
-    "memory_usage_in_bytes",
    "MemPool",
    "MemPoolContext",
    "use_mem_pool",
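
After this change, code that needs the whole-device byte count (what `nvidia-smi` / `amd-smi` report, not just PyTorch's caching allocator) can query the SMI libraries directly, the same way the removed helper did. Below is a minimal sketch of such a query; it is not part of this PR, the function name `device_memory_used_bytes` is hypothetical, and it assumes `pynvml` (NVIDIA) or `amdsmi` (ROCm) is importable and that `amdsmi_get_gpu_vram_usage` reports megabytes, as the removed helper's comment stated:

import torch

def device_memory_used_bytes(index: int = 0) -> int:
    """Hypothetical stand-in for the removed torch.cuda.memory_usage_in_bytes."""
    if not torch.version.hip:
        # NVIDIA: NVML reports device-wide used memory in bytes.
        import pynvml
        pynvml.nvmlInit()
        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            return pynvml.nvmlDeviceGetMemoryInfo(handle).used
        finally:
            pynvml.nvmlShutdown()
    else:
        # AMD: amdsmi reports VRAM usage; "vram_used" is in megabytes
        # (assumption carried over from the removed helper's comment).
        import amdsmi
        amdsmi.amdsmi_init()
        try:
            handle = amdsmi.amdsmi_get_processor_handles()[index]
            return amdsmi.amdsmi_get_gpu_vram_usage(handle)["vram_used"] * 1024 * 1024
        finally:
            amdsmi.amdsmi_shut_down()

Note that `torch.cuda.memory_usage` (percent of time memory was read or written over the sample period) is untouched by this PR, and allocator-level byte counts remain available through `torch.cuda.memory_allocated` and `torch.cuda.memory_reserved`; those cover only memory managed by PyTorch's caching allocator, which is why the removed helper went to NVML/amdsmi for the device-wide figure.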