diff --git a/docs/source/cuda.rst b/docs/source/cuda.rst
index d6aa51c1ac8..2b30198d576 100644
--- a/docs/source/cuda.rst
+++ b/docs/source/cuda.rst
@@ -28,7 +28,6 @@ torch.cuda
     is_available
     is_initialized
     memory_usage
-    memory_usage_in_bytes
     set_device
     set_stream
     set_sync_debug_mode
diff --git a/test/test_cuda.py b/test/test_cuda.py
index d9e6addb347..961c2444985 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -3947,20 +3947,6 @@ class TestCudaMallocAsync(TestCase):
     def test_temperature(self):
         self.assertTrue(0 <= torch.cuda.temperature() <= 150)
 
-    @unittest.skipIf(TEST_PYNVML, "pynvml/amdsmi is not available")
-    def test_memory_usage_in_bytes(self):
-        """
-        Verify memory usage in bytes
-        """
-        torch.cuda.empty_cache()
-        a = torch.cuda.memory_usage_in_bytes()
-        num_bytes = 256 * 1024**2
-        _ = torch.empty(num_bytes, dtype=torch.int8, device="cuda")
-        torch.cuda.synchronize()
-        b = torch.cuda.memory_usage_in_bytes()
-        mem_bytes = b - a
-        self.assertTrue(mem_bytes > num_bytes // 2, mem_bytes < num_bytes * 8)
-
     @unittest.skipIf(TEST_PYNVML, "pynvml/amdsmi is not available")
     def test_power_draw(self):
         self.assertTrue(torch.cuda.power_draw() >= 0)
diff --git a/torch/_dynamo/trace_rules.py b/torch/_dynamo/trace_rules.py
index ce9068ab922..370844118fc 100644
--- a/torch/_dynamo/trace_rules.py
+++ b/torch/_dynamo/trace_rules.py
@@ -2540,7 +2540,6 @@ torch_non_c_binding_in_graph_functions = dict.fromkeys(
         "torch.cuda.jiterator._create_jit_fn",
         "torch.cuda.jiterator._create_multi_output_jit_fn",
         "torch.cuda.memory_usage",
-        "torch.cuda.memory_usage_in_bytes",
         "torch.cuda.memory._dump_snapshot",
         "torch.cuda.memory._free_mutex",
         "torch.cuda.memory._get_current_allocator",
diff --git a/torch/cuda/__init__.py b/torch/cuda/__init__.py
index d349db6f49d..7e17f9ccb6d 100644
--- a/torch/cuda/__init__.py
+++ b/torch/cuda/__init__.py
@@ -1110,17 +1110,6 @@ def _get_amdsmi_device_index(device: Optional[Union[int, Device]]) -> int:
     return idx_map[idx]
 
 
-def _get_amdsmi_memory_usage_in_bytes(
-    device: Optional[Union[Device, int]] = None
-) -> int:
-    handle = _get_amdsmi_handler()
-    device = _get_amdsmi_device_index(device)
-    # amdsmi_get_gpu_vram_usage returns mem usage in megabytes
-    mem_mega_bytes = amdsmi.amdsmi_get_gpu_vram_usage(handle)["vram_used"]
-    mem_bytes = mem_mega_bytes * 1024 * 1024
-    return mem_bytes
-
-
 def _get_amdsmi_memory_usage(device: Optional[Union[Device, int]] = None) -> int:
     handle = _get_amdsmi_handler()
     device = _get_amdsmi_device_index(device)
@@ -1161,24 +1150,6 @@ def _get_amdsmi_clock_rate(device: Optional[Union[Device, int]] = None) -> int:
     return clock_info["clk"]
 
 
-def memory_usage_in_bytes(device: Optional[Union[Device, int]] = None) -> int:
-    r"""Return global (device) memory usage in bytes as given by `nvidia-smi` or `amd-smi`.
-
-    Args:
-        device (torch.device or int, optional): selected device. Returns
-            statistic for the current device, given by :func:`~torch.cuda.current_device`,
-            if :attr:`device` is ``None`` (default).
-
-    """
-    if not torch.version.hip:
-        handle = _get_pynvml_handler()
-        device = _get_nvml_device_index(device)
-        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
-        return pynvml.nvmlDeviceGetMemoryInfo(handle).used
-    else:
-        return _get_amdsmi_memory_usage_in_bytes(device)
-
-
 def memory_usage(device: Optional[Union[Device, int]] = None) -> int:
     r"""Return the percent of time over the past sample period during which global (device)
     memory was being read or written as given by `nvidia-smi`.
@@ -1681,7 +1652,6 @@ __all__ = [
     "memory_stats_as_nested_dict",
     "memory_summary",
     "memory_usage",
-    "memory_usage_in_bytes",
    "MemPool",
    "MemPoolContext",
    "use_mem_pool",
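
After this change, code that needs the whole-device byte count (what `nvidia-smi` / `amd-smi` report, not just PyTorch's caching allocator) can query the SMI libraries directly, the same way the removed helper did. Below is a minimal sketch of such a query; it is not part of this PR, the function name `device_memory_used_bytes` is hypothetical, and it assumes `pynvml` (NVIDIA) or `amdsmi` (ROCm) is importable and that `amdsmi_get_gpu_vram_usage` reports megabytes, as the removed helper's comment stated:

import torch

def device_memory_used_bytes(index: int = 0) -> int:
    """Hypothetical stand-in for the removed torch.cuda.memory_usage_in_bytes."""
    if not torch.version.hip:
        # NVIDIA: NVML reports device-wide used memory in bytes.
        import pynvml
        pynvml.nvmlInit()
        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            return pynvml.nvmlDeviceGetMemoryInfo(handle).used
        finally:
            pynvml.nvmlShutdown()
    else:
        # AMD: amdsmi reports VRAM usage; "vram_used" is in megabytes
        # (assumption carried over from the removed helper's comment).
        import amdsmi
        amdsmi.amdsmi_init()
        try:
            handle = amdsmi.amdsmi_get_processor_handles()[index]
            return amdsmi.amdsmi_get_gpu_vram_usage(handle)["vram_used"] * 1024 * 1024
        finally:
            amdsmi.amdsmi_shut_down()

Note that `torch.cuda.memory_usage` (percent of time memory was read or written over the sample period) is untouched by this PR, and allocator-level byte counts remain available through `torch.cuda.memory_allocated` and `torch.cuda.memory_reserved`; those cover only memory managed by PyTorch's caching allocator, which is why the removed helper went to NVML/amdsmi for the device-wide figure.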