Add basic Gaudi support to benchmarks/dynamo (#145920)
This PR adds basic Gaudi support to benchmarks/dynamo.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/145920
Approved by: https://github.com/eellison
parent be830c8b1c
commit edaf9ddeb5
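For orientation before the hunks: torch.hpu is not part of stock PyTorch. It is registered by the Intel Gaudi (Habana) PyTorch bridge, conventionally imported as habana_frameworks.torch. A minimal availability probe, as a hedged sketch (the plugin import name and torch.hpu.is_available() come from Habana's documentation, not from this diff):

import torch

try:
    # Importing the Gaudi bridge registers the torch.hpu backend.
    # Assumption: habana_frameworks is installed; stock PyTorch lacks it.
    import habana_frameworks.torch  # noqa: F401

    HAS_HPU = hasattr(torch, "hpu") and torch.hpu.is_available()
except ImportError:
    HAS_HPU = False

print(f"HPU available: {HAS_HPU}")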
@@ -2386,6 +2386,8 @@ class BenchmarkRunner:
         if current_device == "cuda":
             torch.cuda.reset_peak_memory_stats()
             empty_gpu_cache(current_device)
+        elif current_device == "hpu":
+            torch.hpu.reset_peak_memory_stats()
         t0 = time.perf_counter()
         for _ in range(niters):
             fn(model, example_inputs)
@@ -2393,6 +2395,8 @@ class BenchmarkRunner:
         latency = t1 - t0
         if current_device == "cuda":
             peak_mem = get_peak_memory()
+        elif current_device == "hpu":
+            peak_mem = torch.hpu.max_memory_allocated() / 10**9
         elif current_device == "cpu":
             total = psutil.virtual_memory().total
             percentage = psutil.Process(os.getpid()).memory_percent()
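Taken together, the two hunks above make the timed loop device-aware for HPU: reset the peak-memory counter before timing, then read it back in gigabytes afterwards. A consolidated, hedged sketch of that pattern (the function name measure, torch.cuda.empty_cache() standing in for the runner's empty_gpu_cache helper, and torch.cuda.max_memory_allocated() standing in for its get_peak_memory() are illustrative assumptions):

import time

import torch


def measure(fn, model, example_inputs, current_device, niters=5):
    # Reset per-device peak-memory counters before the timed region.
    if current_device == "cuda":
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.empty_cache()  # stands in for the runner's empty_gpu_cache
    elif current_device == "hpu":
        torch.hpu.reset_peak_memory_stats()  # requires the Gaudi plugin

    t0 = time.perf_counter()
    for _ in range(niters):
        fn(model, example_inputs)
    t1 = time.perf_counter()

    latency = t1 - t0
    peak_mem = None
    if current_device == "cuda":
        peak_mem = torch.cuda.max_memory_allocated() / 10**9  # bytes -> GB
    elif current_device == "hpu":
        peak_mem = torch.hpu.max_memory_allocated() / 10**9  # bytes -> GB
    return latency, peak_mem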
@@ -2543,6 +2547,8 @@ class BenchmarkRunner:
         if current_device == "cuda":
             torch.cuda.reset_peak_memory_stats()
             empty_gpu_cache(current_device)
+        elif current_device == "hpu":
+            torch.hpu.reset_peak_memory_stats()
         t0 = time.perf_counter()
         for _ in range(niters):
             fn(model, example_inputs)
@@ -2550,6 +2556,8 @@ class BenchmarkRunner:
         latency = t1 - t0
         if current_device == "cuda":
             peak_mem = get_peak_memory()
+        elif current_device == "hpu":
+            peak_mem = torch.hpu.max_memory_allocated() / 10**9
         elif current_device == "cpu":
             total = psutil.virtual_memory().total
             percentage = psutil.Process(os.getpid()).memory_percent()
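These two hunks mirror the first pair: the same CUDA/HPU reset-and-read pattern is applied to a second timing path later in BenchmarkRunner, so no new example is needed here.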
@@ -2870,7 +2878,7 @@ def parse_args(args=None):
         help="ID of the benchmark suite partition to be run. Used to divide CI tasks",
     )
     parser.add_argument(
-        "--devices", "--device", "-d", action="append", help="cpu or cuda"
+        "--devices", "--device", "-d", action="append", help="cpu, cuda or hpu"
     )
     parser.add_argument("--device-index", help="CUDA device index")
     parser.add_argument(
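One usage note on the flag itself: because --devices is declared with action="append", it can be repeated to request several backends in one run. A self-contained sketch of that argparse behavior (the sample values are illustrative):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--devices", "--device", "-d", action="append", help="cpu, cuda or hpu"
)

# action="append" collects one list entry per occurrence of the flag.
args = parser.parse_args(["--devices", "cpu", "-d", "hpu"])
print(args.devices)  # ['cpu', 'hpu']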