Add basic Gaudi support to benchmarks/dynamo (#145920)
This PR adds basic Gaudi support to benchmarks/dynamo.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/145920
Approved by: https://github.com/eellison
parent be830c8b1c
commit edaf9ddeb5
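For orientation before the hunks: torch.hpu is not part of stock PyTorch. It is registered by the Intel Gaudi (Habana) PyTorch bridge, conventionally imported as habana_frameworks.torch. A minimal availability probe, as a hedged sketch (the plugin import name and torch.hpu.is_available() come from Habana's documentation, not from this diff):

import torch

try:
    # Importing the Gaudi bridge registers the torch.hpu backend.
    # Assumption: habana_frameworks is installed; stock PyTorch lacks it.
    import habana_frameworks.torch  # noqa: F401

    HAS_HPU = hasattr(torch, "hpu") and torch.hpu.is_available()
except ImportError:
    HAS_HPU = False

print(f"HPU available: {HAS_HPU}")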
@@ -2386,6 +2386,8 @@ class BenchmarkRunner:
         if current_device == "cuda":
             torch.cuda.reset_peak_memory_stats()
             empty_gpu_cache(current_device)
+        elif current_device == "hpu":
+            torch.hpu.reset_peak_memory_stats()
         t0 = time.perf_counter()
         for _ in range(niters):
             fn(model, example_inputs)
@@ -2393,6 +2395,8 @@ class BenchmarkRunner:
         latency = t1 - t0
         if current_device == "cuda":
             peak_mem = get_peak_memory()
+        elif current_device == "hpu":
+            peak_mem = torch.hpu.max_memory_allocated() / 10**9
         elif current_device == "cpu":
             total = psutil.virtual_memory().total
             percentage = psutil.Process(os.getpid()).memory_percent()
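Taken together, the two hunks above make the timed loop device-aware for HPU: reset the peak-memory counter before timing, then read it back in gigabytes afterwards. A consolidated, hedged sketch of that pattern (the function name measure, torch.cuda.empty_cache() standing in for the runner's empty_gpu_cache helper, and torch.cuda.max_memory_allocated() standing in for its get_peak_memory() are illustrative assumptions):

import time

import torch


def measure(fn, model, example_inputs, current_device, niters=5):
    # Reset per-device peak-memory counters before the timed region.
    if current_device == "cuda":
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.empty_cache()  # stands in for the runner's empty_gpu_cache
    elif current_device == "hpu":
        torch.hpu.reset_peak_memory_stats()  # requires the Gaudi plugin

    t0 = time.perf_counter()
    for _ in range(niters):
        fn(model, example_inputs)
    t1 = time.perf_counter()

    latency = t1 - t0
    peak_mem = None
    if current_device == "cuda":
        peak_mem = torch.cuda.max_memory_allocated() / 10**9  # bytes -> GB
    elif current_device == "hpu":
        peak_mem = torch.hpu.max_memory_allocated() / 10**9  # bytes -> GB
    return latency, peak_mem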
@@ -2543,6 +2547,8 @@ class BenchmarkRunner:
         if current_device == "cuda":
             torch.cuda.reset_peak_memory_stats()
             empty_gpu_cache(current_device)
+        elif current_device == "hpu":
+            torch.hpu.reset_peak_memory_stats()
         t0 = time.perf_counter()
         for _ in range(niters):
             fn(model, example_inputs)
@@ -2550,6 +2556,8 @@ class BenchmarkRunner:
         latency = t1 - t0
         if current_device == "cuda":
             peak_mem = get_peak_memory()
+        elif current_device == "hpu":
+            peak_mem = torch.hpu.max_memory_allocated() / 10**9
         elif current_device == "cpu":
             total = psutil.virtual_memory().total
             percentage = psutil.Process(os.getpid()).memory_percent()
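These two hunks mirror the first pair: the same CUDA/HPU reset-and-read pattern is applied to a second timing path later in BenchmarkRunner, so no new example is needed here.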
@@ -2870,7 +2878,7 @@ def parse_args(args=None):
         help="ID of the benchmark suite partition to be run. Used to divide CI tasks",
     )
     parser.add_argument(
-        "--devices", "--device", "-d", action="append", help="cpu or cuda"
+        "--devices", "--device", "-d", action="append", help="cpu, cuda or hpu"
     )
     parser.add_argument("--device-index", help="CUDA device index")
     parser.add_argument(
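One usage note on the flag itself: because --devices is declared with action="append", it can be repeated to request several backends in one run. A self-contained sketch of that argparse behavior (the sample values are illustrative):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--devices", "--device", "-d", action="append", help="cpu, cuda or hpu"
)

# action="append" collects one list entry per occurrence of the flag.
args = parser.parse_args(["--devices", "cpu", "-d", "hpu"])
print(args.devices)  # ['cpu', 'hpu']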