From 39f885331394e6baea6220f7e22bf1cda17bc361 Mon Sep 17 00:00:00 2001
From: Peter Bell
Date: Thu, 4 Jan 2024 02:03:11 +0000
Subject: [PATCH] [inductor] Use max sm clock when calculating device tflops
 (#116754)

See openai/triton#2801

Current SM clocks may fluctuate at runtime and change the result of
`get_device_tflops`.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116754
Approved by: https://github.com/lezcano
---
 torch/_inductor/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py
index ceb6468da73..188c1998693 100644
--- a/torch/_inductor/utils.py
+++ b/torch/_inductor/utils.py
@@ -1123,14 +1123,14 @@ def get_device_tflops(dtype):
         # Triton API change in https://github.com/openai/triton/pull/2293
         from triton.testing import nvsmi
 
-        cur_sm_clock = nvsmi(["clocks.current.sm"])[0]
+        sm_clock = nvsmi(["clocks.max.sm"])[0]
         if dtype in (torch.float16, torch.bfloat16):
-            return get_max_tensorcore_tflops(dtype, cur_sm_clock)
+            return get_max_tensorcore_tflops(dtype, sm_clock)
 
         if torch.backends.cuda.matmul.allow_tf32:
-            return get_max_tensorcore_tflops(torch.float32, cur_sm_clock)
+            return get_max_tensorcore_tflops(torch.float32, sm_clock)
         else:
-            return get_max_simd_tflops(torch.float32, cur_sm_clock)
+            return get_max_simd_tflops(torch.float32, sm_clock)
     else:
         if dtype in (torch.float16, torch.bfloat16):
             return get_max_tensorcore_tflops(dtype)
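
A minimal sketch of the behavior the patch works around, assuming an NVIDIA
GPU, `nvidia-smi` on PATH, and a Triton version that ships
`triton.testing.nvsmi` (which forwards the query strings to
`nvidia-smi --query-gpu` and parses the numeric results):

    from triton.testing import nvsmi

    # The current SM clock is a live reading, so two queries can disagree,
    # e.g. as the GPU ramps up under load or downclocks when idle or hot.
    cur_a = nvsmi(["clocks.current.sm"])[0]
    cur_b = nvsmi(["clocks.current.sm"])[0]

    # The max SM clock is a fixed per-device limit, so it is stable across
    # calls and gives get_device_tflops a deterministic result.
    max_sm = nvsmi(["clocks.max.sm"])[0]

    print(f"current: {cur_a} MHz vs {cur_b} MHz; max: {max_sm} MHz")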