From 39f885331394e6baea6220f7e22bf1cda17bc361 Mon Sep 17 00:00:00 2001
From: Peter Bell
Date: Thu, 4 Jan 2024 02:03:11 +0000
Subject: [PATCH] [inductor] Use max sm clock when calculating device tflops
 (#116754)

See openai/triton#2801

Current SM clocks may fluctuate at runtime and change the result of
`get_device_tflops`.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116754
Approved by: https://github.com/lezcano
---
 torch/_inductor/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py
index ceb6468da73..188c1998693 100644
--- a/torch/_inductor/utils.py
+++ b/torch/_inductor/utils.py
@@ -1123,14 +1123,14 @@ def get_device_tflops(dtype):
         # Triton API change in https://github.com/openai/triton/pull/2293
         from triton.testing import nvsmi
 
-        cur_sm_clock = nvsmi(["clocks.current.sm"])[0]
+        sm_clock = nvsmi(["clocks.max.sm"])[0]
         if dtype in (torch.float16, torch.bfloat16):
-            return get_max_tensorcore_tflops(dtype, cur_sm_clock)
+            return get_max_tensorcore_tflops(dtype, sm_clock)
 
         if torch.backends.cuda.matmul.allow_tf32:
-            return get_max_tensorcore_tflops(torch.float32, cur_sm_clock)
+            return get_max_tensorcore_tflops(torch.float32, sm_clock)
         else:
-            return get_max_simd_tflops(torch.float32, cur_sm_clock)
+            return get_max_simd_tflops(torch.float32, sm_clock)
     else:
         if dtype in (torch.float16, torch.bfloat16):
             return get_max_tensorcore_tflops(dtype)
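
A minimal sketch of the behavior the patch works around, assuming an NVIDIA
GPU, `nvidia-smi` on PATH, and a Triton version that ships
`triton.testing.nvsmi` (which forwards the query strings to
`nvidia-smi --query-gpu` and parses the numeric results):

    from triton.testing import nvsmi

    # The current SM clock is a live reading, so two queries can disagree,
    # e.g. as the GPU ramps up under load or downclocks when idle or hot.
    cur_a = nvsmi(["clocks.current.sm"])[0]
    cur_b = nvsmi(["clocks.current.sm"])[0]

    # The max SM clock is a fixed per-device limit, so it is stable across
    # calls and gives get_device_tflops a deterministic result.
    max_sm = nvsmi(["clocks.max.sm"])[0]

    print(f"current: {cur_a} MHz vs {cur_b} MHz; max: {max_sm} MHz")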