[NVFuser] prep for on-by-default

1. Fix tests that expected the old off-by-default NVFuser behavior.
2. Skip NVFuser when getExecutorMode() == false.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/76937

Approved by: https://github.com/eellison
David Berard 2022-05-05 19:29:42 -07:00 committed by PyTorch MergeBot
parent d21154b098
commit 6c615a21a0
4 changed files with 23 additions and 6 deletions
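
For context: NVFuser's switch is exposed to Python, and the setter returns the previous value, which is the save/restore idiom this commit leans on in both the C++ test fixture and TensorExprTestOptions below. A minimal sketch, assuming a CUDA-enabled build (turning the flag on in a CPU-only build can raise):

    import torch

    # Query the current state (wraps fuser::cuda::isEnabled()).
    was_on = torch._C._jit_nvfuser_enabled()

    # The setter returns the prior value, so callers can save and restore it.
    prev = torch._C._jit_set_nvfuser_enabled(was_on)
    assert prev == was_on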


@@ -9,6 +9,7 @@
 #include <torch/csrc/autograd/generated/variable_factories.h>
 #include <torch/csrc/autograd/variable.h>
 #include <torch/csrc/jit/api/module.h>
+#include <torch/csrc/jit/codegen/cuda/interface.h>
 #include <torch/csrc/jit/codegen/fuser/interface.h>
 #include <torch/csrc/jit/frontend/ir_emitter.h>
 #include <torch/csrc/jit/frontend/tracer.h>
@@ -54,7 +55,19 @@
 namespace torch {
 namespace jit {

-TEST(FuserTest, TestSimple_CUDA) {
+class FuserTest : public ::testing::Test {
+  void SetUp() override {
+    old_nvfuser_value_ = fuser::cuda::setEnabled(false);
+  }
+  void TearDown() override {
+    fuser::cuda::setEnabled(old_nvfuser_value_);
+  }
+
+ private:
+  bool old_nvfuser_value_;
+};
+
+TEST_F(FuserTest, TestSimple_CUDA) {
 #if defined(FBCODE_CAFFE2)
   return;
 #endif
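
The new fixture snapshots NVFuser's state in SetUp and restores it in TearDown, so each test runs with the fuser off regardless of the soon-to-change default. A hypothetical Python twin of that fixture, for comparison (FuserTestPy is illustrative; only the torch._C bindings are real):

    import unittest
    import torch

    class FuserTestPy(unittest.TestCase):
        # Hypothetical Python analogue of the C++ FuserTest fixture above.
        def setUp(self):
            # Force the pre-NVFuser fusion path; remember the old setting.
            self._old_nvfuser = torch._C._jit_set_nvfuser_enabled(False)

        def tearDown(self):
            torch._C._jit_set_nvfuser_enabled(self._old_nvfuser)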
@@ -77,7 +90,7 @@ TEST(FuserTest, TestSimple_CUDA) {
   ASSERT_EQ(max_diff, 0);
 }

-TEST(FuserTest, TestOne_CUDA) {
+TEST_F(FuserTest, TestOne_CUDA) {
 #if defined(FBCODE_CAFFE2)
   return;
 #endif
@@ -137,7 +150,7 @@ TEST(FuserTest, TestOne_CUDA) {
   testOne(0, 2);
 }

-TEST(FuserTest, FusedConcat_CUDA) {
+TEST_F(FuserTest, FusedConcat_CUDA) {
 #if defined(FBCODE_CAFFE2)
   return;
 #endif
@@ -182,7 +195,7 @@ TEST(FuserTest, FusedConcat_CUDA) {
   };
 }

-TEST(FuserTest, FusionAliasing) {
+TEST_F(FuserTest, FusionAliasing) {
 #if defined(FBCODE_CAFFE2)
   return;
 #endif
@@ -210,7 +223,7 @@ TEST(FuserTest, FusionAliasing) {
       ->run(*g);
 }

-TEST(FuserTest, KernelCaching) {
+TEST_F(FuserTest, KernelCaching) {
 #if defined(FBCODE_CAFFE2)
   return;
 #endif


@@ -41,7 +41,8 @@ class NVFuserEnabler {
 #ifdef USE_ROCM
     return false;
 #else
-    return at::globalContext().hasCUDA() && NVFuserPassManager::isRegistered();
+    return at::globalContext().hasCUDA() &&
+        NVFuserPassManager::isRegistered() && getExecutorMode();
 #endif
   }

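With this change, the NVFuser enabler only reports true when three conditions hold: the build has CUDA, the NVFuser pass is registered, and the profiling/graph executor mode is active. A rough Python-side approximation (nvfuser_expected is a made-up helper; the pass-registration check has no Python binding, and executor mode is read here via a set-and-restore trick, since the setter returns the prior value):

    import torch

    def nvfuser_expected():
        # ~ at::globalContext().hasCUDA()
        cuda_ok = torch.cuda.is_available()
        # ~ getExecutorMode(): the binding returns the previous mode.
        executor_ok = torch._C._jit_set_profiling_executor(True)
        torch._C._jit_set_profiling_executor(executor_ok)  # restore it
        return cuda_ok and executor_ok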


@@ -19,6 +19,7 @@ bool cpu_fuser_enabled = true;
 bool cpu_fuser_enabled = false;
 #endif

+// note: this doesn't necessarily enable NNC because NVFuser might override it
 bool gpu_fuser_enabled = true;

 } // namespace detail
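
The new comment documents precedence: gpu_fuser_enabled (exposed to Python as _jit_override_can_fuse_on_gpu) only says that NNC may fuse on GPU; when NVFuser is enabled, it takes over GPU fusion anyway. A sketch of that interaction, assuming a CUDA build:

    import torch

    torch._C._jit_override_can_fuse_on_gpu(True)   # allow NNC GPU fusion...
    old = torch._C._jit_set_nvfuser_enabled(True)  # ...but NVFuser overrides it
    # ... run scripted CUDA code; fusion groups go to NVFuser, not NNC ...
    torch._C._jit_set_nvfuser_enabled(old)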


@@ -779,6 +779,7 @@ class TensorExprTestOptions():
         torch._C._debug_set_fusion_group_inlining(False)
         self.old_te_must_use_llvm_cpu = torch._C._jit_get_te_must_use_llvm_cpu()
         torch._C._jit_set_te_must_use_llvm_cpu(False)
+        self.old_nvfuser = torch._C._jit_set_nvfuser_enabled(False)

     def restore(self):
         torch._C._jit_set_profiling_executor(self.old_profiling_executor)
@@ -789,6 +790,7 @@
         torch._C._jit_override_can_fuse_on_cpu(self.old_cpu_fuser_state)
         torch._C._debug_set_fusion_group_inlining(self.old_fusion_inlining)
         torch._C._jit_set_te_must_use_llvm_cpu(self.old_te_must_use_llvm_cpu)
+        torch._C._jit_set_nvfuser_enabled(self.old_nvfuser)

 def clone_inputs(args):
     inputs: List[Union[torch.Tensor, List[torch.Tensor]]] = []
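
TensorExprTestOptions now pins NVFuser off alongside the other fuser flags, so TensorExpr tests keep exercising NNC once NVFuser defaults on. An illustrative use (the scripted function is made up, and this assumes, per the hunk above, that the flag snapshot happens in the constructor):

    import torch
    # TensorExprTestOptions as defined above; import path omitted here.

    def f(x):
        return x.sin() + x.cos()

    opts = TensorExprTestOptions()  # snapshot flags, force TE-friendly settings
    try:
        fn = torch.jit.script(f)
        for _ in range(3):
            fn(torch.randn(8))      # profiling runs, then a TE fusion group
    finally:
        opts.restore()              # put every flag back, incl. NVFuser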