[cuDNN][Quantization] Don't print when plan finalization fails in cuDNN quantization backend (#128177)

Similar in spirit to #125790; hopefully addresses the failures seen with the cuDNN 9.1 upgrade: https://github.com/pytorch/pytorch/pull/128166

CC @nWEIdia @atalman

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128177
Approved by: https://github.com/nWEIdia, https://github.com/Skylion007
This commit is contained in:
Eddie Yan 2024-06-11 18:09:25 +00:00 committed by PyTorch MergeBot
parent 8a09940a54
commit cac7a22b92
4 changed files with 3 additions and 4 deletions

View File

@ -242,7 +242,7 @@ Tensor add(Tensor qa, Tensor qb, double output_scale, int64_t output_zero_point)
run(plan_desc);
execution_plan_cache[key] = plan_desc;
return quantized_output.view(orig_sizes);
} catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
} catch (cudnn_frontend::cudnnException &e) {} catch(c10::CuDNNError &e) {}
}
TORCH_CHECK(false, "Unable to find an engine to execute this computation in Quantized Add Cudnn");

View File

@ -252,7 +252,7 @@ void PackedConvWeightCudnn<kSpatialDim>::apply_impl_helper(const at::Tensor& qua
run(plan);
execution_plan_cache.emplace(key, plan);
return;
} catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
} catch (cudnn_frontend::cudnnException &e) {} catch(c10::CuDNNError &e) {}
}
TORCH_CHECK(false, "Unable to find an engine to execute this computation in Quantized Conv2D Cudnn");

View File

@ -286,7 +286,7 @@ void PackedLinearWeightCudnn::apply_impl_helper(const at::Tensor& quantized_outp
run(plan);
execution_plan_cache.emplace(key, plan);
return;
} catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
} catch (cudnn_frontend::cudnnException &e) {} catch(c10::CuDNNError &e) {}
}
TORCH_CHECK(false, "Unable to find an engine to execute this computation Quantized Linear Cudnn");

View File

@ -4052,7 +4052,6 @@ class TestQuantizedLinear(TestCase):
use_channelwise=st.sampled_from([False])) # channelwise currently not supported for qlinear cudnn
@skipIfNoFBGEMM
@unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
@unittest.skipIf(TEST_CUDNN and torch.backends.cudnn.version() == 90100, "expected failure on cuDNN 9.1.0")
@unittest.skipIf(not SM80OrLater, "requires sm80 or later.")
@unittest.skipIf(TEST_ROCM, "not supported on rocm.")
# TODO: check with yang regarding CUDNN flags