[cuDNN V8 API] Enable cuDNN v8 API by default (#75466)

Testing via CI
Pull Request resolved: https://github.com/pytorch/pytorch/pull/75466
Approved by: https://github.com/ngimel
2025-12-06 00:20:18 +01:00 · 2022-05-17 21:54:17 +00:00 · 2022-05-17 21:54:17 +00:00 · 14ab3ff484
commit 14ab3ff484
parent 068d35a648
5 changed files with 5 additions and 7 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -212,7 +212,7 @@ cmake_dependent_option(
    BUILD_NVFUSER_BENCHMARK "Build C++ binaries for nvfuser benchmarks" ON
    "USE_CUDA;BUILD_TEST" OFF)
 cmake_dependent_option(
-    USE_EXPERIMENTAL_CUDNN_V8_API "Use experimental cuDNN v8 API" OFF
+    USE_EXPERIMENTAL_CUDNN_V8_API "Use experimental cuDNN v8 API" ON
    "USE_CUDNN" OFF)
 option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
 option(USE_KINETO "Use Kineto profiling library" ON)
--- a/aten/src/ATen/native/cudnn/Conv_v8.cpp
+++ b/aten/src/ATen/native/cudnn/Conv_v8.cpp
@@ -269,7 +269,7 @@ auto build_opgraph_fused(const cudnnHandle_t handle, const Tensor & x, const Ten
  return opGraph;
 }

-const auto get_generator_sources(const cudnnBackendDescriptorType_t& desc, const Tensor& x, const bool deterministic, const bool allow_tf32, const cudnnBackendHeurMode_t heur_mode) {
+auto get_generator_sources(const cudnnBackendDescriptorType_t& desc, const Tensor& x, const bool deterministic, const bool allow_tf32, const cudnnBackendHeurMode_t heur_mode) {
   // Method for engine config generator based on heuristics
  auto heurgen_method = [/*&desc,*/ &x, deterministic, allow_tf32, heur_mode](cudnn_frontend::OperationGraph &opGraph) -> cudnn_frontend::EngineConfigList {
      auto heuristics = cudnn_frontend::EngineHeuristicsBuilder()
--- a/aten/src/ATen/native/quantized/cudnn/Pooling.cpp
+++ b/aten/src/ATen/native/quantized/cudnn/Pooling.cpp
@@ -189,7 +189,6 @@ Tensor quantized_max_pool2d_cudnn(
      stride[0], // vertical stride
      stride[1])); // horizontal stride

-  auto dataType = getCudnnDataType(input);
  float one{1};
  float zero{0.0};
  TensorDescriptor xDesc;
--- a/aten/src/ATen/native/quantized/cudnn/utils.h
+++ b/aten/src/ATen/native/quantized/cudnn/utils.h
@@ -19,7 +19,7 @@ This file contains some of the auxiliary functions used by both Conv.cpp & Linea
 #include <c10/util/ArrayRef.h>
 #include <cudnn_frontend.h>

-struct TORCH_API PackedLinearWeightCudnn : public LinearPackedParamsBase {
+struct PackedLinearWeightCudnn : public LinearPackedParamsBase {
  PackedLinearWeightCudnn(
      at::Tensor orig_weight,
      c10::optional<at::Tensor> bias,
@@ -77,7 +77,7 @@ struct TORCH_API PackedLinearWeightCudnn : public LinearPackedParamsBase {
 };

 template <int kSpatialDim = 2>
-struct TORCH_API PackedConvWeightCudnn : public ConvPackedParamsBase<kSpatialDim> {
+struct PackedConvWeightCudnn : public ConvPackedParamsBase<kSpatialDim> {
  PackedConvWeightCudnn(
      at::Tensor orig_weight,
      c10::optional<at::Tensor> bias,
@@ -317,7 +317,6 @@ cudnn_frontend::ExecutionPlan get_execplan_from_heuristics_else_fall_back(cudnn_
  }

  {
-    auto total_engines = opGraph.getEngineCount();
    // std::cout << opGraph.describe() << " has " << total_engines << " engines." << std::endl;
    auto engine = cudnn_frontend::EngineBuilder().setGlobalEngineIdx(0).setOperationGraph(opGraph).build();
    // std::cout << engine.describe() << std::endl;
--- a/third_party/cudnn_frontend
+++ b/third_party/cudnn_frontend
@@ -1 +1 @@
-Subproject commit fa611998a360cbabaa2dcc7c9859748144114fc0
+Subproject commit 43709ab96c47e26eebcdac72f93f946d44ceffa8