diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8793cf1d482..eb0ce9c882e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -212,7 +212,7 @@ cmake_dependent_option(
   BUILD_NVFUSER_BENCHMARK "Build C++ binaries for nvfuser benchmarks" ON
   "USE_CUDA;BUILD_TEST" OFF)
 cmake_dependent_option(
-  USE_EXPERIMENTAL_CUDNN_V8_API "Use experimental cuDNN v8 API" OFF
+  USE_EXPERIMENTAL_CUDNN_V8_API "Use experimental cuDNN v8 API" ON
   "USE_CUDNN" OFF)
 option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
 option(USE_KINETO "Use Kineto profiling library" ON)
diff --git a/aten/src/ATen/native/cudnn/Conv_v8.cpp b/aten/src/ATen/native/cudnn/Conv_v8.cpp
index b152eb31fb3..24c5f3c2e3d 100644
--- a/aten/src/ATen/native/cudnn/Conv_v8.cpp
+++ b/aten/src/ATen/native/cudnn/Conv_v8.cpp
@@ -269,7 +269,7 @@ auto build_opgraph_fused(const cudnnHandle_t handle, const Tensor & x, const Ten
   return opGraph;
 }
 
-const auto get_generator_sources(const cudnnBackendDescriptorType_t& desc, const Tensor& x, const bool deterministic, const bool allow_tf32, const cudnnBackendHeurMode_t heur_mode) {
+auto get_generator_sources(const cudnnBackendDescriptorType_t& desc, const Tensor& x, const bool deterministic, const bool allow_tf32, const cudnnBackendHeurMode_t heur_mode) {
   // Method for engine config generator based on heuristics
   auto heurgen_method = [/*&desc,*/ &x, deterministic, allow_tf32, heur_mode](cudnn_frontend::OperationGraph &opGraph) -> cudnn_frontend::EngineConfigList {
       auto heuristics = cudnn_frontend::EngineHeuristicsBuilder()
diff --git a/aten/src/ATen/native/quantized/cudnn/Pooling.cpp b/aten/src/ATen/native/quantized/cudnn/Pooling.cpp
index 7761c846fcb..8335eeeca2f 100644
--- a/aten/src/ATen/native/quantized/cudnn/Pooling.cpp
+++ b/aten/src/ATen/native/quantized/cudnn/Pooling.cpp
@@ -189,7 +189,6 @@ Tensor quantized_max_pool2d_cudnn(
       stride[0], // vertical stride
       stride[1])); // horizontal stride
 
-  auto dataType = getCudnnDataType(input);
   float one{1};
   float zero{0.0};
   TensorDescriptor xDesc;
diff --git a/aten/src/ATen/native/quantized/cudnn/utils.h b/aten/src/ATen/native/quantized/cudnn/utils.h
index b7dbe43b7a9..3eba354bd20 100644
--- a/aten/src/ATen/native/quantized/cudnn/utils.h
+++ b/aten/src/ATen/native/quantized/cudnn/utils.h
@@ -19,7 +19,7 @@ This file contains some of the auxiliary functions used by both Conv.cpp & Linea
 #include
 #include
 
-struct TORCH_API PackedLinearWeightCudnn : public LinearPackedParamsBase {
+struct PackedLinearWeightCudnn : public LinearPackedParamsBase {
   PackedLinearWeightCudnn(
       at::Tensor orig_weight,
       c10::optional<at::Tensor> bias,
@@ -77,7 +77,7 @@ struct TORCH_API PackedLinearWeightCudnn : public LinearPackedParamsBase {
 };
 
 template <int kSpatialDim>
-struct TORCH_API PackedConvWeightCudnn : public ConvPackedParamsBase<kSpatialDim> {
+struct PackedConvWeightCudnn : public ConvPackedParamsBase<kSpatialDim> {
   PackedConvWeightCudnn(
       at::Tensor orig_weight,
       c10::optional<at::Tensor> bias,
@@ -317,7 +317,6 @@ cudnn_frontend::ExecutionPlan get_execplan_from_heuristics_else_fall_back(cudnn_
 }
 
 {
-  auto total_engines = opGraph.getEngineCount();
   // std::cout << opGraph.describe() << " has " << total_engines << " engines." << std::endl;
   auto engine = cudnn_frontend::EngineBuilder().setGlobalEngineIdx(0).setOperationGraph(opGraph).build();
   // std::cout << engine.describe() << std::endl;
diff --git a/third_party/cudnn_frontend b/third_party/cudnn_frontend
index fa611998a36..43709ab96c4 160000
--- a/third_party/cudnn_frontend
+++ b/third_party/cudnn_frontend
@@ -1 +1 @@
-Subproject commit fa611998a360cbabaa2dcc7c9859748144114fc0
+Subproject commit 43709ab96c47e26eebcdac72f93f946d44ceffa8