diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8793cf1d482..eb0ce9c882e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -212,7 +212,7 @@ cmake_dependent_option(
   BUILD_NVFUSER_BENCHMARK "Build C++ binaries for nvfuser benchmarks" ON
   "USE_CUDA;BUILD_TEST" OFF)
 cmake_dependent_option(
-  USE_EXPERIMENTAL_CUDNN_V8_API "Use experimental cuDNN v8 API" OFF
+  USE_EXPERIMENTAL_CUDNN_V8_API "Use experimental cuDNN v8 API" ON
   "USE_CUDNN" OFF)
 option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
 option(USE_KINETO "Use Kineto profiling library" ON)
diff --git a/aten/src/ATen/native/cudnn/Conv_v8.cpp b/aten/src/ATen/native/cudnn/Conv_v8.cpp
index b152eb31fb3..24c5f3c2e3d 100644
--- a/aten/src/ATen/native/cudnn/Conv_v8.cpp
+++ b/aten/src/ATen/native/cudnn/Conv_v8.cpp
@@ -269,7 +269,7 @@ auto build_opgraph_fused(const cudnnHandle_t handle, const Tensor & x, const Ten
   return opGraph;
 }
 
-const auto get_generator_sources(const cudnnBackendDescriptorType_t& desc, const Tensor& x, const bool deterministic, const bool allow_tf32, const cudnnBackendHeurMode_t heur_mode) {
+auto get_generator_sources(const cudnnBackendDescriptorType_t& desc, const Tensor& x, const bool deterministic, const bool allow_tf32, const cudnnBackendHeurMode_t heur_mode) {
   // Method for engine config generator based on heuristics
   auto heurgen_method = [/*&desc,*/ &x, deterministic, allow_tf32, heur_mode](cudnn_frontend::OperationGraph &opGraph) -> cudnn_frontend::EngineConfigList {
       auto heuristics = cudnn_frontend::EngineHeuristicsBuilder()
diff --git a/aten/src/ATen/native/quantized/cudnn/Pooling.cpp b/aten/src/ATen/native/quantized/cudnn/Pooling.cpp
index 7761c846fcb..8335eeeca2f 100644
--- a/aten/src/ATen/native/quantized/cudnn/Pooling.cpp
+++ b/aten/src/ATen/native/quantized/cudnn/Pooling.cpp
@@ -189,7 +189,6 @@ Tensor quantized_max_pool2d_cudnn(
       stride[0], // vertical stride
       stride[1])); // horizontal stride
 
-  auto dataType = getCudnnDataType(input);
   float one{1};
   float zero{0.0};
   TensorDescriptor xDesc;
diff --git a/aten/src/ATen/native/quantized/cudnn/utils.h b/aten/src/ATen/native/quantized/cudnn/utils.h
index b7dbe43b7a9..3eba354bd20 100644
--- a/aten/src/ATen/native/quantized/cudnn/utils.h
+++ b/aten/src/ATen/native/quantized/cudnn/utils.h
@@ -19,7 +19,7 @@ This file contains some of the auxiliary functions used by both Conv.cpp & Linea
 #include
 #include
 
-struct TORCH_API PackedLinearWeightCudnn : public LinearPackedParamsBase {
+struct PackedLinearWeightCudnn : public LinearPackedParamsBase {
   PackedLinearWeightCudnn(
       at::Tensor orig_weight,
       c10::optional<at::Tensor> bias,
@@ -77,7 +77,7 @@ struct TORCH_API PackedLinearWeightCudnn : public LinearPackedParamsBase {
 };
 
 template <int kSpatialDim>
-struct TORCH_API PackedConvWeightCudnn : public ConvPackedParamsBase<kSpatialDim> {
+struct PackedConvWeightCudnn : public ConvPackedParamsBase<kSpatialDim> {
   PackedConvWeightCudnn(
       at::Tensor orig_weight,
       c10::optional<at::Tensor> bias,
@@ -317,7 +317,6 @@ cudnn_frontend::ExecutionPlan get_execplan_from_heuristics_else_fall_back(cudnn_
 }
 
 {
-  auto total_engines = opGraph.getEngineCount();
   // std::cout << opGraph.describe() << " has " << total_engines << " engines." << std::endl;
   auto engine = cudnn_frontend::EngineBuilder().setGlobalEngineIdx(0).setOperationGraph(opGraph).build();
   // std::cout << engine.describe() << std::endl;
diff --git a/third_party/cudnn_frontend b/third_party/cudnn_frontend
index fa611998a36..43709ab96c4 160000
--- a/third_party/cudnn_frontend
+++ b/third_party/cudnn_frontend
@@ -1 +1 @@
-Subproject commit fa611998a360cbabaa2dcc7c9859748144114fc0
+Subproject commit 43709ab96c47e26eebcdac72f93f946d44ceffa8