mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 00:20:18 +01:00
[cuDNN V8 API] Enable cuDNN v8 API by default (#75466)
Testing via CI.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/75466
Approved by: https://github.com/ngimel
This commit is contained in:
parent
068d35a648
commit
14ab3ff484
|
|
@ -212,7 +212,7 @@ cmake_dependent_option(
|
|||
BUILD_NVFUSER_BENCHMARK "Build C++ binaries for nvfuser benchmarks" ON
|
||||
"USE_CUDA;BUILD_TEST" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_EXPERIMENTAL_CUDNN_V8_API "Use experimental cuDNN v8 API" OFF
|
||||
USE_EXPERIMENTAL_CUDNN_V8_API "Use experimental cuDNN v8 API" ON
|
||||
"USE_CUDNN" OFF)
|
||||
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
|
||||
option(USE_KINETO "Use Kineto profiling library" ON)
|
||||
|
|
|
|||
|
|
@ -269,7 +269,7 @@ auto build_opgraph_fused(const cudnnHandle_t handle, const Tensor & x, const Ten
|
|||
return opGraph;
|
||||
}
|
||||
|
||||
const auto get_generator_sources(const cudnnBackendDescriptorType_t& desc, const Tensor& x, const bool deterministic, const bool allow_tf32, const cudnnBackendHeurMode_t heur_mode) {
|
||||
auto get_generator_sources(const cudnnBackendDescriptorType_t& desc, const Tensor& x, const bool deterministic, const bool allow_tf32, const cudnnBackendHeurMode_t heur_mode) {
|
||||
// Method for engine config generator based on heuristics
|
||||
auto heurgen_method = [/*&desc,*/ &x, deterministic, allow_tf32, heur_mode](cudnn_frontend::OperationGraph &opGraph) -> cudnn_frontend::EngineConfigList {
|
||||
auto heuristics = cudnn_frontend::EngineHeuristicsBuilder()
|
||||
|
|
|
|||
|
|
@ -189,7 +189,6 @@ Tensor quantized_max_pool2d_cudnn(
|
|||
stride[0], // vertical stride
|
||||
stride[1])); // horizontal stride
|
||||
|
||||
auto dataType = getCudnnDataType(input);
|
||||
float one{1};
|
||||
float zero{0.0};
|
||||
TensorDescriptor xDesc;
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ This file contains some of the auxiliary functions used by both Conv.cpp & Linea
|
|||
#include <c10/util/ArrayRef.h>
|
||||
#include <cudnn_frontend.h>
|
||||
|
||||
struct TORCH_API PackedLinearWeightCudnn : public LinearPackedParamsBase {
|
||||
struct PackedLinearWeightCudnn : public LinearPackedParamsBase {
|
||||
PackedLinearWeightCudnn(
|
||||
at::Tensor orig_weight,
|
||||
c10::optional<at::Tensor> bias,
|
||||
|
|
@ -77,7 +77,7 @@ struct TORCH_API PackedLinearWeightCudnn : public LinearPackedParamsBase {
|
|||
};
|
||||
|
||||
template <int kSpatialDim = 2>
|
||||
struct TORCH_API PackedConvWeightCudnn : public ConvPackedParamsBase<kSpatialDim> {
|
||||
struct PackedConvWeightCudnn : public ConvPackedParamsBase<kSpatialDim> {
|
||||
PackedConvWeightCudnn(
|
||||
at::Tensor orig_weight,
|
||||
c10::optional<at::Tensor> bias,
|
||||
|
|
@ -317,7 +317,6 @@ cudnn_frontend::ExecutionPlan get_execplan_from_heuristics_else_fall_back(cudnn_
|
|||
}
|
||||
|
||||
{
|
||||
auto total_engines = opGraph.getEngineCount();
|
||||
// std::cout << opGraph.describe() << " has " << total_engines << " engines." << std::endl;
|
||||
auto engine = cudnn_frontend::EngineBuilder().setGlobalEngineIdx(0).setOperationGraph(opGraph).build();
|
||||
// std::cout << engine.describe() << std::endl;
|
||||
|
|
|
|||
2
third_party/cudnn_frontend
vendored
2
third_party/cudnn_frontend
vendored
|
|
@ -1 +1 @@
|
|||
Subproject commit fa611998a360cbabaa2dcc7c9859748144114fc0
|
||||
Subproject commit 43709ab96c47e26eebcdac72f93f946d44ceffa8
|
||||
Loading…
Reference in New Issue
Block a user