Disable the cuDNN workarounds if the version number is new enough to get the corresponding bugs fixed. The bugs that

were work-arounded were fixed and verified.

PiperOrigin-RevId: 215497418
This commit is contained in:
Tim Shen 2018-10-02 18:38:24 -07:00 committed by Todd Wang
parent d76fc38355
commit 82b2794ea0

View File

@ -2487,30 +2487,32 @@ port::Status CudnnSupport::DoConvolveImpl(
// Report an error if we might be hitting a cuDNN bug that accesses illegal
// memory. See nvbugs/2138754, b/80018418.
SE_RETURN_IF_ERROR([&] {
if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
return port::Status::OK();
}
if (input_descriptor.ndims() < 3) {
return port::Status::OK();
}
// Checks that a*b is within the valid range (as provided by NVIDIA).
auto check_sizes = [](size_t a, size_t b) {
if ((a * b * 4608 - 1) >> 31 == 0) {
if (CUDNN_VERSION < 7300) {
SE_RETURN_IF_ERROR([&] {
if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
return port::Status::OK();
}
return port::Status(
port::error::FAILED_PRECONDITION,
"This configuration potentially accesses illegal memory.");
};
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(),
output_descriptor.feature_map_count()));
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
input_descriptor.feature_map_count()));
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
output_descriptor.feature_map_count()));
return port::Status::OK();
}());
if (input_descriptor.ndims() < 3) {
return port::Status::OK();
}
// Checks that a*b is within the valid range (as provided by NVIDIA).
auto check_sizes = [](size_t a, size_t b) {
if ((a * b * 4608 - 1) >> 31 == 0) {
return port::Status::OK();
}
return port::Status(
port::error::FAILED_PRECONDITION,
"This configuration potentially accesses illegal memory.");
};
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(),
output_descriptor.feature_map_count()));
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
input_descriptor.feature_map_count()));
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
output_descriptor.feature_map_count()));
return port::Status::OK();
}());
}
if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
!ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
@ -3166,7 +3168,7 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
// Cudnn 7.1.4 has a bug if the workspace of the following convolution is not
// zero-initialized, nvbugs/2254619.
if (CUDNN_VERSION >= 7000 &&
if (CUDNN_VERSION >= 7000 && CUDNN_VERSION < 7300 &&
algorithm_config.algorithm().algo_id() ==
CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 &&
cudnn_type == CUDNN_DATA_HALF &&
@ -3317,31 +3319,33 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
// Report an error if we might be hitting a cuDNN bug that produces incorrect
// results. See nvbugs/2072856
SE_RETURN_IF_ERROR([&] {
if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
return port::Status::OK();
}
if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
return port::Status::OK();
}
int convolution_size = output_descriptor.height() > 1
? filter_descriptor.input_filter_height()
: filter_descriptor.input_filter_width();
if (convolution_size <= 32) {
return port::Status::OK();
}
cudnnConvolutionMode_t convolution_mode;
cudnnDataType_t compute_type;
RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor(
conv.handle(), 0, nullptr, nullptr, nullptr, nullptr, &convolution_mode,
&compute_type));
if (convolution_mode != CUDNN_CONVOLUTION) {
return port::Status::OK();
}
return port::Status(
port::error::FAILED_PRECONDITION,
"This configuration potentially produces incorrect results.");
}());
if (CUDNN_VERSION < 7300) {
SE_RETURN_IF_ERROR([&] {
if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
return port::Status::OK();
}
if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
return port::Status::OK();
}
int convolution_size = output_descriptor.height() > 1
? filter_descriptor.input_filter_height()
: filter_descriptor.input_filter_width();
if (convolution_size <= 32) {
return port::Status::OK();
}
cudnnConvolutionMode_t convolution_mode;
cudnnDataType_t compute_type;
RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor(
conv.handle(), 0, nullptr, nullptr, nullptr, nullptr,
&convolution_mode, &compute_type));
if (convolution_mode != CUDNN_CONVOLUTION) {
return port::Status::OK();
}
return port::Status(
port::error::FAILED_PRECONDITION,
"This configuration potentially produces incorrect results.");
}());
}
if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
!ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
@ -3357,8 +3361,8 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
// This wrong result caused by the bug is very flaky. It needs to be run for
// up to 20 times to produce a mismatch.
//
// TODO(timshen): add a nvbugs link.
if (CUDNN_VERSION >= 7100 &&
// See nvbugs/2379553.
if (CUDNN_VERSION >= 7100 && CUDNN_VERSION < 7300 &&
algorithm_config.algorithm().algo_id() ==
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 &&
cudnn_type == CUDNN_DATA_HALF &&