mirror of
https://github.com/zebrajr/tensorflow.git
synced 2025-12-06 12:20:11 +01:00
Disable the cuDNN workarounds if the version number is new enough to get the corresponding bugs fixed. The bugs that
were work-arounded were fixed and verified. PiperOrigin-RevId: 215497418
This commit is contained in:
parent
d76fc38355
commit
82b2794ea0
|
|
@ -2487,30 +2487,32 @@ port::Status CudnnSupport::DoConvolveImpl(
|
|||
|
||||
// Report an error if we might be hitting a cuDNN bug that accesses illegal
|
||||
// memory. See nvbugs/2138754, b/80018418.
|
||||
SE_RETURN_IF_ERROR([&] {
|
||||
if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
if (input_descriptor.ndims() < 3) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
// Checks that a*b is within the valid range (as provided by NVIDIA).
|
||||
auto check_sizes = [](size_t a, size_t b) {
|
||||
if ((a * b * 4608 - 1) >> 31 == 0) {
|
||||
if (CUDNN_VERSION < 7300) {
|
||||
SE_RETURN_IF_ERROR([&] {
|
||||
if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
return port::Status(
|
||||
port::error::FAILED_PRECONDITION,
|
||||
"This configuration potentially accesses illegal memory.");
|
||||
};
|
||||
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(),
|
||||
output_descriptor.feature_map_count()));
|
||||
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
|
||||
input_descriptor.feature_map_count()));
|
||||
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
|
||||
output_descriptor.feature_map_count()));
|
||||
return port::Status::OK();
|
||||
}());
|
||||
if (input_descriptor.ndims() < 3) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
// Checks that a*b is within the valid range (as provided by NVIDIA).
|
||||
auto check_sizes = [](size_t a, size_t b) {
|
||||
if ((a * b * 4608 - 1) >> 31 == 0) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
return port::Status(
|
||||
port::error::FAILED_PRECONDITION,
|
||||
"This configuration potentially accesses illegal memory.");
|
||||
};
|
||||
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(),
|
||||
output_descriptor.feature_map_count()));
|
||||
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
|
||||
input_descriptor.feature_map_count()));
|
||||
SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
|
||||
output_descriptor.feature_map_count()));
|
||||
return port::Status::OK();
|
||||
}());
|
||||
}
|
||||
|
||||
if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
|
||||
!ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
|
||||
|
|
@ -3166,7 +3168,7 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
|
|||
|
||||
// Cudnn 7.1.4 has a bug if the workspace of the following convolution is not
|
||||
// zero-initialized, nvbugs/2254619.
|
||||
if (CUDNN_VERSION >= 7000 &&
|
||||
if (CUDNN_VERSION >= 7000 && CUDNN_VERSION < 7300 &&
|
||||
algorithm_config.algorithm().algo_id() ==
|
||||
CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 &&
|
||||
cudnn_type == CUDNN_DATA_HALF &&
|
||||
|
|
@ -3317,31 +3319,33 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
|
|||
|
||||
// Report an error if we might be hitting a cuDNN bug that produces incorrect
|
||||
// results. See nvbugs/2072856
|
||||
SE_RETURN_IF_ERROR([&] {
|
||||
if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
int convolution_size = output_descriptor.height() > 1
|
||||
? filter_descriptor.input_filter_height()
|
||||
: filter_descriptor.input_filter_width();
|
||||
if (convolution_size <= 32) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
cudnnConvolutionMode_t convolution_mode;
|
||||
cudnnDataType_t compute_type;
|
||||
RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor(
|
||||
conv.handle(), 0, nullptr, nullptr, nullptr, nullptr, &convolution_mode,
|
||||
&compute_type));
|
||||
if (convolution_mode != CUDNN_CONVOLUTION) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
return port::Status(
|
||||
port::error::FAILED_PRECONDITION,
|
||||
"This configuration potentially produces incorrect results.");
|
||||
}());
|
||||
if (CUDNN_VERSION < 7300) {
|
||||
SE_RETURN_IF_ERROR([&] {
|
||||
if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
int convolution_size = output_descriptor.height() > 1
|
||||
? filter_descriptor.input_filter_height()
|
||||
: filter_descriptor.input_filter_width();
|
||||
if (convolution_size <= 32) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
cudnnConvolutionMode_t convolution_mode;
|
||||
cudnnDataType_t compute_type;
|
||||
RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor(
|
||||
conv.handle(), 0, nullptr, nullptr, nullptr, nullptr,
|
||||
&convolution_mode, &compute_type));
|
||||
if (convolution_mode != CUDNN_CONVOLUTION) {
|
||||
return port::Status::OK();
|
||||
}
|
||||
return port::Status(
|
||||
port::error::FAILED_PRECONDITION,
|
||||
"This configuration potentially produces incorrect results.");
|
||||
}());
|
||||
}
|
||||
|
||||
if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
|
||||
!ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
|
||||
|
|
@ -3357,8 +3361,8 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
|
|||
// This wrong result caused by the bug is very flaky. It needs to be run for
|
||||
// up to 20 times to produce a mismatch.
|
||||
//
|
||||
// TODO(timshen): add a nvbugs link.
|
||||
if (CUDNN_VERSION >= 7100 &&
|
||||
// See nvbugs/2379553.
|
||||
if (CUDNN_VERSION >= 7100 && CUDNN_VERSION < 7300 &&
|
||||
algorithm_config.algorithm().algo_id() ==
|
||||
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 &&
|
||||
cudnn_type == CUDNN_DATA_HALF &&
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user