Add functional support for cudnnConvolutionBiasActivationForward().

PiperOrigin-RevId: 157788425
This commit is contained in:
Yangzihao Wang 2017-06-01 17:50:43 -07:00 committed by TensorFlower Gardener
parent 7d7a403096
commit 69075f3546
5 changed files with 434 additions and 35 deletions

View File

@ -239,6 +239,17 @@ CUDNN_DNN_ROUTINE_EACH_R5(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
#undef CUDNN_DNN_ROUTINE_EACH_R5
#endif
// APIs in R6
// clang-format off
#if CUDNN_VERSION >= 6000
#define CUDNN_DNN_ROUTINE_EACH_R6(__macro) \
__macro(cudnnConvolutionBiasActivationForward)
// clang-format on
CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
#undef CUDNN_DNN_ROUTINE_EACH_R6
#endif
#undef CUDNN_DNN_ROUTINE_EACH
} // namespace wrap
@ -1791,6 +1802,7 @@ bool CudnnSupport::DoConvolveImpl(
const FilterDescriptor& filter_descriptor,
const DeviceMemory<T>& filter_data,
const ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<T>& biases, dnn::ActivationMode activation_mode,
const BatchDescriptor& output_descriptor, DeviceMemory<T>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
@ -1917,6 +1929,26 @@ bool CudnnSupport::DoConvolveImpl(
}
}
// A biases tensor and an activation mode come as a pair: both are fused
// into a single cudnnConvolutionBiasActivationForward() call, so biases
// require a supported (relu-like) activation, and a non-kNone activation
// requires biases.
const bool has_biases = (biases != nullptr);
const bool supported_activation_mode =
    (activation_mode == dnn::ActivationMode::kRelu6 ||
     activation_mode == dnn::ActivationMode::kReluX ||
     activation_mode == dnn::ActivationMode::kRelu);
if (has_biases && !supported_activation_mode) {
  LOG(ERROR) << "cudnnConvolutionBiasActivationForward() only "
                "support relu activation.";
  return false;
}
// NOTE: the original check was inverted (has_biases && mode != kNone),
// which rejected every biased call that passed the guard above. The fused
// path is invalid only when an activation is requested WITHOUT biases.
if (!has_biases && activation_mode != dnn::ActivationMode::kNone) {
  LOG(ERROR) << "To use cudnnConvolutionBiasActivationForward() "
                "with a valid activation mode, need to also provide "
                "a valid biases tensor.";
  return false;
}
std::unique_ptr<CUDATimer> timer;
if (is_profiling) {
timer.reset(new CUDATimer(parent_));
@ -1931,14 +1963,45 @@ bool CudnnSupport::DoConvolveImpl(
return false;
}
}
status = wrap::cudnnConvolutionForward(
parent_, ToHandle(dnn_handle_),
/*alpha=*/&alpha, /*srcDesc=*/input_nd.handle(),
/*srcData=*/input_data.opaque(), /*filterDesc=*/filter.handle(),
/*filterData=*/filter_data.opaque(), /*convDesc=*/conv.handle(),
/*algo=*/algo, /*workSpace=*/scratch.opaque(),
/*workSpaceSizeInBytes=*/scratch.size(), /*beta=*/&beta,
/*destDesc=*/output_nd.handle(), /*destData=*/output_data->opaque());
if (has_biases) {
  CHECK(supported_activation_mode);
#if CUDNN_VERSION < 6000
  LOG(ERROR) << "cudnnConvolutionBiasActivationForward() is only "
                "supported for cuDNN version >= 6.";
  return false;
#else
  // cuDNN expects the bias as a 1 x C x 1 x 1 tensor whose feature-map
  // count matches the output tensor.
  BatchDescriptor bias_dimensions;
  bias_dimensions.set_count(1)
      .set_feature_map_count(output_descriptor.feature_map_count())
      .set_height(1)
      .set_width(1)
      .set_layout(dnn::DataLayout::kBatchYXDepth);
  ScopedTensorDescriptor bias_descriptor{
      parent_, bias_dimensions, static_cast<cudnnDataType_t>(cudnn_type)};
  ScopedActivationDescriptor activation_desc{parent_, activation_mode,
                                             output_descriptor.value_max()};
  status = wrap::cudnnConvolutionBiasActivationForward(
      parent_, ToHandle(dnn_handle_),
      /*alpha1=*/&alpha, /*srcDesc=*/input_nd.handle(),
      /*srcData=*/input_data.opaque(), /*filterDesc=*/filter.handle(),
      /*filterData=*/filter_data.opaque(), /*convDesc=*/conv.handle(),
      /*algo=*/algo, /*workSpace=*/scratch.opaque(),
      /*workSpaceSizeInBytes=*/scratch.size(), /*alpha2=*/&beta,
      // cuDNN requires a non-null z data pointer matching zDesc even when
      // alpha2 is zero; passing nullptr fails with CUDNN_STATUS_BAD_PARAM.
      // Since alpha2 (beta) is 0 here, z's contents are never read, so the
      // output buffer itself is a safe choice.
      /*zDesc=*/output_nd.handle(), /*z=*/output_data->opaque(),
      /*biasDesc=*/bias_descriptor.handle(),
      /*bias=*/biases.opaque(), /*activationDesc=*/activation_desc.handle(),
      /*destDesc=*/output_nd.handle(), /*destData=*/output_data->opaque());
#endif  // CUDNN_VERSION < 6000
} else {
  // No biases: plain convolution without the fused epilogue.
  status = wrap::cudnnConvolutionForward(
      parent_, ToHandle(dnn_handle_),
      /*alpha=*/&alpha, /*srcDesc=*/input_nd.handle(),
      /*srcData=*/input_data.opaque(), /*filterDesc=*/filter.handle(),
      /*filterData=*/filter_data.opaque(), /*convDesc=*/conv.handle(),
      /*algo=*/algo, /*workSpace=*/scratch.opaque(),
      /*workSpaceSizeInBytes=*/scratch.size(), /*beta=*/&beta,
      /*destDesc=*/output_nd.handle(), /*destData=*/output_data->opaque());
}
if (is_profiling) {
if (!timer->Stop(AsCUDAStream(stream))) {
timer->Destroy();
@ -2211,16 +2274,48 @@ bool CudnnSupport::DoConvolve(
const FilterDescriptor& filter_descriptor,
const DeviceMemory<float>& filter_data,
const ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
const BatchDescriptor& output_descriptor, DeviceMemory<float>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return DoConvolveImpl<float>(
stream, CUDNN_DATA_FLOAT, batch_descriptor, input_data, filter_descriptor,
filter_data, convolution_descriptor, output_descriptor, output_data,
filter_data, convolution_descriptor, biases, activation_mode,
output_descriptor, output_data, scratch_allocator, algorithm_config,
output_profile_result);
}
// Single-precision convolution without a fused bias/activation epilogue:
// delegates to DoConvolveImpl with a null biases tensor and kNone mode.
bool CudnnSupport::DoConvolve(
    Stream* stream, const BatchDescriptor& batch_descriptor,
    const DeviceMemory<float>& input_data,
    const FilterDescriptor& filter_descriptor,
    const DeviceMemory<float>& filter_data,
    const ConvolutionDescriptor& convolution_descriptor,
    const BatchDescriptor& output_descriptor, DeviceMemory<float>* output_data,
    ScratchAllocator* scratch_allocator,
    const dnn::AlgorithmConfig& algorithm_config,
    dnn::ProfileResult* output_profile_result) {
  const bool launched = DoConvolveImpl<float>(
      stream, CUDNN_DATA_FLOAT, batch_descriptor, input_data,
      filter_descriptor, filter_data, convolution_descriptor,
      /*biases=*/nullptr, dnn::ActivationMode::kNone, output_descriptor,
      output_data, scratch_allocator, algorithm_config,
      output_profile_result);
  return launched;
}
// Fused convolution + bias add + activation for double precision.
// Not implemented: always logs an error and reports failure.
bool CudnnSupport::DoConvolve(
Stream* stream, const BatchDescriptor& batch_descriptor,
const DeviceMemory<double>& input_data,
const FilterDescriptor& filter_descriptor,
const DeviceMemory<double>& filter_data,
const ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode,
const BatchDescriptor& output_descriptor,
DeviceMemory<double>* output_data) {
LOG(ERROR) << "double-based DNN not yet implemented";
return false;
}
bool CudnnSupport::DoConvolve(
Stream* stream, const BatchDescriptor& batch_descriptor,
const DeviceMemory<double>& input_data,
@ -2239,13 +2334,33 @@ bool CudnnSupport::DoConvolve(
const FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
const ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<Eigen::half>& biases,
dnn::ActivationMode activation_mode,
const BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return DoConvolveImpl<Eigen::half>(
stream, CUDNN_DATA_HALF, batch_descriptor, input_data, filter_descriptor,
filter_data, convolution_descriptor, output_descriptor, output_data,
filter_data, convolution_descriptor, biases, activation_mode,
output_descriptor, output_data, scratch_allocator, algorithm_config,
output_profile_result);
}
// Half-precision convolution without a fused bias/activation epilogue:
// delegates to DoConvolveImpl with a null biases tensor and kNone mode.
bool CudnnSupport::DoConvolve(
    Stream* stream, const BatchDescriptor& batch_descriptor,
    const DeviceMemory<Eigen::half>& input_data,
    const FilterDescriptor& filter_descriptor,
    const DeviceMemory<Eigen::half>& filter_data,
    const ConvolutionDescriptor& convolution_descriptor,
    const BatchDescriptor& output_descriptor,
    DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
    const dnn::AlgorithmConfig& algorithm_config,
    dnn::ProfileResult* output_profile_result) {
  const bool launched = DoConvolveImpl<Eigen::half>(
      stream, CUDNN_DATA_HALF, batch_descriptor, input_data,
      filter_descriptor, filter_data, convolution_descriptor,
      /*biases=*/nullptr, dnn::ActivationMode::kNone, output_descriptor,
      output_data, scratch_allocator, algorithm_config,
      output_profile_result);
  return launched;
}

View File

@ -137,7 +137,43 @@ class CudnnSupport : public dnn::DnnSupport {
DeviceMemory<float>* x_backprop, DeviceMemory<float>* scale_backprop,
DeviceMemory<float>* offset_backprop) override;
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& input_descriptor,
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<float>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<float>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<float>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<float>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<double>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<double>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<double>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<double>* output_data) override;
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<Eigen::half>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<Eigen::half>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<float>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<float>& filter_data,
@ -156,7 +192,7 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<double>* output_data) override;
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& input_descriptor,
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<Eigen::half>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
@ -477,6 +513,8 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<T>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<T>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<T>* output_data,
ScratchAllocator* scratch_allocator,

View File

@ -796,6 +796,7 @@ class NormalizeDescriptor {
// Describes a kind of non-linearity (threshold-like mathematical function).
enum class ActivationMode {
kNone,
kSigmoid,
// Rectified linear activation: f(x) = x < 0 ? 0 : x
kRelu,
@ -910,9 +911,11 @@ class DnnSupport {
// input_data: un-owned device memory region which contains the
// convolution input.
// filter_descriptor: dimensions of the convolution filter.
// weights: coefficients for the convolution filter, these are multiplied
// against values in the input that the filter convolves over.
// convolution_descriptor: stride of the convolution filter.
// biases: un-owned device memory region containing biases to add to the
// result of the convolution. This can be DeviceMemory pointing to NULL
// only when activation_mode is kNone.
// activation_mode: Type of activation to perform.
// output_descriptor: dimensions of the output layer.
// output_data: un-owned device memory region in which to place the
// convolution result.
@ -939,6 +942,55 @@ class DnnSupport {
// that if the inverse of the filter is applied to the output in VALID mode
// the result is the same size as the input - this requires even more
// padding of the input.
// Enqueues a single-precision fused convolution + bias add + activation
// onto the stream. The default implementation reports "unsupported" by
// returning false; DNN backends override it to provide the fused kernel.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<float>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<float>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
ProfileResult* output_profile_result) {
// Unsupported unless overridden by the backend.
return false;
}
// Enqueues a double-precision fused convolution, bias add, and activation
// operation onto the stream. See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<double>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<double>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<double>* output_data) {
// Unsupported unless overridden by the backend.
return false;
}
// Enqueues a half-precision fused convolution, bias add, and activation
// operation onto the stream. See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<Eigen::half>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<Eigen::half>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
ProfileResult* output_profile_result) {
// Unsupported unless overridden by the backend.
return false;
}
// Enqueues a single-precision convolution operation (without bias add
// or activation) onto the stream.
// See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<float>& input_data,
@ -950,11 +1002,8 @@ class DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
ProfileResult* output_profile_result) = 0;
// Return a list of algorithms supported by the forward convolution pass.
virtual bool GetConvolveAlgorithms(
bool with_winograd_nonfused, std::vector<AlgorithmType>* out_algorithms);
// Enqueues a double-precision convolution operation onto the stream.
// Enqueues a double-precision convolution operation (without bias add
// or activation) onto the stream.
// See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
@ -965,7 +1014,8 @@ class DnnSupport {
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<double>* output_data) = 0;
// Enqueues a half-precision convolution operation onto the stream.
// Enqueues a half-precision convolution operation (without bias add
// or activation) onto the stream.
// See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
@ -979,6 +1029,10 @@ class DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
ProfileResult* output_profile_result) = 0;
// Return a list of algorithms supported by the forward convolution pass.
virtual bool GetConvolveAlgorithms(
bool with_winograd_nonfused, std::vector<AlgorithmType>* out_algorithms);
// Version of DoConvolve that uses pre-quantized 8 bit coefficients.
// coefficient_scales specifies the scaling of each column of coefficients:
// original float coefficient[row * num_columns + column] =

View File

@ -350,9 +350,65 @@ Stream &Stream::ThenConvolveWithScratch(
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<Eigen::half> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
const DeviceMemory<Eigen::half> &biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<Eigen::half> *output,
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator) {
VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
PARAM(filter_descriptor), PARAM(filter_data),
PARAM(convolution_descriptor), PARAM(biases),
PARAM(activation_mode), PARAM(output_descriptor), PARAM(output));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoConvolve(
this, input_descriptor, input_data, filter_descriptor, filter_data,
convolution_descriptor, biases, activation_mode, output_descriptor,
output, scratch_allocator, dnn::AlgorithmConfig(),
/*output_profile_result=*/nullptr));
} else {
SetErrorAndLogNoDnnSupport();
}
}
return *this;
}
// Enqueues a fused single-precision convolution + bias add + activation,
// drawing temporary workspace from |scratch_allocator|. Uses the default
// algorithm configuration and does not collect profiling results.
Stream &Stream::ThenConvolveWithScratch(
    const dnn::BatchDescriptor &input_descriptor,
    const DeviceMemory<float> &input_data,
    const dnn::FilterDescriptor &filter_descriptor,
    const DeviceMemory<float> &filter_data,
    const dnn::ConvolutionDescriptor &convolution_descriptor,
    const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
    const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output,
    ScratchAllocator *scratch_allocator) {
  VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
            PARAM(filter_descriptor), PARAM(filter_data),
            PARAM(convolution_descriptor), PARAM(biases),
            PARAM(activation_mode), PARAM(output_descriptor), PARAM(output));
  if (!ok()) {
    return *this;
  }
  dnn::DnnSupport *dnn = parent_->AsDnn();
  if (dnn == nullptr) {
    SetErrorAndLogNoDnnSupport();
    return *this;
  }
  CheckError(dnn->DoConvolve(this, input_descriptor, input_data,
                             filter_descriptor, filter_data,
                             convolution_descriptor, biases, activation_mode,
                             output_descriptor, output, scratch_allocator,
                             dnn::AlgorithmConfig(),
                             /*output_profile_result=*/nullptr));
  return *this;
}
Stream &Stream::ThenConvolveWithScratch(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<Eigen::half> &input_data,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<Eigen::half> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator) {
VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
PARAM(filter_descriptor), PARAM(filter_data),
PARAM(convolution_descriptor), PARAM(output_descriptor),
@ -362,9 +418,9 @@ Stream &Stream::ThenConvolveWithScratch(
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoConvolve(
this, input_descriptor, input_data, filter_descriptor, filter_data,
convolution_descriptor, output_descriptor, output,
/*scratch_allocator=*/scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
convolution_descriptor, output_descriptor, output, scratch_allocator,
dnn::AlgorithmConfig(),
/*output_profile_result=*/nullptr));
} else {
SetErrorAndLogNoDnnSupport();
}
@ -389,9 +445,74 @@ Stream &Stream::ThenConvolveWithScratch(
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
CheckError(dnn->DoConvolve(
this, input_descriptor, input_data, filter_descriptor, filter_data,
convolution_descriptor, output_descriptor, output,
/*scratch_allocator=*/scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
convolution_descriptor, output_descriptor, output, scratch_allocator,
dnn::AlgorithmConfig(),
/*output_profile_result=*/nullptr));
} else {
SetErrorAndLogNoDnnSupport();
}
}
return *this;
}
// Enqueues a fused single-precision convolution + bias add + activation
// using an explicit algorithm configuration, optionally profiling it.
Stream &Stream::ThenConvolveWithAlgorithm(
    const dnn::BatchDescriptor &input_descriptor,
    const DeviceMemory<float> &input_data,
    const dnn::FilterDescriptor &filter_descriptor,
    const DeviceMemory<float> &filter_data,
    const dnn::ConvolutionDescriptor &convolution_descriptor,
    const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
    const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output,
    ScratchAllocator *scratch_allocator,
    const dnn::AlgorithmConfig &algorithm_config,
    dnn::ProfileResult *output_profile_result) {
  VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
            PARAM(filter_descriptor), PARAM(filter_data),
            PARAM(convolution_descriptor), PARAM(biases),
            PARAM(activation_mode), PARAM(output_descriptor), PARAM(output));
  if (!ok()) {
    return *this;
  }
  dnn::DnnSupport *dnn = parent_->AsDnn();
  if (dnn == nullptr) {
    SetErrorAndLogNoDnnSupport();
    return *this;
  }
  const bool launched = dnn->DoConvolve(
      this, input_descriptor, input_data, filter_descriptor, filter_data,
      convolution_descriptor, biases, activation_mode, output_descriptor,
      output, scratch_allocator, algorithm_config, output_profile_result);
  // When profiling, a failed algorithm is reported through
  // output_profile_result instead of poisoning the stream.
  if (!launched && output_profile_result == nullptr) {
    SetError();
  }
  return *this;
}
Stream &Stream::ThenConvolveWithAlgorithm(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<Eigen::half> &input_data,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<Eigen::half> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
const DeviceMemory<Eigen::half> &biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator,
const dnn::AlgorithmConfig &algorithm_config,
dnn::ProfileResult *output_profile_result) {
VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
PARAM(filter_descriptor), PARAM(filter_data),
PARAM(convolution_descriptor), PARAM(biases),
PARAM(activation_mode), PARAM(output_descriptor), PARAM(output));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
auto status = dnn->DoConvolve(
this, input_descriptor, input_data, filter_descriptor, filter_data,
convolution_descriptor, biases, activation_mode, output_descriptor,
output, scratch_allocator, algorithm_config, output_profile_result);
if (!status && !output_profile_result) {
SetError();
}
} else {
SetErrorAndLogNoDnnSupport();
}
@ -461,6 +582,21 @@ Stream &Stream::ThenConvolveWithAlgorithm(
return *this;
}
// Convenience wrapper for the fused convolution + bias add + activation:
// delegates to ThenConvolveWithScratch with a null scratch allocator.
Stream &Stream::ThenConvolve(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<float> &input_data,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<float> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output) {
return ThenConvolveWithScratch(
input_descriptor, input_data, filter_descriptor, filter_data,
convolution_descriptor, biases, activation_mode, output_descriptor,
output, /*scratch_allocator=*/nullptr);
}
Stream &Stream::ThenConvolve(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<float> &input_data,
@ -582,7 +718,7 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch(
this, filter_descriptor, filter_data, output_descriptor,
backward_output_data, convolution_descriptor, input_descriptor,
backward_input_data, scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
/*output_profile_result=*/nullptr));
} else {
SetErrorAndLogNoDnnSupport();
}
@ -676,7 +812,7 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch(
this, filter_descriptor, filter_data, output_descriptor,
backward_output_data, convolution_descriptor, input_descriptor,
backward_input_data, scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
/*output_profile_result=*/nullptr));
} else {
SetErrorAndLogNoDnnSupport();
}
@ -718,7 +854,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch(
this, input_descriptor, input_data, output_descriptor,
backward_output_data, convolution_descriptor, filter_descriptor,
backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
/*output_profile_result=*/nullptr));
} else {
SetErrorAndLogNoDnnSupport();
}
@ -779,7 +915,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch(
this, input_descriptor, input_data, output_descriptor,
backward_output_data, convolution_descriptor, filter_descriptor,
backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
/*output_profile_result=*/nullptr));
} else {
SetErrorAndLogNoDnnSupport();
}
@ -3868,7 +4004,7 @@ Stream &Stream::ThenBlasGemmBatched(
int batch_count) {
return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda,
b, ldb, beta, c, ldc, batch_count,
nullptr);
/*scratch_allocator=*/nullptr);
}
Stream &Stream::ThenBlasGemmBatchedWithScratch(
@ -3900,7 +4036,7 @@ Stream &Stream::ThenBlasGemmBatched(
int batch_count) {
return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda,
b, ldb, beta, c, ldc, batch_count,
nullptr);
/*scratch_allocator=*/nullptr);
}
Stream &Stream::ThenBlasGemmBatchedWithScratch(
@ -3934,7 +4070,7 @@ Stream &Stream::ThenBlasGemmBatched(
int batch_count) {
return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda,
b, ldb, beta, c, ldc, batch_count,
nullptr);
/*scratch_allocator=*/nullptr);
}
Stream &Stream::ThenBlasGemmBatchedWithScratch(
@ -3973,7 +4109,7 @@ Stream &Stream::ThenBlasGemmBatched(
int batch_count) {
return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda,
b, ldb, beta, c, ldc, batch_count,
nullptr);
/*scratch_allocator=*/nullptr);
}
Stream &Stream::ThenBlasGemmBatchedWithScratch(

View File

@ -240,6 +240,16 @@ class Stream {
DeviceMemory<float> *offset_backprop);
// TODO(leary) add double-precision version of this interface.
Stream &ThenConvolve(const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<float> &input_data,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<float> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
const DeviceMemory<float> &biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output);
Stream &ThenConvolve(const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<float> &input_data,
const dnn::FilterDescriptor &filter_descriptor,
@ -268,6 +278,27 @@ class Stream {
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output_data);
Stream &ThenConvolveWithScratch(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<Eigen::half> &input_data,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<Eigen::half> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
const DeviceMemory<Eigen::half> &biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator);
Stream &ThenConvolveWithScratch(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<float> &input_data,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<float> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output, ScratchAllocator *scratch_allocator);
Stream &ThenConvolveWithScratch(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<Eigen::half> &input_data,
@ -286,6 +317,31 @@ class Stream {
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output, ScratchAllocator *scratch_allocator);
Stream &ThenConvolveWithAlgorithm(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<float> &input_data,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<float> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output, ScratchAllocator *scratch_allocator,
const dnn::AlgorithmConfig &algorithm_config,
dnn::ProfileResult *output_profile_result);
Stream &ThenConvolveWithAlgorithm(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<Eigen::half> &input_data,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<Eigen::half> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
const DeviceMemory<Eigen::half> &biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator,
const dnn::AlgorithmConfig &algorithm_config,
dnn::ProfileResult *output_profile_result);
Stream &ThenConvolveWithAlgorithm(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<float> &input_data,