mirror of
https://github.com/zebrajr/tensorflow.git
synced 2025-12-07 12:20:24 +01:00
Add functional support for cudnnConvolutionBiasActivationForward().
PiperOrigin-RevId: 157788425
This commit is contained in:
parent
7d7a403096
commit
69075f3546
|
|
@ -239,6 +239,17 @@ CUDNN_DNN_ROUTINE_EACH_R5(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
|
|||
#undef CUDNN_DNN_ROUTINE_EACH_R5
|
||||
#endif
|
||||
|
||||
// APIs in R6
|
||||
// clang-format off
|
||||
#if CUDNN_VERSION >= 6000
|
||||
#define CUDNN_DNN_ROUTINE_EACH_R6(__macro) \
|
||||
__macro(cudnnConvolutionBiasActivationForward)
|
||||
|
||||
// clang-format on
|
||||
CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
|
||||
#undef CUDNN_DNN_ROUTINE_EACH_R6
|
||||
#endif
|
||||
|
||||
#undef CUDNN_DNN_ROUTINE_EACH
|
||||
|
||||
} // namespace wrap
|
||||
|
|
@ -1791,6 +1802,7 @@ bool CudnnSupport::DoConvolveImpl(
|
|||
const FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<T>& filter_data,
|
||||
const ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<T>& biases, dnn::ActivationMode activation_mode,
|
||||
const BatchDescriptor& output_descriptor, DeviceMemory<T>* output_data,
|
||||
ScratchAllocator* scratch_allocator,
|
||||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
|
|
@ -1917,6 +1929,26 @@ bool CudnnSupport::DoConvolveImpl(
|
|||
}
|
||||
}
|
||||
|
||||
const bool has_biases = (biases != nullptr);
|
||||
const bool supported_activation_mode =
|
||||
(activation_mode == dnn::ActivationMode::kRelu6 ||
|
||||
activation_mode == dnn::ActivationMode::kReluX ||
|
||||
activation_mode == dnn::ActivationMode::kRelu);
|
||||
|
||||
if (has_biases && !supported_activation_mode) {
|
||||
LOG(ERROR) << "cudnnConvolutionBiasActivationForward() only "
|
||||
"support relu activation.";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (has_biases && activation_mode != dnn::ActivationMode::kNone) {
|
||||
LOG(ERROR) << "To use cudnnConvolutionBiasActivationForward() "
|
||||
"with a valid biases tensor, need to also provide "
|
||||
"a valid activation mode (currently only supports "
|
||||
"kRelu6, kReluX, and kRelu).";
|
||||
return false;
|
||||
}
|
||||
|
||||
std::unique_ptr<CUDATimer> timer;
|
||||
if (is_profiling) {
|
||||
timer.reset(new CUDATimer(parent_));
|
||||
|
|
@ -1931,14 +1963,45 @@ bool CudnnSupport::DoConvolveImpl(
|
|||
return false;
|
||||
}
|
||||
}
|
||||
status = wrap::cudnnConvolutionForward(
|
||||
parent_, ToHandle(dnn_handle_),
|
||||
/*alpha=*/&alpha, /*srcDesc=*/input_nd.handle(),
|
||||
/*srcData=*/input_data.opaque(), /*filterDesc=*/filter.handle(),
|
||||
/*filterData=*/filter_data.opaque(), /*convDesc=*/conv.handle(),
|
||||
/*algo=*/algo, /*workSpace=*/scratch.opaque(),
|
||||
/*workSpaceSizeInBytes=*/scratch.size(), /*beta=*/&beta,
|
||||
/*destDesc=*/output_nd.handle(), /*destData=*/output_data->opaque());
|
||||
if (has_biases) {
|
||||
CHECK(supported_activation_mode);
|
||||
#if CUDNN_VERSION < 6000
|
||||
LOG(ERROR) << "cudnnConvolutionBiasActivationForward() is only "
|
||||
"supported for cuDNN version >= 6.";
|
||||
return false;
|
||||
#else
|
||||
BatchDescriptor bias_dimensions;
|
||||
bias_dimensions.set_count(1)
|
||||
.set_feature_map_count(output_descriptor.feature_map_count())
|
||||
.set_height(1)
|
||||
.set_width(1)
|
||||
.set_layout(dnn::DataLayout::kBatchYXDepth);
|
||||
ScopedTensorDescriptor bias_descriptor{
|
||||
parent_, bias_dimensions, static_cast<cudnnDataType_t>(cudnn_type)};
|
||||
ScopedActivationDescriptor activation_desc{parent_, activation_mode,
|
||||
output_descriptor.value_max()};
|
||||
status = wrap::cudnnConvolutionBiasActivationForward(
|
||||
parent_, ToHandle(dnn_handle_),
|
||||
/*alpha1=*/&alpha, /*srcDesc=*/input_nd.handle(),
|
||||
/*srcData=*/input_data.opaque(), /*filterDesc=*/filter.handle(),
|
||||
/*filterData=*/filter_data.opaque(), /*convDesc=*/conv.handle(),
|
||||
/*algo=*/algo, /*workSpace=*/scratch.opaque(),
|
||||
/*workSpaceSizeInBytes=*/scratch.size(), /*alpha2=*/&beta,
|
||||
/*zDesc=*/output_nd.handle(), /*z=*/nullptr,
|
||||
/*biasDesc=*/bias_descriptor.handle(),
|
||||
/*bias=*/biases.opaque(), /*activationDesc=*/activation_desc.handle(),
|
||||
/*destDesc=*/output_nd.handle(), /*destData=*/output_data->opaque());
|
||||
#endif // CUDNN_VERSION < 6000
|
||||
} else {
|
||||
status = wrap::cudnnConvolutionForward(
|
||||
parent_, ToHandle(dnn_handle_),
|
||||
/*alpha=*/&alpha, /*srcDesc=*/input_nd.handle(),
|
||||
/*srcData=*/input_data.opaque(), /*filterDesc=*/filter.handle(),
|
||||
/*filterData=*/filter_data.opaque(), /*convDesc=*/conv.handle(),
|
||||
/*algo=*/algo, /*workSpace=*/scratch.opaque(),
|
||||
/*workSpaceSizeInBytes=*/scratch.size(), /*beta=*/&beta,
|
||||
/*destDesc=*/output_nd.handle(), /*destData=*/output_data->opaque());
|
||||
}
|
||||
if (is_profiling) {
|
||||
if (!timer->Stop(AsCUDAStream(stream))) {
|
||||
timer->Destroy();
|
||||
|
|
@ -2211,16 +2274,48 @@ bool CudnnSupport::DoConvolve(
|
|||
const FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<float>& filter_data,
|
||||
const ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
|
||||
const BatchDescriptor& output_descriptor, DeviceMemory<float>* output_data,
|
||||
ScratchAllocator* scratch_allocator,
|
||||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
dnn::ProfileResult* output_profile_result) {
|
||||
return DoConvolveImpl<float>(
|
||||
stream, CUDNN_DATA_FLOAT, batch_descriptor, input_data, filter_descriptor,
|
||||
filter_data, convolution_descriptor, output_descriptor, output_data,
|
||||
filter_data, convolution_descriptor, biases, activation_mode,
|
||||
output_descriptor, output_data, scratch_allocator, algorithm_config,
|
||||
output_profile_result);
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoConvolve(
|
||||
Stream* stream, const BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<float>& input_data,
|
||||
const FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<float>& filter_data,
|
||||
const ConvolutionDescriptor& convolution_descriptor,
|
||||
const BatchDescriptor& output_descriptor, DeviceMemory<float>* output_data,
|
||||
ScratchAllocator* scratch_allocator,
|
||||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
dnn::ProfileResult* output_profile_result) {
|
||||
return DoConvolveImpl<float>(
|
||||
stream, CUDNN_DATA_FLOAT, batch_descriptor, input_data, filter_descriptor,
|
||||
filter_data, convolution_descriptor, /*biases=*/nullptr,
|
||||
dnn::ActivationMode::kNone, output_descriptor, output_data,
|
||||
scratch_allocator, algorithm_config, output_profile_result);
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoConvolve(
|
||||
Stream* stream, const BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<double>& input_data,
|
||||
const FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<double>& filter_data,
|
||||
const ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode,
|
||||
const BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<double>* output_data) {
|
||||
LOG(ERROR) << "double-based DNN not yet implemented";
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoConvolve(
|
||||
Stream* stream, const BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<double>& input_data,
|
||||
|
|
@ -2239,13 +2334,33 @@ bool CudnnSupport::DoConvolve(
|
|||
const FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<Eigen::half>& filter_data,
|
||||
const ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<Eigen::half>& biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
|
||||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
dnn::ProfileResult* output_profile_result) {
|
||||
return DoConvolveImpl<Eigen::half>(
|
||||
stream, CUDNN_DATA_HALF, batch_descriptor, input_data, filter_descriptor,
|
||||
filter_data, convolution_descriptor, output_descriptor, output_data,
|
||||
filter_data, convolution_descriptor, biases, activation_mode,
|
||||
output_descriptor, output_data, scratch_allocator, algorithm_config,
|
||||
output_profile_result);
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoConvolve(
|
||||
Stream* stream, const BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<Eigen::half>& filter_data,
|
||||
const ConvolutionDescriptor& convolution_descriptor,
|
||||
const BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
|
||||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
dnn::ProfileResult* output_profile_result) {
|
||||
return DoConvolveImpl<Eigen::half>(
|
||||
stream, CUDNN_DATA_HALF, batch_descriptor, input_data, filter_descriptor,
|
||||
filter_data, convolution_descriptor, /*biases=*/nullptr,
|
||||
dnn::ActivationMode::kNone, output_descriptor, output_data,
|
||||
scratch_allocator, algorithm_config, output_profile_result);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -137,7 +137,43 @@ class CudnnSupport : public dnn::DnnSupport {
|
|||
DeviceMemory<float>* x_backprop, DeviceMemory<float>* scale_backprop,
|
||||
DeviceMemory<float>* offset_backprop) override;
|
||||
|
||||
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& input_descriptor,
|
||||
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<float>& input_data,
|
||||
const dnn::FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<float>& filter_data,
|
||||
const dnn::ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<float>& biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<float>* output_data,
|
||||
ScratchAllocator* scratch_allocator,
|
||||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
dnn::ProfileResult* output_profile_result) override;
|
||||
|
||||
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<double>& input_data,
|
||||
const dnn::FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<double>& filter_data,
|
||||
const dnn::ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<double>& biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<double>* output_data) override;
|
||||
|
||||
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const dnn::FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<Eigen::half>& filter_data,
|
||||
const dnn::ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<Eigen::half>& biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<Eigen::half>* output_data,
|
||||
ScratchAllocator* scratch_allocator,
|
||||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
dnn::ProfileResult* output_profile_result) override;
|
||||
|
||||
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<float>& input_data,
|
||||
const dnn::FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<float>& filter_data,
|
||||
|
|
@ -156,7 +192,7 @@ class CudnnSupport : public dnn::DnnSupport {
|
|||
const dnn::BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<double>* output_data) override;
|
||||
|
||||
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& input_descriptor,
|
||||
bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const dnn::FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<Eigen::half>& filter_data,
|
||||
|
|
@ -477,6 +513,8 @@ class CudnnSupport : public dnn::DnnSupport {
|
|||
const dnn::FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<T>& filter_data,
|
||||
const dnn::ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<T>& biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<T>* output_data,
|
||||
ScratchAllocator* scratch_allocator,
|
||||
|
|
|
|||
|
|
@ -796,6 +796,7 @@ class NormalizeDescriptor {
|
|||
|
||||
// Describes a kind of non-linearity (threshold-like mathematical function).
|
||||
enum class ActivationMode {
|
||||
kNone,
|
||||
kSigmoid,
|
||||
// Rectified linear activation: f(x) = x < 0 ? 0 : x
|
||||
kRelu,
|
||||
|
|
@ -910,9 +911,11 @@ class DnnSupport {
|
|||
// input_data: un-owned device memory region which contains the
|
||||
// convolution input.
|
||||
// filter_descriptor: dimensions of the convolution filter.
|
||||
// weights: coefficients for the convolution filter, these are multiplied
|
||||
// against values in the input that the filter convolves over.
|
||||
// convolution_descriptor: stride of the convolution filter.
|
||||
// biases: un-owned device memory region containing biases to add to the
|
||||
// input. This can be DeviceMemory pointing to NULL only when activation_mode
|
||||
// is kNone.
|
||||
// activation_mode: Type of activation to perform.
|
||||
// output_descriptor: dimensions of the output layer.
|
||||
// output_data: un-owned device memory region in which to place the
|
||||
// convolution result.
|
||||
|
|
@ -939,6 +942,55 @@ class DnnSupport {
|
|||
// that if the inverse of the filter is applied to the output in VALID mode
|
||||
// the result is the same size as the input - this requires even more
|
||||
// padding of the input.
|
||||
virtual bool DoConvolve(
|
||||
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
|
||||
const DeviceMemory<float>& input_data,
|
||||
const dnn::FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<float>& filter_data,
|
||||
const dnn::ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
|
||||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
ProfileResult* output_profile_result) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Enqueues a double-precision fused convolution, bias add, and activation
|
||||
// operation onto the stream. See DoConvolve above for argument details.
|
||||
virtual bool DoConvolve(
|
||||
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<double>& input_data,
|
||||
const dnn::FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<double>& filter_data,
|
||||
const dnn::ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<double>* output_data) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Enqueues a half-precision fused convolution, bias add, and activation
|
||||
// operation onto the stream. See DoConvolve above for argument details.
|
||||
virtual bool DoConvolve(
|
||||
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const dnn::FilterDescriptor& filter_descriptor,
|
||||
const DeviceMemory<Eigen::half>& filter_data,
|
||||
const dnn::ConvolutionDescriptor& convolution_descriptor,
|
||||
const DeviceMemory<Eigen::half>& biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<Eigen::half>* output_data,
|
||||
ScratchAllocator* scratch_allocator,
|
||||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
ProfileResult* output_profile_result) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Enqueues a single-precision convolution operation (without bias add
|
||||
// or activation) onto the stream.
|
||||
// See DoConvolve above for argument details.
|
||||
virtual bool DoConvolve(
|
||||
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
|
||||
const DeviceMemory<float>& input_data,
|
||||
|
|
@ -950,11 +1002,8 @@ class DnnSupport {
|
|||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
ProfileResult* output_profile_result) = 0;
|
||||
|
||||
// Return a list of algorithms supported by the forward convolution pass.
|
||||
virtual bool GetConvolveAlgorithms(
|
||||
bool with_winograd_nonfused, std::vector<AlgorithmType>* out_algorithms);
|
||||
|
||||
// Enqueues a double-precision convolution operation onto the stream.
|
||||
// Enqueues a double-precision convolution operation (without bias add
|
||||
// or activation) onto the stream.
|
||||
// See DoConvolve above for argument details.
|
||||
virtual bool DoConvolve(
|
||||
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
|
||||
|
|
@ -965,7 +1014,8 @@ class DnnSupport {
|
|||
const dnn::BatchDescriptor& output_descriptor,
|
||||
DeviceMemory<double>* output_data) = 0;
|
||||
|
||||
// Enqueues a half-precision convolution operation onto the stream.
|
||||
// Enqueues a half-precision convolution operation (without bias add
|
||||
// or activation) onto the stream.
|
||||
// See DoConvolve above for argument details.
|
||||
virtual bool DoConvolve(
|
||||
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
|
||||
|
|
@ -979,6 +1029,10 @@ class DnnSupport {
|
|||
const dnn::AlgorithmConfig& algorithm_config,
|
||||
ProfileResult* output_profile_result) = 0;
|
||||
|
||||
// Return a list of algorithms supported by the forward convolution pass.
|
||||
virtual bool GetConvolveAlgorithms(
|
||||
bool with_winograd_nonfused, std::vector<AlgorithmType>* out_algorithms);
|
||||
|
||||
// Version of DoConvolve that uses pre-quantized 8 bit coefficients.
|
||||
// coefficient_scales specifies the scaling of each column of coefficients:
|
||||
// original float coefficient[row * num_columns + column] =
|
||||
|
|
|
|||
|
|
@ -350,9 +350,65 @@ Stream &Stream::ThenConvolveWithScratch(
|
|||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<Eigen::half> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const DeviceMemory<Eigen::half> &biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<Eigen::half> *output,
|
||||
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator) {
|
||||
VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
|
||||
PARAM(filter_descriptor), PARAM(filter_data),
|
||||
PARAM(convolution_descriptor), PARAM(biases),
|
||||
PARAM(activation_mode), PARAM(output_descriptor), PARAM(output));
|
||||
|
||||
if (ok()) {
|
||||
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
|
||||
CheckError(dnn->DoConvolve(
|
||||
this, input_descriptor, input_data, filter_descriptor, filter_data,
|
||||
convolution_descriptor, biases, activation_mode, output_descriptor,
|
||||
output, scratch_allocator, dnn::AlgorithmConfig(),
|
||||
/*output_profile_result=*/nullptr));
|
||||
} else {
|
||||
SetErrorAndLogNoDnnSupport();
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::ThenConvolveWithScratch(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<float> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output,
|
||||
ScratchAllocator *scratch_allocator) {
|
||||
VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
|
||||
PARAM(filter_descriptor), PARAM(filter_data),
|
||||
PARAM(convolution_descriptor), PARAM(biases),
|
||||
PARAM(activation_mode), PARAM(output_descriptor), PARAM(output));
|
||||
|
||||
if (ok()) {
|
||||
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
|
||||
CheckError(dnn->DoConvolve(
|
||||
this, input_descriptor, input_data, filter_descriptor, filter_data,
|
||||
convolution_descriptor, biases, activation_mode, output_descriptor,
|
||||
output, scratch_allocator, dnn::AlgorithmConfig(),
|
||||
/*output_profile_result=*/nullptr));
|
||||
} else {
|
||||
SetErrorAndLogNoDnnSupport();
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::ThenConvolveWithScratch(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<Eigen::half> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<Eigen::half> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator) {
|
||||
VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
|
||||
PARAM(filter_descriptor), PARAM(filter_data),
|
||||
PARAM(convolution_descriptor), PARAM(output_descriptor),
|
||||
|
|
@ -362,9 +418,9 @@ Stream &Stream::ThenConvolveWithScratch(
|
|||
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
|
||||
CheckError(dnn->DoConvolve(
|
||||
this, input_descriptor, input_data, filter_descriptor, filter_data,
|
||||
convolution_descriptor, output_descriptor, output,
|
||||
/*scratch_allocator=*/scratch_allocator, dnn::AlgorithmConfig(),
|
||||
nullptr));
|
||||
convolution_descriptor, output_descriptor, output, scratch_allocator,
|
||||
dnn::AlgorithmConfig(),
|
||||
/*output_profile_result=*/nullptr));
|
||||
} else {
|
||||
SetErrorAndLogNoDnnSupport();
|
||||
}
|
||||
|
|
@ -389,9 +445,74 @@ Stream &Stream::ThenConvolveWithScratch(
|
|||
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
|
||||
CheckError(dnn->DoConvolve(
|
||||
this, input_descriptor, input_data, filter_descriptor, filter_data,
|
||||
convolution_descriptor, output_descriptor, output,
|
||||
/*scratch_allocator=*/scratch_allocator, dnn::AlgorithmConfig(),
|
||||
nullptr));
|
||||
convolution_descriptor, output_descriptor, output, scratch_allocator,
|
||||
dnn::AlgorithmConfig(),
|
||||
/*output_profile_result=*/nullptr));
|
||||
} else {
|
||||
SetErrorAndLogNoDnnSupport();
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::ThenConvolveWithAlgorithm(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<float> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output,
|
||||
ScratchAllocator *scratch_allocator,
|
||||
const dnn::AlgorithmConfig &algorithm_config,
|
||||
dnn::ProfileResult *output_profile_result) {
|
||||
VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
|
||||
PARAM(filter_descriptor), PARAM(filter_data),
|
||||
PARAM(convolution_descriptor), PARAM(biases),
|
||||
PARAM(activation_mode), PARAM(output_descriptor), PARAM(output));
|
||||
|
||||
if (ok()) {
|
||||
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
|
||||
auto status = dnn->DoConvolve(
|
||||
this, input_descriptor, input_data, filter_descriptor, filter_data,
|
||||
convolution_descriptor, biases, activation_mode, output_descriptor,
|
||||
output, scratch_allocator, algorithm_config, output_profile_result);
|
||||
if (!status && !output_profile_result) {
|
||||
SetError();
|
||||
}
|
||||
} else {
|
||||
SetErrorAndLogNoDnnSupport();
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::ThenConvolveWithAlgorithm(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<Eigen::half> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<Eigen::half> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const DeviceMemory<Eigen::half> &biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator,
|
||||
const dnn::AlgorithmConfig &algorithm_config,
|
||||
dnn::ProfileResult *output_profile_result) {
|
||||
VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
|
||||
PARAM(filter_descriptor), PARAM(filter_data),
|
||||
PARAM(convolution_descriptor), PARAM(biases),
|
||||
PARAM(activation_mode), PARAM(output_descriptor), PARAM(output));
|
||||
|
||||
if (ok()) {
|
||||
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
|
||||
auto status = dnn->DoConvolve(
|
||||
this, input_descriptor, input_data, filter_descriptor, filter_data,
|
||||
convolution_descriptor, biases, activation_mode, output_descriptor,
|
||||
output, scratch_allocator, algorithm_config, output_profile_result);
|
||||
if (!status && !output_profile_result) {
|
||||
SetError();
|
||||
}
|
||||
} else {
|
||||
SetErrorAndLogNoDnnSupport();
|
||||
}
|
||||
|
|
@ -461,6 +582,21 @@ Stream &Stream::ThenConvolveWithAlgorithm(
|
|||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::ThenConvolve(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<float> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<float> *output) {
|
||||
return ThenConvolveWithScratch(
|
||||
input_descriptor, input_data, filter_descriptor, filter_data,
|
||||
convolution_descriptor, biases, activation_mode, output_descriptor,
|
||||
output, /*scratch_allocator=*/nullptr);
|
||||
}
|
||||
|
||||
Stream &Stream::ThenConvolve(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
|
|
@ -582,7 +718,7 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch(
|
|||
this, filter_descriptor, filter_data, output_descriptor,
|
||||
backward_output_data, convolution_descriptor, input_descriptor,
|
||||
backward_input_data, scratch_allocator, dnn::AlgorithmConfig(),
|
||||
nullptr));
|
||||
/*output_profile_result=*/nullptr));
|
||||
} else {
|
||||
SetErrorAndLogNoDnnSupport();
|
||||
}
|
||||
|
|
@ -676,7 +812,7 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch(
|
|||
this, filter_descriptor, filter_data, output_descriptor,
|
||||
backward_output_data, convolution_descriptor, input_descriptor,
|
||||
backward_input_data, scratch_allocator, dnn::AlgorithmConfig(),
|
||||
nullptr));
|
||||
/*output_profile_result=*/nullptr));
|
||||
} else {
|
||||
SetErrorAndLogNoDnnSupport();
|
||||
}
|
||||
|
|
@ -718,7 +854,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch(
|
|||
this, input_descriptor, input_data, output_descriptor,
|
||||
backward_output_data, convolution_descriptor, filter_descriptor,
|
||||
backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(),
|
||||
nullptr));
|
||||
/*output_profile_result=*/nullptr));
|
||||
} else {
|
||||
SetErrorAndLogNoDnnSupport();
|
||||
}
|
||||
|
|
@ -779,7 +915,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch(
|
|||
this, input_descriptor, input_data, output_descriptor,
|
||||
backward_output_data, convolution_descriptor, filter_descriptor,
|
||||
backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(),
|
||||
nullptr));
|
||||
/*output_profile_result=*/nullptr));
|
||||
} else {
|
||||
SetErrorAndLogNoDnnSupport();
|
||||
}
|
||||
|
|
@ -3868,7 +4004,7 @@ Stream &Stream::ThenBlasGemmBatched(
|
|||
int batch_count) {
|
||||
return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda,
|
||||
b, ldb, beta, c, ldc, batch_count,
|
||||
nullptr);
|
||||
/*scratch_allocator=*/nullptr);
|
||||
}
|
||||
|
||||
Stream &Stream::ThenBlasGemmBatchedWithScratch(
|
||||
|
|
@ -3900,7 +4036,7 @@ Stream &Stream::ThenBlasGemmBatched(
|
|||
int batch_count) {
|
||||
return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda,
|
||||
b, ldb, beta, c, ldc, batch_count,
|
||||
nullptr);
|
||||
/*scratch_allocator=*/nullptr);
|
||||
}
|
||||
|
||||
Stream &Stream::ThenBlasGemmBatchedWithScratch(
|
||||
|
|
@ -3934,7 +4070,7 @@ Stream &Stream::ThenBlasGemmBatched(
|
|||
int batch_count) {
|
||||
return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda,
|
||||
b, ldb, beta, c, ldc, batch_count,
|
||||
nullptr);
|
||||
/*scratch_allocator=*/nullptr);
|
||||
}
|
||||
|
||||
Stream &Stream::ThenBlasGemmBatchedWithScratch(
|
||||
|
|
@ -3973,7 +4109,7 @@ Stream &Stream::ThenBlasGemmBatched(
|
|||
int batch_count) {
|
||||
return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda,
|
||||
b, ldb, beta, c, ldc, batch_count,
|
||||
nullptr);
|
||||
/*scratch_allocator=*/nullptr);
|
||||
}
|
||||
|
||||
Stream &Stream::ThenBlasGemmBatchedWithScratch(
|
||||
|
|
|
|||
|
|
@ -240,6 +240,16 @@ class Stream {
|
|||
DeviceMemory<float> *offset_backprop);
|
||||
|
||||
// TODO(leary) add double-precision version of this interface.
|
||||
Stream &ThenConvolve(const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<float> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const DeviceMemory<float> &biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<float> *output);
|
||||
|
||||
Stream &ThenConvolve(const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
|
|
@ -268,6 +278,27 @@ class Stream {
|
|||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<float> *output_data);
|
||||
|
||||
Stream &ThenConvolveWithScratch(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<Eigen::half> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<Eigen::half> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const DeviceMemory<Eigen::half> &biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator);
|
||||
|
||||
Stream &ThenConvolveWithScratch(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<float> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<float> *output, ScratchAllocator *scratch_allocator);
|
||||
|
||||
Stream &ThenConvolveWithScratch(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<Eigen::half> &input_data,
|
||||
|
|
@ -286,6 +317,31 @@ class Stream {
|
|||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<float> *output, ScratchAllocator *scratch_allocator);
|
||||
|
||||
Stream &ThenConvolveWithAlgorithm(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<float> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<float> *output, ScratchAllocator *scratch_allocator,
|
||||
const dnn::AlgorithmConfig &algorithm_config,
|
||||
dnn::ProfileResult *output_profile_result);
|
||||
|
||||
Stream &ThenConvolveWithAlgorithm(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<Eigen::half> &input_data,
|
||||
const dnn::FilterDescriptor &filter_descriptor,
|
||||
const DeviceMemory<Eigen::half> &filter_data,
|
||||
const dnn::ConvolutionDescriptor &convolution_descriptor,
|
||||
const DeviceMemory<Eigen::half> &biases,
|
||||
dnn::ActivationMode activation_mode,
|
||||
const dnn::BatchDescriptor &output_descriptor,
|
||||
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator,
|
||||
const dnn::AlgorithmConfig &algorithm_config,
|
||||
dnn::ProfileResult *output_profile_result);
|
||||
|
||||
Stream &ThenConvolveWithAlgorithm(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user