mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Added MIOpen conv transpose op (#13938)
Summary: This pull request contains changes for: 1. Removing ConvTranspose related changes from caffe2/operators/hip/conv_op_miopen.cc 2. Adding the file caffe2/operators/hip/conv_transpose_op_miopen.cc 3. Modifying the tests to run convTranspose op using MIOpen engine Differential Revision: D13055099 Pulled By: bddppq fbshipit-source-id: ca284f8f9a073005b22013c375cc958257815865
This commit is contained in:
parent
5059beb644
commit
f4e502a8c5
|
|
@ -47,19 +47,10 @@ class MIOPENConvOpBase : public ConvPoolOpBase<HIPContext> {
|
|||
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&top_desc_for_bias_));
|
||||
MIOPEN_ENFORCE(miopenCreateConvolutionDescriptor(&conv_desc_));
|
||||
|
||||
if ((operator_def.type().substr(0, 6) == "Conv") ||
|
||||
(operator_def.type().substr(0, 14) == "ConvGradient")) {
|
||||
if (group_ > 1) {
|
||||
if(group_ > 1) {
|
||||
mode_ = miopenGroupConv;
|
||||
} else {
|
||||
mode_ = miopenConvolution;
|
||||
}
|
||||
} else if (
|
||||
(operator_def.type().substr(0, 7) == "Trans") ||
|
||||
(operator_def.type().substr(0, 15) == "TransGradient")) {
|
||||
mode_ = miopenTranspose;
|
||||
} else {
|
||||
LOG(FATAL) << "Unsupported convolution method: " << operator_def.type();
|
||||
mode_ = miopenConvolution;
|
||||
}
|
||||
|
||||
if (mode_ == miopenGroupConv) {
|
||||
|
|
@ -220,7 +211,7 @@ bool MIOPENConvOp::DoRunWithType() {
|
|||
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
|
||||
CAFFE_ENFORCE(
|
||||
Weight.ndim() == 4,
|
||||
"Conv/Trans op with MIOpen engine is supported only for 2D convolutions");
|
||||
"Conv op with MIOpen engine is supported only for 2D convolutions");
|
||||
|
||||
const int M = Weight.dim32(0);
|
||||
ConvPoolOpBase<HIPContext>::SetOutputSize(X, Y, M);
|
||||
|
|
@ -408,7 +399,7 @@ bool MIOPENConvGradientOp::DoRunWithType() {
|
|||
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
|
||||
CAFFE_ENFORCE(
|
||||
Weight.ndim() == 4,
|
||||
"ConvGradient/TransGradient op with MIOpen engine is supported only for 2D convolutions");
|
||||
"ConvGradient op with MIOpen engine is supported only for 2D convolutions");
|
||||
|
||||
const int M = Weight.dim32(0);
|
||||
int N = 0, C = 0, H = 0, W = 0, D = 0, N_out = 0, C_out = 0, H_out = 0,
|
||||
|
|
@ -638,6 +629,4 @@ bool MIOPENConvGradientOp::RunOnDevice() {
|
|||
|
||||
REGISTER_MIOPEN_OPERATOR(Conv, MIOPENConvOp);
|
||||
REGISTER_MIOPEN_OPERATOR(ConvGradient, MIOPENConvGradientOp);
|
||||
REGISTER_MIOPEN_OPERATOR(Trans, MIOPENConvOp);
|
||||
REGISTER_MIOPEN_OPERATOR(TransGradient, MIOPENConvGradientOp);
|
||||
} // namespace caffe2
|
||||
|
|
|
|||
494
caffe2/operators/hip/conv_transpose_op_miopen.cc
Normal file
494
caffe2/operators/hip/conv_transpose_op_miopen.cc
Normal file
|
|
@ -0,0 +1,494 @@
|
|||
#include "caffe2/core/hip/context_hip.h"
|
||||
#include "caffe2/core/hip/miopen_wrapper.h"
|
||||
#include "caffe2/operators/conv_transpose_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
static constexpr size_t kCONV_MIOPEN_WORKSPACE_LIMIT_BYTES = 64 * 1024 * 1024;
|
||||
|
||||
class MIOPENConvTransposeOpBase : public ConvTransposeUnpoolBase<HIPContext> {
|
||||
public:
|
||||
MIOPENConvTransposeOpBase(const OperatorDef& operator_def, Workspace* ws)
|
||||
: ConvTransposeUnpoolBase<HIPContext>(operator_def, ws),
|
||||
miopen_wrapper_(&context_),
|
||||
miopen_ws_nbytes_limit_(OperatorBase::GetSingleArgument<size_t>(
|
||||
"ws_nbytes_limit",
|
||||
kCONV_MIOPEN_WORKSPACE_LIMIT_BYTES)),
|
||||
miopen_state_(OperatorBase::GetSingleArgument<size_t>("miopen_state", 0)),
|
||||
alpha_(OperatorBase::GetSingleArgument<float>("alpha", 1.0)),
|
||||
beta_(OperatorBase::GetSingleArgument<float>("beta", 0.0)),
|
||||
exhaustive_search_(
|
||||
OperatorBase::GetSingleArgument<bool>("exhaustive_search", false)) {
|
||||
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&bottom_desc_));
|
||||
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&bias_desc_));
|
||||
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&weight_desc_));
|
||||
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&top_desc_));
|
||||
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&top_desc_for_bias_));
|
||||
MIOPEN_ENFORCE(miopenCreateConvolutionDescriptor(&conv_desc_));
|
||||
}
|
||||
|
||||
~MIOPENConvTransposeOpBase() {
|
||||
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(bottom_desc_));
|
||||
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(bias_desc_));
|
||||
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(weight_desc_));
|
||||
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(top_desc_));
|
||||
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(top_desc_for_bias_));
|
||||
MIOPEN_ENFORCE(miopenDestroyConvolutionDescriptor(conv_desc_));
|
||||
}
|
||||
|
||||
protected:
|
||||
vector<int64_t> mio_input_dims_;
|
||||
vector<int64_t> mio_weight_dims_;
|
||||
MIOPENWrapper miopen_wrapper_;
|
||||
miopenTensorDescriptor_t bottom_desc_;
|
||||
miopenTensorDescriptor_t bias_desc_;
|
||||
miopenTensorDescriptor_t weight_desc_;
|
||||
miopenTensorDescriptor_t top_desc_;
|
||||
miopenTensorDescriptor_t top_desc_for_bias_;
|
||||
miopenConvolutionDescriptor_t conv_desc_;
|
||||
size_t miopen_state_;
|
||||
const size_t miopen_ws_nbytes_limit_;
|
||||
bool exhaustive_search_;
|
||||
const float alpha_;
|
||||
const float beta_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class MIOPENConvTransposeOp final : public MIOPENConvTransposeOpBase {
|
||||
public:
|
||||
MIOPENConvTransposeOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: MIOPENConvTransposeOpBase(operator_def, ws),
|
||||
requestAlgoCount_(
|
||||
OperatorBase::GetSingleArgument<int>("requestAlgoCount_", 1)),
|
||||
returnedAlgoCount_(
|
||||
OperatorBase::GetSingleArgument<int>("returnedAlgoCount_", 1)),
|
||||
bestAlgoFound_(
|
||||
OperatorBase::GetSingleArgument<bool>("bestAlgoFound_", false)),
|
||||
fwdConvWs_(nullptr),
|
||||
fwdConvWsSize_(0),
|
||||
fwdAlgo_(miopenConvolutionFwdAlgoGEMM) {}
|
||||
|
||||
~MIOPENConvTransposeOp() {
|
||||
if (fwdConvWs_) {
|
||||
hipFree(fwdConvWs_);
|
||||
fwdConvWs_ = nullptr;
|
||||
fwdConvWsSize_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool RunOnDevice() override;
|
||||
|
||||
private:
|
||||
const int requestAlgoCount_;
|
||||
int returnedAlgoCount_;
|
||||
bool bestAlgoFound_;
|
||||
char* fwdConvWs_;
|
||||
size_t fwdConvWsSize_;
|
||||
miopenConvFwdAlgorithm_t fwdAlgo_;
|
||||
// Input: X, W, b
|
||||
// Output: Y
|
||||
INPUT_TAGS(INPUT, FILTER, BIAS);
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class MIOPENConvTransposeGradientOp final : public MIOPENConvTransposeOpBase {
|
||||
public:
|
||||
MIOPENConvTransposeGradientOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: MIOPENConvTransposeOpBase(operator_def, ws),
|
||||
no_bias_(OperatorBase::GetSingleArgument<int>("no_bias", 0)),
|
||||
requestAlgoCount_(
|
||||
OperatorBase::GetSingleArgument<int>("requestAlgoCount_", 1)),
|
||||
returnedAlgoCount_(
|
||||
OperatorBase::GetSingleArgument<int>("returnedAlgoCount_", 1)),
|
||||
bestDataAlgoFound_(
|
||||
OperatorBase::GetSingleArgument<bool>("bestAlgoFound", false)),
|
||||
bestWeightAlgoFound_(
|
||||
OperatorBase::GetSingleArgument<bool>("bestAlgoFound", false)),
|
||||
bwdWeightWs_(nullptr),
|
||||
bwdWeightWsSize_(0),
|
||||
bwdDataWs_(nullptr),
|
||||
bwdDataWsSize_(0),
|
||||
bwdWeiAlgo_(miopenConvolutionBwdWeightsAlgoGEMM),
|
||||
bwdDataAlgo_(miopenConvolutionBwdDataAlgoGEMM) {
|
||||
CAFFE_ENFORCE(
|
||||
!(no_bias_ && OutputSize() == 3),
|
||||
"If bias is not present, you should not have 3 grad output.");
|
||||
}
|
||||
|
||||
~MIOPENConvTransposeGradientOp() {
|
||||
if (bwdWeightWs_) {
|
||||
hipFree(bwdWeightWs_);
|
||||
bwdWeightWs_ = nullptr;
|
||||
bwdWeightWsSize_ = 0;
|
||||
}
|
||||
if (bwdDataWs_) {
|
||||
hipFree(bwdDataWs_);
|
||||
bwdDataWs_ = nullptr;
|
||||
bwdDataWsSize_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool RunOnDevice() override;
|
||||
|
||||
private:
|
||||
bool no_bias_;
|
||||
const int requestAlgoCount_;
|
||||
int returnedAlgoCount_;
|
||||
bool bestDataAlgoFound_;
|
||||
bool bestWeightAlgoFound_;
|
||||
miopenConvBwdWeightsAlgorithm_t bwdWeiAlgo_;
|
||||
miopenConvBwdDataAlgorithm_t bwdDataAlgo_;
|
||||
size_t bwdWeightWsSize_;
|
||||
size_t bwdDataWsSize_;
|
||||
char* bwdWeightWs_;
|
||||
char* bwdDataWs_;
|
||||
// input: X, W, dY
|
||||
// output: dW, db, and optionally dX
|
||||
INPUT_TAGS(INPUT, FILTER, OUTPUT_GRAD);
|
||||
OUTPUT_TAGS(FILTER_GRAD, BIAS_OR_INPUT_GRAD, INPUT_GRAD);
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Implementations
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename T>
|
||||
bool MIOPENConvTransposeOp<T>::RunOnDevice() {
|
||||
auto& X = Input(INPUT);
|
||||
auto& Weight = Input(FILTER);
|
||||
auto* Y = Output(0);
|
||||
|
||||
// Figure out the output shape
|
||||
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
|
||||
CAFFE_ENFORCE(
|
||||
Weight.ndim() == 4,
|
||||
"ConvTranspose op with MIOpen engine is supported only for 2D convolutions");
|
||||
|
||||
const int C_in = Weight.dim32(1);
|
||||
ConvTransposeUnpoolBase<HIPContext>::SetOutputSize(X, Y, C_in);
|
||||
|
||||
int N = X.dim32(0);
|
||||
int C = X.dim32(1);
|
||||
int H = X.dim32(2);
|
||||
int W = X.ndim() > 3 ? X.dim32(3) : 1;
|
||||
int D = X.ndim() > 4 ? X.dim32(4) : 1;
|
||||
|
||||
int N_out = Y->dim32(0);
|
||||
int C_out = Y->dim32(1);
|
||||
int H_out = Y->dim32(2);
|
||||
int W_out = Y->ndim() > 3 ? Y->dim32(3) : 1;
|
||||
int D_out = Y->ndim() > 4 ? Y->dim32(4) : 1;
|
||||
|
||||
bool input_changed = (X.dims() != mio_input_dims_);
|
||||
bool weight_changed = (Weight.dims() != mio_weight_dims_);
|
||||
|
||||
if (input_changed || weight_changed) {
|
||||
VLOG(1) << "Changing MIOpen descriptor configurations.";
|
||||
if (input_changed) {
|
||||
mio_input_dims_ = X.dims().vec();
|
||||
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
|
||||
bottom_desc_, miopenTypeWrapper<T>::type, N, C, H, W));
|
||||
}
|
||||
|
||||
if (weight_changed) {
|
||||
mio_weight_dims_ = Weight.dims().vec();
|
||||
MIOPEN_ENFORCE(miopenInitConvolutionDescriptor(
|
||||
conv_desc_,
|
||||
miopenTranspose,
|
||||
pad_t(),
|
||||
pad_l(),
|
||||
stride_h(),
|
||||
stride_w(),
|
||||
1,
|
||||
1));
|
||||
|
||||
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
|
||||
weight_desc_,
|
||||
miopenTypeWrapper<T>::type,
|
||||
C,
|
||||
C_out,
|
||||
kernel_h(),
|
||||
kernel_w()));
|
||||
}
|
||||
|
||||
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
|
||||
top_desc_, miopenTypeWrapper<T>::type, N_out, C_out, H_out, W_out));
|
||||
|
||||
if (InputSize() == 3) {
|
||||
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
|
||||
bias_desc_, miopenTypeWrapper<T>::type, 1, C_out, 1, 1));
|
||||
}
|
||||
|
||||
while (!bestAlgoFound_) {
|
||||
miopenConvAlgoPerf_t perf;
|
||||
|
||||
MIOPEN_ENFORCE(miopenConvolutionForwardGetWorkSpaceSize(
|
||||
miopen_wrapper_.inline_miopen_handle(),
|
||||
weight_desc_,
|
||||
bottom_desc_,
|
||||
conv_desc_,
|
||||
top_desc_,
|
||||
&fwdConvWsSize_));
|
||||
if ((fwdConvWsSize_ > 0) && (fwdConvWs_ == nullptr)) {
|
||||
HIP_CHECK(hipMalloc(&fwdConvWs_, fwdConvWsSize_));
|
||||
}
|
||||
|
||||
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
|
||||
MIOPEN_ENFORCE(miopenFindConvolutionForwardAlgorithm(
|
||||
state->miopen_handle(),
|
||||
bottom_desc_,
|
||||
X.template data<T>(),
|
||||
weight_desc_,
|
||||
Weight.template data<T>(),
|
||||
conv_desc_,
|
||||
top_desc_,
|
||||
Y->template mutable_data<T>(),
|
||||
requestAlgoCount_,
|
||||
&returnedAlgoCount_,
|
||||
&perf,
|
||||
fwdConvWs_,
|
||||
fwdConvWsSize_,
|
||||
false));
|
||||
});
|
||||
bestAlgoFound_ = true;
|
||||
fwdAlgo_ = perf.fwd_algo;
|
||||
}
|
||||
}
|
||||
|
||||
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
|
||||
MIOPEN_ENFORCE(miopenConvolutionForward(
|
||||
state->miopen_handle(),
|
||||
&alpha_,
|
||||
bottom_desc_,
|
||||
X.template data<T>(),
|
||||
weight_desc_,
|
||||
Weight.template data<T>(),
|
||||
conv_desc_,
|
||||
fwdAlgo_,
|
||||
&beta_,
|
||||
top_desc_,
|
||||
Y->template mutable_data<T>(),
|
||||
fwdConvWs_,
|
||||
fwdConvWsSize_));
|
||||
});
|
||||
|
||||
if (InputSize() == 3) {
|
||||
auto& bias = Input(BIAS);
|
||||
|
||||
CAFFE_ENFORCE_EQ(bias.ndim(), 1);
|
||||
CAFFE_ENFORCE_EQ(bias.dim32(0), C_out);
|
||||
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
|
||||
MIOPEN_ENFORCE(miopenConvolutionForwardBias(
|
||||
state->miopen_handle(),
|
||||
&alpha_,
|
||||
bias_desc_,
|
||||
bias.template data<T>(),
|
||||
&beta_,
|
||||
top_desc_,
|
||||
Y->template mutable_data<T>()));
|
||||
});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool MIOPENConvTransposeGradientOp<T>::RunOnDevice() {
|
||||
auto& X = Input(INPUT);
|
||||
auto& Weight = Input(FILTER);
|
||||
auto& dY = Input(OUTPUT_GRAD);
|
||||
auto* dW = Output(FILTER_GRAD);
|
||||
auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
|
||||
dX->ResizeLike(X);
|
||||
dW->ResizeLike(Weight);
|
||||
|
||||
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
|
||||
CAFFE_ENFORCE(
|
||||
Weight.ndim() == 4,
|
||||
"ConvTransposeGradient op with MIOpen engine is supported only for 2D convolutions");
|
||||
|
||||
const int M = Weight.dim32(0);
|
||||
int N = 0, C = 0, H = 0, W = 0, D = 0, N_out = 0, C_out = 0, H_out = 0,
|
||||
W_out = 0, D_out = 0;
|
||||
|
||||
N = X.dim32(0);
|
||||
C = X.dim32(1);
|
||||
H = X.dim32(2);
|
||||
W = X.ndim() > 3 ? X.dim32(3) : 1;
|
||||
D = X.ndim() > 4 ? X.dim32(4) : 1;
|
||||
|
||||
N_out = dY.dim32(0);
|
||||
C_out = dY.dim32(1);
|
||||
H_out = dY.dim32(2);
|
||||
W_out = dY.ndim() > 3 ? dY.dim32(3) : 1;
|
||||
D_out = dY.ndim() > 4 ? dY.dim32(4) : 1;
|
||||
|
||||
bool doBwdDataComputation = (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2)));
|
||||
bool input_changed = (X.dims() != mio_input_dims_);
|
||||
bool weight_changed = (Weight.dims() != mio_weight_dims_);
|
||||
|
||||
if (input_changed || weight_changed) {
|
||||
VLOG(1) << "Changing MIOpen descriptor configurations.";
|
||||
if (input_changed) {
|
||||
mio_input_dims_ = X.dims().vec();
|
||||
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
|
||||
bottom_desc_, miopenTypeWrapper<T>::type, N, C, H, W));
|
||||
}
|
||||
|
||||
if (weight_changed) {
|
||||
mio_weight_dims_ = Weight.dims().vec();
|
||||
MIOPEN_ENFORCE(miopenInitConvolutionDescriptor(
|
||||
conv_desc_,
|
||||
miopenTranspose,
|
||||
pad_t(),
|
||||
pad_l(),
|
||||
stride_h(),
|
||||
stride_w(),
|
||||
1,
|
||||
1));
|
||||
|
||||
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
|
||||
weight_desc_,
|
||||
miopenTypeWrapper<T>::type,
|
||||
C,
|
||||
C_out,
|
||||
kernel_h(),
|
||||
kernel_w()));
|
||||
}
|
||||
|
||||
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
|
||||
top_desc_, miopenTypeWrapper<T>::type, N_out, C_out, H_out, W_out));
|
||||
|
||||
if (!no_bias_) {
|
||||
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
|
||||
bias_desc_, miopenTypeWrapper<T>::type, 1, C_out, 1, 1));
|
||||
}
|
||||
|
||||
while ((!bestDataAlgoFound_) && doBwdDataComputation) {
|
||||
miopenConvAlgoPerf_t perf;
|
||||
|
||||
MIOPEN_ENFORCE(miopenConvolutionBackwardDataGetWorkSpaceSize(
|
||||
miopen_wrapper_.inline_miopen_handle(),
|
||||
top_desc_,
|
||||
weight_desc_,
|
||||
conv_desc_,
|
||||
bottom_desc_,
|
||||
&bwdDataWsSize_));
|
||||
if ((bwdDataWsSize_ > 0) && (bwdDataWs_ == nullptr)) {
|
||||
HIP_CHECK(hipMalloc(&bwdDataWs_, bwdDataWsSize_));
|
||||
}
|
||||
|
||||
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
|
||||
MIOPEN_ENFORCE(miopenFindConvolutionBackwardDataAlgorithm(
|
||||
state->miopen_handle(),
|
||||
top_desc_,
|
||||
dY.template data<T>(),
|
||||
weight_desc_,
|
||||
Weight.template data<T>(),
|
||||
conv_desc_,
|
||||
bottom_desc_,
|
||||
dX->template mutable_data<T>(),
|
||||
requestAlgoCount_,
|
||||
&returnedAlgoCount_,
|
||||
&perf,
|
||||
bwdDataWs_,
|
||||
bwdDataWsSize_,
|
||||
false));
|
||||
});
|
||||
|
||||
bestDataAlgoFound_ = true;
|
||||
bwdDataAlgo_ = perf.bwd_data_algo;
|
||||
}
|
||||
|
||||
while (!bestWeightAlgoFound_) {
|
||||
miopenConvAlgoPerf_t perf;
|
||||
|
||||
MIOPEN_ENFORCE(miopenConvolutionBackwardWeightsGetWorkSpaceSize(
|
||||
miopen_wrapper_.inline_miopen_handle(),
|
||||
top_desc_,
|
||||
bottom_desc_,
|
||||
conv_desc_,
|
||||
weight_desc_,
|
||||
&bwdWeightWsSize_));
|
||||
if ((bwdWeightWsSize_ > 0) && (bwdWeightWs_ == nullptr)) {
|
||||
HIP_CHECK(hipMalloc(&bwdWeightWs_, bwdWeightWsSize_));
|
||||
}
|
||||
|
||||
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
|
||||
MIOPEN_ENFORCE(miopenFindConvolutionBackwardWeightsAlgorithm(
|
||||
state->miopen_handle(),
|
||||
top_desc_,
|
||||
dY.template data<T>(),
|
||||
bottom_desc_,
|
||||
X.template data<T>(),
|
||||
conv_desc_,
|
||||
weight_desc_,
|
||||
dW->template mutable_data<T>(),
|
||||
requestAlgoCount_,
|
||||
&returnedAlgoCount_,
|
||||
&perf,
|
||||
bwdWeightWs_,
|
||||
bwdWeightWsSize_,
|
||||
false));
|
||||
});
|
||||
bestWeightAlgoFound_ = true;
|
||||
bwdWeiAlgo_ = perf.bwd_weights_algo;
|
||||
}
|
||||
}
|
||||
if (doBwdDataComputation) {
|
||||
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
|
||||
MIOPEN_ENFORCE(miopenConvolutionBackwardData(
|
||||
state->miopen_handle(),
|
||||
&alpha_,
|
||||
top_desc_,
|
||||
dY.template data<T>(),
|
||||
weight_desc_,
|
||||
Weight.template data<T>(),
|
||||
conv_desc_,
|
||||
bwdDataAlgo_,
|
||||
&beta_,
|
||||
bottom_desc_,
|
||||
dX->template mutable_data<T>(),
|
||||
bwdDataWs_,
|
||||
bwdDataWsSize_));
|
||||
});
|
||||
}
|
||||
|
||||
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
|
||||
MIOPEN_ENFORCE(miopenConvolutionBackwardWeights(
|
||||
state->miopen_handle(),
|
||||
&alpha_,
|
||||
top_desc_,
|
||||
dY.template data<T>(),
|
||||
bottom_desc_,
|
||||
X.template data<T>(),
|
||||
conv_desc_,
|
||||
bwdWeiAlgo_,
|
||||
&beta_,
|
||||
weight_desc_,
|
||||
dW->template mutable_data<T>(),
|
||||
bwdWeightWs_,
|
||||
bwdWeightWsSize_));
|
||||
});
|
||||
|
||||
////////////////////////////////////// BIAS ///////////////////////////
|
||||
if (!no_bias_) {
|
||||
auto* dbias = Output(BIAS_OR_INPUT_GRAD);
|
||||
dbias->Resize(C_out);
|
||||
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
|
||||
MIOPEN_ENFORCE(miopenConvolutionBackwardBias(
|
||||
state->miopen_handle(),
|
||||
&alpha_,
|
||||
top_desc_,
|
||||
dY.template data<T>(),
|
||||
&beta_,
|
||||
bias_desc_,
|
||||
dbias->template mutable_data<T>()));
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
REGISTER_MIOPEN_OPERATOR(ConvTranspose, MIOPENConvTransposeOp<float>);
|
||||
REGISTER_MIOPEN_OPERATOR(ConvTransposeGradient, MIOPENConvTransposeGradientOp<float>);
|
||||
} // namespace caffe2
|
||||
|
|
@ -8,6 +8,7 @@ import hypothesis.strategies as st
|
|||
|
||||
from caffe2.python import core
|
||||
import caffe2.python.hypothesis_test_util as hu
|
||||
import caffe2.python.hip_test_util as hiputl
|
||||
|
||||
|
||||
class TestConvolutionTranspose(hu.HypothesisTestCase):
|
||||
|
|
@ -37,6 +38,11 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
|
|||
b = np.random.rand(output_channels).astype(np.float32) - 0.5
|
||||
outputs = {}
|
||||
for order in ["NCHW", "NHWC"]:
|
||||
# MIOPEN doesn't work with NHWC, fallback to use normal hip
|
||||
if hiputl.run_in_hip(gc, dc) and order == "NHWC":
|
||||
tmp_engine = ""
|
||||
else:
|
||||
tmp_engine = engine
|
||||
op = core.CreateOperator(
|
||||
"ConvTranspose",
|
||||
["X", "w", "b"] if use_bias else ["X", "w"],
|
||||
|
|
@ -46,7 +52,7 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
|
|||
pad=pad,
|
||||
adj=adj,
|
||||
order=order,
|
||||
engine=engine,
|
||||
engine=tmp_engine,
|
||||
shared_buffer=int(shared_buffer),
|
||||
device_option=gc,
|
||||
)
|
||||
|
|
@ -102,6 +108,11 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
|
|||
b = np.random.rand(output_channels).astype(np.float32) - 0.5
|
||||
outputs = {}
|
||||
for order in ["NCHW", "NHWC"]:
|
||||
if hiputl.run_in_hip(gc, dc) and order == "NHWC":
|
||||
# MIOPEN doesn't work with NHWC, fallback to use normal hip
|
||||
tmp_engine = ""
|
||||
else:
|
||||
tmp_engine = engine
|
||||
op = core.CreateOperator(
|
||||
"ConvTranspose",
|
||||
["X", "w", "b"] if use_bias else ["X", "w"],
|
||||
|
|
@ -111,7 +122,7 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
|
|||
pads=[pad] * 4,
|
||||
adjs=[adj] * 2,
|
||||
order=order,
|
||||
engine=engine,
|
||||
engine=tmp_engine,
|
||||
shared_buffer=int(shared_buffer),
|
||||
device_option=gc,
|
||||
)
|
||||
|
|
@ -236,6 +247,8 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
|
|||
order, engine, use_bias,
|
||||
compute_dX, gc, dc):
|
||||
assume(adj < stride)
|
||||
if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
|
||||
assume(order == "NCHW")
|
||||
X = np.random.rand(
|
||||
batch_size, size, size, input_channels).astype(np.float32) - 0.5
|
||||
w = np.random.rand(
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user