Added MIOpen conv transpose op (#13938)

Summary:
This pull request contains the following changes:
1. Removing ConvTranspose-related code from caffe2/operators/hip/conv_op_miopen.cc
2. Adding the file caffe2/operators/hip/conv_transpose_op_miopen.cc
3. Modifying the tests to run the ConvTranspose op using the MIOpen engine

Differential Revision: D13055099

Pulled By: bddppq

fbshipit-source-id: ca284f8f9a073005b22013c375cc958257815865
commit f4e502a8c5
parent 5059beb644
Author: Ashish (2018-11-13 20:59:54 -08:00), committed by Facebook Github Bot
3 changed files with 513 additions and 17 deletions
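
For orientation, here is a minimal sketch of driving the new operator from Python, modeled on the tests below. The "MIOPEN" engine name comes from the REGISTER_MIOPEN_OPERATOR entries in the new file; the device setup, shapes, and blob names are illustrative assumptions.

import numpy as np
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace

# Assumes a ROCm build of Caffe2 with a HIP device available.
device = core.DeviceOption(caffe2_pb2.HIP, 0)

op = core.CreateOperator(
    "ConvTranspose",
    ["X", "w", "b"],
    ["Y"],
    kernel=3, stride=2, pad=1,
    order="NCHW",                      # the MIOpen path is NCHW-only (see the tests below)
    engine="MIOPEN",                   # dispatches to MIOPENConvTransposeOp<float>
    ws_nbytes_limit=64 * 1024 * 1024,  # workspace cap; matches the op's default
    exhaustive_search=False,           # read by the op; the MIOpen find calls here pass false regardless
    device_option=device,
)

workspace.FeedBlob("X", np.random.randn(2, 8, 16, 16).astype(np.float32), device)
# ConvTranspose filters are laid out as (C_in, C_out, kH, kW).
workspace.FeedBlob("w", np.random.randn(8, 4, 3, 3).astype(np.float32), device)
workspace.FeedBlob("b", np.random.randn(4).astype(np.float32), device)
workspace.RunOperatorOnce(op)

# H_out = (H - 1) * stride - 2 * pad + kernel + adj = (16 - 1) * 2 - 2 + 3 + 0 = 31
print(workspace.FetchBlob("Y").shape)  # expected: (2, 4, 31, 31)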

caffe2/operators/hip/conv_op_miopen.cc

@@ -47,19 +47,10 @@ class MIOPENConvOpBase : public ConvPoolOpBase<HIPContext> {
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&top_desc_for_bias_));
MIOPEN_ENFORCE(miopenCreateConvolutionDescriptor(&conv_desc_));
-if ((operator_def.type().substr(0, 6) == "Conv") ||
-(operator_def.type().substr(0, 14) == "ConvGradient")) {
-if (group_ > 1) {
+if(group_ > 1) {
mode_ = miopenGroupConv;
} else {
mode_ = miopenConvolution;
}
-} else if (
-(operator_def.type().substr(0, 7) == "Trans") ||
-(operator_def.type().substr(0, 15) == "TransGradient")) {
-mode_ = miopenTranspose;
-} else {
-LOG(FATAL) << "Unsupported convolution method: " << operator_def.type();
-mode_ = miopenConvolution;
-}
if (mode_ == miopenGroupConv) {
@@ -220,7 +211,7 @@ bool MIOPENConvOp::DoRunWithType() {
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
CAFFE_ENFORCE(
Weight.ndim() == 4,
"Conv/Trans op with MIOpen engine is supported only for 2D convolutions");
"Conv op with MIOpen engine is supported only for 2D convolutions");
const int M = Weight.dim32(0);
ConvPoolOpBase<HIPContext>::SetOutputSize(X, Y, M);
@@ -408,7 +399,7 @@ bool MIOPENConvGradientOp::DoRunWithType() {
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
CAFFE_ENFORCE(
Weight.ndim() == 4,
"ConvGradient/TransGradient op with MIOpen engine is supported only for 2D convolutions");
"ConvGradient op with MIOpen engine is supported only for 2D convolutions");
const int M = Weight.dim32(0);
int N = 0, C = 0, H = 0, W = 0, D = 0, N_out = 0, C_out = 0, H_out = 0,
@@ -638,6 +629,4 @@ bool MIOPENConvGradientOp::RunOnDevice() {
REGISTER_MIOPEN_OPERATOR(Conv, MIOPENConvOp);
REGISTER_MIOPEN_OPERATOR(ConvGradient, MIOPENConvGradientOp);
-REGISTER_MIOPEN_OPERATOR(Trans, MIOPENConvOp);
-REGISTER_MIOPEN_OPERATOR(TransGradient, MIOPENConvGradientOp);
} // namespace caffe2

caffe2/operators/hip/conv_transpose_op_miopen.cc

@@ -0,0 +1,494 @@
#include "caffe2/core/hip/context_hip.h"
#include "caffe2/core/hip/miopen_wrapper.h"
#include "caffe2/operators/conv_transpose_op.h"
namespace caffe2 {
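// Default MIOpen convolution workspace cap (64 MiB); overridable per op via the "ws_nbytes_limit" argument.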
static constexpr size_t kCONV_MIOPEN_WORKSPACE_LIMIT_BYTES = 64 * 1024 * 1024;
class MIOPENConvTransposeOpBase : public ConvTransposeUnpoolBase<HIPContext> {
public:
MIOPENConvTransposeOpBase(const OperatorDef& operator_def, Workspace* ws)
: ConvTransposeUnpoolBase<HIPContext>(operator_def, ws),
miopen_wrapper_(&context_),
miopen_ws_nbytes_limit_(OperatorBase::GetSingleArgument<size_t>(
"ws_nbytes_limit",
kCONV_MIOPEN_WORKSPACE_LIMIT_BYTES)),
miopen_state_(OperatorBase::GetSingleArgument<size_t>("miopen_state", 0)),
alpha_(OperatorBase::GetSingleArgument<float>("alpha", 1.0)),
beta_(OperatorBase::GetSingleArgument<float>("beta", 0.0)),
exhaustive_search_(
OperatorBase::GetSingleArgument<bool>("exhaustive_search", false)) {
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&bottom_desc_));
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&bias_desc_));
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&weight_desc_));
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&top_desc_));
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&top_desc_for_bias_));
MIOPEN_ENFORCE(miopenCreateConvolutionDescriptor(&conv_desc_));
}
~MIOPENConvTransposeOpBase() {
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(bottom_desc_));
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(bias_desc_));
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(weight_desc_));
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(top_desc_));
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(top_desc_for_bias_));
MIOPEN_ENFORCE(miopenDestroyConvolutionDescriptor(conv_desc_));
}
protected:
vector<int64_t> mio_input_dims_;
vector<int64_t> mio_weight_dims_;
MIOPENWrapper miopen_wrapper_;
miopenTensorDescriptor_t bottom_desc_;
miopenTensorDescriptor_t bias_desc_;
miopenTensorDescriptor_t weight_desc_;
miopenTensorDescriptor_t top_desc_;
miopenTensorDescriptor_t top_desc_for_bias_;
miopenConvolutionDescriptor_t conv_desc_;
size_t miopen_state_;
const size_t miopen_ws_nbytes_limit_;
bool exhaustive_search_;
const float alpha_;
const float beta_;
};
template <typename T>
class MIOPENConvTransposeOp final : public MIOPENConvTransposeOpBase {
public:
MIOPENConvTransposeOp(const OperatorDef& operator_def, Workspace* ws)
: MIOPENConvTransposeOpBase(operator_def, ws),
requestAlgoCount_(
OperatorBase::GetSingleArgument<int>("requestAlgoCount_", 1)),
returnedAlgoCount_(
OperatorBase::GetSingleArgument<int>("returnedAlgoCount_", 1)),
bestAlgoFound_(
OperatorBase::GetSingleArgument<bool>("bestAlgoFound_", false)),
fwdConvWs_(nullptr),
fwdConvWsSize_(0),
fwdAlgo_(miopenConvolutionFwdAlgoGEMM) {}
~MIOPENConvTransposeOp() {
if (fwdConvWs_) {
hipFree(fwdConvWs_);
fwdConvWs_ = nullptr;
fwdConvWsSize_ = 0;
}
}
bool RunOnDevice() override;
private:
const int requestAlgoCount_;
int returnedAlgoCount_;
bool bestAlgoFound_;
char* fwdConvWs_;
size_t fwdConvWsSize_;
miopenConvFwdAlgorithm_t fwdAlgo_;
// Input: X, W, b
// Output: Y
INPUT_TAGS(INPUT, FILTER, BIAS);
};
template <typename T>
class MIOPENConvTransposeGradientOp final : public MIOPENConvTransposeOpBase {
public:
MIOPENConvTransposeGradientOp(const OperatorDef& operator_def, Workspace* ws)
: MIOPENConvTransposeOpBase(operator_def, ws),
no_bias_(OperatorBase::GetSingleArgument<int>("no_bias", 0)),
requestAlgoCount_(
OperatorBase::GetSingleArgument<int>("requestAlgoCount_", 1)),
returnedAlgoCount_(
OperatorBase::GetSingleArgument<int>("returnedAlgoCount_", 1)),
bestDataAlgoFound_(
OperatorBase::GetSingleArgument<bool>("bestAlgoFound", false)),
bestWeightAlgoFound_(
OperatorBase::GetSingleArgument<bool>("bestAlgoFound", false)),
bwdWeightWs_(nullptr),
bwdWeightWsSize_(0),
bwdDataWs_(nullptr),
bwdDataWsSize_(0),
bwdWeiAlgo_(miopenConvolutionBwdWeightsAlgoGEMM),
bwdDataAlgo_(miopenConvolutionBwdDataAlgoGEMM) {
CAFFE_ENFORCE(
!(no_bias_ && OutputSize() == 3),
"If bias is not present, you should not have 3 grad output.");
}
~MIOPENConvTransposeGradientOp() {
if (bwdWeightWs_) {
hipFree(bwdWeightWs_);
bwdWeightWs_ = nullptr;
bwdWeightWsSize_ = 0;
}
if (bwdDataWs_) {
hipFree(bwdDataWs_);
bwdDataWs_ = nullptr;
bwdDataWsSize_ = 0;
}
}
bool RunOnDevice() override;
private:
bool no_bias_;
const int requestAlgoCount_;
int returnedAlgoCount_;
bool bestDataAlgoFound_;
bool bestWeightAlgoFound_;
miopenConvBwdWeightsAlgorithm_t bwdWeiAlgo_;
miopenConvBwdDataAlgorithm_t bwdDataAlgo_;
size_t bwdWeightWsSize_;
size_t bwdDataWsSize_;
char* bwdWeightWs_;
char* bwdDataWs_;
// input: X, W, dY
// output: dW, db, and optionally dX
INPUT_TAGS(INPUT, FILTER, OUTPUT_GRAD);
OUTPUT_TAGS(FILTER_GRAD, BIAS_OR_INPUT_GRAD, INPUT_GRAD);
};
////////////////////////////////////////////////////////////////////////////////
// Implementations
////////////////////////////////////////////////////////////////////////////////
template <typename T>
bool MIOPENConvTransposeOp<T>::RunOnDevice() {
auto& X = Input(INPUT);
auto& Weight = Input(FILTER);
auto* Y = Output(0);
// Figure out the output shape
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
CAFFE_ENFORCE(
Weight.ndim() == 4,
"ConvTranspose op with MIOpen engine is supported only for 2D convolutions");
const int C_in = Weight.dim32(1);
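// Filters are (C_in, C_out, kH, kW); SetOutputSize applies the standard
// transposed-conv formula, H_out = (H - 1) * stride - pad_t - pad_b + kernel + adj.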
ConvTransposeUnpoolBase<HIPContext>::SetOutputSize(X, Y, C_in);
int N = X.dim32(0);
int C = X.dim32(1);
int H = X.dim32(2);
int W = X.ndim() > 3 ? X.dim32(3) : 1;
int D = X.ndim() > 4 ? X.dim32(4) : 1;
int N_out = Y->dim32(0);
int C_out = Y->dim32(1);
int H_out = Y->dim32(2);
int W_out = Y->ndim() > 3 ? Y->dim32(3) : 1;
int D_out = Y->ndim() > 4 ? Y->dim32(4) : 1;
bool input_changed = (X.dims() != mio_input_dims_);
bool weight_changed = (Weight.dims() != mio_weight_dims_);
if (input_changed || weight_changed) {
VLOG(1) << "Changing MIOpen descriptor configurations.";
if (input_changed) {
mio_input_dims_ = X.dims().vec();
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
bottom_desc_, miopenTypeWrapper<T>::type, N, C, H, W));
}
if (weight_changed) {
mio_weight_dims_ = Weight.dims().vec();
MIOPEN_ENFORCE(miopenInitConvolutionDescriptor(
conv_desc_,
miopenTranspose,
pad_t(),
pad_l(),
stride_h(),
stride_w(),
1,
1));
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
weight_desc_,
miopenTypeWrapper<T>::type,
C,
C_out,
kernel_h(),
kernel_w()));
}
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
top_desc_, miopenTypeWrapper<T>::type, N_out, C_out, H_out, W_out));
if (InputSize() == 3) {
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
bias_desc_, miopenTypeWrapper<T>::type, 1, C_out, 1, 1));
}
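// One-time algorithm selection: query the needed workspace, then let MIOpen's
// find API benchmark candidates and cache the winner in fwdAlgo_.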
while (!bestAlgoFound_) {
miopenConvAlgoPerf_t perf;
MIOPEN_ENFORCE(miopenConvolutionForwardGetWorkSpaceSize(
miopen_wrapper_.inline_miopen_handle(),
weight_desc_,
bottom_desc_,
conv_desc_,
top_desc_,
&fwdConvWsSize_));
if ((fwdConvWsSize_ > 0) && (fwdConvWs_ == nullptr)) {
HIP_CHECK(hipMalloc(&fwdConvWs_, fwdConvWsSize_));
}
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenFindConvolutionForwardAlgorithm(
state->miopen_handle(),
bottom_desc_,
X.template data<T>(),
weight_desc_,
Weight.template data<T>(),
conv_desc_,
top_desc_,
Y->template mutable_data<T>(),
requestAlgoCount_,
&returnedAlgoCount_,
&perf,
fwdConvWs_,
fwdConvWsSize_,
false));
});
bestAlgoFound_ = true;
fwdAlgo_ = perf.fwd_algo;
}
}
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenConvolutionForward(
state->miopen_handle(),
&alpha_,
bottom_desc_,
X.template data<T>(),
weight_desc_,
Weight.template data<T>(),
conv_desc_,
fwdAlgo_,
&beta_,
top_desc_,
Y->template mutable_data<T>(),
fwdConvWs_,
fwdConvWsSize_));
});
if (InputSize() == 3) {
auto& bias = Input(BIAS);
CAFFE_ENFORCE_EQ(bias.ndim(), 1);
CAFFE_ENFORCE_EQ(bias.dim32(0), C_out);
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenConvolutionForwardBias(
state->miopen_handle(),
&alpha_,
bias_desc_,
bias.template data<T>(),
&beta_,
top_desc_,
Y->template mutable_data<T>()));
});
}
return true;
}
template <typename T>
bool MIOPENConvTransposeGradientOp<T>::RunOnDevice() {
auto& X = Input(INPUT);
auto& Weight = Input(FILTER);
auto& dY = Input(OUTPUT_GRAD);
auto* dW = Output(FILTER_GRAD);
auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
dX->ResizeLike(X);
dW->ResizeLike(Weight);
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
CAFFE_ENFORCE(
Weight.ndim() == 4,
"ConvTransposeGradient op with MIOpen engine is supported only for 2D convolutions");
const int M = Weight.dim32(0);
int N = 0, C = 0, H = 0, W = 0, D = 0, N_out = 0, C_out = 0, H_out = 0,
W_out = 0, D_out = 0;
N = X.dim32(0);
C = X.dim32(1);
H = X.dim32(2);
W = X.ndim() > 3 ? X.dim32(3) : 1;
D = X.ndim() > 4 ? X.dim32(4) : 1;
N_out = dY.dim32(0);
C_out = dY.dim32(1);
H_out = dY.dim32(2);
W_out = dY.ndim() > 3 ? dY.dim32(3) : 1;
D_out = dY.ndim() > 4 ? dY.dim32(4) : 1;
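// dX is computed only when requested: third output present, or second output when no_bias is set.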
bool doBwdDataComputation = (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2)));
bool input_changed = (X.dims() != mio_input_dims_);
bool weight_changed = (Weight.dims() != mio_weight_dims_);
if (input_changed || weight_changed) {
VLOG(1) << "Changing MIOpen descriptor configurations.";
if (input_changed) {
mio_input_dims_ = X.dims().vec();
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
bottom_desc_, miopenTypeWrapper<T>::type, N, C, H, W));
}
if (weight_changed) {
mio_weight_dims_ = Weight.dims().vec();
MIOPEN_ENFORCE(miopenInitConvolutionDescriptor(
conv_desc_,
miopenTranspose,
pad_t(),
pad_l(),
stride_h(),
stride_w(),
1,
1));
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
weight_desc_,
miopenTypeWrapper<T>::type,
C,
C_out,
kernel_h(),
kernel_w()));
}
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
top_desc_, miopenTypeWrapper<T>::type, N_out, C_out, H_out, W_out));
if (!no_bias_) {
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
bias_desc_, miopenTypeWrapper<T>::type, 1, C_out, 1, 1));
}
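// One-time algorithm selection for the data- and weight-gradient paths, mirroring the forward op.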
while ((!bestDataAlgoFound_) && doBwdDataComputation) {
miopenConvAlgoPerf_t perf;
MIOPEN_ENFORCE(miopenConvolutionBackwardDataGetWorkSpaceSize(
miopen_wrapper_.inline_miopen_handle(),
top_desc_,
weight_desc_,
conv_desc_,
bottom_desc_,
&bwdDataWsSize_));
if ((bwdDataWsSize_ > 0) && (bwdDataWs_ == nullptr)) {
HIP_CHECK(hipMalloc(&bwdDataWs_, bwdDataWsSize_));
}
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenFindConvolutionBackwardDataAlgorithm(
state->miopen_handle(),
top_desc_,
dY.template data<T>(),
weight_desc_,
Weight.template data<T>(),
conv_desc_,
bottom_desc_,
dX->template mutable_data<T>(),
requestAlgoCount_,
&returnedAlgoCount_,
&perf,
bwdDataWs_,
bwdDataWsSize_,
false));
});
bestDataAlgoFound_ = true;
bwdDataAlgo_ = perf.bwd_data_algo;
}
while (!bestWeightAlgoFound_) {
miopenConvAlgoPerf_t perf;
MIOPEN_ENFORCE(miopenConvolutionBackwardWeightsGetWorkSpaceSize(
miopen_wrapper_.inline_miopen_handle(),
top_desc_,
bottom_desc_,
conv_desc_,
weight_desc_,
&bwdWeightWsSize_));
if ((bwdWeightWsSize_ > 0) && (bwdWeightWs_ == nullptr)) {
HIP_CHECK(hipMalloc(&bwdWeightWs_, bwdWeightWsSize_));
}
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenFindConvolutionBackwardWeightsAlgorithm(
state->miopen_handle(),
top_desc_,
dY.template data<T>(),
bottom_desc_,
X.template data<T>(),
conv_desc_,
weight_desc_,
dW->template mutable_data<T>(),
requestAlgoCount_,
&returnedAlgoCount_,
&perf,
bwdWeightWs_,
bwdWeightWsSize_,
false));
});
bestWeightAlgoFound_ = true;
bwdWeiAlgo_ = perf.bwd_weights_algo;
}
}
if (doBwdDataComputation) {
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenConvolutionBackwardData(
state->miopen_handle(),
&alpha_,
top_desc_,
dY.template data<T>(),
weight_desc_,
Weight.template data<T>(),
conv_desc_,
bwdDataAlgo_,
&beta_,
bottom_desc_,
dX->template mutable_data<T>(),
bwdDataWs_,
bwdDataWsSize_));
});
}
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenConvolutionBackwardWeights(
state->miopen_handle(),
&alpha_,
top_desc_,
dY.template data<T>(),
bottom_desc_,
X.template data<T>(),
conv_desc_,
bwdWeiAlgo_,
&beta_,
weight_desc_,
dW->template mutable_data<T>(),
bwdWeightWs_,
bwdWeightWsSize_));
});
////////////////////////////////////// BIAS ///////////////////////////
if (!no_bias_) {
auto* dbias = Output(BIAS_OR_INPUT_GRAD);
dbias->Resize(C_out);
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenConvolutionBackwardBias(
state->miopen_handle(),
&alpha_,
top_desc_,
dY.template data<T>(),
&beta_,
bias_desc_,
dbias->template mutable_data<T>()));
});
}
return true;
}
REGISTER_MIOPEN_OPERATOR(ConvTranspose, MIOPENConvTransposeOp<float>);
REGISTER_MIOPEN_OPERATOR(ConvTransposeGradient, MIOPENConvTransposeGradientOp<float>);
} // namespace caffe2
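
The gradient op's I/O contract follows the INPUT_TAGS/OUTPUT_TAGS above: inputs are X, the filter, and dY; outputs are dW, then either db or dX depending on no_bias, with dX produced only when a slot is provided for it. A hedged sketch of constructing it by hand with illustrative blob names (in practice Caffe2 generates this op from the forward definition):

from caffe2.python import core

# With bias (no_bias=0): outputs are (dw, db, dX).
# Without bias (no_bias=1): outputs are (dw, dX), and passing three
# outputs would trip the CAFFE_ENFORCE in the constructor.
grad_op = core.CreateOperator(
    "ConvTransposeGradient",
    ["X", "w", "dY"],
    ["dw", "db", "dX"],
    kernel=3, stride=2, pad=1,
    order="NCHW",
    engine="MIOPEN",
    no_bias=0,
)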

caffe2/python/operator_test/conv_transpose_test.py

@@ -8,6 +8,7 @@ import hypothesis.strategies as st
from caffe2.python import core
import caffe2.python.hypothesis_test_util as hu
+import caffe2.python.hip_test_util as hiputl
class TestConvolutionTranspose(hu.HypothesisTestCase):
@@ -37,6 +38,11 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
b = np.random.rand(output_channels).astype(np.float32) - 0.5
outputs = {}
for order in ["NCHW", "NHWC"]:
+# MIOpen doesn't work with NHWC; fall back to the plain HIP engine
+if hiputl.run_in_hip(gc, dc) and order == "NHWC":
+    tmp_engine = ""
+else:
+    tmp_engine = engine
op = core.CreateOperator(
"ConvTranspose",
["X", "w", "b"] if use_bias else ["X", "w"],
@@ -46,7 +52,7 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
pad=pad,
adj=adj,
order=order,
-engine=engine,
+engine=tmp_engine,
shared_buffer=int(shared_buffer),
device_option=gc,
)
@@ -102,6 +108,11 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
b = np.random.rand(output_channels).astype(np.float32) - 0.5
outputs = {}
for order in ["NCHW", "NHWC"]:
+if hiputl.run_in_hip(gc, dc) and order == "NHWC":
+    # MIOpen doesn't work with NHWC; fall back to the plain HIP engine
+    tmp_engine = ""
+else:
+    tmp_engine = engine
op = core.CreateOperator(
"ConvTranspose",
["X", "w", "b"] if use_bias else ["X", "w"],
@@ -111,7 +122,7 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
pads=[pad] * 4,
adjs=[adj] * 2,
order=order,
-engine=engine,
+engine=tmp_engine,
shared_buffer=int(shared_buffer),
device_option=gc,
)
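
Both hunks above apply the same NHWC guard; factored out, the engine-selection logic reads roughly as below (the helper name is mine):

import caffe2.python.hip_test_util as hiputl

def pick_engine(engine, order, gc, dc):
    # MIOpen only handles NCHW here, so NHWC cases fall back to the
    # plain HIP implementation by clearing the engine string.
    if hiputl.run_in_hip(gc, dc) and order == "NHWC":
        return ""
    return engine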
@@ -236,6 +247,8 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
order, engine, use_bias,
compute_dX, gc, dc):
assume(adj < stride)
+if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
+    assume(order == "NCHW")
X = np.random.rand(
batch_size, size, size, input_channels).astype(np.float32) - 0.5
w = np.random.rand(