Added MIOpen conv transpose op (#13938)

Summary:
This pull request contains the following changes:
1. Removing ConvTranspose-related code from caffe2/operators/hip/conv_op_miopen.cc
2. Adding the file caffe2/operators/hip/conv_transpose_op_miopen.cc
3. Modifying the tests to run the ConvTranspose op using the MIOpen engine

Differential Revision: D13055099

Pulled By: bddppq

fbshipit-source-id: ca284f8f9a073005b22013c375cc958257815865
commit f4e502a8c5
parent 5059beb644
Author: Ashish (2018-11-13 20:59:54 -08:00), committed by Facebook Github Bot
3 changed files with 513 additions and 17 deletions
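
For orientation, here is a minimal sketch of driving the new operator from Python, modeled on the tests below. The "MIOPEN" engine name comes from the REGISTER_MIOPEN_OPERATOR entries in the new file; the device setup, shapes, and blob names are illustrative assumptions.

import numpy as np
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace

# Assumes a ROCm build of Caffe2 with a HIP device available.
device = core.DeviceOption(caffe2_pb2.HIP, 0)

op = core.CreateOperator(
    "ConvTranspose",
    ["X", "w", "b"],
    ["Y"],
    kernel=3, stride=2, pad=1,
    order="NCHW",                      # the MIOpen path is NCHW-only (see the tests below)
    engine="MIOPEN",                   # dispatches to MIOPENConvTransposeOp<float>
    ws_nbytes_limit=64 * 1024 * 1024,  # workspace cap; matches the op's default
    exhaustive_search=False,           # read by the op; the MIOpen find calls here pass false regardless
    device_option=device,
)

workspace.FeedBlob("X", np.random.randn(2, 8, 16, 16).astype(np.float32), device)
# ConvTranspose filters are laid out as (C_in, C_out, kH, kW).
workspace.FeedBlob("w", np.random.randn(8, 4, 3, 3).astype(np.float32), device)
workspace.FeedBlob("b", np.random.randn(4).astype(np.float32), device)
workspace.RunOperatorOnce(op)

# H_out = (H - 1) * stride - 2 * pad + kernel + adj = (16 - 1) * 2 - 2 + 3 + 0 = 31
print(workspace.FetchBlob("Y").shape)  # expected: (2, 4, 31, 31)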

caffe2/operators/hip/conv_op_miopen.cc

@@ -47,19 +47,10 @@ class MIOPENConvOpBase : public ConvPoolOpBase<HIPContext> {
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&top_desc_for_bias_));
MIOPEN_ENFORCE(miopenCreateConvolutionDescriptor(&conv_desc_));
-if ((operator_def.type().substr(0, 6) == "Conv") ||
-(operator_def.type().substr(0, 14) == "ConvGradient")) {
-if (group_ > 1) {
+if(group_ > 1) {
mode_ = miopenGroupConv;
} else {
mode_ = miopenConvolution;
}
-} else if (
-(operator_def.type().substr(0, 7) == "Trans") ||
-(operator_def.type().substr(0, 15) == "TransGradient")) {
-mode_ = miopenTranspose;
-} else {
-LOG(FATAL) << "Unsupported convolution method: " << operator_def.type();
-mode_ = miopenConvolution;
-}
if (mode_ == miopenGroupConv) {
@@ -220,7 +211,7 @@ bool MIOPENConvOp::DoRunWithType() {
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
CAFFE_ENFORCE(
Weight.ndim() == 4,
"Conv/Trans op with MIOpen engine is supported only for 2D convolutions");
"Conv op with MIOpen engine is supported only for 2D convolutions");
const int M = Weight.dim32(0);
ConvPoolOpBase<HIPContext>::SetOutputSize(X, Y, M);
@@ -408,7 +399,7 @@ bool MIOPENConvGradientOp::DoRunWithType() {
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
CAFFE_ENFORCE(
Weight.ndim() == 4,
"ConvGradient/TransGradient op with MIOpen engine is supported only for 2D convolutions");
"ConvGradient op with MIOpen engine is supported only for 2D convolutions");
const int M = Weight.dim32(0);
int N = 0, C = 0, H = 0, W = 0, D = 0, N_out = 0, C_out = 0, H_out = 0,
@@ -638,6 +629,4 @@ bool MIOPENConvGradientOp::RunOnDevice() {
REGISTER_MIOPEN_OPERATOR(Conv, MIOPENConvOp);
REGISTER_MIOPEN_OPERATOR(ConvGradient, MIOPENConvGradientOp);
-REGISTER_MIOPEN_OPERATOR(Trans, MIOPENConvOp);
-REGISTER_MIOPEN_OPERATOR(TransGradient, MIOPENConvGradientOp);
} // namespace caffe2

caffe2/operators/hip/conv_transpose_op_miopen.cc

@@ -0,0 +1,494 @@
#include "caffe2/core/hip/context_hip.h"
#include "caffe2/core/hip/miopen_wrapper.h"
#include "caffe2/operators/conv_transpose_op.h"
namespace caffe2 {
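// Default MIOpen convolution workspace cap (64 MiB); overridable per op via the "ws_nbytes_limit" argument.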
static constexpr size_t kCONV_MIOPEN_WORKSPACE_LIMIT_BYTES = 64 * 1024 * 1024;
class MIOPENConvTransposeOpBase : public ConvTransposeUnpoolBase<HIPContext> {
public:
MIOPENConvTransposeOpBase(const OperatorDef& operator_def, Workspace* ws)
: ConvTransposeUnpoolBase<HIPContext>(operator_def, ws),
miopen_wrapper_(&context_),
miopen_ws_nbytes_limit_(OperatorBase::GetSingleArgument<size_t>(
"ws_nbytes_limit",
kCONV_MIOPEN_WORKSPACE_LIMIT_BYTES)),
miopen_state_(OperatorBase::GetSingleArgument<size_t>("miopen_state", 0)),
alpha_(OperatorBase::GetSingleArgument<float>("alpha", 1.0)),
beta_(OperatorBase::GetSingleArgument<float>("beta", 0.0)),
exhaustive_search_(
OperatorBase::GetSingleArgument<bool>("exhaustive_search", false)) {
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&bottom_desc_));
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&bias_desc_));
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&weight_desc_));
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&top_desc_));
MIOPEN_ENFORCE(miopenCreateTensorDescriptor(&top_desc_for_bias_));
MIOPEN_ENFORCE(miopenCreateConvolutionDescriptor(&conv_desc_));
}
~MIOPENConvTransposeOpBase() {
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(bottom_desc_));
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(bias_desc_));
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(weight_desc_));
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(top_desc_));
MIOPEN_ENFORCE(miopenDestroyTensorDescriptor(top_desc_for_bias_));
MIOPEN_ENFORCE(miopenDestroyConvolutionDescriptor(conv_desc_));
}
protected:
vector<int64_t> mio_input_dims_;
vector<int64_t> mio_weight_dims_;
MIOPENWrapper miopen_wrapper_;
miopenTensorDescriptor_t bottom_desc_;
miopenTensorDescriptor_t bias_desc_;
miopenTensorDescriptor_t weight_desc_;
miopenTensorDescriptor_t top_desc_;
miopenTensorDescriptor_t top_desc_for_bias_;
miopenConvolutionDescriptor_t conv_desc_;
size_t miopen_state_;
const size_t miopen_ws_nbytes_limit_;
bool exhaustive_search_;
const float alpha_;
const float beta_;
};
template <typename T>
class MIOPENConvTransposeOp final : public MIOPENConvTransposeOpBase {
public:
MIOPENConvTransposeOp(const OperatorDef& operator_def, Workspace* ws)
: MIOPENConvTransposeOpBase(operator_def, ws),
requestAlgoCount_(
OperatorBase::GetSingleArgument<int>("requestAlgoCount_", 1)),
returnedAlgoCount_(
OperatorBase::GetSingleArgument<int>("returnedAlgoCount_", 1)),
bestAlgoFound_(
OperatorBase::GetSingleArgument<bool>("bestAlgoFound_", false)),
fwdConvWs_(nullptr),
fwdConvWsSize_(0),
fwdAlgo_(miopenConvolutionFwdAlgoGEMM) {}
~MIOPENConvTransposeOp() {
if (fwdConvWs_) {
hipFree(fwdConvWs_);
fwdConvWs_ = nullptr;
fwdConvWsSize_ = 0;
}
}
bool RunOnDevice() override;
private:
const int requestAlgoCount_;
int returnedAlgoCount_;
bool bestAlgoFound_;
char* fwdConvWs_;
size_t fwdConvWsSize_;
miopenConvFwdAlgorithm_t fwdAlgo_;
// Input: X, W, b
// Output: Y
INPUT_TAGS(INPUT, FILTER, BIAS);
};
template <typename T>
class MIOPENConvTransposeGradientOp final : public MIOPENConvTransposeOpBase {
public:
MIOPENConvTransposeGradientOp(const OperatorDef& operator_def, Workspace* ws)
: MIOPENConvTransposeOpBase(operator_def, ws),
no_bias_(OperatorBase::GetSingleArgument<int>("no_bias", 0)),
requestAlgoCount_(
OperatorBase::GetSingleArgument<int>("requestAlgoCount_", 1)),
returnedAlgoCount_(
OperatorBase::GetSingleArgument<int>("returnedAlgoCount_", 1)),
bestDataAlgoFound_(
OperatorBase::GetSingleArgument<bool>("bestAlgoFound", false)),
bestWeightAlgoFound_(
OperatorBase::GetSingleArgument<bool>("bestAlgoFound", false)),
bwdWeightWs_(nullptr),
bwdWeightWsSize_(0),
bwdDataWs_(nullptr),
bwdDataWsSize_(0),
bwdWeiAlgo_(miopenConvolutionBwdWeightsAlgoGEMM),
bwdDataAlgo_(miopenConvolutionBwdDataAlgoGEMM) {
CAFFE_ENFORCE(
!(no_bias_ && OutputSize() == 3),
"If bias is not present, you should not have 3 grad output.");
}
~MIOPENConvTransposeGradientOp() {
if (bwdWeightWs_) {
hipFree(bwdWeightWs_);
bwdWeightWs_ = nullptr;
bwdWeightWsSize_ = 0;
}
if (bwdDataWs_) {
hipFree(bwdDataWs_);
bwdDataWs_ = nullptr;
bwdDataWsSize_ = 0;
}
}
bool RunOnDevice() override;
private:
bool no_bias_;
const int requestAlgoCount_;
int returnedAlgoCount_;
bool bestDataAlgoFound_;
bool bestWeightAlgoFound_;
miopenConvBwdWeightsAlgorithm_t bwdWeiAlgo_;
miopenConvBwdDataAlgorithm_t bwdDataAlgo_;
size_t bwdWeightWsSize_;
size_t bwdDataWsSize_;
char* bwdWeightWs_;
char* bwdDataWs_;
// input: X, W, dY
// output: dW, db, and optionally dX
INPUT_TAGS(INPUT, FILTER, OUTPUT_GRAD);
OUTPUT_TAGS(FILTER_GRAD, BIAS_OR_INPUT_GRAD, INPUT_GRAD);
};
////////////////////////////////////////////////////////////////////////////////
// Implementations
////////////////////////////////////////////////////////////////////////////////
template <typename T>
bool MIOPENConvTransposeOp<T>::RunOnDevice() {
auto& X = Input(INPUT);
auto& Weight = Input(FILTER);
auto* Y = Output(0);
// Figure out the output shape
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
CAFFE_ENFORCE(
Weight.ndim() == 4,
"ConvTranspose op with MIOpen engine is supported only for 2D convolutions");
const int C_in = Weight.dim32(1);
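// Filters are (C_in, C_out, kH, kW); SetOutputSize applies the standard
// transposed-conv formula, H_out = (H - 1) * stride - pad_t - pad_b + kernel + adj.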
ConvTransposeUnpoolBase<HIPContext>::SetOutputSize(X, Y, C_in);
int N = X.dim32(0);
int C = X.dim32(1);
int H = X.dim32(2);
int W = X.ndim() > 3 ? X.dim32(3) : 1;
int D = X.ndim() > 4 ? X.dim32(4) : 1;
int N_out = Y->dim32(0);
int C_out = Y->dim32(1);
int H_out = Y->dim32(2);
int W_out = Y->ndim() > 3 ? Y->dim32(3) : 1;
int D_out = Y->ndim() > 4 ? Y->dim32(4) : 1;
bool input_changed = (X.dims() != mio_input_dims_);
bool weight_changed = (Weight.dims() != mio_weight_dims_);
if (input_changed || weight_changed) {
VLOG(1) << "Changing MIOpen descriptor configurations.";
if (input_changed) {
mio_input_dims_ = X.dims().vec();
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
bottom_desc_, miopenTypeWrapper<T>::type, N, C, H, W));
}
if (weight_changed) {
mio_weight_dims_ = Weight.dims().vec();
MIOPEN_ENFORCE(miopenInitConvolutionDescriptor(
conv_desc_,
miopenTranspose,
pad_t(),
pad_l(),
stride_h(),
stride_w(),
1,
1));
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
weight_desc_,
miopenTypeWrapper<T>::type,
C,
C_out,
kernel_h(),
kernel_w()));
}
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
top_desc_, miopenTypeWrapper<T>::type, N_out, C_out, H_out, W_out));
if (InputSize() == 3) {
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
bias_desc_, miopenTypeWrapper<T>::type, 1, C_out, 1, 1));
}
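// One-time algorithm selection: query the needed workspace, then let MIOpen's
// find API benchmark candidates and cache the winner in fwdAlgo_.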
while (!bestAlgoFound_) {
miopenConvAlgoPerf_t perf;
MIOPEN_ENFORCE(miopenConvolutionForwardGetWorkSpaceSize(
miopen_wrapper_.inline_miopen_handle(),
weight_desc_,
bottom_desc_,
conv_desc_,
top_desc_,
&fwdConvWsSize_));
if ((fwdConvWsSize_ > 0) && (fwdConvWs_ == nullptr)) {
HIP_CHECK(hipMalloc(&fwdConvWs_, fwdConvWsSize_));
}
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenFindConvolutionForwardAlgorithm(
state->miopen_handle(),
bottom_desc_,
X.template data<T>(),
weight_desc_,
Weight.template data<T>(),
conv_desc_,
top_desc_,
Y->template mutable_data<T>(),
requestAlgoCount_,
&returnedAlgoCount_,
&perf,
fwdConvWs_,
fwdConvWsSize_,
false));
});
bestAlgoFound_ = true;
fwdAlgo_ = perf.fwd_algo;
}
}
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenConvolutionForward(
state->miopen_handle(),
&alpha_,
bottom_desc_,
X.template data<T>(),
weight_desc_,
Weight.template data<T>(),
conv_desc_,
fwdAlgo_,
&beta_,
top_desc_,
Y->template mutable_data<T>(),
fwdConvWs_,
fwdConvWsSize_));
});
if (InputSize() == 3) {
auto& bias = Input(BIAS);
CAFFE_ENFORCE_EQ(bias.ndim(), 1);
CAFFE_ENFORCE_EQ(bias.dim32(0), C_out);
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenConvolutionForwardBias(
state->miopen_handle(),
&alpha_,
bias_desc_,
bias.template data<T>(),
&beta_,
top_desc_,
Y->template mutable_data<T>()));
});
}
return true;
}
template <typename T>
bool MIOPENConvTransposeGradientOp<T>::RunOnDevice() {
auto& X = Input(INPUT);
auto& Weight = Input(FILTER);
auto& dY = Input(OUTPUT_GRAD);
auto* dW = Output(FILTER_GRAD);
auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
dX->ResizeLike(X);
dW->ResizeLike(Weight);
CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
CAFFE_ENFORCE(
Weight.ndim() == 4,
"ConvTransposeGradient op with MIOpen engine is supported only for 2D convolutions");
const int M = Weight.dim32(0);
int N = 0, C = 0, H = 0, W = 0, D = 0, N_out = 0, C_out = 0, H_out = 0,
W_out = 0, D_out = 0;
N = X.dim32(0);
C = X.dim32(1);
H = X.dim32(2);
W = X.ndim() > 3 ? X.dim32(3) : 1;
D = X.ndim() > 4 ? X.dim32(4) : 1;
N_out = dY.dim32(0);
C_out = dY.dim32(1);
H_out = dY.dim32(2);
W_out = dY.ndim() > 3 ? dY.dim32(3) : 1;
D_out = dY.ndim() > 4 ? dY.dim32(4) : 1;
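// dX is computed only when requested: third output present, or second output when no_bias is set.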
bool doBwdDataComputation = (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2)));
bool input_changed = (X.dims() != mio_input_dims_);
bool weight_changed = (Weight.dims() != mio_weight_dims_);
if (input_changed || weight_changed) {
VLOG(1) << "Changing MIOpen descriptor configurations.";
if (input_changed) {
mio_input_dims_ = X.dims().vec();
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
bottom_desc_, miopenTypeWrapper<T>::type, N, C, H, W));
}
if (weight_changed) {
mio_weight_dims_ = Weight.dims().vec();
MIOPEN_ENFORCE(miopenInitConvolutionDescriptor(
conv_desc_,
miopenTranspose,
pad_t(),
pad_l(),
stride_h(),
stride_w(),
1,
1));
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
weight_desc_,
miopenTypeWrapper<T>::type,
C,
C_out,
kernel_h(),
kernel_w()));
}
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
top_desc_, miopenTypeWrapper<T>::type, N_out, C_out, H_out, W_out));
if (!no_bias_) {
MIOPEN_ENFORCE(miopenSet4dTensorDescriptor(
bias_desc_, miopenTypeWrapper<T>::type, 1, C_out, 1, 1));
}
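// One-time algorithm selection for the data- and weight-gradient paths, mirroring the forward op.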
while ((!bestDataAlgoFound_) && doBwdDataComputation) {
miopenConvAlgoPerf_t perf;
MIOPEN_ENFORCE(miopenConvolutionBackwardDataGetWorkSpaceSize(
miopen_wrapper_.inline_miopen_handle(),
top_desc_,
weight_desc_,
conv_desc_,
bottom_desc_,
&bwdDataWsSize_));
if ((bwdDataWsSize_ > 0) && (bwdDataWs_ == nullptr)) {
HIP_CHECK(hipMalloc(&bwdDataWs_, bwdDataWsSize_));
}
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenFindConvolutionBackwardDataAlgorithm(
state->miopen_handle(),
top_desc_,
dY.template data<T>(),
weight_desc_,
Weight.template data<T>(),
conv_desc_,
bottom_desc_,
dX->template mutable_data<T>(),
requestAlgoCount_,
&returnedAlgoCount_,
&perf,
bwdDataWs_,
bwdDataWsSize_,
false));
});
bestDataAlgoFound_ = true;
bwdDataAlgo_ = perf.bwd_data_algo;
}
while (!bestWeightAlgoFound_) {
miopenConvAlgoPerf_t perf;
MIOPEN_ENFORCE(miopenConvolutionBackwardWeightsGetWorkSpaceSize(
miopen_wrapper_.inline_miopen_handle(),
top_desc_,
bottom_desc_,
conv_desc_,
weight_desc_,
&bwdWeightWsSize_));
if ((bwdWeightWsSize_ > 0) && (bwdWeightWs_ == nullptr)) {
HIP_CHECK(hipMalloc(&bwdWeightWs_, bwdWeightWsSize_));
}
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenFindConvolutionBackwardWeightsAlgorithm(
state->miopen_handle(),
top_desc_,
dY.template data<T>(),
bottom_desc_,
X.template data<T>(),
conv_desc_,
weight_desc_,
dW->template mutable_data<T>(),
requestAlgoCount_,
&returnedAlgoCount_,
&perf,
bwdWeightWs_,
bwdWeightWsSize_,
false));
});
bestWeightAlgoFound_ = true;
bwdWeiAlgo_ = perf.bwd_weights_algo;
}
}
if (doBwdDataComputation) {
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenConvolutionBackwardData(
state->miopen_handle(),
&alpha_,
top_desc_,
dY.template data<T>(),
weight_desc_,
Weight.template data<T>(),
conv_desc_,
bwdDataAlgo_,
&beta_,
bottom_desc_,
dX->template mutable_data<T>(),
bwdDataWs_,
bwdDataWsSize_));
});
}
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenConvolutionBackwardWeights(
state->miopen_handle(),
&alpha_,
top_desc_,
dY.template data<T>(),
bottom_desc_,
X.template data<T>(),
conv_desc_,
bwdWeiAlgo_,
&beta_,
weight_desc_,
dW->template mutable_data<T>(),
bwdWeightWs_,
bwdWeightWsSize_));
});
////////////////////////////////////// BIAS ///////////////////////////
if (!no_bias_) {
auto* dbias = Output(BIAS_OR_INPUT_GRAD);
dbias->Resize(C_out);
miopen_wrapper_.with_miopen_state(miopen_state_, [&](MIOPENState* state) {
MIOPEN_ENFORCE(miopenConvolutionBackwardBias(
state->miopen_handle(),
&alpha_,
top_desc_,
dY.template data<T>(),
&beta_,
bias_desc_,
dbias->template mutable_data<T>()));
});
}
return true;
}
REGISTER_MIOPEN_OPERATOR(ConvTranspose, MIOPENConvTransposeOp<float>);
REGISTER_MIOPEN_OPERATOR(ConvTransposeGradient, MIOPENConvTransposeGradientOp<float>);
} // namespace caffe2
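
The gradient op's I/O contract follows the INPUT_TAGS/OUTPUT_TAGS above: inputs are X, the filter, and dY; outputs are dW, then either db or dX depending on no_bias, with dX produced only when a slot is provided for it. A hedged sketch of constructing it by hand with illustrative blob names (in practice Caffe2 generates this op from the forward definition):

from caffe2.python import core

# With bias (no_bias=0): outputs are (dw, db, dX).
# Without bias (no_bias=1): outputs are (dw, dX), and passing three
# outputs would trip the CAFFE_ENFORCE in the constructor.
grad_op = core.CreateOperator(
    "ConvTransposeGradient",
    ["X", "w", "dY"],
    ["dw", "db", "dX"],
    kernel=3, stride=2, pad=1,
    order="NCHW",
    engine="MIOPEN",
    no_bias=0,
)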

caffe2/python/operator_test/conv_transpose_test.py

@@ -8,6 +8,7 @@ import hypothesis.strategies as st
from caffe2.python import core
import caffe2.python.hypothesis_test_util as hu
+import caffe2.python.hip_test_util as hiputl
class TestConvolutionTranspose(hu.HypothesisTestCase):
@@ -37,6 +38,11 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
b = np.random.rand(output_channels).astype(np.float32) - 0.5
outputs = {}
for order in ["NCHW", "NHWC"]:
+# MIOpen doesn't work with NHWC; fall back to the plain HIP engine
+if hiputl.run_in_hip(gc, dc) and order == "NHWC":
+    tmp_engine = ""
+else:
+    tmp_engine = engine
op = core.CreateOperator(
"ConvTranspose",
["X", "w", "b"] if use_bias else ["X", "w"],
@@ -46,7 +52,7 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
pad=pad,
adj=adj,
order=order,
-engine=engine,
+engine=tmp_engine,
shared_buffer=int(shared_buffer),
device_option=gc,
)
@@ -102,6 +108,11 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
b = np.random.rand(output_channels).astype(np.float32) - 0.5
outputs = {}
for order in ["NCHW", "NHWC"]:
+if hiputl.run_in_hip(gc, dc) and order == "NHWC":
+    # MIOpen doesn't work with NHWC; fall back to the plain HIP engine
+    tmp_engine = ""
+else:
+    tmp_engine = engine
op = core.CreateOperator(
"ConvTranspose",
["X", "w", "b"] if use_bias else ["X", "w"],
@@ -111,7 +122,7 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
pads=[pad] * 4,
adjs=[adj] * 2,
order=order,
-engine=engine,
+engine=tmp_engine,
shared_buffer=int(shared_buffer),
device_option=gc,
)
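
Both hunks above apply the same NHWC guard; factored out, the engine-selection logic reads roughly as below (the helper name is mine):

import caffe2.python.hip_test_util as hiputl

def pick_engine(engine, order, gc, dc):
    # MIOpen only handles NCHW here, so NHWC cases fall back to the
    # plain HIP implementation by clearing the engine string.
    if hiputl.run_in_hip(gc, dc) and order == "NHWC":
        return ""
    return engine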
@@ -236,6 +247,8 @@ class TestConvolutionTranspose(hu.HypothesisTestCase):
order, engine, use_bias,
compute_dX, gc, dc):
assume(adj < stride)
+if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
+    assume(order == "NCHW")
X = np.random.rand(
batch_size, size, size, input_channels).astype(np.float32) - 0.5
w = np.random.rand(