[AMD] TunableOp take priority over DISABLE_ADDMM_HIP_LT (#124161)
Summary: It is confusing that when both DISABLE_ADDMM_HIP_LT and PYTORCH_TUNABLEOP_ENABLED are set, the former takes priority. This happens because that setting routes through gemm_and_bias, while TunableOp is only integrated with the gemm path. Until TunableOp is integrated with gemm_and_bias, we simply let TunableOp take priority.

Test Plan: Ran a simple linear program and verified the behavior.

Differential Revision: D56183954

Pull Request resolved: https://github.com/pytorch/pytorch/pull/124161
Approved by: https://github.com/jeffdaily, https://github.com/nmacchioni
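A minimal sketch of the test plan's "simple linear program", assuming a ROCm build; the environment-variable values, tensor shapes, and model are illustrative assumptions, not taken from the commit:

```python
# Repro sketch: with TunableOp enabled, it should now take priority over the
# DISABLE_ADDMM_HIP_LT / hipblaslt (gemm_and_bias) heuristic on ROCm builds.
import os
os.environ["PYTORCH_TUNABLEOP_ENABLED"] = "1"   # enable TunableOp
os.environ["DISABLE_ADDMM_HIP_LT"] = "0"        # assumed value; opts into the Lt path, which TunableOp now overrides

import torch

linear = torch.nn.Linear(64, 64, device="cuda")  # "cuda" is the HIP device on ROCm builds
x = torch.randn(8, 64, device="cuda")
y = linear(x)  # addmm with bias; after this change it goes through the tunable gemm path
print(y.shape)
```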
This commit is contained in:
parent
f87c788a34
commit
661fd23640
@@ -6,6 +6,7 @@
 #include <ATen/OpMathType.h>
 #include <ATen/TensorUtils.h>
 #include <ATen/cuda/CUDABlas.h>
+#include <ATen/cuda/tunable/Tunable.h>
 #include <ATen/native/Resize.h>
 #include <c10/util/MaybeOwned.h>
@@ -174,6 +175,12 @@ cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activa
 static bool getDisableAddmmCudaLt() {
     static const char* env_value = std::getenv("DISABLE_ADDMM_CUDA_LT");
 #ifdef USE_ROCM
+    // if we enable tunable op, it'll take priority over just hipblaslt (heuristics)
+    // note the current tunable op is not the hipblaslt path (gemm_and_bias)
+    auto tuning_ctx = at::cuda::tunable::getTuningContext();
+    if (tuning_ctx->IsTunableOpEnabled()) {
+      return true;
+    }
     // allow both CUDA and HIP env var names for ROCm builds
     // also, current default for ROCm builds is disable by default
     if (env_value == nullptr) {
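For readers skimming the hunk, a short Python sketch of the resulting decision order on ROCm builds; this is an illustration only, and the fallback behavior beyond the shown context is inferred from the in-diff comments rather than quoted from the file:

```python
import os

def rocm_addmm_lt_disabled_sketch() -> bool:
    # Illustrative restatement of getDisableAddmmCudaLt() on ROCm after this change;
    # a true return value means the hipblaslt (gemm_and_bias) path is skipped.
    if os.getenv("PYTORCH_TUNABLEOP_ENABLED") == "1":
        return True  # TunableOp now takes priority over the hipblaslt heuristic
    # Both env-var spellings are honored on ROCm builds (assumed fallback per the comments).
    env_value = os.getenv("DISABLE_ADDMM_CUDA_LT") or os.getenv("DISABLE_ADDMM_HIP_LT")
    # ROCm default is "disabled" unless explicitly opted back in with "0" (assumed).
    return env_value != "0"
```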