From 24ee47b24613b184c7e3b74e1b4aa03e49d703b4 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Tue, 6 Apr 2021 14:49:22 +0000 Subject: [PATCH 1/2] Enabling MLIR generated kernels by default on ROCm --- .bazelrc | 2 -- tensorflow/core/kernels/mlir_generated/build_defs.bzl | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.bazelrc b/.bazelrc index 9d57d2c2950..fcef170dded 100644 --- a/.bazelrc +++ b/.bazelrc @@ -248,8 +248,6 @@ build:tensorrt --repo_env TF_NEED_TENSORRT=1 build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true build:rocm --repo_env TF_NEED_ROCM=1 -# Generated kernels are not yet supported on ROCm. -build:rocm --//tensorflow/core/kernels/mlir_generated:enable_gpu=false # Options extracted from configure script build:numa --define=with_numa_support=true diff --git a/tensorflow/core/kernels/mlir_generated/build_defs.bzl b/tensorflow/core/kernels/mlir_generated/build_defs.bzl index 530e61a7b5a..f03d2b1670d 100644 --- a/tensorflow/core/kernels/mlir_generated/build_defs.bzl +++ b/tensorflow/core/kernels/mlir_generated/build_defs.bzl @@ -271,6 +271,7 @@ def _gen_kernel_library( ) # We have to use a sh_test instead of build_test because it doesn't properly find the dependent targets. + gpu_arch_option = "sm_70,compute_75" if cuda_gpu_architectures() else ",".join(rocm_gpu_architectures()) native.sh_test( name = "{op}_{platform}_{type}_{output_type}_gen_test".format( op = op, @@ -288,7 +289,7 @@ def _gen_kernel_library( type = type, output_type = output_type, ), - "--cpu_codegen=true" if enable_cpu else "--arch=sm_70,compute_75", + "--cpu_codegen=true" if enable_cpu else "--arch={}".format(gpu_arch_option), ], size = "medium", data = [ From 4065c404a632aa7001399ea76d53694c5ec4d680 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Tue, 6 Apr 2021 15:51:09 +0000 Subject: [PATCH 2/2] Lowering the tolerance in gpu_unary_ops_test for the "acos" op. 
The default exact-match comparison errors on ROCm for the "acos" op due to mis-compares on one particular value after the 7th decimal place (for float32) and the 16th decimal place (for float64). See below. ``` ... ... tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044724799692630768 not equal to 0.044724646955728531) Expected: true i = 13 tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044724799692630768 not equal to 0.044724646955728531) Expected: true i = 27 tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044724799692630768 not equal to 0.044724646955728531) Expected: true i = 41 tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044724799692630768 not equal to 0.044724646955728531) Expected: true i = 55 tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044724799692630768 not equal to 0.044724646955728531) Expected: true i = 69 tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044724799692630768 not equal to 0.044724646955728531) Expected: true i = 83 [ FAILED ] UnaryOpsTest.AcosDT_FLOATDT_FLOAT (5 ms) ... ... 
tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044725087168733454 not equal to 0.044725087168733128) Expected: true i = 13 tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044725087168733454 not equal to 0.044725087168733128) Expected: true i = 27 tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044725087168733454 not equal to 0.044725087168733128) Expected: true i = 41 tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044725087168733454 not equal to 0.044725087168733128) Expected: true i = 55 tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044725087168733454 not equal to 0.044725087168733128) Expected: true i = 69 tensorflow/core/framework/tensor_testutil.cc:128: Failure Value of: IsEqual(Tx[i], Ty[i], t) Actual: false (0.044725087168733454 not equal to 0.044725087168733128) Expected: true i = 83 [ FAILED ] UnaryOpsTest.AcosDT_DOUBLEDT_DOUBLE (5 ms) ... ... ``` --- .../core/kernels/mlir_generated/gpu_unary_ops_test.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/mlir_generated/gpu_unary_ops_test.cc b/tensorflow/core/kernels/mlir_generated/gpu_unary_ops_test.cc index 7ce23099bb9..f9e08d73219 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_unary_ops_test.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_unary_ops_test.cc @@ -58,12 +58,17 @@ GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES( // Test only values in the function domain. The otherwise returned nan value // fails comparison for equality. 
+#if defined(TENSORFLOW_USE_ROCM) +auto acos_test_config = test::OpsTestConfig(); +#else +auto acos_test_config = test::OpsTestConfig().ExpectStrictlyEqual(); +#endif GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES( Acos, DT_FLOAT, DT_FLOAT, test::DefaultInputBetweenZeroAndOne(), - std::acos, test::OpsTestConfig().ExpectStrictlyEqual()) + std::acos, acos_test_config) GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES( Acos, DT_DOUBLE, DT_DOUBLE, test::DefaultInputBetweenZeroAndOne(), - std::acos, test::OpsTestConfig().ExpectStrictlyEqual()) + std::acos, acos_test_config) /// Test `tf.Acosh`.