Merge pull request #48507 from ROCmSoftwarePlatform/google_upstream_r25_port_pr_48187

[r2.5 port][ROCm] Port PR 48187 to r2.5
This commit is contained in:
Mihai Maruseac 2021-04-22 15:26:58 -07:00 committed by GitHub
commit 8e0516e743
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 218 additions and 98 deletions

View File

@ -810,9 +810,9 @@ Status AMDGPUTargetModuleLinker(llvm::Module* module, GpuVersion gpu_version,
// upstream commit), the following mapping will need to change // upstream commit), the following mapping will need to change
std::string MapGCNArchNameTokenToFeatureStr(const std::string& token) { std::string MapGCNArchNameTokenToFeatureStr(const std::string& token) {
if (token == "sramecc+") { if (token == "sramecc+") {
return "+sram-ecc"; return "+sramecc";
} else if (token == "sramecc-") { } else if (token == "sramecc-") {
return "-sram-ecc"; return "-sramecc";
} else if (token == "xnack+") { } else if (token == "xnack+") {
return "+xnack"; return "+xnack";
} else if (token == "xnack-") { } else if (token == "xnack-") {

View File

@ -1196,6 +1196,7 @@ xla_test(
], ],
shard_count = 50, shard_count = 50,
tags = [ tags = [
"no_rocm",
"optonly", "optonly",
], ],
deps = CONVOLUTION_TEST_DEPS + [ deps = CONVOLUTION_TEST_DEPS + [
@ -1261,6 +1262,7 @@ xla_test(
backend_args = {"gpu": ["--xla_backend_extra_options=xla_gpu_experimental_conv_disable_layout_heuristic"]}, backend_args = {"gpu": ["--xla_backend_extra_options=xla_gpu_experimental_conv_disable_layout_heuristic"]},
backends = ["gpu"], backends = ["gpu"],
shard_count = 25, shard_count = 25,
tags = ["no_rocm"],
deps = CONVOLUTION_TEST_DEPS + [ deps = CONVOLUTION_TEST_DEPS + [
"@com_google_absl//absl/memory", "@com_google_absl//absl/memory",
"@com_google_absl//absl/strings", "@com_google_absl//absl/strings",

View File

@ -1092,7 +1092,6 @@ cuda_py_test(
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_cuda_asan", # times out "no_cuda_asan", # times out
"no_rocm",
"notsan", # b/173031470 "notsan", # b/173031470
], ],
deps = [ deps = [
@ -1232,7 +1231,6 @@ distribute_py_test(
"multi_and_single_gpu", "multi_and_single_gpu",
"no_cuda_asan", # times out "no_cuda_asan", # times out
"no_oss", # b/178656226 "no_oss", # b/178656226
"no_rocm",
"noasan", # b/175816710 "noasan", # b/175816710
"notsan", # b/168645872 "notsan", # b/168645872
], ],
@ -1290,7 +1288,6 @@ distribute_py_test(
main = "distribute_utils_test.py", main = "distribute_utils_test.py",
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_rocm",
], ],
deps = [ deps = [
":combinations", ":combinations",
@ -1318,7 +1315,6 @@ distribute_py_test(
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_cuda_asan", # times out "no_cuda_asan", # times out
"no_rocm",
], ],
tpu_tags = [ tpu_tags = [
"no_oss", # b/150954621 Target too big to run serially reliably. "no_oss", # b/150954621 Target too big to run serially reliably.
@ -1769,7 +1765,6 @@ distribute_py_test(
shard_count = 2, shard_count = 2,
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_rocm",
"notsan", # TODO(b/160006974) "notsan", # TODO(b/160006974)
], ],
xla_enable_strict_auto_jit = True, xla_enable_strict_auto_jit = True,
@ -1802,7 +1797,6 @@ distribute_py_test(
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_cuda_asan", # times out "no_cuda_asan", # times out
"no_rocm",
"notsan", # TODO(b/160006974) "notsan", # TODO(b/160006974)
], ],
xla_enable_strict_auto_jit = True, xla_enable_strict_auto_jit = True,
@ -1876,7 +1870,6 @@ distribute_py_test(
disable_mlir_bridge = False, disable_mlir_bridge = False,
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_rocm",
], ],
deps = [ deps = [
":combinations", ":combinations",

View File

@ -248,7 +248,6 @@ distribute_py_test(
main = "custom_training_loop_metrics_test.py", main = "custom_training_loop_metrics_test.py",
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_rocm",
], ],
deps = [ deps = [
":strategy_combinations", ":strategy_combinations",
@ -270,7 +269,6 @@ distribute_py_test(
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_cuda_asan", # times out "no_cuda_asan", # times out
"no_rocm",
"notsan", # TODO(b/170954243) "notsan", # TODO(b/170954243)
], ],
tpu_tags = [ tpu_tags = [
@ -536,7 +534,7 @@ distribute_py_test(
distribute_py_test( distribute_py_test(
name = "keras_rnn_model_correctness_test", name = "keras_rnn_model_correctness_test",
size = "medium", size = "large",
srcs = ["keras_rnn_model_correctness_test.py"], srcs = ["keras_rnn_model_correctness_test.py"],
full_precision = True, full_precision = True,
main = "keras_rnn_model_correctness_test.py", main = "keras_rnn_model_correctness_test.py",
@ -545,7 +543,7 @@ distribute_py_test(
shard_count = 31, shard_count = 31,
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_rocm", "no_rocm", # Would require size large, but that effectively disables the test for presubmits.
"no_windows_gpu", "no_windows_gpu",
"noasan", # TODO(b/337374867) fails with -fsanitize=null "noasan", # TODO(b/337374867) fails with -fsanitize=null
"notpu", # TODO(b/153672562) "notpu", # TODO(b/153672562)
@ -605,7 +603,6 @@ distribute_py_test(
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_cuda_asan", # times out "no_cuda_asan", # times out
"no_rocm",
"no_windows_gpu", "no_windows_gpu",
"notsan", "notsan",
], ],
@ -825,6 +822,7 @@ distribute_py_test(
tags = [ tags = [
"multi_and_single_gpu", "multi_and_single_gpu",
"no_cuda_asan", # times out "no_cuda_asan", # times out
"no_rocm",
], ],
xla_tags = [ xla_tags = [
"no_cuda_asan", # times out "no_cuda_asan", # times out

View File

@ -1735,7 +1735,6 @@ cuda_py_test(
name = "betainc_op_test", name = "betainc_op_test",
size = "small", size = "small",
srcs = ["betainc_op_test.py"], srcs = ["betainc_op_test.py"],
tags = ["no_rocm"], # ROCm 3.9 regression
xla_tags = [ xla_tags = [
"no_cuda_asan", # times out "no_cuda_asan", # times out
], ],
@ -3238,6 +3237,7 @@ cuda_py_test(
srcs = ["extract_image_patches_grad_test.py"], srcs = ["extract_image_patches_grad_test.py"],
shard_count = 15, shard_count = 15,
tags = [ tags = [
"no_rocm",
"nomac", # b/181799478 "nomac", # b/181799478
"notap", # b/31080670 "notap", # b/31080670
], ],
@ -3600,6 +3600,7 @@ cuda_py_test(
size = "medium", size = "medium",
srcs = ["tensordot_op_test.py"], srcs = ["tensordot_op_test.py"],
shard_count = 20, shard_count = 20,
tags = ["no_rocm"],
xla_enable_strict_auto_jit = False, # b/161856380 xla_enable_strict_auto_jit = False, # b/161856380
deps = [ deps = [
"//tensorflow/python:array_ops", "//tensorflow/python:array_ops",

View File

@ -61,7 +61,6 @@ cuda_py_test(
size = "small", size = "small",
srcs = ["beta_test.py"], srcs = ["beta_test.py"],
tags = [ tags = [
"no_rocm", # ROCm 3.9 regression
"notsan", # b/173653918 "notsan", # b/173653918
], ],
xla_tags = [ xla_tags = [

View File

@ -41,7 +41,6 @@ cuda_py_test(
main = "csr_sparse_matrix_ops_test.py", main = "csr_sparse_matrix_ops_test.py",
shard_count = 10, shard_count = 10,
tags = [ tags = [
"no_rocm", # ROCm 3.8 regression
"notsan", # b/149115441 "notsan", # b/149115441
], ],
deps = [ deps = [

View File

@ -33,7 +33,6 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables from tensorflow.python.ops import variables
from tensorflow.python.platform import googletest from tensorflow.python.platform import googletest
from tensorflow.python.platform import test
@test_util.run_all_in_graph_and_eager_modes @test_util.run_all_in_graph_and_eager_modes
@ -59,11 +58,11 @@ class ReduceTest(test_util.TensorFlowTestCase):
x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32) x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)
with test_util.device(use_gpu=True): with test_util.device(use_gpu=True):
for axis in (0, -2): for axis in (0, -2):
self.assertAllEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)), self.assertAllEqual(
[5, 7, 9]) self.evaluate(math_ops.reduce_sum(x, axis=axis)), [5, 7, 9])
for axis in (1, -1): for axis in (1, -1):
self.assertAllEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)), self.assertAllEqual(
[6, 15]) self.evaluate(math_ops.reduce_sum(x, axis=axis)), [6, 15])
for axis in (None, (0, 1), (1, 0), (-1, 0), (0, -1), (-2, 1), (1, -2), for axis in (None, (0, 1), (1, 0), (-1, 0), (0, -1), (-2, 1), (1, -2),
(-1, -2), (-2, -1)): (-1, -2), (-2, -1)):
self.assertEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)), 21) self.assertEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)), 21)
@ -358,8 +357,8 @@ class ScalarMulTest(test_util.TensorFlowTestCase):
indices = constant_op.constant([0, 2, 5]) indices = constant_op.constant([0, 2, 5])
x = math_ops.scalar_mul(-3, ops.IndexedSlices(values, indices)) x = math_ops.scalar_mul(-3, ops.IndexedSlices(values, indices))
with test_util.device(use_gpu=True): with test_util.device(use_gpu=True):
self.assertAllEqual(self.evaluate(x.values), self.assertAllEqual(
[[-6, -9], [-15, -21], [0, 3]]) self.evaluate(x.values), [[-6, -9], [-15, -21], [0, 3]])
self.assertAllEqual(self.evaluate(x.indices), [0, 2, 5]) self.assertAllEqual(self.evaluate(x.indices), [0, 2, 5])
@ -436,9 +435,11 @@ class AddNTest(test_util.TensorFlowTestCase):
def test_iterable(self): def test_iterable(self):
"""Test that add_n supports iterables (e.g. generators and dict values).""" """Test that add_n supports iterables (e.g. generators and dict values)."""
def fn(): def fn():
yield 1 yield 1
yield 2 yield 2
values_dict = {"a": 1, "b": 2} values_dict = {"a": 1, "b": 2}
with test_util.use_gpu(): with test_util.use_gpu():
self.assertAllEqual(3, math_ops.add_n(fn())) self.assertAllEqual(3, math_ops.add_n(fn()))
@ -483,8 +484,9 @@ class DivAndModTest(test_util.TensorFlowTestCase):
def testFloorModBfloat16(self): def testFloorModBfloat16(self):
nums, divs = self.floatTestData() nums, divs = self.floatTestData()
tf_result = math_ops.floormod(math_ops.cast(nums, dtypes.bfloat16), tf_result = math_ops.floormod(
math_ops.cast(divs, dtypes.bfloat16)) math_ops.cast(nums, dtypes.bfloat16),
math_ops.cast(divs, dtypes.bfloat16))
np_result = nums % divs np_result = nums % divs
self.assertAllEqual(tf_result, np_result) self.assertAllEqual(tf_result, np_result)
@ -742,10 +744,8 @@ class NextAfterTest(test_util.TensorFlowTestCase):
self.assertAllEqual(math_ops.nextafter(one, two) - one, eps) self.assertAllEqual(math_ops.nextafter(one, two) - one, eps)
self.assertAllLess(math_ops.nextafter(one, zero) - one, 0) self.assertAllLess(math_ops.nextafter(one, zero) - one, 0)
self.assertAllEqual( self.assertAllEqual(math_ops.is_nan(math_ops.nextafter(nan, one)), [True])
math_ops.is_nan(math_ops.nextafter(nan, one)), [True]) self.assertAllEqual(math_ops.is_nan(math_ops.nextafter(one, nan)), [True])
self.assertAllEqual(
math_ops.is_nan(math_ops.nextafter(one, nan)), [True])
self.assertAllEqual(math_ops.nextafter(one, one), one) self.assertAllEqual(math_ops.nextafter(one, one), one)
def testBroadcasting(self): def testBroadcasting(self):
@ -786,13 +786,13 @@ class BinaryOpsTest(test_util.TensorFlowTestCase):
r"Attempt to convert a value .* with an unsupported type") r"Attempt to convert a value .* with an unsupported type")
else: else:
error = TypeError error = TypeError
error_message = ( error_message = (r"Failed to convert object of type .* to Tensor")
r"Failed to convert object of type .* to Tensor")
class RHSReturnsTrue(object): class RHSReturnsTrue(object):
def __radd__(self, other): def __radd__(self, other):
return True return True
a = array_ops.ones([1], dtype=dtypes.int32) + RHSReturnsTrue() a = array_ops.ones([1], dtype=dtypes.int32) + RHSReturnsTrue()
self.assertEqual(a, True) self.assertEqual(a, True)
@ -889,12 +889,6 @@ class RangeTest(test_util.TensorFlowTestCase):
class ErfcinvTest(test_util.TensorFlowTestCase): class ErfcinvTest(test_util.TensorFlowTestCase):
def testErfcinv(self): def testErfcinv(self):
if test.is_built_with_rocm():
# The implementation of erfcinv calls ndtri op,
# and the ROCm implementation for ndtri op has a known bug in it
# whose fix will be in a forthcoming ROCm release (4.0 ?).
# Need to skip this unit-test until that ROCm release is out
self.skipTest("ndtri op implementation is buggy on ROCm")
values = np.random.uniform(0.1, 1.9, size=int(1e4)).astype(np.float32) values = np.random.uniform(0.1, 1.9, size=int(1e4)).astype(np.float32)
approx_id = math_ops.erfc(math_ops.erfcinv(values)) approx_id = math_ops.erfc(math_ops.erfcinv(values))
self.assertAllClose(values, self.evaluate(approx_id)) self.assertAllClose(values, self.evaluate(approx_id))

View File

@ -81,14 +81,7 @@ class MathTest(PForTestCase, parameterized.TestCase):
] ]
self._test_unary_cwise_ops(complex_ops, True) self._test_unary_cwise_ops(complex_ops, True)
@test.disable_with_predicate(
pred=test.is_built_with_rocm, skip_message="This fails on ROCm.")
def test_unary_cwise_real_ops_1(self): def test_unary_cwise_real_ops_1(self):
if test.is_built_with_rocm():
# TODO(rocm):
# This fails on ROCm...see JIRA ticket 236756
self.skipTest("Fails on ROCM")
real_ops = [ real_ops = [
lambda x: math_ops.acosh(1 + math_ops.square(x)), lambda x: math_ops.acosh(1 + math_ops.square(x)),
math_ops.abs, math_ops.abs,
@ -691,15 +684,15 @@ class LinalgTest(PForTestCase):
self._test_loop_fn(loop_fn, 3) self._test_loop_fn(loop_fn, 3)
def test_matrix_inverse(self): def test_matrix_inverse(self):
x = (random_ops.random_uniform([3, 4, 2, 2]) + x = (random_ops.random_uniform([3, 4, 2, 2]) + 10 * linalg_ops.eye(2)
10 * linalg_ops.eye(2)) # Ensure well-conditioned. ) # Ensure well-conditioned.
for adjoint in (True, False): for adjoint in (True, False):
# pylint: disable=cell-var-from-loop # pylint: disable=cell-var-from-loop
def loop_fn(i): def loop_fn(i):
return linalg_ops.matrix_inverse(array_ops.gather(x, i), return linalg_ops.matrix_inverse(
adjoint=adjoint) array_ops.gather(x, i), adjoint=adjoint)
# pylint: enable=cell-var-from-loop # pylint: enable=cell-var-from-loop
self._test_loop_fn(loop_fn, 2) self._test_loop_fn(loop_fn, 2)
@ -710,8 +703,8 @@ class LinalgTest(PForTestCase):
for stack_b in (True, False): for stack_b in (True, False):
shape_a = (2, 4, 3, 3) if stack_a else (4, 3, 3) shape_a = (2, 4, 3, 3) if stack_a else (4, 3, 3)
shape_b = (2, 4, 3, 5) if stack_b else (4, 3, 5) shape_b = (2, 4, 3, 5) if stack_b else (4, 3, 5)
x = (random_ops.random_uniform(shape_a) + x = (random_ops.random_uniform(shape_a) + 10 * linalg_ops.eye(3)
10 * linalg_ops.eye(3)) # Ensure well-conditioned. ) # Ensure well-conditioned.
y = random_ops.random_uniform(shape_b) y = random_ops.random_uniform(shape_b)
# pylint: disable=cell-var-from-loop # pylint: disable=cell-var-from-loop

View File

@ -139,11 +139,6 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
] ]
) # pyformat: disable ) # pyformat: disable
def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args): def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args):
if test_util.IsBuiltWithROCm():
# TODO(rocm):
# This fails on ROCm...see JIRA ticket 236756
self.skipTest('Fails on ROCM')
result = op(x, **extra_args) result = op(x, **extra_args)
# Run the wrapped op on the dense values, for comparison. # Run the wrapped op on the dense values, for comparison.
@ -319,7 +314,9 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
ragged_factory_ops.constant_value([['foo', 'bar'], ['baz']]), ragged_factory_ops.constant_value([['foo', 'bar'], ['baz']]),
ragged_factory_ops.constant_value([['2', '9'], ['12']]))}, ragged_factory_ops.constant_value([['2', '9'], ['12']]))},
]) # pyformat: disable ]) # pyformat: disable
def testListValuedElementwiseOp(self, inputs, op=math_ops.add_n, def testListValuedElementwiseOp(self,
inputs,
op=math_ops.add_n,
**extra_args): **extra_args):
use_kwargs = extra_args.pop('use_kwargs', False) use_kwargs = extra_args.pop('use_kwargs', False)
if use_kwargs: if use_kwargs:
@ -676,13 +673,20 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
expected=ragged_factory_ops.constant_value([[5, 4], [3, 2, 1]])), expected=ragged_factory_ops.constant_value([[5, 4], [3, 2, 1]])),
dict( dict(
op=string_ops.string_format, op=string_ops.string_format,
kwargs={'template': 'Hi {}', kwargs={
'inputs': [ragged_factory_ops.constant_value([[1, 2], [3]])]}, 'template': 'Hi {}',
'inputs': [ragged_factory_ops.constant_value([[1, 2], [3]])]
},
expected='Hi [[1, 2], [3]]'), expected='Hi [[1, 2], [3]]'),
]) ])
def testRaggedDispatch(self, op, expected, args=(), result_is_list=False, def testRaggedDispatch(self,
op,
expected,
args=(),
result_is_list=False,
kwargs=None): kwargs=None):
if kwargs is None: kwargs = {} if kwargs is None:
kwargs = {}
result = op(*args, **kwargs) result = op(*args, **kwargs)
if result_is_list: if result_is_list:
self.assertLen(result, len(expected)) self.assertLen(result, len(expected))
@ -694,15 +698,13 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
def testUnaryElementwiseOpsPreserveUniformRowLength(self): def testUnaryElementwiseOpsPreserveUniformRowLength(self):
# Unary elementwise op # Unary elementwise op
rt = ragged_tensor.RaggedTensor.from_uniform_row_length( rt = ragged_tensor.RaggedTensor.from_uniform_row_length(
ragged_factory_ops.constant([[1, 2], [3]]), ragged_factory_ops.constant([[1, 2], [3]]), uniform_row_length=2)
uniform_row_length=2)
self.assertAllEqual(rt.uniform_row_length, self.assertAllEqual(rt.uniform_row_length,
array_ops.zeros_like(rt).uniform_row_length) array_ops.zeros_like(rt).uniform_row_length)
# Unary-list elementwise op # Unary-list elementwise op
rt = ragged_tensor.RaggedTensor.from_uniform_row_length( rt = ragged_tensor.RaggedTensor.from_uniform_row_length(
ragged_factory_ops.constant([[1, 2], [3]]), ragged_factory_ops.constant([[1, 2], [3]]), uniform_row_length=2)
uniform_row_length=2)
self.assertAllEqual(rt.uniform_row_length, self.assertAllEqual(rt.uniform_row_length,
math_ops.add_n([rt, rt]).uniform_row_length) math_ops.add_n([rt, rt]).uniform_row_length)

View File

@ -31,6 +31,7 @@ cc_library(
"//tensorflow/stream_executor/platform", "//tensorflow/stream_executor/platform",
"@com_google_absl//absl/strings", "@com_google_absl//absl/strings",
"@local_config_cuda//cuda:cuda_headers", "@local_config_cuda//cuda:cuda_headers",
"@local_config_rocm//rocm:rocm_headers",
"@local_config_tensorrt//:tensorrt_headers", "@local_config_tensorrt//:tensorrt_headers",
], ],
) )

View File

@ -45,7 +45,7 @@ port::Status TryDlopenCUDALibraries() {
port::Status TryDlopenROCmLibraries() { port::Status TryDlopenROCmLibraries() {
auto rocblas_status = GetRocblasDsoHandle(); auto rocblas_status = GetRocblasDsoHandle();
auto miopen_status = GetMiopenDsoHandle(); auto miopen_status = GetMiopenDsoHandle();
auto rocfft_status = GetRocfftDsoHandle(); auto rocfft_status = GetHipfftDsoHandle();
auto rocrand_status = GetRocrandDsoHandle(); auto rocrand_status = GetRocrandDsoHandle();
if (!rocblas_status.status().ok() || !miopen_status.status().ok() || if (!rocblas_status.status().ok() || !miopen_status.status().ok() ||
!rocfft_status.status().ok() || !rocrand_status.status().ok()) { !rocfft_status.status().ok() || !rocrand_status.status().ok()) {

View File

@ -26,6 +26,10 @@ limitations under the License.
#include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/platform/port.h"
#include "third_party/tensorrt/tensorrt_config.h" #include "third_party/tensorrt/tensorrt_config.h"
#if TENSORFLOW_USE_ROCM
#include "rocm/rocm_config.h"
#endif
namespace stream_executor { namespace stream_executor {
namespace internal { namespace internal {
@ -133,8 +137,12 @@ port::StatusOr<void*> GetMiopenDsoHandle() {
return GetDsoHandle("MIOpen", ""); return GetDsoHandle("MIOpen", "");
} }
port::StatusOr<void*> GetRocfftDsoHandle() { port::StatusOr<void*> GetHipfftDsoHandle() {
#if TF_ROCM_VERSION < 40100
return GetDsoHandle("rocfft", ""); return GetDsoHandle("rocfft", "");
#else
return GetDsoHandle("hipfft", "");
#endif
} }
port::StatusOr<void*> GetRocrandDsoHandle() { port::StatusOr<void*> GetRocrandDsoHandle() {
@ -214,8 +222,8 @@ port::StatusOr<void*> GetMiopenDsoHandle() {
return *result; return *result;
} }
port::StatusOr<void*> GetRocfftDsoHandle() { port::StatusOr<void*> GetHipfftDsoHandle() {
static auto result = new auto(DsoLoader::GetRocfftDsoHandle()); static auto result = new auto(DsoLoader::GetHipfftDsoHandle());
return *result; return *result;
} }

View File

@ -49,7 +49,7 @@ port::StatusOr<void*> GetNvInferPluginDsoHandle();
port::StatusOr<void*> GetRocblasDsoHandle(); port::StatusOr<void*> GetRocblasDsoHandle();
port::StatusOr<void*> GetMiopenDsoHandle(); port::StatusOr<void*> GetMiopenDsoHandle();
port::StatusOr<void*> GetRocfftDsoHandle(); port::StatusOr<void*> GetHipfftDsoHandle();
port::StatusOr<void*> GetRocrandDsoHandle(); port::StatusOr<void*> GetRocrandDsoHandle();
port::StatusOr<void*> GetRoctracerDsoHandle(); port::StatusOr<void*> GetRoctracerDsoHandle();
port::StatusOr<void*> GetHipsparseDsoHandle(); port::StatusOr<void*> GetHipsparseDsoHandle();
@ -84,7 +84,7 @@ port::StatusOr<void*> GetCudnnDsoHandle();
port::StatusOr<void*> GetRocblasDsoHandle(); port::StatusOr<void*> GetRocblasDsoHandle();
port::StatusOr<void*> GetMiopenDsoHandle(); port::StatusOr<void*> GetMiopenDsoHandle();
port::StatusOr<void*> GetRocfftDsoHandle(); port::StatusOr<void*> GetHipfftDsoHandle();
port::StatusOr<void*> GetRocrandDsoHandle(); port::StatusOr<void*> GetRocrandDsoHandle();
port::StatusOr<void*> GetRoctracerDsoHandle(); port::StatusOr<void*> GetRoctracerDsoHandle();
port::StatusOr<void*> GetHipsparseDsoHandle(); port::StatusOr<void*> GetHipsparseDsoHandle();

View File

@ -197,19 +197,19 @@ cc_library(
) )
cc_library( cc_library(
name = "rocfft_if_static", name = "hipfft_if_static",
deps = if_static([ deps = if_static([
"@local_config_rocm//rocm:rocfft", "@local_config_rocm//rocm:hipfft",
]), ]),
) )
cc_library( cc_library(
name = "rocfft_plugin", name = "hipfft_plugin",
srcs = if_rocm_is_configured(["rocm_fft.cc"]), srcs = if_rocm_is_configured(["rocm_fft.cc"]),
hdrs = if_rocm_is_configured(["rocm_fft.h"]), hdrs = if_rocm_is_configured(["rocm_fft.h"]),
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = if_rocm_is_configured([ deps = if_rocm_is_configured([
":rocfft_if_static", ":hipfft_if_static",
":rocm_platform_id", ":rocm_platform_id",
"//tensorflow/stream_executor:event", "//tensorflow/stream_executor:event",
"//tensorflow/stream_executor:fft", "//tensorflow/stream_executor:fft",
@ -356,7 +356,7 @@ cc_library(
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = if_rocm_is_configured([ deps = if_rocm_is_configured([
":miopen_plugin", ":miopen_plugin",
":rocfft_plugin", ":hipfft_plugin",
":rocblas_plugin", ":rocblas_plugin",
":rocrand_plugin", ":rocrand_plugin",
":rocm_driver", ":rocm_driver",

View File

@ -61,7 +61,7 @@ namespace wrap {
static const char *kName; \ static const char *kName; \
using FuncPtrT = std::add_pointer<decltype(::__name)>::type; \ using FuncPtrT = std::add_pointer<decltype(::__name)>::type; \
static void *GetDsoHandle() { \ static void *GetDsoHandle() { \
auto s = internal::CachedDsoLoader::GetRocfftDsoHandle(); \ auto s = internal::CachedDsoLoader::GetHipfftDsoHandle(); \
return s.ValueOrDie(); \ return s.ValueOrDie(); \
} \ } \
static FuncPtrT LoadOrDie() { \ static FuncPtrT LoadOrDie() { \

View File

@ -20,7 +20,18 @@ limitations under the License.
#ifndef TENSORFLOW_STREAM_EXECUTOR_ROCM_ROCM_FFT_H_ #ifndef TENSORFLOW_STREAM_EXECUTOR_ROCM_ROCM_FFT_H_
#define TENSORFLOW_STREAM_EXECUTOR_ROCM_ROCM_FFT_H_ #define TENSORFLOW_STREAM_EXECUTOR_ROCM_ROCM_FFT_H_
#if TENSORFLOW_USE_ROCM
#include "rocm/rocm_config.h"
#if TF_ROCM_VERSION < 40100
#include "rocm/include/rocfft/hipfft.h" #include "rocm/include/rocfft/hipfft.h"
#else
#include "rocm/include/hipfft/hipfft.h"
#endif
#endif
#include "tensorflow/stream_executor/fft.h" #include "tensorflow/stream_executor/fft.h"
#include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/plugin_registry.h" #include "tensorflow/stream_executor/plugin_registry.h"

View File

@ -3,10 +3,10 @@
FROM ubuntu:bionic FROM ubuntu:bionic
MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com> MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/4.0.1/ ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/4.1/
ARG ROCM_BUILD_NAME=xenial ARG ROCM_BUILD_NAME=xenial
ARG ROCM_BUILD_NUM=main ARG ROCM_BUILD_NUM=main
ARG ROCM_PATH=/opt/rocm-4.0.1 ARG ROCM_PATH=/opt/rocm-4.1.0
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive
ENV TF_NEED_ROCM 1 ENV TF_NEED_ROCM 1

View File

@ -18,13 +18,14 @@ set -e
set -x set -x
N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo) N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
N_TEST_JOBS=1 # run tests serially
echo "" echo ""
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)." echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
echo "" echo ""
# First positional argument (if any) specifies the ROCM_INSTALL_DIR # First positional argument (if any) specifies the ROCM_INSTALL_DIR
ROCM_INSTALL_DIR=/opt/rocm-4.0.1 ROCM_INSTALL_DIR=/opt/rocm-4.1.0
if [[ -n $1 ]]; then if [[ -n $1 ]]; then
ROCM_INSTALL_DIR=$1 ROCM_INSTALL_DIR=$1
fi fi
@ -41,13 +42,100 @@ yes "" | $PYTHON_BIN_PATH configure.py
bazel test \ bazel test \
--config=rocm \ --config=rocm \
-k \ -k \
--test_tag_filters=multi_gpu \ --test_tag_filters=-no_gpu,-no_rocm \
--jobs=${N_BUILD_JOBS} \ --jobs=${N_BUILD_JOBS} \
--local_test_jobs=1 \ --local_test_jobs=${N_TEST_JOBS} \
--test_timeout 600,900,2400,7200 \ --test_timeout 600,900,2400,7200 \
--build_tests_only \ --build_tests_only \
--test_output=errors \ --test_output=errors \
--test_sharding_strategy=disabled \ --test_sharding_strategy=disabled \
--test_size_filters=small,medium,large \ --test_size_filters=small,medium,large \
--cache_test_results=no \
--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=2048 \
-- \ -- \
//tensorflow/core/nccl:nccl_manager_test //tensorflow/core/common_runtime/gpu:gpu_device_unified_memory_test_2gpu \
//tensorflow/core/kernels:collective_nccl_test_2gpu \
//tensorflow/core/nccl:nccl_manager_test_2gpu \
//tensorflow/python/distribute/integration_test:mwms_peer_failure_test_2gpu \
//tensorflow/python/distribute:checkpoint_utils_test_2gpu \
//tensorflow/python/distribute:checkpointing_test_2gpu \
//tensorflow/python/distribute:collective_all_reduce_strategy_test_xla_2gpu \
//tensorflow/python/distribute:custom_training_loop_gradient_test_2gpu \
//tensorflow/python/distribute:custom_training_loop_input_test_2gpu \
//tensorflow/python/distribute:distribute_utils_test_2gpu \
//tensorflow/python/distribute:input_lib_test_2gpu \
//tensorflow/python/distribute:input_lib_type_spec_test_2gpu \
//tensorflow/python/distribute:metrics_v1_test_2gpu \
//tensorflow/python/distribute:mirrored_variable_test_2gpu \
//tensorflow/python/distribute:parameter_server_strategy_test_2gpu \
//tensorflow/python/distribute:ps_values_test_2gpu \
//tensorflow/python/distribute:random_generator_test_2gpu \
//tensorflow/python/distribute:test_util_test_2gpu \
//tensorflow/python/distribute:tf_function_test_2gpu \
//tensorflow/python/distribute:vars_test_2gpu \
//tensorflow/python/distribute:warm_starting_util_test_2gpu \
//tensorflow/python/keras/distribute:collective_all_reduce_strategy_test_2gpu \
//tensorflow/python/keras/distribute:collective_all_reduce_strategy_test_xla_2gpu \
//tensorflow/python/keras/distribute:ctl_correctness_test_2gpu \
//tensorflow/python/keras/distribute:custom_training_loop_optimizer_test_2gpu \
//tensorflow/python/keras/distribute:keras_metrics_test_2gpu \
//tensorflow/python/keras/distribute:keras_models_test_2gpu \
//tensorflow/python/keras/distribute:keras_optimizer_v2_test_2gpu \
//tensorflow/python/keras/distribute:keras_stateful_lstm_model_correctness_test_2gpu \
//tensorflow/python/keras/distribute:mirrored_strategy_test_2gpu \
//tensorflow/python/keras/distribute:mirrored_variable_test_2gpu \
//tensorflow/python/keras/distribute:multi_worker_test_2gpu \
//tensorflow/python/keras/layers/preprocessing:category_crossing_distribution_test_2gpu \
//tensorflow/python/keras/layers/preprocessing:category_encoding_distribution_test_2gpu \
//tensorflow/python/keras/layers/preprocessing:discretization_distribution_test_2gpu \
//tensorflow/python/keras/layers/preprocessing:hashing_distribution_test_2gpu \
//tensorflow/python/keras/layers/preprocessing:image_preprocessing_distribution_test_2gpu \
//tensorflow/python/keras/layers/preprocessing:index_lookup_distribution_test_2gpu \
//tensorflow/python/keras/layers/preprocessing:text_vectorization_distribution_test_2gpu \
//tensorflow/python/keras/utils:multi_gpu_utils_test_2gpu \
//tensorflow/python/keras/utils:multi_gpu_utils_test_xla_2gpu \
//tensorflow/python/kernel_tests:dynamic_partition_op_test_2gpu \
//tensorflow/python/training:saver_test_2gpu \
# no_rocm : //tensorflow/python/keras/distribute:keras_dnn_correctness_test_2gpu \
# no_rocm : //tensorflow/python/keras/distribute:keras_embedding_model_correctness_test_2gpu \
# TIMEOUT : //tensorflow/python/distribute:values_test_2gpu \
# TIMEOUT : //tensorflow/python/keras/distribute:keras_image_model_correctness_test_2gpu \
# TIMEOUT : //tensorflow/python/keras/distribute:keras_rnn_model_correctness_test_2gpu \
# TIMEOUT : //tensorflow/python/keras/distribute:saved_model_mixed_api_test_2gpu \
# TIMEOUT : //tensorflow/python/keras/distribute:saved_model_save_load_test_2gpu \
# Started timing-out with ROCm 4.1
# TIMEOUT : //tensorflow/python/keras/distribute:keras_premade_models_test_2gpu \
# Became FLAKY with ROCm 4.1
# FLAKY : //tensorflow/python/distribute:strategy_common_test_2gpu \
# FLAKY : //tensorflow/python/distribute:strategy_common_test_xla_2gpu \
# FLAKY : //tensorflow/python/distribute:strategy_gather_test_2gpu \
# FLAKY : //tensorflow/python/distribute:strategy_gather_test_xla_2gpu \
# FLAKY : //tensorflow/python/keras/distribute:custom_training_loop_metrics_test_2gpu \
# FLAKY : //tensorflow/python/keras/distribute:custom_training_loop_models_test_2gpu \
# FAILED : //tensorflow/python/distribute/v1:cross_device_ops_test_2gpu \
# FAILED : //tensorflow/python/distribute:cross_device_ops_test_2gpu \
# FAILED : //tensorflow/python/distribute:mirrored_strategy_test_2gpu \
# FAILED : //tensorflow/python/keras/distribute:distribute_strategy_test_2gpu \
# FAILED : //tensorflow/python/kernel_tests:collective_ops_test_2gpu \
# FAILED : //tensorflow/python:collective_ops_gpu_test_2gpu \
# FAILED : //tensorflow/python:nccl_ops_test_2gpu \
# FAILED ON CI Node only : //tensorflow/python/distribute:collective_all_reduce_strategy_test_2gpu \
# See run : http://ml-ci.amd.com:21096/job/tensorflow/job/github-prs-rocmfork-develop-upstream/job/rocm-latest-ubuntu-gpu-multi/216/console
# FAILED ON CI Node only : //tensorflow/python/keras/distribute:keras_save_load_test_2gpu \
# Starting with ROCm 4.1, see run : http://ml-ci.amd.com:21096/job/tensorflow/job/github-prs-rocmfork-develop-upstream/job/rocm-latest-ubuntu-gpu-multi/241/console
# FAILED //tensorflow/python/keras/distribute:minimize_loss_test_2gpu \
# potential breaking commit : https://github.com/tensorflow/tensorflow/commit/74e39c8fa60079862597c9db506cd15b2443a5a2
# NO MORE MULTI_GPU : //tensorflow/python/keras/distribute:checkpointing_test_2gpu \
# multi_gpu tag was commented out in this commit : https://github.com/tensorflow/tensorflow/commit/b87d02a3f8d8b55045bf4250dd72e746357a3eba

View File

@ -27,7 +27,7 @@ echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS}
echo "" echo ""
# First positional argument (if any) specifies the ROCM_INSTALL_DIR # First positional argument (if any) specifies the ROCM_INSTALL_DIR
ROCM_INSTALL_DIR=/opt/rocm-4.0.1 ROCM_INSTALL_DIR=/opt/rocm-4.1.0
if [[ -n $1 ]]; then if [[ -n $1 ]]; then
ROCM_INSTALL_DIR=$1 ROCM_INSTALL_DIR=$1
fi fi

View File

@ -206,6 +206,28 @@ def _find_rocfft_config(rocm_install_path):
return rocfft_config return rocfft_config
def _find_hipfft_config(rocm_install_path):
  """Builds the hipfft portion of the ROCm configuration.

  Args:
    rocm_install_path: root directory of the ROCm installation; the version
      header is looked up at "hipfft/include/hipfft-version.h" beneath it.

  Returns:
    A dict with the single key "hipfft_version_number", whose value is
    produced by `_get_composite_version_number` from the major, minor and
    patch values read out of the header.

  Raises:
    ConfigError: if the hipfft version header does not exist.
  """
  header_path = os.path.join(rocm_install_path,
                             "hipfft/include/hipfft-version.h")

  # Fail early with a descriptive error rather than trying to read a header
  # that is not there.
  if not os.path.exists(header_path):
    raise ConfigError(
        'hipfft version file "{}" not found'.format(header_path))

  # Query the three version components in major, minor, patch order.
  version_parts = [
      _get_header_version(header_path, macro_name)
      for macro_name in ("hipfftVersionMajor", "hipfftVersionMinor",
                         "hipfftVersionPatch")
  ]

  return {
      "hipfft_version_number": _get_composite_version_number(*version_parts)
  }
def _find_roctracer_config(rocm_install_path): def _find_roctracer_config(rocm_install_path):
def roctracer_version_numbers(path): def roctracer_version_numbers(path):
@ -289,6 +311,8 @@ def find_rocm_config():
result.update(_find_rocblas_config(rocm_install_path)) result.update(_find_rocblas_config(rocm_install_path))
result.update(_find_rocrand_config(rocm_install_path)) result.update(_find_rocrand_config(rocm_install_path))
result.update(_find_rocfft_config(rocm_install_path)) result.update(_find_rocfft_config(rocm_install_path))
if result["rocm_version_number"] >= 40100:
result.update(_find_hipfft_config(rocm_install_path))
result.update(_find_roctracer_config(rocm_install_path)) result.update(_find_roctracer_config(rocm_install_path))
result.update(_find_hipsparse_config(rocm_install_path)) result.update(_find_hipsparse_config(rocm_install_path))
result.update(_find_rocsolver_config(rocm_install_path)) result.update(_find_rocsolver_config(rocm_install_path))

View File

@ -1 +1 @@
eJy9Wm1v2zgS/q5fQSgoKm8cJe19uEUO+eBNs6j32iSws10smsCgbdrmRhZ1JJU0KPrfb4akZEqWEid2GqCoJQ0fDmeeeaGoPXIqsgfJ5wtN3h+9PyJXC0auWKqE/D0R96SX64WQKia9JCEDFFNkwBSTd2waB3vBHvnEJyDOpiRPp0wSDeN7GZ3Af+5Jl3xhUnGRkvfxEYlQIHSPws5/AOFB5GRJH0gqNMkVAwiuyIwnjLBvE5ZpwlMyEcss4TSdMHLP9cJM40BADfK3gxBjTUGagnwGVzNfjlBtFMa/hdbZ8eHh/f19TI2ysZDzw8QKqsNP/dOz8+HZAShshvyZJkwpItn/ci5hqeMHQjPQZ0LHoGVC74mQhM4lg2daoL73kmuezrtEiZm+p5IBypQrLfk41xVjFdrBmn0BMBdNSdgbkv4wJL/1hv1hFzD+6l99vPjzivzVGwx651f9syG5GJDTi/MP/av+xTlc/U5653+T//bPP3QJA1PBNOxbJlF/UJKjGY3ryJCxigIzYRVSGZvwGZ/AutJ5TueMzMUdkyksh2RMLrlCZypQbwooCV9yTbW5s7YonOZkp39BGIaXkqdIw4vTJUw/llQ+oDJkwSjOPwUXTbSQnBkdyZ1lH1BKgIJoWLPKB6XZMg4CJLyaSA48U4xK4IIypmiDR2KqKkoXPI5W0yqAm0ukwJRpNFVqTMxloYQByqz+OH4i0hmf59IYEMcpPRW5jo1WGUWiiwIcGeJ8gzRbSJHPF0gSlt5xKdIlSzW5o5IbUkag/+fRZe/qYycO+jMILniW8GltSu7M0rXLsXYoFDTqMCmNqyXTuTRuJ3ALDDQRU1a1n6a3zK6r8MGDpzEEDT4q9WrUO/bxEiFurTOs7a0/C59YR5hoX1A5PUB9puBDDXEfqHzs82AmxZKMqXJGdYlhpVupb0zAVisVwTyQlYJS0JgJwvJQZPpQiskyRJEc0x8FXTT4fUbzBNeT5CxAtgYBxJyQ4D5R/BKq+AV5wf0CJgVBMEkoxOmpcdEZWjk6MykQXNU5Dghor1AMZiGjOdMjN90IVRnh0iIjZn3lq+kPMsJAKk2TxBsEun4oWGstXbjcuW1J3CBLHRwZ4wJhvjoiOWnVD8T5jISljUN0oVCxYwNq0gy4kvnqjb4B+b02eaOiZDQxU68JdVamWnvmGwyLj1Bcs5EL41GaL8dMRkv6j5BdAhbD/2DYZOHb/90R/JFfiBEj+3iNVygNV0bcn8aSvJgjQi26JKVLVrhn4PwB2TwDhSGlAxAMNw4DK576Ocq5BqMnwTwEz7mIRcYK5FCGUCFSCBvI7CdhrmcHv4Yda/8l6gY2lCw2PyMZ7tmJyBtF9qPr6X4nJG+Mdl2D3zHjwLNG3qIQGwaAY27Gc0hZWfSu4x46K0GuiYxcJ0DTUQ5R6kfA22Lm8PsPE262ql2nb2NYHCBHxkZk38FW/0Ji+gosgJhnwAqms/j+A+rmdRoWEIYO7RAc0qFdi80ImAVBbzZnaF0IGpqEncKXoOzUMt7m2gbiHeNSUdg8qrJKRU7EGNA9MkqvOP2P4KUbY57OxKETPJiyu7D0BWpaDGHfoLtQkQ/YKdzUYHQT70XVMrMb8yOiKRil8SuAnYrSbj2g99cb88C0boaClVGYO2clZdwTbIWg4zghM4zgadSJ8U4WdWpyq0mqI2MFnY6OwtiZwwbhiaVbdezXoxsnY0KzWeadk8lcaDTJvL8pZj0IOyWq43lDskAWNNzGwGviRUMCK5KvZRoM/O7MEzYghMcvyGeA9yOoJUo7W4XvC57JPNV8yTZgvSdc0+B51AecQ55OknzKDvE3/CsA48UuwsDLB28/9i/JwGr90sDYg+0VJBAvS0PrYluqb7idgAaiSHaYt2HG0ZezwRAa+y65XwhQzvYVFguQIk9k9Ln3B+wFfjFlZp9UnvTPLwaFClBFJON3tvkvSpNlvi36BxOaTHKo8qAdk5DfFTf7LmjrhGmHLM49
hXwKdMJIEAZsnCssRQr7q4zC6iF+oa+DmMcdiKqEYVPF8y0G3l1bW1iN0udCoBHCSki+q5Rng4sUXeOyH1et3IXoaud1Y+/hx9XanJXoWnLMmhtElhPcrqJYkDKy3OWrBdbn/gXgvzSmnkEomOjy7HxbTtVRPFoV2fv5KNDLnn4MX14vWvzeXDEqbPK53YgSHnvO2rp+VOaud0xj2P5s1jQZyUdYDvopDvvJkW9204mUiwkdTMlzd32wInrX5EvshP4V/7vYu0OOaIXAnlCmNHkC61eDcNMUj+citfkdC8AM+9XmpdTbJbxZ3/l4MT0r2iaI2kciduSZsEG5NdFScAw92q2fFnzRjdJAYctqHihTAFoCckKRCGqNerONnp8hwD+/feoNt00RazAvyxF1mG2TRFvctPaVXjzWWssGnN3miers9UQhIRY3SxRGcqsm04H4acIH3X09bAyEl+6+NmbaoHf+oWBalWNVQEcNzwM1ajRYHFqjFlc82RdVp6rzYDbTm9EABbdriiyIzwK4PHhNEgD+T2iKasYxI5+f8eooOPL5Ca+GYkZul++a/N6a7lZsqlF6HWXnyW41d53jWtIJmGwjmjvZrZlucXyy2zuvwnML/ROoDlnuatA7PRvsoL7XgbwKv7fyRHkmJZh9b7igsPGmjqHFil1i9YPlaBvSt7CglfcVhtWo34i1c/ZXNKi/V1L4LmHD10pOdrsAKHH8l0v2zusl/HKKnxAI5VzuZP7zy5L+GszLsn4d5nLLtN9Og+YIqFOs9ranEWu3EVDXoF4ClEjuNi0BTnbrEmBx/BJg77xqy2On+DmlYHjx6ctOSkEd6MWbvRrQDrZ7LWRorQQVotUqQSPWzitBRQMXB2sHavUDUUqmfIKH0vhdgJjZ1yxGkZThlwxuPXhQ9tSJdcPheNDG8aZAfILmb4flNwerrwwMw8sWwYCXNF+fw/qKKfzMADz0Y3X51R76aCGSW66NdHhTHCZVDreLEXGeTalm0SZnlp2WUZuc/LSNfeq9dtu4J98VPjLw8XcHjwx8dLP5yLinGvhHDPtU6/PIrE/VjI4fdwbCRduSQh0wTNby4bh8F3nLHrrFGXhKlJCaTaP1yIwh8pcq6pQ533xMFIVv1DF5o/CbgWiFZPR3H/l54YLnwe6loXpQsf0IKcZP6lgUXqdng8HF4BiYf516J/hKywgAO+UwiCONHxsEAUTvaIQfCYxG5OSEhKMRrnE0MsnLLjf4P/9j3RI= 
eJy9Wn9v2zgS/V+fglBQVN44StJbYBc55ABvmkV91yaBne1i0QYGbdM2t7KoJamkQdHvfjMkJVOylDix0wBFLWn4OJx58zj6sUfORHYv+XyhyZujN0fkesHINUuVkL8n4o70cr0QUsWklyRkgGaKDJhi8pZN42Av2CPv+QTM2ZTk6ZRJomF8L6MT+M9d6ZKPTCouUvImPiIRGoTuUtj5NyDci5ws6T1JhSa5YgDBFZnxhBH2dcIyTXhKJmKZJZymE0buuF6YaRwIuEH+chBirClYU7DP4Gjm2xGqjcP4t9A6Ozk8vLu7i6lxNhZyfphYQ3X4vn92fjE8PwCHzZA/0oQpRST7J+cSljq+JzQDfyZ0DF4m9I4ISehcMrimBfp7J7nm6bxLlJjpOyoZoEy50pKPc10JVuEdrNk3gHDRlIS9IekPQ/Jbb9gfdgHjz/71u8s/rsmfvcGgd3HdPx+SywE5u7x427/uX17A0e+kd/EX+V//4m2XMAgVTMO+ZhL9Byc5htGkjgwZqzgwE9YhlbEJn/EJrCud53TOyFzcMpnCckjG5JIrTKYC96aAkvAl11SbM2uLwmlOd/oXhGF4JXmKNLw8W8L0Y0nlPTpDFozi/FNI0UQLyZnxkdxa9gGlBDiIgTWrvFeaLeMgQMKrieTAM8WoBC4oE4o2eCSmqqJ0IeMYNa0COLlECkyZxlClJsRcFk4YoMz6j+MnIp3xeS5NAHGc0lOR69h4lVEkuijAkSEuN0izhRT5fIEkYektlyJdslSTWyq5IWUE/n8YXfWu33XioD+D4oJrCZ/WpuQuLF27HBuHwkHjDpPSpFoynUuTdgKnIEATMWXV+Gn6hdl1FTm49zyGosFLpV+Nfsc+XiLEF5sMG3ubzyInNhGm2hdUTg/QnynkUEPdByof+zyYSbEkY6pcUJ0wrHwr/Y0JxGrlIoQHVCkoDU2YoCwPRaYPpZgsQzTJUf4o+KIh7zOaJ7ieJGcBsjUIoOaEhPSJ4pdQxS/QBfcLmBQEwSShUKdnJkXnGOXo3EggpKpzEhDwXqEZzEJGc6ZHbroRujLCpUXGzObKd9MfZIyBVJomiTcIfH1bsNZGuki5S9uSuEGWOjgyxgXCfHVEctrqH5jzGQnLGIeYQqFixwb0pBlwZfPJG30D9ntt9sZFyWhipl4z6qxCtXbNDxhuPkJxzUaujEdpvhwzGS3p30J2CUQM/4Nhk4Uf/+Mj+CM/EWNG9vEYj9Aajoy5P40leTFHhF50SUqXrEjPwOUD1DwDh0HSAQiGm4RBFM98jXKpwepJUIfgOhexyFiBHMoQdogUygaU/TTM9ezg17Bj479E3yCGksXmZyTDPTsReaXIfvR5ut8JySvjXdfgd8w4yKyxtyjElgHgmJPxHCQri4477qKLEmhNZOw6AYaOcqhSvwJeFzOH376bcrO72uf0dQyLA+TIxIjsO9jqX0hMX4EbIOoMRMF0Ft++w775OQ0LCEOHdggOcmjXYhUBVRD8ZnOG0YWioUnYKXIJzk4t463WNhDvBJeKxuZSlVUqciYmgO6ScXrF6b8FL9MY83QmDp3hwZTdhmUu0NNiCPsK3YWKfMBOkaaGoJt6L3YtM7sJPyKaDaMMfgWwU3HarQf8/nRjLpjWzVCwMgq1c1ZSxl3BVgg6jlMywwqeRp0Yz2RRp2a3mqQ6MlbQ6egojF04bBGeWrpVx346unE2pjSbbY6dTeZKo8nmzU0x60HYKVEdzxvEAlnQcBoLr4kXDQJWiK9lGgz85sITNiCEJ8/QM8D7HtSE0s5W4fuCZzJPNV+yDVjvGdc8eBr1AeeQp5Mkn7JD/A3/CsB4sYsy8PTg9bv+FRlYr59bGHtwewUC4qk0tC62pfqKtxPQQBRih7oNM44+ng+G0Nh3yd1CgHO2r7BYgBR5JqMPvf/CvcBPZpvZJ5Ur/YvLQeEC7CKS8Vvb/Bdbk2W+3fQPJjSZ5LDLg3dMgr4rbu67oK0Tph2yOHcU
9BTohJUgDNg4V7gVKeyvMgqrh/qFvg5qHu9AVKUMm3Y8P2KQ3bW1hdUqfSoEBiGslORxZXs2uEjRNS77ddXKXaiudl439h5+Xa3NWamuJUfV3KCynOF2O4oFKSvLHb5YYX3oXwL+c2vqCYSCia7OL7blVB3Fo1Wh3k9HgV727F34/P2iJe/NO0aFTT63G1HCEy9ZW+8flbnrHdMYbn82a5qM5QMsB/8Uh/vJkR9204mUiwkdTMlzd3ywInrX6CV2Qv+Kfynu3UEjWiGwJ5QpTR7B+tUg3DTV44VIrb7jBjDDfrV5KfV2CU/W73y8mp4VbRNU7QMVO/JC2ODcmmlpOIYe7YsvC77pRjJQxLKqA6UEYCRAEwohqDXqzTF6ukJAfn573xtuKxFrMM/TiDrMtiLRVjetfaVXj7XWsgFntzpRnb0uFBJqcTOhMJZbNZkOxJcJH3T3+2FjITz37mtjpg16F28LplU5VgV01PAyUKNGQ8ShNWpJxaN9UXWqOg9mM70ZDdBwu6bIgvgsgMODlyQB4P+ApqgWHDPy6YpXR8GRTxe8GooZuZ3eNeW9Ve5WbKpReh1l52K3mrt+W70Zx53hdhy3IP4t9Yty3OL/AI7bidxryA/PY3gV43n8rmBcbcnulow3s7vCo9rt7EuzuzJ3XcG1pBMI2EYi7my31nGL40u5PfMiKm6hfwDJYQ+/HvTOzgc76F7rQF7/urfKRPnGVTD7VHxBbxmhjqHFil3b4JfK0TaS3sKCVlWvMKwm7I1YO9f2igd1eVf4pGzDh6bOdmuRtzi+ztszLyr1doofo/Z2rq0FvwrzbM2vwOxA9lto0Kr8FYrVxL8Ra+f6X/GgvgUokdxuugU42623AIvjbwH2zIs29HaKH7MVDC/ff9zJVlAHevajjBrQDh5mtJChdSeoEK22EzRi7XwnqHjg6mDtdXH9dT8lUz7BTy7wqxcxsw8RjSMpw+903HrwNfBj32M0fPoRtHG8qRAfofnrYflFzeobGsPwskUw4CXN1+ewuWIKP6KBDH1fHX6yrzS1EMkXro11eFO8Kq18ulGMiPNsSjWLNnkj32kZtcl7zbaxj721aRv36JPwBwY+/GTsgYEP3mY6llQSUSuVG/KfU/Lz0fHRkaNJczAfnabFvcfuEx7I32Md1gOzPrY1dfzyNhCuqJcUthtTMFren5QP9L+w+27xIUlKlJCaTaN1AYhBYJYq6pRbi/kiLwpfqRPySuGHN9EKyfjvvpT1qhI/qnBP3tW9iu2XfDF+l8qi8HN6PhhcDk6gwD6n3mcwSssIADvlMChXjV/sBAGkfzTCL21GI3J6SsLRCNc4GhmNtMsN/g935/24

View File

@ -51,9 +51,9 @@ cc_library(
) )
cc_library( cc_library(
name = "rocfft", name = "%{hipfft_or_rocfft}",
srcs = ["rocm/lib/%{rocfft_lib}"], srcs = ["rocm/lib/%{hipfft_or_rocfft_lib}"],
data = ["rocm/lib/%{rocfft_lib}"], data = ["rocm/lib/%{hipfft_or_rocfft_lib}"],
includes = [ includes = [
".", ".",
"rocm/include", "rocm/include",
@ -106,7 +106,7 @@ cc_library(
":rocm_headers", ":rocm_headers",
":hip", ":hip",
":rocblas", ":rocblas",
":rocfft", ":%{hipfft_or_rocfft}",
":hiprand", ":hiprand",
":miopen", ":miopen",
":hipsparse", ":hipsparse",

View File

@ -187,6 +187,7 @@ def _rocm_include_path(repository_ctx, rocm_config, bash_bin):
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/10.0.0/include") inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/10.0.0/include")
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/11.0.0/include") inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/11.0.0/include")
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/12.0.0/include") inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/12.0.0/include")
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/13.0.0/include")
# Support hcc based off clang 10.0.0 (for ROCm 3.3) # Support hcc based off clang 10.0.0 (for ROCm 3.3)
inc_dirs.append(rocm_toolkit_path + "/hcc/compiler/lib/clang/10.0.0/include/") inc_dirs.append(rocm_toolkit_path + "/hcc/compiler/lib/clang/10.0.0/include/")
@ -310,7 +311,7 @@ def _select_rocm_lib_paths(repository_ctx, libs_paths, bash_bin):
return libs return libs
def _find_libs(repository_ctx, rocm_config, bash_bin): def _find_libs(repository_ctx, rocm_config, hipfft_or_rocfft, bash_bin):
"""Returns the ROCm libraries on the system. """Returns the ROCm libraries on the system.
Args: Args:
@ -327,7 +328,7 @@ def _find_libs(repository_ctx, rocm_config, bash_bin):
for name, path in [ for name, path in [
("amdhip64", rocm_config.rocm_toolkit_path + "/hip"), ("amdhip64", rocm_config.rocm_toolkit_path + "/hip"),
("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"), ("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"),
("rocfft", rocm_config.rocm_toolkit_path + "/rocfft"), (hipfft_or_rocfft, rocm_config.rocm_toolkit_path + "/" + hipfft_or_rocfft),
("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"), ("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"),
("MIOpen", rocm_config.rocm_toolkit_path + "/miopen"), ("MIOpen", rocm_config.rocm_toolkit_path + "/miopen"),
("rccl", rocm_config.rocm_toolkit_path + "/rccl"), ("rccl", rocm_config.rocm_toolkit_path + "/rccl"),
@ -456,7 +457,8 @@ def _create_dummy_repository(repository_ctx):
"%{rocblas_lib}": _lib_name("rocblas"), "%{rocblas_lib}": _lib_name("rocblas"),
"%{miopen_lib}": _lib_name("miopen"), "%{miopen_lib}": _lib_name("miopen"),
"%{rccl_lib}": _lib_name("rccl"), "%{rccl_lib}": _lib_name("rccl"),
"%{rocfft_lib}": _lib_name("rocfft"), "%{hipfft_or_rocfft}": "hipfft",
"%{hipfft_or_rocfft_lib}": _lib_name("hipfft"),
"%{hiprand_lib}": _lib_name("hiprand"), "%{hiprand_lib}": _lib_name("hiprand"),
"%{hipsparse_lib}": _lib_name("hipsparse"), "%{hipsparse_lib}": _lib_name("hipsparse"),
"%{roctracer_lib}": _lib_name("roctracer64"), "%{roctracer_lib}": _lib_name("roctracer64"),
@ -537,6 +539,10 @@ def _create_local_rocm_repository(repository_ctx):
bash_bin = get_bash_bin(repository_ctx) bash_bin = get_bash_bin(repository_ctx)
rocm_config = _get_rocm_config(repository_ctx, bash_bin, find_rocm_config_script) rocm_config = _get_rocm_config(repository_ctx, bash_bin, find_rocm_config_script)
# For ROCm 4.1 and above use hipfft, older ROCm versions use rocfft
rocm_version_number = int(rocm_config.rocm_version_number)
hipfft_or_rocfft = "rocfft" if rocm_version_number < 40100 else "hipfft"
# Copy header and library files to execroot. # Copy header and library files to execroot.
# rocm_toolkit_path # rocm_toolkit_path
rocm_toolkit_path = rocm_config.rocm_toolkit_path rocm_toolkit_path = rocm_config.rocm_toolkit_path
@ -550,9 +556,9 @@ def _create_local_rocm_repository(repository_ctx):
), ),
make_copy_dir_rule( make_copy_dir_rule(
repository_ctx, repository_ctx,
name = "rocfft-include", name = hipfft_or_rocfft + "-include",
src_dir = rocm_toolkit_path + "/rocfft/include", src_dir = rocm_toolkit_path + "/" + hipfft_or_rocfft + "/include",
out_dir = "rocm/include/rocfft", out_dir = "rocm/include/" + hipfft_or_rocfft,
), ),
make_copy_dir_rule( make_copy_dir_rule(
repository_ctx, repository_ctx,
@ -586,7 +592,7 @@ def _create_local_rocm_repository(repository_ctx):
), ),
] ]
rocm_libs = _find_libs(repository_ctx, rocm_config, bash_bin) rocm_libs = _find_libs(repository_ctx, rocm_config, hipfft_or_rocfft, bash_bin)
rocm_lib_srcs = [] rocm_lib_srcs = []
rocm_lib_outs = [] rocm_lib_outs = []
for lib in rocm_libs.values(): for lib in rocm_libs.values():
@ -632,7 +638,8 @@ def _create_local_rocm_repository(repository_ctx):
{ {
"%{hip_lib}": rocm_libs["amdhip64"].file_name, "%{hip_lib}": rocm_libs["amdhip64"].file_name,
"%{rocblas_lib}": rocm_libs["rocblas"].file_name, "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
"%{rocfft_lib}": rocm_libs["rocfft"].file_name, "%{hipfft_or_rocfft}": hipfft_or_rocfft,
"%{hipfft_or_rocfft_lib}": rocm_libs[hipfft_or_rocfft].file_name,
"%{hiprand_lib}": rocm_libs["hiprand"].file_name, "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
"%{miopen_lib}": rocm_libs["MIOpen"].file_name, "%{miopen_lib}": rocm_libs["MIOpen"].file_name,
"%{rccl_lib}": rocm_libs["rccl"].file_name, "%{rccl_lib}": rocm_libs["rccl"].file_name,
@ -641,7 +648,7 @@ def _create_local_rocm_repository(repository_ctx):
"%{rocsolver_lib}": rocm_libs["rocsolver"].file_name, "%{rocsolver_lib}": rocm_libs["rocsolver"].file_name,
"%{copy_rules}": "\n".join(copy_rules), "%{copy_rules}": "\n".join(copy_rules),
"%{rocm_headers}": ('":rocm-include",\n' + "%{rocm_headers}": ('":rocm-include",\n' +
'":rocfft-include",\n' + '":' + hipfft_or_rocfft + '-include",\n' +
'":rocblas-include",\n' + '":rocblas-include",\n' +
'":miopen-include",\n' + '":miopen-include",\n' +
'":rccl-include",\n' + '":rccl-include",\n' +