mirror of
https://github.com/zebrajr/tensorflow.git
synced 2025-12-06 12:20:11 +01:00
Merge pull request #48507 from ROCmSoftwarePlatform/google_upstream_r25_port_pr_48187
[r2.5 port][ROCm] Port PR 48187 to r2.5
This commit is contained in:
commit
8e0516e743
|
|
@ -810,9 +810,9 @@ Status AMDGPUTargetModuleLinker(llvm::Module* module, GpuVersion gpu_version,
|
|||
// upstream commit), the following mapping will need to change
|
||||
std::string MapGCNArchNameTokenToFeatureStr(const std::string& token) {
|
||||
if (token == "sramecc+") {
|
||||
return "+sram-ecc";
|
||||
return "+sramecc";
|
||||
} else if (token == "sramecc-") {
|
||||
return "-sram-ecc";
|
||||
return "-sramecc";
|
||||
} else if (token == "xnack+") {
|
||||
return "+xnack";
|
||||
} else if (token == "xnack-") {
|
||||
|
|
|
|||
|
|
@ -1196,6 +1196,7 @@ xla_test(
|
|||
],
|
||||
shard_count = 50,
|
||||
tags = [
|
||||
"no_rocm",
|
||||
"optonly",
|
||||
],
|
||||
deps = CONVOLUTION_TEST_DEPS + [
|
||||
|
|
@ -1261,6 +1262,7 @@ xla_test(
|
|||
backend_args = {"gpu": ["--xla_backend_extra_options=xla_gpu_experimental_conv_disable_layout_heuristic"]},
|
||||
backends = ["gpu"],
|
||||
shard_count = 25,
|
||||
tags = ["no_rocm"],
|
||||
deps = CONVOLUTION_TEST_DEPS + [
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/strings",
|
||||
|
|
|
|||
|
|
@ -1092,7 +1092,6 @@ cuda_py_test(
|
|||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_cuda_asan", # times out
|
||||
"no_rocm",
|
||||
"notsan", # b/173031470
|
||||
],
|
||||
deps = [
|
||||
|
|
@ -1232,7 +1231,6 @@ distribute_py_test(
|
|||
"multi_and_single_gpu",
|
||||
"no_cuda_asan", # times out
|
||||
"no_oss", # b/178656226
|
||||
"no_rocm",
|
||||
"noasan", # b/175816710
|
||||
"notsan", # b/168645872
|
||||
],
|
||||
|
|
@ -1290,7 +1288,6 @@ distribute_py_test(
|
|||
main = "distribute_utils_test.py",
|
||||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_rocm",
|
||||
],
|
||||
deps = [
|
||||
":combinations",
|
||||
|
|
@ -1318,7 +1315,6 @@ distribute_py_test(
|
|||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_cuda_asan", # times out
|
||||
"no_rocm",
|
||||
],
|
||||
tpu_tags = [
|
||||
"no_oss", # b/150954621 Target too big to run serially reliably.
|
||||
|
|
@ -1769,7 +1765,6 @@ distribute_py_test(
|
|||
shard_count = 2,
|
||||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_rocm",
|
||||
"notsan", # TODO(b/160006974)
|
||||
],
|
||||
xla_enable_strict_auto_jit = True,
|
||||
|
|
@ -1802,7 +1797,6 @@ distribute_py_test(
|
|||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_cuda_asan", # times out
|
||||
"no_rocm",
|
||||
"notsan", # TODO(b/160006974)
|
||||
],
|
||||
xla_enable_strict_auto_jit = True,
|
||||
|
|
@ -1876,7 +1870,6 @@ distribute_py_test(
|
|||
disable_mlir_bridge = False,
|
||||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_rocm",
|
||||
],
|
||||
deps = [
|
||||
":combinations",
|
||||
|
|
|
|||
|
|
@ -248,7 +248,6 @@ distribute_py_test(
|
|||
main = "custom_training_loop_metrics_test.py",
|
||||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_rocm",
|
||||
],
|
||||
deps = [
|
||||
":strategy_combinations",
|
||||
|
|
@ -270,7 +269,6 @@ distribute_py_test(
|
|||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_cuda_asan", # times out
|
||||
"no_rocm",
|
||||
"notsan", # TODO(b/170954243)
|
||||
],
|
||||
tpu_tags = [
|
||||
|
|
@ -536,7 +534,7 @@ distribute_py_test(
|
|||
|
||||
distribute_py_test(
|
||||
name = "keras_rnn_model_correctness_test",
|
||||
size = "medium",
|
||||
size = "large",
|
||||
srcs = ["keras_rnn_model_correctness_test.py"],
|
||||
full_precision = True,
|
||||
main = "keras_rnn_model_correctness_test.py",
|
||||
|
|
@ -545,7 +543,7 @@ distribute_py_test(
|
|||
shard_count = 31,
|
||||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_rocm",
|
||||
"no_rocm", # Would require size large, but that effectively disables the test for presubmits.
|
||||
"no_windows_gpu",
|
||||
"noasan", # TODO(b/337374867) fails with -fsanitize=null
|
||||
"notpu", # TODO(b/153672562)
|
||||
|
|
@ -605,7 +603,6 @@ distribute_py_test(
|
|||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_cuda_asan", # times out
|
||||
"no_rocm",
|
||||
"no_windows_gpu",
|
||||
"notsan",
|
||||
],
|
||||
|
|
@ -825,6 +822,7 @@ distribute_py_test(
|
|||
tags = [
|
||||
"multi_and_single_gpu",
|
||||
"no_cuda_asan", # times out
|
||||
"no_rocm",
|
||||
],
|
||||
xla_tags = [
|
||||
"no_cuda_asan", # times out
|
||||
|
|
|
|||
|
|
@ -1735,7 +1735,6 @@ cuda_py_test(
|
|||
name = "betainc_op_test",
|
||||
size = "small",
|
||||
srcs = ["betainc_op_test.py"],
|
||||
tags = ["no_rocm"], # ROCm 3.9 regression
|
||||
xla_tags = [
|
||||
"no_cuda_asan", # times out
|
||||
],
|
||||
|
|
@ -3238,6 +3237,7 @@ cuda_py_test(
|
|||
srcs = ["extract_image_patches_grad_test.py"],
|
||||
shard_count = 15,
|
||||
tags = [
|
||||
"no_rocm",
|
||||
"nomac", # b/181799478
|
||||
"notap", # b/31080670
|
||||
],
|
||||
|
|
@ -3600,6 +3600,7 @@ cuda_py_test(
|
|||
size = "medium",
|
||||
srcs = ["tensordot_op_test.py"],
|
||||
shard_count = 20,
|
||||
tags = ["no_rocm"],
|
||||
xla_enable_strict_auto_jit = False, # b/161856380
|
||||
deps = [
|
||||
"//tensorflow/python:array_ops",
|
||||
|
|
|
|||
|
|
@ -61,7 +61,6 @@ cuda_py_test(
|
|||
size = "small",
|
||||
srcs = ["beta_test.py"],
|
||||
tags = [
|
||||
"no_rocm", # ROCm 3.9 regression
|
||||
"notsan", # b/173653918
|
||||
],
|
||||
xla_tags = [
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ cuda_py_test(
|
|||
main = "csr_sparse_matrix_ops_test.py",
|
||||
shard_count = 10,
|
||||
tags = [
|
||||
"no_rocm", # ROCm 3.8 regression
|
||||
"notsan", # b/149115441
|
||||
],
|
||||
deps = [
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ from tensorflow.python.ops import math_ops
|
|||
from tensorflow.python.ops import resource_variable_ops
|
||||
from tensorflow.python.ops import variables
|
||||
from tensorflow.python.platform import googletest
|
||||
from tensorflow.python.platform import test
|
||||
|
||||
|
||||
@test_util.run_all_in_graph_and_eager_modes
|
||||
|
|
@ -59,11 +58,11 @@ class ReduceTest(test_util.TensorFlowTestCase):
|
|||
x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)
|
||||
with test_util.device(use_gpu=True):
|
||||
for axis in (0, -2):
|
||||
self.assertAllEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)),
|
||||
[5, 7, 9])
|
||||
self.assertAllEqual(
|
||||
self.evaluate(math_ops.reduce_sum(x, axis=axis)), [5, 7, 9])
|
||||
for axis in (1, -1):
|
||||
self.assertAllEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)),
|
||||
[6, 15])
|
||||
self.assertAllEqual(
|
||||
self.evaluate(math_ops.reduce_sum(x, axis=axis)), [6, 15])
|
||||
for axis in (None, (0, 1), (1, 0), (-1, 0), (0, -1), (-2, 1), (1, -2),
|
||||
(-1, -2), (-2, -1)):
|
||||
self.assertEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)), 21)
|
||||
|
|
@ -358,8 +357,8 @@ class ScalarMulTest(test_util.TensorFlowTestCase):
|
|||
indices = constant_op.constant([0, 2, 5])
|
||||
x = math_ops.scalar_mul(-3, ops.IndexedSlices(values, indices))
|
||||
with test_util.device(use_gpu=True):
|
||||
self.assertAllEqual(self.evaluate(x.values),
|
||||
[[-6, -9], [-15, -21], [0, 3]])
|
||||
self.assertAllEqual(
|
||||
self.evaluate(x.values), [[-6, -9], [-15, -21], [0, 3]])
|
||||
self.assertAllEqual(self.evaluate(x.indices), [0, 2, 5])
|
||||
|
||||
|
||||
|
|
@ -436,9 +435,11 @@ class AddNTest(test_util.TensorFlowTestCase):
|
|||
|
||||
def test_iterable(self):
|
||||
"""Test that add_n supports iterables (e.g. generators and dict values)."""
|
||||
|
||||
def fn():
|
||||
yield 1
|
||||
yield 2
|
||||
|
||||
values_dict = {"a": 1, "b": 2}
|
||||
with test_util.use_gpu():
|
||||
self.assertAllEqual(3, math_ops.add_n(fn()))
|
||||
|
|
@ -483,8 +484,9 @@ class DivAndModTest(test_util.TensorFlowTestCase):
|
|||
|
||||
def testFloorModBfloat16(self):
|
||||
nums, divs = self.floatTestData()
|
||||
tf_result = math_ops.floormod(math_ops.cast(nums, dtypes.bfloat16),
|
||||
math_ops.cast(divs, dtypes.bfloat16))
|
||||
tf_result = math_ops.floormod(
|
||||
math_ops.cast(nums, dtypes.bfloat16),
|
||||
math_ops.cast(divs, dtypes.bfloat16))
|
||||
np_result = nums % divs
|
||||
self.assertAllEqual(tf_result, np_result)
|
||||
|
||||
|
|
@ -742,10 +744,8 @@ class NextAfterTest(test_util.TensorFlowTestCase):
|
|||
|
||||
self.assertAllEqual(math_ops.nextafter(one, two) - one, eps)
|
||||
self.assertAllLess(math_ops.nextafter(one, zero) - one, 0)
|
||||
self.assertAllEqual(
|
||||
math_ops.is_nan(math_ops.nextafter(nan, one)), [True])
|
||||
self.assertAllEqual(
|
||||
math_ops.is_nan(math_ops.nextafter(one, nan)), [True])
|
||||
self.assertAllEqual(math_ops.is_nan(math_ops.nextafter(nan, one)), [True])
|
||||
self.assertAllEqual(math_ops.is_nan(math_ops.nextafter(one, nan)), [True])
|
||||
self.assertAllEqual(math_ops.nextafter(one, one), one)
|
||||
|
||||
def testBroadcasting(self):
|
||||
|
|
@ -786,13 +786,13 @@ class BinaryOpsTest(test_util.TensorFlowTestCase):
|
|||
r"Attempt to convert a value .* with an unsupported type")
|
||||
else:
|
||||
error = TypeError
|
||||
error_message = (
|
||||
r"Failed to convert object of type .* to Tensor")
|
||||
error_message = (r"Failed to convert object of type .* to Tensor")
|
||||
|
||||
class RHSReturnsTrue(object):
|
||||
|
||||
def __radd__(self, other):
|
||||
return True
|
||||
|
||||
a = array_ops.ones([1], dtype=dtypes.int32) + RHSReturnsTrue()
|
||||
self.assertEqual(a, True)
|
||||
|
||||
|
|
@ -889,12 +889,6 @@ class RangeTest(test_util.TensorFlowTestCase):
|
|||
class ErfcinvTest(test_util.TensorFlowTestCase):
|
||||
|
||||
def testErfcinv(self):
|
||||
if test.is_built_with_rocm():
|
||||
# The implementation of erfcinv calls ndtri op,
|
||||
# and the ROCm implementation for ndtri op has a known bug in it
|
||||
# whose fix will be in a forthcoming ROCm release (4.0 ?).
|
||||
# Need to skip this unit-test until that ROCm release is out
|
||||
self.skipTest("ndtri op implementation is buggy on ROCm")
|
||||
values = np.random.uniform(0.1, 1.9, size=int(1e4)).astype(np.float32)
|
||||
approx_id = math_ops.erfc(math_ops.erfcinv(values))
|
||||
self.assertAllClose(values, self.evaluate(approx_id))
|
||||
|
|
|
|||
|
|
@ -81,14 +81,7 @@ class MathTest(PForTestCase, parameterized.TestCase):
|
|||
]
|
||||
self._test_unary_cwise_ops(complex_ops, True)
|
||||
|
||||
@test.disable_with_predicate(
|
||||
pred=test.is_built_with_rocm, skip_message="This fails on ROCm.")
|
||||
def test_unary_cwise_real_ops_1(self):
|
||||
if test.is_built_with_rocm():
|
||||
# TODO(rocm):
|
||||
# This fails on ROCm...see JIRA ticket 236756
|
||||
self.skipTest("Fails on ROCM")
|
||||
|
||||
real_ops = [
|
||||
lambda x: math_ops.acosh(1 + math_ops.square(x)),
|
||||
math_ops.abs,
|
||||
|
|
@ -691,15 +684,15 @@ class LinalgTest(PForTestCase):
|
|||
self._test_loop_fn(loop_fn, 3)
|
||||
|
||||
def test_matrix_inverse(self):
|
||||
x = (random_ops.random_uniform([3, 4, 2, 2]) +
|
||||
10 * linalg_ops.eye(2)) # Ensure well-conditioned.
|
||||
x = (random_ops.random_uniform([3, 4, 2, 2]) + 10 * linalg_ops.eye(2)
|
||||
) # Ensure well-conditioned.
|
||||
|
||||
for adjoint in (True, False):
|
||||
|
||||
# pylint: disable=cell-var-from-loop
|
||||
def loop_fn(i):
|
||||
return linalg_ops.matrix_inverse(array_ops.gather(x, i),
|
||||
adjoint=adjoint)
|
||||
return linalg_ops.matrix_inverse(
|
||||
array_ops.gather(x, i), adjoint=adjoint)
|
||||
|
||||
# pylint: enable=cell-var-from-loop
|
||||
self._test_loop_fn(loop_fn, 2)
|
||||
|
|
@ -710,8 +703,8 @@ class LinalgTest(PForTestCase):
|
|||
for stack_b in (True, False):
|
||||
shape_a = (2, 4, 3, 3) if stack_a else (4, 3, 3)
|
||||
shape_b = (2, 4, 3, 5) if stack_b else (4, 3, 5)
|
||||
x = (random_ops.random_uniform(shape_a) +
|
||||
10 * linalg_ops.eye(3)) # Ensure well-conditioned.
|
||||
x = (random_ops.random_uniform(shape_a) + 10 * linalg_ops.eye(3)
|
||||
) # Ensure well-conditioned.
|
||||
y = random_ops.random_uniform(shape_b)
|
||||
|
||||
# pylint: disable=cell-var-from-loop
|
||||
|
|
|
|||
|
|
@ -139,11 +139,6 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
|
|||
]
|
||||
) # pyformat: disable
|
||||
def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args):
|
||||
if test_util.IsBuiltWithROCm():
|
||||
# TODO(rocm):
|
||||
# This fails on ROCm...see JIRA ticket 236756
|
||||
self.skipTest('Fails on ROCM')
|
||||
|
||||
result = op(x, **extra_args)
|
||||
|
||||
# Run the wrapped op on the dense values, for comparison.
|
||||
|
|
@ -319,7 +314,9 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
|
|||
ragged_factory_ops.constant_value([['foo', 'bar'], ['baz']]),
|
||||
ragged_factory_ops.constant_value([['2', '9'], ['12']]))},
|
||||
]) # pyformat: disable
|
||||
def testListValuedElementwiseOp(self, inputs, op=math_ops.add_n,
|
||||
def testListValuedElementwiseOp(self,
|
||||
inputs,
|
||||
op=math_ops.add_n,
|
||||
**extra_args):
|
||||
use_kwargs = extra_args.pop('use_kwargs', False)
|
||||
if use_kwargs:
|
||||
|
|
@ -676,13 +673,20 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
|
|||
expected=ragged_factory_ops.constant_value([[5, 4], [3, 2, 1]])),
|
||||
dict(
|
||||
op=string_ops.string_format,
|
||||
kwargs={'template': 'Hi {}',
|
||||
'inputs': [ragged_factory_ops.constant_value([[1, 2], [3]])]},
|
||||
kwargs={
|
||||
'template': 'Hi {}',
|
||||
'inputs': [ragged_factory_ops.constant_value([[1, 2], [3]])]
|
||||
},
|
||||
expected='Hi [[1, 2], [3]]'),
|
||||
])
|
||||
def testRaggedDispatch(self, op, expected, args=(), result_is_list=False,
|
||||
def testRaggedDispatch(self,
|
||||
op,
|
||||
expected,
|
||||
args=(),
|
||||
result_is_list=False,
|
||||
kwargs=None):
|
||||
if kwargs is None: kwargs = {}
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
result = op(*args, **kwargs)
|
||||
if result_is_list:
|
||||
self.assertLen(result, len(expected))
|
||||
|
|
@ -694,15 +698,13 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
|
|||
def testUnaryElementwiseOpsPreserveUniformRowLength(self):
|
||||
# Unary elementwise op
|
||||
rt = ragged_tensor.RaggedTensor.from_uniform_row_length(
|
||||
ragged_factory_ops.constant([[1, 2], [3]]),
|
||||
uniform_row_length=2)
|
||||
ragged_factory_ops.constant([[1, 2], [3]]), uniform_row_length=2)
|
||||
self.assertAllEqual(rt.uniform_row_length,
|
||||
array_ops.zeros_like(rt).uniform_row_length)
|
||||
|
||||
# Unary-list elementwise op
|
||||
rt = ragged_tensor.RaggedTensor.from_uniform_row_length(
|
||||
ragged_factory_ops.constant([[1, 2], [3]]),
|
||||
uniform_row_length=2)
|
||||
ragged_factory_ops.constant([[1, 2], [3]]), uniform_row_length=2)
|
||||
self.assertAllEqual(rt.uniform_row_length,
|
||||
math_ops.add_n([rt, rt]).uniform_row_length)
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ cc_library(
|
|||
"//tensorflow/stream_executor/platform",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@local_config_cuda//cuda:cuda_headers",
|
||||
"@local_config_rocm//rocm:rocm_headers",
|
||||
"@local_config_tensorrt//:tensorrt_headers",
|
||||
],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ port::Status TryDlopenCUDALibraries() {
|
|||
port::Status TryDlopenROCmLibraries() {
|
||||
auto rocblas_status = GetRocblasDsoHandle();
|
||||
auto miopen_status = GetMiopenDsoHandle();
|
||||
auto rocfft_status = GetRocfftDsoHandle();
|
||||
auto rocfft_status = GetHipfftDsoHandle();
|
||||
auto rocrand_status = GetRocrandDsoHandle();
|
||||
if (!rocblas_status.status().ok() || !miopen_status.status().ok() ||
|
||||
!rocfft_status.status().ok() || !rocrand_status.status().ok()) {
|
||||
|
|
|
|||
|
|
@ -26,6 +26,10 @@ limitations under the License.
|
|||
#include "tensorflow/stream_executor/platform/port.h"
|
||||
#include "third_party/tensorrt/tensorrt_config.h"
|
||||
|
||||
#if TENSORFLOW_USE_ROCM
|
||||
#include "rocm/rocm_config.h"
|
||||
#endif
|
||||
|
||||
namespace stream_executor {
|
||||
namespace internal {
|
||||
|
||||
|
|
@ -133,8 +137,12 @@ port::StatusOr<void*> GetMiopenDsoHandle() {
|
|||
return GetDsoHandle("MIOpen", "");
|
||||
}
|
||||
|
||||
port::StatusOr<void*> GetRocfftDsoHandle() {
|
||||
port::StatusOr<void*> GetHipfftDsoHandle() {
|
||||
#if TF_ROCM_VERSION < 40100
|
||||
return GetDsoHandle("rocfft", "");
|
||||
#else
|
||||
return GetDsoHandle("hipfft", "");
|
||||
#endif
|
||||
}
|
||||
|
||||
port::StatusOr<void*> GetRocrandDsoHandle() {
|
||||
|
|
@ -214,8 +222,8 @@ port::StatusOr<void*> GetMiopenDsoHandle() {
|
|||
return *result;
|
||||
}
|
||||
|
||||
port::StatusOr<void*> GetRocfftDsoHandle() {
|
||||
static auto result = new auto(DsoLoader::GetRocfftDsoHandle());
|
||||
port::StatusOr<void*> GetHipfftDsoHandle() {
|
||||
static auto result = new auto(DsoLoader::GetHipfftDsoHandle());
|
||||
return *result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ port::StatusOr<void*> GetNvInferPluginDsoHandle();
|
|||
|
||||
port::StatusOr<void*> GetRocblasDsoHandle();
|
||||
port::StatusOr<void*> GetMiopenDsoHandle();
|
||||
port::StatusOr<void*> GetRocfftDsoHandle();
|
||||
port::StatusOr<void*> GetHipfftDsoHandle();
|
||||
port::StatusOr<void*> GetRocrandDsoHandle();
|
||||
port::StatusOr<void*> GetRoctracerDsoHandle();
|
||||
port::StatusOr<void*> GetHipsparseDsoHandle();
|
||||
|
|
@ -84,7 +84,7 @@ port::StatusOr<void*> GetCudnnDsoHandle();
|
|||
|
||||
port::StatusOr<void*> GetRocblasDsoHandle();
|
||||
port::StatusOr<void*> GetMiopenDsoHandle();
|
||||
port::StatusOr<void*> GetRocfftDsoHandle();
|
||||
port::StatusOr<void*> GetHipfftDsoHandle();
|
||||
port::StatusOr<void*> GetRocrandDsoHandle();
|
||||
port::StatusOr<void*> GetRoctracerDsoHandle();
|
||||
port::StatusOr<void*> GetHipsparseDsoHandle();
|
||||
|
|
|
|||
|
|
@ -197,19 +197,19 @@ cc_library(
|
|||
)
|
||||
|
||||
cc_library(
|
||||
name = "rocfft_if_static",
|
||||
name = "hipfft_if_static",
|
||||
deps = if_static([
|
||||
"@local_config_rocm//rocm:rocfft",
|
||||
"@local_config_rocm//rocm:hipfft",
|
||||
]),
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "rocfft_plugin",
|
||||
name = "hipfft_plugin",
|
||||
srcs = if_rocm_is_configured(["rocm_fft.cc"]),
|
||||
hdrs = if_rocm_is_configured(["rocm_fft.h"]),
|
||||
visibility = ["//visibility:public"],
|
||||
deps = if_rocm_is_configured([
|
||||
":rocfft_if_static",
|
||||
":hipfft_if_static",
|
||||
":rocm_platform_id",
|
||||
"//tensorflow/stream_executor:event",
|
||||
"//tensorflow/stream_executor:fft",
|
||||
|
|
@ -356,7 +356,7 @@ cc_library(
|
|||
visibility = ["//visibility:public"],
|
||||
deps = if_rocm_is_configured([
|
||||
":miopen_plugin",
|
||||
":rocfft_plugin",
|
||||
":hipfft_plugin",
|
||||
":rocblas_plugin",
|
||||
":rocrand_plugin",
|
||||
":rocm_driver",
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ namespace wrap {
|
|||
static const char *kName; \
|
||||
using FuncPtrT = std::add_pointer<decltype(::__name)>::type; \
|
||||
static void *GetDsoHandle() { \
|
||||
auto s = internal::CachedDsoLoader::GetRocfftDsoHandle(); \
|
||||
auto s = internal::CachedDsoLoader::GetHipfftDsoHandle(); \
|
||||
return s.ValueOrDie(); \
|
||||
} \
|
||||
static FuncPtrT LoadOrDie() { \
|
||||
|
|
|
|||
|
|
@ -20,7 +20,18 @@ limitations under the License.
|
|||
#ifndef TENSORFLOW_STREAM_EXECUTOR_ROCM_ROCM_FFT_H_
|
||||
#define TENSORFLOW_STREAM_EXECUTOR_ROCM_ROCM_FFT_H_
|
||||
|
||||
#if TENSORFLOW_USE_ROCM
|
||||
|
||||
#include "rocm/rocm_config.h"
|
||||
|
||||
#if TF_ROCM_VERSION < 40100
|
||||
#include "rocm/include/rocfft/hipfft.h"
|
||||
#else
|
||||
#include "rocm/include/hipfft/hipfft.h"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#include "tensorflow/stream_executor/fft.h"
|
||||
#include "tensorflow/stream_executor/platform/port.h"
|
||||
#include "tensorflow/stream_executor/plugin_registry.h"
|
||||
|
|
|
|||
|
|
@ -3,10 +3,10 @@
|
|||
FROM ubuntu:bionic
|
||||
MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
|
||||
|
||||
ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/4.0.1/
|
||||
ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/4.1/
|
||||
ARG ROCM_BUILD_NAME=xenial
|
||||
ARG ROCM_BUILD_NUM=main
|
||||
ARG ROCM_PATH=/opt/rocm-4.0.1
|
||||
ARG ROCM_PATH=/opt/rocm-4.1.0
|
||||
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
ENV TF_NEED_ROCM 1
|
||||
|
|
|
|||
|
|
@ -18,13 +18,14 @@ set -e
|
|||
set -x
|
||||
|
||||
N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
|
||||
N_TEST_JOBS=1 # run tests serially
|
||||
|
||||
echo ""
|
||||
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
|
||||
echo ""
|
||||
|
||||
# First positional argument (if any) specifies the ROCM_INSTALL_DIR
|
||||
ROCM_INSTALL_DIR=/opt/rocm-4.0.1
|
||||
ROCM_INSTALL_DIR=/opt/rocm-4.1.0
|
||||
if [[ -n $1 ]]; then
|
||||
ROCM_INSTALL_DIR=$1
|
||||
fi
|
||||
|
|
@ -41,13 +42,100 @@ yes "" | $PYTHON_BIN_PATH configure.py
|
|||
bazel test \
|
||||
--config=rocm \
|
||||
-k \
|
||||
--test_tag_filters=multi_gpu \
|
||||
--test_tag_filters=-no_gpu,-no_rocm \
|
||||
--jobs=${N_BUILD_JOBS} \
|
||||
--local_test_jobs=1 \
|
||||
--local_test_jobs=${N_TEST_JOBS} \
|
||||
--test_timeout 600,900,2400,7200 \
|
||||
--build_tests_only \
|
||||
--test_output=errors \
|
||||
--test_sharding_strategy=disabled \
|
||||
--test_size_filters=small,medium,large \
|
||||
--cache_test_results=no \
|
||||
--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=2048 \
|
||||
-- \
|
||||
//tensorflow/core/nccl:nccl_manager_test
|
||||
//tensorflow/core/common_runtime/gpu:gpu_device_unified_memory_test_2gpu \
|
||||
//tensorflow/core/kernels:collective_nccl_test_2gpu \
|
||||
//tensorflow/core/nccl:nccl_manager_test_2gpu \
|
||||
//tensorflow/python/distribute/integration_test:mwms_peer_failure_test_2gpu \
|
||||
//tensorflow/python/distribute:checkpoint_utils_test_2gpu \
|
||||
//tensorflow/python/distribute:checkpointing_test_2gpu \
|
||||
//tensorflow/python/distribute:collective_all_reduce_strategy_test_xla_2gpu \
|
||||
//tensorflow/python/distribute:custom_training_loop_gradient_test_2gpu \
|
||||
//tensorflow/python/distribute:custom_training_loop_input_test_2gpu \
|
||||
//tensorflow/python/distribute:distribute_utils_test_2gpu \
|
||||
//tensorflow/python/distribute:input_lib_test_2gpu \
|
||||
//tensorflow/python/distribute:input_lib_type_spec_test_2gpu \
|
||||
//tensorflow/python/distribute:metrics_v1_test_2gpu \
|
||||
//tensorflow/python/distribute:mirrored_variable_test_2gpu \
|
||||
//tensorflow/python/distribute:parameter_server_strategy_test_2gpu \
|
||||
//tensorflow/python/distribute:ps_values_test_2gpu \
|
||||
//tensorflow/python/distribute:random_generator_test_2gpu \
|
||||
//tensorflow/python/distribute:test_util_test_2gpu \
|
||||
//tensorflow/python/distribute:tf_function_test_2gpu \
|
||||
//tensorflow/python/distribute:vars_test_2gpu \
|
||||
//tensorflow/python/distribute:warm_starting_util_test_2gpu \
|
||||
//tensorflow/python/keras/distribute:collective_all_reduce_strategy_test_2gpu \
|
||||
//tensorflow/python/keras/distribute:collective_all_reduce_strategy_test_xla_2gpu \
|
||||
//tensorflow/python/keras/distribute:ctl_correctness_test_2gpu \
|
||||
//tensorflow/python/keras/distribute:custom_training_loop_optimizer_test_2gpu \
|
||||
//tensorflow/python/keras/distribute:keras_metrics_test_2gpu \
|
||||
//tensorflow/python/keras/distribute:keras_models_test_2gpu \
|
||||
//tensorflow/python/keras/distribute:keras_optimizer_v2_test_2gpu \
|
||||
//tensorflow/python/keras/distribute:keras_stateful_lstm_model_correctness_test_2gpu \
|
||||
//tensorflow/python/keras/distribute:mirrored_strategy_test_2gpu \
|
||||
//tensorflow/python/keras/distribute:mirrored_variable_test_2gpu \
|
||||
//tensorflow/python/keras/distribute:multi_worker_test_2gpu \
|
||||
//tensorflow/python/keras/layers/preprocessing:category_crossing_distribution_test_2gpu \
|
||||
//tensorflow/python/keras/layers/preprocessing:category_encoding_distribution_test_2gpu \
|
||||
//tensorflow/python/keras/layers/preprocessing:discretization_distribution_test_2gpu \
|
||||
//tensorflow/python/keras/layers/preprocessing:hashing_distribution_test_2gpu \
|
||||
//tensorflow/python/keras/layers/preprocessing:image_preprocessing_distribution_test_2gpu \
|
||||
//tensorflow/python/keras/layers/preprocessing:index_lookup_distribution_test_2gpu \
|
||||
//tensorflow/python/keras/layers/preprocessing:text_vectorization_distribution_test_2gpu \
|
||||
//tensorflow/python/keras/utils:multi_gpu_utils_test_2gpu \
|
||||
//tensorflow/python/keras/utils:multi_gpu_utils_test_xla_2gpu \
|
||||
//tensorflow/python/kernel_tests:dynamic_partition_op_test_2gpu \
|
||||
//tensorflow/python/training:saver_test_2gpu \
|
||||
|
||||
|
||||
|
||||
|
||||
# no_rocm : //tensorflow/python/keras/distribute:keras_dnn_correctness_test_2gpu \
|
||||
# no_rocm : //tensorflow/python/keras/distribute:keras_embedding_model_correctness_test_2gpu \
|
||||
|
||||
# TIMEOUT : //tensorflow/python/distribute:values_test_2gpu \
|
||||
# TIMEOUT : //tensorflow/python/keras/distribute:keras_image_model_correctness_test_2gpu \
|
||||
# TIMEOUT : //tensorflow/python/keras/distribute:keras_rnn_model_correctness_test_2gpu \
|
||||
# TIMEOUT : //tensorflow/python/keras/distribute:saved_model_mixed_api_test_2gpu \
|
||||
# TIMEOUT : //tensorflow/python/keras/distribute:saved_model_save_load_test_2gpu \
|
||||
|
||||
# Started timing-out with ROCm 4.1
|
||||
# TIMEOUT : //tensorflow/python/keras/distribute:keras_premade_models_test_2gpu \
|
||||
|
||||
# Became FLAKY with ROCm 4.1
|
||||
# FLAKY : //tensorflow/python/distribute:strategy_common_test_2gpu \
|
||||
# FLAKY : //tensorflow/python/distribute:strategy_common_test_xla_2gpu \
|
||||
# FLAKY : //tensorflow/python/distribute:strategy_gather_test_2gpu \
|
||||
# FLAKY : //tensorflow/python/distribute:strategy_gather_test_xla_2gpu \
|
||||
# FLAKY : //tensorflow/python/keras/distribute:custom_training_loop_metrics_test_2gpu \
|
||||
# FLAKY : //tensorflow/python/keras/distribute:custom_training_loop_models_test_2gpu \
|
||||
|
||||
# FAILED : //tensorflow/python/distribute/v1:cross_device_ops_test_2gpu \
|
||||
# FAILED : //tensorflow/python/distribute:cross_device_ops_test_2gpu \
|
||||
# FAILED : //tensorflow/python/distribute:mirrored_strategy_test_2gpu \
|
||||
# FAILED : //tensorflow/python/keras/distribute:distribute_strategy_test_2gpu \
|
||||
# FAILED : //tensorflow/python/kernel_tests:collective_ops_test_2gpu \
|
||||
# FAILED : //tensorflow/python:collective_ops_gpu_test_2gpu \
|
||||
# FAILED : //tensorflow/python:nccl_ops_test_2gpu \
|
||||
|
||||
# FAILED ON CI Node only : //tensorflow/python/distribute:collective_all_reduce_strategy_test_2gpu \
|
||||
# See run : http://ml-ci.amd.com:21096/job/tensorflow/job/github-prs-rocmfork-develop-upstream/job/rocm-latest-ubuntu-gpu-multi/216/console
|
||||
|
||||
# FAILED ON CI Node only : //tensorflow/python/keras/distribute:keras_save_load_test_2gpu \
|
||||
# Starting with ROCm 4.1, see run : http://ml-ci.amd.com:21096/job/tensorflow/job/github-prs-rocmfork-develop-upstream/job/rocm-latest-ubuntu-gpu-multi/241/console
|
||||
|
||||
# FAILED : //tensorflow/python/keras/distribute:minimize_loss_test_2gpu \
|
||||
# potential breaking commit : https://github.com/tensorflow/tensorflow/commit/74e39c8fa60079862597c9db506cd15b2443a5a2
|
||||
|
||||
# NO MORE MULTI_GPU : //tensorflow/python/keras/distribute:checkpointing_test_2gpu \
|
||||
# multi_gpu tag was commented out in this commit : https://github.com/tensorflow/tensorflow/commit/b87d02a3f8d8b55045bf4250dd72e746357a3eba
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS}
|
|||
echo ""
|
||||
|
||||
# First positional argument (if any) specifies the ROCM_INSTALL_DIR
|
||||
ROCM_INSTALL_DIR=/opt/rocm-4.0.1
|
||||
ROCM_INSTALL_DIR=/opt/rocm-4.1.0
|
||||
if [[ -n $1 ]]; then
|
||||
ROCM_INSTALL_DIR=$1
|
||||
fi
|
||||
|
|
|
|||
24
third_party/gpus/find_rocm_config.py
vendored
24
third_party/gpus/find_rocm_config.py
vendored
|
|
@ -206,6 +206,28 @@ def _find_rocfft_config(rocm_install_path):
|
|||
return rocfft_config
|
||||
|
||||
|
||||
def _find_hipfft_config(rocm_install_path):
|
||||
|
||||
def hipfft_version_numbers(path):
|
||||
version_file = os.path.join(path, "hipfft/include/hipfft-version.h")
|
||||
if not os.path.exists(version_file):
|
||||
raise ConfigError(
|
||||
'hipfft version file "{}" not found'.format(version_file))
|
||||
major = _get_header_version(version_file, "hipfftVersionMajor")
|
||||
minor = _get_header_version(version_file, "hipfftVersionMinor")
|
||||
patch = _get_header_version(version_file, "hipfftVersionPatch")
|
||||
return major, minor, patch
|
||||
|
||||
major, minor, patch = hipfft_version_numbers(rocm_install_path)
|
||||
|
||||
hipfft_config = {
|
||||
"hipfft_version_number":
|
||||
_get_composite_version_number(major, minor, patch)
|
||||
}
|
||||
|
||||
return hipfft_config
|
||||
|
||||
|
||||
def _find_roctracer_config(rocm_install_path):
|
||||
|
||||
def roctracer_version_numbers(path):
|
||||
|
|
@ -289,6 +311,8 @@ def find_rocm_config():
|
|||
result.update(_find_rocblas_config(rocm_install_path))
|
||||
result.update(_find_rocrand_config(rocm_install_path))
|
||||
result.update(_find_rocfft_config(rocm_install_path))
|
||||
if result["rocm_version_number"] >= 40100:
|
||||
result.update(_find_hipfft_config(rocm_install_path))
|
||||
result.update(_find_roctracer_config(rocm_install_path))
|
||||
result.update(_find_hipsparse_config(rocm_install_path))
|
||||
result.update(_find_rocsolver_config(rocm_install_path))
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
eJy9Wm1v2zgS/q5fQSgoKm8cJe19uEUO+eBNs6j32iSws10smsCgbdrmRhZ1JJU0KPrfb4akZEqWEid2GqCoJQ0fDmeeeaGoPXIqsgfJ5wtN3h+9PyJXC0auWKqE/D0R96SX64WQKia9JCEDFFNkwBSTd2waB3vBHvnEJyDOpiRPp0wSDeN7GZ3Af+5Jl3xhUnGRkvfxEYlQIHSPws5/AOFB5GRJH0gqNMkVAwiuyIwnjLBvE5ZpwlMyEcss4TSdMHLP9cJM40BADfK3gxBjTUGagnwGVzNfjlBtFMa/hdbZ8eHh/f19TI2ysZDzw8QKqsNP/dOz8+HZAShshvyZJkwpItn/ci5hqeMHQjPQZ0LHoGVC74mQhM4lg2daoL73kmuezrtEiZm+p5IBypQrLfk41xVjFdrBmn0BMBdNSdgbkv4wJL/1hv1hFzD+6l99vPjzivzVGwx651f9syG5GJDTi/MP/av+xTlc/U5653+T//bPP3QJA1PBNOxbJlF/UJKjGY3ryJCxigIzYRVSGZvwGZ/AutJ5TueMzMUdkyksh2RMLrlCZypQbwooCV9yTbW5s7YonOZkp39BGIaXkqdIw4vTJUw/llQ+oDJkwSjOPwUXTbSQnBkdyZ1lH1BKgIJoWLPKB6XZMg4CJLyaSA48U4xK4IIypmiDR2KqKkoXPI5W0yqAm0ukwJRpNFVqTMxloYQByqz+OH4i0hmf59IYEMcpPRW5jo1WGUWiiwIcGeJ8gzRbSJHPF0gSlt5xKdIlSzW5o5IbUkag/+fRZe/qYycO+jMILniW8GltSu7M0rXLsXYoFDTqMCmNqyXTuTRuJ3ALDDQRU1a1n6a3zK6r8MGDpzEEDT4q9WrUO/bxEiFurTOs7a0/C59YR5hoX1A5PUB9puBDDXEfqHzs82AmxZKMqXJGdYlhpVupb0zAVisVwTyQlYJS0JgJwvJQZPpQiskyRJEc0x8FXTT4fUbzBNeT5CxAtgYBxJyQ4D5R/BKq+AV5wf0CJgVBMEkoxOmpcdEZWjk6MykQXNU5Dghor1AMZiGjOdMjN90IVRnh0iIjZn3lq+kPMsJAKk2TxBsEun4oWGstXbjcuW1J3CBLHRwZ4wJhvjoiOWnVD8T5jISljUN0oVCxYwNq0gy4kvnqjb4B+b02eaOiZDQxU68JdVamWnvmGwyLj1Bcs5EL41GaL8dMRkv6j5BdAhbD/2DYZOHb/90R/JFfiBEj+3iNVygNV0bcn8aSvJgjQi26JKVLVrhn4PwB2TwDhSGlAxAMNw4DK576Ocq5BqMnwTwEz7mIRcYK5FCGUCFSCBvI7CdhrmcHv4Yda/8l6gY2lCw2PyMZ7tmJyBtF9qPr6X4nJG+Mdl2D3zHjwLNG3qIQGwaAY27Gc0hZWfSu4x46K0GuiYxcJ0DTUQ5R6kfA22Lm8PsPE262ql2nb2NYHCBHxkZk38FW/0Ji+gosgJhnwAqms/j+A+rmdRoWEIYO7RAc0qFdi80ImAVBbzZnaF0IGpqEncKXoOzUMt7m2gbiHeNSUdg8qrJKRU7EGNA9MkqvOP2P4KUbY57OxKETPJiyu7D0BWpaDGHfoLtQkQ/YKdzUYHQT70XVMrMb8yOiKRil8SuAnYrSbj2g99cb88C0boaClVGYO2clZdwTbIWg4zghM4zgadSJ8U4WdWpyq0mqI2MFnY6OwtiZwwbhiaVbdezXoxsnY0KzWeadk8lcaDTJvL8pZj0IOyWq43lDskAWNNzGwGviRUMCK5KvZRoM/O7MEzYghMcvyGeA9yOoJUo7W4XvC57JPNV8yTZgvSdc0+B51AecQ55OknzKDvE3/CsA48UuwsDLB28/9i/JwGr90sDYg+0VJBAvS0PrYluqb7idgAaiSHaYt2HG0ZezwRAa+y65XwhQzvYVFguQIk9k9Ln3B+wFfjFlZp9UnvTPLwaFClBFJON3tvkvSpNlvi36BxOaTHKo8qAdk5DfFTf7LmjrhGmHLM49
hXwKdMJIEAZsnCssRQr7q4zC6iF+oa+DmMcdiKqEYVPF8y0G3l1bW1iN0udCoBHCSki+q5Rng4sUXeOyH1et3IXoaud1Y+/hx9XanJXoWnLMmhtElhPcrqJYkDKy3OWrBdbn/gXgvzSmnkEomOjy7HxbTtVRPFoV2fv5KNDLnn4MX14vWvzeXDEqbPK53YgSHnvO2rp+VOaud0xj2P5s1jQZyUdYDvopDvvJkW9204mUiwkdTMlzd32wInrX5EvshP4V/7vYu0OOaIXAnlCmNHkC61eDcNMUj+citfkdC8AM+9XmpdTbJbxZ3/l4MT0r2iaI2kciduSZsEG5NdFScAw92q2fFnzRjdJAYctqHihTAFoCckKRCGqNerONnp8hwD+/feoNt00RazAvyxF1mG2TRFvctPaVXjzWWssGnN3miers9UQhIRY3SxRGcqsm04H4acIH3X09bAyEl+6+NmbaoHf+oWBalWNVQEcNzwM1ajRYHFqjFlc82RdVp6rzYDbTm9EABbdriiyIzwK4PHhNEgD+T2iKasYxI5+f8eooOPL5Ca+GYkZul++a/N6a7lZsqlF6HWXnyW41d53jWtIJmGwjmjvZrZlucXyy2zuvwnML/ROoDlnuatA7PRvsoL7XgbwKv7fyRHkmJZh9b7igsPGmjqHFil1i9YPlaBvSt7CglfcVhtWo34i1c/ZXNKi/V1L4LmHD10pOdrsAKHH8l0v2zusl/HKKnxAI5VzuZP7zy5L+GszLsn4d5nLLtN9Og+YIqFOs9ranEWu3EVDXoF4ClEjuNi0BTnbrEmBx/BJg77xqy2On+DmlYHjx6ctOSkEd6MWbvRrQDrZ7LWRorQQVotUqQSPWzitBRQMXB2sHavUDUUqmfIKH0vhdgJjZ1yxGkZThlwxuPXhQ9tSJdcPheNDG8aZAfILmb4flNwerrwwMw8sWwYCXNF+fw/qKKfzMADz0Y3X51R76aCGSW66NdHhTHCZVDreLEXGeTalm0SZnlp2WUZuc/LSNfeq9dtu4J98VPjLw8XcHjwx8dLP5yLinGvhHDPtU6/PIrE/VjI4fdwbCRduSQh0wTNby4bh8F3nLHrrFGXhKlJCaTaP1yIwh8pcq6pQ533xMFIVv1DF5o/CbgWiFZPR3H/l54YLnwe6loXpQsf0IKcZP6lgUXqdng8HF4BiYf516J/hKywgAO+UwiCONHxsEAUTvaIQfCYxG5OSEhKMRrnE0MsnLLjf4P/9j3RI=
|
||||
eJy9Wn9v2zgS/V+fglBQVN44StJbYBc55ABvmkV91yaBne1i0QYGbdM2t7KoJamkQdHvfjMkJVOylDix0wBFLWn4OJx58zj6sUfORHYv+XyhyZujN0fkesHINUuVkL8n4o70cr0QUsWklyRkgGaKDJhi8pZN42Av2CPv+QTM2ZTk6ZRJomF8L6MT+M9d6ZKPTCouUvImPiIRGoTuUtj5NyDci5ws6T1JhSa5YgDBFZnxhBH2dcIyTXhKJmKZJZymE0buuF6YaRwIuEH+chBirClYU7DP4Gjm2xGqjcP4t9A6Ozk8vLu7i6lxNhZyfphYQ3X4vn92fjE8PwCHzZA/0oQpRST7J+cSljq+JzQDfyZ0DF4m9I4ISehcMrimBfp7J7nm6bxLlJjpOyoZoEy50pKPc10JVuEdrNk3gHDRlIS9IekPQ/Jbb9gfdgHjz/71u8s/rsmfvcGgd3HdPx+SywE5u7x427/uX17A0e+kd/EX+V//4m2XMAgVTMO+ZhL9Byc5htGkjgwZqzgwE9YhlbEJn/EJrCud53TOyFzcMpnCckjG5JIrTKYC96aAkvAl11SbM2uLwmlOd/oXhGF4JXmKNLw8W8L0Y0nlPTpDFozi/FNI0UQLyZnxkdxa9gGlBDiIgTWrvFeaLeMgQMKrieTAM8WoBC4oE4o2eCSmqqJ0IeMYNa0COLlECkyZxlClJsRcFk4YoMz6j+MnIp3xeS5NAHGc0lOR69h4lVEkuijAkSEuN0izhRT5fIEkYektlyJdslSTWyq5IWUE/n8YXfWu33XioD+D4oJrCZ/WpuQuLF27HBuHwkHjDpPSpFoynUuTdgKnIEATMWXV+Gn6hdl1FTm49zyGosFLpV+Nfsc+XiLEF5sMG3ubzyInNhGm2hdUTg/QnynkUEPdByof+zyYSbEkY6pcUJ0wrHwr/Y0JxGrlIoQHVCkoDU2YoCwPRaYPpZgsQzTJUf4o+KIh7zOaJ7ieJGcBsjUIoOaEhPSJ4pdQxS/QBfcLmBQEwSShUKdnJkXnGOXo3EggpKpzEhDwXqEZzEJGc6ZHbroRujLCpUXGzObKd9MfZIyBVJomiTcIfH1bsNZGuki5S9uSuEGWOjgyxgXCfHVEctrqH5jzGQnLGIeYQqFixwb0pBlwZfPJG30D9ntt9sZFyWhipl4z6qxCtXbNDxhuPkJxzUaujEdpvhwzGS3p30J2CUQM/4Nhk4Uf/+Mj+CM/EWNG9vEYj9Aajoy5P40leTFHhF50SUqXrEjPwOUD1DwDh0HSAQiGm4RBFM98jXKpwepJUIfgOhexyFiBHMoQdogUygaU/TTM9ezg17Bj479E3yCGksXmZyTDPTsReaXIfvR5ut8JySvjXdfgd8w4yKyxtyjElgHgmJPxHCQri4477qKLEmhNZOw6AYaOcqhSvwJeFzOH376bcrO72uf0dQyLA+TIxIjsO9jqX0hMX4EbIOoMRMF0Ft++w775OQ0LCEOHdggOcmjXYhUBVRD8ZnOG0YWioUnYKXIJzk4t463WNhDvBJeKxuZSlVUqciYmgO6ScXrF6b8FL9MY83QmDp3hwZTdhmUu0NNiCPsK3YWKfMBOkaaGoJt6L3YtM7sJPyKaDaMMfgWwU3HarQf8/nRjLpjWzVCwMgq1c1ZSxl3BVgg6jlMywwqeRp0Yz2RRp2a3mqQ6MlbQ6egojF04bBGeWrpVx346unE2pjSbbY6dTeZKo8nmzU0x60HYKVEdzxvEAlnQcBoLr4kXDQJWiK9lGgz85sITNiCEJ8/QM8D7HtSE0s5W4fuCZzJPNV+yDVjvGdc8eBr1AeeQp5Mkn7JD/A3/CsB4sYsy8PTg9bv+FRlYr59bGHtwewUC4qk0tC62pfqKtxPQQBRih7oNM44+ng+G0Nh3yd1CgHO2r7BYgBR5JqMPvf/CvcBPZpvZJ5Ur/YvLQeEC7CKS8Vvb/Bdbk2W+3fQPJjSZ5LDLg3dMgr4rbu67oK0Tph2yOHcU
9BTohJUgDNg4V7gVKeyvMgqrh/qFvg5qHu9AVKUMm3Y8P2KQ3bW1hdUqfSoEBiGslORxZXs2uEjRNS77ddXKXaiudl439h5+Xa3NWamuJUfV3KCynOF2O4oFKSvLHb5YYX3oXwL+c2vqCYSCia7OL7blVB3Fo1Wh3k9HgV727F34/P2iJe/NO0aFTT63G1HCEy9ZW+8flbnrHdMYbn82a5qM5QMsB/8Uh/vJkR9204mUiwkdTMlzd3ywInrX6CV2Qv+Kfynu3UEjWiGwJ5QpTR7B+tUg3DTV44VIrb7jBjDDfrV5KfV2CU/W73y8mp4VbRNU7QMVO/JC2ODcmmlpOIYe7YsvC77pRjJQxLKqA6UEYCRAEwohqDXqzTF6ukJAfn573xtuKxFrMM/TiDrMtiLRVjetfaVXj7XWsgFntzpRnb0uFBJqcTOhMJZbNZkOxJcJH3T3+2FjITz37mtjpg16F28LplU5VgV01PAyUKNGQ8ShNWpJxaN9UXWqOg9mM70ZDdBwu6bIgvgsgMODlyQB4P+ApqgWHDPy6YpXR8GRTxe8GooZuZ3eNeW9Ve5WbKpReh1l52K3mrt+W70Zx53hdhy3IP4t9Yty3OL/AI7bidxryA/PY3gV43n8rmBcbcnulow3s7vCo9rt7EuzuzJ3XcG1pBMI2EYi7my31nGL40u5PfMiKm6hfwDJYQ+/HvTOzgc76F7rQF7/urfKRPnGVTD7VHxBbxmhjqHFil3b4JfK0TaS3sKCVlWvMKwm7I1YO9f2igd1eVf4pGzDh6bOdmuRtzi+ztszLyr1doofo/Z2rq0FvwrzbM2vwOxA9lto0Kr8FYrVxL8Ra+f6X/GgvgUokdxuugU42623AIvjbwH2zIs29HaKH7MVDC/ff9zJVlAHevajjBrQDh5mtJChdSeoEK22EzRi7XwnqHjg6mDtdXH9dT8lUz7BTy7wqxcxsw8RjSMpw+903HrwNfBj32M0fPoRtHG8qRAfofnrYflFzeobGsPwskUw4CXN1+ewuWIKP6KBDH1fHX6yrzS1EMkXro11eFO8Kq18ulGMiPNsSjWLNnkj32kZtcl7zbaxj721aRv36JPwBwY+/GTsgYEP3mY6llQSUSuVG/KfU/Lz0fHRkaNJczAfnabFvcfuEx7I32Md1gOzPrY1dfzyNhCuqJcUthtTMFren5QP9L+w+27xIUlKlJCaTaN1AYhBYJYq6pRbi/kiLwpfqRPySuGHN9EKyfjvvpT1qhI/qnBP3tW9iu2XfDF+l8qi8HN6PhhcDk6gwD6n3mcwSssIADvlMChXjV/sBAGkfzTCL21GI3J6SsLRCNc4GhmNtMsN/g935/24
|
||||
8
third_party/gpus/rocm/BUILD.tpl
vendored
8
third_party/gpus/rocm/BUILD.tpl
vendored
|
|
@ -51,9 +51,9 @@ cc_library(
|
|||
)
|
||||
|
||||
cc_library(
|
||||
name = "rocfft",
|
||||
srcs = ["rocm/lib/%{rocfft_lib}"],
|
||||
data = ["rocm/lib/%{rocfft_lib}"],
|
||||
name = "%{hipfft_or_rocfft}",
|
||||
srcs = ["rocm/lib/%{hipfft_or_rocfft_lib}"],
|
||||
data = ["rocm/lib/%{hipfft_or_rocfft_lib}"],
|
||||
includes = [
|
||||
".",
|
||||
"rocm/include",
|
||||
|
|
@ -106,7 +106,7 @@ cc_library(
|
|||
":rocm_headers",
|
||||
":hip",
|
||||
":rocblas",
|
||||
":rocfft",
|
||||
":%{hipfft_or_rocfft}",
|
||||
":hiprand",
|
||||
":miopen",
|
||||
":hipsparse",
|
||||
|
|
|
|||
25
third_party/gpus/rocm_configure.bzl
vendored
25
third_party/gpus/rocm_configure.bzl
vendored
|
|
@ -187,6 +187,7 @@ def _rocm_include_path(repository_ctx, rocm_config, bash_bin):
|
|||
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/10.0.0/include")
|
||||
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/11.0.0/include")
|
||||
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/12.0.0/include")
|
||||
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/13.0.0/include")
|
||||
|
||||
# Support hcc based off clang 10.0.0 (for ROCm 3.3)
|
||||
inc_dirs.append(rocm_toolkit_path + "/hcc/compiler/lib/clang/10.0.0/include/")
|
||||
|
|
@ -310,7 +311,7 @@ def _select_rocm_lib_paths(repository_ctx, libs_paths, bash_bin):
|
|||
|
||||
return libs
|
||||
|
||||
def _find_libs(repository_ctx, rocm_config, bash_bin):
|
||||
def _find_libs(repository_ctx, rocm_config, hipfft_or_rocfft, bash_bin):
|
||||
"""Returns the ROCm libraries on the system.
|
||||
|
||||
Args:
|
||||
|
|
@ -327,7 +328,7 @@ def _find_libs(repository_ctx, rocm_config, bash_bin):
|
|||
for name, path in [
|
||||
("amdhip64", rocm_config.rocm_toolkit_path + "/hip"),
|
||||
("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"),
|
||||
("rocfft", rocm_config.rocm_toolkit_path + "/rocfft"),
|
||||
(hipfft_or_rocfft, rocm_config.rocm_toolkit_path + "/" + hipfft_or_rocfft),
|
||||
("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"),
|
||||
("MIOpen", rocm_config.rocm_toolkit_path + "/miopen"),
|
||||
("rccl", rocm_config.rocm_toolkit_path + "/rccl"),
|
||||
|
|
@ -456,7 +457,8 @@ def _create_dummy_repository(repository_ctx):
|
|||
"%{rocblas_lib}": _lib_name("rocblas"),
|
||||
"%{miopen_lib}": _lib_name("miopen"),
|
||||
"%{rccl_lib}": _lib_name("rccl"),
|
||||
"%{rocfft_lib}": _lib_name("rocfft"),
|
||||
"%{hipfft_or_rocfft}": "hipfft",
|
||||
"%{hipfft_or_rocfft_lib}": _lib_name("hipfft"),
|
||||
"%{hiprand_lib}": _lib_name("hiprand"),
|
||||
"%{hipsparse_lib}": _lib_name("hipsparse"),
|
||||
"%{roctracer_lib}": _lib_name("roctracer64"),
|
||||
|
|
@ -537,6 +539,10 @@ def _create_local_rocm_repository(repository_ctx):
|
|||
bash_bin = get_bash_bin(repository_ctx)
|
||||
rocm_config = _get_rocm_config(repository_ctx, bash_bin, find_rocm_config_script)
|
||||
|
||||
# For ROCm 4.1 and above use hipfft, older ROCm versions use rocfft
|
||||
rocm_version_number = int(rocm_config.rocm_version_number)
|
||||
hipfft_or_rocfft = "rocfft" if rocm_version_number < 40100 else "hipfft"
|
||||
|
||||
# Copy header and library files to execroot.
|
||||
# rocm_toolkit_path
|
||||
rocm_toolkit_path = rocm_config.rocm_toolkit_path
|
||||
|
|
@ -550,9 +556,9 @@ def _create_local_rocm_repository(repository_ctx):
|
|||
),
|
||||
make_copy_dir_rule(
|
||||
repository_ctx,
|
||||
name = "rocfft-include",
|
||||
src_dir = rocm_toolkit_path + "/rocfft/include",
|
||||
out_dir = "rocm/include/rocfft",
|
||||
name = hipfft_or_rocfft + "-include",
|
||||
src_dir = rocm_toolkit_path + "/" + hipfft_or_rocfft + "/include",
|
||||
out_dir = "rocm/include/" + hipfft_or_rocfft,
|
||||
),
|
||||
make_copy_dir_rule(
|
||||
repository_ctx,
|
||||
|
|
@ -586,7 +592,7 @@ def _create_local_rocm_repository(repository_ctx):
|
|||
),
|
||||
]
|
||||
|
||||
rocm_libs = _find_libs(repository_ctx, rocm_config, bash_bin)
|
||||
rocm_libs = _find_libs(repository_ctx, rocm_config, hipfft_or_rocfft, bash_bin)
|
||||
rocm_lib_srcs = []
|
||||
rocm_lib_outs = []
|
||||
for lib in rocm_libs.values():
|
||||
|
|
@ -632,7 +638,8 @@ def _create_local_rocm_repository(repository_ctx):
|
|||
{
|
||||
"%{hip_lib}": rocm_libs["amdhip64"].file_name,
|
||||
"%{rocblas_lib}": rocm_libs["rocblas"].file_name,
|
||||
"%{rocfft_lib}": rocm_libs["rocfft"].file_name,
|
||||
"%{hipfft_or_rocfft}": hipfft_or_rocfft,
|
||||
"%{hipfft_or_rocfft_lib}": rocm_libs[hipfft_or_rocfft].file_name,
|
||||
"%{hiprand_lib}": rocm_libs["hiprand"].file_name,
|
||||
"%{miopen_lib}": rocm_libs["MIOpen"].file_name,
|
||||
"%{rccl_lib}": rocm_libs["rccl"].file_name,
|
||||
|
|
@ -641,7 +648,7 @@ def _create_local_rocm_repository(repository_ctx):
|
|||
"%{rocsolver_lib}": rocm_libs["rocsolver"].file_name,
|
||||
"%{copy_rules}": "\n".join(copy_rules),
|
||||
"%{rocm_headers}": ('":rocm-include",\n' +
|
||||
'":rocfft-include",\n' +
|
||||
'":' + hipfft_or_rocfft + '-include",\n' +
|
||||
'":rocblas-include",\n' +
|
||||
'":miopen-include",\n' +
|
||||
'":rccl-include",\n' +
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user