Integrate hermetic ML toolchains for TensorFlow.
Hermetic C++ toolchains and hermetic CUDA are now enabled by default for the Linux x86_64 platform. The list of covered operating systems will be extended over the coming months. Developers can still use non-hermetic toolchains via the --config=clang_local flag.

std::reduce is replaced with a traditional for loop. This is necessary because GCC 8 offers only partial support for C++17, and using std::reduce in that environment leads to an "Undefined method" error.

PiperOrigin-RevId: 775771057
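As an illustration of the std::reduce change, here is a minimal, self-contained C++ sketch (not TensorFlow code; the dims vector and the main() harness are invented for this example) showing the two equivalent flat-size computations the commit swaps between:

#include <cstdint>
#include <functional>  // std::multiplies
#include <iostream>
#include <numeric>     // std::reduce (needs full C++17 library support)
#include <vector>

int main() {
  const std::vector<int32_t> dims = {2, 3, 4, 5};

  // Form removed by this commit: unavailable when the standard library
  // offers only partial C++17 support (e.g. GCC 8's libstdc++).
  const int32_t with_reduce =
      std::reduce(dims.begin(), dims.end(), 1, std::multiplies<int>{});

  // Portable replacement used by the commit: a plain accumulation loop.
  int32_t flat_size = 1;
  for (std::vector<int32_t>::const_iterator it = dims.begin();
       it != dims.end(); ++it) {
    flat_size *= *it;
  }

  std::cout << with_reduce << " == " << flat_size << "\n";  // prints 120 == 120
  return 0;
}

Both expressions compute the product of the dimensions; the loop form simply avoids the C++17 <numeric> algorithm.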
This commit is contained in:
parent ce60a770af
commit ac56b0e840
.bazelrc (45 changed lines)
@@ -159,9 +159,13 @@ common --incompatible_enforce_config_setting_visibility
# TODO: Enable Bzlmod
common --noenable_bzlmod

build --incompatible_enable_cc_toolchain_resolution
build --repo_env USE_HERMETIC_CC_TOOLCHAIN=1

# TODO: Migrate for https://github.com/bazelbuild/bazel/issues/7260
common --noincompatible_enable_cc_toolchain_resolution
common --noincompatible_enable_android_toolchain_resolution
build:clang_local --noincompatible_enable_cc_toolchain_resolution
build:clang_local --noincompatible_enable_android_toolchain_resolution
build:clang_local --repo_env USE_HERMETIC_CC_TOOLCHAIN=0

# Print a stacktrace when a test is killed
test --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1"
@@ -172,6 +176,7 @@ test --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1"
# Android configs. Bazel needs to have --cpu and --fat_apk_cpu both set to the
# target CPU to build transient dependencies correctly. See
# https://docs.bazel.build/versions/master/user-manual.html#flag--fat_apk_cpu
build:android --config=clang_local
build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:android_arm --config=android

@@ -197,6 +202,8 @@ build:android --dynamic_mode=off
# TODO(belitskiy): Remove once on Clang 20.
build:android --define=xnn_enable_avxvnniint8=false

build:macos --config=clang_local

# Sets the default Apple platform to macOS.
build:macos --apple_platform_type=macos

@@ -220,6 +227,7 @@ build:apple-toolchain --crosstool_top=@local_config_apple_cc//:toolchain
build:apple-toolchain --host_crosstool_top=@local_config_apple_cc//:toolchain

# Settings for MacOS on ARM CPUs.
build:macos_arm64 --config=clang_local
build:macos_arm64 --cpu=darwin_arm64
build:macos_arm64 --macos_minimum_os=11.0
build:macos_arm64 --platforms=@build_bazel_apple_support//configs/platforms:darwin_arm64
@@ -229,6 +237,7 @@ build:ios --apple_platform_type=ios
build:ios --copt=-fembed-bitcode
build:ios --copt=-Wno-c++11-narrowing
build:ios --config=apple-toolchain
build:ios --config=clang_local
build:ios_armv7 --config=ios
build:ios_armv7 --cpu=ios_armv7
build:ios_armv7 --platforms=@org_tensorflow//tensorflow/tools/toolchains/ios:ios_armv7

@@ -354,6 +363,7 @@ build:tpu --define=framework_shared_object=true
build:tpu --copt=-DLIBTPU_ON_GCE
build:tpu --define=enable_mlir_bridge=true

build:rocm --config=clang_local
build:rocm --copt=-Wno-gnu-offsetof-extensions
build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
build:rocm --define=using_rocm_hipcc=true

@@ -374,6 +384,7 @@ build:rocm_ci_hermetic --repo_env="OS=ubuntu_22.04"
build:rocm_ci_hermetic --repo_env="ROCM_VERSION=6.2.0"
build:rocm_ci_hermetic --@local_config_rocm//rocm:use_rocm_hermetic_rpath=True

build:sycl --config=clang_local
build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl --define=using_sycl=true
build:sycl --define=tensorflow_mkldnn_contraction_kernel=0
@@ -418,6 +429,7 @@ build:linux --copt="-Werror=switch"
# Linux ARM64 specific options
build:linux_arm64 --copt="-mtune=generic" --copt="-march=armv8-a" --copt="-O3"

build:windows --config=clang_local

# On Windows, `__cplusplus` is wrongly defined without this switch
# See https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/

@@ -536,6 +548,7 @@ test:win_clang_base --host_linkopt=/FORCE:MULTIPLE
test:win_clang_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true --test_summary=short

build:win_clang --config=win_clang_base
build:win_clang --config=clang_local
build:win_clang --extra_toolchains=@local_config_cc//:cc-toolchain-x64_windows-clang-cl
build:win_clang --extra_execution_platforms=//tensorflow/tools/toolchains/win:x64_windows-clang-cl
build:win_clang --host_platform=//tensorflow/tools/toolchains/win:x64_windows-clang-cl

@@ -599,11 +612,6 @@ build:rbe_linux --host_linkopt=-lm

build:rbe_linux_cpu --config=rbe_linux
# Linux cpu and cuda builds share the same toolchain now.
build:rbe_linux_cpu --host_crosstool_top="@local_config_cuda//crosstool:toolchain"
build:rbe_linux_cpu --crosstool_top="@local_config_cuda//crosstool:toolchain"
build:rbe_linux_cpu --extra_toolchains="@local_config_cuda//crosstool:toolchain-linux-x86_64"
build:rbe_linux_cpu --repo_env=CC="/usr/lib/llvm-18/bin/clang"
build:rbe_linux_cpu --repo_env=TF_SYSROOT="/dt9"
build:rbe_linux_cpu --extra_execution_platforms="@ml_build_config_platform//:platform"
build:rbe_linux_cpu --host_platform="@ml_build_config_platform//:platform"
build:rbe_linux_cpu --platforms="@ml_build_config_platform//:platform"
@@ -625,6 +633,13 @@ common:rbe_linux_cpu --remote_instance_name=projects/tensorflow-testing/instance
# build:rbe_linux_cpu --repo_env USE_CUDA_REDISTRIBUTIONS=1
# build:rbe_linux_cpu --config=cuda_version

build:rbe_linux_cpu_clang_local --config=clang_local
build:rbe_linux_cpu_clang_local --host_crosstool_top="@local_config_cuda//crosstool:toolchain"
build:rbe_linux_cpu_clang_local --crosstool_top="@local_config_cuda//crosstool:toolchain"
build:rbe_linux_cpu_clang_local --extra_toolchains="@local_config_cuda//crosstool:toolchain-linux-x86_64"
build:rbe_linux_cpu_clang_local --repo_env=CC="/usr/lib/llvm-18/bin/clang"
build:rbe_linux_cpu_clang_local --repo_env=TF_SYSROOT="/dt9"

# TODO(kanglan): Remove it after toolchain update is complete.
build:rbe_linux_cpu_old --config=rbe_linux
build:rbe_linux_cpu_old --host_crosstool_top="@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain"

@@ -648,6 +663,7 @@ build:rbe_linux_cuda_nvcc --config=cuda_nvcc
build:rbe_linux_cuda_nvcc --repo_env TF_NCCL_USE_STUB=1

build:rbe_win_base --config=rbe_base
build:rbe_win_base --config=clang_local
build:rbe_win_base --shell_executable=C:\\tools\\msys64\\usr\\bin\\bash.exe
build:rbe_win_base --remote_instance_name=projects/tensorflow-testing/instances/windows
# Don't build the python zip archive in the RBE build.

@@ -663,6 +679,7 @@ build:rbe_windows_x86_cpu_2022 --config=rbe_win_base --config=windows_x86_cpu_20
# END TF REMOTE BUILD EXECUTION OPTIONS

# TFLite build configs for generic embedded Linux
build:elinux --config=clang_local
build:elinux --crosstool_top=@local_config_embedded_arm//:toolchain
build:elinux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:elinux_aarch64 --config=elinux
@@ -709,21 +726,20 @@ build:release_linux_base --linkopt=-Wl,--undefined-version

# Container environment settings below this point.
# Set Clang as compiler. Use the actual path to clang installed in container.
build:release_linux_base --repo_env=CC="/usr/lib/llvm-18/bin/clang"
build:release_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/clang"
# Test-related settings below this point.
test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
test:release_linux_base --local_test_jobs=HOST_CPUS
# Give only the list of failed tests at the end of the log
test:release_linux_base --test_summary=short

# Use the Clang toolchain to compile
build:release_cpu_linux --config=release_linux_base
build:release_cpu_linux --crosstool_top="@local_config_cuda//crosstool:toolchain"
build:release_cpu_linux --repo_env=TF_SYSROOT="/dt9"
# Target the AVX instruction set
build:release_cpu_linux --config=avx_linux

# Deprecated release_cpu_linux config with non-hermetic toolchains.
build:release_cpu_linux_clang_local --crosstool_top="@local_config_cuda//crosstool:toolchain"
build:release_cpu_linux_clang_local --repo_env=TF_SYSROOT="/dt9"

build:release_gpu_linux --config=release_cpu_linux
# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
# Note that linux cpu and cuda builds share the same toolchain now.

@@ -733,6 +749,9 @@ test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --ru

build:release_arm64_linux --config=release_linux_base
build:release_arm64_linux --config=linux_arm64
build:release_arm64_linux --config=clang_local
build:release_arm64_linux --repo_env=CC="/usr/lib/llvm-18/bin/clang"
build:release_arm64_linux --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/clang"
build:release_arm64_linux --crosstool_top="@ml2014_clang_aarch64_config_aarch64//crosstool:toolchain"
build:release_arm64_linux --config=mkl_aarch64_threadpool
build:release_arm64_linux --copt=-flax-vector-conversions

@@ -741,6 +760,7 @@ test:release_arm64_linux --flaky_test_attempts=3
build:release_cpu_macos --config=avx_linux

# Base build configs for macOS
build:release_macos_base --config=clang_local
build:release_macos_base --action_env DEVELOPER_DIR=/Applications/Xcode.app/Contents/Developer
build:release_macos_base --define=no_nccl_support=true --output_filter=^$

@@ -906,6 +926,7 @@ test:windows_x86_cpu_2022_pycpp_test --config=windows_x86_cpu_2022_pycpp_test_op
# flags seem to be actually used to specify the execution platform details. It
# seems it is this way because these flags are old and predate the distinction
# between host and execution platform.
build:cross_compile_base --config=clang_local
build:cross_compile_base --host_cpu=k8
build:cross_compile_base --host_crosstool_top=//tensorflow/tools/toolchains/cross_compile/cc:cross_compile_toolchain_suite
build:cross_compile_base --extra_execution_platforms=//tensorflow/tools/toolchains/cross_compile/config:linux_x86_64
WORKSPACE (27 changed lines)
@@ -86,7 +86,7 @@ load(
python_wheel_version_suffix_repository(name = "tf_wheel_version_suffix")

load(
    "@local_xla//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
    "@rules_ml_toolchain//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
    "cuda_json_init_repository",
)

@@ -98,7 +98,7 @@ load(
    "CUDNN_REDISTRIBUTIONS",
)
load(
    "@local_xla//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
    "@rules_ml_toolchain//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
    "cuda_redist_init_repositories",
    "cudnn_redist_init_repository",
)

@@ -112,28 +112,28 @@ cudnn_redist_init_repository(
)

load(
    "@local_xla//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
    "@rules_ml_toolchain//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
    "cuda_configure",
)

cuda_configure(name = "local_config_cuda")

load(
    "@local_xla//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
    "@rules_ml_toolchain//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
    "nccl_redist_init_repository",
)

nccl_redist_init_repository()

load(
    "@local_xla//third_party/nccl/hermetic:nccl_configure.bzl",
    "@rules_ml_toolchain//third_party/nccl/hermetic:nccl_configure.bzl",
    "nccl_configure",
)

nccl_configure(name = "local_config_nccl")

load(
    "@local_xla//third_party/nvshmem/hermetic:nvshmem_json_init_repository.bzl",
    "@rules_ml_toolchain//third_party/nvshmem/hermetic:nvshmem_json_init_repository.bzl",
    "nvshmem_json_init_repository",
)

@@ -144,7 +144,7 @@ load(
    "NVSHMEM_REDISTRIBUTIONS",
)
load(
    "@local_xla//third_party/nvshmem/hermetic:nvshmem_redist_init_repository.bzl",
    "@rules_ml_toolchain//third_party/nvshmem/hermetic:nvshmem_redist_init_repository.bzl",
    "nvshmem_redist_init_repository",
)

@@ -153,8 +153,19 @@ nvshmem_redist_init_repository(
)

load(
    "@local_xla//third_party/nvshmem/hermetic:nvshmem_configure.bzl",
    "@rules_ml_toolchain//third_party/nvshmem/hermetic:nvshmem_configure.bzl",
    "nvshmem_configure",
)

nvshmem_configure(name = "local_config_nvshmem")

load(
    "@rules_ml_toolchain//cc_toolchain/deps:cc_toolchain_deps.bzl",
    "cc_toolchain_deps",
)

cc_toolchain_deps()

register_toolchains("@rules_ml_toolchain//cc_toolchain:lx64_lx64")

register_toolchains("@rules_ml_toolchain//cc_toolchain:lx64_lx64_cuda")
@@ -23,7 +23,7 @@ TFCI_INDEX_HTML_ENABLE=1
TFCI_LIB_SUFFIX="-cpu-linux-x86_64"
TFCI_OUTPUT_DIR=build_output
TFCI_WHL_AUDIT_ENABLE=1
TFCI_WHL_AUDIT_PLAT=manylinux2014_x86_64
TFCI_WHL_AUDIT_PLAT=manylinux_2_27_x86_64
TFCI_WHL_BAZEL_TEST_ENABLE=1
TFCI_WHL_SIZE_LIMIT=260M
TFCI_WHL_SIZE_LIMIT_ENABLE=1
@@ -239,8 +239,10 @@ TEST(RuntimeShapeTest, TestExtendedShapeSmallToBig) {
TEST_P(RuntimeShapeTest, TestFlatSize) {
  const std::vector<int32_t> src = IotaVector(kSmallSize);
  const RuntimeShape shape(src.size(), src.data());
  EXPECT_EQ(shape.FlatSize(),
            std::reduce(src.begin(), src.end(), 1, std::multiplies<int>{}));
  int32_t flat_size = 1;
  for (std::vector<int>::const_iterator it = src.begin(); it != src.end(); ++it)
    flat_size *= *it;
  EXPECT_EQ(shape.FlatSize(), flat_size);
}

INSTANTIATE_TEST_SUITE_P(BigSmall, RuntimeShapeTest,
@@ -15,7 +15,7 @@ limitations under the License.

#include "tensorflow/core/framework/tensor_testutil.h"

#include <cmath>
#include <iomanip>

#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/platform/types.h"
@@ -239,8 +239,10 @@ TEST(RuntimeShapeTest, TestExtendedShapeSmallToBig) {
TEST_P(RuntimeShapeTest, TestFlatSize) {
  const std::vector<int32_t> src = IotaVector(kSmallSize);
  const RuntimeShape shape(src.size(), src.data());
  EXPECT_EQ(shape.FlatSize(),
            std::reduce(src.begin(), src.end(), 1, std::multiplies<int>{}));
  int32_t flat_size = 1;
  for (std::vector<int>::const_iterator it = src.begin(); it != src.end(); ++it)
    flat_size *= *it;
  EXPECT_EQ(shape.FlatSize(), flat_size);
}

INSTANTIATE_TEST_SUITE_P(BigSmall, RuntimeShapeTest,
@@ -426,7 +426,7 @@ verify_manylinux_compliance_test(
        "manual",
    ],
    wheel = ":wheel",
    x86_64_compliance_tag = "manylinux_2_17_x86_64",
    x86_64_compliance_tag = "manylinux_2_27_x86_64",
)

py_import(
@@ -140,10 +140,10 @@ def workspace():
    # Details: https://github.com/google-ml-infra/rules_ml_toolchain
    http_archive(
        name = "rules_ml_toolchain",
        sha256 = "c85a3ae3da6af08dcc5065387e8d9b033913407c8fa5b074881fce516b482f69",
        strip_prefix = "rules_ml_toolchain-f1e2b169441df00c8b1e9b08371d9ec8e0517ce6",
        sha256 = "2bb5d2f7a94ceffb2b7bac881e6c13b830871bf808c2ee1dba7ec9a0d60bf660",
        strip_prefix = "rules_ml_toolchain-0586ff3ca7c60f7963e5aa46cd390cf052c4f8b1",
        urls = [
            "https://github.com/google-ml-infra/rules_ml_toolchain/archive/f1e2b169441df00c8b1e9b08371d9ec8e0517ce6.tar.gz",
            "https://github.com/google-ml-infra/rules_ml_toolchain/archive/0586ff3ca7c60f7963e5aa46cd390cf052c4f8b1.tar.gz",
        ],
    )