Add Hermetic C++ Toolchains for Linux x86_64 builds.

Hermetic toolchains give us builds that are isolated from the host system, cutting down on unexpected dependencies and side effects.

With these changes, TensorFlow will build for Linux x86_64 architectures (both CPU and CUDA-enabled GPU) using self-contained C++ toolchains. If you need to use a non-hermetic toolchain, you can do so by adding the flag --config=clang_local. For remote builds with a non-hermetic toolchain, simply append _clang_local to your existing RBE flag. For example, if your hermetic RBE build uses --config=rbe_linux_cpu, the non-hermetic version would be --config=rbe_linux_cpu_clang_local.

    Example: Run CPU tests for Linux x86_64

    For hermetic tests, run the following command (no env variables like CC, CXX, BAZEL_COMPILER, CLANG_COMPILER_PATH):
	bazel test \
		--config=avx_linux \
		--config=release_linux_base \
		--config=linux_cpu_pycpp_test_filters \
		--repo_env=HERMETIC_PYTHON_VERSION=3.11 \
		//tensorflow/... -- -//tensorflow/compiler/tf2tensorrt/... -//tensorflow/core/tpu/... -//tensorflow/lite/...  -//tensorflow/tools/toolchains/...

    For Linux x86_64 non-hermetic tests, use commands with the flag "--config=clang_local" and env variables CC, CXX, BAZEL_COMPILER, CLANG_COMPILER_PATH, etc.:
	bazel test \
		--config=clang_local \
		--config=avx_linux \
		--config=release_linux_base \
		--config=linux_cpu_pycpp_test_filters \
		--repo_env=HERMETIC_PYTHON_VERSION=3.11 \
		--action_env=CLANG_COMPILER_PATH=/usr/lib/llvm-18/bin/clang \
		--host_action_env=CLANG_COMPILER_PATH=/usr/lib/llvm-18/bin/clang \
		--repo_env=CC=/usr/lib/llvm-18/bin/clang \
		--repo_env=CXX=/usr/lib/llvm-18/bin/clang++ \
		--repo_env=BAZEL_COMPILER=/usr/lib/llvm-18/bin/clang \
		//tensorflow/... -- -//tensorflow/compiler/tf2tensorrt/... -//tensorflow/core/tpu/... -//tensorflow/lite/...  -//tensorflow/tools/toolchains/...

PiperOrigin-RevId: 783911228
This commit is contained in:
A. Unique TensorFlower 2025-07-16 15:10:38 -07:00 committed by TensorFlower Gardener
parent 3ee71d2c03
commit f7dfba3f74
8 changed files with 70 additions and 35 deletions

View File

@ -159,9 +159,13 @@ common --incompatible_enforce_config_setting_visibility
# TODO: Enable Bzlmod
common --noenable_bzlmod
build --incompatible_enable_cc_toolchain_resolution
build --repo_env USE_HERMETIC_CC_TOOLCHAIN=1
# TODO: Migrate for https://github.com/bazelbuild/bazel/issues/7260
common --noincompatible_enable_cc_toolchain_resolution
common --noincompatible_enable_android_toolchain_resolution
build:clang_local --noincompatible_enable_cc_toolchain_resolution
build:clang_local --noincompatible_enable_android_toolchain_resolution
build:clang_local --repo_env USE_HERMETIC_CC_TOOLCHAIN=0
# Print a stacktrace when a test is killed
test --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1"
@ -172,6 +176,7 @@ test --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1"
# Android configs. Bazel needs to have --cpu and --fat_apk_cpu both set to the
# target CPU to build transient dependencies correctly. See
# https://docs.bazel.build/versions/master/user-manual.html#flag--fat_apk_cpu
build:android --config=clang_local
build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:android_arm --config=android
@ -210,7 +215,8 @@ build:macos --features=archive_param_file
build:macos --linkopt=-Wl,-undefined,dynamic_lookup
build:macos --host_linkopt=-Wl,-undefined,dynamic_lookup
# Use the Apple toolchain for MacOS builds.
# Use the old Apple toolchain for MacOS builds.
build:macos --config=clang_local
build:macos --config=apple-toolchain
# Use cc toolchains from apple_support for Apple builds (ios, macos, etc).
@ -222,12 +228,14 @@ build:apple-toolchain --host_crosstool_top=@local_config_apple_cc//:toolchain
# Settings for MacOS on ARM CPUs.
build:macos_arm64 --cpu=darwin_arm64
build:macos_arm64 --macos_minimum_os=11.0
build:macos_arm64 --config=clang_local
build:macos_arm64 --platforms=@build_bazel_apple_support//configs/platforms:darwin_arm64
# iOS configs for each architecture and the fat binary builds.
build:ios --apple_platform_type=ios
build:ios --copt=-fembed-bitcode
build:ios --copt=-Wno-c++11-narrowing
build:ios --config=clang_local
build:ios --config=apple-toolchain
build:ios_armv7 --config=ios
build:ios_armv7 --cpu=ios_armv7
@ -355,6 +363,7 @@ build:tpu --copt=-DLIBTPU_ON_GCE
build:tpu --define=enable_mlir_bridge=true
build:rocm --copt=-Wno-gnu-offsetof-extensions
build:rocm --config=clang_local
build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
build:rocm --define=using_rocm_hipcc=true
build:rocm --define=tensorflow_mkldnn_contraction_kernel=0
@ -374,6 +383,7 @@ build:rocm_ci_hermetic --repo_env="OS=ubuntu_22.04"
build:rocm_ci_hermetic --repo_env="ROCM_VERSION=6.2.0"
build:rocm_ci_hermetic --@local_config_rocm//rocm:use_rocm_hermetic_rpath=True
build:sycl --config=clang_local
build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl --define=using_sycl=true
build:sycl --define=tensorflow_mkldnn_contraction_kernel=0
@ -506,6 +516,9 @@ build:windows --verbose_failures
# See: https://github.com/bazelbuild/bazel/issues/5163
build:windows --features=compiler_param_file
# Use old toolchains for Windows builds.
build:windows --config=clang_local
# Do not risk cache corruption. See:
# https://github.com/bazelbuild/bazel/issues/3360
build:linux --experimental_guard_against_concurrent_changes
@ -536,6 +549,7 @@ test:win_clang_base --host_linkopt=/FORCE:MULTIPLE
test:win_clang_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true --test_summary=short
build:win_clang --config=win_clang_base
build:win_clang --config=clang_local
build:win_clang --extra_toolchains=@local_config_cc//:cc-toolchain-x64_windows-clang-cl
build:win_clang --extra_execution_platforms=//tensorflow/tools/toolchains/win:x64_windows-clang-cl
build:win_clang --host_platform=//tensorflow/tools/toolchains/win:x64_windows-clang-cl
@ -598,12 +612,6 @@ build:rbe_linux --linkopt=-lm
build:rbe_linux --host_linkopt=-lm
build:rbe_linux_cpu --config=rbe_linux
# Linux cpu and cuda builds share the same toolchain now.
build:rbe_linux_cpu --host_crosstool_top="@local_config_cuda//crosstool:toolchain"
build:rbe_linux_cpu --crosstool_top="@local_config_cuda//crosstool:toolchain"
build:rbe_linux_cpu --extra_toolchains="@local_config_cuda//crosstool:toolchain-linux-x86_64"
build:rbe_linux_cpu --repo_env=CC="/usr/lib/llvm-18/bin/clang"
build:rbe_linux_cpu --repo_env=TF_SYSROOT="/dt9"
build:rbe_linux_cpu --extra_execution_platforms="@ml_build_config_platform//:platform"
build:rbe_linux_cpu --host_platform="@ml_build_config_platform//:platform"
build:rbe_linux_cpu --platforms="@ml_build_config_platform//:platform"
@ -625,6 +633,15 @@ common:rbe_linux_cpu --remote_instance_name=projects/tensorflow-testing/instance
# build:rbe_linux_cpu --repo_env USE_CUDA_REDISTRIBUTIONS=1
# build:rbe_linux_cpu --config=cuda_version
# Deprecated RBE config with non-hermetic toolchains.
build:rbe_linux_cpu_clang_local --config=rbe_linux_cpu
build:rbe_linux_cpu_clang_local --config=clang_local
build:rbe_linux_cpu_clang_local --host_crosstool_top="@local_config_cuda//crosstool:toolchain"
build:rbe_linux_cpu_clang_local --crosstool_top="@local_config_cuda//crosstool:toolchain"
build:rbe_linux_cpu_clang_local --extra_toolchains="@local_config_cuda//crosstool:toolchain-linux-x86_64"
build:rbe_linux_cpu_clang_local --repo_env=CC="/usr/lib/llvm-18/bin/clang"
build:rbe_linux_cpu_clang_local --repo_env=TF_SYSROOT="/dt9"
# TODO(kanglan): Remove it after toolchain update is complete.
build:rbe_linux_cpu_old --config=rbe_linux
build:rbe_linux_cpu_old --host_crosstool_top="@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain"
@ -648,6 +665,7 @@ build:rbe_linux_cuda_nvcc --config=cuda_nvcc
build:rbe_linux_cuda_nvcc --repo_env TF_NCCL_USE_STUB=1
build:rbe_win_base --config=rbe_base
build:rbe_win_base --config=clang_local
build:rbe_win_base --shell_executable=C:\\tools\\msys64\\usr\\bin\\bash.exe
build:rbe_win_base --remote_instance_name=projects/tensorflow-testing/instances/windows
# Don't build the python zip archive in the RBE build.
@ -663,6 +681,7 @@ build:rbe_windows_x86_cpu_2022 --config=rbe_win_base --config=windows_x86_cpu_20
# END TF REMOTE BUILD EXECUTION OPTIONS
# TFLite build configs for generic embedded Linux
build:elinux --config=clang_local
build:elinux --crosstool_top=@local_config_embedded_arm//:toolchain
build:elinux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:elinux_aarch64 --config=elinux
@ -708,9 +727,6 @@ build:release_linux_base --linkopt="-lm"
build:release_linux_base --linkopt=-Wl,--undefined-version
# Container environment settings below this point.
# Set Clang as compiler. Use the actual path to clang installed in container.
build:release_linux_base --repo_env=CC="/usr/lib/llvm-18/bin/clang"
build:release_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/clang"
# Test-related settings below this point.
test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
test:release_linux_base --local_test_jobs=HOST_CPUS
@ -719,8 +735,6 @@ test:release_linux_base --test_summary=short
# Use the Clang toolchain to compile
build:release_cpu_linux --config=release_linux_base
build:release_cpu_linux --crosstool_top="@local_config_cuda//crosstool:toolchain"
build:release_cpu_linux --repo_env=TF_SYSROOT="/dt9"
# Target the AVX instruction set
build:release_cpu_linux --config=avx_linux
@ -731,8 +745,23 @@ build:release_gpu_linux --config=cuda_clang_official
# Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think
test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute
# Deprecated release CPU config with non-hermetic toolchains.
build:release_cpu_linux_clang_local --config=release_cpu_linux
build:release_cpu_linux_clang_local --config=clang_local
build:release_cpu_linux_clang_local --repo_env=CC="/usr/lib/llvm-18/bin/clang"
build:release_cpu_linux_clang_local --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/clang"
build:release_cpu_linux_clang_local --crosstool_top="@local_config_cuda//crosstool:toolchain"
build:release_cpu_linux_clang_local --repo_env=TF_SYSROOT="/dt9"
# Deprecated release GPU config with non-hermetic toolchains.
build:release_gpu_linux_clang_local --config=release_gpu_linux
build:release_gpu_linux_clang_local --config=release_cpu_linux_clang_local
build:release_arm64_linux --config=release_linux_base
build:release_arm64_linux --config=linux_arm64
build:release_arm64_linux --config=clang_local
build:release_arm64_linux --repo_env=CC="/usr/lib/llvm-18/bin/clang"
build:release_arm64_linux --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/clang"
build:release_arm64_linux --crosstool_top="@ml2014_clang_aarch64_config_aarch64//crosstool:toolchain"
build:release_arm64_linux --config=mkl_aarch64_threadpool
build:release_arm64_linux --copt=-flax-vector-conversions
@ -741,6 +770,7 @@ test:release_arm64_linux --flaky_test_attempts=3
build:release_cpu_macos --config=avx_linux
# Base build configs for macOS
build:release_macos_base --config=clang_local
build:release_macos_base --action_env DEVELOPER_DIR=/Applications/Xcode.app/Contents/Developer
build:release_macos_base --define=no_nccl_support=true --output_filter=^$
@ -906,6 +936,7 @@ test:windows_x86_cpu_2022_pycpp_test --config=windows_x86_cpu_2022_pycpp_test_op
# flags seem to be actually used to specify the execution platform details. It
# seems it is this way because these flags are old and predate the distinction
# between host and execution platform.
build:cross_compile_base --config=clang_local
build:cross_compile_base --host_cpu=k8
build:cross_compile_base --host_crosstool_top=//tensorflow/tools/toolchains/cross_compile/cc:cross_compile_toolchain_suite
build:cross_compile_base --extra_execution_platforms=//tensorflow/tools/toolchains/cross_compile/config:linux_x86_64

View File

@ -85,6 +85,17 @@ load(
python_wheel_version_suffix_repository(name = "tf_wheel_version_suffix")
load(
"@rules_ml_toolchain//cc_toolchain/deps:cc_toolchain_deps.bzl",
"cc_toolchain_deps",
)
cc_toolchain_deps()
register_toolchains("@rules_ml_toolchain//cc_toolchain:lx64_lx64")
register_toolchains("@rules_ml_toolchain//cc_toolchain:lx64_lx64_cuda")
load(
"@rules_ml_toolchain//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
"cuda_json_init_repository",
@ -158,14 +169,3 @@ load(
)
nvshmem_configure(name = "local_config_nvshmem")
load(
"@rules_ml_toolchain//cc_toolchain/deps:cc_toolchain_deps.bzl",
"cc_toolchain_deps",
)
cc_toolchain_deps()
register_toolchains("@rules_ml_toolchain//cc_toolchain:lx64_lx64")
register_toolchains("@rules_ml_toolchain//cc_toolchain:lx64_lx64_cuda")

View File

@ -23,7 +23,7 @@ TFCI_INDEX_HTML_ENABLE=1
TFCI_LIB_SUFFIX="-cpu-linux-x86_64"
TFCI_OUTPUT_DIR=build_output
TFCI_WHL_AUDIT_ENABLE=1
TFCI_WHL_AUDIT_PLAT=manylinux2014_x86_64
TFCI_WHL_AUDIT_PLAT=manylinux_2_27_x86_64
TFCI_WHL_BAZEL_TEST_ENABLE=1
TFCI_WHL_SIZE_LIMIT=260M
TFCI_WHL_SIZE_LIMIT_ENABLE=1

View File

@ -239,8 +239,10 @@ TEST(RuntimeShapeTest, TestExtendedShapeSmallToBig) {
TEST_P(RuntimeShapeTest, TestFlatSize) {
const std::vector<int32_t> src = IotaVector(kSmallSize);
const RuntimeShape shape(src.size(), src.data());
EXPECT_EQ(shape.FlatSize(),
std::reduce(src.begin(), src.end(), 1, std::multiplies<int>{}));
int32_t flat_size = 1;
for (std::vector<int>::const_iterator it = src.begin(); it != src.end(); ++it)
flat_size *= *it;
EXPECT_EQ(shape.FlatSize(), flat_size);
}
INSTANTIATE_TEST_SUITE_P(BigSmall, RuntimeShapeTest,

View File

@ -15,7 +15,7 @@ limitations under the License.
#include "tensorflow/core/framework/tensor_testutil.h"
#include <cmath>
#include <iomanip>
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/platform/types.h"

View File

@ -239,8 +239,10 @@ TEST(RuntimeShapeTest, TestExtendedShapeSmallToBig) {
TEST_P(RuntimeShapeTest, TestFlatSize) {
const std::vector<int32_t> src = IotaVector(kSmallSize);
const RuntimeShape shape(src.size(), src.data());
EXPECT_EQ(shape.FlatSize(),
std::reduce(src.begin(), src.end(), 1, std::multiplies<int>{}));
int32_t flat_size = 1;
for (std::vector<int>::const_iterator it = src.begin(); it != src.end(); ++it)
flat_size *= *it;
EXPECT_EQ(shape.FlatSize(), flat_size);
}
INSTANTIATE_TEST_SUITE_P(BigSmall, RuntimeShapeTest,

View File

@ -426,7 +426,7 @@ verify_manylinux_compliance_test(
"manual",
],
wheel = ":wheel",
x86_64_compliance_tag = "manylinux_2_17_x86_64",
x86_64_compliance_tag = "manylinux_2_27_x86_64",
)
py_import(

View File

@ -140,10 +140,10 @@ def workspace():
# Details: https://github.com/google-ml-infra/rules_ml_toolchain
http_archive(
name = "rules_ml_toolchain",
sha256 = "de3b14418657eeacd8afc2aa89608be6ec8d66cd6a5de81c4f693e77bc41bee1",
strip_prefix = "rules_ml_toolchain-5653e5a0ca87c1272069b4b24864e55ce7f129a1",
sha256 = "562e0517f4e833afe0de7bb8da49f9adafcbca30a8259f118a65b4adf533b51f",
strip_prefix = "rules_ml_toolchain-4995c0be587c6e173fe8cf8dc614f92011f7913d",
urls = [
"https://github.com/google-ml-infra/rules_ml_toolchain/archive/5653e5a0ca87c1272069b4b24864e55ce7f129a1.tar.gz",
"https://github.com/google-ml-infra/rules_ml_toolchain/archive/4995c0be587c6e173fe8cf8dc614f92011f7913d.tar.gz",
],
)