From fdcc8a688866ebb096d26305e3f41dad7cbf8019 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 23 Sep 2025 15:02:04 -0700 Subject: [PATCH] Replace RBE Docker container image: use Docker image without pre-installed CUDA packages. Enable CUDA forward-compatibility mode in all RBE jobs by default. Forward compatibility mode in hermetic CUDA allows the linker to use the user-mode driver from Bazel cache, so there is no need to install UMD in the RBE Docker image. UMD on RBE machines is rarely updated, thus RBE jobs need forward compatibility mode to enable the most recent CUDA features usage in the tests. The non-RBE job runners are updated more often, hence we can update the drivers on those machines and not rely on forward compatibility mode. PiperOrigin-RevId: 810595379 --- .bazelrc | 4 + WORKSPACE | 4 + .../toolchains/remote_config/configs.bzl | 7 +- tensorflow/workspace0.bzl | 6 +- .../workflows/benchmarks/build_binaries.sh | 4 +- third_party/xla/WORKSPACE | 6 +- third_party/xla/build_tools/ci/build.py | 105 ++++++++++++++++++ .../xla/build_tools/ci/golden_commands.txt | 18 +++ third_party/xla/tensorflow.bazelrc | 3 + .../toolchains/remote_config/configs.bzl | 7 +- third_party/xla/workspace0.bzl | 6 +- 11 files changed, 153 insertions(+), 17 deletions(-) diff --git a/.bazelrc b/.bazelrc index be122db30c9..2d88beaa52d 100644 --- a/.bazelrc +++ b/.bazelrc @@ -169,6 +169,7 @@ build --repo_env USE_HERMETIC_CC_TOOLCHAIN=1 # TODO: Migrate for https://github.com/bazelbuild/bazel/issues/7260 build:clang_local --noincompatible_enable_cc_toolchain_resolution build:clang_local --noincompatible_enable_android_toolchain_resolution +build:clang_local --@rules_ml_toolchain//common:enable_hermetic_cc=False build:clang_local --repo_env USE_HERMETIC_CC_TOOLCHAIN=0 # Print a stacktrace when a test is killed @@ -665,6 +666,9 @@ build:rbe_linux_cuda --config=cuda_clang_official build:rbe_linux_cuda --config=rbe_linux_cpu # For Remote build execution -- GPU configuration build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1 +# Enable forward compatibility for CUDA builds because RBE docker image doesn't +# have latest CUDA drivers installed. +build:rbe_linux_cuda --@cuda_driver//:enable_forward_compatibility=true build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda build:rbe_linux_cuda_nvcc --config=cuda_nvcc diff --git a/WORKSPACE b/WORKSPACE index a125bcc15b5..c9de6694a75 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -102,6 +102,10 @@ register_toolchains("@rules_ml_toolchain//cc:linux_x86_64_linux_x86_64") register_toolchains("@rules_ml_toolchain//cc:linux_x86_64_linux_x86_64_cuda") +register_toolchains("@rules_ml_toolchain//cc:linux_aarch64_linux_aarch64") + +register_toolchains("@rules_ml_toolchain//cc:linux_aarch64_linux_aarch64_cuda") + load( "@rules_ml_toolchain//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl", "cuda_json_init_repository", diff --git a/tensorflow/tools/toolchains/remote_config/configs.bzl b/tensorflow/tools/toolchains/remote_config/configs.bzl index 4954601a4fb..fbf723bb44a 100644 --- a/tensorflow/tools/toolchains/remote_config/configs.bzl +++ b/tensorflow/tools/toolchains/remote_config/configs.bzl @@ -47,10 +47,11 @@ def initialize_rbe_configs(): python_bin_path = "C:/Python37/python.exe", ) - # The `ml-build-rbe` image is identical to the `ml-build` image except for the base image. # The `ml-build`'s base image is a standard `ubuntu22.04` image. - # The `ml-build-rbe`'s base image is `nvidia/cuda:12.3.2-base-ubuntu22.04` which has nvidia driver installed. - ml_build_rbe_config("docker://us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build-rbe@sha256:468a498a1f1f49daa257dcf8ee2f653c8c54e7621da511ce3ab7c14fcbd92d6f") + # Note that in order to use this image with RBE GPU builds, you need to have hermetic CUDA + # toolchain integrated into your project, and pass + # `--@cuda_driver//:enable_forward_compatibility=true` to Bazel command. + ml_build_rbe_config("docker://us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build@sha256:ea67e8453d8b09c2ba48853da5e79efef4b65804b4a48dfae4b4da89ffd38405") # TF-Version-Specific SIG Build RBE Configs. The crosstool generated from these # configs are python-version-independent because they only care about the diff --git a/tensorflow/workspace0.bzl b/tensorflow/workspace0.bzl index 983a1a9b0c7..17242556683 100644 --- a/tensorflow/workspace0.bzl +++ b/tensorflow/workspace0.bzl @@ -140,10 +140,10 @@ def workspace(): # Details: https://github.com/google-ml-infra/rules_ml_toolchain http_archive( name = "rules_ml_toolchain", - sha256 = "59d7eb36a02cbe3c2e2fa67fda5e8f1ab7e274bc4773bbd207c51fe199e11c19", - strip_prefix = "rules_ml_toolchain-ffd9e3d7b84e43c2686c803cb08ce790ffd58baa", + sha256 = "77ad040f826af31ce3142e3b8bcf6c61972b4f95c84185676fa1af325fbf52c6", + strip_prefix = "rules_ml_toolchain-a912c87727405e2145b168e5b62a5d5ae7232cb2", urls = [ - "https://github.com/google-ml-infra/rules_ml_toolchain/archive/ffd9e3d7b84e43c2686c803cb08ce790ffd58baa.tar.gz", + "https://github.com/google-ml-infra/rules_ml_toolchain/archive/a912c87727405e2145b168e5b62a5d5ae7232cb2.tar.gz", ], ) diff --git a/third_party/xla/.github/workflows/benchmarks/build_binaries.sh b/third_party/xla/.github/workflows/benchmarks/build_binaries.sh index 802bd641645..e54ff01be85 100755 --- a/third_party/xla/.github/workflows/benchmarks/build_binaries.sh +++ b/third_party/xla/.github/workflows/benchmarks/build_binaries.sh @@ -85,13 +85,13 @@ case "$HARDWARE_CATEGORY" in device_type_flag_value="host" ;; GPU_L4) - BUILD_TYPE="XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS" # Or _48_VCPU if that's the more common + BUILD_TYPE="XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS" # Or _48_VCPU if that's the more common runner_binary_path="./$BAZEL_BIN_DIR/xla/tools/multihost_hlo_runner/hlo_runner_main_gpu" stats_binary_path="./$BAZEL_BIN_DIR/xla/tools/compute_xspace_stats_main_gpu" device_type_flag_value="gpu" ;; GPU_B200) - BUILD_TYPE="XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS" + BUILD_TYPE="XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS" runner_binary_path="./$BAZEL_BIN_DIR/xla/tools/multihost_hlo_runner/hlo_runner_main_gpu" stats_binary_path="./$BAZEL_BIN_DIR/xla/tools/compute_xspace_stats_main_gpu" device_type_flag_value="gpu" diff --git a/third_party/xla/WORKSPACE b/third_party/xla/WORKSPACE index bc871108513..bd737f2542e 100644 --- a/third_party/xla/WORKSPACE +++ b/third_party/xla/WORKSPACE @@ -9,10 +9,10 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") # Details: https://github.com/google-ml-infra/rules_ml_toolchain http_archive( name = "rules_ml_toolchain", - sha256 = "1a855dd94eebedae69d1804e8837ad70b8018358a0a03eea0bec71d7dc2b096a", - strip_prefix = "rules_ml_toolchain-d321763a84c900bc29b4f5459a4f81fad19b2356", + sha256 = "77ad040f826af31ce3142e3b8bcf6c61972b4f95c84185676fa1af325fbf52c6", + strip_prefix = "rules_ml_toolchain-a912c87727405e2145b168e5b62a5d5ae7232cb2", urls = [ - "https://github.com/google-ml-infra/rules_ml_toolchain/archive/d321763a84c900bc29b4f5459a4f81fad19b2356.tar.gz", + "https://github.com/google-ml-infra/rules_ml_toolchain/archive/a912c87727405e2145b168e5b62a5d5ae7232cb2.tar.gz", ], ) diff --git a/third_party/xla/build_tools/ci/build.py b/third_party/xla/build_tools/ci/build.py index 5606673c714..c34fa4b5217 100755 --- a/third_party/xla/build_tools/ci/build.py +++ b/third_party/xla/build_tools/ci/build.py @@ -117,6 +117,9 @@ class BuildType(enum.Enum): XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto() XLA_LINUX_X86_GPU_L4_48_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto() XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto() + XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS = enum.auto() + XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS = enum.auto() + XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS = enum.auto() XLA_MACOS_X86_CPU_KOKORO = enum.auto() XLA_MACOS_ARM64_CPU_KOKORO = enum.auto() @@ -429,6 +432,39 @@ Build( subcommand="build", ) +Build( + type_=BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS, + repo="openxla/xla", + target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS, + configs=("warnings", "rbe_linux_cuda_nvcc"), + test_tag_filters=( + "-no_oss", + "requires-gpu-nvidia", + "gpu", + "-rocm-only", + "-oneapi-only", + ) + + _tag_filters_for_compute_capability(compute_capability=75), + build_tag_filters=( + "-no_oss", + "requires-gpu-nvidia", + "gpu", + "-rocm-only", + "-oneapi-only", + ), + options={ + "run_under": "//build_tools/ci:parallel_gpu_execute", + "//xla/tsl:ci_build": True, + "@local_config_cuda//cuda:include_cuda_libs": False, + **_DEFAULT_BAZEL_OPTIONS, + }, + repo_env={ + "TF_CUDA_COMPUTE_CAPABILITIES": "7.5", + }, + extra_setup_commands=(["nvidia-smi"],), + subcommand="build", +) + Build( type_=BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_PRESUBMIT_GITHUB_ACTIONS, repo="openxla/xla", @@ -461,6 +497,39 @@ Build( subcommand="build", ) +Build( + type_=BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS, + repo="openxla/xla", + configs=("warnings", "rbe_linux_cuda_nvcc"), + target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS, + test_tag_filters=( + "-no_oss", + "requires-gpu-nvidia", + "gpu", + "-rocm-only", + "-oneapi-only", + ) + + _tag_filters_for_compute_capability(compute_capability=75), + build_tag_filters=( + "-no_oss", + "requires-gpu-nvidia", + "gpu", + "-rocm-only", + "-oneapi-only", + ), + options={ + "run_under": "//build_tools/ci:parallel_gpu_execute", + "//xla/tsl:ci_build": True, + "@local_config_cuda//cuda:include_cuda_libs": False, + **_DEFAULT_BAZEL_OPTIONS, + }, + repo_env={ + "TF_CUDA_COMPUTE_CAPABILITIES": "7.5", + }, + extra_setup_commands=(["nvidia-smi"],), + subcommand="build", +) + Build( type_=BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS, repo="openxla/xla", @@ -496,6 +565,42 @@ Build( subcommand="build", ) +Build( + type_=BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS, + repo="openxla/xla", + configs=(), + target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS, + test_tag_filters=( + "-no_oss", + "requires-gpu-nvidia", + "gpu", + "-rocm-only", + "-oneapi-only", + ) + + _tag_filters_for_compute_capability(compute_capability=100), + build_tag_filters=( + "-no_oss", + "requires-gpu-nvidia", + "gpu", + "-rocm-only", + "-oneapi-only", + ), + options={ + "run_under": "//build_tools/ci:parallel_gpu_execute", + # Use User Mode and Kernel Mode Drivers pre-installed on the system. + "//xla/tsl:ci_build": True, + "@local_config_cuda//cuda:include_cuda_libs": False, + **_DEFAULT_BAZEL_OPTIONS, + }, + repo_env={ + "TF_CUDA_COMPUTE_CAPABILITIES": "10", + "HERMETIC_CUDA_VERSION": "12.8.0", + "HERMETIC_CUDNN_VERSION": "9.8.0", + }, + extra_setup_commands=(["nvidia-smi"],), + subcommand="build", +) + macos_tag_filter = ( "-no_oss", "-gpu", diff --git a/third_party/xla/build_tools/ci/golden_commands.txt b/third_party/xla/build_tools/ci/golden_commands.txt index 06386e2e7e5..508892fe7c8 100644 --- a/third_party/xla/build_tools/ci/golden_commands.txt +++ b/third_party/xla/build_tools/ci/golden_commands.txt @@ -44,18 +44,36 @@ parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_fi bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=nonccl --config=rbe_linux_cpu --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build -- //xla/... //build_tools/... @local_tsl//tsl/... bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_CPU_GITHUB_ACTIONS +# BEGIN BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS +nvidia-smi +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel analyze-profile profile.json.gz +# END BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS nvidia-smi parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS +# BEGIN BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS +nvidia-smi +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel analyze-profile profile.json.gz +# END BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS nvidia-smi parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS +# BEGIN BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS +nvidia-smi +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel analyze-profile profile.json.gz +# END BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_PRESUBMIT_GITHUB_ACTIONS nvidia-smi parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu diff --git a/third_party/xla/tensorflow.bazelrc b/third_party/xla/tensorflow.bazelrc index ac32e59e46b..a81d70cb413 100644 --- a/third_party/xla/tensorflow.bazelrc +++ b/third_party/xla/tensorflow.bazelrc @@ -545,6 +545,9 @@ build:rbe_linux_cuda --config=rbe_linux_cpu build:rbe_linux_cuda --repo_env=TF_SYSROOT= # For Remote build execution -- GPU configuration build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1 +# Enable forward compatibility for CUDA builds because RBE docker image doesn't +# have latest CUDA drivers installed. +build:rbe_linux_cuda --@cuda_driver//:enable_forward_compatibility=true build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda build:rbe_linux_cuda_nvcc --config=cuda_nvcc diff --git a/third_party/xla/tools/toolchains/remote_config/configs.bzl b/third_party/xla/tools/toolchains/remote_config/configs.bzl index fbc72c0adea..4c689ece55a 100644 --- a/third_party/xla/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/tools/toolchains/remote_config/configs.bzl @@ -47,10 +47,11 @@ def initialize_rbe_configs(): python_bin_path = "C:/Python37/python.exe", ) - # The `ml-build-rbe` image is identical to the `ml-build` image except for the base image. # The `ml-build`'s base image is a standard `ubuntu22.04` image. - # The `ml-build-rbe`'s base image is `nvidia/cuda:12.3.2-base-ubuntu22.04` which has nvidia driver installed. - ml_build_rbe_config("docker://us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build-rbe@sha256:468a498a1f1f49daa257dcf8ee2f653c8c54e7621da511ce3ab7c14fcbd92d6f") + # Note that in order to use this image with RBE GPU builds, you need to have hermetic CUDA + # toolchain integrated into your project, and pass + # `--@cuda_driver//:enable_forward_compatibility=true` to Bazel command. + ml_build_rbe_config("docker://us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build@sha256:ea67e8453d8b09c2ba48853da5e79efef4b65804b4a48dfae4b4da89ffd38405") # TF-Version-Specific SIG Build RBE Configs. The crosstool generated from these # configs are python-version-independent because they only care about the diff --git a/third_party/xla/workspace0.bzl b/third_party/xla/workspace0.bzl index 4d7dd874307..e4e05b24a79 100644 --- a/third_party/xla/workspace0.bzl +++ b/third_party/xla/workspace0.bzl @@ -140,10 +140,10 @@ def workspace(): if "rules_ml_toolchain" not in native.existing_rules(): http_archive( name = "rules_ml_toolchain", - sha256 = "1a855dd94eebedae69d1804e8837ad70b8018358a0a03eea0bec71d7dc2b096a", - strip_prefix = "rules_ml_toolchain-d321763a84c900bc29b4f5459a4f81fad19b2356", + sha256 = "77ad040f826af31ce3142e3b8bcf6c61972b4f95c84185676fa1af325fbf52c6", + strip_prefix = "rules_ml_toolchain-a912c87727405e2145b168e5b62a5d5ae7232cb2", urls = [ - "https://github.com/google-ml-infra/rules_ml_toolchain/archive/d321763a84c900bc29b4f5459a4f81fad19b2356.tar.gz", + "https://github.com/google-ml-infra/rules_ml_toolchain/archive/a912c87727405e2145b168e5b62a5d5ae7232cb2.tar.gz", ], )