mirror of
https://github.com/zebrajr/tensorflow.git
synced 2025-12-06 00:19:58 +01:00
Replace RBE Docker container image: use Docker image without pre-installed CUDA packages.
Enable CUDA forward-compatibility mode in all RBE jobs by default. Forward-compatibility mode in hermetic CUDA allows the linker to use the user-mode driver (UMD) from the Bazel cache, so there is no need to install the UMD in the RBE Docker image. The UMD on RBE machines is rarely updated, so RBE jobs need forward-compatibility mode in order to use the most recent CUDA features in the tests. The non-RBE job runners are updated more often, so we can update the drivers on those machines and not rely on forward-compatibility mode. PiperOrigin-RevId: 810595379
This commit is contained in:
parent
0ccf4a29f6
commit
fdcc8a6888
4
.bazelrc
4
.bazelrc
|
|
@ -169,6 +169,7 @@ build --repo_env USE_HERMETIC_CC_TOOLCHAIN=1
|
|||
# TODO: Migrate for https://github.com/bazelbuild/bazel/issues/7260
|
||||
build:clang_local --noincompatible_enable_cc_toolchain_resolution
|
||||
build:clang_local --noincompatible_enable_android_toolchain_resolution
|
||||
build:clang_local --@rules_ml_toolchain//common:enable_hermetic_cc=False
|
||||
build:clang_local --repo_env USE_HERMETIC_CC_TOOLCHAIN=0
|
||||
|
||||
# Print a stacktrace when a test is killed
|
||||
|
|
@ -665,6 +666,9 @@ build:rbe_linux_cuda --config=cuda_clang_official
|
|||
build:rbe_linux_cuda --config=rbe_linux_cpu
|
||||
# For Remote build execution -- GPU configuration
|
||||
build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
|
||||
# Enable forward compatibility for CUDA builds because RBE docker image doesn't
|
||||
# have latest CUDA drivers installed.
|
||||
build:rbe_linux_cuda --@cuda_driver//:enable_forward_compatibility=true
|
||||
|
||||
build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
|
||||
build:rbe_linux_cuda_nvcc --config=cuda_nvcc
|
||||
|
|
|
|||
|
|
@ -102,6 +102,10 @@ register_toolchains("@rules_ml_toolchain//cc:linux_x86_64_linux_x86_64")
|
|||
|
||||
register_toolchains("@rules_ml_toolchain//cc:linux_x86_64_linux_x86_64_cuda")
|
||||
|
||||
register_toolchains("@rules_ml_toolchain//cc:linux_aarch64_linux_aarch64")
|
||||
|
||||
register_toolchains("@rules_ml_toolchain//cc:linux_aarch64_linux_aarch64_cuda")
|
||||
|
||||
load(
|
||||
"@rules_ml_toolchain//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
|
||||
"cuda_json_init_repository",
|
||||
|
|
|
|||
|
|
@ -47,10 +47,11 @@ def initialize_rbe_configs():
|
|||
python_bin_path = "C:/Python37/python.exe",
|
||||
)
|
||||
|
||||
# The `ml-build-rbe` image is identical to the `ml-build` image except for the base image.
|
||||
# The `ml-build`'s base image is a standard `ubuntu22.04` image.
|
||||
# The `ml-build-rbe`'s base image is `nvidia/cuda:12.3.2-base-ubuntu22.04` which has nvidia driver installed.
|
||||
ml_build_rbe_config("docker://us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build-rbe@sha256:468a498a1f1f49daa257dcf8ee2f653c8c54e7621da511ce3ab7c14fcbd92d6f")
|
||||
# Note that in order to use this image with RBE GPU builds, you need to have hermetic CUDA
|
||||
# toolchain integrated into your project, and pass
|
||||
# `--@cuda_driver//:enable_forward_compatibility=true` to Bazel command.
|
||||
ml_build_rbe_config("docker://us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build@sha256:ea67e8453d8b09c2ba48853da5e79efef4b65804b4a48dfae4b4da89ffd38405")
|
||||
|
||||
# TF-Version-Specific SIG Build RBE Configs. The crosstool generated from these
|
||||
# configs are python-version-independent because they only care about the
|
||||
|
|
|
|||
|
|
@ -140,10 +140,10 @@ def workspace():
|
|||
# Details: https://github.com/google-ml-infra/rules_ml_toolchain
|
||||
http_archive(
|
||||
name = "rules_ml_toolchain",
|
||||
sha256 = "59d7eb36a02cbe3c2e2fa67fda5e8f1ab7e274bc4773bbd207c51fe199e11c19",
|
||||
strip_prefix = "rules_ml_toolchain-ffd9e3d7b84e43c2686c803cb08ce790ffd58baa",
|
||||
sha256 = "77ad040f826af31ce3142e3b8bcf6c61972b4f95c84185676fa1af325fbf52c6",
|
||||
strip_prefix = "rules_ml_toolchain-a912c87727405e2145b168e5b62a5d5ae7232cb2",
|
||||
urls = [
|
||||
"https://github.com/google-ml-infra/rules_ml_toolchain/archive/ffd9e3d7b84e43c2686c803cb08ce790ffd58baa.tar.gz",
|
||||
"https://github.com/google-ml-infra/rules_ml_toolchain/archive/a912c87727405e2145b168e5b62a5d5ae7232cb2.tar.gz",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -85,13 +85,13 @@ case "$HARDWARE_CATEGORY" in
|
|||
device_type_flag_value="host"
|
||||
;;
|
||||
GPU_L4)
|
||||
BUILD_TYPE="XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS" # Or _48_VCPU if that's the more common
|
||||
BUILD_TYPE="XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS" # Or _48_VCPU if that's the more common
|
||||
runner_binary_path="./$BAZEL_BIN_DIR/xla/tools/multihost_hlo_runner/hlo_runner_main_gpu"
|
||||
stats_binary_path="./$BAZEL_BIN_DIR/xla/tools/compute_xspace_stats_main_gpu"
|
||||
device_type_flag_value="gpu"
|
||||
;;
|
||||
GPU_B200)
|
||||
BUILD_TYPE="XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS"
|
||||
BUILD_TYPE="XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS"
|
||||
runner_binary_path="./$BAZEL_BIN_DIR/xla/tools/multihost_hlo_runner/hlo_runner_main_gpu"
|
||||
stats_binary_path="./$BAZEL_BIN_DIR/xla/tools/compute_xspace_stats_main_gpu"
|
||||
device_type_flag_value="gpu"
|
||||
|
|
|
|||
6
third_party/xla/WORKSPACE
vendored
6
third_party/xla/WORKSPACE
vendored
|
|
@ -9,10 +9,10 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
|
|||
# Details: https://github.com/google-ml-infra/rules_ml_toolchain
|
||||
http_archive(
|
||||
name = "rules_ml_toolchain",
|
||||
sha256 = "1a855dd94eebedae69d1804e8837ad70b8018358a0a03eea0bec71d7dc2b096a",
|
||||
strip_prefix = "rules_ml_toolchain-d321763a84c900bc29b4f5459a4f81fad19b2356",
|
||||
sha256 = "77ad040f826af31ce3142e3b8bcf6c61972b4f95c84185676fa1af325fbf52c6",
|
||||
strip_prefix = "rules_ml_toolchain-a912c87727405e2145b168e5b62a5d5ae7232cb2",
|
||||
urls = [
|
||||
"https://github.com/google-ml-infra/rules_ml_toolchain/archive/d321763a84c900bc29b4f5459a4f81fad19b2356.tar.gz",
|
||||
"https://github.com/google-ml-infra/rules_ml_toolchain/archive/a912c87727405e2145b168e5b62a5d5ae7232cb2.tar.gz",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
|||
105
third_party/xla/build_tools/ci/build.py
vendored
105
third_party/xla/build_tools/ci/build.py
vendored
|
|
@ -117,6 +117,9 @@ class BuildType(enum.Enum):
|
|||
XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
|
||||
XLA_LINUX_X86_GPU_L4_48_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
|
||||
XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
|
||||
XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
|
||||
XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
|
||||
XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
|
||||
|
||||
XLA_MACOS_X86_CPU_KOKORO = enum.auto()
|
||||
XLA_MACOS_ARM64_CPU_KOKORO = enum.auto()
|
||||
|
|
@ -429,6 +432,39 @@ Build(
|
|||
subcommand="build",
|
||||
)
|
||||
|
||||
Build(
|
||||
type_=BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS,
|
||||
repo="openxla/xla",
|
||||
target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
|
||||
configs=("warnings", "rbe_linux_cuda_nvcc"),
|
||||
test_tag_filters=(
|
||||
"-no_oss",
|
||||
"requires-gpu-nvidia",
|
||||
"gpu",
|
||||
"-rocm-only",
|
||||
"-oneapi-only",
|
||||
)
|
||||
+ _tag_filters_for_compute_capability(compute_capability=75),
|
||||
build_tag_filters=(
|
||||
"-no_oss",
|
||||
"requires-gpu-nvidia",
|
||||
"gpu",
|
||||
"-rocm-only",
|
||||
"-oneapi-only",
|
||||
),
|
||||
options={
|
||||
"run_under": "//build_tools/ci:parallel_gpu_execute",
|
||||
"//xla/tsl:ci_build": True,
|
||||
"@local_config_cuda//cuda:include_cuda_libs": False,
|
||||
**_DEFAULT_BAZEL_OPTIONS,
|
||||
},
|
||||
repo_env={
|
||||
"TF_CUDA_COMPUTE_CAPABILITIES": "7.5",
|
||||
},
|
||||
extra_setup_commands=(["nvidia-smi"],),
|
||||
subcommand="build",
|
||||
)
|
||||
|
||||
Build(
|
||||
type_=BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_PRESUBMIT_GITHUB_ACTIONS,
|
||||
repo="openxla/xla",
|
||||
|
|
@ -461,6 +497,39 @@ Build(
|
|||
subcommand="build",
|
||||
)
|
||||
|
||||
Build(
|
||||
type_=BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS,
|
||||
repo="openxla/xla",
|
||||
configs=("warnings", "rbe_linux_cuda_nvcc"),
|
||||
target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
|
||||
test_tag_filters=(
|
||||
"-no_oss",
|
||||
"requires-gpu-nvidia",
|
||||
"gpu",
|
||||
"-rocm-only",
|
||||
"-oneapi-only",
|
||||
)
|
||||
+ _tag_filters_for_compute_capability(compute_capability=75),
|
||||
build_tag_filters=(
|
||||
"-no_oss",
|
||||
"requires-gpu-nvidia",
|
||||
"gpu",
|
||||
"-rocm-only",
|
||||
"-oneapi-only",
|
||||
),
|
||||
options={
|
||||
"run_under": "//build_tools/ci:parallel_gpu_execute",
|
||||
"//xla/tsl:ci_build": True,
|
||||
"@local_config_cuda//cuda:include_cuda_libs": False,
|
||||
**_DEFAULT_BAZEL_OPTIONS,
|
||||
},
|
||||
repo_env={
|
||||
"TF_CUDA_COMPUTE_CAPABILITIES": "7.5",
|
||||
},
|
||||
extra_setup_commands=(["nvidia-smi"],),
|
||||
subcommand="build",
|
||||
)
|
||||
|
||||
Build(
|
||||
type_=BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS,
|
||||
repo="openxla/xla",
|
||||
|
|
@ -496,6 +565,42 @@ Build(
|
|||
subcommand="build",
|
||||
)
|
||||
|
||||
Build(
|
||||
type_=BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS,
|
||||
repo="openxla/xla",
|
||||
configs=(),
|
||||
target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
|
||||
test_tag_filters=(
|
||||
"-no_oss",
|
||||
"requires-gpu-nvidia",
|
||||
"gpu",
|
||||
"-rocm-only",
|
||||
"-oneapi-only",
|
||||
)
|
||||
+ _tag_filters_for_compute_capability(compute_capability=100),
|
||||
build_tag_filters=(
|
||||
"-no_oss",
|
||||
"requires-gpu-nvidia",
|
||||
"gpu",
|
||||
"-rocm-only",
|
||||
"-oneapi-only",
|
||||
),
|
||||
options={
|
||||
"run_under": "//build_tools/ci:parallel_gpu_execute",
|
||||
# Use User Mode and Kernel Mode Drivers pre-installed on the system.
|
||||
"//xla/tsl:ci_build": True,
|
||||
"@local_config_cuda//cuda:include_cuda_libs": False,
|
||||
**_DEFAULT_BAZEL_OPTIONS,
|
||||
},
|
||||
repo_env={
|
||||
"TF_CUDA_COMPUTE_CAPABILITIES": "10",
|
||||
"HERMETIC_CUDA_VERSION": "12.8.0",
|
||||
"HERMETIC_CUDNN_VERSION": "9.8.0",
|
||||
},
|
||||
extra_setup_commands=(["nvidia-smi"],),
|
||||
subcommand="build",
|
||||
)
|
||||
|
||||
macos_tag_filter = (
|
||||
"-no_oss",
|
||||
"-gpu",
|
||||
|
|
|
|||
|
|
@ -44,18 +44,36 @@ parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_fi
|
|||
bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=nonccl --config=rbe_linux_cpu --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build -- //xla/... //build_tools/... @local_tsl//tsl/...
|
||||
bazel analyze-profile profile.json.gz
|
||||
# END BuildType.XLA_LINUX_X86_CPU_GITHUB_ACTIONS
|
||||
# BEGIN BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS
|
||||
nvidia-smi
|
||||
parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
bazel analyze-profile profile.json.gz
|
||||
# END BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS
|
||||
# BEGIN BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS
|
||||
nvidia-smi
|
||||
parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
bazel analyze-profile profile.json.gz
|
||||
# END BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS
|
||||
# BEGIN BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS
|
||||
nvidia-smi
|
||||
parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
bazel analyze-profile profile.json.gz
|
||||
# END BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS
|
||||
# BEGIN BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS
|
||||
nvidia-smi
|
||||
parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
bazel analyze-profile profile.json.gz
|
||||
# END BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS
|
||||
# BEGIN BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS
|
||||
nvidia-smi
|
||||
parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
bazel analyze-profile profile.json.gz
|
||||
# END BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS
|
||||
# BEGIN BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_PRESUBMIT_GITHUB_ACTIONS
|
||||
nvidia-smi
|
||||
parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu
|
||||
|
|
|
|||
3
third_party/xla/tensorflow.bazelrc
vendored
3
third_party/xla/tensorflow.bazelrc
vendored
|
|
@ -545,6 +545,9 @@ build:rbe_linux_cuda --config=rbe_linux_cpu
|
|||
build:rbe_linux_cuda --repo_env=TF_SYSROOT=
|
||||
# For Remote build execution -- GPU configuration
|
||||
build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
|
||||
# Enable forward compatibility for CUDA builds because RBE docker image doesn't
|
||||
# have latest CUDA drivers installed.
|
||||
build:rbe_linux_cuda --@cuda_driver//:enable_forward_compatibility=true
|
||||
|
||||
build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
|
||||
build:rbe_linux_cuda_nvcc --config=cuda_nvcc
|
||||
|
|
|
|||
|
|
@ -47,10 +47,11 @@ def initialize_rbe_configs():
|
|||
python_bin_path = "C:/Python37/python.exe",
|
||||
)
|
||||
|
||||
# The `ml-build-rbe` image is identical to the `ml-build` image except for the base image.
|
||||
# The `ml-build`'s base image is a standard `ubuntu22.04` image.
|
||||
# The `ml-build-rbe`'s base image is `nvidia/cuda:12.3.2-base-ubuntu22.04` which has nvidia driver installed.
|
||||
ml_build_rbe_config("docker://us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build-rbe@sha256:468a498a1f1f49daa257dcf8ee2f653c8c54e7621da511ce3ab7c14fcbd92d6f")
|
||||
# Note that in order to use this image with RBE GPU builds, you need to have hermetic CUDA
|
||||
# toolchain integrated into your project, and pass
|
||||
# `--@cuda_driver//:enable_forward_compatibility=true` to Bazel command.
|
||||
ml_build_rbe_config("docker://us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build@sha256:ea67e8453d8b09c2ba48853da5e79efef4b65804b4a48dfae4b4da89ffd38405")
|
||||
|
||||
# TF-Version-Specific SIG Build RBE Configs. The crosstool generated from these
|
||||
# configs are python-version-independent because they only care about the
|
||||
|
|
|
|||
6
third_party/xla/workspace0.bzl
vendored
6
third_party/xla/workspace0.bzl
vendored
|
|
@ -140,10 +140,10 @@ def workspace():
|
|||
if "rules_ml_toolchain" not in native.existing_rules():
|
||||
http_archive(
|
||||
name = "rules_ml_toolchain",
|
||||
sha256 = "1a855dd94eebedae69d1804e8837ad70b8018358a0a03eea0bec71d7dc2b096a",
|
||||
strip_prefix = "rules_ml_toolchain-d321763a84c900bc29b4f5459a4f81fad19b2356",
|
||||
sha256 = "77ad040f826af31ce3142e3b8bcf6c61972b4f95c84185676fa1af325fbf52c6",
|
||||
strip_prefix = "rules_ml_toolchain-a912c87727405e2145b168e5b62a5d5ae7232cb2",
|
||||
urls = [
|
||||
"https://github.com/google-ml-infra/rules_ml_toolchain/archive/d321763a84c900bc29b4f5459a4f81fad19b2356.tar.gz",
|
||||
"https://github.com/google-ml-infra/rules_ml_toolchain/archive/a912c87727405e2145b168e5b62a5d5ae7232cb2.tar.gz",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user