#!/usr/bin/python3
# Copyright 2024 The OpenXLA Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""XLA build script for use in CI.
This build script aims to be completely agnostic to the specifics of the VM, the
exceptions are uses of `KOKORO_ARTIFACTS_DIR` and `GITHUB_WORKSPACE` to know
where JAX or TensorFlow lives depending on which build is being executed.
To update the goldens associated with this file, run:
```PYTHONDONTWRITEBYTECODE=1 python3 build.py \
--dump_commands > golden_commands.txt```
"""
import argparse
import dataclasses
import enum
import logging
import os
import subprocess
import sys
from typing import Any, ClassVar, Dict, List, Tuple
# TODO(ddunleavy): move this to the bazelrc
_DEFAULT_BAZEL_OPTIONS = dict(
color="yes",
test_output="errors",
verbose_failures=True,
keep_going=True,
nobuild_tests_only=True,
profile="profile.json.gz",
flaky_test_attempts=3,
jobs=150,
bes_upload_mode="fully_async",
)
_KW_ONLY_IF_PYTHON310 = {"kw_only": True} if sys.version_info >= (3, 10) else {}
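# Note: `dataclasses.dataclass` only accepts `kw_only` on Python 3.10+, so
# `_KW_ONLY_IF_PYTHON310` applies it conditionally; on older interpreters the
# `Build` fields below are simply not keyword-only.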
_XLA_DEFAULT_TARGET_PATTERNS = (
"//xla/...",
"//build_tools/...",
"@local_tsl//tsl/...",
)
_XLA_ONEAPI_TARGET_PATTERNS = (
"//xla/stream_executor/sycl/...",
"//xla/service/gpu/...",
)
_XLA_CPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS = (
"//xla/tools/multihost_hlo_runner:hlo_runner_main",
"//xla/tools:compute_xspace_stats_main",
)
_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS = (
"//xla/tools/multihost_hlo_runner:hlo_runner_main_gpu",
"//xla/tools:compute_xspace_stats_main_gpu",
)
_KOKORO_ARTIFACTS_DIR = os.environ.get(
"KOKORO_ARTIFACTS_DIR", "$KOKORO_ARTIFACTS_DIR"
)
_GITHUB_WORKSPACE = os.environ.get("GITHUB_WORKSPACE", "$GITHUB_WORKSPACE")
def retry(
args: List[str], delay_seconds: int = 15, retries: int = 3
) -> List[str]:
# Possibly a slight abuse of `parallel` as nothing happens in parallel, just
# retries with delay if the command fails.
# pyformat:disable
return [
"parallel", "--ungroup",
"--retries", str(retries),
"--delay", str(delay_seconds),
"--nonall",
"--", *args,
]
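# Illustrative only, assuming GNU parallel is installed and the arguments are
# a hypothetical bazel invocation:
#   retry(["bazel", "build", "//xla/..."])
#   == ["parallel", "--ungroup", "--retries", "3", "--delay", "15",
#       "--nonall", "--", "bazel", "build", "//xla/..."]
# i.e. `parallel` re-runs the single wrapped command on failure, up to 3
# retries, 15 seconds apart.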
def sh(args, check=True, **kwargs):
logging.info("Starting process: %s", " ".join(args))
return subprocess.run(args, check=check, **kwargs)
def _dict_to_cli_options(d: Dict[str, Any]) -> List[str]:
# pylint: disable=g-bool-id-comparison
return [f"--{k}" if v is True else f"--{k}={v}" for k, v in d.items()]
def _write_to_sponge_config(key, value) -> None:
with open("custom_sponge_config.csv", "a") as f:
f.write(f"{key},{value}\n")
class BuildType(enum.Enum):
"""Enum representing all types of builds.
Should be named as `REPO,OS,HOST_TYPE,BACKEND,GPU_TYPE,CI_TYPE`.
"""
XLA_LINUX_X86_CPU_GITHUB_ACTIONS = enum.auto()
XLA_LINUX_ARM64_CPU_GITHUB_ACTIONS = enum.auto()
XLA_LINUX_X86_GPU_L4_GITHUB_ACTIONS = enum.auto()
XLA_LINUX_X86_GPU_ONEAPI_GITHUB_ACTIONS = enum.auto()
# Presubmit builds for regression testing.
XLA_LINUX_ARM64_CPU_48_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
XLA_LINUX_X86_CPU_128_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
XLA_LINUX_X86_GPU_L4_48_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS = enum.auto()
XLA_MACOS_X86_CPU_KOKORO = enum.auto()
XLA_MACOS_ARM64_CPU_KOKORO = enum.auto()
JAX_LINUX_X86_CPU_GITHUB_ACTIONS = enum.auto()
JAX_LINUX_X86_GPU_L4_GITHUB_ACTIONS = enum.auto()
TENSORFLOW_LINUX_X86_CPU_GITHUB_ACTIONS = enum.auto()
TENSORFLOW_LINUX_X86_GPU_L4_GITHUB_ACTIONS = enum.auto()
@classmethod
def from_str(cls, s):
try:
return cls[s.replace(" ", "_").upper()]
except KeyError:
# Sloppy looking exception handling, but argparse will catch ValueError
# and give a pleasant error message. KeyError would not work here.
raise ValueError # pylint: disable=raise-missing-from
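  # Illustrative only: `from_str` normalizes a flag value such as
  # "xla linux x86 cpu github actions" to
  # BuildType.XLA_LINUX_X86_CPU_GITHUB_ACTIONS.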
@dataclasses.dataclass(frozen=True, **_KW_ONLY_IF_PYTHON310)
class Build:
"""Class representing a build of XLA."""
_builds: ClassVar[Dict[BuildType, "Build"]] = {}
type_: BuildType
repo: str
target_patterns: Tuple[str, ...]
subcommand: str = "test"
configs: Tuple[str, ...] = ()
build_tag_filters: Tuple[str, ...] = ()
test_tag_filters: Tuple[str, ...] = ()
action_env: Dict[str, Any] = dataclasses.field(default_factory=dict)
test_env: Dict[str, Any] = dataclasses.field(default_factory=dict)
repo_env: Dict[str, Any] = dataclasses.field(default_factory=dict)
override_repository: Dict[str, str] = dataclasses.field(default_factory=dict)
options: Dict[str, Any] = dataclasses.field(default_factory=dict)
extra_setup_commands: Tuple[List[str], ...] = ()
def __post_init__(self):
# pylint: disable=protected-access
assert (
self.type_ not in self.__class__._builds
), "Can't have multiple builds of same BuildType!"
assert (
self.repo == "openxla/xla" or self.override_repository
), "Must override repo if repo under test isn't XLA!"
self.__class__._builds[self.type_] = self
@classmethod
def all_builds(cls):
return cls._builds
def bazel_command(
self, subcommand: str = "test", extra_options: Tuple[str, ...] = ()
) -> List[str]:
"""Returns a bazel test command for this build.
Args:
subcommand: The subcommand to give to bazel. `test` by default.
extra_options: Extra options. For now just used to pass in `--nobuild`.
Returns: List of command line arguments
"""
options = _dict_to_cli_options(self.options)
configs = [f"--config={config}" for config in self.configs]
build_tag_filters = (
f"--build_tag_filters={','.join(self.build_tag_filters)}"
)
test_tag_filters = f"--test_tag_filters={','.join(self.test_tag_filters)}"
action_env = [f"--action_env={k}={v}" for k, v in self.action_env.items()]
test_env = [f"--test_env={k}={v}" for k, v in self.test_env.items()]
repo_env = [f"--repo_env={k}={v}" for k, v in self.repo_env.items()]
override_repository = [
f"--override_repository={k}={v}"
for k, v in self.override_repository.items()
]
tag_filters = [build_tag_filters, test_tag_filters]
all_options = (
tag_filters
+ configs
+ action_env
+ test_env
+ repo_env
+ override_repository
+ options
+ list(extra_options)
)
return ["bazel", subcommand, *all_options, "--", *self.target_patterns]
def commands(self) -> List[List[str]]:
"""Returns list of commands for a build."""
cmds = []
cmds.extend(self.extra_setup_commands)
# We really want `bazel fetch` here, but it uses `bazel query` and not
# `cquery`, which means that it fails due to config issues that aren't
# problems in practice.
# TODO(ddunleavy): Remove the condition here. Need to get parallel on the
# MacOS VM.
macos_build = (
self.type_ == BuildType.XLA_MACOS_X86_CPU_KOKORO
or self.type_ == BuildType.XLA_MACOS_ARM64_CPU_KOKORO
)
if not macos_build:
cmds.append(
retry(
self.bazel_command(
subcommand="build", extra_options=("--nobuild",)
)
)
)
cmds.append(self.bazel_command(subcommand=self.subcommand))
cmds.append(["bazel", "analyze-profile", "profile.json.gz"])
return cmds
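# Illustrative only: for a non-macOS build, `Build.commands()` produces three
# steps:
#   1. a retry-wrapped `bazel build --nobuild ...` that resolves and fetches
#      dependencies without building (the `bazel fetch` stand-in noted above),
#   2. the real `bazel <subcommand> ...` invocation, and
#   3. `bazel analyze-profile profile.json.gz` to summarize the run.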
def _tag_filters_for_compute_capability(
compute_capability: int,
) -> Tuple[str, ...]:
"""Returns the tag filters for the given compute capability."""
tag_filters = (f"requires-gpu-sm{compute_capability}-only",)
for cc in (60, 70, 80, 90, 100):
if compute_capability >= cc:
tag_filters += (f"requires-gpu-sm{cc}",)
else:
tag_filters += (f"-requires-gpu-sm{cc}",)
tag_filters += (f"-requires-gpu-sm{cc}-only",)
tag_filters += ("-requires-gpu-amd",)
tag_filters += ("-requires-gpu-intel",)
return tag_filters
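# Illustrative only: _tag_filters_for_compute_capability(75) returns
#   ("requires-gpu-sm75-only", "requires-gpu-sm60", "requires-gpu-sm70",
#    "-requires-gpu-sm80", "-requires-gpu-sm80-only", "-requires-gpu-sm90",
#    "-requires-gpu-sm90-only", "-requires-gpu-sm100",
#    "-requires-gpu-sm100-only", "-requires-gpu-amd", "-requires-gpu-intel"),
# i.e. it opts in to capabilities at or below the GPU's and excludes tests
# that need newer hardware or other vendors.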
def nvidia_gpu_build_with_compute_capability(
*,
type_: BuildType,
configs: Tuple[str, ...],
compute_capability: int,
) -> Build:
extra_gpu_tags = _tag_filters_for_compute_capability(compute_capability)
return Build(
type_=type_,
repo="openxla/xla",
target_patterns=_XLA_DEFAULT_TARGET_PATTERNS,
configs=configs,
test_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
)
+ extra_gpu_tags,
build_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
),
options={
"run_under": "//build_tools/ci:parallel_gpu_execute",
"//xla/tsl:ci_build": True,
**_DEFAULT_BAZEL_OPTIONS,
},
repo_env={"TF_CUDA_COMPUTE_CAPABILITIES": f"{compute_capability/10}"},
extra_setup_commands=(["nvidia-smi"],),
)
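# Illustrative only: the factory derives the CUDA compute-capability string by
# dividing the integer capability by 10, e.g. compute_capability=75 sets
# TF_CUDA_COMPUTE_CAPABILITIES=7.5 (as in the L4 builds below).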
cpu_x86_tag_filter = (
"-no_oss",
"-gpu",
"-requires-gpu-nvidia",
"-requires-gpu-amd",
"-requires-gpu-intel",
)
Build(
type_=BuildType.XLA_LINUX_X86_CPU_GITHUB_ACTIONS,
repo="openxla/xla",
configs=("warnings", "nonccl", "rbe_linux_cpu"),
target_patterns=_XLA_DEFAULT_TARGET_PATTERNS,
build_tag_filters=cpu_x86_tag_filter,
test_tag_filters=cpu_x86_tag_filter,
options={**_DEFAULT_BAZEL_OPTIONS, "//xla/tsl:ci_build": True},
)
cpu_arm_tag_filter = (
"-no_oss",
"-gpu",
"-requires-gpu-nvidia",
"-requires-gpu-amd",
"-requires-gpu-intel",
"-not_run:arm",
)
Build(
type_=BuildType.XLA_LINUX_ARM64_CPU_GITHUB_ACTIONS,
repo="openxla/xla",
configs=("warnings", "rbe_cross_compile_linux_arm64", "nonccl"),
target_patterns=_XLA_DEFAULT_TARGET_PATTERNS,
options={
**_DEFAULT_BAZEL_OPTIONS,
"build_tests_only": True,
"//xla/tsl:ci_build": True,
},
build_tag_filters=cpu_arm_tag_filter,
test_tag_filters=cpu_arm_tag_filter,
)
nvidia_gpu_build_with_compute_capability(
type_=BuildType.XLA_LINUX_X86_GPU_L4_GITHUB_ACTIONS,
configs=("warnings", "rbe_linux_cuda_nvcc"),
compute_capability=75,
)
oneapi_build_tag_filter = (
"oneapi-only",
"requires-gpu-intel",
"-requires-gpu-amd",
"-requires-gpu-nvidia",
"-no_oss",
"-cuda-only",
"-rocm-only",
"-no-oneapi",
)
oneapi_test_tag_filter = (
"oneapi-only",
    # This build of the oneAPI backend runs on an x86 host without an Intel
    # GPU, so we exclude the tests that require an Intel GPU.
"-requires-gpu-intel",
"-requires-gpu-amd",
"-requires-gpu-nvidia",
"-no_oss",
"-cuda-only",
"-rocm-only",
"-no-oneapi",
)
Build(
type_=BuildType.XLA_LINUX_X86_GPU_ONEAPI_GITHUB_ACTIONS,
repo="openxla/xla",
configs=("sycl", "sycl_hermetic", "icpx_clang"),
target_patterns=_XLA_ONEAPI_TARGET_PATTERNS,
build_tag_filters=oneapi_build_tag_filter,
test_tag_filters=oneapi_test_tag_filter,
options={**_DEFAULT_BAZEL_OPTIONS, "//xla/tsl:ci_build": True},
subcommand="build",
)
Build(
type_=BuildType.XLA_LINUX_X86_CPU_128_VCPU_PRESUBMIT_GITHUB_ACTIONS,
repo="openxla/xla",
configs=("warnings", "nonccl", "rbe_linux_cpu"),
target_patterns=_XLA_CPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
build_tag_filters=cpu_x86_tag_filter,
test_tag_filters=cpu_x86_tag_filter,
options={**_DEFAULT_BAZEL_OPTIONS, "//xla/tsl:ci_build": True},
subcommand="build",
)
Build(
type_=BuildType.XLA_LINUX_ARM64_CPU_48_VCPU_PRESUBMIT_GITHUB_ACTIONS,
repo="openxla/xla",
configs=("warnings", "rbe_cross_compile_linux_arm64", "nonccl"),
target_patterns=_XLA_CPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
options={
**_DEFAULT_BAZEL_OPTIONS,
"build_tests_only": False,
"//xla/tsl:ci_build": True,
},
build_tag_filters=cpu_arm_tag_filter,
test_tag_filters=cpu_arm_tag_filter,
subcommand="build",
)
Build(
type_=BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS,
repo="openxla/xla",
target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
configs=("warnings", "rbe_linux_cuda_nvcc"),
test_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
)
+ _tag_filters_for_compute_capability(compute_capability=75),
build_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
),
options={
"run_under": "//build_tools/ci:parallel_gpu_execute",
"//xla/tsl:ci_build": True,
**_DEFAULT_BAZEL_OPTIONS,
},
repo_env={
"TF_CUDA_COMPUTE_CAPABILITIES": "7.5",
},
extra_setup_commands=(["nvidia-smi"],),
subcommand="build",
)
Build(
type_=BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS,
repo="openxla/xla",
target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
configs=("warnings", "rbe_linux_cuda_nvcc"),
test_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
)
+ _tag_filters_for_compute_capability(compute_capability=75),
build_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
),
options={
"run_under": "//build_tools/ci:parallel_gpu_execute",
"//xla/tsl:ci_build": True,
"@local_config_cuda//cuda:include_cuda_libs": False,
**_DEFAULT_BAZEL_OPTIONS,
},
repo_env={
"TF_CUDA_COMPUTE_CAPABILITIES": "7.5",
},
extra_setup_commands=(["nvidia-smi"],),
subcommand="build",
)
Build(
type_=BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_PRESUBMIT_GITHUB_ACTIONS,
repo="openxla/xla",
configs=("warnings", "rbe_linux_cuda_nvcc"),
target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
test_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
)
+ _tag_filters_for_compute_capability(compute_capability=75),
build_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
),
options={
"run_under": "//build_tools/ci:parallel_gpu_execute",
"//xla/tsl:ci_build": True,
**_DEFAULT_BAZEL_OPTIONS,
},
repo_env={
"TF_CUDA_COMPUTE_CAPABILITIES": "7.5",
},
extra_setup_commands=(["nvidia-smi"],),
subcommand="build",
)
Build(
type_=BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS,
repo="openxla/xla",
configs=("warnings", "rbe_linux_cuda_nvcc"),
target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
test_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
)
+ _tag_filters_for_compute_capability(compute_capability=75),
build_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
),
options={
"run_under": "//build_tools/ci:parallel_gpu_execute",
"//xla/tsl:ci_build": True,
"@local_config_cuda//cuda:include_cuda_libs": False,
**_DEFAULT_BAZEL_OPTIONS,
},
repo_env={
"TF_CUDA_COMPUTE_CAPABILITIES": "7.5",
},
extra_setup_commands=(["nvidia-smi"],),
subcommand="build",
)
Build(
type_=BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS,
repo="openxla/xla",
configs=(),
target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
test_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
)
+ _tag_filters_for_compute_capability(compute_capability=100),
build_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
),
options={
"run_under": "//build_tools/ci:parallel_gpu_execute",
# Use User Mode and Kernel Mode Drivers pre-installed on the system.
"//xla/tsl:ci_build": True,
**_DEFAULT_BAZEL_OPTIONS,
},
repo_env={
"TF_CUDA_COMPUTE_CAPABILITIES": "10",
"HERMETIC_CUDA_VERSION": "12.8.0",
"HERMETIC_CUDNN_VERSION": "9.8.0",
},
extra_setup_commands=(["nvidia-smi"],),
subcommand="build",
)
Build(
type_=BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS,
repo="openxla/xla",
configs=(),
target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
test_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
)
+ _tag_filters_for_compute_capability(compute_capability=100),
build_tag_filters=(
"-no_oss",
"requires-gpu-nvidia",
"gpu",
"-rocm-only",
"-oneapi-only",
),
options={
"run_under": "//build_tools/ci:parallel_gpu_execute",
# Use User Mode and Kernel Mode Drivers pre-installed on the system.
"//xla/tsl:ci_build": True,
"@local_config_cuda//cuda:include_cuda_libs": False,
**_DEFAULT_BAZEL_OPTIONS,
},
repo_env={
"TF_CUDA_COMPUTE_CAPABILITIES": "10",
"HERMETIC_CUDA_VERSION": "12.8.0",
"HERMETIC_CUDNN_VERSION": "9.8.0",
},
extra_setup_commands=(["nvidia-smi"],),
subcommand="build",
)
macos_tag_filter = (
"-no_oss",
"-gpu",
"-no_mac",
"-mac_excluded",
"-requires-gpu-nvidia",
"-requires-gpu-amd",
"-requires-gpu-intel",
)
Build(
type_=BuildType.XLA_MACOS_X86_CPU_KOKORO,
repo="openxla/xla",
configs=("nonccl",),
target_patterns=(
"//xla/...",
"-//xla/hlo/experimental/...",
"-//xla/python_api/...",
"-//xla/python/...",
"-//xla/service/gpu/...",
),
options={
**_DEFAULT_BAZEL_OPTIONS,
"macos_minimum_os": "10.15",
"test_tmpdir": "/Volumes/BuildData/bazel_output",
"define": "xnn_enable_avxvnniint8=false",
"//xla/tsl:ci_build": True,
},
build_tag_filters=macos_tag_filter,
test_tag_filters=macos_tag_filter,
extra_setup_commands=(
[
"sudo",
"wget",
"--no-verbose",
"-O",
"/usr/local/bin/bazel",
"https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-darwin-amd64",
],
["chmod", "+x", "/usr/local/bin/bazel"],
["bazel", "--version"], # Sanity check due to strange failures
["mkdir", "-p", "/Volumes/BuildData/bazel_output"],
),
)
Build(
type_=BuildType.XLA_MACOS_ARM64_CPU_KOKORO,
repo="openxla/xla",
configs=("nonccl",),
target_patterns=(
"//xla/...",
"-//xla/hlo/experimental/...",
"-//xla/python_api/...",
"-//xla/python/...",
"-//xla/service/gpu/...",
),
options={
**_DEFAULT_BAZEL_OPTIONS,
"macos_minimum_os": "10.15",
"test_tmpdir": "/tmpfs/bazel_output",
"test_size_filters": "small,medium",
"define": "xnn_enable_avxvnniint8=false",
"//xla/tsl:ci_build": True,
},
build_tag_filters=macos_tag_filter,
test_tag_filters=macos_tag_filter,
extra_setup_commands=(
["df", "-h"], # Debug "No space left on device" error: b/396611909.
["bazel", "--version"], # Sanity check due to strange failures
["mkdir", "-p", "/tmpfs/bazel_output"],
),
)
Build(
type_=BuildType.JAX_LINUX_X86_CPU_GITHUB_ACTIONS,
repo="google/jax",
configs=("rbe_linux_x86_64",),
target_patterns=("//tests:cpu_tests", "//tests:backend_independent_tests"),
test_env=dict(
JAX_NUM_GENERATED_CASES=25,
JAX_SKIP_SLOW_TESTS=1,
),
override_repository=dict(
xla=f"{_GITHUB_WORKSPACE}/openxla/xla",
),
options=_DEFAULT_BAZEL_OPTIONS,
repo_env={"HERMETIC_PYTHON_VERSION": "3.12"},
)
Build(
type_=BuildType.JAX_LINUX_X86_GPU_L4_GITHUB_ACTIONS,
repo="google/jax",
configs=("rbe_linux_x86_64_cuda",),
target_patterns=("//tests:gpu_tests", "//tests:backend_independent_tests"),
build_tag_filters=("-multiaccelerator",),
test_tag_filters=("-multiaccelerator",),
test_env=dict(
JAX_SKIP_SLOW_TESTS=1,
TF_CPP_MIN_LOG_LEVEL=0,
JAX_EXCLUDE_TEST_TARGETS="PmapTest.testSizeOverflow",
),
override_repository=dict(
xla=f"{_GITHUB_WORKSPACE}/openxla/xla",
),
options=_DEFAULT_BAZEL_OPTIONS,
repo_env={"HERMETIC_PYTHON_VERSION": "3.11"},
extra_setup_commands=(["nvidia-smi"],),
)
tensorflow_tag_filters = (
"-no_oss",
"-tf_tosa",
"-oss_excluded",
"-oss_serial",
"-tpu",
"-benchmark-test",
"-v1only",
)
tensorflow_cpu_tag_filters = tensorflow_tag_filters + ("-gpu",)
tensorflow_gpu_tag_filters = tensorflow_tag_filters + (
"-no_gpu",
"-no_gpu_presubmit",
"-no_cuda11",
"+gpu",
)
Build(
type_=BuildType.TENSORFLOW_LINUX_X86_CPU_GITHUB_ACTIONS,
repo="tensorflow/tensorflow",
configs=(
"release_cpu_linux",
"rbe_linux_cpu",
),
target_patterns=(
"//tensorflow/compiler/...",
"-//tensorflow/compiler/tf2tensorrt/...",
"//tensorflow/python/...",
"-//tensorflow/python/distribute/...",
"-//tensorflow/python/kernel_tests/...",
"-//tensorflow/python/data/...",
"-//tensorflow/python/compiler/tensorrt/...",
),
build_tag_filters=tensorflow_cpu_tag_filters,
test_tag_filters=tensorflow_cpu_tag_filters,
options=dict(
verbose_failures=True,
test_output="errors",
profile="profile.json.gz",
test_lang_filters="cc,py",
color="yes",
),
override_repository=dict(
local_xla=f"{_GITHUB_WORKSPACE}/openxla/xla",
),
repo_env={"USE_PYWRAP_RULES": "True"},
extra_setup_commands=(
    # This is pretty devious, but we have to do some ad hoc extra Copybara
    # work here to get XLA into the shape TF expects. b/407638223
    # pyformat:disable
[
"find",
f"{_GITHUB_WORKSPACE}/openxla/xla",
"-type", "f",
"-exec", "sed", "-i", "s/@local_xla/@local_xla/g", "{}", "+",
],
[
"find",
f"{_GITHUB_WORKSPACE}/openxla/xla",
"-type", "f",
"-exec", "sed", "-i", "s/@local_tsl/@local_tsl/g", "{}", "+",
],
),
)
Build(
type_=BuildType.TENSORFLOW_LINUX_X86_GPU_L4_GITHUB_ACTIONS,
repo="tensorflow/tensorflow",
configs=(
"release_gpu_linux",
"rbe_linux_cuda",
),
target_patterns=(
"//tensorflow/compiler/...",
"-//tensorflow/compiler/tf2tensorrt/...",
"//tensorflow/python/...",
"-//tensorflow/python/distribute/...",
"-//tensorflow/python/kernel_tests/...",
"-//tensorflow/python/data/...",
"-//tensorflow/python/compiler/tensorrt/...",
),
build_tag_filters=tensorflow_gpu_tag_filters,
test_tag_filters=tensorflow_gpu_tag_filters,
override_repository=dict(
local_xla=f"{_GITHUB_WORKSPACE}/openxla/xla",
),
options=dict(
verbose_failures=True,
test_output="errors",
profile="profile.json.gz",
test_lang_filters="cc,py",
color="yes",
),
repo_env={"USE_PYWRAP_RULES": "True"},
extra_setup_commands=(
    # This is pretty devious, but we have to do some ad hoc extra Copybara
    # work here to get XLA into the shape TF expects. b/407638223
    # pyformat:disable
[
"find",
f"{_GITHUB_WORKSPACE}/openxla/xla",
"-type", "f",
"-exec", "sed", "-i", "s/@local_xla/@local_xla/g", "{}", "+",
],
[
"find",
f"{_GITHUB_WORKSPACE}/openxla/xla",
"-type", "f",
"-exec", "sed", "-i", "s/@local_tsl/@local_tsl/g", "{}", "+",
],
["nvidia-smi"],
),
)
def dump_all_build_commands():
"""Used to generate what commands are run for each build."""
# Awkward workaround b/c Build instances are not hashable
for build in sorted(Build.all_builds().values(), key=lambda b: str(b.type_)):
sys.stdout.write(f"# BEGIN {build.type_}\n")
for cmd in build.commands():
sys.stdout.write(" ".join(cmd) + "\n")
sys.stdout.write(f"# END {build.type_}\n")
def _parse_args():
"""Defines flags and parses args."""
parser = argparse.ArgumentParser(allow_abbrev=False)
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument(
"--build",
type=BuildType.from_str,
choices=list(BuildType),
)
group.add_argument(
"--dump_commands",
action="store_true",
)
return parser.parse_args()
def main():
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)
args = _parse_args()
  if args.dump_commands:
    dump_all_build_commands()
    return

  for cmd in Build.all_builds()[args.build].commands():
    sh(cmd)
if __name__ == "__main__":
main()