Add Kokoro continuous job for testing XLA Linux GPU with NVCC.
PiperOrigin-RevId: 577849947
parent 88c88b89b4
commit 88e5914db5
.bazelrc (30 changed lines)
@@ -55,6 +55,7 @@
 #
 # rbe_linux_cpu: RBE options to build with only CPU support.
 # rbe_linux_cuda: RBE options to build with GPU support using clang.
+# rbe_linux_cuda_nvcc: RBE options to build with GPU support using nvcc.
 #
 # rbe_win_py39: Windows Python 3.9 RBE config
 #
@@ -525,6 +526,35 @@ build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.16-clang_c
 build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.16-clang_config_nccl"
 test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+
+build:rbe_linux_cuda_nvcc --config=cuda
+build:rbe_linux_cuda_nvcc --repo_env TF_NCCL_USE_STUB=1
+build:rbe_linux_cuda_nvcc --@local_xla//xla/python:enable_gpu=true
+build:rbe_linux_cuda_nvcc --@local_xla//xla/python:jax_cuda_pip_rpaths=true
+build:rbe_linux_cuda_nvcc --define=xla_python_enable_gpu=true
+build:rbe_linux_cuda_nvcc --config=tensorrt
+build:rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_60,sm_70,sm_75,compute_80"
+build:rbe_linux_cuda_nvcc --action_env=TF_CUDA_VERSION="12"
+build:rbe_linux_cuda_nvcc --action_env=TF_CUDNN_VERSION="8"
+build:rbe_linux_cuda_nvcc --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.2"
+build:rbe_linux_cuda_nvcc --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
+build:rbe_linux_cuda_nvcc --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+build:rbe_linux_cuda_nvcc --crosstool_top="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_cuda//crosstool:toolchain"
+build:rbe_linux_cuda_nvcc --config=rbe_linux
+build:rbe_linux_cuda_nvcc --host_crosstool_top="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_cuda//crosstool:toolchain"
+build:rbe_linux_cuda_nvcc --extra_toolchains="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_cuda//crosstool:toolchain-linux-x86_64"
+build:rbe_linux_cuda_nvcc --extra_execution_platforms="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_platform//:platform"
+build:rbe_linux_cuda_nvcc --host_platform="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_platform//:platform"
+build:rbe_linux_cuda_nvcc --platforms="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_platform//:platform"
+build:rbe_linux_cuda_nvcc --repo_env=TF_PYTHON_CONFIG_REPO="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_python3.9"
+build:rbe_linux_cuda_nvcc --python_path="/usr/bin/python3"
+# These you may need to change for your own GCP project.
+common:rbe_linux_cuda_nvcc --remote_instance_name=projects/tensorflow-testing/instances/default_instance
+build:rbe_linux_cuda_nvcc --repo_env=REMOTE_GPU_TESTING=1
+build:rbe_linux_cuda_nvcc --repo_env=TF_CUDA_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9_config_cuda"
+build:rbe_linux_cuda_nvcc --repo_env=TF_TENSORRT_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9_config_tensorrt"
+build:rbe_linux_cuda_nvcc --repo_env=TF_NCCL_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9_config_nccl"
+test:rbe_linux_cuda_nvcc --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 
 # TODO(kanglan): Remove rbe_win and rbe_win_py3* after b/289091160 is fixed
 build:rbe_win --config=rbe_base
 build:rbe_win --crosstool_top="//tensorflow/tools/toolchains/win/tf_win_05022023:toolchain"
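Note on usage: once this lands, the new config is selected like any other RBE config. A minimal sketch of a manual invocation, assuming you have credentials for the tensorflow-testing RBE instance referenced above; the target pattern and tag filters are illustrative and are not part of this commit:

    # Hypothetical manual run of the new config (placeholder target pattern).
    bazel test \
        --config=rbe_linux_cuda_nvcc \
        --build_tag_filters=gpu,requires-gpu-nvidia,-no_gpu \
        --test_tag_filters=gpu,requires-gpu-nvidia,-no_gpu \
        -- //xla/...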
third_party/xla/.bazelrc (vendored, 30 changed lines)
@@ -55,6 +55,7 @@
 #
 # rbe_linux_cpu: RBE options to build with only CPU support.
 # rbe_linux_cuda: RBE options to build with GPU support using clang.
+# rbe_linux_cuda_nvcc: RBE options to build with GPU support using nvcc.
 #
 # rbe_win_py39: Windows Python 3.9 RBE config
 #
@@ -525,6 +526,35 @@ build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.16-clang_c
 build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.16-clang_config_nccl"
 test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+
+build:rbe_linux_cuda_nvcc --config=cuda
+build:rbe_linux_cuda_nvcc --repo_env TF_NCCL_USE_STUB=1
+build:rbe_linux_cuda_nvcc --@local_xla//xla/python:enable_gpu=true
+build:rbe_linux_cuda_nvcc --@local_xla//xla/python:jax_cuda_pip_rpaths=true
+build:rbe_linux_cuda_nvcc --define=xla_python_enable_gpu=true
+build:rbe_linux_cuda_nvcc --config=tensorrt
+build:rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_60,sm_70,sm_75,compute_80"
+build:rbe_linux_cuda_nvcc --action_env=TF_CUDA_VERSION="12"
+build:rbe_linux_cuda_nvcc --action_env=TF_CUDNN_VERSION="8"
+build:rbe_linux_cuda_nvcc --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.2"
+build:rbe_linux_cuda_nvcc --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
+build:rbe_linux_cuda_nvcc --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+build:rbe_linux_cuda_nvcc --crosstool_top="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_cuda//crosstool:toolchain"
+build:rbe_linux_cuda_nvcc --config=rbe_linux
+build:rbe_linux_cuda_nvcc --host_crosstool_top="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_cuda//crosstool:toolchain"
+build:rbe_linux_cuda_nvcc --extra_toolchains="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_cuda//crosstool:toolchain-linux-x86_64"
+build:rbe_linux_cuda_nvcc --extra_execution_platforms="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_platform//:platform"
+build:rbe_linux_cuda_nvcc --host_platform="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_platform//:platform"
+build:rbe_linux_cuda_nvcc --platforms="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_platform//:platform"
+build:rbe_linux_cuda_nvcc --repo_env=TF_PYTHON_CONFIG_REPO="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_python3.9"
+build:rbe_linux_cuda_nvcc --python_path="/usr/bin/python3"
+# These you may need to change for your own GCP project.
+common:rbe_linux_cuda_nvcc --remote_instance_name=projects/tensorflow-testing/instances/default_instance
+build:rbe_linux_cuda_nvcc --repo_env=REMOTE_GPU_TESTING=1
+build:rbe_linux_cuda_nvcc --repo_env=TF_CUDA_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9_config_cuda"
+build:rbe_linux_cuda_nvcc --repo_env=TF_TENSORRT_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9_config_tensorrt"
+build:rbe_linux_cuda_nvcc --repo_env=TF_NCCL_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9_config_nccl"
+test:rbe_linux_cuda_nvcc --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 
 # TODO(kanglan): Remove rbe_win and rbe_win_py3* after b/289091160 is fixed
 build:rbe_win --config=rbe_base
 build:rbe_win --crosstool_top="//tensorflow/tools/toolchains/win/tf_win_05022023:toolchain"
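The vendored copy carries the same two hunks as the root .bazelrc. To inspect what the new config actually expands to before launching anything remotely, bazel's --announce_rc flag prints every option it picked up from the rc files; a small sketch, where the label //xla:xla is only a placeholder:

    # Print the rc-file options contributed by --config=rbe_linux_cuda_nvcc.
    bazel build --announce_rc --nobuild --config=rbe_linux_cuda_nvcc //xla:xla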
third_party/xla/.kokoro/linux/build.sh (vendored, 13 changed lines)
@@ -26,6 +26,10 @@ function is_linux_gpu_job() {
   [[ "$KOKORO_JOB_NAME" =~ tensorflow/xla/linux/.*gpu.* ]]
 }
 
+function is_use_nvcc() {
+  [[ -z "${USE_NVCC:-}" ]] || [[ "$USE_NVCC" == "true" ]]
+}
+
 # Pull the container (in case it was updated since the instance started) and
 # store its SHA in the Sponge log.
 docker pull "$DOCKER_IMAGE"
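The new helper makes NVCC the default: it succeeds when USE_NVCC is unset or set to "true", and fails only for any other value. A quick sanity check of that logic in a plain bash shell, with the function body copied verbatim from the hunk above:

    function is_use_nvcc() {
      [[ -z "${USE_NVCC:-}" ]] || [[ "$USE_NVCC" == "true" ]]
    }

    unset USE_NVCC; is_use_nvcc && echo "unset -> nvcc"    # default is nvcc
    USE_NVCC=true;  is_use_nvcc && echo "true  -> nvcc"
    USE_NVCC=false; is_use_nvcc || echo "false -> clang"   # anything else falls back to clang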
@@ -44,16 +48,23 @@ RC_FILE="/usertools/cpu.bazelrc"
 TARGET_FILTER=""
 TAGS_FILTER="-no_oss,-oss_excluded,-oss_serial"
 ADDITIONAL_FLAGS=""
+RBE_CONFIG=""
 
 if is_linux_gpu_job ; then
   TAGS_FILTER="$TAGS_FILTER,gpu,requires-gpu-nvidia,-no_gpu"
   ADDITIONAL_FLAGS="$ADDITIONAL_FLAGS --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute"
   RC_FILE="/usertools/gpu.bazelrc"
+  if is_use_nvcc ; then
+    RBE_CONFIG="rbe_linux_cuda_nvcc"
+  else
+    RBE_CONFIG="rbe_linux_cuda"
+  fi
   echo "***NOTE: nvidia-smi lists the highest CUDA version the driver supports, which may be different than the version of CUDA actually used!!***"
   nvidia-smi
 else
   TAGS_FILTER="$TAGS_FILTER,-gpu,-requires-gpu-nvidia"
   ADDITIONAL_FLAGS="$ADDITIONAL_FLAGS --config=nonccl"
+  RBE_CONFIG="rbe_linux_cpu"
 fi
 
 # Build & test XLA
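With this branch in place, the continuous GPU job picks rbe_linux_cuda_nvcc unless the job environment opts out. A hedged sketch of how a job could request the existing clang config instead; the job name is illustrative, and the other variables the script relies on (DOCKER_IMAGE and friends) are assumed to be provided by the Kokoro environment:

    # Illustrative only: the actual Kokoro job definitions live outside this diff.
    export KOKORO_JOB_NAME="tensorflow/xla/linux/gpu/continuous"  # matches is_linux_gpu_job
    export USE_NVCC=false                                         # select rbe_linux_cuda (clang) instead of nvcc
    ./third_party/xla/.kokoro/linux/build.sh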
@@ -65,7 +76,7 @@ docker exec xla bazel --bazelrc=$RC_FILE \
         --features=layering_check \
         --profile=/tf/pkg/profile.json.gz \
         --flaky_test_attempts=3 \
-        --config=rbe \
+        --config=$RBE_CONFIG \
         --jobs=150 \
         --nobuild_tests_only \
         $ADDITIONAL_FLAGS \
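Putting the pieces together, the GPU-with-NVCC job would end up invoking bazel roughly as follows. This is only a sketch: the test subcommand is implied by the test-only flags above, and the trailing target list comes from parts of build.sh that are not shown in this diff:

    # Approximate effective command inside the container for the NVCC job.
    RC_FILE="/usertools/gpu.bazelrc"
    RBE_CONFIG="rbe_linux_cuda_nvcc"
    docker exec xla bazel --bazelrc=$RC_FILE test \
        --flaky_test_attempts=3 \
        --config=$RBE_CONFIG \
        --jobs=150 \
        --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute \
        -- //xla/...   # placeholder target pattern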
third_party/xla/third_party/tsl/.bazelrc (vendored, 30 changed lines)
@@ -55,6 +55,7 @@
 #
 # rbe_linux_cpu: RBE options to build with only CPU support.
 # rbe_linux_cuda: RBE options to build with GPU support using clang.
+# rbe_linux_cuda_nvcc: RBE options to build with GPU support using nvcc.
 #
 # rbe_win_py39: Windows Python 3.9 RBE config
 #
@@ -525,6 +526,35 @@ build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.16-clang_c
 build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.16-clang_config_nccl"
 test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+
+build:rbe_linux_cuda_nvcc --config=cuda
+build:rbe_linux_cuda_nvcc --repo_env TF_NCCL_USE_STUB=1
+build:rbe_linux_cuda_nvcc --@local_xla//xla/python:enable_gpu=true
+build:rbe_linux_cuda_nvcc --@local_xla//xla/python:jax_cuda_pip_rpaths=true
+build:rbe_linux_cuda_nvcc --define=xla_python_enable_gpu=true
+build:rbe_linux_cuda_nvcc --config=tensorrt
+build:rbe_linux_cuda_nvcc --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_60,sm_70,sm_75,compute_80"
+build:rbe_linux_cuda_nvcc --action_env=TF_CUDA_VERSION="12"
+build:rbe_linux_cuda_nvcc --action_env=TF_CUDNN_VERSION="8"
+build:rbe_linux_cuda_nvcc --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.2"
+build:rbe_linux_cuda_nvcc --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
+build:rbe_linux_cuda_nvcc --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+build:rbe_linux_cuda_nvcc --crosstool_top="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_cuda//crosstool:toolchain"
+build:rbe_linux_cuda_nvcc --config=rbe_linux
+build:rbe_linux_cuda_nvcc --host_crosstool_top="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_cuda//crosstool:toolchain"
+build:rbe_linux_cuda_nvcc --extra_toolchains="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_cuda//crosstool:toolchain-linux-x86_64"
+build:rbe_linux_cuda_nvcc --extra_execution_platforms="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_platform//:platform"
+build:rbe_linux_cuda_nvcc --host_platform="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_platform//:platform"
+build:rbe_linux_cuda_nvcc --platforms="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_platform//:platform"
+build:rbe_linux_cuda_nvcc --repo_env=TF_PYTHON_CONFIG_REPO="@ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9_config_python3.9"
+build:rbe_linux_cuda_nvcc --python_path="/usr/bin/python3"
+# These you may need to change for your own GCP project.
+common:rbe_linux_cuda_nvcc --remote_instance_name=projects/tensorflow-testing/instances/default_instance
+build:rbe_linux_cuda_nvcc --repo_env=REMOTE_GPU_TESTING=1
+build:rbe_linux_cuda_nvcc --repo_env=TF_CUDA_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9_config_cuda"
+build:rbe_linux_cuda_nvcc --repo_env=TF_TENSORRT_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9_config_tensorrt"
+build:rbe_linux_cuda_nvcc --repo_env=TF_NCCL_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9_config_nccl"
+test:rbe_linux_cuda_nvcc --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 
 # TODO(kanglan): Remove rbe_win and rbe_win_py3* after b/289091160 is fixed
 build:rbe_win --config=rbe_base
 build:rbe_win --crosstool_top="//tensorflow/tools/toolchains/win/tf_win_05022023:toolchain"